polars-df 0.4.0 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +12 -0
  3. data/Cargo.lock +272 -191
  4. data/Cargo.toml +0 -1
  5. data/README.md +2 -2
  6. data/ext/polars/Cargo.toml +8 -4
  7. data/ext/polars/src/apply/dataframe.rs +2 -2
  8. data/ext/polars/src/{lazy/apply.rs → apply/lazy.rs} +1 -2
  9. data/ext/polars/src/apply/mod.rs +1 -0
  10. data/ext/polars/src/batched_csv.rs +7 -5
  11. data/ext/polars/src/conversion.rs +106 -4
  12. data/ext/polars/src/dataframe.rs +19 -17
  13. data/ext/polars/src/error.rs +0 -4
  14. data/ext/polars/src/expr/binary.rs +69 -0
  15. data/ext/polars/src/expr/categorical.rs +10 -0
  16. data/ext/polars/src/expr/datetime.rs +223 -0
  17. data/ext/polars/src/expr/general.rs +933 -0
  18. data/ext/polars/src/expr/list.rs +146 -0
  19. data/ext/polars/src/{lazy → expr}/meta.rs +16 -6
  20. data/ext/polars/src/expr/string.rs +313 -0
  21. data/ext/polars/src/expr/struct.rs +15 -0
  22. data/ext/polars/src/expr.rs +33 -0
  23. data/ext/polars/src/functions/eager.rs +93 -0
  24. data/ext/polars/src/functions/io.rs +34 -0
  25. data/ext/polars/src/functions/lazy.rs +209 -0
  26. data/ext/polars/src/functions/meta.rs +8 -0
  27. data/ext/polars/src/functions/mod.rs +5 -0
  28. data/ext/polars/src/functions/whenthen.rs +43 -0
  29. data/ext/polars/src/{lazy/dataframe.rs → lazyframe.rs} +12 -33
  30. data/ext/polars/src/lazygroupby.rs +29 -0
  31. data/ext/polars/src/lib.rs +205 -303
  32. data/ext/polars/src/rb_modules.rs +8 -0
  33. data/ext/polars/src/series/aggregation.rs +83 -0
  34. data/ext/polars/src/series/arithmetic.rs +88 -0
  35. data/ext/polars/src/series/comparison.rs +251 -0
  36. data/ext/polars/src/series/construction.rs +164 -0
  37. data/ext/polars/src/series.rs +99 -539
  38. data/lib/polars/convert.rb +2 -2
  39. data/lib/polars/data_frame.rb +201 -50
  40. data/lib/polars/data_types.rb +6 -4
  41. data/lib/polars/date_time_expr.rb +142 -2
  42. data/lib/polars/expr.rb +70 -10
  43. data/lib/polars/lazy_frame.rb +4 -3
  44. data/lib/polars/lazy_functions.rb +4 -1
  45. data/lib/polars/list_expr.rb +68 -19
  46. data/lib/polars/series.rb +181 -73
  47. data/lib/polars/string_expr.rb +149 -43
  48. data/lib/polars/string_name_space.rb +4 -4
  49. data/lib/polars/struct_name_space.rb +32 -0
  50. data/lib/polars/utils.rb +41 -7
  51. data/lib/polars/version.rb +1 -1
  52. data/lib/polars.rb +2 -2
  53. metadata +26 -11
  54. data/ext/polars/src/lazy/dsl.rs +0 -1775
  55. data/ext/polars/src/lazy/mod.rs +0 -5
  56. data/ext/polars/src/lazy/utils.rs +0 -13
  57. data/ext/polars/src/list_construction.rs +0 -100
  58. /data/ext/polars/src/{numo.rs → series/export.rs} +0 -0
  59. /data/ext/polars/src/{set.rs → series/set_at_idx.rs} +0 -0
@@ -3,31 +3,27 @@ mod batched_csv;
3
3
  mod conversion;
4
4
  mod dataframe;
5
5
  mod error;
6
+ mod expr;
6
7
  mod file;
7
- mod lazy;
8
- mod list_construction;
9
- mod numo;
8
+ mod functions;
9
+ mod lazyframe;
10
+ mod lazygroupby;
10
11
  mod object;
11
12
  mod prelude;
12
13
  pub(crate) mod rb_modules;
13
14
  mod series;
14
- mod set;
15
15
  mod utils;
16
16
 
17
17
  use batched_csv::RbBatchedCsv;
18
18
  use conversion::*;
19
19
  use dataframe::RbDataFrame;
20
20
  use error::{RbPolarsErr, RbValueError};
21
- use file::get_file_like;
22
- use lazy::dataframe::{RbLazyFrame, RbLazyGroupBy};
23
- use lazy::dsl::{RbExpr, RbWhen, RbWhenThen};
24
- use lazy::utils::rb_exprs_to_exprs;
25
- use magnus::{define_module, function, method, prelude::*, Error, IntoValue, RArray, RHash, Value};
26
- use polars::datatypes::{DataType, TimeUnit, IDX_DTYPE};
27
- use polars::error::PolarsResult;
28
- use polars::frame::DataFrame;
29
- use polars::functions::{diag_concat_df, hor_concat_df};
30
- use polars::prelude::{ClosedWindow, Duration, DurationArgs, IntoSeries, TimeZone};
21
+ use expr::rb_exprs_to_exprs;
22
+ use expr::RbExpr;
23
+ use functions::whenthen::{RbWhen, RbWhenThen};
24
+ use lazyframe::RbLazyFrame;
25
+ use lazygroupby::RbLazyGroupBy;
26
+ use magnus::{define_module, function, method, prelude::*, Error};
31
27
  use series::RbSeries;
32
28
 
33
29
  #[cfg(target_os = "linux")]
@@ -49,22 +45,70 @@ type RbResult<T> = Result<T, Error>;
49
45
  #[magnus::init]
50
46
  fn init() -> RbResult<()> {
51
47
  let module = define_module("Polars")?;
52
- module.define_singleton_method("_dtype_cols", function!(dtype_cols, 1))?;
53
- module.define_singleton_method("_rb_duration", function!(rb_duration, 8))?;
54
- module.define_singleton_method("_concat_df", function!(concat_df, 1))?;
55
- module.define_singleton_method("_concat_lf", function!(concat_lf, 3))?;
56
- module.define_singleton_method("_diag_concat_df", function!(rb_diag_concat_df, 1))?;
57
- module.define_singleton_method("_hor_concat_df", function!(rb_hor_concat_df, 1))?;
58
- module.define_singleton_method("_concat_series", function!(concat_series, 1))?;
59
- module.define_singleton_method("_ipc_schema", function!(ipc_schema, 1))?;
60
- module.define_singleton_method("_parquet_schema", function!(parquet_schema, 1))?;
61
- module.define_singleton_method("_collect_all", function!(collect_all, 1))?;
62
- module.define_singleton_method("_rb_date_range", function!(rb_date_range, 7))?;
63
- module.define_singleton_method("_coalesce_exprs", function!(coalesce_exprs, 1))?;
64
- module.define_singleton_method("_sum_exprs", function!(sum_exprs, 1))?;
65
- module.define_singleton_method("_as_struct", function!(as_struct, 1))?;
66
- module.define_singleton_method("_arg_where", function!(arg_where, 1))?;
67
- module.define_singleton_method("_get_idx_type", function!(get_idx_type, 0))?;
48
+ module.define_singleton_method(
49
+ "_dtype_cols",
50
+ function!(crate::functions::lazy::dtype_cols2, 1),
51
+ )?;
52
+ module.define_singleton_method(
53
+ "_rb_duration",
54
+ function!(crate::functions::lazy::duration, 8),
55
+ )?;
56
+ module.define_singleton_method(
57
+ "_concat_df",
58
+ function!(crate::functions::eager::concat_df, 1),
59
+ )?;
60
+ module.define_singleton_method(
61
+ "_concat_lf",
62
+ function!(crate::functions::lazy::concat_lf, 3),
63
+ )?;
64
+ module.define_singleton_method(
65
+ "_diag_concat_df",
66
+ function!(crate::functions::eager::diag_concat_df, 1),
67
+ )?;
68
+ module.define_singleton_method(
69
+ "_hor_concat_df",
70
+ function!(crate::functions::eager::hor_concat_df, 1),
71
+ )?;
72
+ module.define_singleton_method(
73
+ "_concat_series",
74
+ function!(crate::functions::eager::concat_series, 1),
75
+ )?;
76
+ module.define_singleton_method(
77
+ "_ipc_schema",
78
+ function!(crate::functions::io::read_ipc_schema, 1),
79
+ )?;
80
+ module.define_singleton_method(
81
+ "_parquet_schema",
82
+ function!(crate::functions::io::read_parquet_schema, 1),
83
+ )?;
84
+ module.define_singleton_method(
85
+ "_collect_all",
86
+ function!(crate::functions::lazy::collect_all, 1),
87
+ )?;
88
+ module.define_singleton_method(
89
+ "_rb_date_range",
90
+ function!(crate::functions::eager::date_range, 7),
91
+ )?;
92
+ module.define_singleton_method(
93
+ "_coalesce_exprs",
94
+ function!(crate::functions::lazy::coalesce, 1),
95
+ )?;
96
+ module.define_singleton_method(
97
+ "_sum_exprs",
98
+ function!(crate::functions::lazy::sum_exprs, 1),
99
+ )?;
100
+ module.define_singleton_method(
101
+ "_as_struct",
102
+ function!(crate::functions::lazy::as_struct, 1),
103
+ )?;
104
+ module.define_singleton_method(
105
+ "_arg_where",
106
+ function!(crate::functions::lazy::arg_where, 1),
107
+ )?;
108
+ module.define_singleton_method(
109
+ "_get_idx_type",
110
+ function!(crate::functions::meta::get_idx_type, 0),
111
+ )?;
68
112
 
69
113
  let class = module.define_class("RbBatchedCsv", Default::default())?;
70
114
  class.define_singleton_method("new", function!(RbBatchedCsv::new, -1))?;
@@ -209,7 +253,7 @@ fn init() -> RbResult<()> {
209
253
  class.define_method("unique_stable", method!(RbExpr::unique_stable, 0))?;
210
254
  class.define_method("first", method!(RbExpr::first, 0))?;
211
255
  class.define_method("last", method!(RbExpr::last, 0))?;
212
- class.define_method("list", method!(RbExpr::list, 0))?;
256
+ class.define_method("implode", method!(RbExpr::implode, 0))?;
213
257
  class.define_method("quantile", method!(RbExpr::quantile, 2))?;
214
258
  class.define_method("agg_groups", method!(RbExpr::agg_groups, 0))?;
215
259
  class.define_method("count", method!(RbExpr::count, 0))?;
@@ -219,7 +263,8 @@ fn init() -> RbResult<()> {
219
263
  class.define_method("cast", method!(RbExpr::cast, 2))?;
220
264
  class.define_method("sort_with", method!(RbExpr::sort_with, 2))?;
221
265
  class.define_method("arg_sort", method!(RbExpr::arg_sort, 2))?;
222
- class.define_method("top_k", method!(RbExpr::top_k, 2))?;
266
+ class.define_method("top_k", method!(RbExpr::top_k, 1))?;
267
+ class.define_method("bottom_k", method!(RbExpr::bottom_k, 1))?;
223
268
  class.define_method("arg_max", method!(RbExpr::arg_max, 0))?;
224
269
  class.define_method("arg_min", method!(RbExpr::arg_min, 0))?;
225
270
  class.define_method("search_sorted", method!(RbExpr::search_sorted, 2))?;
@@ -242,6 +287,7 @@ fn init() -> RbResult<()> {
242
287
  class.define_method("std", method!(RbExpr::std, 1))?;
243
288
  class.define_method("var", method!(RbExpr::var, 1))?;
244
289
  class.define_method("is_unique", method!(RbExpr::is_unique, 0))?;
290
+ class.define_method("approx_unique", method!(RbExpr::approx_unique, 0))?;
245
291
  class.define_method("is_first", method!(RbExpr::is_first, 0))?;
246
292
  class.define_method("explode", method!(RbExpr::explode, 0))?;
247
293
  class.define_method("take_every", method!(RbExpr::take_every, 1))?;
@@ -284,9 +330,9 @@ fn init() -> RbResult<()> {
284
330
  class.define_method("cumprod", method!(RbExpr::cumprod, 1))?;
285
331
  class.define_method("product", method!(RbExpr::product, 0))?;
286
332
  class.define_method("shrink_dtype", method!(RbExpr::shrink_dtype, 0))?;
287
- class.define_method("str_parse_date", method!(RbExpr::str_parse_date, 4))?;
288
- class.define_method("str_parse_datetime", method!(RbExpr::str_parse_datetime, 6))?;
289
- class.define_method("str_parse_time", method!(RbExpr::str_parse_time, 4))?;
333
+ class.define_method("str_parse_date", method!(RbExpr::str_to_date, 4))?;
334
+ class.define_method("str_parse_datetime", method!(RbExpr::str_to_datetime, 8))?;
335
+ class.define_method("str_parse_time", method!(RbExpr::str_to_time, 3))?;
290
336
  class.define_method("str_strip", method!(RbExpr::str_strip, 1))?;
291
337
  class.define_method("str_rstrip", method!(RbExpr::str_rstrip, 1))?;
292
338
  class.define_method("str_lstrip", method!(RbExpr::str_lstrip, 1))?;
@@ -295,7 +341,7 @@ fn init() -> RbResult<()> {
295
341
  class.define_method("str_to_lowercase", method!(RbExpr::str_to_lowercase, 0))?;
296
342
  class.define_method("str_lengths", method!(RbExpr::str_lengths, 0))?;
297
343
  class.define_method("str_n_chars", method!(RbExpr::str_n_chars, 0))?;
298
- class.define_method("str_replace", method!(RbExpr::str_replace, 3))?;
344
+ class.define_method("str_replace_n", method!(RbExpr::str_replace_n, 4))?;
299
345
  class.define_method("str_replace_all", method!(RbExpr::str_replace_all, 3))?;
300
346
  class.define_method("str_zfill", method!(RbExpr::str_zfill, 1))?;
301
347
  class.define_method("str_ljust", method!(RbExpr::str_ljust, 2))?;
@@ -303,22 +349,24 @@ fn init() -> RbResult<()> {
303
349
  class.define_method("str_contains", method!(RbExpr::str_contains, 3))?;
304
350
  class.define_method("str_ends_with", method!(RbExpr::str_ends_with, 1))?;
305
351
  class.define_method("str_starts_with", method!(RbExpr::str_starts_with, 1))?;
306
- class.define_method("binary_contains", method!(RbExpr::binary_contains, 1))?;
307
- class.define_method("binary_ends_with", method!(RbExpr::binary_ends_with, 1))?;
308
- class.define_method("binary_starts_with", method!(RbExpr::binary_starts_with, 1))?;
352
+ class.define_method("binary_contains", method!(RbExpr::bin_contains, 1))?;
353
+ class.define_method("binary_ends_with", method!(RbExpr::bin_ends_with, 1))?;
354
+ class.define_method("binary_starts_with", method!(RbExpr::bin_starts_with, 1))?;
309
355
  class.define_method("str_hex_encode", method!(RbExpr::str_hex_encode, 0))?;
310
356
  class.define_method("str_hex_decode", method!(RbExpr::str_hex_decode, 1))?;
311
357
  class.define_method("str_base64_encode", method!(RbExpr::str_base64_encode, 0))?;
312
358
  class.define_method("str_base64_decode", method!(RbExpr::str_base64_decode, 1))?;
313
- class.define_method("binary_hex_encode", method!(RbExpr::binary_hex_encode, 0))?;
314
- class.define_method("binary_hex_decode", method!(RbExpr::binary_hex_decode, 1))?;
359
+ class.define_method("str_parse_int", method!(RbExpr::str_parse_int, 2))?;
360
+ class.define_method("str_json_extract", method!(RbExpr::str_json_extract, 1))?;
361
+ class.define_method("binary_hex_encode", method!(RbExpr::bin_hex_encode, 0))?;
362
+ class.define_method("binary_hex_decode", method!(RbExpr::bin_hex_decode, 1))?;
315
363
  class.define_method(
316
364
  "binary_base64_encode",
317
- method!(RbExpr::binary_base64_encode, 0),
365
+ method!(RbExpr::bin_base64_encode, 0),
318
366
  )?;
319
367
  class.define_method(
320
368
  "binary_base64_decode",
321
- method!(RbExpr::binary_base64_decode, 1),
369
+ method!(RbExpr::bin_base64_decode, 1),
322
370
  )?;
323
371
  class.define_method(
324
372
  "str_json_path_match",
@@ -326,8 +374,8 @@ fn init() -> RbResult<()> {
326
374
  )?;
327
375
  class.define_method("str_extract", method!(RbExpr::str_extract, 2))?;
328
376
  class.define_method("str_extract_all", method!(RbExpr::str_extract_all, 1))?;
329
- class.define_method("count_match", method!(RbExpr::count_match, 1))?;
330
- class.define_method("strftime", method!(RbExpr::strftime, 1))?;
377
+ class.define_method("count_match", method!(RbExpr::str_count_match, 1))?;
378
+ class.define_method("strftime", method!(RbExpr::dt_to_string, 1))?;
331
379
  class.define_method("str_split", method!(RbExpr::str_split, 1))?;
332
380
  class.define_method(
333
381
  "str_split_inclusive",
@@ -339,22 +387,27 @@ fn init() -> RbResult<()> {
339
387
  method!(RbExpr::str_split_exact_inclusive, 2),
340
388
  )?;
341
389
  class.define_method("str_splitn", method!(RbExpr::str_splitn, 2))?;
342
- class.define_method("arr_lengths", method!(RbExpr::arr_lengths, 0))?;
343
- class.define_method("arr_contains", method!(RbExpr::arr_contains, 1))?;
344
- class.define_method("year", method!(RbExpr::year, 0))?;
345
- class.define_method("iso_year", method!(RbExpr::iso_year, 0))?;
346
- class.define_method("quarter", method!(RbExpr::quarter, 0))?;
347
- class.define_method("month", method!(RbExpr::month, 0))?;
348
- class.define_method("week", method!(RbExpr::week, 0))?;
349
- class.define_method("weekday", method!(RbExpr::weekday, 0))?;
350
- class.define_method("day", method!(RbExpr::day, 0))?;
351
- class.define_method("ordinal_day", method!(RbExpr::ordinal_day, 0))?;
352
- class.define_method("hour", method!(RbExpr::hour, 0))?;
353
- class.define_method("minute", method!(RbExpr::minute, 0))?;
354
- class.define_method("second", method!(RbExpr::second, 0))?;
355
- class.define_method("millisecond", method!(RbExpr::millisecond, 0))?;
356
- class.define_method("microsecond", method!(RbExpr::microsecond, 0))?;
357
- class.define_method("nanosecond", method!(RbExpr::nanosecond, 0))?;
390
+ class.define_method("list_lengths", method!(RbExpr::list_lengths, 0))?;
391
+ class.define_method("list_contains", method!(RbExpr::list_contains, 1))?;
392
+ class.define_method("list_count_match", method!(RbExpr::list_count_match, 1))?;
393
+ class.define_method("year", method!(RbExpr::dt_year, 0))?;
394
+ class.define_method("dt_is_leap_year", method!(RbExpr::dt_is_leap_year, 0))?;
395
+ class.define_method("iso_year", method!(RbExpr::dt_iso_year, 0))?;
396
+ class.define_method("quarter", method!(RbExpr::dt_quarter, 0))?;
397
+ class.define_method("month", method!(RbExpr::dt_month, 0))?;
398
+ class.define_method("week", method!(RbExpr::dt_week, 0))?;
399
+ class.define_method("weekday", method!(RbExpr::dt_weekday, 0))?;
400
+ class.define_method("day", method!(RbExpr::dt_day, 0))?;
401
+ class.define_method("ordinal_day", method!(RbExpr::dt_ordinal_day, 0))?;
402
+ class.define_method("dt_time", method!(RbExpr::dt_time, 0))?;
403
+ class.define_method("dt_date", method!(RbExpr::dt_date, 0))?;
404
+ class.define_method("dt_datetime", method!(RbExpr::dt_datetime, 0))?;
405
+ class.define_method("hour", method!(RbExpr::dt_hour, 0))?;
406
+ class.define_method("minute", method!(RbExpr::dt_minute, 0))?;
407
+ class.define_method("second", method!(RbExpr::dt_second, 0))?;
408
+ class.define_method("millisecond", method!(RbExpr::dt_millisecond, 0))?;
409
+ class.define_method("microsecond", method!(RbExpr::dt_microsecond, 0))?;
410
+ class.define_method("nanosecond", method!(RbExpr::dt_nanosecond, 0))?;
358
411
  class.define_method("duration_days", method!(RbExpr::duration_days, 0))?;
359
412
  class.define_method("duration_hours", method!(RbExpr::duration_hours, 0))?;
360
413
  class.define_method("duration_minutes", method!(RbExpr::duration_minutes, 0))?;
@@ -371,7 +424,7 @@ fn init() -> RbResult<()> {
371
424
  "duration_milliseconds",
372
425
  method!(RbExpr::duration_milliseconds, 0),
373
426
  )?;
374
- class.define_method("timestamp", method!(RbExpr::timestamp, 1))?;
427
+ class.define_method("timestamp", method!(RbExpr::dt_timestamp, 1))?;
375
428
  class.define_method("dt_offset_by", method!(RbExpr::dt_offset_by, 1))?;
376
429
  class.define_method("dt_epoch_seconds", method!(RbExpr::dt_epoch_seconds, 0))?;
377
430
  class.define_method("dt_with_time_unit", method!(RbExpr::dt_with_time_unit, 1))?;
@@ -382,11 +435,14 @@ fn init() -> RbResult<()> {
382
435
  class.define_method("dt_cast_time_unit", method!(RbExpr::dt_cast_time_unit, 1))?;
383
436
  class.define_method(
384
437
  "dt_replace_time_zone",
385
- method!(RbExpr::dt_replace_time_zone, 1),
438
+ method!(RbExpr::dt_replace_time_zone, 2),
386
439
  )?;
387
440
  class.define_method("dt_tz_localize", method!(RbExpr::dt_tz_localize, 1))?;
388
441
  class.define_method("dt_truncate", method!(RbExpr::dt_truncate, 2))?;
442
+ class.define_method("dt_month_start", method!(RbExpr::dt_month_start, 0))?;
443
+ class.define_method("dt_month_end", method!(RbExpr::dt_month_end, 0))?;
389
444
  class.define_method("dt_round", method!(RbExpr::dt_round, 2))?;
445
+ class.define_method("dt_combine", method!(RbExpr::dt_combine, 2))?;
390
446
  class.define_method("map", method!(RbExpr::map, 3))?;
391
447
  class.define_method("dot", method!(RbExpr::dot, 1))?;
392
448
  class.define_method("reinterpret", method!(RbExpr::reinterpret, 1))?;
@@ -408,24 +464,25 @@ fn init() -> RbResult<()> {
408
464
  class.define_method("rolling_skew", method!(RbExpr::rolling_skew, 2))?;
409
465
  class.define_method("lower_bound", method!(RbExpr::lower_bound, 0))?;
410
466
  class.define_method("upper_bound", method!(RbExpr::upper_bound, 0))?;
411
- class.define_method("lst_max", method!(RbExpr::lst_max, 0))?;
412
- class.define_method("lst_min", method!(RbExpr::lst_min, 0))?;
413
- class.define_method("lst_sum", method!(RbExpr::lst_sum, 0))?;
414
- class.define_method("lst_mean", method!(RbExpr::lst_mean, 0))?;
415
- class.define_method("lst_sort", method!(RbExpr::lst_sort, 1))?;
416
- class.define_method("lst_reverse", method!(RbExpr::lst_reverse, 0))?;
417
- class.define_method("lst_unique", method!(RbExpr::lst_unique, 0))?;
418
- class.define_method("lst_get", method!(RbExpr::lst_get, 1))?;
419
- class.define_method("lst_join", method!(RbExpr::lst_join, 1))?;
420
- class.define_method("lst_arg_min", method!(RbExpr::lst_arg_min, 0))?;
421
- class.define_method("lst_arg_max", method!(RbExpr::lst_arg_max, 0))?;
422
- class.define_method("lst_diff", method!(RbExpr::lst_diff, 2))?;
423
- class.define_method("lst_shift", method!(RbExpr::lst_shift, 1))?;
424
- class.define_method("lst_slice", method!(RbExpr::lst_slice, 2))?;
425
- class.define_method("lst_eval", method!(RbExpr::lst_eval, 2))?;
467
+ class.define_method("list_max", method!(RbExpr::list_max, 0))?;
468
+ class.define_method("list_min", method!(RbExpr::list_min, 0))?;
469
+ class.define_method("list_sum", method!(RbExpr::list_sum, 0))?;
470
+ class.define_method("list_take", method!(RbExpr::list_take, 2))?;
471
+ class.define_method("list_mean", method!(RbExpr::list_mean, 0))?;
472
+ class.define_method("list_sort", method!(RbExpr::list_sort, 1))?;
473
+ class.define_method("list_reverse", method!(RbExpr::list_reverse, 0))?;
474
+ class.define_method("list_unique", method!(RbExpr::list_unique, 1))?;
475
+ class.define_method("list_get", method!(RbExpr::list_get, 1))?;
476
+ class.define_method("list_join", method!(RbExpr::list_join, 1))?;
477
+ class.define_method("list_arg_min", method!(RbExpr::list_arg_min, 0))?;
478
+ class.define_method("list_arg_max", method!(RbExpr::list_arg_max, 0))?;
479
+ class.define_method("list_diff", method!(RbExpr::list_diff, 2))?;
480
+ class.define_method("list_shift", method!(RbExpr::list_shift, 1))?;
481
+ class.define_method("list_slice", method!(RbExpr::list_slice, 2))?;
482
+ class.define_method("list_eval", method!(RbExpr::list_eval, 2))?;
426
483
  class.define_method("cumulative_eval", method!(RbExpr::cumulative_eval, 3))?;
427
- class.define_method("lst_to_struct", method!(RbExpr::lst_to_struct, 3))?;
428
- class.define_method("rank", method!(RbExpr::rank, 2))?;
484
+ class.define_method("list_to_struct", method!(RbExpr::list_to_struct, 3))?;
485
+ class.define_method("rank", method!(RbExpr::rank, 3))?;
429
486
  class.define_method("diff", method!(RbExpr::diff, 2))?;
430
487
  class.define_method("pct_change", method!(RbExpr::pct_change, 1))?;
431
488
  class.define_method("skew", method!(RbExpr::skew, 1))?;
@@ -464,31 +521,51 @@ fn init() -> RbResult<()> {
464
521
  // meta
465
522
  class.define_method("meta_pop", method!(RbExpr::meta_pop, 0))?;
466
523
  class.define_method("meta_eq", method!(RbExpr::meta_eq, 1))?;
467
- class.define_method("meta_roots", method!(RbExpr::meta_roots, 0))?;
524
+ class.define_method("meta_roots", method!(RbExpr::meta_root_names, 0))?;
468
525
  class.define_method("meta_output_name", method!(RbExpr::meta_output_name, 0))?;
469
526
  class.define_method("meta_undo_aliases", method!(RbExpr::meta_undo_aliases, 0))?;
527
+ class.define_method(
528
+ "meta_has_multiple_outputs",
529
+ method!(RbExpr::meta_has_multiple_outputs, 0),
530
+ )?;
531
+ class.define_method(
532
+ "meta_is_regex_projection",
533
+ method!(RbExpr::meta_is_regex_projection, 0),
534
+ )?;
470
535
 
471
536
  // maybe add to different class
472
- class.define_singleton_method("col", function!(crate::lazy::dsl::col, 1))?;
473
- class.define_singleton_method("count", function!(crate::lazy::dsl::count, 0))?;
474
- class.define_singleton_method("first", function!(crate::lazy::dsl::first, 0))?;
475
- class.define_singleton_method("last", function!(crate::lazy::dsl::last, 0))?;
476
- class.define_singleton_method("cols", function!(crate::lazy::dsl::cols, 1))?;
477
- class.define_singleton_method("fold", function!(crate::lazy::dsl::fold, 3))?;
478
- class.define_singleton_method("cumfold", function!(crate::lazy::dsl::cumfold, 4))?;
479
- class.define_singleton_method("lit", function!(crate::lazy::dsl::lit, 1))?;
480
- class.define_singleton_method("arange", function!(crate::lazy::dsl::arange, 3))?;
481
- class.define_singleton_method("repeat", function!(crate::lazy::dsl::repeat, 2))?;
482
- class.define_singleton_method("pearson_corr", function!(crate::lazy::dsl::pearson_corr, 3))?;
537
+ class.define_singleton_method("col", function!(crate::functions::lazy::col, 1))?;
538
+ class.define_singleton_method("count", function!(crate::functions::lazy::count, 0))?;
539
+ class.define_singleton_method("first", function!(crate::functions::lazy::first, 0))?;
540
+ class.define_singleton_method("last", function!(crate::functions::lazy::last, 0))?;
541
+ class.define_singleton_method("cols", function!(crate::functions::lazy::cols, 1))?;
542
+ class.define_singleton_method("fold", function!(crate::functions::lazy::fold, 3))?;
543
+ class.define_singleton_method("cumfold", function!(crate::functions::lazy::cumfold, 4))?;
544
+ class.define_singleton_method("lit", function!(crate::functions::lazy::lit, 1))?;
545
+ class.define_singleton_method("arange", function!(crate::functions::lazy::arange, 3))?;
546
+ class.define_singleton_method("repeat", function!(crate::functions::lazy::repeat, 2))?;
547
+ class.define_singleton_method(
548
+ "pearson_corr",
549
+ function!(crate::functions::lazy::pearson_corr, 3),
550
+ )?;
483
551
  class.define_singleton_method(
484
552
  "spearman_rank_corr",
485
- function!(crate::lazy::dsl::spearman_rank_corr, 4),
553
+ function!(crate::functions::lazy::spearman_rank_corr, 4),
554
+ )?;
555
+ class.define_singleton_method("cov", function!(crate::functions::lazy::cov, 2))?;
556
+ class.define_singleton_method(
557
+ "arg_sort_by",
558
+ function!(crate::functions::lazy::arg_sort_by, 2),
559
+ )?;
560
+ class.define_singleton_method("when", function!(crate::functions::whenthen::when, 1))?;
561
+ class.define_singleton_method(
562
+ "concat_str",
563
+ function!(crate::functions::lazy::concat_str, 2),
564
+ )?;
565
+ class.define_singleton_method(
566
+ "concat_lst",
567
+ function!(crate::functions::lazy::concat_lst, 1),
486
568
  )?;
487
- class.define_singleton_method("cov", function!(crate::lazy::dsl::cov, 2))?;
488
- class.define_singleton_method("arg_sort_by", function!(crate::lazy::dsl::arg_sort_by, 2))?;
489
- class.define_singleton_method("when", function!(crate::lazy::dsl::when, 1))?;
490
- class.define_singleton_method("concat_str", function!(crate::lazy::dsl::concat_str, 2))?;
491
- class.define_singleton_method("concat_lst", function!(crate::lazy::dsl::concat_lst, 1))?;
492
569
 
493
570
  let class = module.define_class("RbLazyFrame", Default::default())?;
494
571
  class.define_singleton_method("read_json", function!(RbLazyFrame::read_json, 1))?;
@@ -572,16 +649,29 @@ fn init() -> RbResult<()> {
572
649
  class.define_singleton_method("new_opt_i64", function!(RbSeries::new_opt_i64, 3))?;
573
650
  class.define_singleton_method("new_opt_f32", function!(RbSeries::new_opt_f32, 3))?;
574
651
  class.define_singleton_method("new_opt_f64", function!(RbSeries::new_opt_f64, 3))?;
652
+ class.define_singleton_method(
653
+ "new_from_anyvalues",
654
+ function!(RbSeries::new_from_anyvalues, 3),
655
+ )?;
575
656
  class.define_singleton_method("new_str", function!(RbSeries::new_str, 3))?;
576
657
  class.define_singleton_method("new_binary", function!(RbSeries::new_binary, 3))?;
658
+ class.define_singleton_method("new_null", function!(RbSeries::new_null, 3))?;
577
659
  class.define_singleton_method("new_object", function!(RbSeries::new_object, 3))?;
578
- class.define_singleton_method("new_list", function!(RbSeries::new_list, 3))?;
579
- class.define_singleton_method("new_opt_date", function!(RbSeries::new_opt_date, 3))?;
580
- class.define_singleton_method("new_opt_datetime", function!(RbSeries::new_opt_datetime, 3))?;
581
- class.define_method("is_sorted_flag", method!(RbSeries::is_sorted_flag, 0))?;
660
+ class.define_singleton_method("new_series_list", function!(RbSeries::new_series_list, 3))?;
661
+ class.define_singleton_method("new_decimal", function!(RbSeries::new_decimal, 3))?;
662
+ class.define_singleton_method("repeat", function!(RbSeries::repeat, 4))?;
663
+ class.define_method("struct_unnest", method!(RbSeries::struct_unnest, 0))?;
664
+ class.define_method(
665
+ "is_sorted_flag",
666
+ method!(RbSeries::is_sorted_ascending_flag, 0),
667
+ )?;
582
668
  class.define_method(
583
669
  "is_sorted_reverse_flag",
584
- method!(RbSeries::is_sorted_reverse_flag, 0),
670
+ method!(RbSeries::is_sorted_descending_flag, 0),
671
+ )?;
672
+ class.define_method(
673
+ "can_fast_explode_flag",
674
+ method!(RbSeries::can_fast_explode_flag, 0),
585
675
  )?;
586
676
  class.define_method("estimated_size", method!(RbSeries::estimated_size, 0))?;
587
677
  class.define_method("get_fmt", method!(RbSeries::get_fmt, 2))?;
@@ -595,7 +685,7 @@ fn init() -> RbResult<()> {
595
685
  class.define_method("rename", method!(RbSeries::rename, 1))?;
596
686
  class.define_method("dtype", method!(RbSeries::dtype, 0))?;
597
687
  class.define_method("inner_dtype", method!(RbSeries::inner_dtype, 0))?;
598
- class.define_method("set_sorted", method!(RbSeries::set_sorted, 1))?;
688
+ class.define_method("set_sorted", method!(RbSeries::set_sorted_flag, 1))?;
599
689
  class.define_method("mean", method!(RbSeries::mean, 0))?;
600
690
  class.define_method("max", method!(RbSeries::max, 0))?;
601
691
  class.define_method("min", method!(RbSeries::min, 0))?;
@@ -732,7 +822,6 @@ fn init() -> RbResult<()> {
732
822
  class.define_method("eq_i64", method!(RbSeries::eq_i64, 1))?;
733
823
  class.define_method("eq_f32", method!(RbSeries::eq_f32, 1))?;
734
824
  class.define_method("eq_f64", method!(RbSeries::eq_f64, 1))?;
735
- // class.define_method("eq_str", method!(RbSeries::eq_str, 1))?;
736
825
 
737
826
  // neq
738
827
  class.define_method("neq_u8", method!(RbSeries::neq_u8, 1))?;
@@ -745,7 +834,6 @@ fn init() -> RbResult<()> {
745
834
  class.define_method("neq_i64", method!(RbSeries::neq_i64, 1))?;
746
835
  class.define_method("neq_f32", method!(RbSeries::neq_f32, 1))?;
747
836
  class.define_method("neq_f64", method!(RbSeries::neq_f64, 1))?;
748
- // class.define_method("neq_str", method!(RbSeries::neq_str, 1))?;
749
837
 
750
838
  // gt
751
839
  class.define_method("gt_u8", method!(RbSeries::gt_u8, 1))?;
@@ -758,7 +846,6 @@ fn init() -> RbResult<()> {
758
846
  class.define_method("gt_i64", method!(RbSeries::gt_i64, 1))?;
759
847
  class.define_method("gt_f32", method!(RbSeries::gt_f32, 1))?;
760
848
  class.define_method("gt_f64", method!(RbSeries::gt_f64, 1))?;
761
- // class.define_method("gt_str", method!(RbSeries::gt_str, 1))?;
762
849
 
763
850
  // gt_eq
764
851
  class.define_method("gt_eq_u8", method!(RbSeries::gt_eq_u8, 1))?;
@@ -771,7 +858,6 @@ fn init() -> RbResult<()> {
771
858
  class.define_method("gt_eq_i64", method!(RbSeries::gt_eq_i64, 1))?;
772
859
  class.define_method("gt_eq_f32", method!(RbSeries::gt_eq_f32, 1))?;
773
860
  class.define_method("gt_eq_f64", method!(RbSeries::gt_eq_f64, 1))?;
774
- // class.define_method("gt_eq_str", method!(RbSeries::gt_eq_str, 1))?;
775
861
 
776
862
  // lt
777
863
  class.define_method("lt_u8", method!(RbSeries::lt_u8, 1))?;
@@ -784,7 +870,6 @@ fn init() -> RbResult<()> {
784
870
  class.define_method("lt_i64", method!(RbSeries::lt_i64, 1))?;
785
871
  class.define_method("lt_f32", method!(RbSeries::lt_f32, 1))?;
786
872
  class.define_method("lt_f64", method!(RbSeries::lt_f64, 1))?;
787
- // class.define_method("lt_str", method!(RbSeries::lt_str, 1))?;
788
873
 
789
874
  // lt_eq
790
875
  class.define_method("lt_eq_u8", method!(RbSeries::lt_eq_u8, 1))?;
@@ -797,11 +882,21 @@ fn init() -> RbResult<()> {
797
882
  class.define_method("lt_eq_i64", method!(RbSeries::lt_eq_i64, 1))?;
798
883
  class.define_method("lt_eq_f32", method!(RbSeries::lt_eq_f32, 1))?;
799
884
  class.define_method("lt_eq_f64", method!(RbSeries::lt_eq_f64, 1))?;
800
- // class.define_method("lt_eq_str", method!(RbSeries::lt_eq_str, 1))?;
885
+
886
+ // str comp
887
+ class.define_method("eq_str", method!(RbSeries::eq_str, 1))?;
888
+ class.define_method("neq_str", method!(RbSeries::neq_str, 1))?;
889
+ class.define_method("gt_str", method!(RbSeries::gt_str, 1))?;
890
+ class.define_method("gt_eq_str", method!(RbSeries::gt_eq_str, 1))?;
891
+ class.define_method("lt_str", method!(RbSeries::lt_str, 1))?;
892
+ class.define_method("lt_eq_str", method!(RbSeries::lt_eq_str, 1))?;
801
893
 
802
894
  // npy
803
895
  class.define_method("to_numo", method!(RbSeries::to_numo, 0))?;
804
896
 
897
+ // extra
898
+ class.define_method("extend_constant", method!(RbSeries::extend_constant, 2))?;
899
+
805
900
  let class = module.define_class("RbWhen", Default::default())?;
806
901
  class.define_method("_then", method!(RbWhen::then, 1))?;
807
902
 
@@ -810,196 +905,3 @@ fn init() -> RbResult<()> {
810
905
 
811
906
  Ok(())
812
907
  }
813
-
814
- fn dtype_cols(dtypes: RArray) -> RbResult<RbExpr> {
815
- let dtypes = dtypes
816
- .each()
817
- .map(|v| v?.try_convert::<Wrap<DataType>>())
818
- .collect::<RbResult<Vec<Wrap<DataType>>>>()?;
819
- let dtypes = vec_extract_wrapped(dtypes);
820
- Ok(crate::lazy::dsl::dtype_cols(dtypes))
821
- }
822
-
823
- #[allow(clippy::too_many_arguments)]
824
- fn rb_duration(
825
- days: Option<&RbExpr>,
826
- seconds: Option<&RbExpr>,
827
- nanoseconds: Option<&RbExpr>,
828
- microseconds: Option<&RbExpr>,
829
- milliseconds: Option<&RbExpr>,
830
- minutes: Option<&RbExpr>,
831
- hours: Option<&RbExpr>,
832
- weeks: Option<&RbExpr>,
833
- ) -> RbExpr {
834
- let args = DurationArgs {
835
- days: days.map(|e| e.inner.clone()),
836
- seconds: seconds.map(|e| e.inner.clone()),
837
- nanoseconds: nanoseconds.map(|e| e.inner.clone()),
838
- microseconds: microseconds.map(|e| e.inner.clone()),
839
- milliseconds: milliseconds.map(|e| e.inner.clone()),
840
- minutes: minutes.map(|e| e.inner.clone()),
841
- hours: hours.map(|e| e.inner.clone()),
842
- weeks: weeks.map(|e| e.inner.clone()),
843
- };
844
-
845
- polars::lazy::dsl::duration(args).into()
846
- }
847
-
848
- fn concat_df(seq: RArray) -> RbResult<RbDataFrame> {
849
- let mut iter = seq.each();
850
- let first = iter.next().unwrap()?;
851
-
852
- let first_rdf = get_df(first)?;
853
- let identity_df = first_rdf.slice(0, 0);
854
-
855
- let mut rdfs: Vec<PolarsResult<DataFrame>> = vec![Ok(first_rdf)];
856
-
857
- for item in iter {
858
- let rdf = get_df(item?)?;
859
- rdfs.push(Ok(rdf));
860
- }
861
-
862
- let identity = Ok(identity_df);
863
-
864
- let df = rdfs
865
- .into_iter()
866
- .fold(identity, |acc: PolarsResult<DataFrame>, df| {
867
- let mut acc = acc?;
868
- acc.vstack_mut(&df?)?;
869
- Ok(acc)
870
- })
871
- .map_err(RbPolarsErr::from)?;
872
-
873
- Ok(df.into())
874
- }
875
-
876
- fn concat_lf(lfs: Value, rechunk: bool, parallel: bool) -> RbResult<RbLazyFrame> {
877
- let (seq, len) = get_rbseq(lfs)?;
878
- let mut lfs = Vec::with_capacity(len);
879
-
880
- for res in seq.each() {
881
- let item = res?;
882
- let lf = get_lf(item)?;
883
- lfs.push(lf);
884
- }
885
-
886
- let lf = polars::lazy::dsl::concat(lfs, rechunk, parallel).map_err(RbPolarsErr::from)?;
887
- Ok(lf.into())
888
- }
889
-
890
- fn rb_diag_concat_df(seq: RArray) -> RbResult<RbDataFrame> {
891
- let mut dfs = Vec::new();
892
- for item in seq.each() {
893
- dfs.push(get_df(item?)?);
894
- }
895
- let df = diag_concat_df(&dfs).map_err(RbPolarsErr::from)?;
896
- Ok(df.into())
897
- }
898
-
899
- fn rb_hor_concat_df(seq: RArray) -> RbResult<RbDataFrame> {
900
- let mut dfs = Vec::new();
901
- for item in seq.each() {
902
- dfs.push(get_df(item?)?);
903
- }
904
- let df = hor_concat_df(&dfs).map_err(RbPolarsErr::from)?;
905
- Ok(df.into())
906
- }
907
-
908
- fn concat_series(seq: RArray) -> RbResult<RbSeries> {
909
- let mut iter = seq.each();
910
- let first = iter.next().unwrap()?;
911
-
912
- let mut s = get_series(first)?;
913
-
914
- for res in iter {
915
- let item = res?;
916
- let item = get_series(item)?;
917
- s.append(&item).map_err(RbPolarsErr::from)?;
918
- }
919
- Ok(s.into())
920
- }
921
-
922
- fn ipc_schema(rb_f: Value) -> RbResult<Value> {
923
- use polars::export::arrow::io::ipc::read::read_file_metadata;
924
- let mut r = get_file_like(rb_f, false)?;
925
- let metadata = read_file_metadata(&mut r).map_err(RbPolarsErr::arrow)?;
926
-
927
- let dict = RHash::new();
928
- for field in metadata.schema.fields {
929
- let dt: Wrap<DataType> = Wrap((&field.data_type).into());
930
- dict.aset(field.name, dt)?;
931
- }
932
- Ok(dict.into())
933
- }
934
-
935
- fn parquet_schema(rb_f: Value) -> RbResult<Value> {
936
- use polars::export::arrow::io::parquet::read::{infer_schema, read_metadata};
937
-
938
- let mut r = get_file_like(rb_f, false)?;
939
- let metadata = read_metadata(&mut r).map_err(RbPolarsErr::arrow)?;
940
- let arrow_schema = infer_schema(&metadata).map_err(RbPolarsErr::arrow)?;
941
-
942
- let dict = RHash::new();
943
- for field in arrow_schema.fields {
944
- let dt: Wrap<DataType> = Wrap((&field.data_type).into());
945
- dict.aset(field.name, dt)?;
946
- }
947
- Ok(dict.into())
948
- }
949
-
950
- fn collect_all(lfs: RArray) -> RbResult<RArray> {
951
- let lfs = lfs
952
- .each()
953
- .map(|v| v?.try_convert::<&RbLazyFrame>())
954
- .collect::<RbResult<Vec<&RbLazyFrame>>>()?;
955
-
956
- Ok(RArray::from_iter(lfs.iter().map(|lf| {
957
- let df = lf.ldf.clone().collect().unwrap();
958
- RbDataFrame::new(df)
959
- })))
960
- }
961
-
962
- fn rb_date_range(
963
- start: i64,
964
- stop: i64,
965
- every: String,
966
- closed: Wrap<ClosedWindow>,
967
- name: String,
968
- tu: Wrap<TimeUnit>,
969
- tz: Option<TimeZone>,
970
- ) -> RbResult<RbSeries> {
971
- let date_range = polars::time::date_range_impl(
972
- &name,
973
- start,
974
- stop,
975
- Duration::parse(&every),
976
- closed.0,
977
- tu.0,
978
- tz.as_ref(),
979
- )
980
- .map_err(RbPolarsErr::from)?;
981
- Ok(date_range.into_series().into())
982
- }
983
-
984
- fn coalesce_exprs(exprs: RArray) -> RbResult<RbExpr> {
985
- let exprs = rb_exprs_to_exprs(exprs)?;
986
- Ok(polars::lazy::dsl::coalesce(&exprs).into())
987
- }
988
-
989
- fn sum_exprs(exprs: RArray) -> RbResult<RbExpr> {
990
- let exprs = rb_exprs_to_exprs(exprs)?;
991
- Ok(polars::lazy::dsl::sum_exprs(exprs).into())
992
- }
993
-
994
- fn as_struct(exprs: RArray) -> RbResult<RbExpr> {
995
- let exprs = rb_exprs_to_exprs(exprs)?;
996
- Ok(polars::lazy::dsl::as_struct(&exprs).into())
997
- }
998
-
999
- fn arg_where(condition: &RbExpr) -> RbExpr {
1000
- polars::lazy::dsl::arg_where(condition.inner.clone()).into()
1001
- }
1002
-
1003
- fn get_idx_type() -> Value {
1004
- Wrap(IDX_DTYPE).into_value()
1005
- }