polars-df 0.4.0 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/Cargo.lock +272 -191
- data/Cargo.toml +0 -1
- data/README.md +2 -2
- data/ext/polars/Cargo.toml +8 -4
- data/ext/polars/src/apply/dataframe.rs +2 -2
- data/ext/polars/src/{lazy/apply.rs → apply/lazy.rs} +1 -2
- data/ext/polars/src/apply/mod.rs +1 -0
- data/ext/polars/src/batched_csv.rs +7 -5
- data/ext/polars/src/conversion.rs +106 -4
- data/ext/polars/src/dataframe.rs +19 -17
- data/ext/polars/src/error.rs +0 -4
- data/ext/polars/src/expr/binary.rs +69 -0
- data/ext/polars/src/expr/categorical.rs +10 -0
- data/ext/polars/src/expr/datetime.rs +223 -0
- data/ext/polars/src/expr/general.rs +933 -0
- data/ext/polars/src/expr/list.rs +146 -0
- data/ext/polars/src/{lazy → expr}/meta.rs +16 -6
- data/ext/polars/src/expr/string.rs +313 -0
- data/ext/polars/src/expr/struct.rs +15 -0
- data/ext/polars/src/expr.rs +33 -0
- data/ext/polars/src/functions/eager.rs +93 -0
- data/ext/polars/src/functions/io.rs +34 -0
- data/ext/polars/src/functions/lazy.rs +209 -0
- data/ext/polars/src/functions/meta.rs +8 -0
- data/ext/polars/src/functions/mod.rs +5 -0
- data/ext/polars/src/functions/whenthen.rs +43 -0
- data/ext/polars/src/{lazy/dataframe.rs → lazyframe.rs} +12 -33
- data/ext/polars/src/lazygroupby.rs +29 -0
- data/ext/polars/src/lib.rs +205 -303
- data/ext/polars/src/rb_modules.rs +8 -0
- data/ext/polars/src/series/aggregation.rs +83 -0
- data/ext/polars/src/series/arithmetic.rs +88 -0
- data/ext/polars/src/series/comparison.rs +251 -0
- data/ext/polars/src/series/construction.rs +164 -0
- data/ext/polars/src/series.rs +99 -539
- data/lib/polars/convert.rb +2 -2
- data/lib/polars/data_frame.rb +201 -50
- data/lib/polars/data_types.rb +6 -4
- data/lib/polars/date_time_expr.rb +142 -2
- data/lib/polars/expr.rb +70 -10
- data/lib/polars/lazy_frame.rb +4 -3
- data/lib/polars/lazy_functions.rb +4 -1
- data/lib/polars/list_expr.rb +68 -19
- data/lib/polars/series.rb +181 -73
- data/lib/polars/string_expr.rb +149 -43
- data/lib/polars/string_name_space.rb +4 -4
- data/lib/polars/struct_name_space.rb +32 -0
- data/lib/polars/utils.rb +41 -7
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +2 -2
- metadata +26 -11
- data/ext/polars/src/lazy/dsl.rs +0 -1775
- data/ext/polars/src/lazy/mod.rs +0 -5
- data/ext/polars/src/lazy/utils.rs +0 -13
- data/ext/polars/src/list_construction.rs +0 -100
- /data/ext/polars/src/{numo.rs → series/export.rs} +0 -0
- /data/ext/polars/src/{set.rs → series/set_at_idx.rs} +0 -0
data/ext/polars/src/lib.rs
CHANGED
@@ -3,31 +3,27 @@ mod batched_csv;
|
|
3
3
|
mod conversion;
|
4
4
|
mod dataframe;
|
5
5
|
mod error;
|
6
|
+
mod expr;
|
6
7
|
mod file;
|
7
|
-
mod
|
8
|
-
mod
|
9
|
-
mod
|
8
|
+
mod functions;
|
9
|
+
mod lazyframe;
|
10
|
+
mod lazygroupby;
|
10
11
|
mod object;
|
11
12
|
mod prelude;
|
12
13
|
pub(crate) mod rb_modules;
|
13
14
|
mod series;
|
14
|
-
mod set;
|
15
15
|
mod utils;
|
16
16
|
|
17
17
|
use batched_csv::RbBatchedCsv;
|
18
18
|
use conversion::*;
|
19
19
|
use dataframe::RbDataFrame;
|
20
20
|
use error::{RbPolarsErr, RbValueError};
|
21
|
-
use
|
22
|
-
use
|
23
|
-
use
|
24
|
-
use
|
25
|
-
use
|
26
|
-
use
|
27
|
-
use polars::error::PolarsResult;
|
28
|
-
use polars::frame::DataFrame;
|
29
|
-
use polars::functions::{diag_concat_df, hor_concat_df};
|
30
|
-
use polars::prelude::{ClosedWindow, Duration, DurationArgs, IntoSeries, TimeZone};
|
21
|
+
use expr::rb_exprs_to_exprs;
|
22
|
+
use expr::RbExpr;
|
23
|
+
use functions::whenthen::{RbWhen, RbWhenThen};
|
24
|
+
use lazyframe::RbLazyFrame;
|
25
|
+
use lazygroupby::RbLazyGroupBy;
|
26
|
+
use magnus::{define_module, function, method, prelude::*, Error};
|
31
27
|
use series::RbSeries;
|
32
28
|
|
33
29
|
#[cfg(target_os = "linux")]
|
@@ -49,22 +45,70 @@ type RbResult<T> = Result<T, Error>;
|
|
49
45
|
#[magnus::init]
|
50
46
|
fn init() -> RbResult<()> {
|
51
47
|
let module = define_module("Polars")?;
|
52
|
-
module.define_singleton_method(
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
module.define_singleton_method(
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
module.define_singleton_method(
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
module.define_singleton_method(
|
65
|
-
|
66
|
-
|
67
|
-
|
48
|
+
module.define_singleton_method(
|
49
|
+
"_dtype_cols",
|
50
|
+
function!(crate::functions::lazy::dtype_cols2, 1),
|
51
|
+
)?;
|
52
|
+
module.define_singleton_method(
|
53
|
+
"_rb_duration",
|
54
|
+
function!(crate::functions::lazy::duration, 8),
|
55
|
+
)?;
|
56
|
+
module.define_singleton_method(
|
57
|
+
"_concat_df",
|
58
|
+
function!(crate::functions::eager::concat_df, 1),
|
59
|
+
)?;
|
60
|
+
module.define_singleton_method(
|
61
|
+
"_concat_lf",
|
62
|
+
function!(crate::functions::lazy::concat_lf, 3),
|
63
|
+
)?;
|
64
|
+
module.define_singleton_method(
|
65
|
+
"_diag_concat_df",
|
66
|
+
function!(crate::functions::eager::diag_concat_df, 1),
|
67
|
+
)?;
|
68
|
+
module.define_singleton_method(
|
69
|
+
"_hor_concat_df",
|
70
|
+
function!(crate::functions::eager::hor_concat_df, 1),
|
71
|
+
)?;
|
72
|
+
module.define_singleton_method(
|
73
|
+
"_concat_series",
|
74
|
+
function!(crate::functions::eager::concat_series, 1),
|
75
|
+
)?;
|
76
|
+
module.define_singleton_method(
|
77
|
+
"_ipc_schema",
|
78
|
+
function!(crate::functions::io::read_ipc_schema, 1),
|
79
|
+
)?;
|
80
|
+
module.define_singleton_method(
|
81
|
+
"_parquet_schema",
|
82
|
+
function!(crate::functions::io::read_parquet_schema, 1),
|
83
|
+
)?;
|
84
|
+
module.define_singleton_method(
|
85
|
+
"_collect_all",
|
86
|
+
function!(crate::functions::lazy::collect_all, 1),
|
87
|
+
)?;
|
88
|
+
module.define_singleton_method(
|
89
|
+
"_rb_date_range",
|
90
|
+
function!(crate::functions::eager::date_range, 7),
|
91
|
+
)?;
|
92
|
+
module.define_singleton_method(
|
93
|
+
"_coalesce_exprs",
|
94
|
+
function!(crate::functions::lazy::coalesce, 1),
|
95
|
+
)?;
|
96
|
+
module.define_singleton_method(
|
97
|
+
"_sum_exprs",
|
98
|
+
function!(crate::functions::lazy::sum_exprs, 1),
|
99
|
+
)?;
|
100
|
+
module.define_singleton_method(
|
101
|
+
"_as_struct",
|
102
|
+
function!(crate::functions::lazy::as_struct, 1),
|
103
|
+
)?;
|
104
|
+
module.define_singleton_method(
|
105
|
+
"_arg_where",
|
106
|
+
function!(crate::functions::lazy::arg_where, 1),
|
107
|
+
)?;
|
108
|
+
module.define_singleton_method(
|
109
|
+
"_get_idx_type",
|
110
|
+
function!(crate::functions::meta::get_idx_type, 0),
|
111
|
+
)?;
|
68
112
|
|
69
113
|
let class = module.define_class("RbBatchedCsv", Default::default())?;
|
70
114
|
class.define_singleton_method("new", function!(RbBatchedCsv::new, -1))?;
|
@@ -209,7 +253,7 @@ fn init() -> RbResult<()> {
|
|
209
253
|
class.define_method("unique_stable", method!(RbExpr::unique_stable, 0))?;
|
210
254
|
class.define_method("first", method!(RbExpr::first, 0))?;
|
211
255
|
class.define_method("last", method!(RbExpr::last, 0))?;
|
212
|
-
class.define_method("
|
256
|
+
class.define_method("implode", method!(RbExpr::implode, 0))?;
|
213
257
|
class.define_method("quantile", method!(RbExpr::quantile, 2))?;
|
214
258
|
class.define_method("agg_groups", method!(RbExpr::agg_groups, 0))?;
|
215
259
|
class.define_method("count", method!(RbExpr::count, 0))?;
|
@@ -219,7 +263,8 @@ fn init() -> RbResult<()> {
|
|
219
263
|
class.define_method("cast", method!(RbExpr::cast, 2))?;
|
220
264
|
class.define_method("sort_with", method!(RbExpr::sort_with, 2))?;
|
221
265
|
class.define_method("arg_sort", method!(RbExpr::arg_sort, 2))?;
|
222
|
-
class.define_method("top_k", method!(RbExpr::top_k,
|
266
|
+
class.define_method("top_k", method!(RbExpr::top_k, 1))?;
|
267
|
+
class.define_method("bottom_k", method!(RbExpr::bottom_k, 1))?;
|
223
268
|
class.define_method("arg_max", method!(RbExpr::arg_max, 0))?;
|
224
269
|
class.define_method("arg_min", method!(RbExpr::arg_min, 0))?;
|
225
270
|
class.define_method("search_sorted", method!(RbExpr::search_sorted, 2))?;
|
@@ -242,6 +287,7 @@ fn init() -> RbResult<()> {
|
|
242
287
|
class.define_method("std", method!(RbExpr::std, 1))?;
|
243
288
|
class.define_method("var", method!(RbExpr::var, 1))?;
|
244
289
|
class.define_method("is_unique", method!(RbExpr::is_unique, 0))?;
|
290
|
+
class.define_method("approx_unique", method!(RbExpr::approx_unique, 0))?;
|
245
291
|
class.define_method("is_first", method!(RbExpr::is_first, 0))?;
|
246
292
|
class.define_method("explode", method!(RbExpr::explode, 0))?;
|
247
293
|
class.define_method("take_every", method!(RbExpr::take_every, 1))?;
|
@@ -284,9 +330,9 @@ fn init() -> RbResult<()> {
|
|
284
330
|
class.define_method("cumprod", method!(RbExpr::cumprod, 1))?;
|
285
331
|
class.define_method("product", method!(RbExpr::product, 0))?;
|
286
332
|
class.define_method("shrink_dtype", method!(RbExpr::shrink_dtype, 0))?;
|
287
|
-
class.define_method("str_parse_date", method!(RbExpr::
|
288
|
-
class.define_method("str_parse_datetime", method!(RbExpr::
|
289
|
-
class.define_method("str_parse_time", method!(RbExpr::
|
333
|
+
class.define_method("str_parse_date", method!(RbExpr::str_to_date, 4))?;
|
334
|
+
class.define_method("str_parse_datetime", method!(RbExpr::str_to_datetime, 8))?;
|
335
|
+
class.define_method("str_parse_time", method!(RbExpr::str_to_time, 3))?;
|
290
336
|
class.define_method("str_strip", method!(RbExpr::str_strip, 1))?;
|
291
337
|
class.define_method("str_rstrip", method!(RbExpr::str_rstrip, 1))?;
|
292
338
|
class.define_method("str_lstrip", method!(RbExpr::str_lstrip, 1))?;
|
@@ -295,7 +341,7 @@ fn init() -> RbResult<()> {
|
|
295
341
|
class.define_method("str_to_lowercase", method!(RbExpr::str_to_lowercase, 0))?;
|
296
342
|
class.define_method("str_lengths", method!(RbExpr::str_lengths, 0))?;
|
297
343
|
class.define_method("str_n_chars", method!(RbExpr::str_n_chars, 0))?;
|
298
|
-
class.define_method("
|
344
|
+
class.define_method("str_replace_n", method!(RbExpr::str_replace_n, 4))?;
|
299
345
|
class.define_method("str_replace_all", method!(RbExpr::str_replace_all, 3))?;
|
300
346
|
class.define_method("str_zfill", method!(RbExpr::str_zfill, 1))?;
|
301
347
|
class.define_method("str_ljust", method!(RbExpr::str_ljust, 2))?;
|
@@ -303,22 +349,24 @@ fn init() -> RbResult<()> {
|
|
303
349
|
class.define_method("str_contains", method!(RbExpr::str_contains, 3))?;
|
304
350
|
class.define_method("str_ends_with", method!(RbExpr::str_ends_with, 1))?;
|
305
351
|
class.define_method("str_starts_with", method!(RbExpr::str_starts_with, 1))?;
|
306
|
-
class.define_method("binary_contains", method!(RbExpr::
|
307
|
-
class.define_method("binary_ends_with", method!(RbExpr::
|
308
|
-
class.define_method("binary_starts_with", method!(RbExpr::
|
352
|
+
class.define_method("binary_contains", method!(RbExpr::bin_contains, 1))?;
|
353
|
+
class.define_method("binary_ends_with", method!(RbExpr::bin_ends_with, 1))?;
|
354
|
+
class.define_method("binary_starts_with", method!(RbExpr::bin_starts_with, 1))?;
|
309
355
|
class.define_method("str_hex_encode", method!(RbExpr::str_hex_encode, 0))?;
|
310
356
|
class.define_method("str_hex_decode", method!(RbExpr::str_hex_decode, 1))?;
|
311
357
|
class.define_method("str_base64_encode", method!(RbExpr::str_base64_encode, 0))?;
|
312
358
|
class.define_method("str_base64_decode", method!(RbExpr::str_base64_decode, 1))?;
|
313
|
-
class.define_method("
|
314
|
-
class.define_method("
|
359
|
+
class.define_method("str_parse_int", method!(RbExpr::str_parse_int, 2))?;
|
360
|
+
class.define_method("str_json_extract", method!(RbExpr::str_json_extract, 1))?;
|
361
|
+
class.define_method("binary_hex_encode", method!(RbExpr::bin_hex_encode, 0))?;
|
362
|
+
class.define_method("binary_hex_decode", method!(RbExpr::bin_hex_decode, 1))?;
|
315
363
|
class.define_method(
|
316
364
|
"binary_base64_encode",
|
317
|
-
method!(RbExpr::
|
365
|
+
method!(RbExpr::bin_base64_encode, 0),
|
318
366
|
)?;
|
319
367
|
class.define_method(
|
320
368
|
"binary_base64_decode",
|
321
|
-
method!(RbExpr::
|
369
|
+
method!(RbExpr::bin_base64_decode, 1),
|
322
370
|
)?;
|
323
371
|
class.define_method(
|
324
372
|
"str_json_path_match",
|
@@ -326,8 +374,8 @@ fn init() -> RbResult<()> {
|
|
326
374
|
)?;
|
327
375
|
class.define_method("str_extract", method!(RbExpr::str_extract, 2))?;
|
328
376
|
class.define_method("str_extract_all", method!(RbExpr::str_extract_all, 1))?;
|
329
|
-
class.define_method("count_match", method!(RbExpr::
|
330
|
-
class.define_method("strftime", method!(RbExpr::
|
377
|
+
class.define_method("count_match", method!(RbExpr::str_count_match, 1))?;
|
378
|
+
class.define_method("strftime", method!(RbExpr::dt_to_string, 1))?;
|
331
379
|
class.define_method("str_split", method!(RbExpr::str_split, 1))?;
|
332
380
|
class.define_method(
|
333
381
|
"str_split_inclusive",
|
@@ -339,22 +387,27 @@ fn init() -> RbResult<()> {
|
|
339
387
|
method!(RbExpr::str_split_exact_inclusive, 2),
|
340
388
|
)?;
|
341
389
|
class.define_method("str_splitn", method!(RbExpr::str_splitn, 2))?;
|
342
|
-
class.define_method("
|
343
|
-
class.define_method("
|
344
|
-
class.define_method("
|
345
|
-
class.define_method("
|
346
|
-
class.define_method("
|
347
|
-
class.define_method("
|
348
|
-
class.define_method("
|
349
|
-
class.define_method("
|
350
|
-
class.define_method("
|
351
|
-
class.define_method("
|
352
|
-
class.define_method("
|
353
|
-
class.define_method("
|
354
|
-
class.define_method("
|
355
|
-
class.define_method("
|
356
|
-
class.define_method("
|
357
|
-
class.define_method("
|
390
|
+
class.define_method("list_lengths", method!(RbExpr::list_lengths, 0))?;
|
391
|
+
class.define_method("list_contains", method!(RbExpr::list_contains, 1))?;
|
392
|
+
class.define_method("list_count_match", method!(RbExpr::list_count_match, 1))?;
|
393
|
+
class.define_method("year", method!(RbExpr::dt_year, 0))?;
|
394
|
+
class.define_method("dt_is_leap_year", method!(RbExpr::dt_is_leap_year, 0))?;
|
395
|
+
class.define_method("iso_year", method!(RbExpr::dt_iso_year, 0))?;
|
396
|
+
class.define_method("quarter", method!(RbExpr::dt_quarter, 0))?;
|
397
|
+
class.define_method("month", method!(RbExpr::dt_month, 0))?;
|
398
|
+
class.define_method("week", method!(RbExpr::dt_week, 0))?;
|
399
|
+
class.define_method("weekday", method!(RbExpr::dt_weekday, 0))?;
|
400
|
+
class.define_method("day", method!(RbExpr::dt_day, 0))?;
|
401
|
+
class.define_method("ordinal_day", method!(RbExpr::dt_ordinal_day, 0))?;
|
402
|
+
class.define_method("dt_time", method!(RbExpr::dt_time, 0))?;
|
403
|
+
class.define_method("dt_date", method!(RbExpr::dt_date, 0))?;
|
404
|
+
class.define_method("dt_datetime", method!(RbExpr::dt_datetime, 0))?;
|
405
|
+
class.define_method("hour", method!(RbExpr::dt_hour, 0))?;
|
406
|
+
class.define_method("minute", method!(RbExpr::dt_minute, 0))?;
|
407
|
+
class.define_method("second", method!(RbExpr::dt_second, 0))?;
|
408
|
+
class.define_method("millisecond", method!(RbExpr::dt_millisecond, 0))?;
|
409
|
+
class.define_method("microsecond", method!(RbExpr::dt_microsecond, 0))?;
|
410
|
+
class.define_method("nanosecond", method!(RbExpr::dt_nanosecond, 0))?;
|
358
411
|
class.define_method("duration_days", method!(RbExpr::duration_days, 0))?;
|
359
412
|
class.define_method("duration_hours", method!(RbExpr::duration_hours, 0))?;
|
360
413
|
class.define_method("duration_minutes", method!(RbExpr::duration_minutes, 0))?;
|
@@ -371,7 +424,7 @@ fn init() -> RbResult<()> {
|
|
371
424
|
"duration_milliseconds",
|
372
425
|
method!(RbExpr::duration_milliseconds, 0),
|
373
426
|
)?;
|
374
|
-
class.define_method("timestamp", method!(RbExpr::
|
427
|
+
class.define_method("timestamp", method!(RbExpr::dt_timestamp, 1))?;
|
375
428
|
class.define_method("dt_offset_by", method!(RbExpr::dt_offset_by, 1))?;
|
376
429
|
class.define_method("dt_epoch_seconds", method!(RbExpr::dt_epoch_seconds, 0))?;
|
377
430
|
class.define_method("dt_with_time_unit", method!(RbExpr::dt_with_time_unit, 1))?;
|
@@ -382,11 +435,14 @@ fn init() -> RbResult<()> {
|
|
382
435
|
class.define_method("dt_cast_time_unit", method!(RbExpr::dt_cast_time_unit, 1))?;
|
383
436
|
class.define_method(
|
384
437
|
"dt_replace_time_zone",
|
385
|
-
method!(RbExpr::dt_replace_time_zone,
|
438
|
+
method!(RbExpr::dt_replace_time_zone, 2),
|
386
439
|
)?;
|
387
440
|
class.define_method("dt_tz_localize", method!(RbExpr::dt_tz_localize, 1))?;
|
388
441
|
class.define_method("dt_truncate", method!(RbExpr::dt_truncate, 2))?;
|
442
|
+
class.define_method("dt_month_start", method!(RbExpr::dt_month_start, 0))?;
|
443
|
+
class.define_method("dt_month_end", method!(RbExpr::dt_month_end, 0))?;
|
389
444
|
class.define_method("dt_round", method!(RbExpr::dt_round, 2))?;
|
445
|
+
class.define_method("dt_combine", method!(RbExpr::dt_combine, 2))?;
|
390
446
|
class.define_method("map", method!(RbExpr::map, 3))?;
|
391
447
|
class.define_method("dot", method!(RbExpr::dot, 1))?;
|
392
448
|
class.define_method("reinterpret", method!(RbExpr::reinterpret, 1))?;
|
@@ -408,24 +464,25 @@ fn init() -> RbResult<()> {
|
|
408
464
|
class.define_method("rolling_skew", method!(RbExpr::rolling_skew, 2))?;
|
409
465
|
class.define_method("lower_bound", method!(RbExpr::lower_bound, 0))?;
|
410
466
|
class.define_method("upper_bound", method!(RbExpr::upper_bound, 0))?;
|
411
|
-
class.define_method("
|
412
|
-
class.define_method("
|
413
|
-
class.define_method("
|
414
|
-
class.define_method("
|
415
|
-
class.define_method("
|
416
|
-
class.define_method("
|
417
|
-
class.define_method("
|
418
|
-
class.define_method("
|
419
|
-
class.define_method("
|
420
|
-
class.define_method("
|
421
|
-
class.define_method("
|
422
|
-
class.define_method("
|
423
|
-
class.define_method("
|
424
|
-
class.define_method("
|
425
|
-
class.define_method("
|
467
|
+
class.define_method("list_max", method!(RbExpr::list_max, 0))?;
|
468
|
+
class.define_method("list_min", method!(RbExpr::list_min, 0))?;
|
469
|
+
class.define_method("list_sum", method!(RbExpr::list_sum, 0))?;
|
470
|
+
class.define_method("list_take", method!(RbExpr::list_take, 2))?;
|
471
|
+
class.define_method("list_mean", method!(RbExpr::list_mean, 0))?;
|
472
|
+
class.define_method("list_sort", method!(RbExpr::list_sort, 1))?;
|
473
|
+
class.define_method("list_reverse", method!(RbExpr::list_reverse, 0))?;
|
474
|
+
class.define_method("list_unique", method!(RbExpr::list_unique, 1))?;
|
475
|
+
class.define_method("list_get", method!(RbExpr::list_get, 1))?;
|
476
|
+
class.define_method("list_join", method!(RbExpr::list_join, 1))?;
|
477
|
+
class.define_method("list_arg_min", method!(RbExpr::list_arg_min, 0))?;
|
478
|
+
class.define_method("list_arg_max", method!(RbExpr::list_arg_max, 0))?;
|
479
|
+
class.define_method("list_diff", method!(RbExpr::list_diff, 2))?;
|
480
|
+
class.define_method("list_shift", method!(RbExpr::list_shift, 1))?;
|
481
|
+
class.define_method("list_slice", method!(RbExpr::list_slice, 2))?;
|
482
|
+
class.define_method("list_eval", method!(RbExpr::list_eval, 2))?;
|
426
483
|
class.define_method("cumulative_eval", method!(RbExpr::cumulative_eval, 3))?;
|
427
|
-
class.define_method("
|
428
|
-
class.define_method("rank", method!(RbExpr::rank,
|
484
|
+
class.define_method("list_to_struct", method!(RbExpr::list_to_struct, 3))?;
|
485
|
+
class.define_method("rank", method!(RbExpr::rank, 3))?;
|
429
486
|
class.define_method("diff", method!(RbExpr::diff, 2))?;
|
430
487
|
class.define_method("pct_change", method!(RbExpr::pct_change, 1))?;
|
431
488
|
class.define_method("skew", method!(RbExpr::skew, 1))?;
|
@@ -464,31 +521,51 @@ fn init() -> RbResult<()> {
|
|
464
521
|
// meta
|
465
522
|
class.define_method("meta_pop", method!(RbExpr::meta_pop, 0))?;
|
466
523
|
class.define_method("meta_eq", method!(RbExpr::meta_eq, 1))?;
|
467
|
-
class.define_method("meta_roots", method!(RbExpr::
|
524
|
+
class.define_method("meta_roots", method!(RbExpr::meta_root_names, 0))?;
|
468
525
|
class.define_method("meta_output_name", method!(RbExpr::meta_output_name, 0))?;
|
469
526
|
class.define_method("meta_undo_aliases", method!(RbExpr::meta_undo_aliases, 0))?;
|
527
|
+
class.define_method(
|
528
|
+
"meta_has_multiple_outputs",
|
529
|
+
method!(RbExpr::meta_has_multiple_outputs, 0),
|
530
|
+
)?;
|
531
|
+
class.define_method(
|
532
|
+
"meta_is_regex_projection",
|
533
|
+
method!(RbExpr::meta_is_regex_projection, 0),
|
534
|
+
)?;
|
470
535
|
|
471
536
|
// maybe add to different class
|
472
|
-
class.define_singleton_method("col", function!(crate::lazy::
|
473
|
-
class.define_singleton_method("count", function!(crate::lazy::
|
474
|
-
class.define_singleton_method("first", function!(crate::lazy::
|
475
|
-
class.define_singleton_method("last", function!(crate::lazy::
|
476
|
-
class.define_singleton_method("cols", function!(crate::lazy::
|
477
|
-
class.define_singleton_method("fold", function!(crate::lazy::
|
478
|
-
class.define_singleton_method("cumfold", function!(crate::lazy::
|
479
|
-
class.define_singleton_method("lit", function!(crate::lazy::
|
480
|
-
class.define_singleton_method("arange", function!(crate::lazy::
|
481
|
-
class.define_singleton_method("repeat", function!(crate::lazy::
|
482
|
-
class.define_singleton_method(
|
537
|
+
class.define_singleton_method("col", function!(crate::functions::lazy::col, 1))?;
|
538
|
+
class.define_singleton_method("count", function!(crate::functions::lazy::count, 0))?;
|
539
|
+
class.define_singleton_method("first", function!(crate::functions::lazy::first, 0))?;
|
540
|
+
class.define_singleton_method("last", function!(crate::functions::lazy::last, 0))?;
|
541
|
+
class.define_singleton_method("cols", function!(crate::functions::lazy::cols, 1))?;
|
542
|
+
class.define_singleton_method("fold", function!(crate::functions::lazy::fold, 3))?;
|
543
|
+
class.define_singleton_method("cumfold", function!(crate::functions::lazy::cumfold, 4))?;
|
544
|
+
class.define_singleton_method("lit", function!(crate::functions::lazy::lit, 1))?;
|
545
|
+
class.define_singleton_method("arange", function!(crate::functions::lazy::arange, 3))?;
|
546
|
+
class.define_singleton_method("repeat", function!(crate::functions::lazy::repeat, 2))?;
|
547
|
+
class.define_singleton_method(
|
548
|
+
"pearson_corr",
|
549
|
+
function!(crate::functions::lazy::pearson_corr, 3),
|
550
|
+
)?;
|
483
551
|
class.define_singleton_method(
|
484
552
|
"spearman_rank_corr",
|
485
|
-
function!(crate::lazy::
|
553
|
+
function!(crate::functions::lazy::spearman_rank_corr, 4),
|
554
|
+
)?;
|
555
|
+
class.define_singleton_method("cov", function!(crate::functions::lazy::cov, 2))?;
|
556
|
+
class.define_singleton_method(
|
557
|
+
"arg_sort_by",
|
558
|
+
function!(crate::functions::lazy::arg_sort_by, 2),
|
559
|
+
)?;
|
560
|
+
class.define_singleton_method("when", function!(crate::functions::whenthen::when, 1))?;
|
561
|
+
class.define_singleton_method(
|
562
|
+
"concat_str",
|
563
|
+
function!(crate::functions::lazy::concat_str, 2),
|
564
|
+
)?;
|
565
|
+
class.define_singleton_method(
|
566
|
+
"concat_lst",
|
567
|
+
function!(crate::functions::lazy::concat_lst, 1),
|
486
568
|
)?;
|
487
|
-
class.define_singleton_method("cov", function!(crate::lazy::dsl::cov, 2))?;
|
488
|
-
class.define_singleton_method("arg_sort_by", function!(crate::lazy::dsl::arg_sort_by, 2))?;
|
489
|
-
class.define_singleton_method("when", function!(crate::lazy::dsl::when, 1))?;
|
490
|
-
class.define_singleton_method("concat_str", function!(crate::lazy::dsl::concat_str, 2))?;
|
491
|
-
class.define_singleton_method("concat_lst", function!(crate::lazy::dsl::concat_lst, 1))?;
|
492
569
|
|
493
570
|
let class = module.define_class("RbLazyFrame", Default::default())?;
|
494
571
|
class.define_singleton_method("read_json", function!(RbLazyFrame::read_json, 1))?;
|
@@ -572,16 +649,29 @@ fn init() -> RbResult<()> {
|
|
572
649
|
class.define_singleton_method("new_opt_i64", function!(RbSeries::new_opt_i64, 3))?;
|
573
650
|
class.define_singleton_method("new_opt_f32", function!(RbSeries::new_opt_f32, 3))?;
|
574
651
|
class.define_singleton_method("new_opt_f64", function!(RbSeries::new_opt_f64, 3))?;
|
652
|
+
class.define_singleton_method(
|
653
|
+
"new_from_anyvalues",
|
654
|
+
function!(RbSeries::new_from_anyvalues, 3),
|
655
|
+
)?;
|
575
656
|
class.define_singleton_method("new_str", function!(RbSeries::new_str, 3))?;
|
576
657
|
class.define_singleton_method("new_binary", function!(RbSeries::new_binary, 3))?;
|
658
|
+
class.define_singleton_method("new_null", function!(RbSeries::new_null, 3))?;
|
577
659
|
class.define_singleton_method("new_object", function!(RbSeries::new_object, 3))?;
|
578
|
-
class.define_singleton_method("
|
579
|
-
class.define_singleton_method("
|
580
|
-
class.define_singleton_method("
|
581
|
-
class.define_method("
|
660
|
+
class.define_singleton_method("new_series_list", function!(RbSeries::new_series_list, 3))?;
|
661
|
+
class.define_singleton_method("new_decimal", function!(RbSeries::new_decimal, 3))?;
|
662
|
+
class.define_singleton_method("repeat", function!(RbSeries::repeat, 4))?;
|
663
|
+
class.define_method("struct_unnest", method!(RbSeries::struct_unnest, 0))?;
|
664
|
+
class.define_method(
|
665
|
+
"is_sorted_flag",
|
666
|
+
method!(RbSeries::is_sorted_ascending_flag, 0),
|
667
|
+
)?;
|
582
668
|
class.define_method(
|
583
669
|
"is_sorted_reverse_flag",
|
584
|
-
method!(RbSeries::
|
670
|
+
method!(RbSeries::is_sorted_descending_flag, 0),
|
671
|
+
)?;
|
672
|
+
class.define_method(
|
673
|
+
"can_fast_explode_flag",
|
674
|
+
method!(RbSeries::can_fast_explode_flag, 0),
|
585
675
|
)?;
|
586
676
|
class.define_method("estimated_size", method!(RbSeries::estimated_size, 0))?;
|
587
677
|
class.define_method("get_fmt", method!(RbSeries::get_fmt, 2))?;
|
@@ -595,7 +685,7 @@ fn init() -> RbResult<()> {
|
|
595
685
|
class.define_method("rename", method!(RbSeries::rename, 1))?;
|
596
686
|
class.define_method("dtype", method!(RbSeries::dtype, 0))?;
|
597
687
|
class.define_method("inner_dtype", method!(RbSeries::inner_dtype, 0))?;
|
598
|
-
class.define_method("set_sorted", method!(RbSeries::
|
688
|
+
class.define_method("set_sorted", method!(RbSeries::set_sorted_flag, 1))?;
|
599
689
|
class.define_method("mean", method!(RbSeries::mean, 0))?;
|
600
690
|
class.define_method("max", method!(RbSeries::max, 0))?;
|
601
691
|
class.define_method("min", method!(RbSeries::min, 0))?;
|
@@ -732,7 +822,6 @@ fn init() -> RbResult<()> {
|
|
732
822
|
class.define_method("eq_i64", method!(RbSeries::eq_i64, 1))?;
|
733
823
|
class.define_method("eq_f32", method!(RbSeries::eq_f32, 1))?;
|
734
824
|
class.define_method("eq_f64", method!(RbSeries::eq_f64, 1))?;
|
735
|
-
// class.define_method("eq_str", method!(RbSeries::eq_str, 1))?;
|
736
825
|
|
737
826
|
// neq
|
738
827
|
class.define_method("neq_u8", method!(RbSeries::neq_u8, 1))?;
|
@@ -745,7 +834,6 @@ fn init() -> RbResult<()> {
|
|
745
834
|
class.define_method("neq_i64", method!(RbSeries::neq_i64, 1))?;
|
746
835
|
class.define_method("neq_f32", method!(RbSeries::neq_f32, 1))?;
|
747
836
|
class.define_method("neq_f64", method!(RbSeries::neq_f64, 1))?;
|
748
|
-
// class.define_method("neq_str", method!(RbSeries::neq_str, 1))?;
|
749
837
|
|
750
838
|
// gt
|
751
839
|
class.define_method("gt_u8", method!(RbSeries::gt_u8, 1))?;
|
@@ -758,7 +846,6 @@ fn init() -> RbResult<()> {
|
|
758
846
|
class.define_method("gt_i64", method!(RbSeries::gt_i64, 1))?;
|
759
847
|
class.define_method("gt_f32", method!(RbSeries::gt_f32, 1))?;
|
760
848
|
class.define_method("gt_f64", method!(RbSeries::gt_f64, 1))?;
|
761
|
-
// class.define_method("gt_str", method!(RbSeries::gt_str, 1))?;
|
762
849
|
|
763
850
|
// gt_eq
|
764
851
|
class.define_method("gt_eq_u8", method!(RbSeries::gt_eq_u8, 1))?;
|
@@ -771,7 +858,6 @@ fn init() -> RbResult<()> {
|
|
771
858
|
class.define_method("gt_eq_i64", method!(RbSeries::gt_eq_i64, 1))?;
|
772
859
|
class.define_method("gt_eq_f32", method!(RbSeries::gt_eq_f32, 1))?;
|
773
860
|
class.define_method("gt_eq_f64", method!(RbSeries::gt_eq_f64, 1))?;
|
774
|
-
// class.define_method("gt_eq_str", method!(RbSeries::gt_eq_str, 1))?;
|
775
861
|
|
776
862
|
// lt
|
777
863
|
class.define_method("lt_u8", method!(RbSeries::lt_u8, 1))?;
|
@@ -784,7 +870,6 @@ fn init() -> RbResult<()> {
|
|
784
870
|
class.define_method("lt_i64", method!(RbSeries::lt_i64, 1))?;
|
785
871
|
class.define_method("lt_f32", method!(RbSeries::lt_f32, 1))?;
|
786
872
|
class.define_method("lt_f64", method!(RbSeries::lt_f64, 1))?;
|
787
|
-
// class.define_method("lt_str", method!(RbSeries::lt_str, 1))?;
|
788
873
|
|
789
874
|
// lt_eq
|
790
875
|
class.define_method("lt_eq_u8", method!(RbSeries::lt_eq_u8, 1))?;
|
@@ -797,11 +882,21 @@ fn init() -> RbResult<()> {
|
|
797
882
|
class.define_method("lt_eq_i64", method!(RbSeries::lt_eq_i64, 1))?;
|
798
883
|
class.define_method("lt_eq_f32", method!(RbSeries::lt_eq_f32, 1))?;
|
799
884
|
class.define_method("lt_eq_f64", method!(RbSeries::lt_eq_f64, 1))?;
|
800
|
-
|
885
|
+
|
886
|
+
// str comp
|
887
|
+
class.define_method("eq_str", method!(RbSeries::eq_str, 1))?;
|
888
|
+
class.define_method("neq_str", method!(RbSeries::neq_str, 1))?;
|
889
|
+
class.define_method("gt_str", method!(RbSeries::gt_str, 1))?;
|
890
|
+
class.define_method("gt_eq_str", method!(RbSeries::gt_eq_str, 1))?;
|
891
|
+
class.define_method("lt_str", method!(RbSeries::lt_str, 1))?;
|
892
|
+
class.define_method("lt_eq_str", method!(RbSeries::lt_eq_str, 1))?;
|
801
893
|
|
802
894
|
// npy
|
803
895
|
class.define_method("to_numo", method!(RbSeries::to_numo, 0))?;
|
804
896
|
|
897
|
+
// extra
|
898
|
+
class.define_method("extend_constant", method!(RbSeries::extend_constant, 2))?;
|
899
|
+
|
805
900
|
let class = module.define_class("RbWhen", Default::default())?;
|
806
901
|
class.define_method("_then", method!(RbWhen::then, 1))?;
|
807
902
|
|
@@ -810,196 +905,3 @@ fn init() -> RbResult<()> {
|
|
810
905
|
|
811
906
|
Ok(())
|
812
907
|
}
|
813
|
-
|
814
|
-
fn dtype_cols(dtypes: RArray) -> RbResult<RbExpr> {
|
815
|
-
let dtypes = dtypes
|
816
|
-
.each()
|
817
|
-
.map(|v| v?.try_convert::<Wrap<DataType>>())
|
818
|
-
.collect::<RbResult<Vec<Wrap<DataType>>>>()?;
|
819
|
-
let dtypes = vec_extract_wrapped(dtypes);
|
820
|
-
Ok(crate::lazy::dsl::dtype_cols(dtypes))
|
821
|
-
}
|
822
|
-
|
823
|
-
#[allow(clippy::too_many_arguments)]
|
824
|
-
fn rb_duration(
|
825
|
-
days: Option<&RbExpr>,
|
826
|
-
seconds: Option<&RbExpr>,
|
827
|
-
nanoseconds: Option<&RbExpr>,
|
828
|
-
microseconds: Option<&RbExpr>,
|
829
|
-
milliseconds: Option<&RbExpr>,
|
830
|
-
minutes: Option<&RbExpr>,
|
831
|
-
hours: Option<&RbExpr>,
|
832
|
-
weeks: Option<&RbExpr>,
|
833
|
-
) -> RbExpr {
|
834
|
-
let args = DurationArgs {
|
835
|
-
days: days.map(|e| e.inner.clone()),
|
836
|
-
seconds: seconds.map(|e| e.inner.clone()),
|
837
|
-
nanoseconds: nanoseconds.map(|e| e.inner.clone()),
|
838
|
-
microseconds: microseconds.map(|e| e.inner.clone()),
|
839
|
-
milliseconds: milliseconds.map(|e| e.inner.clone()),
|
840
|
-
minutes: minutes.map(|e| e.inner.clone()),
|
841
|
-
hours: hours.map(|e| e.inner.clone()),
|
842
|
-
weeks: weeks.map(|e| e.inner.clone()),
|
843
|
-
};
|
844
|
-
|
845
|
-
polars::lazy::dsl::duration(args).into()
|
846
|
-
}
|
847
|
-
|
848
|
-
fn concat_df(seq: RArray) -> RbResult<RbDataFrame> {
|
849
|
-
let mut iter = seq.each();
|
850
|
-
let first = iter.next().unwrap()?;
|
851
|
-
|
852
|
-
let first_rdf = get_df(first)?;
|
853
|
-
let identity_df = first_rdf.slice(0, 0);
|
854
|
-
|
855
|
-
let mut rdfs: Vec<PolarsResult<DataFrame>> = vec![Ok(first_rdf)];
|
856
|
-
|
857
|
-
for item in iter {
|
858
|
-
let rdf = get_df(item?)?;
|
859
|
-
rdfs.push(Ok(rdf));
|
860
|
-
}
|
861
|
-
|
862
|
-
let identity = Ok(identity_df);
|
863
|
-
|
864
|
-
let df = rdfs
|
865
|
-
.into_iter()
|
866
|
-
.fold(identity, |acc: PolarsResult<DataFrame>, df| {
|
867
|
-
let mut acc = acc?;
|
868
|
-
acc.vstack_mut(&df?)?;
|
869
|
-
Ok(acc)
|
870
|
-
})
|
871
|
-
.map_err(RbPolarsErr::from)?;
|
872
|
-
|
873
|
-
Ok(df.into())
|
874
|
-
}
|
875
|
-
|
876
|
-
fn concat_lf(lfs: Value, rechunk: bool, parallel: bool) -> RbResult<RbLazyFrame> {
|
877
|
-
let (seq, len) = get_rbseq(lfs)?;
|
878
|
-
let mut lfs = Vec::with_capacity(len);
|
879
|
-
|
880
|
-
for res in seq.each() {
|
881
|
-
let item = res?;
|
882
|
-
let lf = get_lf(item)?;
|
883
|
-
lfs.push(lf);
|
884
|
-
}
|
885
|
-
|
886
|
-
let lf = polars::lazy::dsl::concat(lfs, rechunk, parallel).map_err(RbPolarsErr::from)?;
|
887
|
-
Ok(lf.into())
|
888
|
-
}
|
889
|
-
|
890
|
-
fn rb_diag_concat_df(seq: RArray) -> RbResult<RbDataFrame> {
|
891
|
-
let mut dfs = Vec::new();
|
892
|
-
for item in seq.each() {
|
893
|
-
dfs.push(get_df(item?)?);
|
894
|
-
}
|
895
|
-
let df = diag_concat_df(&dfs).map_err(RbPolarsErr::from)?;
|
896
|
-
Ok(df.into())
|
897
|
-
}
|
898
|
-
|
899
|
-
fn rb_hor_concat_df(seq: RArray) -> RbResult<RbDataFrame> {
|
900
|
-
let mut dfs = Vec::new();
|
901
|
-
for item in seq.each() {
|
902
|
-
dfs.push(get_df(item?)?);
|
903
|
-
}
|
904
|
-
let df = hor_concat_df(&dfs).map_err(RbPolarsErr::from)?;
|
905
|
-
Ok(df.into())
|
906
|
-
}
|
907
|
-
|
908
|
-
fn concat_series(seq: RArray) -> RbResult<RbSeries> {
|
909
|
-
let mut iter = seq.each();
|
910
|
-
let first = iter.next().unwrap()?;
|
911
|
-
|
912
|
-
let mut s = get_series(first)?;
|
913
|
-
|
914
|
-
for res in iter {
|
915
|
-
let item = res?;
|
916
|
-
let item = get_series(item)?;
|
917
|
-
s.append(&item).map_err(RbPolarsErr::from)?;
|
918
|
-
}
|
919
|
-
Ok(s.into())
|
920
|
-
}
|
921
|
-
|
922
|
-
fn ipc_schema(rb_f: Value) -> RbResult<Value> {
|
923
|
-
use polars::export::arrow::io::ipc::read::read_file_metadata;
|
924
|
-
let mut r = get_file_like(rb_f, false)?;
|
925
|
-
let metadata = read_file_metadata(&mut r).map_err(RbPolarsErr::arrow)?;
|
926
|
-
|
927
|
-
let dict = RHash::new();
|
928
|
-
for field in metadata.schema.fields {
|
929
|
-
let dt: Wrap<DataType> = Wrap((&field.data_type).into());
|
930
|
-
dict.aset(field.name, dt)?;
|
931
|
-
}
|
932
|
-
Ok(dict.into())
|
933
|
-
}
|
934
|
-
|
935
|
-
fn parquet_schema(rb_f: Value) -> RbResult<Value> {
|
936
|
-
use polars::export::arrow::io::parquet::read::{infer_schema, read_metadata};
|
937
|
-
|
938
|
-
let mut r = get_file_like(rb_f, false)?;
|
939
|
-
let metadata = read_metadata(&mut r).map_err(RbPolarsErr::arrow)?;
|
940
|
-
let arrow_schema = infer_schema(&metadata).map_err(RbPolarsErr::arrow)?;
|
941
|
-
|
942
|
-
let dict = RHash::new();
|
943
|
-
for field in arrow_schema.fields {
|
944
|
-
let dt: Wrap<DataType> = Wrap((&field.data_type).into());
|
945
|
-
dict.aset(field.name, dt)?;
|
946
|
-
}
|
947
|
-
Ok(dict.into())
|
948
|
-
}
|
949
|
-
|
950
|
-
fn collect_all(lfs: RArray) -> RbResult<RArray> {
|
951
|
-
let lfs = lfs
|
952
|
-
.each()
|
953
|
-
.map(|v| v?.try_convert::<&RbLazyFrame>())
|
954
|
-
.collect::<RbResult<Vec<&RbLazyFrame>>>()?;
|
955
|
-
|
956
|
-
Ok(RArray::from_iter(lfs.iter().map(|lf| {
|
957
|
-
let df = lf.ldf.clone().collect().unwrap();
|
958
|
-
RbDataFrame::new(df)
|
959
|
-
})))
|
960
|
-
}
|
961
|
-
|
962
|
-
fn rb_date_range(
|
963
|
-
start: i64,
|
964
|
-
stop: i64,
|
965
|
-
every: String,
|
966
|
-
closed: Wrap<ClosedWindow>,
|
967
|
-
name: String,
|
968
|
-
tu: Wrap<TimeUnit>,
|
969
|
-
tz: Option<TimeZone>,
|
970
|
-
) -> RbResult<RbSeries> {
|
971
|
-
let date_range = polars::time::date_range_impl(
|
972
|
-
&name,
|
973
|
-
start,
|
974
|
-
stop,
|
975
|
-
Duration::parse(&every),
|
976
|
-
closed.0,
|
977
|
-
tu.0,
|
978
|
-
tz.as_ref(),
|
979
|
-
)
|
980
|
-
.map_err(RbPolarsErr::from)?;
|
981
|
-
Ok(date_range.into_series().into())
|
982
|
-
}
|
983
|
-
|
984
|
-
fn coalesce_exprs(exprs: RArray) -> RbResult<RbExpr> {
|
985
|
-
let exprs = rb_exprs_to_exprs(exprs)?;
|
986
|
-
Ok(polars::lazy::dsl::coalesce(&exprs).into())
|
987
|
-
}
|
988
|
-
|
989
|
-
fn sum_exprs(exprs: RArray) -> RbResult<RbExpr> {
|
990
|
-
let exprs = rb_exprs_to_exprs(exprs)?;
|
991
|
-
Ok(polars::lazy::dsl::sum_exprs(exprs).into())
|
992
|
-
}
|
993
|
-
|
994
|
-
fn as_struct(exprs: RArray) -> RbResult<RbExpr> {
|
995
|
-
let exprs = rb_exprs_to_exprs(exprs)?;
|
996
|
-
Ok(polars::lazy::dsl::as_struct(&exprs).into())
|
997
|
-
}
|
998
|
-
|
999
|
-
fn arg_where(condition: &RbExpr) -> RbExpr {
|
1000
|
-
polars::lazy::dsl::arg_where(condition.inner.clone()).into()
|
1001
|
-
}
|
1002
|
-
|
1003
|
-
fn get_idx_type() -> Value {
|
1004
|
-
Wrap(IDX_DTYPE).into_value()
|
1005
|
-
}
|