polars-df 0.9.0 → 0.11.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +23 -0
- data/Cargo.lock +144 -57
- data/README.md +7 -6
- data/ext/polars/Cargo.toml +10 -6
- data/ext/polars/src/batched_csv.rs +53 -50
- data/ext/polars/src/conversion/anyvalue.rs +3 -2
- data/ext/polars/src/conversion/mod.rs +31 -67
- data/ext/polars/src/dataframe/construction.rs +186 -0
- data/ext/polars/src/dataframe/export.rs +48 -0
- data/ext/polars/src/dataframe/general.rs +607 -0
- data/ext/polars/src/dataframe/io.rs +463 -0
- data/ext/polars/src/dataframe/mod.rs +26 -0
- data/ext/polars/src/expr/array.rs +6 -2
- data/ext/polars/src/expr/datetime.rs +13 -4
- data/ext/polars/src/expr/general.rs +50 -9
- data/ext/polars/src/expr/list.rs +6 -2
- data/ext/polars/src/expr/rolling.rs +185 -69
- data/ext/polars/src/expr/string.rs +12 -33
- data/ext/polars/src/file.rs +158 -11
- data/ext/polars/src/functions/lazy.rs +20 -3
- data/ext/polars/src/functions/range.rs +74 -0
- data/ext/polars/src/functions/whenthen.rs +47 -17
- data/ext/polars/src/interop/mod.rs +1 -0
- data/ext/polars/src/interop/numo/mod.rs +2 -0
- data/ext/polars/src/interop/numo/to_numo_df.rs +23 -0
- data/ext/polars/src/interop/numo/to_numo_series.rs +60 -0
- data/ext/polars/src/lazyframe/mod.rs +111 -56
- data/ext/polars/src/lib.rs +68 -34
- data/ext/polars/src/map/dataframe.rs +17 -9
- data/ext/polars/src/map/lazy.rs +5 -25
- data/ext/polars/src/map/series.rs +7 -1
- data/ext/polars/src/series/aggregation.rs +47 -30
- data/ext/polars/src/series/export.rs +131 -49
- data/ext/polars/src/series/mod.rs +13 -133
- data/lib/polars/array_expr.rb +6 -2
- data/lib/polars/batched_csv_reader.rb +11 -3
- data/lib/polars/convert.rb +6 -1
- data/lib/polars/data_frame.rb +225 -370
- data/lib/polars/date_time_expr.rb +11 -4
- data/lib/polars/date_time_name_space.rb +14 -4
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/exceptions.rb +4 -0
- data/lib/polars/expr.rb +1171 -54
- data/lib/polars/functions/lazy.rb +3 -3
- data/lib/polars/functions/range/date_range.rb +92 -0
- data/lib/polars/functions/range/datetime_range.rb +149 -0
- data/lib/polars/functions/range/time_range.rb +141 -0
- data/lib/polars/functions/whenthen.rb +74 -5
- data/lib/polars/group_by.rb +88 -23
- data/lib/polars/io/avro.rb +24 -0
- data/lib/polars/{io.rb → io/csv.rb} +307 -489
- data/lib/polars/io/database.rb +73 -0
- data/lib/polars/io/ipc.rb +247 -0
- data/lib/polars/io/json.rb +18 -0
- data/lib/polars/io/ndjson.rb +69 -0
- data/lib/polars/io/parquet.rb +226 -0
- data/lib/polars/lazy_frame.rb +55 -195
- data/lib/polars/lazy_group_by.rb +100 -3
- data/lib/polars/list_expr.rb +6 -2
- data/lib/polars/rolling_group_by.rb +2 -2
- data/lib/polars/series.rb +14 -12
- data/lib/polars/string_expr.rb +38 -36
- data/lib/polars/utils.rb +89 -1
- data/lib/polars/version.rb +1 -1
- data/lib/polars/whenthen.rb +83 -0
- data/lib/polars.rb +10 -3
- metadata +23 -8
- data/ext/polars/src/dataframe.rs +0 -1182
- data/lib/polars/when.rb +0 -16
- data/lib/polars/when_then.rb +0 -19
data/ext/polars/src/lib.rs
CHANGED
@@ -5,6 +5,7 @@ mod error;
|
|
5
5
|
mod expr;
|
6
6
|
mod file;
|
7
7
|
mod functions;
|
8
|
+
mod interop;
|
8
9
|
mod lazyframe;
|
9
10
|
mod lazygroupby;
|
10
11
|
mod map;
|
@@ -23,7 +24,7 @@ use error::{RbPolarsErr, RbTypeError, RbValueError};
|
|
23
24
|
use expr::rb_exprs_to_exprs;
|
24
25
|
use expr::RbExpr;
|
25
26
|
use functions::string_cache::RbStringCacheHolder;
|
26
|
-
use functions::whenthen::{RbThen, RbWhen};
|
27
|
+
use functions::whenthen::{RbChainedThen, RbChainedWhen, RbThen, RbWhen};
|
27
28
|
use lazyframe::RbLazyFrame;
|
28
29
|
use lazygroupby::RbLazyGroupBy;
|
29
30
|
use magnus::{define_module, function, method, prelude::*, Error, Ruby};
|
@@ -59,22 +60,27 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
59
60
|
class.define_singleton_method("read_csv", function!(RbDataFrame::read_csv, -1))?;
|
60
61
|
class.define_singleton_method("read_parquet", function!(RbDataFrame::read_parquet, 9))?;
|
61
62
|
class.define_singleton_method("read_ipc", function!(RbDataFrame::read_ipc, 6))?;
|
63
|
+
class.define_singleton_method("read_ipc_stream", function!(RbDataFrame::read_ipc_stream, 6))?;
|
62
64
|
class.define_singleton_method("read_avro", function!(RbDataFrame::read_avro, 4))?;
|
63
|
-
class.define_singleton_method("
|
64
|
-
class.define_singleton_method("
|
65
|
-
class.define_singleton_method("read_hash", function!(RbDataFrame::read_hash, 1))?;
|
65
|
+
class.define_singleton_method("from_rows", function!(RbDataFrame::from_rows, 3))?;
|
66
|
+
class.define_singleton_method("from_hashes", function!(RbDataFrame::from_hashes, 5))?;
|
66
67
|
class.define_singleton_method("read_json", function!(RbDataFrame::read_json, 1))?;
|
67
68
|
class.define_singleton_method("read_ndjson", function!(RbDataFrame::read_ndjson, 1))?;
|
68
69
|
class.define_method("estimated_size", method!(RbDataFrame::estimated_size, 0))?;
|
70
|
+
class.define_method("dtype_strings", method!(RbDataFrame::dtype_strings, 0))?;
|
69
71
|
class.define_method("write_avro", method!(RbDataFrame::write_avro, 2))?;
|
70
72
|
class.define_method("write_json", method!(RbDataFrame::write_json, 3))?;
|
71
73
|
class.define_method("write_ndjson", method!(RbDataFrame::write_ndjson, 1))?;
|
72
74
|
class.define_method("write_csv", method!(RbDataFrame::write_csv, 10))?;
|
73
75
|
class.define_method("write_ipc", method!(RbDataFrame::write_ipc, 2))?;
|
76
|
+
class.define_method(
|
77
|
+
"write_ipc_stream",
|
78
|
+
method!(RbDataFrame::write_ipc_stream, 2),
|
79
|
+
)?;
|
74
80
|
class.define_method("row_tuple", method!(RbDataFrame::row_tuple, 1))?;
|
75
81
|
class.define_method("row_tuples", method!(RbDataFrame::row_tuples, 0))?;
|
76
82
|
class.define_method("to_numo", method!(RbDataFrame::to_numo, 0))?;
|
77
|
-
class.define_method("write_parquet", method!(RbDataFrame::write_parquet,
|
83
|
+
class.define_method("write_parquet", method!(RbDataFrame::write_parquet, 6))?;
|
78
84
|
class.define_method("add", method!(RbDataFrame::add, 1))?;
|
79
85
|
class.define_method("sub", method!(RbDataFrame::sub, 1))?;
|
80
86
|
class.define_method("div", method!(RbDataFrame::div, 1))?;
|
@@ -88,7 +94,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
88
94
|
class.define_method("sample_n", method!(RbDataFrame::sample_n, 4))?;
|
89
95
|
class.define_method("sample_frac", method!(RbDataFrame::sample_frac, 4))?;
|
90
96
|
class.define_method("rechunk", method!(RbDataFrame::rechunk, 0))?;
|
91
|
-
class.define_method("to_s", method!(RbDataFrame::
|
97
|
+
class.define_method("to_s", method!(RbDataFrame::as_str, 0))?;
|
92
98
|
class.define_method("get_columns", method!(RbDataFrame::get_columns, 0))?;
|
93
99
|
class.define_method("columns", method!(RbDataFrame::columns, 0))?;
|
94
100
|
class.define_method(
|
@@ -106,8 +112,6 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
106
112
|
class.define_method("vstack_mut", method!(RbDataFrame::vstack_mut, 1))?;
|
107
113
|
class.define_method("vstack", method!(RbDataFrame::vstack, 1))?;
|
108
114
|
class.define_method("drop_in_place", method!(RbDataFrame::drop_in_place, 1))?;
|
109
|
-
class.define_method("drop_nulls", method!(RbDataFrame::drop_nulls, 1))?;
|
110
|
-
class.define_method("drop", method!(RbDataFrame::drop, 1))?;
|
111
115
|
class.define_method("select_at_idx", method!(RbDataFrame::select_at_idx, 1))?;
|
112
116
|
class.define_method(
|
113
117
|
"get_column_index",
|
@@ -115,7 +119,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
115
119
|
)?;
|
116
120
|
class.define_method("get_column", method!(RbDataFrame::get_column, 1))?;
|
117
121
|
class.define_method("select", method!(RbDataFrame::select, 1))?;
|
118
|
-
class.define_method("take", method!(RbDataFrame::
|
122
|
+
class.define_method("take", method!(RbDataFrame::gather, 1))?;
|
119
123
|
class.define_method(
|
120
124
|
"take_with_series",
|
121
125
|
method!(RbDataFrame::take_with_series, 1),
|
@@ -134,7 +138,6 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
134
138
|
class.define_method("melt", method!(RbDataFrame::melt, 4))?;
|
135
139
|
class.define_method("pivot_expr", method!(RbDataFrame::pivot_expr, 7))?;
|
136
140
|
class.define_method("partition_by", method!(RbDataFrame::partition_by, 3))?;
|
137
|
-
class.define_method("shift", method!(RbDataFrame::shift, 1))?;
|
138
141
|
class.define_method("lazy", method!(RbDataFrame::lazy, 0))?;
|
139
142
|
class.define_method("mean_horizontal", method!(RbDataFrame::mean_horizontal, 1))?;
|
140
143
|
class.define_method("max_horizontal", method!(RbDataFrame::max_horizontal, 0))?;
|
@@ -142,7 +145,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
142
145
|
class.define_method("sum_horizontal", method!(RbDataFrame::sum_horizontal, 1))?;
|
143
146
|
class.define_method("to_dummies", method!(RbDataFrame::to_dummies, 3))?;
|
144
147
|
class.define_method("null_count", method!(RbDataFrame::null_count, 0))?;
|
145
|
-
class.define_method("
|
148
|
+
class.define_method("map_rows", method!(RbDataFrame::map_rows, 3))?;
|
146
149
|
class.define_method("shrink_to_fit", method!(RbDataFrame::shrink_to_fit, 0))?;
|
147
150
|
class.define_method("hash_rows", method!(RbDataFrame::hash_rows, 4))?;
|
148
151
|
class.define_method("transpose", method!(RbDataFrame::transpose, 2))?;
|
@@ -205,15 +208,15 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
205
208
|
class.define_method("cast", method!(RbExpr::cast, 2))?;
|
206
209
|
class.define_method("sort_with", method!(RbExpr::sort_with, 2))?;
|
207
210
|
class.define_method("arg_sort", method!(RbExpr::arg_sort, 2))?;
|
208
|
-
class.define_method("top_k", method!(RbExpr::top_k,
|
209
|
-
class.define_method("bottom_k", method!(RbExpr::bottom_k,
|
211
|
+
class.define_method("top_k", method!(RbExpr::top_k, 3))?;
|
212
|
+
class.define_method("bottom_k", method!(RbExpr::bottom_k, 3))?;
|
210
213
|
class.define_method("peak_min", method!(RbExpr::peak_min, 0))?;
|
211
214
|
class.define_method("peak_max", method!(RbExpr::peak_max, 0))?;
|
212
215
|
class.define_method("arg_max", method!(RbExpr::arg_max, 0))?;
|
213
216
|
class.define_method("arg_min", method!(RbExpr::arg_min, 0))?;
|
214
217
|
class.define_method("search_sorted", method!(RbExpr::search_sorted, 2))?;
|
215
218
|
class.define_method("gather", method!(RbExpr::gather, 1))?;
|
216
|
-
class.define_method("sort_by", method!(RbExpr::sort_by,
|
219
|
+
class.define_method("sort_by", method!(RbExpr::sort_by, 5))?;
|
217
220
|
class.define_method("backward_fill", method!(RbExpr::backward_fill, 1))?;
|
218
221
|
class.define_method("forward_fill", method!(RbExpr::forward_fill, 1))?;
|
219
222
|
class.define_method("shift", method!(RbExpr::shift, 2))?;
|
@@ -312,7 +315,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
312
315
|
class.define_method("arr_reverse", method!(RbExpr::arr_reverse, 0))?;
|
313
316
|
class.define_method("arr_arg_min", method!(RbExpr::arr_arg_min, 0))?;
|
314
317
|
class.define_method("arr_arg_max", method!(RbExpr::arr_arg_max, 0))?;
|
315
|
-
class.define_method("arr_get", method!(RbExpr::arr_get,
|
318
|
+
class.define_method("arr_get", method!(RbExpr::arr_get, 2))?;
|
316
319
|
class.define_method("arr_join", method!(RbExpr::arr_join, 2))?;
|
317
320
|
class.define_method("arr_contains", method!(RbExpr::arr_contains, 1))?;
|
318
321
|
class.define_method("arr_count_matches", method!(RbExpr::arr_count_matches, 1))?;
|
@@ -406,7 +409,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
406
409
|
class.define_method("dt_cast_time_unit", method!(RbExpr::dt_cast_time_unit, 1))?;
|
407
410
|
class.define_method(
|
408
411
|
"dt_replace_time_zone",
|
409
|
-
method!(RbExpr::dt_replace_time_zone,
|
412
|
+
method!(RbExpr::dt_replace_time_zone, 3),
|
410
413
|
)?;
|
411
414
|
class.define_method("dt_truncate", method!(RbExpr::dt_truncate, 2))?;
|
412
415
|
class.define_method("dt_month_start", method!(RbExpr::dt_month_start, 0))?;
|
@@ -415,20 +418,31 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
415
418
|
class.define_method("dt_dst_offset", method!(RbExpr::dt_dst_offset, 0))?;
|
416
419
|
class.define_method("dt_round", method!(RbExpr::dt_round, 2))?;
|
417
420
|
class.define_method("dt_combine", method!(RbExpr::dt_combine, 2))?;
|
418
|
-
class.define_method("
|
421
|
+
class.define_method("map_batches", method!(RbExpr::map_batches, 4))?;
|
419
422
|
class.define_method("dot", method!(RbExpr::dot, 1))?;
|
420
423
|
class.define_method("reinterpret", method!(RbExpr::reinterpret, 1))?;
|
421
424
|
class.define_method("mode", method!(RbExpr::mode, 0))?;
|
422
425
|
class.define_method("exclude", method!(RbExpr::exclude, 1))?;
|
423
426
|
class.define_method("interpolate", method!(RbExpr::interpolate, 1))?;
|
424
|
-
class.define_method("rolling_sum", method!(RbExpr::rolling_sum,
|
425
|
-
class.define_method("
|
426
|
-
class.define_method("
|
427
|
-
class.define_method("
|
428
|
-
class.define_method("
|
429
|
-
class.define_method("
|
430
|
-
class.define_method("
|
431
|
-
class.define_method("
|
427
|
+
class.define_method("rolling_sum", method!(RbExpr::rolling_sum, 4))?;
|
428
|
+
class.define_method("rolling_sum_by", method!(RbExpr::rolling_sum_by, 4))?;
|
429
|
+
class.define_method("rolling_min", method!(RbExpr::rolling_min, 4))?;
|
430
|
+
class.define_method("rolling_min_by", method!(RbExpr::rolling_min_by, 4))?;
|
431
|
+
class.define_method("rolling_max", method!(RbExpr::rolling_max, 4))?;
|
432
|
+
class.define_method("rolling_max_by", method!(RbExpr::rolling_max_by, 4))?;
|
433
|
+
class.define_method("rolling_mean", method!(RbExpr::rolling_mean, 4))?;
|
434
|
+
class.define_method("rolling_mean_by", method!(RbExpr::rolling_mean_by, 4))?;
|
435
|
+
class.define_method("rolling_std", method!(RbExpr::rolling_std, 5))?;
|
436
|
+
class.define_method("rolling_std_by", method!(RbExpr::rolling_std_by, 5))?;
|
437
|
+
class.define_method("rolling_var", method!(RbExpr::rolling_var, 5))?;
|
438
|
+
class.define_method("rolling_var_by", method!(RbExpr::rolling_var_by, 5))?;
|
439
|
+
class.define_method("rolling_median", method!(RbExpr::rolling_median, 4))?;
|
440
|
+
class.define_method("rolling_median_by", method!(RbExpr::rolling_median_by, 4))?;
|
441
|
+
class.define_method("rolling_quantile", method!(RbExpr::rolling_quantile, 6))?;
|
442
|
+
class.define_method(
|
443
|
+
"rolling_quantile_by",
|
444
|
+
method!(RbExpr::rolling_quantile_by, 6),
|
445
|
+
)?;
|
432
446
|
class.define_method("rolling_skew", method!(RbExpr::rolling_skew, 2))?;
|
433
447
|
class.define_method("lower_bound", method!(RbExpr::lower_bound, 0))?;
|
434
448
|
class.define_method("upper_bound", method!(RbExpr::upper_bound, 0))?;
|
@@ -448,7 +462,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
448
462
|
class.define_method("list_sort", method!(RbExpr::list_sort, 1))?;
|
449
463
|
class.define_method("list_reverse", method!(RbExpr::list_reverse, 0))?;
|
450
464
|
class.define_method("list_unique", method!(RbExpr::list_unique, 1))?;
|
451
|
-
class.define_method("list_get", method!(RbExpr::list_get,
|
465
|
+
class.define_method("list_get", method!(RbExpr::list_get, 2))?;
|
452
466
|
class.define_method("list_join", method!(RbExpr::list_join, 2))?;
|
453
467
|
class.define_method("list_arg_min", method!(RbExpr::list_arg_min, 0))?;
|
454
468
|
class.define_method("list_arg_max", method!(RbExpr::list_arg_max, 0))?;
|
@@ -554,7 +568,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
554
568
|
class.define_singleton_method("arctan2d", function!(functions::lazy::arctan2d, 2))?;
|
555
569
|
class.define_singleton_method("rolling_corr", function!(functions::lazy::rolling_corr, 5))?;
|
556
570
|
class.define_singleton_method("rolling_cov", function!(functions::lazy::rolling_cov, 5))?;
|
557
|
-
class.define_singleton_method("arg_sort_by", function!(functions::lazy::arg_sort_by,
|
571
|
+
class.define_singleton_method("arg_sort_by", function!(functions::lazy::arg_sort_by, 5))?;
|
558
572
|
class.define_singleton_method("when", function!(functions::whenthen::when, 1))?;
|
559
573
|
class.define_singleton_method("concat_str", function!(functions::lazy::concat_str, 3))?;
|
560
574
|
class.define_singleton_method("concat_list", function!(functions::lazy::concat_list, 1))?;
|
@@ -611,6 +625,17 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
611
625
|
)?;
|
612
626
|
class.define_singleton_method("collect_all", function!(functions::lazy::collect_all, 1))?;
|
613
627
|
class.define_singleton_method("date_range", function!(functions::range::date_range, 6))?;
|
628
|
+
class.define_singleton_method("date_ranges", function!(functions::range::date_ranges, 6))?;
|
629
|
+
class.define_singleton_method(
|
630
|
+
"datetime_range",
|
631
|
+
function!(functions::range::datetime_range, 6),
|
632
|
+
)?;
|
633
|
+
class.define_singleton_method(
|
634
|
+
"datetime_ranges",
|
635
|
+
function!(functions::range::datetime_ranges, 6),
|
636
|
+
)?;
|
637
|
+
class.define_singleton_method("time_range", function!(functions::range::time_range, 4))?;
|
638
|
+
class.define_singleton_method("time_ranges", function!(functions::range::time_ranges, 4))?;
|
614
639
|
class.define_singleton_method(
|
615
640
|
"dtype_str_repr",
|
616
641
|
function!(functions::misc::dtype_str_repr, 1),
|
@@ -689,7 +714,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
689
714
|
class.define_singleton_method("new_from_csv", function!(RbLazyFrame::new_from_csv, -1))?;
|
690
715
|
class.define_singleton_method(
|
691
716
|
"new_from_parquet",
|
692
|
-
function!(RbLazyFrame::new_from_parquet,
|
717
|
+
function!(RbLazyFrame::new_from_parquet, 12),
|
693
718
|
)?;
|
694
719
|
class.define_singleton_method("new_from_ipc", function!(RbLazyFrame::new_from_ipc, 6))?;
|
695
720
|
class.define_method("write_json", method!(RbLazyFrame::write_json, 1))?;
|
@@ -702,8 +727,8 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
702
727
|
"optimization_toggle",
|
703
728
|
method!(RbLazyFrame::optimization_toggle, 9),
|
704
729
|
)?;
|
705
|
-
class.define_method("sort", method!(RbLazyFrame::sort,
|
706
|
-
class.define_method("sort_by_exprs", method!(RbLazyFrame::sort_by_exprs,
|
730
|
+
class.define_method("sort", method!(RbLazyFrame::sort, 5))?;
|
731
|
+
class.define_method("sort_by_exprs", method!(RbLazyFrame::sort_by_exprs, 5))?;
|
707
732
|
class.define_method("cache", method!(RbLazyFrame::cache, 0))?;
|
708
733
|
class.define_method("collect", method!(RbLazyFrame::collect, 0))?;
|
709
734
|
class.define_method("sink_parquet", method!(RbLazyFrame::sink_parquet, 7))?;
|
@@ -835,7 +860,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
835
860
|
class.define_method("mul", method!(RbSeries::mul, 1))?;
|
836
861
|
class.define_method("div", method!(RbSeries::div, 1))?;
|
837
862
|
class.define_method("rem", method!(RbSeries::rem, 1))?;
|
838
|
-
class.define_method("sort", method!(RbSeries::sort,
|
863
|
+
class.define_method("sort", method!(RbSeries::sort, 3))?;
|
839
864
|
class.define_method("value_counts", method!(RbSeries::value_counts, 1))?;
|
840
865
|
class.define_method("any", method!(RbSeries::any, 1))?;
|
841
866
|
class.define_method("all", method!(RbSeries::all, 1))?;
|
@@ -1032,11 +1057,20 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
1032
1057
|
// extra
|
1033
1058
|
class.define_method("extend_constant", method!(RbSeries::extend_constant, 2))?;
|
1034
1059
|
|
1060
|
+
// when then
|
1035
1061
|
let class = module.define_class("RbWhen", ruby.class_object())?;
|
1036
|
-
class.define_method("
|
1062
|
+
class.define_method("then", method!(RbWhen::then, 1))?;
|
1063
|
+
|
1064
|
+
let class = module.define_class("RbThen", ruby.class_object())?;
|
1065
|
+
class.define_method("when", method!(RbThen::when, 1))?;
|
1066
|
+
class.define_method("otherwise", method!(RbThen::otherwise, 1))?;
|
1067
|
+
|
1068
|
+
let class = module.define_class("RbChainedWhen", ruby.class_object())?;
|
1069
|
+
class.define_method("then", method!(RbChainedWhen::then, 1))?;
|
1037
1070
|
|
1038
|
-
let class = module.define_class("
|
1039
|
-
class.define_method("
|
1071
|
+
let class = module.define_class("RbChainedThen", ruby.class_object())?;
|
1072
|
+
class.define_method("when", method!(RbChainedThen::when, 1))?;
|
1073
|
+
class.define_method("otherwise", method!(RbChainedThen::otherwise, 1))?;
|
1040
1074
|
|
1041
1075
|
// sql
|
1042
1076
|
let class = module.define_class("RbSQLContext", ruby.class_object())?;
|
@@ -265,9 +265,9 @@ pub fn apply_lambda_with_rows_output<'a>(
|
|
265
265
|
// to the row. Before we mutate the row buf again, the reference is dropped.
|
266
266
|
// we only cannot prove it to the compiler.
|
267
267
|
// we still do this because it saves a Vec allocation in a hot loop.
|
268
|
-
unsafe { &*ptr }
|
268
|
+
Ok(unsafe { &*ptr })
|
269
269
|
}
|
270
|
-
None => &null_row,
|
270
|
+
None => Ok(&null_row),
|
271
271
|
}
|
272
272
|
}
|
273
273
|
Err(e) => panic!("ruby function failed {}", e),
|
@@ -277,22 +277,30 @@ pub fn apply_lambda_with_rows_output<'a>(
|
|
277
277
|
// first rows for schema inference
|
278
278
|
let mut buf = Vec::with_capacity(inference_size);
|
279
279
|
buf.push(first_value);
|
280
|
-
|
281
|
-
|
280
|
+
for v in (&mut row_iter).take(inference_size) {
|
281
|
+
buf.push(v?.clone());
|
282
|
+
}
|
283
|
+
|
284
|
+
let schema = rows_to_schema_first_non_null(&buf, Some(50))?;
|
282
285
|
|
283
286
|
if init_null_count > 0 {
|
284
287
|
// Safety: we know the iterators size
|
285
288
|
let iter = unsafe {
|
286
289
|
(0..init_null_count)
|
287
|
-
.map(|_| &null_row)
|
288
|
-
.chain(buf.iter())
|
290
|
+
.map(|_| Ok(&null_row))
|
291
|
+
.chain(buf.iter().map(Ok))
|
289
292
|
.chain(row_iter)
|
290
293
|
.trust_my_length(df.height())
|
291
294
|
};
|
292
|
-
DataFrame::
|
295
|
+
DataFrame::try_from_rows_iter_and_schema(iter, &schema)
|
293
296
|
} else {
|
294
297
|
// Safety: we know the iterators size
|
295
|
-
let iter = unsafe {
|
296
|
-
|
298
|
+
let iter = unsafe {
|
299
|
+
buf.iter()
|
300
|
+
.map(Ok)
|
301
|
+
.chain(row_iter)
|
302
|
+
.trust_my_length(df.height())
|
303
|
+
};
|
304
|
+
DataFrame::try_from_rows_iter_and_schema(iter, &schema)
|
297
305
|
}
|
298
306
|
}
|
data/ext/polars/src/map/lazy.rs
CHANGED
@@ -8,31 +8,11 @@ pub fn binary_lambda(_lambda: Value, _a: Series, _b: Series) -> PolarsResult<Opt
|
|
8
8
|
}
|
9
9
|
|
10
10
|
pub fn map_single(
|
11
|
-
|
11
|
+
_rbexpr: &RbExpr,
|
12
12
|
_lambda: Value,
|
13
|
-
|
14
|
-
|
13
|
+
_output_type: Option<Wrap<DataType>>,
|
14
|
+
_agg_list: bool,
|
15
|
+
_is_elementwise: bool,
|
15
16
|
) -> RbExpr {
|
16
|
-
|
17
|
-
|
18
|
-
let output_type2 = output_type.clone();
|
19
|
-
let function = move |_s: Series| {
|
20
|
-
let _output_type = output_type2.clone().unwrap_or(DataType::Unknown);
|
21
|
-
|
22
|
-
todo!();
|
23
|
-
};
|
24
|
-
|
25
|
-
let output_map = GetOutput::map_field(move |fld| match output_type {
|
26
|
-
Some(ref dt) => Field::new(fld.name(), dt.clone()),
|
27
|
-
None => {
|
28
|
-
let mut fld = fld.clone();
|
29
|
-
fld.coerce(DataType::Unknown);
|
30
|
-
fld
|
31
|
-
}
|
32
|
-
});
|
33
|
-
if agg_list {
|
34
|
-
rbexpr.clone().inner.map_list(function, output_map).into()
|
35
|
-
} else {
|
36
|
-
rbexpr.clone().inner.map(function, output_map).into()
|
37
|
-
}
|
17
|
+
todo!();
|
38
18
|
}
|
@@ -33,7 +33,12 @@ fn infer_and_finish<'a, A: ApplyLambda<'a>>(
|
|
33
33
|
.apply_lambda_with_utf8_out_type(lambda, null_count, Some(first_value.as_str()))
|
34
34
|
.map(|ca| ca.into_series().into())
|
35
35
|
} else if out.respond_to("_s", true)? {
|
36
|
-
|
36
|
+
let rb_rbseries: &RbSeries = out.funcall("_s", ()).unwrap();
|
37
|
+
let series = rb_rbseries.series.borrow();
|
38
|
+
let dt = series.dtype();
|
39
|
+
applyer
|
40
|
+
.apply_lambda_with_list_out_type(lambda, null_count, &series, dt)
|
41
|
+
.map(|ca| ca.into_series().into())
|
37
42
|
} else if out.is_kind_of(class::array()) {
|
38
43
|
todo!()
|
39
44
|
} else if out.is_kind_of(class::hash()) {
|
@@ -66,6 +71,7 @@ pub trait ApplyLambda<'a> {
|
|
66
71
|
fn apply_lambda_unknown(&'a self, _lambda: Value) -> RbResult<RbSeries>;
|
67
72
|
|
68
73
|
/// Apply a lambda that doesn't change output types
|
74
|
+
#[allow(dead_code)]
|
69
75
|
fn apply_lambda(&'a self, _lambda: Value) -> RbResult<RbSeries>;
|
70
76
|
|
71
77
|
// Used to store a struct type
|
@@ -1,6 +1,6 @@
|
|
1
1
|
use crate::error::RbPolarsErr;
|
2
2
|
use crate::prelude::*;
|
3
|
-
use crate::{RbResult, RbSeries
|
3
|
+
use crate::{RbResult, RbSeries};
|
4
4
|
use magnus::{IntoValue, Value};
|
5
5
|
|
6
6
|
impl RbSeries {
|
@@ -36,31 +36,52 @@ impl RbSeries {
|
|
36
36
|
Ok(Wrap(
|
37
37
|
self.series
|
38
38
|
.borrow()
|
39
|
-
.
|
39
|
+
.max_reduce()
|
40
40
|
.map_err(RbPolarsErr::from)?
|
41
|
-
.
|
42
|
-
.map_err(RbPolarsErr::from)?,
|
41
|
+
.as_any_value(),
|
43
42
|
)
|
44
43
|
.into_value())
|
45
44
|
}
|
46
45
|
|
47
|
-
pub fn mean(&self) ->
|
46
|
+
pub fn mean(&self) -> RbResult<Value> {
|
48
47
|
match self.series.borrow().dtype() {
|
49
|
-
DataType::Boolean =>
|
50
|
-
|
51
|
-
|
48
|
+
DataType::Boolean => Ok(Wrap(
|
49
|
+
self.series
|
50
|
+
.borrow()
|
51
|
+
.cast(&DataType::UInt8)
|
52
|
+
.unwrap()
|
53
|
+
.mean_reduce()
|
54
|
+
.as_any_value(),
|
55
|
+
)
|
56
|
+
.into_value()),
|
57
|
+
DataType::Datetime(_, _) | DataType::Duration(_) | DataType::Time => {
|
58
|
+
Ok(Wrap(self.series.borrow().mean_reduce().as_any_value()).into_value())
|
52
59
|
}
|
53
|
-
_ => self.series.borrow().mean(),
|
60
|
+
_ => Ok(self.series.borrow().mean().into_value()),
|
54
61
|
}
|
55
62
|
}
|
56
63
|
|
57
|
-
pub fn median(&self) ->
|
64
|
+
pub fn median(&self) -> RbResult<Value> {
|
58
65
|
match self.series.borrow().dtype() {
|
59
|
-
DataType::Boolean =>
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
66
|
+
DataType::Boolean => Ok(Wrap(
|
67
|
+
self.series
|
68
|
+
.borrow()
|
69
|
+
.cast(&DataType::UInt8)
|
70
|
+
.unwrap()
|
71
|
+
.median_reduce()
|
72
|
+
.map_err(RbPolarsErr::from)?
|
73
|
+
.as_any_value(),
|
74
|
+
)
|
75
|
+
.into_value()),
|
76
|
+
DataType::Datetime(_, _) | DataType::Duration(_) | DataType::Time => Ok(Wrap(
|
77
|
+
self.series
|
78
|
+
.borrow()
|
79
|
+
.median_reduce()
|
80
|
+
.map_err(RbPolarsErr::from)?
|
81
|
+
.as_any_value(),
|
82
|
+
)
|
83
|
+
.into_value()),
|
84
|
+
_ => Ok(self.series.borrow().median().into_value()),
|
64
85
|
}
|
65
86
|
}
|
66
87
|
|
@@ -68,10 +89,9 @@ impl RbSeries {
|
|
68
89
|
Ok(Wrap(
|
69
90
|
self.series
|
70
91
|
.borrow()
|
71
|
-
.
|
92
|
+
.min_reduce()
|
72
93
|
.map_err(RbPolarsErr::from)?
|
73
|
-
.
|
74
|
-
.map_err(RbPolarsErr::from)?,
|
94
|
+
.as_any_value(),
|
75
95
|
)
|
76
96
|
.into_value())
|
77
97
|
}
|
@@ -81,25 +101,22 @@ impl RbSeries {
|
|
81
101
|
quantile: f64,
|
82
102
|
interpolation: Wrap<QuantileInterpolOptions>,
|
83
103
|
) -> RbResult<Value> {
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
)
|
92
|
-
.into_value())
|
104
|
+
let bind = self
|
105
|
+
.series
|
106
|
+
.borrow()
|
107
|
+
.quantile_reduce(quantile, interpolation.0);
|
108
|
+
let sc = bind.map_err(RbPolarsErr::from)?;
|
109
|
+
|
110
|
+
Ok(Wrap(sc.as_any_value()).into_value())
|
93
111
|
}
|
94
112
|
|
95
113
|
pub fn sum(&self) -> RbResult<Value> {
|
96
114
|
Ok(Wrap(
|
97
115
|
self.series
|
98
116
|
.borrow()
|
99
|
-
.
|
117
|
+
.sum_reduce()
|
100
118
|
.map_err(RbPolarsErr::from)?
|
101
|
-
.
|
102
|
-
.map_err(RbPolarsErr::from)?,
|
119
|
+
.as_any_value(),
|
103
120
|
)
|
104
121
|
.into_value())
|
105
122
|
}
|