polars-df 0.11.0 → 0.12.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/Cargo.lock +360 -361
- data/ext/polars/Cargo.toml +10 -7
- data/ext/polars/src/batched_csv.rs +1 -1
- data/ext/polars/src/conversion/any_value.rs +261 -0
- data/ext/polars/src/conversion/chunked_array.rs +4 -4
- data/ext/polars/src/conversion/mod.rs +51 -10
- data/ext/polars/src/dataframe/construction.rs +6 -8
- data/ext/polars/src/dataframe/general.rs +19 -29
- data/ext/polars/src/dataframe/io.rs +43 -33
- data/ext/polars/src/error.rs +26 -4
- data/ext/polars/src/expr/categorical.rs +0 -10
- data/ext/polars/src/expr/datetime.rs +4 -12
- data/ext/polars/src/expr/general.rs +123 -110
- data/ext/polars/src/expr/mod.rs +2 -2
- data/ext/polars/src/expr/rolling.rs +17 -9
- data/ext/polars/src/expr/string.rs +2 -6
- data/ext/polars/src/functions/eager.rs +10 -10
- data/ext/polars/src/functions/lazy.rs +21 -21
- data/ext/polars/src/functions/range.rs +6 -12
- data/ext/polars/src/interop/numo/to_numo_series.rs +2 -1
- data/ext/polars/src/lazyframe/mod.rs +81 -98
- data/ext/polars/src/lib.rs +55 -45
- data/ext/polars/src/map/dataframe.rs +2 -2
- data/ext/polars/src/rb_modules.rs +25 -1
- data/ext/polars/src/series/aggregation.rs +4 -2
- data/ext/polars/src/series/arithmetic.rs +21 -11
- data/ext/polars/src/series/construction.rs +56 -38
- data/ext/polars/src/series/export.rs +1 -1
- data/ext/polars/src/series/mod.rs +31 -10
- data/ext/polars/src/sql.rs +3 -1
- data/lib/polars/array_expr.rb +4 -4
- data/lib/polars/batched_csv_reader.rb +2 -2
- data/lib/polars/cat_expr.rb +0 -36
- data/lib/polars/cat_name_space.rb +0 -37
- data/lib/polars/data_frame.rb +93 -101
- data/lib/polars/data_types.rb +1 -1
- data/lib/polars/date_time_expr.rb +525 -573
- data/lib/polars/date_time_name_space.rb +263 -464
- data/lib/polars/dynamic_group_by.rb +3 -3
- data/lib/polars/exceptions.rb +3 -0
- data/lib/polars/expr.rb +367 -330
- data/lib/polars/expr_dispatch.rb +1 -1
- data/lib/polars/functions/aggregation/horizontal.rb +8 -8
- data/lib/polars/functions/as_datatype.rb +63 -40
- data/lib/polars/functions/lazy.rb +63 -14
- data/lib/polars/functions/lit.rb +1 -1
- data/lib/polars/functions/range/date_range.rb +18 -77
- data/lib/polars/functions/range/datetime_range.rb +4 -4
- data/lib/polars/functions/range/int_range.rb +2 -2
- data/lib/polars/functions/range/time_range.rb +4 -4
- data/lib/polars/functions/repeat.rb +1 -1
- data/lib/polars/functions/whenthen.rb +1 -1
- data/lib/polars/io/csv.rb +8 -8
- data/lib/polars/io/ipc.rb +3 -3
- data/lib/polars/io/json.rb +13 -2
- data/lib/polars/io/ndjson.rb +15 -4
- data/lib/polars/io/parquet.rb +5 -4
- data/lib/polars/lazy_frame.rb +120 -106
- data/lib/polars/lazy_group_by.rb +1 -1
- data/lib/polars/list_expr.rb +11 -11
- data/lib/polars/list_name_space.rb +5 -1
- data/lib/polars/rolling_group_by.rb +5 -7
- data/lib/polars/series.rb +105 -189
- data/lib/polars/string_expr.rb +42 -67
- data/lib/polars/string_name_space.rb +5 -4
- data/lib/polars/testing.rb +2 -2
- data/lib/polars/utils/constants.rb +9 -0
- data/lib/polars/utils/convert.rb +97 -0
- data/lib/polars/utils/parse.rb +89 -0
- data/lib/polars/utils/various.rb +76 -0
- data/lib/polars/utils/wrap.rb +19 -0
- data/lib/polars/utils.rb +4 -330
- data/lib/polars/version.rb +1 -1
- data/lib/polars/whenthen.rb +6 -6
- data/lib/polars.rb +11 -0
- metadata +9 -4
- data/ext/polars/src/conversion/anyvalue.rs +0 -186
data/ext/polars/src/lib.rs
CHANGED
@@ -60,12 +60,15 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
60
60
|
class.define_singleton_method("read_csv", function!(RbDataFrame::read_csv, -1))?;
|
61
61
|
class.define_singleton_method("read_parquet", function!(RbDataFrame::read_parquet, 9))?;
|
62
62
|
class.define_singleton_method("read_ipc", function!(RbDataFrame::read_ipc, 6))?;
|
63
|
-
class.define_singleton_method(
|
63
|
+
class.define_singleton_method(
|
64
|
+
"read_ipc_stream",
|
65
|
+
function!(RbDataFrame::read_ipc_stream, 6),
|
66
|
+
)?;
|
64
67
|
class.define_singleton_method("read_avro", function!(RbDataFrame::read_avro, 4))?;
|
65
68
|
class.define_singleton_method("from_rows", function!(RbDataFrame::from_rows, 3))?;
|
66
69
|
class.define_singleton_method("from_hashes", function!(RbDataFrame::from_hashes, 5))?;
|
67
|
-
class.define_singleton_method("read_json", function!(RbDataFrame::read_json,
|
68
|
-
class.define_singleton_method("read_ndjson", function!(RbDataFrame::read_ndjson,
|
70
|
+
class.define_singleton_method("read_json", function!(RbDataFrame::read_json, 4))?;
|
71
|
+
class.define_singleton_method("read_ndjson", function!(RbDataFrame::read_ndjson, 4))?;
|
69
72
|
class.define_method("estimated_size", method!(RbDataFrame::estimated_size, 0))?;
|
70
73
|
class.define_method("dtype_strings", method!(RbDataFrame::dtype_strings, 0))?;
|
71
74
|
class.define_method("write_avro", method!(RbDataFrame::write_avro, 2))?;
|
@@ -135,7 +138,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
135
138
|
class.define_method("equals", method!(RbDataFrame::equals, 2))?;
|
136
139
|
class.define_method("with_row_index", method!(RbDataFrame::with_row_index, 2))?;
|
137
140
|
class.define_method("_clone", method!(RbDataFrame::clone, 0))?;
|
138
|
-
class.define_method("
|
141
|
+
class.define_method("unpivot", method!(RbDataFrame::unpivot, 4))?;
|
139
142
|
class.define_method("pivot_expr", method!(RbDataFrame::pivot_expr, 7))?;
|
140
143
|
class.define_method("partition_by", method!(RbDataFrame::partition_by, 3))?;
|
141
144
|
class.define_method("lazy", method!(RbDataFrame::lazy, 0))?;
|
@@ -149,7 +152,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
149
152
|
class.define_method("shrink_to_fit", method!(RbDataFrame::shrink_to_fit, 0))?;
|
150
153
|
class.define_method("hash_rows", method!(RbDataFrame::hash_rows, 4))?;
|
151
154
|
class.define_method("transpose", method!(RbDataFrame::transpose, 2))?;
|
152
|
-
class.define_method("upsample", method!(RbDataFrame::upsample,
|
155
|
+
class.define_method("upsample", method!(RbDataFrame::upsample, 4))?;
|
153
156
|
class.define_method("to_struct", method!(RbDataFrame::to_struct, 1))?;
|
154
157
|
class.define_method("unnest", method!(RbDataFrame::unnest, 1))?;
|
155
158
|
class.define_method("clear", method!(RbDataFrame::clear, 0))?;
|
@@ -202,20 +205,23 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
202
205
|
class.define_method("agg_groups", method!(RbExpr::agg_groups, 0))?;
|
203
206
|
class.define_method("count", method!(RbExpr::count, 0))?;
|
204
207
|
class.define_method("len", method!(RbExpr::len, 0))?;
|
205
|
-
class.define_method("value_counts", method!(RbExpr::value_counts,
|
208
|
+
class.define_method("value_counts", method!(RbExpr::value_counts, 4))?;
|
206
209
|
class.define_method("unique_counts", method!(RbExpr::unique_counts, 0))?;
|
207
210
|
class.define_method("null_count", method!(RbExpr::null_count, 0))?;
|
208
211
|
class.define_method("cast", method!(RbExpr::cast, 2))?;
|
209
212
|
class.define_method("sort_with", method!(RbExpr::sort_with, 2))?;
|
210
213
|
class.define_method("arg_sort", method!(RbExpr::arg_sort, 2))?;
|
211
|
-
class.define_method("top_k", method!(RbExpr::top_k,
|
212
|
-
class.define_method("
|
214
|
+
class.define_method("top_k", method!(RbExpr::top_k, 1))?;
|
215
|
+
class.define_method("top_k_by", method!(RbExpr::top_k_by, 3))?;
|
216
|
+
class.define_method("bottom_k", method!(RbExpr::bottom_k, 1))?;
|
217
|
+
class.define_method("bottom_k_by", method!(RbExpr::bottom_k_by, 3))?;
|
213
218
|
class.define_method("peak_min", method!(RbExpr::peak_min, 0))?;
|
214
219
|
class.define_method("peak_max", method!(RbExpr::peak_max, 0))?;
|
215
220
|
class.define_method("arg_max", method!(RbExpr::arg_max, 0))?;
|
216
221
|
class.define_method("arg_min", method!(RbExpr::arg_min, 0))?;
|
217
222
|
class.define_method("search_sorted", method!(RbExpr::search_sorted, 2))?;
|
218
223
|
class.define_method("gather", method!(RbExpr::gather, 1))?;
|
224
|
+
class.define_method("get", method!(RbExpr::get, 1))?;
|
219
225
|
class.define_method("sort_by", method!(RbExpr::sort_by, 5))?;
|
220
226
|
class.define_method("backward_fill", method!(RbExpr::backward_fill, 1))?;
|
221
227
|
class.define_method("forward_fill", method!(RbExpr::forward_fill, 1))?;
|
@@ -233,6 +239,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
233
239
|
class.define_method("std", method!(RbExpr::std, 1))?;
|
234
240
|
class.define_method("var", method!(RbExpr::var, 1))?;
|
235
241
|
class.define_method("is_unique", method!(RbExpr::is_unique, 0))?;
|
242
|
+
class.define_method("is_between", method!(RbExpr::is_between, 3))?;
|
236
243
|
class.define_method("approx_n_unique", method!(RbExpr::approx_n_unique, 0))?;
|
237
244
|
class.define_method("is_first_distinct", method!(RbExpr::is_first_distinct, 0))?;
|
238
245
|
class.define_method("is_last_distinct", method!(RbExpr::is_last_distinct, 0))?;
|
@@ -290,7 +297,6 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
290
297
|
class.define_method("str_strip_prefix", method!(RbExpr::str_strip_prefix, 1))?;
|
291
298
|
class.define_method("str_strip_suffix", method!(RbExpr::str_strip_suffix, 1))?;
|
292
299
|
class.define_method("str_slice", method!(RbExpr::str_slice, 2))?;
|
293
|
-
class.define_method("str_explode", method!(RbExpr::str_explode, 0))?;
|
294
300
|
class.define_method("str_to_uppercase", method!(RbExpr::str_to_uppercase, 0))?;
|
295
301
|
class.define_method("str_to_lowercase", method!(RbExpr::str_to_lowercase, 0))?;
|
296
302
|
class.define_method("str_len_bytes", method!(RbExpr::str_len_bytes, 0))?;
|
@@ -364,24 +370,24 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
364
370
|
class.define_method("list_len", method!(RbExpr::list_len, 0))?;
|
365
371
|
class.define_method("list_contains", method!(RbExpr::list_contains, 1))?;
|
366
372
|
class.define_method("list_count_matches", method!(RbExpr::list_count_matches, 1))?;
|
367
|
-
class.define_method("
|
373
|
+
class.define_method("dt_year", method!(RbExpr::dt_year, 0))?;
|
368
374
|
class.define_method("dt_is_leap_year", method!(RbExpr::dt_is_leap_year, 0))?;
|
369
|
-
class.define_method("
|
370
|
-
class.define_method("
|
371
|
-
class.define_method("
|
372
|
-
class.define_method("
|
373
|
-
class.define_method("
|
374
|
-
class.define_method("
|
375
|
-
class.define_method("
|
375
|
+
class.define_method("dt_iso_year", method!(RbExpr::dt_iso_year, 0))?;
|
376
|
+
class.define_method("dt_quarter", method!(RbExpr::dt_quarter, 0))?;
|
377
|
+
class.define_method("dt_month", method!(RbExpr::dt_month, 0))?;
|
378
|
+
class.define_method("dt_week", method!(RbExpr::dt_week, 0))?;
|
379
|
+
class.define_method("dt_weekday", method!(RbExpr::dt_weekday, 0))?;
|
380
|
+
class.define_method("dt_day", method!(RbExpr::dt_day, 0))?;
|
381
|
+
class.define_method("dt_ordinal_day", method!(RbExpr::dt_ordinal_day, 0))?;
|
376
382
|
class.define_method("dt_time", method!(RbExpr::dt_time, 0))?;
|
377
383
|
class.define_method("dt_date", method!(RbExpr::dt_date, 0))?;
|
378
384
|
class.define_method("dt_datetime", method!(RbExpr::dt_datetime, 0))?;
|
379
|
-
class.define_method("
|
380
|
-
class.define_method("
|
381
|
-
class.define_method("
|
382
|
-
class.define_method("
|
383
|
-
class.define_method("
|
384
|
-
class.define_method("
|
385
|
+
class.define_method("dt_hour", method!(RbExpr::dt_hour, 0))?;
|
386
|
+
class.define_method("dt_minute", method!(RbExpr::dt_minute, 0))?;
|
387
|
+
class.define_method("dt_second", method!(RbExpr::dt_second, 0))?;
|
388
|
+
class.define_method("dt_millisecond", method!(RbExpr::dt_millisecond, 0))?;
|
389
|
+
class.define_method("dt_microsecond", method!(RbExpr::dt_microsecond, 0))?;
|
390
|
+
class.define_method("dt_nanosecond", method!(RbExpr::dt_nanosecond, 0))?;
|
385
391
|
class.define_method("dt_total_days", method!(RbExpr::dt_total_days, 0))?;
|
386
392
|
class.define_method("dt_total_hours", method!(RbExpr::dt_total_hours, 0))?;
|
387
393
|
class.define_method("dt_total_minutes", method!(RbExpr::dt_total_minutes, 0))?;
|
@@ -398,7 +404,8 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
398
404
|
"dt_total_milliseconds",
|
399
405
|
method!(RbExpr::dt_total_milliseconds, 0),
|
400
406
|
)?;
|
401
|
-
class.define_method("
|
407
|
+
class.define_method("dt_timestamp", method!(RbExpr::dt_timestamp, 1))?;
|
408
|
+
class.define_method("dt_to_string", method!(RbExpr::dt_to_string, 1))?;
|
402
409
|
class.define_method("dt_offset_by", method!(RbExpr::dt_offset_by, 1))?;
|
403
410
|
class.define_method("dt_epoch_seconds", method!(RbExpr::dt_epoch_seconds, 0))?;
|
404
411
|
class.define_method("dt_with_time_unit", method!(RbExpr::dt_with_time_unit, 1))?;
|
@@ -411,12 +418,12 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
411
418
|
"dt_replace_time_zone",
|
412
419
|
method!(RbExpr::dt_replace_time_zone, 3),
|
413
420
|
)?;
|
414
|
-
class.define_method("dt_truncate", method!(RbExpr::dt_truncate,
|
421
|
+
class.define_method("dt_truncate", method!(RbExpr::dt_truncate, 1))?;
|
415
422
|
class.define_method("dt_month_start", method!(RbExpr::dt_month_start, 0))?;
|
416
423
|
class.define_method("dt_month_end", method!(RbExpr::dt_month_end, 0))?;
|
417
424
|
class.define_method("dt_base_utc_offset", method!(RbExpr::dt_base_utc_offset, 0))?;
|
418
425
|
class.define_method("dt_dst_offset", method!(RbExpr::dt_dst_offset, 0))?;
|
419
|
-
class.define_method("dt_round", method!(RbExpr::dt_round,
|
426
|
+
class.define_method("dt_round", method!(RbExpr::dt_round, 1))?;
|
420
427
|
class.define_method("dt_combine", method!(RbExpr::dt_combine, 2))?;
|
421
428
|
class.define_method("map_batches", method!(RbExpr::map_batches, 4))?;
|
422
429
|
class.define_method("dot", method!(RbExpr::dot, 1))?;
|
@@ -479,8 +486,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
479
486
|
class.define_method("pct_change", method!(RbExpr::pct_change, 1))?;
|
480
487
|
class.define_method("skew", method!(RbExpr::skew, 1))?;
|
481
488
|
class.define_method("kurtosis", method!(RbExpr::kurtosis, 2))?;
|
482
|
-
class.define_method("
|
483
|
-
class.define_method("cat_set_ordering", method!(RbExpr::cat_set_ordering, 1))?;
|
489
|
+
class.define_method("str_join", method!(RbExpr::str_join, 2))?;
|
484
490
|
class.define_method("cat_get_categories", method!(RbExpr::cat_get_categories, 0))?;
|
485
491
|
class.define_method("reshape", method!(RbExpr::reshape, 1))?;
|
486
492
|
class.define_method("cum_count", method!(RbExpr::cum_count, 1))?;
|
@@ -512,7 +518,8 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
512
518
|
class.define_method("entropy", method!(RbExpr::entropy, 2))?;
|
513
519
|
class.define_method("_hash", method!(RbExpr::hash, 4))?;
|
514
520
|
class.define_method("set_sorted_flag", method!(RbExpr::set_sorted_flag, 1))?;
|
515
|
-
class.define_method("replace", method!(RbExpr::replace,
|
521
|
+
class.define_method("replace", method!(RbExpr::replace, 2))?;
|
522
|
+
class.define_method("replace_strict", method!(RbExpr::replace_strict, 4))?;
|
516
523
|
|
517
524
|
// meta
|
518
525
|
class.define_method("meta_pop", method!(RbExpr::meta_pop, 0))?;
|
@@ -545,7 +552,8 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
545
552
|
|
546
553
|
// maybe add to different class
|
547
554
|
let class = module.define_module("Plr")?;
|
548
|
-
class.define_singleton_method("dtype_cols", function!(functions::lazy::
|
555
|
+
class.define_singleton_method("dtype_cols", function!(functions::lazy::dtype_cols, 1))?;
|
556
|
+
class.define_singleton_method("index_cols", function!(functions::lazy::index_cols, 1))?;
|
549
557
|
class.define_singleton_method("col", function!(functions::lazy::col, 1))?;
|
550
558
|
class.define_singleton_method("len", function!(functions::lazy::len, 0))?;
|
551
559
|
class.define_singleton_method("first", function!(functions::lazy::first, 0))?;
|
@@ -624,8 +632,8 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
624
632
|
function!(functions::io::read_parquet_schema, 1),
|
625
633
|
)?;
|
626
634
|
class.define_singleton_method("collect_all", function!(functions::lazy::collect_all, 1))?;
|
627
|
-
class.define_singleton_method("date_range", function!(functions::range::date_range,
|
628
|
-
class.define_singleton_method("date_ranges", function!(functions::range::date_ranges,
|
635
|
+
class.define_singleton_method("date_range", function!(functions::range::date_range, 4))?;
|
636
|
+
class.define_singleton_method("date_ranges", function!(functions::range::date_ranges, 4))?;
|
629
637
|
class.define_singleton_method(
|
630
638
|
"datetime_range",
|
631
639
|
function!(functions::range::datetime_range, 6),
|
@@ -714,7 +722,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
714
722
|
class.define_singleton_method("new_from_csv", function!(RbLazyFrame::new_from_csv, -1))?;
|
715
723
|
class.define_singleton_method(
|
716
724
|
"new_from_parquet",
|
717
|
-
function!(RbLazyFrame::new_from_parquet,
|
725
|
+
function!(RbLazyFrame::new_from_parquet, 13),
|
718
726
|
)?;
|
719
727
|
class.define_singleton_method("new_from_ipc", function!(RbLazyFrame::new_from_ipc, 6))?;
|
720
728
|
class.define_method("write_json", method!(RbLazyFrame::write_json, 1))?;
|
@@ -733,17 +741,17 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
733
741
|
class.define_method("collect", method!(RbLazyFrame::collect, 0))?;
|
734
742
|
class.define_method("sink_parquet", method!(RbLazyFrame::sink_parquet, 7))?;
|
735
743
|
class.define_method("sink_ipc", method!(RbLazyFrame::sink_ipc, 3))?;
|
736
|
-
class.define_method("sink_csv", method!(RbLazyFrame::sink_csv,
|
744
|
+
class.define_method("sink_csv", method!(RbLazyFrame::sink_csv, 15))?;
|
737
745
|
class.define_method("sink_json", method!(RbLazyFrame::sink_json, 2))?;
|
738
746
|
class.define_method("fetch", method!(RbLazyFrame::fetch, 1))?;
|
739
747
|
class.define_method("filter", method!(RbLazyFrame::filter, 1))?;
|
740
748
|
class.define_method("select", method!(RbLazyFrame::select, 1))?;
|
741
749
|
class.define_method("select_seq", method!(RbLazyFrame::select_seq, 1))?;
|
742
750
|
class.define_method("group_by", method!(RbLazyFrame::group_by, 2))?;
|
743
|
-
class.define_method("rolling", method!(RbLazyFrame::rolling,
|
751
|
+
class.define_method("rolling", method!(RbLazyFrame::rolling, 5))?;
|
744
752
|
class.define_method(
|
745
753
|
"group_by_dynamic",
|
746
|
-
method!(RbLazyFrame::group_by_dynamic,
|
754
|
+
method!(RbLazyFrame::group_by_dynamic, 9),
|
747
755
|
)?;
|
748
756
|
class.define_method("with_context", method!(RbLazyFrame::with_context, 1))?;
|
749
757
|
class.define_method("join_asof", method!(RbLazyFrame::join_asof, 11))?;
|
@@ -772,16 +780,13 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
772
780
|
class.define_method("drop_nulls", method!(RbLazyFrame::drop_nulls, 1))?;
|
773
781
|
class.define_method("slice", method!(RbLazyFrame::slice, 2))?;
|
774
782
|
class.define_method("tail", method!(RbLazyFrame::tail, 1))?;
|
775
|
-
class.define_method("
|
783
|
+
class.define_method("unpivot", method!(RbLazyFrame::unpivot, 5))?;
|
776
784
|
class.define_method("with_row_index", method!(RbLazyFrame::with_row_index, 2))?;
|
777
785
|
class.define_method("drop", method!(RbLazyFrame::drop, 1))?;
|
778
786
|
class.define_method("cast_all", method!(RbLazyFrame::cast_all, 2))?;
|
779
787
|
class.define_method("_clone", method!(RbLazyFrame::clone, 0))?;
|
780
|
-
class.define_method("
|
781
|
-
class.define_method("dtypes", method!(RbLazyFrame::dtypes, 0))?;
|
782
|
-
class.define_method("schema", method!(RbLazyFrame::schema, 0))?;
|
788
|
+
class.define_method("collect_schema", method!(RbLazyFrame::collect_schema, 0))?;
|
783
789
|
class.define_method("unnest", method!(RbLazyFrame::unnest, 1))?;
|
784
|
-
class.define_method("width", method!(RbLazyFrame::width, 0))?;
|
785
790
|
class.define_method("count", method!(RbLazyFrame::count, 0))?;
|
786
791
|
class.define_method("merge_sorted", method!(RbLazyFrame::merge_sorted, 2))?;
|
787
792
|
|
@@ -803,8 +808,12 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
803
808
|
class.define_singleton_method("new_opt_f32", function!(RbSeries::new_opt_f32, 3))?;
|
804
809
|
class.define_singleton_method("new_opt_f64", function!(RbSeries::new_opt_f64, 3))?;
|
805
810
|
class.define_singleton_method(
|
806
|
-
"
|
807
|
-
function!(RbSeries::
|
811
|
+
"new_from_any_values",
|
812
|
+
function!(RbSeries::new_from_any_values, 3),
|
813
|
+
)?;
|
814
|
+
class.define_singleton_method(
|
815
|
+
"new_from_any_values_and_dtype",
|
816
|
+
function!(RbSeries::new_from_any_values_and_dtype, 4),
|
808
817
|
)?;
|
809
818
|
class.define_singleton_method("new_str", function!(RbSeries::new_str, 3))?;
|
810
819
|
class.define_singleton_method("new_binary", function!(RbSeries::new_binary, 3))?;
|
@@ -861,7 +870,8 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
861
870
|
class.define_method("div", method!(RbSeries::div, 1))?;
|
862
871
|
class.define_method("rem", method!(RbSeries::rem, 1))?;
|
863
872
|
class.define_method("sort", method!(RbSeries::sort, 3))?;
|
864
|
-
class.define_method("value_counts", method!(RbSeries::value_counts,
|
873
|
+
class.define_method("value_counts", method!(RbSeries::value_counts, 4))?;
|
874
|
+
class.define_method("slice", method!(RbSeries::slice, 2))?;
|
865
875
|
class.define_method("any", method!(RbSeries::any, 1))?;
|
866
876
|
class.define_method("all", method!(RbSeries::all, 1))?;
|
867
877
|
class.define_method("arg_min", method!(RbSeries::arg_min, 0))?;
|
@@ -871,7 +881,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
871
881
|
class.define_method("has_validity", method!(RbSeries::has_validity, 0))?;
|
872
882
|
class.define_method("sample_n", method!(RbSeries::sample_n, 4))?;
|
873
883
|
class.define_method("sample_frac", method!(RbSeries::sample_frac, 4))?;
|
874
|
-
class.define_method("equals", method!(RbSeries::equals,
|
884
|
+
class.define_method("equals", method!(RbSeries::equals, 4))?;
|
875
885
|
class.define_method("eq", method!(RbSeries::eq, 1))?;
|
876
886
|
class.define_method("neq", method!(RbSeries::neq, 1))?;
|
877
887
|
class.define_method("gt", method!(RbSeries::gt, 1))?;
|
data/ext/polars/src/map/dataframe.rs
CHANGED
@@ -255,8 +255,8 @@ pub fn apply_lambda_with_rows_output<'a>(
|
|
255
255
|
match RArray::try_convert(val).ok() {
|
256
256
|
Some(tuple) => {
|
257
257
|
row_buf.0.clear();
|
258
|
-
for v in tuple.
|
259
|
-
let v = Wrap::<AnyValue>::try_convert(v
|
258
|
+
for v in tuple.into_iter() {
|
259
|
+
let v = Wrap::<AnyValue>::try_convert(v).unwrap().0;
|
260
260
|
row_buf.0.push(v);
|
261
261
|
}
|
262
262
|
let ptr = &row_buf as *const Row;
|
data/ext/polars/src/rb_modules.rs
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
use magnus::{value::Lazy, Module, RClass, RModule, Ruby};
|
1
|
+
use magnus::{value::Lazy, ExceptionClass, Module, RClass, RModule, Ruby};
|
2
2
|
|
3
3
|
static POLARS: Lazy<RModule> = Lazy::new(|ruby| ruby.class_object().const_get("Polars").unwrap());
|
4
4
|
|
@@ -38,3 +38,27 @@ static DATETIME: Lazy<RClass> =
|
|
38
38
|
pub(crate) fn datetime() -> RClass {
|
39
39
|
Ruby::get().unwrap().get_inner(&DATETIME)
|
40
40
|
}
|
41
|
+
|
42
|
+
static ERROR: Lazy<ExceptionClass> =
|
43
|
+
Lazy::new(|ruby| ruby.get_inner(&POLARS).const_get("Error").unwrap());
|
44
|
+
|
45
|
+
pub(crate) fn error() -> ExceptionClass {
|
46
|
+
Ruby::get().unwrap().get_inner(&ERROR)
|
47
|
+
}
|
48
|
+
|
49
|
+
static COMPUTE_ERROR: Lazy<ExceptionClass> =
|
50
|
+
Lazy::new(|ruby| ruby.get_inner(&POLARS).const_get("ComputeError").unwrap());
|
51
|
+
|
52
|
+
pub(crate) fn compute_error() -> ExceptionClass {
|
53
|
+
Ruby::get().unwrap().get_inner(&COMPUTE_ERROR)
|
54
|
+
}
|
55
|
+
|
56
|
+
static INVALID_OPERATION_ERROR: Lazy<ExceptionClass> = Lazy::new(|ruby| {
|
57
|
+
ruby.get_inner(&POLARS)
|
58
|
+
.const_get("InvalidOperationError")
|
59
|
+
.unwrap()
|
60
|
+
});
|
61
|
+
|
62
|
+
pub(crate) fn invalid_operation_error() -> ExceptionClass {
|
63
|
+
Ruby::get().unwrap().get_inner(&INVALID_OPERATION_ERROR)
|
64
|
+
}
|
data/ext/polars/src/series/aggregation.rs
CHANGED
@@ -54,7 +54,8 @@ impl RbSeries {
|
|
54
54
|
.as_any_value(),
|
55
55
|
)
|
56
56
|
.into_value()),
|
57
|
-
|
57
|
+
// For non-numeric output types we require mean_reduce.
|
58
|
+
dt if dt.is_temporal() => {
|
58
59
|
Ok(Wrap(self.series.borrow().mean_reduce().as_any_value()).into_value())
|
59
60
|
}
|
60
61
|
_ => Ok(self.series.borrow().mean().into_value()),
|
@@ -73,7 +74,8 @@ impl RbSeries {
|
|
73
74
|
.as_any_value(),
|
74
75
|
)
|
75
76
|
.into_value()),
|
76
|
-
|
77
|
+
// For non-numeric output types we require median_reduce.
|
78
|
+
dt if dt.is_temporal() => Ok(Wrap(
|
77
79
|
self.series
|
78
80
|
.borrow()
|
79
81
|
.median_reduce()
|
data/ext/polars/src/series/arithmetic.rs
CHANGED
@@ -1,24 +1,34 @@
|
|
1
|
-
use crate::{RbResult, RbSeries};
|
1
|
+
use crate::{RbPolarsErr, RbResult, RbSeries};
|
2
2
|
|
3
3
|
impl RbSeries {
|
4
|
-
pub fn add(&self, other: &RbSeries) -> Self {
|
5
|
-
(&*self.series.borrow() + &*other.series.borrow())
|
4
|
+
pub fn add(&self, other: &RbSeries) -> RbResult<Self> {
|
5
|
+
(&*self.series.borrow() + &*other.series.borrow())
|
6
|
+
.map(Into::into)
|
7
|
+
.map_err(RbPolarsErr::from)
|
6
8
|
}
|
7
9
|
|
8
|
-
pub fn sub(&self, other: &RbSeries) -> Self {
|
9
|
-
(&*self.series.borrow() - &*other.series.borrow())
|
10
|
+
pub fn sub(&self, other: &RbSeries) -> RbResult<Self> {
|
11
|
+
(&*self.series.borrow() - &*other.series.borrow())
|
12
|
+
.map(Into::into)
|
13
|
+
.map_err(RbPolarsErr::from)
|
10
14
|
}
|
11
15
|
|
12
|
-
pub fn mul(&self, other: &RbSeries) -> Self {
|
13
|
-
(&*self.series.borrow() * &*other.series.borrow())
|
16
|
+
pub fn mul(&self, other: &RbSeries) -> RbResult<Self> {
|
17
|
+
(&*self.series.borrow() * &*other.series.borrow())
|
18
|
+
.map(Into::into)
|
19
|
+
.map_err(RbPolarsErr::from)
|
14
20
|
}
|
15
21
|
|
16
|
-
pub fn div(&self, other: &RbSeries) -> Self {
|
17
|
-
(&*self.series.borrow() / &*other.series.borrow())
|
22
|
+
pub fn div(&self, other: &RbSeries) -> RbResult<Self> {
|
23
|
+
(&*self.series.borrow() / &*other.series.borrow())
|
24
|
+
.map(Into::into)
|
25
|
+
.map_err(RbPolarsErr::from)
|
18
26
|
}
|
19
27
|
|
20
|
-
pub fn rem(&self, other: &RbSeries) -> Self {
|
21
|
-
(&*self.series.borrow() % &*other.series.borrow())
|
28
|
+
pub fn rem(&self, other: &RbSeries) -> RbResult<Self> {
|
29
|
+
(&*self.series.borrow() % &*other.series.borrow())
|
30
|
+
.map(Into::into)
|
31
|
+
.map_err(RbPolarsErr::from)
|
22
32
|
}
|
23
33
|
}
|
24
34
|
|
data/ext/polars/src/series/construction.rs
CHANGED
@@ -1,10 +1,11 @@
|
|
1
1
|
use magnus::{prelude::*, RArray};
|
2
2
|
use polars_core::prelude::*;
|
3
3
|
|
4
|
+
use crate::any_value::rb_object_to_any_value;
|
4
5
|
use crate::conversion::{slice_extract_wrapped, vec_extract_wrapped, Wrap};
|
5
6
|
use crate::prelude::ObjectValue;
|
6
7
|
use crate::series::to_series_collection;
|
7
|
-
use crate::{RbPolarsErr, RbResult, RbSeries, RbValueError};
|
8
|
+
use crate::{RbPolarsErr, RbResult, RbSeries, RbTypeError, RbValueError};
|
8
9
|
|
9
10
|
impl RbSeries {
|
10
11
|
pub fn new_opt_bool(name: String, obj: RArray, strict: bool) -> RbResult<RbSeries> {
|
@@ -35,36 +36,28 @@ impl RbSeries {
|
|
35
36
|
}
|
36
37
|
}
|
37
38
|
|
38
|
-
fn new_primitive<T>(name: &str,
|
39
|
+
fn new_primitive<T>(name: &str, values: RArray, _strict: bool) -> RbResult<RbSeries>
|
39
40
|
where
|
40
41
|
T: PolarsNumericType,
|
41
42
|
ChunkedArray<T>: IntoSeries,
|
42
43
|
T::Native: magnus::TryConvert,
|
43
44
|
{
|
44
|
-
let len =
|
45
|
+
let len = values.len();
|
45
46
|
let mut builder = PrimitiveChunkedBuilder::<T>::new(name, len);
|
46
47
|
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
Err(e) => {
|
55
|
-
if strict {
|
56
|
-
return Err(e);
|
57
|
-
}
|
58
|
-
builder.append_null()
|
59
|
-
}
|
60
|
-
}
|
61
|
-
}
|
48
|
+
for res in values.into_iter() {
|
49
|
+
let value = res;
|
50
|
+
if value.is_nil() {
|
51
|
+
builder.append_null()
|
52
|
+
} else {
|
53
|
+
let v = <T::Native>::try_convert(value)?;
|
54
|
+
builder.append_value(v)
|
62
55
|
}
|
63
56
|
}
|
64
|
-
let ca = builder.finish();
|
65
57
|
|
58
|
+
let ca = builder.finish();
|
66
59
|
let s = ca.into_series();
|
67
|
-
Ok(
|
60
|
+
Ok(s.into())
|
68
61
|
}
|
69
62
|
|
70
63
|
// Init with lists that can contain Nones
|
@@ -91,18 +84,50 @@ init_method_opt!(new_opt_f64, Float64Type, f64);
|
|
91
84
|
|
92
85
|
fn vec_wrap_any_value<'s>(arr: RArray) -> RbResult<Vec<Wrap<AnyValue<'s>>>> {
|
93
86
|
let mut val = Vec::with_capacity(arr.len());
|
94
|
-
for v in arr.
|
95
|
-
val.push(Wrap::<AnyValue<'s>>::try_convert(v
|
87
|
+
for v in arr.into_iter() {
|
88
|
+
val.push(Wrap::<AnyValue<'s>>::try_convert(v)?);
|
96
89
|
}
|
97
90
|
Ok(val)
|
98
91
|
}
|
99
92
|
|
100
93
|
impl RbSeries {
|
101
|
-
pub fn
|
102
|
-
let
|
103
|
-
let avs = slice_extract_wrapped(&val);
|
94
|
+
pub fn new_from_any_values(name: String, values: RArray, strict: bool) -> RbResult<Self> {
|
95
|
+
let any_values_result = vec_wrap_any_value(values);
|
104
96
|
// from anyvalues is fallible
|
105
|
-
let
|
97
|
+
let result = any_values_result.and_then(|avs| {
|
98
|
+
let avs = slice_extract_wrapped(&avs);
|
99
|
+
let s = Series::from_any_values(&name, avs, strict).map_err(|e| {
|
100
|
+
RbTypeError::new_err(format!(
|
101
|
+
"{e}\n\nHint: Try setting `strict: false` to allow passing data with mixed types."
|
102
|
+
))
|
103
|
+
})?;
|
104
|
+
Ok(s.into())
|
105
|
+
});
|
106
|
+
|
107
|
+
// Fall back to Object type for non-strict construction.
|
108
|
+
if !strict && result.is_err() {
|
109
|
+
return Self::new_object(name, values, strict);
|
110
|
+
}
|
111
|
+
|
112
|
+
result
|
113
|
+
}
|
114
|
+
|
115
|
+
pub fn new_from_any_values_and_dtype(
|
116
|
+
name: String,
|
117
|
+
values: RArray,
|
118
|
+
dtype: Wrap<DataType>,
|
119
|
+
strict: bool,
|
120
|
+
) -> RbResult<Self> {
|
121
|
+
let any_values = values
|
122
|
+
.into_iter()
|
123
|
+
.map(|v| rb_object_to_any_value(v, strict))
|
124
|
+
.collect::<RbResult<Vec<AnyValue>>>()?;
|
125
|
+
let s = Series::from_any_values_and_dtype(&name, any_values.as_slice(), &dtype.0, strict)
|
126
|
+
.map_err(|e| {
|
127
|
+
RbTypeError::new_err(format!(
|
128
|
+
"{e}\n\nHint: Try setting `strict: false` to allow passing data with mixed types."
|
129
|
+
))
|
130
|
+
})?;
|
106
131
|
Ok(s.into())
|
107
132
|
}
|
108
133
|
|
@@ -125,9 +150,9 @@ impl RbSeries {
|
|
125
150
|
|
126
151
|
pub fn new_object(name: String, val: RArray, _strict: bool) -> RbResult<Self> {
|
127
152
|
let val = val
|
128
|
-
.
|
129
|
-
.map(
|
130
|
-
.collect::<
|
153
|
+
.into_iter()
|
154
|
+
.map(ObjectValue::from)
|
155
|
+
.collect::<Vec<ObjectValue>>();
|
131
156
|
let s = ObjectChunked::<ObjectValue>::new_from_vec(&name, val).into_series();
|
132
157
|
Ok(s.into())
|
133
158
|
}
|
@@ -163,15 +188,8 @@ impl RbSeries {
|
|
163
188
|
}
|
164
189
|
}
|
165
190
|
|
166
|
-
pub fn new_decimal(name: String,
|
167
|
-
|
168
|
-
// TODO: do we have to respect 'strict' here? it's possible if we want to
|
169
|
-
let avs = slice_extract_wrapped(&val);
|
170
|
-
// create a fake dtype with a placeholder "none" scale, to be inferred later
|
171
|
-
let dtype = DataType::Decimal(None, None);
|
172
|
-
let s = Series::from_any_values_and_dtype(&name, avs, &dtype, strict)
|
173
|
-
.map_err(RbPolarsErr::from)?;
|
174
|
-
Ok(s.into())
|
191
|
+
pub fn new_decimal(name: String, values: RArray, strict: bool) -> RbResult<Self> {
|
192
|
+
Self::new_from_any_values(name, values, strict)
|
175
193
|
}
|
176
194
|
|
177
195
|
pub fn repeat(
|
data/ext/polars/src/series/mod.rs
CHANGED
@@ -36,8 +36,8 @@ impl RbSeries {
|
|
36
36
|
|
37
37
|
pub fn to_series_collection(rs: RArray) -> RbResult<Vec<Series>> {
|
38
38
|
let mut series = Vec::new();
|
39
|
-
for item in rs.
|
40
|
-
series.push(<&RbSeries>::try_convert(item
|
39
|
+
for item in rs.into_iter() {
|
40
|
+
series.push(<&RbSeries>::try_convert(item)?.series.borrow().clone());
|
41
41
|
}
|
42
42
|
Ok(series)
|
43
43
|
}
|
@@ -247,13 +247,24 @@ impl RbSeries {
|
|
247
247
|
.into())
|
248
248
|
}
|
249
249
|
|
250
|
-
pub fn value_counts(
|
251
|
-
|
250
|
+
pub fn value_counts(
|
251
|
+
&self,
|
252
|
+
sort: bool,
|
253
|
+
parallel: bool,
|
254
|
+
name: String,
|
255
|
+
normalize: bool,
|
256
|
+
) -> RbResult<RbDataFrame> {
|
257
|
+
let out = self
|
252
258
|
.series
|
253
259
|
.borrow()
|
254
|
-
.value_counts(
|
260
|
+
.value_counts(sort, parallel, name, normalize)
|
255
261
|
.map_err(RbPolarsErr::from)?;
|
256
|
-
Ok(
|
262
|
+
Ok(out.into())
|
263
|
+
}
|
264
|
+
|
265
|
+
pub fn slice(&self, offset: i64, length: Option<usize>) -> Self {
|
266
|
+
let length = length.unwrap_or_else(|| self.series.borrow().len());
|
267
|
+
self.series.borrow().slice(offset, length).into()
|
257
268
|
}
|
258
269
|
|
259
270
|
pub fn take_with_series(&self, indices: &RbSeries) -> RbResult<Self> {
|
@@ -301,10 +312,20 @@ impl RbSeries {
|
|
301
312
|
Ok(s.into())
|
302
313
|
}
|
303
314
|
|
304
|
-
pub fn equals(
|
305
|
-
|
306
|
-
|
307
|
-
|
315
|
+
pub fn equals(
|
316
|
+
&self,
|
317
|
+
other: &RbSeries,
|
318
|
+
check_dtypes: bool,
|
319
|
+
check_names: bool,
|
320
|
+
null_equal: bool,
|
321
|
+
) -> bool {
|
322
|
+
if check_dtypes && (self.series.borrow().dtype() != other.series.borrow().dtype()) {
|
323
|
+
return false;
|
324
|
+
}
|
325
|
+
if check_names && (self.series.borrow().name() != other.series.borrow().name()) {
|
326
|
+
return false;
|
327
|
+
}
|
328
|
+
if null_equal {
|
308
329
|
self.series.borrow().equals_missing(&other.series.borrow())
|
309
330
|
} else {
|
310
331
|
self.series.borrow().equals(&other.series.borrow())
|
data/ext/polars/src/sql.rs
CHANGED
@@ -37,7 +37,9 @@ impl RbSQLContext {
|
|
37
37
|
}
|
38
38
|
|
39
39
|
pub fn register(&self, name: String, lf: &RbLazyFrame) {
|
40
|
-
self.context
|
40
|
+
self.context
|
41
|
+
.borrow_mut()
|
42
|
+
.register(&name, lf.ldf.borrow().clone())
|
41
43
|
}
|
42
44
|
|
43
45
|
pub fn unregister(&self, name: String) {
|
data/lib/polars/array_expr.rb
CHANGED
@@ -358,7 +358,7 @@ module Polars
|
|
358
358
|
# # │ [7, 8, 9] ┆ 4 ┆ null │
|
359
359
|
# # └───────────────┴─────┴──────┘
|
360
360
|
def get(index, null_on_oob: true)
|
361
|
-
index = Utils.
|
361
|
+
index = Utils.parse_into_expression(index)
|
362
362
|
Utils.wrap_expr(_rbexpr.arr_get(index, null_on_oob))
|
363
363
|
end
|
364
364
|
|
@@ -446,7 +446,7 @@ module Polars
|
|
446
446
|
# # │ ["x", "y"] ┆ _ ┆ x_y │
|
447
447
|
# # └───────────────┴───────────┴──────┘
|
448
448
|
def join(separator, ignore_nulls: true)
|
449
|
-
separator = Utils.
|
449
|
+
separator = Utils.parse_into_expression(separator, str_as_lit: true)
|
450
450
|
Utils.wrap_expr(_rbexpr.arr_join(separator, ignore_nulls))
|
451
451
|
end
|
452
452
|
|
@@ -502,7 +502,7 @@ module Polars
|
|
502
502
|
# # │ ["a", "c"] ┆ true │
|
503
503
|
# # └───────────────┴──────────┘
|
504
504
|
def contains(item)
|
505
|
-
item = Utils.
|
505
|
+
item = Utils.parse_into_expression(item, str_as_lit: true)
|
506
506
|
Utils.wrap_expr(_rbexpr.arr_contains(item))
|
507
507
|
end
|
508
508
|
|
@@ -530,7 +530,7 @@ module Polars
|
|
530
530
|
# # │ [2, 2] ┆ 2 │
|
531
531
|
# # └───────────────┴────────────────┘
|
532
532
|
def count_matches(element)
|
533
|
-
element = Utils.
|
533
|
+
element = Utils.parse_into_expression(element, str_as_lit: true)
|
534
534
|
Utils.wrap_expr(_rbexpr.arr_count_matches(element))
|
535
535
|
end
|
536
536
|
end
|