polars-df 0.11.0 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/Cargo.lock +360 -361
- data/ext/polars/Cargo.toml +10 -7
- data/ext/polars/src/batched_csv.rs +1 -1
- data/ext/polars/src/conversion/any_value.rs +261 -0
- data/ext/polars/src/conversion/chunked_array.rs +4 -4
- data/ext/polars/src/conversion/mod.rs +51 -10
- data/ext/polars/src/dataframe/construction.rs +6 -8
- data/ext/polars/src/dataframe/general.rs +19 -29
- data/ext/polars/src/dataframe/io.rs +43 -33
- data/ext/polars/src/error.rs +26 -4
- data/ext/polars/src/expr/categorical.rs +0 -10
- data/ext/polars/src/expr/datetime.rs +4 -12
- data/ext/polars/src/expr/general.rs +123 -110
- data/ext/polars/src/expr/mod.rs +2 -2
- data/ext/polars/src/expr/rolling.rs +17 -9
- data/ext/polars/src/expr/string.rs +2 -6
- data/ext/polars/src/functions/eager.rs +10 -10
- data/ext/polars/src/functions/lazy.rs +21 -21
- data/ext/polars/src/functions/range.rs +6 -12
- data/ext/polars/src/interop/numo/to_numo_series.rs +2 -1
- data/ext/polars/src/lazyframe/mod.rs +81 -98
- data/ext/polars/src/lib.rs +55 -45
- data/ext/polars/src/map/dataframe.rs +2 -2
- data/ext/polars/src/rb_modules.rs +25 -1
- data/ext/polars/src/series/aggregation.rs +4 -2
- data/ext/polars/src/series/arithmetic.rs +21 -11
- data/ext/polars/src/series/construction.rs +56 -38
- data/ext/polars/src/series/export.rs +1 -1
- data/ext/polars/src/series/mod.rs +31 -10
- data/ext/polars/src/sql.rs +3 -1
- data/lib/polars/array_expr.rb +4 -4
- data/lib/polars/batched_csv_reader.rb +2 -2
- data/lib/polars/cat_expr.rb +0 -36
- data/lib/polars/cat_name_space.rb +0 -37
- data/lib/polars/data_frame.rb +93 -101
- data/lib/polars/data_types.rb +1 -1
- data/lib/polars/date_time_expr.rb +525 -573
- data/lib/polars/date_time_name_space.rb +263 -464
- data/lib/polars/dynamic_group_by.rb +3 -3
- data/lib/polars/exceptions.rb +3 -0
- data/lib/polars/expr.rb +367 -330
- data/lib/polars/expr_dispatch.rb +1 -1
- data/lib/polars/functions/aggregation/horizontal.rb +8 -8
- data/lib/polars/functions/as_datatype.rb +63 -40
- data/lib/polars/functions/lazy.rb +63 -14
- data/lib/polars/functions/lit.rb +1 -1
- data/lib/polars/functions/range/date_range.rb +18 -77
- data/lib/polars/functions/range/datetime_range.rb +4 -4
- data/lib/polars/functions/range/int_range.rb +2 -2
- data/lib/polars/functions/range/time_range.rb +4 -4
- data/lib/polars/functions/repeat.rb +1 -1
- data/lib/polars/functions/whenthen.rb +1 -1
- data/lib/polars/io/csv.rb +8 -8
- data/lib/polars/io/ipc.rb +3 -3
- data/lib/polars/io/json.rb +13 -2
- data/lib/polars/io/ndjson.rb +15 -4
- data/lib/polars/io/parquet.rb +5 -4
- data/lib/polars/lazy_frame.rb +120 -106
- data/lib/polars/lazy_group_by.rb +1 -1
- data/lib/polars/list_expr.rb +11 -11
- data/lib/polars/list_name_space.rb +5 -1
- data/lib/polars/rolling_group_by.rb +5 -7
- data/lib/polars/series.rb +105 -189
- data/lib/polars/string_expr.rb +42 -67
- data/lib/polars/string_name_space.rb +5 -4
- data/lib/polars/testing.rb +2 -2
- data/lib/polars/utils/constants.rb +9 -0
- data/lib/polars/utils/convert.rb +97 -0
- data/lib/polars/utils/parse.rb +89 -0
- data/lib/polars/utils/various.rb +76 -0
- data/lib/polars/utils/wrap.rb +19 -0
- data/lib/polars/utils.rb +4 -330
- data/lib/polars/version.rb +1 -1
- data/lib/polars/whenthen.rb +6 -6
- data/lib/polars.rb +11 -0
- metadata +9 -4
- data/ext/polars/src/conversion/anyvalue.rs +0 -186
data/ext/polars/src/lib.rs
CHANGED
@@ -60,12 +60,15 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
60
60
|
class.define_singleton_method("read_csv", function!(RbDataFrame::read_csv, -1))?;
|
61
61
|
class.define_singleton_method("read_parquet", function!(RbDataFrame::read_parquet, 9))?;
|
62
62
|
class.define_singleton_method("read_ipc", function!(RbDataFrame::read_ipc, 6))?;
|
63
|
-
class.define_singleton_method(
|
63
|
+
class.define_singleton_method(
|
64
|
+
"read_ipc_stream",
|
65
|
+
function!(RbDataFrame::read_ipc_stream, 6),
|
66
|
+
)?;
|
64
67
|
class.define_singleton_method("read_avro", function!(RbDataFrame::read_avro, 4))?;
|
65
68
|
class.define_singleton_method("from_rows", function!(RbDataFrame::from_rows, 3))?;
|
66
69
|
class.define_singleton_method("from_hashes", function!(RbDataFrame::from_hashes, 5))?;
|
67
|
-
class.define_singleton_method("read_json", function!(RbDataFrame::read_json,
|
68
|
-
class.define_singleton_method("read_ndjson", function!(RbDataFrame::read_ndjson,
|
70
|
+
class.define_singleton_method("read_json", function!(RbDataFrame::read_json, 4))?;
|
71
|
+
class.define_singleton_method("read_ndjson", function!(RbDataFrame::read_ndjson, 4))?;
|
69
72
|
class.define_method("estimated_size", method!(RbDataFrame::estimated_size, 0))?;
|
70
73
|
class.define_method("dtype_strings", method!(RbDataFrame::dtype_strings, 0))?;
|
71
74
|
class.define_method("write_avro", method!(RbDataFrame::write_avro, 2))?;
|
@@ -135,7 +138,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
135
138
|
class.define_method("equals", method!(RbDataFrame::equals, 2))?;
|
136
139
|
class.define_method("with_row_index", method!(RbDataFrame::with_row_index, 2))?;
|
137
140
|
class.define_method("_clone", method!(RbDataFrame::clone, 0))?;
|
138
|
-
class.define_method("
|
141
|
+
class.define_method("unpivot", method!(RbDataFrame::unpivot, 4))?;
|
139
142
|
class.define_method("pivot_expr", method!(RbDataFrame::pivot_expr, 7))?;
|
140
143
|
class.define_method("partition_by", method!(RbDataFrame::partition_by, 3))?;
|
141
144
|
class.define_method("lazy", method!(RbDataFrame::lazy, 0))?;
|
@@ -149,7 +152,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
149
152
|
class.define_method("shrink_to_fit", method!(RbDataFrame::shrink_to_fit, 0))?;
|
150
153
|
class.define_method("hash_rows", method!(RbDataFrame::hash_rows, 4))?;
|
151
154
|
class.define_method("transpose", method!(RbDataFrame::transpose, 2))?;
|
152
|
-
class.define_method("upsample", method!(RbDataFrame::upsample,
|
155
|
+
class.define_method("upsample", method!(RbDataFrame::upsample, 4))?;
|
153
156
|
class.define_method("to_struct", method!(RbDataFrame::to_struct, 1))?;
|
154
157
|
class.define_method("unnest", method!(RbDataFrame::unnest, 1))?;
|
155
158
|
class.define_method("clear", method!(RbDataFrame::clear, 0))?;
|
@@ -202,20 +205,23 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
202
205
|
class.define_method("agg_groups", method!(RbExpr::agg_groups, 0))?;
|
203
206
|
class.define_method("count", method!(RbExpr::count, 0))?;
|
204
207
|
class.define_method("len", method!(RbExpr::len, 0))?;
|
205
|
-
class.define_method("value_counts", method!(RbExpr::value_counts,
|
208
|
+
class.define_method("value_counts", method!(RbExpr::value_counts, 4))?;
|
206
209
|
class.define_method("unique_counts", method!(RbExpr::unique_counts, 0))?;
|
207
210
|
class.define_method("null_count", method!(RbExpr::null_count, 0))?;
|
208
211
|
class.define_method("cast", method!(RbExpr::cast, 2))?;
|
209
212
|
class.define_method("sort_with", method!(RbExpr::sort_with, 2))?;
|
210
213
|
class.define_method("arg_sort", method!(RbExpr::arg_sort, 2))?;
|
211
|
-
class.define_method("top_k", method!(RbExpr::top_k,
|
212
|
-
class.define_method("
|
214
|
+
class.define_method("top_k", method!(RbExpr::top_k, 1))?;
|
215
|
+
class.define_method("top_k_by", method!(RbExpr::top_k_by, 3))?;
|
216
|
+
class.define_method("bottom_k", method!(RbExpr::bottom_k, 1))?;
|
217
|
+
class.define_method("bottom_k_by", method!(RbExpr::bottom_k_by, 3))?;
|
213
218
|
class.define_method("peak_min", method!(RbExpr::peak_min, 0))?;
|
214
219
|
class.define_method("peak_max", method!(RbExpr::peak_max, 0))?;
|
215
220
|
class.define_method("arg_max", method!(RbExpr::arg_max, 0))?;
|
216
221
|
class.define_method("arg_min", method!(RbExpr::arg_min, 0))?;
|
217
222
|
class.define_method("search_sorted", method!(RbExpr::search_sorted, 2))?;
|
218
223
|
class.define_method("gather", method!(RbExpr::gather, 1))?;
|
224
|
+
class.define_method("get", method!(RbExpr::get, 1))?;
|
219
225
|
class.define_method("sort_by", method!(RbExpr::sort_by, 5))?;
|
220
226
|
class.define_method("backward_fill", method!(RbExpr::backward_fill, 1))?;
|
221
227
|
class.define_method("forward_fill", method!(RbExpr::forward_fill, 1))?;
|
@@ -233,6 +239,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
233
239
|
class.define_method("std", method!(RbExpr::std, 1))?;
|
234
240
|
class.define_method("var", method!(RbExpr::var, 1))?;
|
235
241
|
class.define_method("is_unique", method!(RbExpr::is_unique, 0))?;
|
242
|
+
class.define_method("is_between", method!(RbExpr::is_between, 3))?;
|
236
243
|
class.define_method("approx_n_unique", method!(RbExpr::approx_n_unique, 0))?;
|
237
244
|
class.define_method("is_first_distinct", method!(RbExpr::is_first_distinct, 0))?;
|
238
245
|
class.define_method("is_last_distinct", method!(RbExpr::is_last_distinct, 0))?;
|
@@ -290,7 +297,6 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
290
297
|
class.define_method("str_strip_prefix", method!(RbExpr::str_strip_prefix, 1))?;
|
291
298
|
class.define_method("str_strip_suffix", method!(RbExpr::str_strip_suffix, 1))?;
|
292
299
|
class.define_method("str_slice", method!(RbExpr::str_slice, 2))?;
|
293
|
-
class.define_method("str_explode", method!(RbExpr::str_explode, 0))?;
|
294
300
|
class.define_method("str_to_uppercase", method!(RbExpr::str_to_uppercase, 0))?;
|
295
301
|
class.define_method("str_to_lowercase", method!(RbExpr::str_to_lowercase, 0))?;
|
296
302
|
class.define_method("str_len_bytes", method!(RbExpr::str_len_bytes, 0))?;
|
@@ -364,24 +370,24 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
364
370
|
class.define_method("list_len", method!(RbExpr::list_len, 0))?;
|
365
371
|
class.define_method("list_contains", method!(RbExpr::list_contains, 1))?;
|
366
372
|
class.define_method("list_count_matches", method!(RbExpr::list_count_matches, 1))?;
|
367
|
-
class.define_method("
|
373
|
+
class.define_method("dt_year", method!(RbExpr::dt_year, 0))?;
|
368
374
|
class.define_method("dt_is_leap_year", method!(RbExpr::dt_is_leap_year, 0))?;
|
369
|
-
class.define_method("
|
370
|
-
class.define_method("
|
371
|
-
class.define_method("
|
372
|
-
class.define_method("
|
373
|
-
class.define_method("
|
374
|
-
class.define_method("
|
375
|
-
class.define_method("
|
375
|
+
class.define_method("dt_iso_year", method!(RbExpr::dt_iso_year, 0))?;
|
376
|
+
class.define_method("dt_quarter", method!(RbExpr::dt_quarter, 0))?;
|
377
|
+
class.define_method("dt_month", method!(RbExpr::dt_month, 0))?;
|
378
|
+
class.define_method("dt_week", method!(RbExpr::dt_week, 0))?;
|
379
|
+
class.define_method("dt_weekday", method!(RbExpr::dt_weekday, 0))?;
|
380
|
+
class.define_method("dt_day", method!(RbExpr::dt_day, 0))?;
|
381
|
+
class.define_method("dt_ordinal_day", method!(RbExpr::dt_ordinal_day, 0))?;
|
376
382
|
class.define_method("dt_time", method!(RbExpr::dt_time, 0))?;
|
377
383
|
class.define_method("dt_date", method!(RbExpr::dt_date, 0))?;
|
378
384
|
class.define_method("dt_datetime", method!(RbExpr::dt_datetime, 0))?;
|
379
|
-
class.define_method("
|
380
|
-
class.define_method("
|
381
|
-
class.define_method("
|
382
|
-
class.define_method("
|
383
|
-
class.define_method("
|
384
|
-
class.define_method("
|
385
|
+
class.define_method("dt_hour", method!(RbExpr::dt_hour, 0))?;
|
386
|
+
class.define_method("dt_minute", method!(RbExpr::dt_minute, 0))?;
|
387
|
+
class.define_method("dt_second", method!(RbExpr::dt_second, 0))?;
|
388
|
+
class.define_method("dt_millisecond", method!(RbExpr::dt_millisecond, 0))?;
|
389
|
+
class.define_method("dt_microsecond", method!(RbExpr::dt_microsecond, 0))?;
|
390
|
+
class.define_method("dt_nanosecond", method!(RbExpr::dt_nanosecond, 0))?;
|
385
391
|
class.define_method("dt_total_days", method!(RbExpr::dt_total_days, 0))?;
|
386
392
|
class.define_method("dt_total_hours", method!(RbExpr::dt_total_hours, 0))?;
|
387
393
|
class.define_method("dt_total_minutes", method!(RbExpr::dt_total_minutes, 0))?;
|
@@ -398,7 +404,8 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
398
404
|
"dt_total_milliseconds",
|
399
405
|
method!(RbExpr::dt_total_milliseconds, 0),
|
400
406
|
)?;
|
401
|
-
class.define_method("
|
407
|
+
class.define_method("dt_timestamp", method!(RbExpr::dt_timestamp, 1))?;
|
408
|
+
class.define_method("dt_to_string", method!(RbExpr::dt_to_string, 1))?;
|
402
409
|
class.define_method("dt_offset_by", method!(RbExpr::dt_offset_by, 1))?;
|
403
410
|
class.define_method("dt_epoch_seconds", method!(RbExpr::dt_epoch_seconds, 0))?;
|
404
411
|
class.define_method("dt_with_time_unit", method!(RbExpr::dt_with_time_unit, 1))?;
|
@@ -411,12 +418,12 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
411
418
|
"dt_replace_time_zone",
|
412
419
|
method!(RbExpr::dt_replace_time_zone, 3),
|
413
420
|
)?;
|
414
|
-
class.define_method("dt_truncate", method!(RbExpr::dt_truncate,
|
421
|
+
class.define_method("dt_truncate", method!(RbExpr::dt_truncate, 1))?;
|
415
422
|
class.define_method("dt_month_start", method!(RbExpr::dt_month_start, 0))?;
|
416
423
|
class.define_method("dt_month_end", method!(RbExpr::dt_month_end, 0))?;
|
417
424
|
class.define_method("dt_base_utc_offset", method!(RbExpr::dt_base_utc_offset, 0))?;
|
418
425
|
class.define_method("dt_dst_offset", method!(RbExpr::dt_dst_offset, 0))?;
|
419
|
-
class.define_method("dt_round", method!(RbExpr::dt_round,
|
426
|
+
class.define_method("dt_round", method!(RbExpr::dt_round, 1))?;
|
420
427
|
class.define_method("dt_combine", method!(RbExpr::dt_combine, 2))?;
|
421
428
|
class.define_method("map_batches", method!(RbExpr::map_batches, 4))?;
|
422
429
|
class.define_method("dot", method!(RbExpr::dot, 1))?;
|
@@ -479,8 +486,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
479
486
|
class.define_method("pct_change", method!(RbExpr::pct_change, 1))?;
|
480
487
|
class.define_method("skew", method!(RbExpr::skew, 1))?;
|
481
488
|
class.define_method("kurtosis", method!(RbExpr::kurtosis, 2))?;
|
482
|
-
class.define_method("
|
483
|
-
class.define_method("cat_set_ordering", method!(RbExpr::cat_set_ordering, 1))?;
|
489
|
+
class.define_method("str_join", method!(RbExpr::str_join, 2))?;
|
484
490
|
class.define_method("cat_get_categories", method!(RbExpr::cat_get_categories, 0))?;
|
485
491
|
class.define_method("reshape", method!(RbExpr::reshape, 1))?;
|
486
492
|
class.define_method("cum_count", method!(RbExpr::cum_count, 1))?;
|
@@ -512,7 +518,8 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
512
518
|
class.define_method("entropy", method!(RbExpr::entropy, 2))?;
|
513
519
|
class.define_method("_hash", method!(RbExpr::hash, 4))?;
|
514
520
|
class.define_method("set_sorted_flag", method!(RbExpr::set_sorted_flag, 1))?;
|
515
|
-
class.define_method("replace", method!(RbExpr::replace,
|
521
|
+
class.define_method("replace", method!(RbExpr::replace, 2))?;
|
522
|
+
class.define_method("replace_strict", method!(RbExpr::replace_strict, 4))?;
|
516
523
|
|
517
524
|
// meta
|
518
525
|
class.define_method("meta_pop", method!(RbExpr::meta_pop, 0))?;
|
@@ -545,7 +552,8 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
545
552
|
|
546
553
|
// maybe add to different class
|
547
554
|
let class = module.define_module("Plr")?;
|
548
|
-
class.define_singleton_method("dtype_cols", function!(functions::lazy::
|
555
|
+
class.define_singleton_method("dtype_cols", function!(functions::lazy::dtype_cols, 1))?;
|
556
|
+
class.define_singleton_method("index_cols", function!(functions::lazy::index_cols, 1))?;
|
549
557
|
class.define_singleton_method("col", function!(functions::lazy::col, 1))?;
|
550
558
|
class.define_singleton_method("len", function!(functions::lazy::len, 0))?;
|
551
559
|
class.define_singleton_method("first", function!(functions::lazy::first, 0))?;
|
@@ -624,8 +632,8 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
624
632
|
function!(functions::io::read_parquet_schema, 1),
|
625
633
|
)?;
|
626
634
|
class.define_singleton_method("collect_all", function!(functions::lazy::collect_all, 1))?;
|
627
|
-
class.define_singleton_method("date_range", function!(functions::range::date_range,
|
628
|
-
class.define_singleton_method("date_ranges", function!(functions::range::date_ranges,
|
635
|
+
class.define_singleton_method("date_range", function!(functions::range::date_range, 4))?;
|
636
|
+
class.define_singleton_method("date_ranges", function!(functions::range::date_ranges, 4))?;
|
629
637
|
class.define_singleton_method(
|
630
638
|
"datetime_range",
|
631
639
|
function!(functions::range::datetime_range, 6),
|
@@ -714,7 +722,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
714
722
|
class.define_singleton_method("new_from_csv", function!(RbLazyFrame::new_from_csv, -1))?;
|
715
723
|
class.define_singleton_method(
|
716
724
|
"new_from_parquet",
|
717
|
-
function!(RbLazyFrame::new_from_parquet,
|
725
|
+
function!(RbLazyFrame::new_from_parquet, 13),
|
718
726
|
)?;
|
719
727
|
class.define_singleton_method("new_from_ipc", function!(RbLazyFrame::new_from_ipc, 6))?;
|
720
728
|
class.define_method("write_json", method!(RbLazyFrame::write_json, 1))?;
|
@@ -733,17 +741,17 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
733
741
|
class.define_method("collect", method!(RbLazyFrame::collect, 0))?;
|
734
742
|
class.define_method("sink_parquet", method!(RbLazyFrame::sink_parquet, 7))?;
|
735
743
|
class.define_method("sink_ipc", method!(RbLazyFrame::sink_ipc, 3))?;
|
736
|
-
class.define_method("sink_csv", method!(RbLazyFrame::sink_csv,
|
744
|
+
class.define_method("sink_csv", method!(RbLazyFrame::sink_csv, 15))?;
|
737
745
|
class.define_method("sink_json", method!(RbLazyFrame::sink_json, 2))?;
|
738
746
|
class.define_method("fetch", method!(RbLazyFrame::fetch, 1))?;
|
739
747
|
class.define_method("filter", method!(RbLazyFrame::filter, 1))?;
|
740
748
|
class.define_method("select", method!(RbLazyFrame::select, 1))?;
|
741
749
|
class.define_method("select_seq", method!(RbLazyFrame::select_seq, 1))?;
|
742
750
|
class.define_method("group_by", method!(RbLazyFrame::group_by, 2))?;
|
743
|
-
class.define_method("rolling", method!(RbLazyFrame::rolling,
|
751
|
+
class.define_method("rolling", method!(RbLazyFrame::rolling, 5))?;
|
744
752
|
class.define_method(
|
745
753
|
"group_by_dynamic",
|
746
|
-
method!(RbLazyFrame::group_by_dynamic,
|
754
|
+
method!(RbLazyFrame::group_by_dynamic, 9),
|
747
755
|
)?;
|
748
756
|
class.define_method("with_context", method!(RbLazyFrame::with_context, 1))?;
|
749
757
|
class.define_method("join_asof", method!(RbLazyFrame::join_asof, 11))?;
|
@@ -772,16 +780,13 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
772
780
|
class.define_method("drop_nulls", method!(RbLazyFrame::drop_nulls, 1))?;
|
773
781
|
class.define_method("slice", method!(RbLazyFrame::slice, 2))?;
|
774
782
|
class.define_method("tail", method!(RbLazyFrame::tail, 1))?;
|
775
|
-
class.define_method("
|
783
|
+
class.define_method("unpivot", method!(RbLazyFrame::unpivot, 5))?;
|
776
784
|
class.define_method("with_row_index", method!(RbLazyFrame::with_row_index, 2))?;
|
777
785
|
class.define_method("drop", method!(RbLazyFrame::drop, 1))?;
|
778
786
|
class.define_method("cast_all", method!(RbLazyFrame::cast_all, 2))?;
|
779
787
|
class.define_method("_clone", method!(RbLazyFrame::clone, 0))?;
|
780
|
-
class.define_method("
|
781
|
-
class.define_method("dtypes", method!(RbLazyFrame::dtypes, 0))?;
|
782
|
-
class.define_method("schema", method!(RbLazyFrame::schema, 0))?;
|
788
|
+
class.define_method("collect_schema", method!(RbLazyFrame::collect_schema, 0))?;
|
783
789
|
class.define_method("unnest", method!(RbLazyFrame::unnest, 1))?;
|
784
|
-
class.define_method("width", method!(RbLazyFrame::width, 0))?;
|
785
790
|
class.define_method("count", method!(RbLazyFrame::count, 0))?;
|
786
791
|
class.define_method("merge_sorted", method!(RbLazyFrame::merge_sorted, 2))?;
|
787
792
|
|
@@ -803,8 +808,12 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
803
808
|
class.define_singleton_method("new_opt_f32", function!(RbSeries::new_opt_f32, 3))?;
|
804
809
|
class.define_singleton_method("new_opt_f64", function!(RbSeries::new_opt_f64, 3))?;
|
805
810
|
class.define_singleton_method(
|
806
|
-
"
|
807
|
-
function!(RbSeries::
|
811
|
+
"new_from_any_values",
|
812
|
+
function!(RbSeries::new_from_any_values, 3),
|
813
|
+
)?;
|
814
|
+
class.define_singleton_method(
|
815
|
+
"new_from_any_values_and_dtype",
|
816
|
+
function!(RbSeries::new_from_any_values_and_dtype, 4),
|
808
817
|
)?;
|
809
818
|
class.define_singleton_method("new_str", function!(RbSeries::new_str, 3))?;
|
810
819
|
class.define_singleton_method("new_binary", function!(RbSeries::new_binary, 3))?;
|
@@ -861,7 +870,8 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
861
870
|
class.define_method("div", method!(RbSeries::div, 1))?;
|
862
871
|
class.define_method("rem", method!(RbSeries::rem, 1))?;
|
863
872
|
class.define_method("sort", method!(RbSeries::sort, 3))?;
|
864
|
-
class.define_method("value_counts", method!(RbSeries::value_counts,
|
873
|
+
class.define_method("value_counts", method!(RbSeries::value_counts, 4))?;
|
874
|
+
class.define_method("slice", method!(RbSeries::slice, 2))?;
|
865
875
|
class.define_method("any", method!(RbSeries::any, 1))?;
|
866
876
|
class.define_method("all", method!(RbSeries::all, 1))?;
|
867
877
|
class.define_method("arg_min", method!(RbSeries::arg_min, 0))?;
|
@@ -871,7 +881,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
871
881
|
class.define_method("has_validity", method!(RbSeries::has_validity, 0))?;
|
872
882
|
class.define_method("sample_n", method!(RbSeries::sample_n, 4))?;
|
873
883
|
class.define_method("sample_frac", method!(RbSeries::sample_frac, 4))?;
|
874
|
-
class.define_method("equals", method!(RbSeries::equals,
|
884
|
+
class.define_method("equals", method!(RbSeries::equals, 4))?;
|
875
885
|
class.define_method("eq", method!(RbSeries::eq, 1))?;
|
876
886
|
class.define_method("neq", method!(RbSeries::neq, 1))?;
|
877
887
|
class.define_method("gt", method!(RbSeries::gt, 1))?;
|
@@ -255,8 +255,8 @@ pub fn apply_lambda_with_rows_output<'a>(
|
|
255
255
|
match RArray::try_convert(val).ok() {
|
256
256
|
Some(tuple) => {
|
257
257
|
row_buf.0.clear();
|
258
|
-
for v in tuple.
|
259
|
-
let v = Wrap::<AnyValue>::try_convert(v
|
258
|
+
for v in tuple.into_iter() {
|
259
|
+
let v = Wrap::<AnyValue>::try_convert(v).unwrap().0;
|
260
260
|
row_buf.0.push(v);
|
261
261
|
}
|
262
262
|
let ptr = &row_buf as *const Row;
|
@@ -1,4 +1,4 @@
|
|
1
|
-
use magnus::{value::Lazy, Module, RClass, RModule, Ruby};
|
1
|
+
use magnus::{value::Lazy, ExceptionClass, Module, RClass, RModule, Ruby};
|
2
2
|
|
3
3
|
static POLARS: Lazy<RModule> = Lazy::new(|ruby| ruby.class_object().const_get("Polars").unwrap());
|
4
4
|
|
@@ -38,3 +38,27 @@ static DATETIME: Lazy<RClass> =
|
|
38
38
|
pub(crate) fn datetime() -> RClass {
|
39
39
|
Ruby::get().unwrap().get_inner(&DATETIME)
|
40
40
|
}
|
41
|
+
|
42
|
+
static ERROR: Lazy<ExceptionClass> =
|
43
|
+
Lazy::new(|ruby| ruby.get_inner(&POLARS).const_get("Error").unwrap());
|
44
|
+
|
45
|
+
pub(crate) fn error() -> ExceptionClass {
|
46
|
+
Ruby::get().unwrap().get_inner(&ERROR)
|
47
|
+
}
|
48
|
+
|
49
|
+
static COMPUTE_ERROR: Lazy<ExceptionClass> =
|
50
|
+
Lazy::new(|ruby| ruby.get_inner(&POLARS).const_get("ComputeError").unwrap());
|
51
|
+
|
52
|
+
pub(crate) fn compute_error() -> ExceptionClass {
|
53
|
+
Ruby::get().unwrap().get_inner(&COMPUTE_ERROR)
|
54
|
+
}
|
55
|
+
|
56
|
+
static INVALID_OPERATION_ERROR: Lazy<ExceptionClass> = Lazy::new(|ruby| {
|
57
|
+
ruby.get_inner(&POLARS)
|
58
|
+
.const_get("InvalidOperationError")
|
59
|
+
.unwrap()
|
60
|
+
});
|
61
|
+
|
62
|
+
pub(crate) fn invalid_operation_error() -> ExceptionClass {
|
63
|
+
Ruby::get().unwrap().get_inner(&INVALID_OPERATION_ERROR)
|
64
|
+
}
|
@@ -54,7 +54,8 @@ impl RbSeries {
|
|
54
54
|
.as_any_value(),
|
55
55
|
)
|
56
56
|
.into_value()),
|
57
|
-
|
57
|
+
// For non-numeric output types we require mean_reduce.
|
58
|
+
dt if dt.is_temporal() => {
|
58
59
|
Ok(Wrap(self.series.borrow().mean_reduce().as_any_value()).into_value())
|
59
60
|
}
|
60
61
|
_ => Ok(self.series.borrow().mean().into_value()),
|
@@ -73,7 +74,8 @@ impl RbSeries {
|
|
73
74
|
.as_any_value(),
|
74
75
|
)
|
75
76
|
.into_value()),
|
76
|
-
|
77
|
+
// For non-numeric output types we require median_reduce.
|
78
|
+
dt if dt.is_temporal() => Ok(Wrap(
|
77
79
|
self.series
|
78
80
|
.borrow()
|
79
81
|
.median_reduce()
|
@@ -1,24 +1,34 @@
|
|
1
|
-
use crate::{RbResult, RbSeries};
|
1
|
+
use crate::{RbPolarsErr, RbResult, RbSeries};
|
2
2
|
|
3
3
|
impl RbSeries {
|
4
|
-
pub fn add(&self, other: &RbSeries) -> Self {
|
5
|
-
(&*self.series.borrow() + &*other.series.borrow())
|
4
|
+
pub fn add(&self, other: &RbSeries) -> RbResult<Self> {
|
5
|
+
(&*self.series.borrow() + &*other.series.borrow())
|
6
|
+
.map(Into::into)
|
7
|
+
.map_err(RbPolarsErr::from)
|
6
8
|
}
|
7
9
|
|
8
|
-
pub fn sub(&self, other: &RbSeries) -> Self {
|
9
|
-
(&*self.series.borrow() - &*other.series.borrow())
|
10
|
+
pub fn sub(&self, other: &RbSeries) -> RbResult<Self> {
|
11
|
+
(&*self.series.borrow() - &*other.series.borrow())
|
12
|
+
.map(Into::into)
|
13
|
+
.map_err(RbPolarsErr::from)
|
10
14
|
}
|
11
15
|
|
12
|
-
pub fn mul(&self, other: &RbSeries) -> Self {
|
13
|
-
(&*self.series.borrow() * &*other.series.borrow())
|
16
|
+
pub fn mul(&self, other: &RbSeries) -> RbResult<Self> {
|
17
|
+
(&*self.series.borrow() * &*other.series.borrow())
|
18
|
+
.map(Into::into)
|
19
|
+
.map_err(RbPolarsErr::from)
|
14
20
|
}
|
15
21
|
|
16
|
-
pub fn div(&self, other: &RbSeries) -> Self {
|
17
|
-
(&*self.series.borrow() / &*other.series.borrow())
|
22
|
+
pub fn div(&self, other: &RbSeries) -> RbResult<Self> {
|
23
|
+
(&*self.series.borrow() / &*other.series.borrow())
|
24
|
+
.map(Into::into)
|
25
|
+
.map_err(RbPolarsErr::from)
|
18
26
|
}
|
19
27
|
|
20
|
-
pub fn rem(&self, other: &RbSeries) -> Self {
|
21
|
-
(&*self.series.borrow() % &*other.series.borrow())
|
28
|
+
pub fn rem(&self, other: &RbSeries) -> RbResult<Self> {
|
29
|
+
(&*self.series.borrow() % &*other.series.borrow())
|
30
|
+
.map(Into::into)
|
31
|
+
.map_err(RbPolarsErr::from)
|
22
32
|
}
|
23
33
|
}
|
24
34
|
|
@@ -1,10 +1,11 @@
|
|
1
1
|
use magnus::{prelude::*, RArray};
|
2
2
|
use polars_core::prelude::*;
|
3
3
|
|
4
|
+
use crate::any_value::rb_object_to_any_value;
|
4
5
|
use crate::conversion::{slice_extract_wrapped, vec_extract_wrapped, Wrap};
|
5
6
|
use crate::prelude::ObjectValue;
|
6
7
|
use crate::series::to_series_collection;
|
7
|
-
use crate::{RbPolarsErr, RbResult, RbSeries, RbValueError};
|
8
|
+
use crate::{RbPolarsErr, RbResult, RbSeries, RbTypeError, RbValueError};
|
8
9
|
|
9
10
|
impl RbSeries {
|
10
11
|
pub fn new_opt_bool(name: String, obj: RArray, strict: bool) -> RbResult<RbSeries> {
|
@@ -35,36 +36,28 @@ impl RbSeries {
|
|
35
36
|
}
|
36
37
|
}
|
37
38
|
|
38
|
-
fn new_primitive<T>(name: &str,
|
39
|
+
fn new_primitive<T>(name: &str, values: RArray, _strict: bool) -> RbResult<RbSeries>
|
39
40
|
where
|
40
41
|
T: PolarsNumericType,
|
41
42
|
ChunkedArray<T>: IntoSeries,
|
42
43
|
T::Native: magnus::TryConvert,
|
43
44
|
{
|
44
|
-
let len =
|
45
|
+
let len = values.len();
|
45
46
|
let mut builder = PrimitiveChunkedBuilder::<T>::new(name, len);
|
46
47
|
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
Err(e) => {
|
55
|
-
if strict {
|
56
|
-
return Err(e);
|
57
|
-
}
|
58
|
-
builder.append_null()
|
59
|
-
}
|
60
|
-
}
|
61
|
-
}
|
48
|
+
for res in values.into_iter() {
|
49
|
+
let value = res;
|
50
|
+
if value.is_nil() {
|
51
|
+
builder.append_null()
|
52
|
+
} else {
|
53
|
+
let v = <T::Native>::try_convert(value)?;
|
54
|
+
builder.append_value(v)
|
62
55
|
}
|
63
56
|
}
|
64
|
-
let ca = builder.finish();
|
65
57
|
|
58
|
+
let ca = builder.finish();
|
66
59
|
let s = ca.into_series();
|
67
|
-
Ok(
|
60
|
+
Ok(s.into())
|
68
61
|
}
|
69
62
|
|
70
63
|
// Init with lists that can contain Nones
|
@@ -91,18 +84,50 @@ init_method_opt!(new_opt_f64, Float64Type, f64);
|
|
91
84
|
|
92
85
|
fn vec_wrap_any_value<'s>(arr: RArray) -> RbResult<Vec<Wrap<AnyValue<'s>>>> {
|
93
86
|
let mut val = Vec::with_capacity(arr.len());
|
94
|
-
for v in arr.
|
95
|
-
val.push(Wrap::<AnyValue<'s>>::try_convert(v
|
87
|
+
for v in arr.into_iter() {
|
88
|
+
val.push(Wrap::<AnyValue<'s>>::try_convert(v)?);
|
96
89
|
}
|
97
90
|
Ok(val)
|
98
91
|
}
|
99
92
|
|
100
93
|
impl RbSeries {
|
101
|
-
pub fn
|
102
|
-
let
|
103
|
-
let avs = slice_extract_wrapped(&val);
|
94
|
+
pub fn new_from_any_values(name: String, values: RArray, strict: bool) -> RbResult<Self> {
|
95
|
+
let any_values_result = vec_wrap_any_value(values);
|
104
96
|
// from anyvalues is fallible
|
105
|
-
let
|
97
|
+
let result = any_values_result.and_then(|avs| {
|
98
|
+
let avs = slice_extract_wrapped(&avs);
|
99
|
+
let s = Series::from_any_values(&name, avs, strict).map_err(|e| {
|
100
|
+
RbTypeError::new_err(format!(
|
101
|
+
"{e}\n\nHint: Try setting `strict: false` to allow passing data with mixed types."
|
102
|
+
))
|
103
|
+
})?;
|
104
|
+
Ok(s.into())
|
105
|
+
});
|
106
|
+
|
107
|
+
// Fall back to Object type for non-strict construction.
|
108
|
+
if !strict && result.is_err() {
|
109
|
+
return Self::new_object(name, values, strict);
|
110
|
+
}
|
111
|
+
|
112
|
+
result
|
113
|
+
}
|
114
|
+
|
115
|
+
pub fn new_from_any_values_and_dtype(
|
116
|
+
name: String,
|
117
|
+
values: RArray,
|
118
|
+
dtype: Wrap<DataType>,
|
119
|
+
strict: bool,
|
120
|
+
) -> RbResult<Self> {
|
121
|
+
let any_values = values
|
122
|
+
.into_iter()
|
123
|
+
.map(|v| rb_object_to_any_value(v, strict))
|
124
|
+
.collect::<RbResult<Vec<AnyValue>>>()?;
|
125
|
+
let s = Series::from_any_values_and_dtype(&name, any_values.as_slice(), &dtype.0, strict)
|
126
|
+
.map_err(|e| {
|
127
|
+
RbTypeError::new_err(format!(
|
128
|
+
"{e}\n\nHint: Try setting `strict: false` to allow passing data with mixed types."
|
129
|
+
))
|
130
|
+
})?;
|
106
131
|
Ok(s.into())
|
107
132
|
}
|
108
133
|
|
@@ -125,9 +150,9 @@ impl RbSeries {
|
|
125
150
|
|
126
151
|
pub fn new_object(name: String, val: RArray, _strict: bool) -> RbResult<Self> {
|
127
152
|
let val = val
|
128
|
-
.
|
129
|
-
.map(
|
130
|
-
.collect::<
|
153
|
+
.into_iter()
|
154
|
+
.map(ObjectValue::from)
|
155
|
+
.collect::<Vec<ObjectValue>>();
|
131
156
|
let s = ObjectChunked::<ObjectValue>::new_from_vec(&name, val).into_series();
|
132
157
|
Ok(s.into())
|
133
158
|
}
|
@@ -163,15 +188,8 @@ impl RbSeries {
|
|
163
188
|
}
|
164
189
|
}
|
165
190
|
|
166
|
-
pub fn new_decimal(name: String,
|
167
|
-
|
168
|
-
// TODO: do we have to respect 'strict' here? it's possible if we want to
|
169
|
-
let avs = slice_extract_wrapped(&val);
|
170
|
-
// create a fake dtype with a placeholder "none" scale, to be inferred later
|
171
|
-
let dtype = DataType::Decimal(None, None);
|
172
|
-
let s = Series::from_any_values_and_dtype(&name, avs, &dtype, strict)
|
173
|
-
.map_err(RbPolarsErr::from)?;
|
174
|
-
Ok(s.into())
|
191
|
+
pub fn new_decimal(name: String, values: RArray, strict: bool) -> RbResult<Self> {
|
192
|
+
Self::new_from_any_values(name, values, strict)
|
175
193
|
}
|
176
194
|
|
177
195
|
pub fn repeat(
|
@@ -36,8 +36,8 @@ impl RbSeries {
|
|
36
36
|
|
37
37
|
pub fn to_series_collection(rs: RArray) -> RbResult<Vec<Series>> {
|
38
38
|
let mut series = Vec::new();
|
39
|
-
for item in rs.
|
40
|
-
series.push(<&RbSeries>::try_convert(item
|
39
|
+
for item in rs.into_iter() {
|
40
|
+
series.push(<&RbSeries>::try_convert(item)?.series.borrow().clone());
|
41
41
|
}
|
42
42
|
Ok(series)
|
43
43
|
}
|
@@ -247,13 +247,24 @@ impl RbSeries {
|
|
247
247
|
.into())
|
248
248
|
}
|
249
249
|
|
250
|
-
pub fn value_counts(
|
251
|
-
|
250
|
+
pub fn value_counts(
|
251
|
+
&self,
|
252
|
+
sort: bool,
|
253
|
+
parallel: bool,
|
254
|
+
name: String,
|
255
|
+
normalize: bool,
|
256
|
+
) -> RbResult<RbDataFrame> {
|
257
|
+
let out = self
|
252
258
|
.series
|
253
259
|
.borrow()
|
254
|
-
.value_counts(
|
260
|
+
.value_counts(sort, parallel, name, normalize)
|
255
261
|
.map_err(RbPolarsErr::from)?;
|
256
|
-
Ok(
|
262
|
+
Ok(out.into())
|
263
|
+
}
|
264
|
+
|
265
|
+
pub fn slice(&self, offset: i64, length: Option<usize>) -> Self {
|
266
|
+
let length = length.unwrap_or_else(|| self.series.borrow().len());
|
267
|
+
self.series.borrow().slice(offset, length).into()
|
257
268
|
}
|
258
269
|
|
259
270
|
pub fn take_with_series(&self, indices: &RbSeries) -> RbResult<Self> {
|
@@ -301,10 +312,20 @@ impl RbSeries {
|
|
301
312
|
Ok(s.into())
|
302
313
|
}
|
303
314
|
|
304
|
-
pub fn equals(
|
305
|
-
|
306
|
-
|
307
|
-
|
315
|
+
pub fn equals(
|
316
|
+
&self,
|
317
|
+
other: &RbSeries,
|
318
|
+
check_dtypes: bool,
|
319
|
+
check_names: bool,
|
320
|
+
null_equal: bool,
|
321
|
+
) -> bool {
|
322
|
+
if check_dtypes && (self.series.borrow().dtype() != other.series.borrow().dtype()) {
|
323
|
+
return false;
|
324
|
+
}
|
325
|
+
if check_names && (self.series.borrow().name() != other.series.borrow().name()) {
|
326
|
+
return false;
|
327
|
+
}
|
328
|
+
if null_equal {
|
308
329
|
self.series.borrow().equals_missing(&other.series.borrow())
|
309
330
|
} else {
|
310
331
|
self.series.borrow().equals(&other.series.borrow())
|
data/ext/polars/src/sql.rs
CHANGED
@@ -37,7 +37,9 @@ impl RbSQLContext {
|
|
37
37
|
}
|
38
38
|
|
39
39
|
pub fn register(&self, name: String, lf: &RbLazyFrame) {
|
40
|
-
self.context
|
40
|
+
self.context
|
41
|
+
.borrow_mut()
|
42
|
+
.register(&name, lf.ldf.borrow().clone())
|
41
43
|
}
|
42
44
|
|
43
45
|
pub fn unregister(&self, name: String) {
|
data/lib/polars/array_expr.rb
CHANGED
@@ -358,7 +358,7 @@ module Polars
|
|
358
358
|
# # │ [7, 8, 9] ┆ 4 ┆ null │
|
359
359
|
# # └───────────────┴─────┴──────┘
|
360
360
|
def get(index, null_on_oob: true)
|
361
|
-
index = Utils.
|
361
|
+
index = Utils.parse_into_expression(index)
|
362
362
|
Utils.wrap_expr(_rbexpr.arr_get(index, null_on_oob))
|
363
363
|
end
|
364
364
|
|
@@ -446,7 +446,7 @@ module Polars
|
|
446
446
|
# # │ ["x", "y"] ┆ _ ┆ x_y │
|
447
447
|
# # └───────────────┴───────────┴──────┘
|
448
448
|
def join(separator, ignore_nulls: true)
|
449
|
-
separator = Utils.
|
449
|
+
separator = Utils.parse_into_expression(separator, str_as_lit: true)
|
450
450
|
Utils.wrap_expr(_rbexpr.arr_join(separator, ignore_nulls))
|
451
451
|
end
|
452
452
|
|
@@ -502,7 +502,7 @@ module Polars
|
|
502
502
|
# # │ ["a", "c"] ┆ true │
|
503
503
|
# # └───────────────┴──────────┘
|
504
504
|
def contains(item)
|
505
|
-
item = Utils.
|
505
|
+
item = Utils.parse_into_expression(item, str_as_lit: true)
|
506
506
|
Utils.wrap_expr(_rbexpr.arr_contains(item))
|
507
507
|
end
|
508
508
|
|
@@ -530,7 +530,7 @@ module Polars
|
|
530
530
|
# # │ [2, 2] ┆ 2 │
|
531
531
|
# # └───────────────┴────────────────┘
|
532
532
|
def count_matches(element)
|
533
|
-
element = Utils.
|
533
|
+
element = Utils.parse_into_expression(element, str_as_lit: true)
|
534
534
|
Utils.wrap_expr(_rbexpr.arr_count_matches(element))
|
535
535
|
end
|
536
536
|
end
|