polars-df 0.14.0 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +35 -0
  3. data/Cargo.lock +1523 -378
  4. data/LICENSE.txt +1 -0
  5. data/README.md +38 -4
  6. data/ext/polars/Cargo.toml +15 -5
  7. data/ext/polars/src/batched_csv.rs +7 -10
  8. data/ext/polars/src/conversion/any_value.rs +31 -21
  9. data/ext/polars/src/conversion/mod.rs +155 -48
  10. data/ext/polars/src/dataframe/construction.rs +0 -3
  11. data/ext/polars/src/dataframe/export.rs +9 -2
  12. data/ext/polars/src/dataframe/general.rs +15 -57
  13. data/ext/polars/src/dataframe/io.rs +77 -169
  14. data/ext/polars/src/dataframe/mod.rs +1 -0
  15. data/ext/polars/src/dataframe/serde.rs +15 -0
  16. data/ext/polars/src/error.rs +31 -48
  17. data/ext/polars/src/exceptions.rs +24 -0
  18. data/ext/polars/src/expr/binary.rs +4 -42
  19. data/ext/polars/src/expr/datetime.rs +5 -4
  20. data/ext/polars/src/expr/general.rs +16 -22
  21. data/ext/polars/src/expr/list.rs +18 -11
  22. data/ext/polars/src/expr/meta.rs +6 -2
  23. data/ext/polars/src/expr/rolling.rs +6 -7
  24. data/ext/polars/src/expr/string.rs +9 -36
  25. data/ext/polars/src/file.rs +78 -23
  26. data/ext/polars/src/functions/aggregation.rs +4 -4
  27. data/ext/polars/src/functions/business.rs +15 -0
  28. data/ext/polars/src/functions/io.rs +34 -13
  29. data/ext/polars/src/functions/lazy.rs +22 -12
  30. data/ext/polars/src/functions/meta.rs +1 -1
  31. data/ext/polars/src/functions/mod.rs +1 -0
  32. data/ext/polars/src/interop/arrow/mod.rs +1 -0
  33. data/ext/polars/src/interop/arrow/to_ruby.rs +83 -0
  34. data/ext/polars/src/interop/mod.rs +1 -0
  35. data/ext/polars/src/lazyframe/general.rs +920 -0
  36. data/ext/polars/src/lazyframe/mod.rs +3 -827
  37. data/ext/polars/src/lazyframe/serde.rs +31 -0
  38. data/ext/polars/src/lib.rs +54 -27
  39. data/ext/polars/src/map/dataframe.rs +10 -6
  40. data/ext/polars/src/map/lazy.rs +65 -4
  41. data/ext/polars/src/map/mod.rs +9 -8
  42. data/ext/polars/src/on_startup.rs +1 -1
  43. data/ext/polars/src/series/aggregation.rs +1 -5
  44. data/ext/polars/src/series/arithmetic.rs +10 -10
  45. data/ext/polars/src/series/construction.rs +2 -2
  46. data/ext/polars/src/series/export.rs +1 -1
  47. data/ext/polars/src/series/general.rs +631 -0
  48. data/ext/polars/src/series/import.rs +55 -0
  49. data/ext/polars/src/series/mod.rs +11 -638
  50. data/ext/polars/src/series/scatter.rs +2 -2
  51. data/ext/polars/src/utils.rs +0 -20
  52. data/lib/polars/batched_csv_reader.rb +0 -2
  53. data/lib/polars/binary_expr.rb +133 -9
  54. data/lib/polars/binary_name_space.rb +101 -6
  55. data/lib/polars/config.rb +4 -0
  56. data/lib/polars/data_frame.rb +452 -101
  57. data/lib/polars/data_type_group.rb +28 -0
  58. data/lib/polars/data_types.rb +3 -1
  59. data/lib/polars/date_time_expr.rb +244 -0
  60. data/lib/polars/date_time_name_space.rb +87 -0
  61. data/lib/polars/expr.rb +103 -2
  62. data/lib/polars/functions/aggregation/horizontal.rb +10 -4
  63. data/lib/polars/functions/as_datatype.rb +51 -2
  64. data/lib/polars/functions/col.rb +1 -1
  65. data/lib/polars/functions/eager.rb +1 -3
  66. data/lib/polars/functions/lazy.rb +95 -13
  67. data/lib/polars/functions/range/time_range.rb +21 -21
  68. data/lib/polars/io/csv.rb +14 -16
  69. data/lib/polars/io/database.rb +2 -2
  70. data/lib/polars/io/delta.rb +126 -0
  71. data/lib/polars/io/ipc.rb +14 -4
  72. data/lib/polars/io/ndjson.rb +10 -0
  73. data/lib/polars/io/parquet.rb +168 -111
  74. data/lib/polars/lazy_frame.rb +684 -20
  75. data/lib/polars/list_name_space.rb +169 -0
  76. data/lib/polars/selectors.rb +1226 -0
  77. data/lib/polars/series.rb +465 -35
  78. data/lib/polars/string_cache.rb +27 -1
  79. data/lib/polars/string_expr.rb +0 -1
  80. data/lib/polars/string_name_space.rb +73 -3
  81. data/lib/polars/struct_name_space.rb +31 -7
  82. data/lib/polars/utils/various.rb +5 -1
  83. data/lib/polars/utils.rb +45 -10
  84. data/lib/polars/version.rb +1 -1
  85. data/lib/polars.rb +17 -1
  86. metadata +16 -9
  87. data/lib/polars/functions.rb +0 -57
@@ -0,0 +1,31 @@
1
+ use magnus::Value;
2
+ use polars::lazy::frame::LazyFrame;
3
+ use polars::prelude::*;
4
+ use std::io::Read;
5
+
6
+ use crate::file::get_file_like;
7
+ use crate::{RbLazyFrame, RbResult, RbValueError};
8
+
9
+ impl RbLazyFrame {
10
+ // TODO change to serialize_json
11
+ pub fn read_json(rb_f: Value) -> RbResult<Self> {
12
+ // it is faster to first read to memory and then parse: https://github.com/serde-rs/json/issues/160
13
+ // so don't bother with files.
14
+ let mut json = String::new();
15
+ let _ = get_file_like(rb_f, false)?
16
+ .read_to_string(&mut json)
17
+ .unwrap();
18
+
19
+ // Safety
20
+ // we skipped the serializing/deserializing of the static in lifetime in `DataType`
21
+ // so we actually don't have a lifetime at all when serializing.
22
+
23
+ // &str still has a lifetime. Bit its ok, because we drop it immediately
24
+ // in this scope
25
+ let json = unsafe { std::mem::transmute::<&'_ str, &'static str>(json.as_str()) };
26
+
27
+ let lp = serde_json::from_str::<DslPlan>(json)
28
+ .map_err(|err| RbValueError::new_err(format!("{:?}", err)))?;
29
+ Ok(LazyFrame::from(lp).into())
30
+ }
31
+ }
@@ -3,6 +3,7 @@ mod batched_csv;
3
3
  mod conversion;
4
4
  mod dataframe;
5
5
  mod error;
6
+ mod exceptions;
6
7
  mod expr;
7
8
  mod file;
8
9
  mod functions;
@@ -21,21 +22,31 @@ mod utils;
21
22
  use batched_csv::RbBatchedCsv;
22
23
  use conversion::*;
23
24
  use dataframe::RbDataFrame;
24
- use error::{RbPolarsErr, RbTypeError, RbValueError};
25
+ use error::RbPolarsErr;
26
+ use exceptions::{RbTypeError, RbValueError};
25
27
  use expr::rb_exprs_to_exprs;
26
28
  use expr::RbExpr;
27
29
  use functions::string_cache::RbStringCacheHolder;
28
30
  use functions::whenthen::{RbChainedThen, RbChainedWhen, RbThen, RbWhen};
31
+ use interop::arrow::to_ruby::RbArrowArrayStream;
29
32
  use lazyframe::RbLazyFrame;
30
33
  use lazygroupby::RbLazyGroupBy;
31
- use magnus::{define_module, function, method, prelude::*, Error, Ruby};
34
+ use magnus::{define_module, function, method, prelude::*, Ruby};
32
35
  use series::RbSeries;
33
36
  use sql::RbSQLContext;
34
37
 
35
- type RbResult<T> = Result<T, Error>;
38
+ use magnus::error::Result as RbResult;
39
+ use magnus::Error as RbErr;
40
+
41
+ // TODO move
42
+ fn re_escape(pattern: String) -> String {
43
+ regex::escape(&pattern)
44
+ }
36
45
 
37
46
  #[magnus::init]
38
47
  fn init(ruby: &Ruby) -> RbResult<()> {
48
+ crate::on_startup::register_startup_deps();
49
+
39
50
  let module = define_module("Polars")?;
40
51
 
41
52
  let class = module.define_class("RbBatchedCsv", ruby.class_object())?;
@@ -45,7 +56,6 @@ fn init(ruby: &Ruby) -> RbResult<()> {
45
56
  let class = module.define_class("RbDataFrame", ruby.class_object())?;
46
57
  class.define_singleton_method("new", function!(RbDataFrame::init, 1))?;
47
58
  class.define_singleton_method("read_csv", function!(RbDataFrame::read_csv, -1))?;
48
- class.define_singleton_method("read_parquet", function!(RbDataFrame::read_parquet, 9))?;
49
59
  class.define_singleton_method("read_ipc", function!(RbDataFrame::read_ipc, 6))?;
50
60
  class.define_singleton_method(
51
61
  "read_ipc_stream",
@@ -58,17 +68,21 @@ fn init(ruby: &Ruby) -> RbResult<()> {
58
68
  class.define_singleton_method("read_ndjson", function!(RbDataFrame::read_ndjson, 4))?;
59
69
  class.define_method("estimated_size", method!(RbDataFrame::estimated_size, 0))?;
60
70
  class.define_method("dtype_strings", method!(RbDataFrame::dtype_strings, 0))?;
61
- class.define_method("write_avro", method!(RbDataFrame::write_avro, 2))?;
71
+ class.define_method("write_avro", method!(RbDataFrame::write_avro, 3))?;
62
72
  class.define_method("write_json", method!(RbDataFrame::write_json, 3))?;
63
73
  class.define_method("write_ndjson", method!(RbDataFrame::write_ndjson, 1))?;
64
74
  class.define_method("write_csv", method!(RbDataFrame::write_csv, 10))?;
65
- class.define_method("write_ipc", method!(RbDataFrame::write_ipc, 2))?;
75
+ class.define_method("write_ipc", method!(RbDataFrame::write_ipc, 5))?;
66
76
  class.define_method(
67
77
  "write_ipc_stream",
68
- method!(RbDataFrame::write_ipc_stream, 2),
78
+ method!(RbDataFrame::write_ipc_stream, 3),
69
79
  )?;
70
80
  class.define_method("row_tuple", method!(RbDataFrame::row_tuple, 1))?;
71
81
  class.define_method("row_tuples", method!(RbDataFrame::row_tuples, 0))?;
82
+ class.define_method(
83
+ "arrow_c_stream",
84
+ method!(RbDataFrame::__arrow_c_stream__, 0),
85
+ )?;
72
86
  class.define_method("to_numo", method!(RbDataFrame::to_numo, 0))?;
73
87
  class.define_method("write_parquet", method!(RbDataFrame::write_parquet, 6))?;
74
88
  class.define_method("add", method!(RbDataFrame::add, 1))?;
@@ -129,10 +143,6 @@ fn init(ruby: &Ruby) -> RbResult<()> {
129
143
  class.define_method("pivot_expr", method!(RbDataFrame::pivot_expr, 7))?;
130
144
  class.define_method("partition_by", method!(RbDataFrame::partition_by, 3))?;
131
145
  class.define_method("lazy", method!(RbDataFrame::lazy, 0))?;
132
- class.define_method("mean_horizontal", method!(RbDataFrame::mean_horizontal, 1))?;
133
- class.define_method("max_horizontal", method!(RbDataFrame::max_horizontal, 0))?;
134
- class.define_method("min_horizontal", method!(RbDataFrame::min_horizontal, 0))?;
135
- class.define_method("sum_horizontal", method!(RbDataFrame::sum_horizontal, 1))?;
136
146
  class.define_method("to_dummies", method!(RbDataFrame::to_dummies, 3))?;
137
147
  class.define_method("null_count", method!(RbDataFrame::null_count, 0))?;
138
148
  class.define_method("map_rows", method!(RbDataFrame::map_rows, 3))?;
@@ -143,6 +153,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
143
153
  class.define_method("to_struct", method!(RbDataFrame::to_struct, 1))?;
144
154
  class.define_method("unnest", method!(RbDataFrame::unnest, 1))?;
145
155
  class.define_method("clear", method!(RbDataFrame::clear, 0))?;
156
+ class.define_method("serialize_json", method!(RbDataFrame::serialize_json, 1))?;
146
157
 
147
158
  let class = module.define_class("RbExpr", ruby.class_object())?;
148
159
  class.define_method("+", method!(RbExpr::add, 1))?;
@@ -286,6 +297,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
286
297
  class.define_method("str_slice", method!(RbExpr::str_slice, 2))?;
287
298
  class.define_method("str_to_uppercase", method!(RbExpr::str_to_uppercase, 0))?;
288
299
  class.define_method("str_to_lowercase", method!(RbExpr::str_to_lowercase, 0))?;
300
+ // class.define_method("str_to_titlecase", method!(RbExpr::str_to_titlecase, 0))?;
289
301
  class.define_method("str_len_bytes", method!(RbExpr::str_len_bytes, 0))?;
290
302
  class.define_method("str_len_chars", method!(RbExpr::str_len_chars, 0))?;
291
303
  class.define_method("str_replace_n", method!(RbExpr::str_replace_n, 4))?;
@@ -412,7 +424,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
412
424
  class.define_method("dt_dst_offset", method!(RbExpr::dt_dst_offset, 0))?;
413
425
  class.define_method("dt_round", method!(RbExpr::dt_round, 1))?;
414
426
  class.define_method("dt_combine", method!(RbExpr::dt_combine, 2))?;
415
- class.define_method("map_batches", method!(RbExpr::map_batches, 4))?;
427
+ class.define_method("map_batches", method!(RbExpr::map_batches, 5))?;
416
428
  class.define_method("dot", method!(RbExpr::dot, 1))?;
417
429
  class.define_method("reinterpret", method!(RbExpr::reinterpret, 1))?;
418
430
  class.define_method("mode", method!(RbExpr::mode, 0))?;
@@ -552,10 +564,10 @@ fn init(ruby: &Ruby) -> RbResult<()> {
552
564
  class.define_singleton_method("int_range", function!(functions::range::int_range, 4))?;
553
565
  class.define_singleton_method("int_ranges", function!(functions::range::int_ranges, 4))?;
554
566
  class.define_singleton_method("repeat", function!(functions::lazy::repeat, 3))?;
555
- class.define_singleton_method("pearson_corr", function!(functions::lazy::pearson_corr, 3))?;
567
+ class.define_singleton_method("pearson_corr", function!(functions::lazy::pearson_corr, 2))?;
556
568
  class.define_singleton_method(
557
569
  "spearman_rank_corr",
558
- function!(functions::lazy::spearman_rank_corr, 4),
570
+ function!(functions::lazy::spearman_rank_corr, 3),
559
571
  )?;
560
572
  class.define_singleton_method("sql_expr", function!(functions::lazy::sql_expr, 1))?;
561
573
  class.define_singleton_method("cov", function!(functions::lazy::cov, 3))?;
@@ -567,6 +579,10 @@ fn init(ruby: &Ruby) -> RbResult<()> {
567
579
  class.define_singleton_method("when", function!(functions::whenthen::when, 1))?;
568
580
  class.define_singleton_method("concat_str", function!(functions::lazy::concat_str, 3))?;
569
581
  class.define_singleton_method("concat_list", function!(functions::lazy::concat_list, 1))?;
582
+ class.define_singleton_method(
583
+ "business_day_count",
584
+ function!(functions::business::business_day_count, 4),
585
+ )?;
570
586
  class.define_singleton_method(
571
587
  "all_horizontal",
572
588
  function!(functions::aggregation::all_horizontal, 1),
@@ -585,11 +601,11 @@ fn init(ruby: &Ruby) -> RbResult<()> {
585
601
  )?;
586
602
  class.define_singleton_method(
587
603
  "sum_horizontal",
588
- function!(functions::aggregation::sum_horizontal, 1),
604
+ function!(functions::aggregation::sum_horizontal, 2),
589
605
  )?;
590
606
  class.define_singleton_method(
591
607
  "mean_horizontal",
592
- function!(functions::aggregation::mean_horizontal, 1),
608
+ function!(functions::aggregation::mean_horizontal, 2),
593
609
  )?;
594
610
  class.define_singleton_method("as_struct", function!(functions::lazy::as_struct, 1))?;
595
611
  class.define_singleton_method("coalesce", function!(functions::lazy::coalesce, 1))?;
@@ -640,8 +656,8 @@ fn init(ruby: &Ruby) -> RbResult<()> {
640
656
  function!(functions::meta::get_index_type, 0),
641
657
  )?;
642
658
  class.define_singleton_method(
643
- "threadpool_size",
644
- function!(functions::meta::threadpool_size, 0),
659
+ "thread_pool_size",
660
+ function!(functions::meta::thread_pool_size, 0),
645
661
  )?;
646
662
  class.define_singleton_method(
647
663
  "enable_string_cache",
@@ -699,19 +715,20 @@ fn init(ruby: &Ruby) -> RbResult<()> {
699
715
  "set_random_seed",
700
716
  function!(functions::random::set_random_seed, 1),
701
717
  )?;
718
+ class.define_singleton_method("re_escape", function!(re_escape, 1))?;
702
719
 
703
720
  let class = module.define_class("RbLazyFrame", ruby.class_object())?;
704
721
  class.define_singleton_method("read_json", function!(RbLazyFrame::read_json, 1))?;
705
722
  class.define_singleton_method(
706
723
  "new_from_ndjson",
707
- function!(RbLazyFrame::new_from_ndjson, 7),
724
+ function!(RbLazyFrame::new_from_ndjson, 8),
708
725
  )?;
709
726
  class.define_singleton_method("new_from_csv", function!(RbLazyFrame::new_from_csv, -1))?;
710
727
  class.define_singleton_method(
711
728
  "new_from_parquet",
712
- function!(RbLazyFrame::new_from_parquet, 14),
729
+ function!(RbLazyFrame::new_from_parquet, -1),
713
730
  )?;
714
- class.define_singleton_method("new_from_ipc", function!(RbLazyFrame::new_from_ipc, 9))?;
731
+ class.define_singleton_method("new_from_ipc", function!(RbLazyFrame::new_from_ipc, 10))?;
715
732
  class.define_method("write_json", method!(RbLazyFrame::write_json, 1))?;
716
733
  class.define_method("describe_plan", method!(RbLazyFrame::describe_plan, 0))?;
717
734
  class.define_method(
@@ -726,10 +743,10 @@ fn init(ruby: &Ruby) -> RbResult<()> {
726
743
  class.define_method("sort_by_exprs", method!(RbLazyFrame::sort_by_exprs, 5))?;
727
744
  class.define_method("cache", method!(RbLazyFrame::cache, 0))?;
728
745
  class.define_method("collect", method!(RbLazyFrame::collect, 0))?;
729
- class.define_method("sink_parquet", method!(RbLazyFrame::sink_parquet, 7))?;
730
- class.define_method("sink_ipc", method!(RbLazyFrame::sink_ipc, 3))?;
746
+ class.define_method("sink_parquet", method!(RbLazyFrame::sink_parquet, 9))?;
747
+ class.define_method("sink_ipc", method!(RbLazyFrame::sink_ipc, 5))?;
731
748
  class.define_method("sink_csv", method!(RbLazyFrame::sink_csv, 15))?;
732
- class.define_method("sink_json", method!(RbLazyFrame::sink_json, 2))?;
749
+ class.define_method("sink_json", method!(RbLazyFrame::sink_json, 4))?;
733
750
  class.define_method("fetch", method!(RbLazyFrame::fetch, 1))?;
734
751
  class.define_method("filter", method!(RbLazyFrame::filter, 1))?;
735
752
  class.define_method("select", method!(RbLazyFrame::select, 1))?;
@@ -741,15 +758,15 @@ fn init(ruby: &Ruby) -> RbResult<()> {
741
758
  method!(RbLazyFrame::group_by_dynamic, 9),
742
759
  )?;
743
760
  class.define_method("with_context", method!(RbLazyFrame::with_context, 1))?;
744
- class.define_method("join_asof", method!(RbLazyFrame::join_asof, 11))?;
745
- class.define_method("join", method!(RbLazyFrame::join, 8))?;
761
+ class.define_method("join_asof", method!(RbLazyFrame::join_asof, 12))?;
762
+ class.define_method("join", method!(RbLazyFrame::join, 10))?;
746
763
  class.define_method("with_column", method!(RbLazyFrame::with_column, 1))?;
747
764
  class.define_method("with_columns", method!(RbLazyFrame::with_columns, 1))?;
748
765
  class.define_method(
749
766
  "with_columns_seq",
750
767
  method!(RbLazyFrame::with_columns_seq, 1),
751
768
  )?;
752
- class.define_method("rename", method!(RbLazyFrame::rename, 2))?;
769
+ class.define_method("rename", method!(RbLazyFrame::rename, 3))?;
753
770
  class.define_method("reverse", method!(RbLazyFrame::reverse, 0))?;
754
771
  class.define_method("shift", method!(RbLazyFrame::shift, 2))?;
755
772
  class.define_method("fill_nan", method!(RbLazyFrame::fill_nan, 1))?;
@@ -770,6 +787,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
770
787
  class.define_method("unpivot", method!(RbLazyFrame::unpivot, 4))?;
771
788
  class.define_method("with_row_index", method!(RbLazyFrame::with_row_index, 2))?;
772
789
  class.define_method("drop", method!(RbLazyFrame::drop, 1))?;
790
+ class.define_method("cast", method!(RbLazyFrame::cast, 2))?;
773
791
  class.define_method("cast_all", method!(RbLazyFrame::cast_all, 2))?;
774
792
  class.define_method("_clone", method!(RbLazyFrame::clone, 0))?;
775
793
  class.define_method("collect_schema", method!(RbLazyFrame::collect_schema, 0))?;
@@ -810,7 +828,12 @@ fn init(ruby: &Ruby) -> RbResult<()> {
810
828
  class.define_singleton_method("new_array", function!(RbSeries::new_array, 5))?;
811
829
  class.define_singleton_method("new_decimal", function!(RbSeries::new_decimal, 3))?;
812
830
  class.define_singleton_method("repeat", function!(RbSeries::repeat, 4))?;
831
+ class.define_singleton_method(
832
+ "from_arrow_c_stream",
833
+ function!(RbSeries::from_arrow_c_stream, 1),
834
+ )?;
813
835
  class.define_method("struct_unnest", method!(RbSeries::struct_unnest, 0))?;
836
+ class.define_method("struct_fields", method!(RbSeries::struct_fields, 0))?;
814
837
  class.define_method(
815
838
  "is_sorted_flag",
816
839
  method!(RbSeries::is_sorted_ascending_flag, 0),
@@ -1081,5 +1104,9 @@ fn init(ruby: &Ruby) -> RbResult<()> {
1081
1104
  let class = module.define_class("RbStringCacheHolder", ruby.class_object())?;
1082
1105
  class.define_singleton_method("hold", function!(RbStringCacheHolder::hold, 0))?;
1083
1106
 
1107
+ // arrow array stream
1108
+ let class = module.define_class("ArrowArrayStream", ruby.class_object())?;
1109
+ class.define_method("to_i", method!(RbArrowArrayStream::to_i, 0))?;
1110
+
1084
1111
  Ok(())
1085
1112
  }
@@ -7,13 +7,16 @@ use super::*;
7
7
  use crate::{RbDataFrame, RbPolarsErr, RbSeries, Wrap};
8
8
 
9
9
  fn get_iters(df: &DataFrame) -> Vec<SeriesIter> {
10
- df.get_columns().iter().map(|s| s.iter()).collect()
10
+ df.get_columns()
11
+ .iter()
12
+ .map(|s| s.as_materialized_series().iter())
13
+ .collect()
11
14
  }
12
15
 
13
16
  fn get_iters_skip(df: &DataFrame, skip: usize) -> Vec<std::iter::Skip<SeriesIter>> {
14
17
  df.get_columns()
15
18
  .iter()
16
- .map(|s| s.iter().skip(skip))
19
+ .map(|s| s.as_materialized_series().iter().skip(skip))
17
20
  .collect()
18
21
  }
19
22
 
@@ -113,16 +116,17 @@ pub fn apply_lambda_unknown<'a>(
113
116
  true,
114
117
  ));
115
118
  } else if out.is_kind_of(class::array()) {
116
- return Err(RbPolarsErr::other(
119
+ return Err(RbPolarsErr::Other(
117
120
  "A list output type is invalid. Do you mean to create polars List Series?\
118
121
  Then return a Series object."
119
122
  .into(),
120
- ));
123
+ )
124
+ .into());
121
125
  } else {
122
- return Err(RbPolarsErr::other("Could not determine output type".into()));
126
+ return Err(RbPolarsErr::Other("Could not determine output type".into()).into());
123
127
  }
124
128
  }
125
- Err(RbPolarsErr::other("Could not determine output type".into()))
129
+ Err(RbPolarsErr::Other("Could not determine output type".into()).into())
126
130
  }
127
131
 
128
132
  fn apply_iter<T>(
@@ -1,10 +1,70 @@
1
- use magnus::Value;
1
+ use magnus::{prelude::*, RArray, Value};
2
2
  use polars::prelude::*;
3
3
 
4
- use crate::{RbExpr, Wrap};
4
+ use crate::rb_modules::*;
5
+ use crate::{RbExpr, RbSeries, Wrap};
5
6
 
6
- pub fn binary_lambda(_lambda: Value, _a: Series, _b: Series) -> PolarsResult<Option<Series>> {
7
- todo!();
7
+ fn to_series(v: Value, name: &str) -> PolarsResult<Series> {
8
+ let rb_rbseries = match v.funcall("_s", ()) {
9
+ Ok(s) => s,
10
+ // the lambda did not return a series, we try to create a new Ruby Series
11
+ _ => {
12
+ let data = RArray::new();
13
+ data.push(v).unwrap();
14
+ let res = series().funcall::<_, _, Value>("new", (name.to_string(), data));
15
+
16
+ match res {
17
+ Ok(ruby_s) => ruby_s.funcall::<_, _, &RbSeries>("_s", ()).unwrap(),
18
+ Err(_) => {
19
+ polars_bail!(ComputeError:
20
+ "expected a something that could convert to a `Series` but got: {}",
21
+ unsafe { v.classname() }
22
+ )
23
+ }
24
+ }
25
+ }
26
+ };
27
+ // Finally get the actual Series
28
+ Ok(rb_rbseries.series.borrow().clone())
29
+ }
30
+
31
+ pub fn binary_lambda(lambda: Value, a: Series, b: Series) -> PolarsResult<Option<Series>> {
32
+ // create a RbSeries struct/object for Ruby
33
+ let rbseries_a = RbSeries::new(a);
34
+ let rbseries_b = RbSeries::new(b);
35
+
36
+ // Wrap this RbSeries object in the Ruby side Series wrapper
37
+ let ruby_series_wrapper_a: Value = utils().funcall("wrap_s", (rbseries_a,)).unwrap();
38
+ let ruby_series_wrapper_b: Value = utils().funcall("wrap_s", (rbseries_b,)).unwrap();
39
+
40
+ // call the lambda and get a Ruby side Series wrapper
41
+ let result_series_wrapper: Value =
42
+ match lambda.funcall("call", (ruby_series_wrapper_a, ruby_series_wrapper_b)) {
43
+ Ok(rbobj) => rbobj,
44
+ Err(e) => polars_bail!(
45
+ ComputeError: "custom Ruby function failed: {}", e,
46
+ ),
47
+ };
48
+ let rbseries = if let Ok(rbexpr) = result_series_wrapper.funcall::<_, _, &RbExpr>("_rbexpr", ())
49
+ {
50
+ let expr = rbexpr.inner.clone();
51
+ let df = DataFrame::empty();
52
+ let out = df
53
+ .lazy()
54
+ .select([expr])
55
+ .with_predicate_pushdown(false)
56
+ .with_projection_pushdown(false)
57
+ .collect()?;
58
+
59
+ let s = out.select_at_idx(0).unwrap().clone();
60
+ RbSeries::new(s.take_materialized_series())
61
+ } else {
62
+ return Some(to_series(result_series_wrapper, "")).transpose();
63
+ };
64
+
65
+ // Finally get the actual Series
66
+ let binding = rbseries.series.borrow();
67
+ Ok(Some(binding.clone()))
8
68
  }
9
69
 
10
70
  pub fn map_single(
@@ -13,6 +73,7 @@ pub fn map_single(
13
73
  _output_type: Option<Wrap<DataType>>,
14
74
  _agg_list: bool,
15
75
  _is_elementwise: bool,
76
+ _returns_scalar: bool,
16
77
  ) -> RbExpr {
17
78
  todo!();
18
79
  }
@@ -35,7 +35,7 @@ fn iterator_to_struct(
35
35
  av @ AnyValue::Struct(_, _, flds) => (av._iter_struct_av().collect::<Vec<_>>(), &**flds),
36
36
  AnyValue::StructOwned(payload) => (payload.0.clone(), &*payload.1),
37
37
  _ => {
38
- return Err(crate::error::ComputeError::new_err(format!(
38
+ return Err(crate::exceptions::ComputeError::new_err(format!(
39
39
  "expected struct got {first_value:?}",
40
40
  )))
41
41
  }
@@ -70,7 +70,7 @@ fn iterator_to_struct(
70
70
  Some(dict) => {
71
71
  let dict = RHash::try_convert(dict)?;
72
72
  if dict.len() != struct_width {
73
- return Err(crate::error::ComputeError::new_err(
73
+ return Err(crate::exceptions::ComputeError::new_err(
74
74
  format!("Cannot create struct type.\n> The struct dtype expects {} fields, but it got a dict with {} fields.", struct_width, dict.len())
75
75
  ));
76
76
  }
@@ -93,10 +93,12 @@ fn iterator_to_struct(
93
93
  .collect::<Vec<_>>()
94
94
  });
95
95
 
96
- Ok(StructChunked::from_series(name, &fields)
97
- .unwrap()
98
- .into_series()
99
- .into())
96
+ Ok(
97
+ StructChunked::from_series(name, fields[0].len(), fields.iter())
98
+ .unwrap()
99
+ .into_series()
100
+ .into(),
101
+ )
100
102
  }
101
103
 
102
104
  fn iterator_to_primitive<T>(
@@ -232,8 +234,7 @@ fn iterator_to_list(
232
234
  name: PlSmallStr,
233
235
  capacity: usize,
234
236
  ) -> RbResult<ListChunked> {
235
- let mut builder =
236
- get_list_builder(dt, capacity * 5, capacity, name).map_err(RbPolarsErr::from)?;
237
+ let mut builder = get_list_builder(dt, capacity * 5, capacity, name);
237
238
  for _ in 0..init_null_count {
238
239
  builder.append_null()
239
240
  }
@@ -11,7 +11,7 @@ use polars_core::prelude::AnyValue;
11
11
  use crate::prelude::ObjectValue;
12
12
  use crate::Wrap;
13
13
 
14
- pub(crate) fn register_object_builder() {
14
+ pub(crate) fn register_startup_deps() {
15
15
  if !registry::is_object_builder_registered() {
16
16
  let object_builder = Box::new(|name: PlSmallStr, capacity: usize| {
17
17
  Box::new(ObjectChunkedBuilder::<ObjectValue>::new(name, capacity))
@@ -98,11 +98,7 @@ impl RbSeries {
98
98
  .into_value())
99
99
  }
100
100
 
101
- pub fn quantile(
102
- &self,
103
- quantile: f64,
104
- interpolation: Wrap<QuantileInterpolOptions>,
105
- ) -> RbResult<Value> {
101
+ pub fn quantile(&self, quantile: f64, interpolation: Wrap<QuantileMethod>) -> RbResult<Value> {
106
102
  let bind = self
107
103
  .series
108
104
  .borrow()
@@ -2,33 +2,33 @@ use crate::{RbPolarsErr, RbResult, RbSeries};
2
2
 
3
3
  impl RbSeries {
4
4
  pub fn add(&self, other: &RbSeries) -> RbResult<Self> {
5
- (&*self.series.borrow() + &*other.series.borrow())
5
+ Ok((&*self.series.borrow() + &*other.series.borrow())
6
6
  .map(Into::into)
7
- .map_err(RbPolarsErr::from)
7
+ .map_err(RbPolarsErr::from)?)
8
8
  }
9
9
 
10
10
  pub fn sub(&self, other: &RbSeries) -> RbResult<Self> {
11
- (&*self.series.borrow() - &*other.series.borrow())
11
+ Ok((&*self.series.borrow() - &*other.series.borrow())
12
12
  .map(Into::into)
13
- .map_err(RbPolarsErr::from)
13
+ .map_err(RbPolarsErr::from)?)
14
14
  }
15
15
 
16
16
  pub fn mul(&self, other: &RbSeries) -> RbResult<Self> {
17
- (&*self.series.borrow() * &*other.series.borrow())
17
+ Ok((&*self.series.borrow() * &*other.series.borrow())
18
18
  .map(Into::into)
19
- .map_err(RbPolarsErr::from)
19
+ .map_err(RbPolarsErr::from)?)
20
20
  }
21
21
 
22
22
  pub fn div(&self, other: &RbSeries) -> RbResult<Self> {
23
- (&*self.series.borrow() / &*other.series.borrow())
23
+ Ok((&*self.series.borrow() / &*other.series.borrow())
24
24
  .map(Into::into)
25
- .map_err(RbPolarsErr::from)
25
+ .map_err(RbPolarsErr::from)?)
26
26
  }
27
27
 
28
28
  pub fn rem(&self, other: &RbSeries) -> RbResult<Self> {
29
- (&*self.series.borrow() % &*other.series.borrow())
29
+ Ok((&*self.series.borrow() % &*other.series.borrow())
30
30
  .map(Into::into)
31
- .map_err(RbPolarsErr::from)
31
+ .map_err(RbPolarsErr::from)?)
32
32
  }
33
33
  }
34
34
 
@@ -4,7 +4,7 @@ use polars_core::prelude::*;
4
4
  use crate::any_value::rb_object_to_any_value;
5
5
  use crate::conversion::{slice_extract_wrapped, vec_extract_wrapped, Wrap};
6
6
  use crate::prelude::ObjectValue;
7
- use crate::series::to_series_collection;
7
+ use crate::series::to_series;
8
8
  use crate::{RbPolarsErr, RbResult, RbSeries, RbTypeError, RbValueError};
9
9
 
10
10
  impl RbSeries {
@@ -185,7 +185,7 @@ impl RbSeries {
185
185
  }
186
186
 
187
187
  pub fn new_series_list(name: String, val: RArray, _strict: bool) -> RbResult<Self> {
188
- let series_vec = to_series_collection(val)?;
188
+ let series_vec = to_series(val)?;
189
189
  Ok(Series::new(name.into(), &series_vec).into())
190
190
  }
191
191
 
@@ -31,7 +31,7 @@ impl RbSeries {
31
31
  for i in 0..series.len() {
32
32
  let obj: Option<&ObjectValue> = series.get_object(i).map(|any| any.into());
33
33
  match obj {
34
- Some(val) => v.push(val.to_object()).unwrap(),
34
+ Some(val) => v.push(val.to_value()).unwrap(),
35
35
  None => v.push(qnil()).unwrap(),
36
36
  };
37
37
  }