polars-df 0.14.0 → 0.16.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (87) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +35 -0
  3. data/Cargo.lock +1523 -378
  4. data/LICENSE.txt +1 -0
  5. data/README.md +38 -4
  6. data/ext/polars/Cargo.toml +15 -5
  7. data/ext/polars/src/batched_csv.rs +7 -10
  8. data/ext/polars/src/conversion/any_value.rs +31 -21
  9. data/ext/polars/src/conversion/mod.rs +155 -48
  10. data/ext/polars/src/dataframe/construction.rs +0 -3
  11. data/ext/polars/src/dataframe/export.rs +9 -2
  12. data/ext/polars/src/dataframe/general.rs +15 -57
  13. data/ext/polars/src/dataframe/io.rs +77 -169
  14. data/ext/polars/src/dataframe/mod.rs +1 -0
  15. data/ext/polars/src/dataframe/serde.rs +15 -0
  16. data/ext/polars/src/error.rs +31 -48
  17. data/ext/polars/src/exceptions.rs +24 -0
  18. data/ext/polars/src/expr/binary.rs +4 -42
  19. data/ext/polars/src/expr/datetime.rs +5 -4
  20. data/ext/polars/src/expr/general.rs +16 -22
  21. data/ext/polars/src/expr/list.rs +18 -11
  22. data/ext/polars/src/expr/meta.rs +6 -2
  23. data/ext/polars/src/expr/rolling.rs +6 -7
  24. data/ext/polars/src/expr/string.rs +9 -36
  25. data/ext/polars/src/file.rs +78 -23
  26. data/ext/polars/src/functions/aggregation.rs +4 -4
  27. data/ext/polars/src/functions/business.rs +15 -0
  28. data/ext/polars/src/functions/io.rs +34 -13
  29. data/ext/polars/src/functions/lazy.rs +22 -12
  30. data/ext/polars/src/functions/meta.rs +1 -1
  31. data/ext/polars/src/functions/mod.rs +1 -0
  32. data/ext/polars/src/interop/arrow/mod.rs +1 -0
  33. data/ext/polars/src/interop/arrow/to_ruby.rs +83 -0
  34. data/ext/polars/src/interop/mod.rs +1 -0
  35. data/ext/polars/src/lazyframe/general.rs +920 -0
  36. data/ext/polars/src/lazyframe/mod.rs +3 -827
  37. data/ext/polars/src/lazyframe/serde.rs +31 -0
  38. data/ext/polars/src/lib.rs +54 -27
  39. data/ext/polars/src/map/dataframe.rs +10 -6
  40. data/ext/polars/src/map/lazy.rs +65 -4
  41. data/ext/polars/src/map/mod.rs +9 -8
  42. data/ext/polars/src/on_startup.rs +1 -1
  43. data/ext/polars/src/series/aggregation.rs +1 -5
  44. data/ext/polars/src/series/arithmetic.rs +10 -10
  45. data/ext/polars/src/series/construction.rs +2 -2
  46. data/ext/polars/src/series/export.rs +1 -1
  47. data/ext/polars/src/series/general.rs +631 -0
  48. data/ext/polars/src/series/import.rs +55 -0
  49. data/ext/polars/src/series/mod.rs +11 -638
  50. data/ext/polars/src/series/scatter.rs +2 -2
  51. data/ext/polars/src/utils.rs +0 -20
  52. data/lib/polars/batched_csv_reader.rb +0 -2
  53. data/lib/polars/binary_expr.rb +133 -9
  54. data/lib/polars/binary_name_space.rb +101 -6
  55. data/lib/polars/config.rb +4 -0
  56. data/lib/polars/data_frame.rb +452 -101
  57. data/lib/polars/data_type_group.rb +28 -0
  58. data/lib/polars/data_types.rb +3 -1
  59. data/lib/polars/date_time_expr.rb +244 -0
  60. data/lib/polars/date_time_name_space.rb +87 -0
  61. data/lib/polars/expr.rb +103 -2
  62. data/lib/polars/functions/aggregation/horizontal.rb +10 -4
  63. data/lib/polars/functions/as_datatype.rb +51 -2
  64. data/lib/polars/functions/col.rb +1 -1
  65. data/lib/polars/functions/eager.rb +1 -3
  66. data/lib/polars/functions/lazy.rb +95 -13
  67. data/lib/polars/functions/range/time_range.rb +21 -21
  68. data/lib/polars/io/csv.rb +14 -16
  69. data/lib/polars/io/database.rb +2 -2
  70. data/lib/polars/io/delta.rb +126 -0
  71. data/lib/polars/io/ipc.rb +14 -4
  72. data/lib/polars/io/ndjson.rb +10 -0
  73. data/lib/polars/io/parquet.rb +168 -111
  74. data/lib/polars/lazy_frame.rb +684 -20
  75. data/lib/polars/list_name_space.rb +169 -0
  76. data/lib/polars/selectors.rb +1226 -0
  77. data/lib/polars/series.rb +465 -35
  78. data/lib/polars/string_cache.rb +27 -1
  79. data/lib/polars/string_expr.rb +0 -1
  80. data/lib/polars/string_name_space.rb +73 -3
  81. data/lib/polars/struct_name_space.rb +31 -7
  82. data/lib/polars/utils/various.rb +5 -1
  83. data/lib/polars/utils.rb +45 -10
  84. data/lib/polars/version.rb +1 -1
  85. data/lib/polars.rb +17 -1
  86. metadata +16 -9
  87. data/lib/polars/functions.rb +0 -57
@@ -0,0 +1,31 @@
1
+ use magnus::Value;
2
+ use polars::lazy::frame::LazyFrame;
3
+ use polars::prelude::*;
4
+ use std::io::Read;
5
+
6
+ use crate::file::get_file_like;
7
+ use crate::{RbLazyFrame, RbResult, RbValueError};
8
+
9
+ impl RbLazyFrame {
10
+ // TODO change to serialize_json
11
+ pub fn read_json(rb_f: Value) -> RbResult<Self> {
12
+ // it is faster to first read to memory and then parse: https://github.com/serde-rs/json/issues/160
13
+ // so don't bother with files.
14
+ let mut json = String::new();
15
+ let _ = get_file_like(rb_f, false)?
16
+ .read_to_string(&mut json)
17
+ .unwrap();
18
+
19
+ // Safety
20
+ // we skipped the serializing/deserializing of the static lifetime in `DataType`
21
+ // so we actually don't have a lifetime at all when serializing.
22
+
23
+ // &str still has a lifetime. But it's ok, because we drop it immediately
24
+ // in this scope
25
+ let json = unsafe { std::mem::transmute::<&'_ str, &'static str>(json.as_str()) };
26
+
27
+ let lp = serde_json::from_str::<DslPlan>(json)
28
+ .map_err(|err| RbValueError::new_err(format!("{:?}", err)))?;
29
+ Ok(LazyFrame::from(lp).into())
30
+ }
31
+ }
@@ -3,6 +3,7 @@ mod batched_csv;
3
3
  mod conversion;
4
4
  mod dataframe;
5
5
  mod error;
6
+ mod exceptions;
6
7
  mod expr;
7
8
  mod file;
8
9
  mod functions;
@@ -21,21 +22,31 @@ mod utils;
21
22
  use batched_csv::RbBatchedCsv;
22
23
  use conversion::*;
23
24
  use dataframe::RbDataFrame;
24
- use error::{RbPolarsErr, RbTypeError, RbValueError};
25
+ use error::RbPolarsErr;
26
+ use exceptions::{RbTypeError, RbValueError};
25
27
  use expr::rb_exprs_to_exprs;
26
28
  use expr::RbExpr;
27
29
  use functions::string_cache::RbStringCacheHolder;
28
30
  use functions::whenthen::{RbChainedThen, RbChainedWhen, RbThen, RbWhen};
31
+ use interop::arrow::to_ruby::RbArrowArrayStream;
29
32
  use lazyframe::RbLazyFrame;
30
33
  use lazygroupby::RbLazyGroupBy;
31
- use magnus::{define_module, function, method, prelude::*, Error, Ruby};
34
+ use magnus::{define_module, function, method, prelude::*, Ruby};
32
35
  use series::RbSeries;
33
36
  use sql::RbSQLContext;
34
37
 
35
- type RbResult<T> = Result<T, Error>;
38
+ use magnus::error::Result as RbResult;
39
+ use magnus::Error as RbErr;
40
+
41
+ // TODO move
42
+ fn re_escape(pattern: String) -> String {
43
+ regex::escape(&pattern)
44
+ }
36
45
 
37
46
  #[magnus::init]
38
47
  fn init(ruby: &Ruby) -> RbResult<()> {
48
+ crate::on_startup::register_startup_deps();
49
+
39
50
  let module = define_module("Polars")?;
40
51
 
41
52
  let class = module.define_class("RbBatchedCsv", ruby.class_object())?;
@@ -45,7 +56,6 @@ fn init(ruby: &Ruby) -> RbResult<()> {
45
56
  let class = module.define_class("RbDataFrame", ruby.class_object())?;
46
57
  class.define_singleton_method("new", function!(RbDataFrame::init, 1))?;
47
58
  class.define_singleton_method("read_csv", function!(RbDataFrame::read_csv, -1))?;
48
- class.define_singleton_method("read_parquet", function!(RbDataFrame::read_parquet, 9))?;
49
59
  class.define_singleton_method("read_ipc", function!(RbDataFrame::read_ipc, 6))?;
50
60
  class.define_singleton_method(
51
61
  "read_ipc_stream",
@@ -58,17 +68,21 @@ fn init(ruby: &Ruby) -> RbResult<()> {
58
68
  class.define_singleton_method("read_ndjson", function!(RbDataFrame::read_ndjson, 4))?;
59
69
  class.define_method("estimated_size", method!(RbDataFrame::estimated_size, 0))?;
60
70
  class.define_method("dtype_strings", method!(RbDataFrame::dtype_strings, 0))?;
61
- class.define_method("write_avro", method!(RbDataFrame::write_avro, 2))?;
71
+ class.define_method("write_avro", method!(RbDataFrame::write_avro, 3))?;
62
72
  class.define_method("write_json", method!(RbDataFrame::write_json, 3))?;
63
73
  class.define_method("write_ndjson", method!(RbDataFrame::write_ndjson, 1))?;
64
74
  class.define_method("write_csv", method!(RbDataFrame::write_csv, 10))?;
65
- class.define_method("write_ipc", method!(RbDataFrame::write_ipc, 2))?;
75
+ class.define_method("write_ipc", method!(RbDataFrame::write_ipc, 5))?;
66
76
  class.define_method(
67
77
  "write_ipc_stream",
68
- method!(RbDataFrame::write_ipc_stream, 2),
78
+ method!(RbDataFrame::write_ipc_stream, 3),
69
79
  )?;
70
80
  class.define_method("row_tuple", method!(RbDataFrame::row_tuple, 1))?;
71
81
  class.define_method("row_tuples", method!(RbDataFrame::row_tuples, 0))?;
82
+ class.define_method(
83
+ "arrow_c_stream",
84
+ method!(RbDataFrame::__arrow_c_stream__, 0),
85
+ )?;
72
86
  class.define_method("to_numo", method!(RbDataFrame::to_numo, 0))?;
73
87
  class.define_method("write_parquet", method!(RbDataFrame::write_parquet, 6))?;
74
88
  class.define_method("add", method!(RbDataFrame::add, 1))?;
@@ -129,10 +143,6 @@ fn init(ruby: &Ruby) -> RbResult<()> {
129
143
  class.define_method("pivot_expr", method!(RbDataFrame::pivot_expr, 7))?;
130
144
  class.define_method("partition_by", method!(RbDataFrame::partition_by, 3))?;
131
145
  class.define_method("lazy", method!(RbDataFrame::lazy, 0))?;
132
- class.define_method("mean_horizontal", method!(RbDataFrame::mean_horizontal, 1))?;
133
- class.define_method("max_horizontal", method!(RbDataFrame::max_horizontal, 0))?;
134
- class.define_method("min_horizontal", method!(RbDataFrame::min_horizontal, 0))?;
135
- class.define_method("sum_horizontal", method!(RbDataFrame::sum_horizontal, 1))?;
136
146
  class.define_method("to_dummies", method!(RbDataFrame::to_dummies, 3))?;
137
147
  class.define_method("null_count", method!(RbDataFrame::null_count, 0))?;
138
148
  class.define_method("map_rows", method!(RbDataFrame::map_rows, 3))?;
@@ -143,6 +153,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
143
153
  class.define_method("to_struct", method!(RbDataFrame::to_struct, 1))?;
144
154
  class.define_method("unnest", method!(RbDataFrame::unnest, 1))?;
145
155
  class.define_method("clear", method!(RbDataFrame::clear, 0))?;
156
+ class.define_method("serialize_json", method!(RbDataFrame::serialize_json, 1))?;
146
157
 
147
158
  let class = module.define_class("RbExpr", ruby.class_object())?;
148
159
  class.define_method("+", method!(RbExpr::add, 1))?;
@@ -286,6 +297,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
286
297
  class.define_method("str_slice", method!(RbExpr::str_slice, 2))?;
287
298
  class.define_method("str_to_uppercase", method!(RbExpr::str_to_uppercase, 0))?;
288
299
  class.define_method("str_to_lowercase", method!(RbExpr::str_to_lowercase, 0))?;
300
+ // class.define_method("str_to_titlecase", method!(RbExpr::str_to_titlecase, 0))?;
289
301
  class.define_method("str_len_bytes", method!(RbExpr::str_len_bytes, 0))?;
290
302
  class.define_method("str_len_chars", method!(RbExpr::str_len_chars, 0))?;
291
303
  class.define_method("str_replace_n", method!(RbExpr::str_replace_n, 4))?;
@@ -412,7 +424,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
412
424
  class.define_method("dt_dst_offset", method!(RbExpr::dt_dst_offset, 0))?;
413
425
  class.define_method("dt_round", method!(RbExpr::dt_round, 1))?;
414
426
  class.define_method("dt_combine", method!(RbExpr::dt_combine, 2))?;
415
- class.define_method("map_batches", method!(RbExpr::map_batches, 4))?;
427
+ class.define_method("map_batches", method!(RbExpr::map_batches, 5))?;
416
428
  class.define_method("dot", method!(RbExpr::dot, 1))?;
417
429
  class.define_method("reinterpret", method!(RbExpr::reinterpret, 1))?;
418
430
  class.define_method("mode", method!(RbExpr::mode, 0))?;
@@ -552,10 +564,10 @@ fn init(ruby: &Ruby) -> RbResult<()> {
552
564
  class.define_singleton_method("int_range", function!(functions::range::int_range, 4))?;
553
565
  class.define_singleton_method("int_ranges", function!(functions::range::int_ranges, 4))?;
554
566
  class.define_singleton_method("repeat", function!(functions::lazy::repeat, 3))?;
555
- class.define_singleton_method("pearson_corr", function!(functions::lazy::pearson_corr, 3))?;
567
+ class.define_singleton_method("pearson_corr", function!(functions::lazy::pearson_corr, 2))?;
556
568
  class.define_singleton_method(
557
569
  "spearman_rank_corr",
558
- function!(functions::lazy::spearman_rank_corr, 4),
570
+ function!(functions::lazy::spearman_rank_corr, 3),
559
571
  )?;
560
572
  class.define_singleton_method("sql_expr", function!(functions::lazy::sql_expr, 1))?;
561
573
  class.define_singleton_method("cov", function!(functions::lazy::cov, 3))?;
@@ -567,6 +579,10 @@ fn init(ruby: &Ruby) -> RbResult<()> {
567
579
  class.define_singleton_method("when", function!(functions::whenthen::when, 1))?;
568
580
  class.define_singleton_method("concat_str", function!(functions::lazy::concat_str, 3))?;
569
581
  class.define_singleton_method("concat_list", function!(functions::lazy::concat_list, 1))?;
582
+ class.define_singleton_method(
583
+ "business_day_count",
584
+ function!(functions::business::business_day_count, 4),
585
+ )?;
570
586
  class.define_singleton_method(
571
587
  "all_horizontal",
572
588
  function!(functions::aggregation::all_horizontal, 1),
@@ -585,11 +601,11 @@ fn init(ruby: &Ruby) -> RbResult<()> {
585
601
  )?;
586
602
  class.define_singleton_method(
587
603
  "sum_horizontal",
588
- function!(functions::aggregation::sum_horizontal, 1),
604
+ function!(functions::aggregation::sum_horizontal, 2),
589
605
  )?;
590
606
  class.define_singleton_method(
591
607
  "mean_horizontal",
592
- function!(functions::aggregation::mean_horizontal, 1),
608
+ function!(functions::aggregation::mean_horizontal, 2),
593
609
  )?;
594
610
  class.define_singleton_method("as_struct", function!(functions::lazy::as_struct, 1))?;
595
611
  class.define_singleton_method("coalesce", function!(functions::lazy::coalesce, 1))?;
@@ -640,8 +656,8 @@ fn init(ruby: &Ruby) -> RbResult<()> {
640
656
  function!(functions::meta::get_index_type, 0),
641
657
  )?;
642
658
  class.define_singleton_method(
643
- "threadpool_size",
644
- function!(functions::meta::threadpool_size, 0),
659
+ "thread_pool_size",
660
+ function!(functions::meta::thread_pool_size, 0),
645
661
  )?;
646
662
  class.define_singleton_method(
647
663
  "enable_string_cache",
@@ -699,19 +715,20 @@ fn init(ruby: &Ruby) -> RbResult<()> {
699
715
  "set_random_seed",
700
716
  function!(functions::random::set_random_seed, 1),
701
717
  )?;
718
+ class.define_singleton_method("re_escape", function!(re_escape, 1))?;
702
719
 
703
720
  let class = module.define_class("RbLazyFrame", ruby.class_object())?;
704
721
  class.define_singleton_method("read_json", function!(RbLazyFrame::read_json, 1))?;
705
722
  class.define_singleton_method(
706
723
  "new_from_ndjson",
707
- function!(RbLazyFrame::new_from_ndjson, 7),
724
+ function!(RbLazyFrame::new_from_ndjson, 8),
708
725
  )?;
709
726
  class.define_singleton_method("new_from_csv", function!(RbLazyFrame::new_from_csv, -1))?;
710
727
  class.define_singleton_method(
711
728
  "new_from_parquet",
712
- function!(RbLazyFrame::new_from_parquet, 14),
729
+ function!(RbLazyFrame::new_from_parquet, -1),
713
730
  )?;
714
- class.define_singleton_method("new_from_ipc", function!(RbLazyFrame::new_from_ipc, 9))?;
731
+ class.define_singleton_method("new_from_ipc", function!(RbLazyFrame::new_from_ipc, 10))?;
715
732
  class.define_method("write_json", method!(RbLazyFrame::write_json, 1))?;
716
733
  class.define_method("describe_plan", method!(RbLazyFrame::describe_plan, 0))?;
717
734
  class.define_method(
@@ -726,10 +743,10 @@ fn init(ruby: &Ruby) -> RbResult<()> {
726
743
  class.define_method("sort_by_exprs", method!(RbLazyFrame::sort_by_exprs, 5))?;
727
744
  class.define_method("cache", method!(RbLazyFrame::cache, 0))?;
728
745
  class.define_method("collect", method!(RbLazyFrame::collect, 0))?;
729
- class.define_method("sink_parquet", method!(RbLazyFrame::sink_parquet, 7))?;
730
- class.define_method("sink_ipc", method!(RbLazyFrame::sink_ipc, 3))?;
746
+ class.define_method("sink_parquet", method!(RbLazyFrame::sink_parquet, 9))?;
747
+ class.define_method("sink_ipc", method!(RbLazyFrame::sink_ipc, 5))?;
731
748
  class.define_method("sink_csv", method!(RbLazyFrame::sink_csv, 15))?;
732
- class.define_method("sink_json", method!(RbLazyFrame::sink_json, 2))?;
749
+ class.define_method("sink_json", method!(RbLazyFrame::sink_json, 4))?;
733
750
  class.define_method("fetch", method!(RbLazyFrame::fetch, 1))?;
734
751
  class.define_method("filter", method!(RbLazyFrame::filter, 1))?;
735
752
  class.define_method("select", method!(RbLazyFrame::select, 1))?;
@@ -741,15 +758,15 @@ fn init(ruby: &Ruby) -> RbResult<()> {
741
758
  method!(RbLazyFrame::group_by_dynamic, 9),
742
759
  )?;
743
760
  class.define_method("with_context", method!(RbLazyFrame::with_context, 1))?;
744
- class.define_method("join_asof", method!(RbLazyFrame::join_asof, 11))?;
745
- class.define_method("join", method!(RbLazyFrame::join, 8))?;
761
+ class.define_method("join_asof", method!(RbLazyFrame::join_asof, 12))?;
762
+ class.define_method("join", method!(RbLazyFrame::join, 10))?;
746
763
  class.define_method("with_column", method!(RbLazyFrame::with_column, 1))?;
747
764
  class.define_method("with_columns", method!(RbLazyFrame::with_columns, 1))?;
748
765
  class.define_method(
749
766
  "with_columns_seq",
750
767
  method!(RbLazyFrame::with_columns_seq, 1),
751
768
  )?;
752
- class.define_method("rename", method!(RbLazyFrame::rename, 2))?;
769
+ class.define_method("rename", method!(RbLazyFrame::rename, 3))?;
753
770
  class.define_method("reverse", method!(RbLazyFrame::reverse, 0))?;
754
771
  class.define_method("shift", method!(RbLazyFrame::shift, 2))?;
755
772
  class.define_method("fill_nan", method!(RbLazyFrame::fill_nan, 1))?;
@@ -770,6 +787,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
770
787
  class.define_method("unpivot", method!(RbLazyFrame::unpivot, 4))?;
771
788
  class.define_method("with_row_index", method!(RbLazyFrame::with_row_index, 2))?;
772
789
  class.define_method("drop", method!(RbLazyFrame::drop, 1))?;
790
+ class.define_method("cast", method!(RbLazyFrame::cast, 2))?;
773
791
  class.define_method("cast_all", method!(RbLazyFrame::cast_all, 2))?;
774
792
  class.define_method("_clone", method!(RbLazyFrame::clone, 0))?;
775
793
  class.define_method("collect_schema", method!(RbLazyFrame::collect_schema, 0))?;
@@ -810,7 +828,12 @@ fn init(ruby: &Ruby) -> RbResult<()> {
810
828
  class.define_singleton_method("new_array", function!(RbSeries::new_array, 5))?;
811
829
  class.define_singleton_method("new_decimal", function!(RbSeries::new_decimal, 3))?;
812
830
  class.define_singleton_method("repeat", function!(RbSeries::repeat, 4))?;
831
+ class.define_singleton_method(
832
+ "from_arrow_c_stream",
833
+ function!(RbSeries::from_arrow_c_stream, 1),
834
+ )?;
813
835
  class.define_method("struct_unnest", method!(RbSeries::struct_unnest, 0))?;
836
+ class.define_method("struct_fields", method!(RbSeries::struct_fields, 0))?;
814
837
  class.define_method(
815
838
  "is_sorted_flag",
816
839
  method!(RbSeries::is_sorted_ascending_flag, 0),
@@ -1081,5 +1104,9 @@ fn init(ruby: &Ruby) -> RbResult<()> {
1081
1104
  let class = module.define_class("RbStringCacheHolder", ruby.class_object())?;
1082
1105
  class.define_singleton_method("hold", function!(RbStringCacheHolder::hold, 0))?;
1083
1106
 
1107
+ // arrow array stream
1108
+ let class = module.define_class("ArrowArrayStream", ruby.class_object())?;
1109
+ class.define_method("to_i", method!(RbArrowArrayStream::to_i, 0))?;
1110
+
1084
1111
  Ok(())
1085
1112
  }
@@ -7,13 +7,16 @@ use super::*;
7
7
  use crate::{RbDataFrame, RbPolarsErr, RbSeries, Wrap};
8
8
 
9
9
  fn get_iters(df: &DataFrame) -> Vec<SeriesIter> {
10
- df.get_columns().iter().map(|s| s.iter()).collect()
10
+ df.get_columns()
11
+ .iter()
12
+ .map(|s| s.as_materialized_series().iter())
13
+ .collect()
11
14
  }
12
15
 
13
16
  fn get_iters_skip(df: &DataFrame, skip: usize) -> Vec<std::iter::Skip<SeriesIter>> {
14
17
  df.get_columns()
15
18
  .iter()
16
- .map(|s| s.iter().skip(skip))
19
+ .map(|s| s.as_materialized_series().iter().skip(skip))
17
20
  .collect()
18
21
  }
19
22
 
@@ -113,16 +116,17 @@ pub fn apply_lambda_unknown<'a>(
113
116
  true,
114
117
  ));
115
118
  } else if out.is_kind_of(class::array()) {
116
- return Err(RbPolarsErr::other(
119
+ return Err(RbPolarsErr::Other(
117
120
  "A list output type is invalid. Do you mean to create polars List Series?\
118
121
  Then return a Series object."
119
122
  .into(),
120
- ));
123
+ )
124
+ .into());
121
125
  } else {
122
- return Err(RbPolarsErr::other("Could not determine output type".into()));
126
+ return Err(RbPolarsErr::Other("Could not determine output type".into()).into());
123
127
  }
124
128
  }
125
- Err(RbPolarsErr::other("Could not determine output type".into()))
129
+ Err(RbPolarsErr::Other("Could not determine output type".into()).into())
126
130
  }
127
131
 
128
132
  fn apply_iter<T>(
@@ -1,10 +1,70 @@
1
- use magnus::Value;
1
+ use magnus::{prelude::*, RArray, Value};
2
2
  use polars::prelude::*;
3
3
 
4
- use crate::{RbExpr, Wrap};
4
+ use crate::rb_modules::*;
5
+ use crate::{RbExpr, RbSeries, Wrap};
5
6
 
6
- pub fn binary_lambda(_lambda: Value, _a: Series, _b: Series) -> PolarsResult<Option<Series>> {
7
- todo!();
7
+ fn to_series(v: Value, name: &str) -> PolarsResult<Series> {
8
+ let rb_rbseries = match v.funcall("_s", ()) {
9
+ Ok(s) => s,
10
+ // the lambda did not return a series, we try to create a new Ruby Series
11
+ _ => {
12
+ let data = RArray::new();
13
+ data.push(v).unwrap();
14
+ let res = series().funcall::<_, _, Value>("new", (name.to_string(), data));
15
+
16
+ match res {
17
+ Ok(ruby_s) => ruby_s.funcall::<_, _, &RbSeries>("_s", ()).unwrap(),
18
+ Err(_) => {
19
+ polars_bail!(ComputeError:
20
+ "expected a something that could convert to a `Series` but got: {}",
21
+ unsafe { v.classname() }
22
+ )
23
+ }
24
+ }
25
+ }
26
+ };
27
+ // Finally get the actual Series
28
+ Ok(rb_rbseries.series.borrow().clone())
29
+ }
30
+
31
+ pub fn binary_lambda(lambda: Value, a: Series, b: Series) -> PolarsResult<Option<Series>> {
32
+ // create a RbSeries struct/object for Ruby
33
+ let rbseries_a = RbSeries::new(a);
34
+ let rbseries_b = RbSeries::new(b);
35
+
36
+ // Wrap this RbSeries object in the Ruby side Series wrapper
37
+ let ruby_series_wrapper_a: Value = utils().funcall("wrap_s", (rbseries_a,)).unwrap();
38
+ let ruby_series_wrapper_b: Value = utils().funcall("wrap_s", (rbseries_b,)).unwrap();
39
+
40
+ // call the lambda and get a Ruby side Series wrapper
41
+ let result_series_wrapper: Value =
42
+ match lambda.funcall("call", (ruby_series_wrapper_a, ruby_series_wrapper_b)) {
43
+ Ok(rbobj) => rbobj,
44
+ Err(e) => polars_bail!(
45
+ ComputeError: "custom Ruby function failed: {}", e,
46
+ ),
47
+ };
48
+ let rbseries = if let Ok(rbexpr) = result_series_wrapper.funcall::<_, _, &RbExpr>("_rbexpr", ())
49
+ {
50
+ let expr = rbexpr.inner.clone();
51
+ let df = DataFrame::empty();
52
+ let out = df
53
+ .lazy()
54
+ .select([expr])
55
+ .with_predicate_pushdown(false)
56
+ .with_projection_pushdown(false)
57
+ .collect()?;
58
+
59
+ let s = out.select_at_idx(0).unwrap().clone();
60
+ RbSeries::new(s.take_materialized_series())
61
+ } else {
62
+ return Some(to_series(result_series_wrapper, "")).transpose();
63
+ };
64
+
65
+ // Finally get the actual Series
66
+ let binding = rbseries.series.borrow();
67
+ Ok(Some(binding.clone()))
8
68
  }
9
69
 
10
70
  pub fn map_single(
@@ -13,6 +73,7 @@ pub fn map_single(
13
73
  _output_type: Option<Wrap<DataType>>,
14
74
  _agg_list: bool,
15
75
  _is_elementwise: bool,
76
+ _returns_scalar: bool,
16
77
  ) -> RbExpr {
17
78
  todo!();
18
79
  }
@@ -35,7 +35,7 @@ fn iterator_to_struct(
35
35
  av @ AnyValue::Struct(_, _, flds) => (av._iter_struct_av().collect::<Vec<_>>(), &**flds),
36
36
  AnyValue::StructOwned(payload) => (payload.0.clone(), &*payload.1),
37
37
  _ => {
38
- return Err(crate::error::ComputeError::new_err(format!(
38
+ return Err(crate::exceptions::ComputeError::new_err(format!(
39
39
  "expected struct got {first_value:?}",
40
40
  )))
41
41
  }
@@ -70,7 +70,7 @@ fn iterator_to_struct(
70
70
  Some(dict) => {
71
71
  let dict = RHash::try_convert(dict)?;
72
72
  if dict.len() != struct_width {
73
- return Err(crate::error::ComputeError::new_err(
73
+ return Err(crate::exceptions::ComputeError::new_err(
74
74
  format!("Cannot create struct type.\n> The struct dtype expects {} fields, but it got a dict with {} fields.", struct_width, dict.len())
75
75
  ));
76
76
  }
@@ -93,10 +93,12 @@ fn iterator_to_struct(
93
93
  .collect::<Vec<_>>()
94
94
  });
95
95
 
96
- Ok(StructChunked::from_series(name, &fields)
97
- .unwrap()
98
- .into_series()
99
- .into())
96
+ Ok(
97
+ StructChunked::from_series(name, fields[0].len(), fields.iter())
98
+ .unwrap()
99
+ .into_series()
100
+ .into(),
101
+ )
100
102
  }
101
103
 
102
104
  fn iterator_to_primitive<T>(
@@ -232,8 +234,7 @@ fn iterator_to_list(
232
234
  name: PlSmallStr,
233
235
  capacity: usize,
234
236
  ) -> RbResult<ListChunked> {
235
- let mut builder =
236
- get_list_builder(dt, capacity * 5, capacity, name).map_err(RbPolarsErr::from)?;
237
+ let mut builder = get_list_builder(dt, capacity * 5, capacity, name);
237
238
  for _ in 0..init_null_count {
238
239
  builder.append_null()
239
240
  }
@@ -11,7 +11,7 @@ use polars_core::prelude::AnyValue;
11
11
  use crate::prelude::ObjectValue;
12
12
  use crate::Wrap;
13
13
 
14
- pub(crate) fn register_object_builder() {
14
+ pub(crate) fn register_startup_deps() {
15
15
  if !registry::is_object_builder_registered() {
16
16
  let object_builder = Box::new(|name: PlSmallStr, capacity: usize| {
17
17
  Box::new(ObjectChunkedBuilder::<ObjectValue>::new(name, capacity))
@@ -98,11 +98,7 @@ impl RbSeries {
98
98
  .into_value())
99
99
  }
100
100
 
101
- pub fn quantile(
102
- &self,
103
- quantile: f64,
104
- interpolation: Wrap<QuantileInterpolOptions>,
105
- ) -> RbResult<Value> {
101
+ pub fn quantile(&self, quantile: f64, interpolation: Wrap<QuantileMethod>) -> RbResult<Value> {
106
102
  let bind = self
107
103
  .series
108
104
  .borrow()
@@ -2,33 +2,33 @@ use crate::{RbPolarsErr, RbResult, RbSeries};
2
2
 
3
3
  impl RbSeries {
4
4
  pub fn add(&self, other: &RbSeries) -> RbResult<Self> {
5
- (&*self.series.borrow() + &*other.series.borrow())
5
+ Ok((&*self.series.borrow() + &*other.series.borrow())
6
6
  .map(Into::into)
7
- .map_err(RbPolarsErr::from)
7
+ .map_err(RbPolarsErr::from)?)
8
8
  }
9
9
 
10
10
  pub fn sub(&self, other: &RbSeries) -> RbResult<Self> {
11
- (&*self.series.borrow() - &*other.series.borrow())
11
+ Ok((&*self.series.borrow() - &*other.series.borrow())
12
12
  .map(Into::into)
13
- .map_err(RbPolarsErr::from)
13
+ .map_err(RbPolarsErr::from)?)
14
14
  }
15
15
 
16
16
  pub fn mul(&self, other: &RbSeries) -> RbResult<Self> {
17
- (&*self.series.borrow() * &*other.series.borrow())
17
+ Ok((&*self.series.borrow() * &*other.series.borrow())
18
18
  .map(Into::into)
19
- .map_err(RbPolarsErr::from)
19
+ .map_err(RbPolarsErr::from)?)
20
20
  }
21
21
 
22
22
  pub fn div(&self, other: &RbSeries) -> RbResult<Self> {
23
- (&*self.series.borrow() / &*other.series.borrow())
23
+ Ok((&*self.series.borrow() / &*other.series.borrow())
24
24
  .map(Into::into)
25
- .map_err(RbPolarsErr::from)
25
+ .map_err(RbPolarsErr::from)?)
26
26
  }
27
27
 
28
28
  pub fn rem(&self, other: &RbSeries) -> RbResult<Self> {
29
- (&*self.series.borrow() % &*other.series.borrow())
29
+ Ok((&*self.series.borrow() % &*other.series.borrow())
30
30
  .map(Into::into)
31
- .map_err(RbPolarsErr::from)
31
+ .map_err(RbPolarsErr::from)?)
32
32
  }
33
33
  }
34
34
 
@@ -4,7 +4,7 @@ use polars_core::prelude::*;
4
4
  use crate::any_value::rb_object_to_any_value;
5
5
  use crate::conversion::{slice_extract_wrapped, vec_extract_wrapped, Wrap};
6
6
  use crate::prelude::ObjectValue;
7
- use crate::series::to_series_collection;
7
+ use crate::series::to_series;
8
8
  use crate::{RbPolarsErr, RbResult, RbSeries, RbTypeError, RbValueError};
9
9
 
10
10
  impl RbSeries {
@@ -185,7 +185,7 @@ impl RbSeries {
185
185
  }
186
186
 
187
187
  pub fn new_series_list(name: String, val: RArray, _strict: bool) -> RbResult<Self> {
188
- let series_vec = to_series_collection(val)?;
188
+ let series_vec = to_series(val)?;
189
189
  Ok(Series::new(name.into(), &series_vec).into())
190
190
  }
191
191
 
@@ -31,7 +31,7 @@ impl RbSeries {
31
31
  for i in 0..series.len() {
32
32
  let obj: Option<&ObjectValue> = series.get_object(i).map(|any| any.into());
33
33
  match obj {
34
- Some(val) => v.push(val.to_object()).unwrap(),
34
+ Some(val) => v.push(val.to_value()).unwrap(),
35
35
  None => v.push(qnil()).unwrap(),
36
36
  };
37
37
  }