polars-df 0.13.0 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +30 -0
  3. data/Cargo.lock +1368 -319
  4. data/LICENSE.txt +1 -0
  5. data/README.md +1 -2
  6. data/ext/polars/Cargo.toml +15 -6
  7. data/ext/polars/src/batched_csv.rs +10 -13
  8. data/ext/polars/src/conversion/any_value.rs +37 -21
  9. data/ext/polars/src/conversion/chunked_array.rs +3 -3
  10. data/ext/polars/src/conversion/mod.rs +159 -46
  11. data/ext/polars/src/dataframe/construction.rs +4 -7
  12. data/ext/polars/src/dataframe/export.rs +9 -2
  13. data/ext/polars/src/dataframe/general.rs +22 -16
  14. data/ext/polars/src/dataframe/io.rs +78 -174
  15. data/ext/polars/src/dataframe/mod.rs +1 -0
  16. data/ext/polars/src/dataframe/serde.rs +15 -0
  17. data/ext/polars/src/error.rs +31 -48
  18. data/ext/polars/src/exceptions.rs +24 -0
  19. data/ext/polars/src/expr/binary.rs +4 -42
  20. data/ext/polars/src/expr/datetime.rs +16 -7
  21. data/ext/polars/src/expr/general.rs +14 -23
  22. data/ext/polars/src/expr/list.rs +18 -11
  23. data/ext/polars/src/expr/name.rs +3 -2
  24. data/ext/polars/src/expr/rolling.rs +6 -7
  25. data/ext/polars/src/expr/string.rs +17 -37
  26. data/ext/polars/src/file.rs +59 -22
  27. data/ext/polars/src/functions/business.rs +15 -0
  28. data/ext/polars/src/functions/io.rs +6 -6
  29. data/ext/polars/src/functions/lazy.rs +17 -8
  30. data/ext/polars/src/functions/mod.rs +1 -0
  31. data/ext/polars/src/functions/range.rs +4 -2
  32. data/ext/polars/src/interop/arrow/mod.rs +1 -0
  33. data/ext/polars/src/interop/arrow/to_ruby.rs +83 -0
  34. data/ext/polars/src/interop/mod.rs +1 -0
  35. data/ext/polars/src/lazyframe/general.rs +877 -0
  36. data/ext/polars/src/lazyframe/mod.rs +3 -825
  37. data/ext/polars/src/lazyframe/serde.rs +31 -0
  38. data/ext/polars/src/lib.rs +44 -13
  39. data/ext/polars/src/map/dataframe.rs +46 -14
  40. data/ext/polars/src/map/lazy.rs +65 -4
  41. data/ext/polars/src/map/mod.rs +17 -16
  42. data/ext/polars/src/map/series.rs +106 -64
  43. data/ext/polars/src/on_startup.rs +2 -2
  44. data/ext/polars/src/series/aggregation.rs +1 -5
  45. data/ext/polars/src/series/arithmetic.rs +10 -10
  46. data/ext/polars/src/series/construction.rs +52 -25
  47. data/ext/polars/src/series/export.rs +1 -1
  48. data/ext/polars/src/series/general.rs +643 -0
  49. data/ext/polars/src/series/import.rs +55 -0
  50. data/ext/polars/src/series/mod.rs +11 -638
  51. data/ext/polars/src/series/scatter.rs +2 -2
  52. data/ext/polars/src/utils.rs +0 -20
  53. data/lib/polars/batched_csv_reader.rb +0 -2
  54. data/lib/polars/binary_expr.rb +133 -9
  55. data/lib/polars/binary_name_space.rb +101 -6
  56. data/lib/polars/config.rb +4 -0
  57. data/lib/polars/data_frame.rb +285 -62
  58. data/lib/polars/data_type_group.rb +28 -0
  59. data/lib/polars/data_types.rb +2 -0
  60. data/lib/polars/date_time_expr.rb +244 -0
  61. data/lib/polars/date_time_name_space.rb +87 -0
  62. data/lib/polars/expr.rb +109 -8
  63. data/lib/polars/functions/as_datatype.rb +51 -2
  64. data/lib/polars/functions/col.rb +1 -1
  65. data/lib/polars/functions/eager.rb +1 -3
  66. data/lib/polars/functions/lazy.rb +88 -10
  67. data/lib/polars/functions/range/time_range.rb +21 -21
  68. data/lib/polars/io/csv.rb +14 -16
  69. data/lib/polars/io/database.rb +2 -2
  70. data/lib/polars/io/ipc.rb +14 -12
  71. data/lib/polars/io/ndjson.rb +10 -0
  72. data/lib/polars/io/parquet.rb +168 -111
  73. data/lib/polars/lazy_frame.rb +649 -15
  74. data/lib/polars/list_name_space.rb +169 -0
  75. data/lib/polars/selectors.rb +1144 -0
  76. data/lib/polars/series.rb +470 -40
  77. data/lib/polars/string_cache.rb +27 -1
  78. data/lib/polars/string_expr.rb +0 -1
  79. data/lib/polars/string_name_space.rb +73 -3
  80. data/lib/polars/struct_name_space.rb +31 -7
  81. data/lib/polars/utils/various.rb +5 -1
  82. data/lib/polars/utils.rb +45 -10
  83. data/lib/polars/version.rb +1 -1
  84. data/lib/polars.rb +2 -1
  85. metadata +14 -4
  86. data/lib/polars/functions.rb +0 -57
@@ -0,0 +1,31 @@
1
+ use magnus::Value;
2
+ use polars::lazy::frame::LazyFrame;
3
+ use polars::prelude::*;
4
+ use std::io::Read;
5
+
6
+ use crate::file::get_file_like;
7
+ use crate::{RbLazyFrame, RbResult, RbValueError};
8
+
9
+ impl RbLazyFrame {
10
+ // TODO change to serialize_json
11
+ pub fn read_json(rb_f: Value) -> RbResult<Self> {
12
+ // it is faster to first read to memory and then parse: https://github.com/serde-rs/json/issues/160
13
+ // so don't bother with files.
14
+ let mut json = String::new();
15
+ let _ = get_file_like(rb_f, false)?
16
+ .read_to_string(&mut json)
17
+ .unwrap();
18
+
19
+ // Safety
20
+ // we skipped the serializing/deserializing of the `'static` lifetime in `DataType`
21
+ // so we actually don't have a lifetime at all when serializing.
22
+
23
+ // &str still has a lifetime. But it's ok, because we drop it immediately
24
+ // in this scope
25
+ let json = unsafe { std::mem::transmute::<&'_ str, &'static str>(json.as_str()) };
26
+
27
+ let lp = serde_json::from_str::<DslPlan>(json)
28
+ .map_err(|err| RbValueError::new_err(format!("{:?}", err)))?;
29
+ Ok(LazyFrame::from(lp).into())
30
+ }
31
+ }
@@ -3,6 +3,7 @@ mod batched_csv;
3
3
  mod conversion;
4
4
  mod dataframe;
5
5
  mod error;
6
+ mod exceptions;
6
7
  mod expr;
7
8
  mod file;
8
9
  mod functions;
@@ -21,21 +22,31 @@ mod utils;
21
22
  use batched_csv::RbBatchedCsv;
22
23
  use conversion::*;
23
24
  use dataframe::RbDataFrame;
24
- use error::{RbPolarsErr, RbTypeError, RbValueError};
25
+ use error::RbPolarsErr;
26
+ use exceptions::{RbTypeError, RbValueError};
25
27
  use expr::rb_exprs_to_exprs;
26
28
  use expr::RbExpr;
27
29
  use functions::string_cache::RbStringCacheHolder;
28
30
  use functions::whenthen::{RbChainedThen, RbChainedWhen, RbThen, RbWhen};
31
+ use interop::arrow::to_ruby::RbArrowArrayStream;
29
32
  use lazyframe::RbLazyFrame;
30
33
  use lazygroupby::RbLazyGroupBy;
31
- use magnus::{define_module, function, method, prelude::*, Error, Ruby};
34
+ use magnus::{define_module, function, method, prelude::*, Ruby};
32
35
  use series::RbSeries;
33
36
  use sql::RbSQLContext;
34
37
 
35
- type RbResult<T> = Result<T, Error>;
38
+ use magnus::error::Result as RbResult;
39
+ use magnus::Error as RbErr;
40
+
41
+ // TODO move
42
+ fn re_escape(pattern: String) -> String {
43
+ regex::escape(&pattern)
44
+ }
36
45
 
37
46
  #[magnus::init]
38
47
  fn init(ruby: &Ruby) -> RbResult<()> {
48
+ crate::on_startup::register_startup_deps();
49
+
39
50
  let module = define_module("Polars")?;
40
51
 
41
52
  let class = module.define_class("RbBatchedCsv", ruby.class_object())?;
@@ -45,7 +56,6 @@ fn init(ruby: &Ruby) -> RbResult<()> {
45
56
  let class = module.define_class("RbDataFrame", ruby.class_object())?;
46
57
  class.define_singleton_method("new", function!(RbDataFrame::init, 1))?;
47
58
  class.define_singleton_method("read_csv", function!(RbDataFrame::read_csv, -1))?;
48
- class.define_singleton_method("read_parquet", function!(RbDataFrame::read_parquet, 9))?;
49
59
  class.define_singleton_method("read_ipc", function!(RbDataFrame::read_ipc, 6))?;
50
60
  class.define_singleton_method(
51
61
  "read_ipc_stream",
@@ -58,17 +68,21 @@ fn init(ruby: &Ruby) -> RbResult<()> {
58
68
  class.define_singleton_method("read_ndjson", function!(RbDataFrame::read_ndjson, 4))?;
59
69
  class.define_method("estimated_size", method!(RbDataFrame::estimated_size, 0))?;
60
70
  class.define_method("dtype_strings", method!(RbDataFrame::dtype_strings, 0))?;
61
- class.define_method("write_avro", method!(RbDataFrame::write_avro, 2))?;
71
+ class.define_method("write_avro", method!(RbDataFrame::write_avro, 3))?;
62
72
  class.define_method("write_json", method!(RbDataFrame::write_json, 3))?;
63
73
  class.define_method("write_ndjson", method!(RbDataFrame::write_ndjson, 1))?;
64
74
  class.define_method("write_csv", method!(RbDataFrame::write_csv, 10))?;
65
- class.define_method("write_ipc", method!(RbDataFrame::write_ipc, 2))?;
75
+ class.define_method("write_ipc", method!(RbDataFrame::write_ipc, 3))?;
66
76
  class.define_method(
67
77
  "write_ipc_stream",
68
- method!(RbDataFrame::write_ipc_stream, 2),
78
+ method!(RbDataFrame::write_ipc_stream, 3),
69
79
  )?;
70
80
  class.define_method("row_tuple", method!(RbDataFrame::row_tuple, 1))?;
71
81
  class.define_method("row_tuples", method!(RbDataFrame::row_tuples, 0))?;
82
+ class.define_method(
83
+ "arrow_c_stream",
84
+ method!(RbDataFrame::__arrow_c_stream__, 0),
85
+ )?;
72
86
  class.define_method("to_numo", method!(RbDataFrame::to_numo, 0))?;
73
87
  class.define_method("write_parquet", method!(RbDataFrame::write_parquet, 6))?;
74
88
  class.define_method("add", method!(RbDataFrame::add, 1))?;
@@ -143,6 +157,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
143
157
  class.define_method("to_struct", method!(RbDataFrame::to_struct, 1))?;
144
158
  class.define_method("unnest", method!(RbDataFrame::unnest, 1))?;
145
159
  class.define_method("clear", method!(RbDataFrame::clear, 0))?;
160
+ class.define_method("serialize_json", method!(RbDataFrame::serialize_json, 1))?;
146
161
 
147
162
  let class = module.define_class("RbExpr", ruby.class_object())?;
148
163
  class.define_method("+", method!(RbExpr::add, 1))?;
@@ -286,6 +301,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
286
301
  class.define_method("str_slice", method!(RbExpr::str_slice, 2))?;
287
302
  class.define_method("str_to_uppercase", method!(RbExpr::str_to_uppercase, 0))?;
288
303
  class.define_method("str_to_lowercase", method!(RbExpr::str_to_lowercase, 0))?;
304
+ // class.define_method("str_to_titlecase", method!(RbExpr::str_to_titlecase, 0))?;
289
305
  class.define_method("str_len_bytes", method!(RbExpr::str_len_bytes, 0))?;
290
306
  class.define_method("str_len_chars", method!(RbExpr::str_len_chars, 0))?;
291
307
  class.define_method("str_replace_n", method!(RbExpr::str_replace_n, 4))?;
@@ -412,7 +428,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
412
428
  class.define_method("dt_dst_offset", method!(RbExpr::dt_dst_offset, 0))?;
413
429
  class.define_method("dt_round", method!(RbExpr::dt_round, 1))?;
414
430
  class.define_method("dt_combine", method!(RbExpr::dt_combine, 2))?;
415
- class.define_method("map_batches", method!(RbExpr::map_batches, 4))?;
431
+ class.define_method("map_batches", method!(RbExpr::map_batches, 5))?;
416
432
  class.define_method("dot", method!(RbExpr::dot, 1))?;
417
433
  class.define_method("reinterpret", method!(RbExpr::reinterpret, 1))?;
418
434
  class.define_method("mode", method!(RbExpr::mode, 0))?;
@@ -567,6 +583,10 @@ fn init(ruby: &Ruby) -> RbResult<()> {
567
583
  class.define_singleton_method("when", function!(functions::whenthen::when, 1))?;
568
584
  class.define_singleton_method("concat_str", function!(functions::lazy::concat_str, 3))?;
569
585
  class.define_singleton_method("concat_list", function!(functions::lazy::concat_list, 1))?;
586
+ class.define_singleton_method(
587
+ "business_day_count",
588
+ function!(functions::business::business_day_count, 4),
589
+ )?;
570
590
  class.define_singleton_method(
571
591
  "all_horizontal",
572
592
  function!(functions::aggregation::all_horizontal, 1),
@@ -699,17 +719,18 @@ fn init(ruby: &Ruby) -> RbResult<()> {
699
719
  "set_random_seed",
700
720
  function!(functions::random::set_random_seed, 1),
701
721
  )?;
722
+ class.define_singleton_method("re_escape", function!(re_escape, 1))?;
702
723
 
703
724
  let class = module.define_class("RbLazyFrame", ruby.class_object())?;
704
725
  class.define_singleton_method("read_json", function!(RbLazyFrame::read_json, 1))?;
705
726
  class.define_singleton_method(
706
727
  "new_from_ndjson",
707
- function!(RbLazyFrame::new_from_ndjson, 7),
728
+ function!(RbLazyFrame::new_from_ndjson, 8),
708
729
  )?;
709
730
  class.define_singleton_method("new_from_csv", function!(RbLazyFrame::new_from_csv, -1))?;
710
731
  class.define_singleton_method(
711
732
  "new_from_parquet",
712
- function!(RbLazyFrame::new_from_parquet, 14),
733
+ function!(RbLazyFrame::new_from_parquet, -1),
713
734
  )?;
714
735
  class.define_singleton_method("new_from_ipc", function!(RbLazyFrame::new_from_ipc, 10))?;
715
736
  class.define_method("write_json", method!(RbLazyFrame::write_json, 1))?;
@@ -741,15 +762,15 @@ fn init(ruby: &Ruby) -> RbResult<()> {
741
762
  method!(RbLazyFrame::group_by_dynamic, 9),
742
763
  )?;
743
764
  class.define_method("with_context", method!(RbLazyFrame::with_context, 1))?;
744
- class.define_method("join_asof", method!(RbLazyFrame::join_asof, 11))?;
745
- class.define_method("join", method!(RbLazyFrame::join, 8))?;
765
+ class.define_method("join_asof", method!(RbLazyFrame::join_asof, 12))?;
766
+ class.define_method("join", method!(RbLazyFrame::join, 10))?;
746
767
  class.define_method("with_column", method!(RbLazyFrame::with_column, 1))?;
747
768
  class.define_method("with_columns", method!(RbLazyFrame::with_columns, 1))?;
748
769
  class.define_method(
749
770
  "with_columns_seq",
750
771
  method!(RbLazyFrame::with_columns_seq, 1),
751
772
  )?;
752
- class.define_method("rename", method!(RbLazyFrame::rename, 2))?;
773
+ class.define_method("rename", method!(RbLazyFrame::rename, 3))?;
753
774
  class.define_method("reverse", method!(RbLazyFrame::reverse, 0))?;
754
775
  class.define_method("shift", method!(RbLazyFrame::shift, 2))?;
755
776
  class.define_method("fill_nan", method!(RbLazyFrame::fill_nan, 1))?;
@@ -770,6 +791,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
770
791
  class.define_method("unpivot", method!(RbLazyFrame::unpivot, 4))?;
771
792
  class.define_method("with_row_index", method!(RbLazyFrame::with_row_index, 2))?;
772
793
  class.define_method("drop", method!(RbLazyFrame::drop, 1))?;
794
+ class.define_method("cast", method!(RbLazyFrame::cast, 2))?;
773
795
  class.define_method("cast_all", method!(RbLazyFrame::cast_all, 2))?;
774
796
  class.define_method("_clone", method!(RbLazyFrame::clone, 0))?;
775
797
  class.define_method("collect_schema", method!(RbLazyFrame::collect_schema, 0))?;
@@ -810,7 +832,12 @@ fn init(ruby: &Ruby) -> RbResult<()> {
810
832
  class.define_singleton_method("new_array", function!(RbSeries::new_array, 5))?;
811
833
  class.define_singleton_method("new_decimal", function!(RbSeries::new_decimal, 3))?;
812
834
  class.define_singleton_method("repeat", function!(RbSeries::repeat, 4))?;
835
+ class.define_singleton_method(
836
+ "from_arrow_c_stream",
837
+ function!(RbSeries::from_arrow_c_stream, 1),
838
+ )?;
813
839
  class.define_method("struct_unnest", method!(RbSeries::struct_unnest, 0))?;
840
+ class.define_method("struct_fields", method!(RbSeries::struct_fields, 0))?;
814
841
  class.define_method(
815
842
  "is_sorted_flag",
816
843
  method!(RbSeries::is_sorted_ascending_flag, 0),
@@ -1081,5 +1108,9 @@ fn init(ruby: &Ruby) -> RbResult<()> {
1081
1108
  let class = module.define_class("RbStringCacheHolder", ruby.class_object())?;
1082
1109
  class.define_singleton_method("hold", function!(RbStringCacheHolder::hold, 0))?;
1083
1110
 
1111
+ // arrow array stream
1112
+ let class = module.define_class("RbArrowArrayStream", ruby.class_object())?;
1113
+ class.define_method("to_i", method!(RbArrowArrayStream::to_i, 0))?;
1114
+
1084
1115
  Ok(())
1085
1116
  }
@@ -7,13 +7,16 @@ use super::*;
7
7
  use crate::{RbDataFrame, RbPolarsErr, RbSeries, Wrap};
8
8
 
9
9
  fn get_iters(df: &DataFrame) -> Vec<SeriesIter> {
10
- df.get_columns().iter().map(|s| s.iter()).collect()
10
+ df.get_columns()
11
+ .iter()
12
+ .map(|s| s.as_materialized_series().iter())
13
+ .collect()
11
14
  }
12
15
 
13
16
  fn get_iters_skip(df: &DataFrame, skip: usize) -> Vec<std::iter::Skip<SeriesIter>> {
14
17
  df.get_columns()
15
18
  .iter()
16
- .map(|s| s.iter().skip(skip))
19
+ .map(|s| s.as_materialized_series().iter().skip(skip))
17
20
  .collect()
18
21
  }
19
22
 
@@ -113,16 +116,17 @@ pub fn apply_lambda_unknown<'a>(
113
116
  true,
114
117
  ));
115
118
  } else if out.is_kind_of(class::array()) {
116
- return Err(RbPolarsErr::other(
119
+ return Err(RbPolarsErr::Other(
117
120
  "A list output type is invalid. Do you mean to create polars List Series?\
118
121
  Then return a Series object."
119
122
  .into(),
120
- ));
123
+ )
124
+ .into());
121
125
  } else {
122
- return Err(RbPolarsErr::other("Could not determine output type".into()));
126
+ return Err(RbPolarsErr::Other("Could not determine output type".into()).into());
123
127
  }
124
128
  }
125
- Err(RbPolarsErr::other("Could not determine output type".into()))
129
+ Err(RbPolarsErr::Other("Could not determine output type".into()).into())
126
130
  }
127
131
 
128
132
  fn apply_iter<T>(
@@ -158,10 +162,16 @@ where
158
162
  {
159
163
  let skip = usize::from(first_value.is_some());
160
164
  if init_null_count == df.height() {
161
- ChunkedArray::full_null("apply", df.height())
165
+ ChunkedArray::full_null(PlSmallStr::from_static("map"), df.height())
162
166
  } else {
163
167
  let iter = apply_iter(df, lambda, init_null_count, skip);
164
- iterator_to_primitive(iter, init_null_count, first_value, "apply", df.height())
168
+ iterator_to_primitive(
169
+ iter,
170
+ init_null_count,
171
+ first_value,
172
+ PlSmallStr::from_static("map"),
173
+ df.height(),
174
+ )
165
175
  }
166
176
  }
167
177
 
@@ -174,10 +184,16 @@ pub fn apply_lambda_with_bool_out_type(
174
184
  ) -> ChunkedArray<BooleanType> {
175
185
  let skip = usize::from(first_value.is_some());
176
186
  if init_null_count == df.height() {
177
- ChunkedArray::full_null("apply", df.height())
187
+ ChunkedArray::full_null(PlSmallStr::from_static("map"), df.height())
178
188
  } else {
179
189
  let iter = apply_iter(df, lambda, init_null_count, skip);
180
- iterator_to_bool(iter, init_null_count, first_value, "apply", df.height())
190
+ iterator_to_bool(
191
+ iter,
192
+ init_null_count,
193
+ first_value,
194
+ PlSmallStr::from_static("map"),
195
+ df.height(),
196
+ )
181
197
  }
182
198
  }
183
199
 
@@ -190,10 +206,16 @@ pub fn apply_lambda_with_utf8_out_type(
190
206
  ) -> StringChunked {
191
207
  let skip = usize::from(first_value.is_some());
192
208
  if init_null_count == df.height() {
193
- ChunkedArray::full_null("apply", df.height())
209
+ ChunkedArray::full_null(PlSmallStr::from_static("map"), df.height())
194
210
  } else {
195
211
  let iter = apply_iter::<String>(df, lambda, init_null_count, skip);
196
- iterator_to_utf8(iter, init_null_count, first_value, "apply", df.height())
212
+ iterator_to_utf8(
213
+ iter,
214
+ init_null_count,
215
+ first_value,
216
+ PlSmallStr::from_static("map"),
217
+ df.height(),
218
+ )
197
219
  }
198
220
  }
199
221
 
@@ -207,7 +229,10 @@ pub fn apply_lambda_with_list_out_type(
207
229
  ) -> RbResult<ListChunked> {
208
230
  let skip = usize::from(first_value.is_some());
209
231
  if init_null_count == df.height() {
210
- Ok(ChunkedArray::full_null("apply", df.height()))
232
+ Ok(ChunkedArray::full_null(
233
+ PlSmallStr::from_static("map"),
234
+ df.height(),
235
+ ))
211
236
  } else {
212
237
  let mut iters = get_iters_skip(df, init_null_count + skip);
213
238
  let iter = ((init_null_count + skip)..df.height()).map(|_| {
@@ -229,7 +254,14 @@ pub fn apply_lambda_with_list_out_type(
229
254
  Err(e) => panic!("ruby function failed {}", e),
230
255
  }
231
256
  });
232
- iterator_to_list(dt, iter, init_null_count, first_value, "apply", df.height())
257
+ iterator_to_list(
258
+ dt,
259
+ iter,
260
+ init_null_count,
261
+ first_value,
262
+ PlSmallStr::from_static("map"),
263
+ df.height(),
264
+ )
233
265
  }
234
266
  }
235
267
 
@@ -1,10 +1,70 @@
1
- use magnus::Value;
1
+ use magnus::{prelude::*, RArray, Value};
2
2
  use polars::prelude::*;
3
3
 
4
- use crate::{RbExpr, Wrap};
4
+ use crate::rb_modules::*;
5
+ use crate::{RbExpr, RbSeries, Wrap};
5
6
 
6
- pub fn binary_lambda(_lambda: Value, _a: Series, _b: Series) -> PolarsResult<Option<Series>> {
7
- todo!();
7
+ fn to_series(v: Value, name: &str) -> PolarsResult<Series> {
8
+ let rb_rbseries = match v.funcall("_s", ()) {
9
+ Ok(s) => s,
10
+ // the lambda did not return a series, we try to create a new Ruby Series
11
+ _ => {
12
+ let data = RArray::new();
13
+ data.push(v).unwrap();
14
+ let res = series().funcall::<_, _, Value>("new", (name.to_string(), data));
15
+
16
+ match res {
17
+ Ok(ruby_s) => ruby_s.funcall::<_, _, &RbSeries>("_s", ()).unwrap(),
18
+ Err(_) => {
19
+ polars_bail!(ComputeError:
20
+ "expected a something that could convert to a `Series` but got: {}",
21
+ unsafe { v.classname() }
22
+ )
23
+ }
24
+ }
25
+ }
26
+ };
27
+ // Finally get the actual Series
28
+ Ok(rb_rbseries.series.borrow().clone())
29
+ }
30
+
31
+ pub fn binary_lambda(lambda: Value, a: Series, b: Series) -> PolarsResult<Option<Series>> {
32
+ // create a RbSeries struct/object for Ruby
33
+ let rbseries_a = RbSeries::new(a);
34
+ let rbseries_b = RbSeries::new(b);
35
+
36
+ // Wrap this RbSeries object in the Ruby side Series wrapper
37
+ let ruby_series_wrapper_a: Value = utils().funcall("wrap_s", (rbseries_a,)).unwrap();
38
+ let ruby_series_wrapper_b: Value = utils().funcall("wrap_s", (rbseries_b,)).unwrap();
39
+
40
+ // call the lambda and get a Ruby side Series wrapper
41
+ let result_series_wrapper: Value =
42
+ match lambda.funcall("call", (ruby_series_wrapper_a, ruby_series_wrapper_b)) {
43
+ Ok(rbobj) => rbobj,
44
+ Err(e) => polars_bail!(
45
+ ComputeError: "custom Ruby function failed: {}", e,
46
+ ),
47
+ };
48
+ let rbseries = if let Ok(rbexpr) = result_series_wrapper.funcall::<_, _, &RbExpr>("_rbexpr", ())
49
+ {
50
+ let expr = rbexpr.inner.clone();
51
+ let df = DataFrame::empty();
52
+ let out = df
53
+ .lazy()
54
+ .select([expr])
55
+ .with_predicate_pushdown(false)
56
+ .with_projection_pushdown(false)
57
+ .collect()?;
58
+
59
+ let s = out.select_at_idx(0).unwrap().clone();
60
+ RbSeries::new(s.take_materialized_series())
61
+ } else {
62
+ return Some(to_series(result_series_wrapper, "")).transpose();
63
+ };
64
+
65
+ // Finally get the actual Series
66
+ let binding = rbseries.series.borrow();
67
+ Ok(Some(binding.clone()))
8
68
  }
9
69
 
10
70
  pub fn map_single(
@@ -13,6 +73,7 @@ pub fn map_single(
13
73
  _output_type: Option<Wrap<DataType>>,
14
74
  _agg_list: bool,
15
75
  _is_elementwise: bool,
76
+ _returns_scalar: bool,
16
77
  ) -> RbExpr {
17
78
  todo!();
18
79
  }
@@ -28,14 +28,14 @@ fn iterator_to_struct(
28
28
  it: impl Iterator<Item = Option<Value>>,
29
29
  init_null_count: usize,
30
30
  first_value: AnyValue,
31
- name: &str,
31
+ name: PlSmallStr,
32
32
  capacity: usize,
33
33
  ) -> RbResult<RbSeries> {
34
34
  let (vals, flds) = match &first_value {
35
35
  av @ AnyValue::Struct(_, _, flds) => (av._iter_struct_av().collect::<Vec<_>>(), &**flds),
36
36
  AnyValue::StructOwned(payload) => (payload.0.clone(), &*payload.1),
37
37
  _ => {
38
- return Err(crate::error::ComputeError::new_err(format!(
38
+ return Err(crate::exceptions::ComputeError::new_err(format!(
39
39
  "expected struct got {first_value:?}",
40
40
  )))
41
41
  }
@@ -70,7 +70,7 @@ fn iterator_to_struct(
70
70
  Some(dict) => {
71
71
  let dict = RHash::try_convert(dict)?;
72
72
  if dict.len() != struct_width {
73
- return Err(crate::error::ComputeError::new_err(
73
+ return Err(crate::exceptions::ComputeError::new_err(
74
74
  format!("Cannot create struct type.\n> The struct dtype expects {} fields, but it got a dict with {} fields.", struct_width, dict.len())
75
75
  ));
76
76
  }
@@ -89,21 +89,23 @@ fn iterator_to_struct(
89
89
  items
90
90
  .par_iter()
91
91
  .zip(flds)
92
- .map(|(av, fld)| Series::new(fld.name(), av))
92
+ .map(|(av, fld)| Series::new(fld.name().clone(), av))
93
93
  .collect::<Vec<_>>()
94
94
  });
95
95
 
96
- Ok(StructChunked::from_series(name, &fields)
97
- .unwrap()
98
- .into_series()
99
- .into())
96
+ Ok(
97
+ StructChunked::from_series(name, fields[0].len(), fields.iter())
98
+ .unwrap()
99
+ .into_series()
100
+ .into(),
101
+ )
100
102
  }
101
103
 
102
104
  fn iterator_to_primitive<T>(
103
105
  it: impl Iterator<Item = Option<T::Native>>,
104
106
  init_null_count: usize,
105
107
  first_value: Option<T::Native>,
106
- name: &str,
108
+ name: PlSmallStr,
107
109
  capacity: usize,
108
110
  ) -> ChunkedArray<T>
109
111
  where
@@ -136,7 +138,7 @@ fn iterator_to_bool(
136
138
  it: impl Iterator<Item = Option<bool>>,
137
139
  init_null_count: usize,
138
140
  first_value: Option<bool>,
139
- name: &str,
141
+ name: PlSmallStr,
140
142
  capacity: usize,
141
143
  ) -> ChunkedArray<BooleanType> {
142
144
  // safety: we know the iterators len
@@ -166,7 +168,7 @@ fn iterator_to_object(
166
168
  it: impl Iterator<Item = Option<ObjectValue>>,
167
169
  init_null_count: usize,
168
170
  first_value: Option<ObjectValue>,
169
- name: &str,
171
+ name: PlSmallStr,
170
172
  capacity: usize,
171
173
  ) -> ObjectChunked<ObjectValue> {
172
174
  // safety: we know the iterators len
@@ -196,7 +198,7 @@ fn iterator_to_utf8(
196
198
  it: impl Iterator<Item = Option<String>>,
197
199
  init_null_count: usize,
198
200
  first_value: Option<&str>,
199
- name: &str,
201
+ name: PlSmallStr,
200
202
  capacity: usize,
201
203
  ) -> StringChunked {
202
204
  let first_value = first_value.map(|v| v.to_string());
@@ -229,11 +231,10 @@ fn iterator_to_list(
229
231
  it: impl Iterator<Item = Option<Series>>,
230
232
  init_null_count: usize,
231
233
  first_value: Option<&Series>,
232
- name: &str,
234
+ name: PlSmallStr,
233
235
  capacity: usize,
234
236
  ) -> RbResult<ListChunked> {
235
- let mut builder =
236
- get_list_builder(dt, capacity * 5, capacity, name).map_err(RbPolarsErr::from)?;
237
+ let mut builder = get_list_builder(dt, capacity * 5, capacity, name);
237
238
  for _ in 0..init_null_count {
238
239
  builder.append_null()
239
240
  }
@@ -246,7 +247,7 @@ fn iterator_to_list(
246
247
  Some(s) => {
247
248
  if s.len() == 0 && s.dtype() != dt {
248
249
  builder
249
- .append_series(&Series::full_null("", 0, dt))
250
+ .append_series(&Series::full_null(PlSmallStr::EMPTY, 0, dt))
250
251
  .unwrap()
251
252
  } else {
252
253
  builder.append_series(&s).map_err(RbPolarsErr::from)?