polars-df 0.13.0 → 0.15.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (86)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +30 -0
  3. data/Cargo.lock +1368 -319
  4. data/LICENSE.txt +1 -0
  5. data/README.md +1 -2
  6. data/ext/polars/Cargo.toml +15 -6
  7. data/ext/polars/src/batched_csv.rs +10 -13
  8. data/ext/polars/src/conversion/any_value.rs +37 -21
  9. data/ext/polars/src/conversion/chunked_array.rs +3 -3
  10. data/ext/polars/src/conversion/mod.rs +159 -46
  11. data/ext/polars/src/dataframe/construction.rs +4 -7
  12. data/ext/polars/src/dataframe/export.rs +9 -2
  13. data/ext/polars/src/dataframe/general.rs +22 -16
  14. data/ext/polars/src/dataframe/io.rs +78 -174
  15. data/ext/polars/src/dataframe/mod.rs +1 -0
  16. data/ext/polars/src/dataframe/serde.rs +15 -0
  17. data/ext/polars/src/error.rs +31 -48
  18. data/ext/polars/src/exceptions.rs +24 -0
  19. data/ext/polars/src/expr/binary.rs +4 -42
  20. data/ext/polars/src/expr/datetime.rs +16 -7
  21. data/ext/polars/src/expr/general.rs +14 -23
  22. data/ext/polars/src/expr/list.rs +18 -11
  23. data/ext/polars/src/expr/name.rs +3 -2
  24. data/ext/polars/src/expr/rolling.rs +6 -7
  25. data/ext/polars/src/expr/string.rs +17 -37
  26. data/ext/polars/src/file.rs +59 -22
  27. data/ext/polars/src/functions/business.rs +15 -0
  28. data/ext/polars/src/functions/io.rs +6 -6
  29. data/ext/polars/src/functions/lazy.rs +17 -8
  30. data/ext/polars/src/functions/mod.rs +1 -0
  31. data/ext/polars/src/functions/range.rs +4 -2
  32. data/ext/polars/src/interop/arrow/mod.rs +1 -0
  33. data/ext/polars/src/interop/arrow/to_ruby.rs +83 -0
  34. data/ext/polars/src/interop/mod.rs +1 -0
  35. data/ext/polars/src/lazyframe/general.rs +877 -0
  36. data/ext/polars/src/lazyframe/mod.rs +3 -825
  37. data/ext/polars/src/lazyframe/serde.rs +31 -0
  38. data/ext/polars/src/lib.rs +44 -13
  39. data/ext/polars/src/map/dataframe.rs +46 -14
  40. data/ext/polars/src/map/lazy.rs +65 -4
  41. data/ext/polars/src/map/mod.rs +17 -16
  42. data/ext/polars/src/map/series.rs +106 -64
  43. data/ext/polars/src/on_startup.rs +2 -2
  44. data/ext/polars/src/series/aggregation.rs +1 -5
  45. data/ext/polars/src/series/arithmetic.rs +10 -10
  46. data/ext/polars/src/series/construction.rs +52 -25
  47. data/ext/polars/src/series/export.rs +1 -1
  48. data/ext/polars/src/series/general.rs +643 -0
  49. data/ext/polars/src/series/import.rs +55 -0
  50. data/ext/polars/src/series/mod.rs +11 -638
  51. data/ext/polars/src/series/scatter.rs +2 -2
  52. data/ext/polars/src/utils.rs +0 -20
  53. data/lib/polars/batched_csv_reader.rb +0 -2
  54. data/lib/polars/binary_expr.rb +133 -9
  55. data/lib/polars/binary_name_space.rb +101 -6
  56. data/lib/polars/config.rb +4 -0
  57. data/lib/polars/data_frame.rb +285 -62
  58. data/lib/polars/data_type_group.rb +28 -0
  59. data/lib/polars/data_types.rb +2 -0
  60. data/lib/polars/date_time_expr.rb +244 -0
  61. data/lib/polars/date_time_name_space.rb +87 -0
  62. data/lib/polars/expr.rb +109 -8
  63. data/lib/polars/functions/as_datatype.rb +51 -2
  64. data/lib/polars/functions/col.rb +1 -1
  65. data/lib/polars/functions/eager.rb +1 -3
  66. data/lib/polars/functions/lazy.rb +88 -10
  67. data/lib/polars/functions/range/time_range.rb +21 -21
  68. data/lib/polars/io/csv.rb +14 -16
  69. data/lib/polars/io/database.rb +2 -2
  70. data/lib/polars/io/ipc.rb +14 -12
  71. data/lib/polars/io/ndjson.rb +10 -0
  72. data/lib/polars/io/parquet.rb +168 -111
  73. data/lib/polars/lazy_frame.rb +649 -15
  74. data/lib/polars/list_name_space.rb +169 -0
  75. data/lib/polars/selectors.rb +1144 -0
  76. data/lib/polars/series.rb +470 -40
  77. data/lib/polars/string_cache.rb +27 -1
  78. data/lib/polars/string_expr.rb +0 -1
  79. data/lib/polars/string_name_space.rb +73 -3
  80. data/lib/polars/struct_name_space.rb +31 -7
  81. data/lib/polars/utils/various.rb +5 -1
  82. data/lib/polars/utils.rb +45 -10
  83. data/lib/polars/version.rb +1 -1
  84. data/lib/polars.rb +2 -1
  85. metadata +14 -4
  86. data/lib/polars/functions.rb +0 -57
@@ -0,0 +1,31 @@
1
+ use magnus::Value;
2
+ use polars::lazy::frame::LazyFrame;
3
+ use polars::prelude::*;
4
+ use std::io::Read;
5
+
6
+ use crate::file::get_file_like;
7
+ use crate::{RbLazyFrame, RbResult, RbValueError};
8
+
9
+ impl RbLazyFrame {
10
+ // TODO change to serialize_json
11
+ pub fn read_json(rb_f: Value) -> RbResult<Self> {
12
+ // it is faster to first read to memory and then parse: https://github.com/serde-rs/json/issues/160
13
+ // so don't bother with files.
14
+ let mut json = String::new();
15
+ let _ = get_file_like(rb_f, false)?
16
+ .read_to_string(&mut json)
17
+ .unwrap();
18
+
19
+ // Safety
20
+ // we skipped serializing/deserializing the `'static` lifetime in `DataType`
21
+ // so we actually don't have a lifetime at all when serializing.
22
+
23
+ // &str still has a lifetime. But it's ok, because we drop it immediately
24
+ // in this scope
25
+ let json = unsafe { std::mem::transmute::<&'_ str, &'static str>(json.as_str()) };
26
+
27
+ let lp = serde_json::from_str::<DslPlan>(json)
28
+ .map_err(|err| RbValueError::new_err(format!("{:?}", err)))?;
29
+ Ok(LazyFrame::from(lp).into())
30
+ }
31
+ }
@@ -3,6 +3,7 @@ mod batched_csv;
3
3
  mod conversion;
4
4
  mod dataframe;
5
5
  mod error;
6
+ mod exceptions;
6
7
  mod expr;
7
8
  mod file;
8
9
  mod functions;
@@ -21,21 +22,31 @@ mod utils;
21
22
  use batched_csv::RbBatchedCsv;
22
23
  use conversion::*;
23
24
  use dataframe::RbDataFrame;
24
- use error::{RbPolarsErr, RbTypeError, RbValueError};
25
+ use error::RbPolarsErr;
26
+ use exceptions::{RbTypeError, RbValueError};
25
27
  use expr::rb_exprs_to_exprs;
26
28
  use expr::RbExpr;
27
29
  use functions::string_cache::RbStringCacheHolder;
28
30
  use functions::whenthen::{RbChainedThen, RbChainedWhen, RbThen, RbWhen};
31
+ use interop::arrow::to_ruby::RbArrowArrayStream;
29
32
  use lazyframe::RbLazyFrame;
30
33
  use lazygroupby::RbLazyGroupBy;
31
- use magnus::{define_module, function, method, prelude::*, Error, Ruby};
34
+ use magnus::{define_module, function, method, prelude::*, Ruby};
32
35
  use series::RbSeries;
33
36
  use sql::RbSQLContext;
34
37
 
35
- type RbResult<T> = Result<T, Error>;
38
+ use magnus::error::Result as RbResult;
39
+ use magnus::Error as RbErr;
40
+
41
+ // TODO move
42
+ fn re_escape(pattern: String) -> String {
43
+ regex::escape(&pattern)
44
+ }
36
45
 
37
46
  #[magnus::init]
38
47
  fn init(ruby: &Ruby) -> RbResult<()> {
48
+ crate::on_startup::register_startup_deps();
49
+
39
50
  let module = define_module("Polars")?;
40
51
 
41
52
  let class = module.define_class("RbBatchedCsv", ruby.class_object())?;
@@ -45,7 +56,6 @@ fn init(ruby: &Ruby) -> RbResult<()> {
45
56
  let class = module.define_class("RbDataFrame", ruby.class_object())?;
46
57
  class.define_singleton_method("new", function!(RbDataFrame::init, 1))?;
47
58
  class.define_singleton_method("read_csv", function!(RbDataFrame::read_csv, -1))?;
48
- class.define_singleton_method("read_parquet", function!(RbDataFrame::read_parquet, 9))?;
49
59
  class.define_singleton_method("read_ipc", function!(RbDataFrame::read_ipc, 6))?;
50
60
  class.define_singleton_method(
51
61
  "read_ipc_stream",
@@ -58,17 +68,21 @@ fn init(ruby: &Ruby) -> RbResult<()> {
58
68
  class.define_singleton_method("read_ndjson", function!(RbDataFrame::read_ndjson, 4))?;
59
69
  class.define_method("estimated_size", method!(RbDataFrame::estimated_size, 0))?;
60
70
  class.define_method("dtype_strings", method!(RbDataFrame::dtype_strings, 0))?;
61
- class.define_method("write_avro", method!(RbDataFrame::write_avro, 2))?;
71
+ class.define_method("write_avro", method!(RbDataFrame::write_avro, 3))?;
62
72
  class.define_method("write_json", method!(RbDataFrame::write_json, 3))?;
63
73
  class.define_method("write_ndjson", method!(RbDataFrame::write_ndjson, 1))?;
64
74
  class.define_method("write_csv", method!(RbDataFrame::write_csv, 10))?;
65
- class.define_method("write_ipc", method!(RbDataFrame::write_ipc, 2))?;
75
+ class.define_method("write_ipc", method!(RbDataFrame::write_ipc, 3))?;
66
76
  class.define_method(
67
77
  "write_ipc_stream",
68
- method!(RbDataFrame::write_ipc_stream, 2),
78
+ method!(RbDataFrame::write_ipc_stream, 3),
69
79
  )?;
70
80
  class.define_method("row_tuple", method!(RbDataFrame::row_tuple, 1))?;
71
81
  class.define_method("row_tuples", method!(RbDataFrame::row_tuples, 0))?;
82
+ class.define_method(
83
+ "arrow_c_stream",
84
+ method!(RbDataFrame::__arrow_c_stream__, 0),
85
+ )?;
72
86
  class.define_method("to_numo", method!(RbDataFrame::to_numo, 0))?;
73
87
  class.define_method("write_parquet", method!(RbDataFrame::write_parquet, 6))?;
74
88
  class.define_method("add", method!(RbDataFrame::add, 1))?;
@@ -143,6 +157,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
143
157
  class.define_method("to_struct", method!(RbDataFrame::to_struct, 1))?;
144
158
  class.define_method("unnest", method!(RbDataFrame::unnest, 1))?;
145
159
  class.define_method("clear", method!(RbDataFrame::clear, 0))?;
160
+ class.define_method("serialize_json", method!(RbDataFrame::serialize_json, 1))?;
146
161
 
147
162
  let class = module.define_class("RbExpr", ruby.class_object())?;
148
163
  class.define_method("+", method!(RbExpr::add, 1))?;
@@ -286,6 +301,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
286
301
  class.define_method("str_slice", method!(RbExpr::str_slice, 2))?;
287
302
  class.define_method("str_to_uppercase", method!(RbExpr::str_to_uppercase, 0))?;
288
303
  class.define_method("str_to_lowercase", method!(RbExpr::str_to_lowercase, 0))?;
304
+ // class.define_method("str_to_titlecase", method!(RbExpr::str_to_titlecase, 0))?;
289
305
  class.define_method("str_len_bytes", method!(RbExpr::str_len_bytes, 0))?;
290
306
  class.define_method("str_len_chars", method!(RbExpr::str_len_chars, 0))?;
291
307
  class.define_method("str_replace_n", method!(RbExpr::str_replace_n, 4))?;
@@ -412,7 +428,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
412
428
  class.define_method("dt_dst_offset", method!(RbExpr::dt_dst_offset, 0))?;
413
429
  class.define_method("dt_round", method!(RbExpr::dt_round, 1))?;
414
430
  class.define_method("dt_combine", method!(RbExpr::dt_combine, 2))?;
415
- class.define_method("map_batches", method!(RbExpr::map_batches, 4))?;
431
+ class.define_method("map_batches", method!(RbExpr::map_batches, 5))?;
416
432
  class.define_method("dot", method!(RbExpr::dot, 1))?;
417
433
  class.define_method("reinterpret", method!(RbExpr::reinterpret, 1))?;
418
434
  class.define_method("mode", method!(RbExpr::mode, 0))?;
@@ -567,6 +583,10 @@ fn init(ruby: &Ruby) -> RbResult<()> {
567
583
  class.define_singleton_method("when", function!(functions::whenthen::when, 1))?;
568
584
  class.define_singleton_method("concat_str", function!(functions::lazy::concat_str, 3))?;
569
585
  class.define_singleton_method("concat_list", function!(functions::lazy::concat_list, 1))?;
586
+ class.define_singleton_method(
587
+ "business_day_count",
588
+ function!(functions::business::business_day_count, 4),
589
+ )?;
570
590
  class.define_singleton_method(
571
591
  "all_horizontal",
572
592
  function!(functions::aggregation::all_horizontal, 1),
@@ -699,17 +719,18 @@ fn init(ruby: &Ruby) -> RbResult<()> {
699
719
  "set_random_seed",
700
720
  function!(functions::random::set_random_seed, 1),
701
721
  )?;
722
+ class.define_singleton_method("re_escape", function!(re_escape, 1))?;
702
723
 
703
724
  let class = module.define_class("RbLazyFrame", ruby.class_object())?;
704
725
  class.define_singleton_method("read_json", function!(RbLazyFrame::read_json, 1))?;
705
726
  class.define_singleton_method(
706
727
  "new_from_ndjson",
707
- function!(RbLazyFrame::new_from_ndjson, 7),
728
+ function!(RbLazyFrame::new_from_ndjson, 8),
708
729
  )?;
709
730
  class.define_singleton_method("new_from_csv", function!(RbLazyFrame::new_from_csv, -1))?;
710
731
  class.define_singleton_method(
711
732
  "new_from_parquet",
712
- function!(RbLazyFrame::new_from_parquet, 14),
733
+ function!(RbLazyFrame::new_from_parquet, -1),
713
734
  )?;
714
735
  class.define_singleton_method("new_from_ipc", function!(RbLazyFrame::new_from_ipc, 10))?;
715
736
  class.define_method("write_json", method!(RbLazyFrame::write_json, 1))?;
@@ -741,15 +762,15 @@ fn init(ruby: &Ruby) -> RbResult<()> {
741
762
  method!(RbLazyFrame::group_by_dynamic, 9),
742
763
  )?;
743
764
  class.define_method("with_context", method!(RbLazyFrame::with_context, 1))?;
744
- class.define_method("join_asof", method!(RbLazyFrame::join_asof, 11))?;
745
- class.define_method("join", method!(RbLazyFrame::join, 8))?;
765
+ class.define_method("join_asof", method!(RbLazyFrame::join_asof, 12))?;
766
+ class.define_method("join", method!(RbLazyFrame::join, 10))?;
746
767
  class.define_method("with_column", method!(RbLazyFrame::with_column, 1))?;
747
768
  class.define_method("with_columns", method!(RbLazyFrame::with_columns, 1))?;
748
769
  class.define_method(
749
770
  "with_columns_seq",
750
771
  method!(RbLazyFrame::with_columns_seq, 1),
751
772
  )?;
752
- class.define_method("rename", method!(RbLazyFrame::rename, 2))?;
773
+ class.define_method("rename", method!(RbLazyFrame::rename, 3))?;
753
774
  class.define_method("reverse", method!(RbLazyFrame::reverse, 0))?;
754
775
  class.define_method("shift", method!(RbLazyFrame::shift, 2))?;
755
776
  class.define_method("fill_nan", method!(RbLazyFrame::fill_nan, 1))?;
@@ -770,6 +791,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
770
791
  class.define_method("unpivot", method!(RbLazyFrame::unpivot, 4))?;
771
792
  class.define_method("with_row_index", method!(RbLazyFrame::with_row_index, 2))?;
772
793
  class.define_method("drop", method!(RbLazyFrame::drop, 1))?;
794
+ class.define_method("cast", method!(RbLazyFrame::cast, 2))?;
773
795
  class.define_method("cast_all", method!(RbLazyFrame::cast_all, 2))?;
774
796
  class.define_method("_clone", method!(RbLazyFrame::clone, 0))?;
775
797
  class.define_method("collect_schema", method!(RbLazyFrame::collect_schema, 0))?;
@@ -810,7 +832,12 @@ fn init(ruby: &Ruby) -> RbResult<()> {
810
832
  class.define_singleton_method("new_array", function!(RbSeries::new_array, 5))?;
811
833
  class.define_singleton_method("new_decimal", function!(RbSeries::new_decimal, 3))?;
812
834
  class.define_singleton_method("repeat", function!(RbSeries::repeat, 4))?;
835
+ class.define_singleton_method(
836
+ "from_arrow_c_stream",
837
+ function!(RbSeries::from_arrow_c_stream, 1),
838
+ )?;
813
839
  class.define_method("struct_unnest", method!(RbSeries::struct_unnest, 0))?;
840
+ class.define_method("struct_fields", method!(RbSeries::struct_fields, 0))?;
814
841
  class.define_method(
815
842
  "is_sorted_flag",
816
843
  method!(RbSeries::is_sorted_ascending_flag, 0),
@@ -1081,5 +1108,9 @@ fn init(ruby: &Ruby) -> RbResult<()> {
1081
1108
  let class = module.define_class("RbStringCacheHolder", ruby.class_object())?;
1082
1109
  class.define_singleton_method("hold", function!(RbStringCacheHolder::hold, 0))?;
1083
1110
 
1111
+ // arrow array stream
1112
+ let class = module.define_class("RbArrowArrayStream", ruby.class_object())?;
1113
+ class.define_method("to_i", method!(RbArrowArrayStream::to_i, 0))?;
1114
+
1084
1115
  Ok(())
1085
1116
  }
@@ -7,13 +7,16 @@ use super::*;
7
7
  use crate::{RbDataFrame, RbPolarsErr, RbSeries, Wrap};
8
8
 
9
9
  fn get_iters(df: &DataFrame) -> Vec<SeriesIter> {
10
- df.get_columns().iter().map(|s| s.iter()).collect()
10
+ df.get_columns()
11
+ .iter()
12
+ .map(|s| s.as_materialized_series().iter())
13
+ .collect()
11
14
  }
12
15
 
13
16
  fn get_iters_skip(df: &DataFrame, skip: usize) -> Vec<std::iter::Skip<SeriesIter>> {
14
17
  df.get_columns()
15
18
  .iter()
16
- .map(|s| s.iter().skip(skip))
19
+ .map(|s| s.as_materialized_series().iter().skip(skip))
17
20
  .collect()
18
21
  }
19
22
 
@@ -113,16 +116,17 @@ pub fn apply_lambda_unknown<'a>(
113
116
  true,
114
117
  ));
115
118
  } else if out.is_kind_of(class::array()) {
116
- return Err(RbPolarsErr::other(
119
+ return Err(RbPolarsErr::Other(
117
120
  "A list output type is invalid. Do you mean to create polars List Series?\
118
121
  Then return a Series object."
119
122
  .into(),
120
- ));
123
+ )
124
+ .into());
121
125
  } else {
122
- return Err(RbPolarsErr::other("Could not determine output type".into()));
126
+ return Err(RbPolarsErr::Other("Could not determine output type".into()).into());
123
127
  }
124
128
  }
125
- Err(RbPolarsErr::other("Could not determine output type".into()))
129
+ Err(RbPolarsErr::Other("Could not determine output type".into()).into())
126
130
  }
127
131
 
128
132
  fn apply_iter<T>(
@@ -158,10 +162,16 @@ where
158
162
  {
159
163
  let skip = usize::from(first_value.is_some());
160
164
  if init_null_count == df.height() {
161
- ChunkedArray::full_null("apply", df.height())
165
+ ChunkedArray::full_null(PlSmallStr::from_static("map"), df.height())
162
166
  } else {
163
167
  let iter = apply_iter(df, lambda, init_null_count, skip);
164
- iterator_to_primitive(iter, init_null_count, first_value, "apply", df.height())
168
+ iterator_to_primitive(
169
+ iter,
170
+ init_null_count,
171
+ first_value,
172
+ PlSmallStr::from_static("map"),
173
+ df.height(),
174
+ )
165
175
  }
166
176
  }
167
177
 
@@ -174,10 +184,16 @@ pub fn apply_lambda_with_bool_out_type(
174
184
  ) -> ChunkedArray<BooleanType> {
175
185
  let skip = usize::from(first_value.is_some());
176
186
  if init_null_count == df.height() {
177
- ChunkedArray::full_null("apply", df.height())
187
+ ChunkedArray::full_null(PlSmallStr::from_static("map"), df.height())
178
188
  } else {
179
189
  let iter = apply_iter(df, lambda, init_null_count, skip);
180
- iterator_to_bool(iter, init_null_count, first_value, "apply", df.height())
190
+ iterator_to_bool(
191
+ iter,
192
+ init_null_count,
193
+ first_value,
194
+ PlSmallStr::from_static("map"),
195
+ df.height(),
196
+ )
181
197
  }
182
198
  }
183
199
 
@@ -190,10 +206,16 @@ pub fn apply_lambda_with_utf8_out_type(
190
206
  ) -> StringChunked {
191
207
  let skip = usize::from(first_value.is_some());
192
208
  if init_null_count == df.height() {
193
- ChunkedArray::full_null("apply", df.height())
209
+ ChunkedArray::full_null(PlSmallStr::from_static("map"), df.height())
194
210
  } else {
195
211
  let iter = apply_iter::<String>(df, lambda, init_null_count, skip);
196
- iterator_to_utf8(iter, init_null_count, first_value, "apply", df.height())
212
+ iterator_to_utf8(
213
+ iter,
214
+ init_null_count,
215
+ first_value,
216
+ PlSmallStr::from_static("map"),
217
+ df.height(),
218
+ )
197
219
  }
198
220
  }
199
221
 
@@ -207,7 +229,10 @@ pub fn apply_lambda_with_list_out_type(
207
229
  ) -> RbResult<ListChunked> {
208
230
  let skip = usize::from(first_value.is_some());
209
231
  if init_null_count == df.height() {
210
- Ok(ChunkedArray::full_null("apply", df.height()))
232
+ Ok(ChunkedArray::full_null(
233
+ PlSmallStr::from_static("map"),
234
+ df.height(),
235
+ ))
211
236
  } else {
212
237
  let mut iters = get_iters_skip(df, init_null_count + skip);
213
238
  let iter = ((init_null_count + skip)..df.height()).map(|_| {
@@ -229,7 +254,14 @@ pub fn apply_lambda_with_list_out_type(
229
254
  Err(e) => panic!("ruby function failed {}", e),
230
255
  }
231
256
  });
232
- iterator_to_list(dt, iter, init_null_count, first_value, "apply", df.height())
257
+ iterator_to_list(
258
+ dt,
259
+ iter,
260
+ init_null_count,
261
+ first_value,
262
+ PlSmallStr::from_static("map"),
263
+ df.height(),
264
+ )
233
265
  }
234
266
  }
235
267
 
@@ -1,10 +1,70 @@
1
- use magnus::Value;
1
+ use magnus::{prelude::*, RArray, Value};
2
2
  use polars::prelude::*;
3
3
 
4
- use crate::{RbExpr, Wrap};
4
+ use crate::rb_modules::*;
5
+ use crate::{RbExpr, RbSeries, Wrap};
5
6
 
6
- pub fn binary_lambda(_lambda: Value, _a: Series, _b: Series) -> PolarsResult<Option<Series>> {
7
- todo!();
7
+ fn to_series(v: Value, name: &str) -> PolarsResult<Series> {
8
+ let rb_rbseries = match v.funcall("_s", ()) {
9
+ Ok(s) => s,
10
+ // the lambda did not return a series, we try to create a new Ruby Series
11
+ _ => {
12
+ let data = RArray::new();
13
+ data.push(v).unwrap();
14
+ let res = series().funcall::<_, _, Value>("new", (name.to_string(), data));
15
+
16
+ match res {
17
+ Ok(ruby_s) => ruby_s.funcall::<_, _, &RbSeries>("_s", ()).unwrap(),
18
+ Err(_) => {
19
+ polars_bail!(ComputeError:
20
+ "expected a something that could convert to a `Series` but got: {}",
21
+ unsafe { v.classname() }
22
+ )
23
+ }
24
+ }
25
+ }
26
+ };
27
+ // Finally get the actual Series
28
+ Ok(rb_rbseries.series.borrow().clone())
29
+ }
30
+
31
+ pub fn binary_lambda(lambda: Value, a: Series, b: Series) -> PolarsResult<Option<Series>> {
32
+ // create a RbSeries struct/object for Ruby
33
+ let rbseries_a = RbSeries::new(a);
34
+ let rbseries_b = RbSeries::new(b);
35
+
36
+ // Wrap this RbSeries object in the Ruby side Series wrapper
37
+ let ruby_series_wrapper_a: Value = utils().funcall("wrap_s", (rbseries_a,)).unwrap();
38
+ let ruby_series_wrapper_b: Value = utils().funcall("wrap_s", (rbseries_b,)).unwrap();
39
+
40
+ // call the lambda and get a Ruby side Series wrapper
41
+ let result_series_wrapper: Value =
42
+ match lambda.funcall("call", (ruby_series_wrapper_a, ruby_series_wrapper_b)) {
43
+ Ok(rbobj) => rbobj,
44
+ Err(e) => polars_bail!(
45
+ ComputeError: "custom Ruby function failed: {}", e,
46
+ ),
47
+ };
48
+ let rbseries = if let Ok(rbexpr) = result_series_wrapper.funcall::<_, _, &RbExpr>("_rbexpr", ())
49
+ {
50
+ let expr = rbexpr.inner.clone();
51
+ let df = DataFrame::empty();
52
+ let out = df
53
+ .lazy()
54
+ .select([expr])
55
+ .with_predicate_pushdown(false)
56
+ .with_projection_pushdown(false)
57
+ .collect()?;
58
+
59
+ let s = out.select_at_idx(0).unwrap().clone();
60
+ RbSeries::new(s.take_materialized_series())
61
+ } else {
62
+ return Some(to_series(result_series_wrapper, "")).transpose();
63
+ };
64
+
65
+ // Finally get the actual Series
66
+ let binding = rbseries.series.borrow();
67
+ Ok(Some(binding.clone()))
8
68
  }
9
69
 
10
70
  pub fn map_single(
@@ -13,6 +73,7 @@ pub fn map_single(
13
73
  _output_type: Option<Wrap<DataType>>,
14
74
  _agg_list: bool,
15
75
  _is_elementwise: bool,
76
+ _returns_scalar: bool,
16
77
  ) -> RbExpr {
17
78
  todo!();
18
79
  }
@@ -28,14 +28,14 @@ fn iterator_to_struct(
28
28
  it: impl Iterator<Item = Option<Value>>,
29
29
  init_null_count: usize,
30
30
  first_value: AnyValue,
31
- name: &str,
31
+ name: PlSmallStr,
32
32
  capacity: usize,
33
33
  ) -> RbResult<RbSeries> {
34
34
  let (vals, flds) = match &first_value {
35
35
  av @ AnyValue::Struct(_, _, flds) => (av._iter_struct_av().collect::<Vec<_>>(), &**flds),
36
36
  AnyValue::StructOwned(payload) => (payload.0.clone(), &*payload.1),
37
37
  _ => {
38
- return Err(crate::error::ComputeError::new_err(format!(
38
+ return Err(crate::exceptions::ComputeError::new_err(format!(
39
39
  "expected struct got {first_value:?}",
40
40
  )))
41
41
  }
@@ -70,7 +70,7 @@ fn iterator_to_struct(
70
70
  Some(dict) => {
71
71
  let dict = RHash::try_convert(dict)?;
72
72
  if dict.len() != struct_width {
73
- return Err(crate::error::ComputeError::new_err(
73
+ return Err(crate::exceptions::ComputeError::new_err(
74
74
  format!("Cannot create struct type.\n> The struct dtype expects {} fields, but it got a dict with {} fields.", struct_width, dict.len())
75
75
  ));
76
76
  }
@@ -89,21 +89,23 @@ fn iterator_to_struct(
89
89
  items
90
90
  .par_iter()
91
91
  .zip(flds)
92
- .map(|(av, fld)| Series::new(fld.name(), av))
92
+ .map(|(av, fld)| Series::new(fld.name().clone(), av))
93
93
  .collect::<Vec<_>>()
94
94
  });
95
95
 
96
- Ok(StructChunked::from_series(name, &fields)
97
- .unwrap()
98
- .into_series()
99
- .into())
96
+ Ok(
97
+ StructChunked::from_series(name, fields[0].len(), fields.iter())
98
+ .unwrap()
99
+ .into_series()
100
+ .into(),
101
+ )
100
102
  }
101
103
 
102
104
  fn iterator_to_primitive<T>(
103
105
  it: impl Iterator<Item = Option<T::Native>>,
104
106
  init_null_count: usize,
105
107
  first_value: Option<T::Native>,
106
- name: &str,
108
+ name: PlSmallStr,
107
109
  capacity: usize,
108
110
  ) -> ChunkedArray<T>
109
111
  where
@@ -136,7 +138,7 @@ fn iterator_to_bool(
136
138
  it: impl Iterator<Item = Option<bool>>,
137
139
  init_null_count: usize,
138
140
  first_value: Option<bool>,
139
- name: &str,
141
+ name: PlSmallStr,
140
142
  capacity: usize,
141
143
  ) -> ChunkedArray<BooleanType> {
142
144
  // safety: we know the iterators len
@@ -166,7 +168,7 @@ fn iterator_to_object(
166
168
  it: impl Iterator<Item = Option<ObjectValue>>,
167
169
  init_null_count: usize,
168
170
  first_value: Option<ObjectValue>,
169
- name: &str,
171
+ name: PlSmallStr,
170
172
  capacity: usize,
171
173
  ) -> ObjectChunked<ObjectValue> {
172
174
  // safety: we know the iterators len
@@ -196,7 +198,7 @@ fn iterator_to_utf8(
196
198
  it: impl Iterator<Item = Option<String>>,
197
199
  init_null_count: usize,
198
200
  first_value: Option<&str>,
199
- name: &str,
201
+ name: PlSmallStr,
200
202
  capacity: usize,
201
203
  ) -> StringChunked {
202
204
  let first_value = first_value.map(|v| v.to_string());
@@ -229,11 +231,10 @@ fn iterator_to_list(
229
231
  it: impl Iterator<Item = Option<Series>>,
230
232
  init_null_count: usize,
231
233
  first_value: Option<&Series>,
232
- name: &str,
234
+ name: PlSmallStr,
233
235
  capacity: usize,
234
236
  ) -> RbResult<ListChunked> {
235
- let mut builder =
236
- get_list_builder(dt, capacity * 5, capacity, name).map_err(RbPolarsErr::from)?;
237
+ let mut builder = get_list_builder(dt, capacity * 5, capacity, name);
237
238
  for _ in 0..init_null_count {
238
239
  builder.append_null()
239
240
  }
@@ -246,7 +247,7 @@ fn iterator_to_list(
246
247
  Some(s) => {
247
248
  if s.len() == 0 && s.dtype() != dt {
248
249
  builder
249
- .append_series(&Series::full_null("", 0, dt))
250
+ .append_series(&Series::full_null(PlSmallStr::EMPTY, 0, dt))
250
251
  .unwrap()
251
252
  } else {
252
253
  builder.append_series(&s).map_err(RbPolarsErr::from)?