polars-df 0.21.0 → 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +27 -0
  3. data/Cargo.lock +55 -48
  4. data/Cargo.toml +3 -0
  5. data/README.md +12 -0
  6. data/ext/polars/Cargo.toml +22 -11
  7. data/ext/polars/src/batched_csv.rs +4 -4
  8. data/ext/polars/src/catalog/unity.rs +96 -94
  9. data/ext/polars/src/conversion/any_value.rs +26 -30
  10. data/ext/polars/src/conversion/chunked_array.rs +32 -28
  11. data/ext/polars/src/conversion/datetime.rs +11 -0
  12. data/ext/polars/src/conversion/mod.rs +307 -34
  13. data/ext/polars/src/dataframe/construction.rs +4 -3
  14. data/ext/polars/src/dataframe/export.rs +17 -15
  15. data/ext/polars/src/dataframe/general.rs +15 -12
  16. data/ext/polars/src/dataframe/io.rs +1 -2
  17. data/ext/polars/src/dataframe/mod.rs +25 -1
  18. data/ext/polars/src/dataframe/serde.rs +23 -8
  19. data/ext/polars/src/exceptions.rs +8 -4
  20. data/ext/polars/src/expr/array.rs +73 -4
  21. data/ext/polars/src/expr/binary.rs +26 -1
  22. data/ext/polars/src/expr/bitwise.rs +39 -0
  23. data/ext/polars/src/expr/categorical.rs +20 -0
  24. data/ext/polars/src/expr/datatype.rs +24 -1
  25. data/ext/polars/src/expr/datetime.rs +58 -14
  26. data/ext/polars/src/expr/general.rs +87 -15
  27. data/ext/polars/src/expr/list.rs +32 -24
  28. data/ext/polars/src/expr/meta.rs +15 -6
  29. data/ext/polars/src/expr/mod.rs +3 -0
  30. data/ext/polars/src/expr/name.rs +19 -14
  31. data/ext/polars/src/expr/rolling.rs +20 -0
  32. data/ext/polars/src/expr/serde.rs +28 -0
  33. data/ext/polars/src/expr/string.rs +64 -10
  34. data/ext/polars/src/expr/struct.rs +9 -1
  35. data/ext/polars/src/file.rs +15 -9
  36. data/ext/polars/src/functions/business.rs +0 -1
  37. data/ext/polars/src/functions/io.rs +25 -3
  38. data/ext/polars/src/functions/lazy.rs +11 -6
  39. data/ext/polars/src/functions/meta.rs +3 -3
  40. data/ext/polars/src/functions/string_cache.rs +3 -3
  41. data/ext/polars/src/interop/arrow/to_ruby.rs +3 -3
  42. data/ext/polars/src/interop/numo/numo_rs.rs +4 -3
  43. data/ext/polars/src/io/mod.rs +6 -0
  44. data/ext/polars/src/lazyframe/general.rs +59 -9
  45. data/ext/polars/src/lazyframe/mod.rs +16 -1
  46. data/ext/polars/src/lazyframe/optflags.rs +58 -0
  47. data/ext/polars/src/lazyframe/serde.rs +27 -3
  48. data/ext/polars/src/lib.rs +261 -19
  49. data/ext/polars/src/map/dataframe.rs +20 -17
  50. data/ext/polars/src/map/lazy.rs +6 -5
  51. data/ext/polars/src/map/series.rs +8 -7
  52. data/ext/polars/src/on_startup.rs +12 -5
  53. data/ext/polars/src/rb_modules.rs +2 -2
  54. data/ext/polars/src/series/aggregation.rs +85 -28
  55. data/ext/polars/src/series/construction.rs +1 -0
  56. data/ext/polars/src/series/export.rs +37 -33
  57. data/ext/polars/src/series/general.rs +120 -21
  58. data/ext/polars/src/series/mod.rs +29 -4
  59. data/lib/polars/array_expr.rb +382 -3
  60. data/lib/polars/array_name_space.rb +281 -0
  61. data/lib/polars/binary_expr.rb +67 -0
  62. data/lib/polars/binary_name_space.rb +43 -0
  63. data/lib/polars/cat_expr.rb +224 -0
  64. data/lib/polars/cat_name_space.rb +138 -0
  65. data/lib/polars/config.rb +2 -2
  66. data/lib/polars/convert.rb +6 -6
  67. data/lib/polars/data_frame.rb +794 -27
  68. data/lib/polars/data_type_expr.rb +52 -0
  69. data/lib/polars/data_types.rb +26 -5
  70. data/lib/polars/date_time_expr.rb +252 -1
  71. data/lib/polars/date_time_name_space.rb +299 -0
  72. data/lib/polars/expr.rb +1248 -206
  73. data/lib/polars/functions/business.rb +95 -0
  74. data/lib/polars/functions/datatype.rb +21 -0
  75. data/lib/polars/functions/lazy.rb +14 -1
  76. data/lib/polars/io/csv.rb +1 -1
  77. data/lib/polars/io/iceberg.rb +27 -0
  78. data/lib/polars/io/json.rb +4 -4
  79. data/lib/polars/io/ndjson.rb +4 -4
  80. data/lib/polars/io/parquet.rb +32 -7
  81. data/lib/polars/io/scan_options.rb +4 -1
  82. data/lib/polars/lazy_frame.rb +1028 -28
  83. data/lib/polars/list_expr.rb +217 -17
  84. data/lib/polars/list_name_space.rb +231 -22
  85. data/lib/polars/meta_expr.rb +89 -0
  86. data/lib/polars/name_expr.rb +36 -0
  87. data/lib/polars/query_opt_flags.rb +50 -0
  88. data/lib/polars/scan_cast_options.rb +20 -1
  89. data/lib/polars/schema.rb +79 -3
  90. data/lib/polars/selector.rb +72 -0
  91. data/lib/polars/selectors.rb +3 -3
  92. data/lib/polars/series.rb +1053 -54
  93. data/lib/polars/string_expr.rb +436 -32
  94. data/lib/polars/string_name_space.rb +736 -50
  95. data/lib/polars/struct_expr.rb +103 -0
  96. data/lib/polars/struct_name_space.rb +19 -1
  97. data/lib/polars/utils/serde.rb +17 -0
  98. data/lib/polars/utils/various.rb +22 -1
  99. data/lib/polars/utils.rb +5 -1
  100. data/lib/polars/version.rb +1 -1
  101. data/lib/polars.rb +6 -0
  102. metadata +11 -1
@@ -1,4 +1,4 @@
1
- use magnus::{Ruby, Value, prelude::*, value::Opaque};
1
+ use magnus::{RArray, prelude::*};
2
2
  use polars::lazy::dsl::lit;
3
3
  use polars::prelude::*;
4
4
  use polars::series::ops::NullBehavior;
@@ -83,6 +83,18 @@ impl RbExpr {
83
83
  self.inner.clone().list().mean().into()
84
84
  }
85
85
 
86
+ pub fn list_median(&self) -> Self {
87
+ self.inner.clone().list().median().into()
88
+ }
89
+
90
+ pub fn list_std(&self, ddof: u8) -> Self {
91
+ self.inner.clone().list().std(ddof).into()
92
+ }
93
+
94
+ pub fn list_var(&self, ddof: u8) -> Self {
95
+ self.inner.clone().list().var(ddof).into()
96
+ }
97
+
86
98
  pub fn list_min(&self) -> Self {
87
99
  self.inner.clone().list().min().into()
88
100
  }
@@ -171,40 +183,36 @@ impl RbExpr {
171
183
  .into()
172
184
  }
173
185
 
186
+ pub fn list_gather_every(&self, n: &RbExpr, offset: &RbExpr) -> Self {
187
+ self.inner
188
+ .clone()
189
+ .list()
190
+ .gather_every(n.inner.clone(), offset.inner.clone())
191
+ .into()
192
+ }
193
+
174
194
  pub fn list_to_array(&self, width: usize) -> Self {
175
195
  self.inner.clone().list().to_array(width).into()
176
196
  }
177
197
 
178
- pub fn list_to_struct(
179
- &self,
180
- width_strat: Wrap<ListToStructWidthStrategy>,
181
- name_gen: Option<Value>,
182
- upper_bound: Option<usize>,
183
- ) -> RbResult<Self> {
184
- let name_gen = name_gen.map(|lambda| {
185
- let lambda = Opaque::from(lambda);
186
- Arc::new(move |idx: usize| {
187
- let lambda = Ruby::get().unwrap().get_inner(lambda);
188
- let out: String = lambda.funcall("call", (idx,)).unwrap();
189
- PlSmallStr::from_string(out)
190
- });
191
-
192
- // non-Ruby thread
193
- todo!();
194
- });
195
-
198
+ pub fn list_to_struct(&self, names: RArray) -> RbResult<Self> {
196
199
  Ok(self
197
200
  .inner
198
201
  .clone()
199
202
  .list()
200
- .to_struct(ListToStruct::InferWidth {
201
- infer_field_strategy: width_strat.0,
202
- get_index_name: name_gen,
203
- max_fields: upper_bound,
204
- })
203
+ .to_struct(
204
+ names
205
+ .into_iter()
206
+ .map(|x| Ok(Wrap::<PlSmallStr>::try_convert(x)?.0))
207
+ .collect::<RbResult<Arc<[_]>>>()?,
208
+ )
205
209
  .into())
206
210
  }
207
211
 
212
+ pub fn list_n_unique(&self) -> Self {
213
+ self.inner.clone().list().n_unique().into()
214
+ }
215
+
208
216
  pub fn list_unique(&self, maintain_order: bool) -> Self {
209
217
  let e = self.inner.clone();
210
218
 
@@ -1,4 +1,4 @@
1
- use magnus::RArray;
1
+ use magnus::{RArray, Ruby};
2
2
  use polars::prelude::Schema;
3
3
 
4
4
  use crate::{RbExpr, RbPolarsErr, RbResult, Wrap};
@@ -8,17 +8,15 @@ impl RbExpr {
8
8
  self.inner == other.inner
9
9
  }
10
10
 
11
- pub fn meta_pop(&self, schema: Option<Wrap<Schema>>) -> RbResult<RArray> {
11
+ pub fn meta_pop(ruby: &Ruby, rb_self: &Self, schema: Option<Wrap<Schema>>) -> RbResult<RArray> {
12
12
  let schema = schema.as_ref().map(|s| &s.0);
13
- let exprs = self
13
+ let exprs = rb_self
14
14
  .inner
15
15
  .clone()
16
16
  .meta()
17
17
  .pop(schema)
18
18
  .map_err(RbPolarsErr::from)?;
19
- Ok(RArray::from_iter(
20
- exprs.iter().map(|e| RbExpr::from(e.clone())),
21
- ))
19
+ Ok(ruby.ary_from_iter(exprs.iter().map(|e| RbExpr::from(e.clone()))))
22
20
  }
23
21
 
24
22
  pub fn meta_root_names(&self) -> Vec<String> {
@@ -57,6 +55,17 @@ impl RbExpr {
57
55
  self.inner.clone().meta().is_regex_projection()
58
56
  }
59
57
 
58
+ pub fn meta_is_column_selection(&self, allow_aliasing: bool) -> bool {
59
+ self.inner
60
+ .clone()
61
+ .meta()
62
+ .is_column_selection(allow_aliasing)
63
+ }
64
+
65
+ pub fn meta_is_literal(&self, allow_aliasing: bool) -> bool {
66
+ self.inner.clone().meta().is_literal(allow_aliasing)
67
+ }
68
+
60
69
  fn compute_tree_format(
61
70
  &self,
62
71
  display_as_dot: bool,
@@ -1,5 +1,6 @@
1
1
  mod array;
2
2
  mod binary;
3
+ mod bitwise;
3
4
  mod categorical;
4
5
  pub mod datatype;
5
6
  mod datetime;
@@ -9,6 +10,8 @@ mod meta;
9
10
  mod name;
10
11
  mod rolling;
11
12
  pub mod selector;
13
+ #[cfg(feature = "serialize_binary")]
14
+ mod serde;
12
15
  mod string;
13
16
  mod r#struct;
14
17
 
@@ -11,20 +11,17 @@ impl RbExpr {
11
11
 
12
12
  pub fn name_map(&self, lambda: Proc) -> Self {
13
13
  let lambda = Opaque::from(lambda);
14
- self.inner
15
- .clone()
16
- .name()
17
- .map(move |name| {
18
- let lambda = Ruby::get().unwrap().get_inner(lambda);
19
- let out = lambda.call::<_, String>((name.as_str(),));
20
- match out {
21
- Ok(out) => Ok(format_pl_smallstr!("{}", out)),
22
- Err(e) => Err(PolarsError::ComputeError(
23
- format!("Ruby function in 'name.map' produced an error: {e}.").into(),
24
- )),
25
- }
26
- })
27
- .into()
14
+ let func = PlanCallback::new(move |name: PlSmallStr| {
15
+ let lambda = Ruby::get().unwrap().get_inner(lambda);
16
+ let out = lambda.call::<_, String>((name.as_str(),));
17
+ match out {
18
+ Ok(out) => Ok(format_pl_smallstr!("{}", out)),
19
+ Err(e) => Err(PolarsError::ComputeError(
20
+ format!("Ruby function in 'name.map' produced an error: {e}.").into(),
21
+ )),
22
+ }
23
+ });
24
+ self.inner.clone().name().map(func).into()
28
25
  }
29
26
 
30
27
  pub fn name_prefix(&self, prefix: String) -> Self {
@@ -42,4 +39,12 @@ impl RbExpr {
42
39
  pub fn name_to_uppercase(&self) -> Self {
43
40
  self.inner.clone().name().to_uppercase().into()
44
41
  }
42
+
43
+ pub fn name_prefix_fields(&self, prefix: String) -> Self {
44
+ self.inner.clone().name().prefix_fields(&prefix).into()
45
+ }
46
+
47
+ pub fn name_suffix_fields(&self, suffix: String) -> Self {
48
+ self.inner.clone().name().suffix_fields(&suffix).into()
49
+ }
45
50
  }
@@ -337,4 +337,24 @@ impl RbExpr {
337
337
 
338
338
  self.inner.clone().rolling_skew(options).into()
339
339
  }
340
+
341
+ pub fn rolling_kurtosis(
342
+ &self,
343
+ window_size: usize,
344
+ fisher: bool,
345
+ bias: bool,
346
+ min_periods: Option<usize>,
347
+ center: bool,
348
+ ) -> Self {
349
+ let min_periods = min_periods.unwrap_or(window_size);
350
+ let options = RollingOptionsFixedWindow {
351
+ window_size,
352
+ weights: None,
353
+ min_periods,
354
+ center,
355
+ fn_params: Some(RollingFnParams::Kurtosis { fisher, bias }),
356
+ };
357
+
358
+ self.inner.clone().rolling_kurtosis(options).into()
359
+ }
340
360
  }
@@ -0,0 +1,28 @@
1
+ use std::io::{BufReader, BufWriter};
2
+
3
+ use magnus::Value;
4
+ use polars::lazy::prelude::Expr;
5
+ use polars_utils::pl_serialize;
6
+
7
+ use crate::exceptions::ComputeError;
8
+ use crate::file::get_file_like;
9
+ use crate::{RbExpr, RbResult};
10
+
11
+ impl RbExpr {
12
+ pub fn serialize_binary(&self, rb_f: Value) -> RbResult<()> {
13
+ let file = get_file_like(rb_f, true)?;
14
+ let writer = BufWriter::new(file);
15
+ pl_serialize::SerializeOptions::default()
16
+ .serialize_into_writer::<_, _, true>(writer, &self.inner)
17
+ .map_err(|err| ComputeError::new_err(err.to_string()))
18
+ }
19
+
20
+ pub fn deserialize_binary(rb_f: Value) -> RbResult<RbExpr> {
21
+ let file = get_file_like(rb_f, false)?;
22
+ let reader = BufReader::new(file);
23
+ let expr: Expr = pl_serialize::SerializeOptions::default()
24
+ .deserialize_from_reader::<_, _, true>(reader)
25
+ .map_err(|err| ComputeError::new_err(err.to_string()))?;
26
+ Ok(expr.into())
27
+ }
28
+ }
@@ -1,7 +1,7 @@
1
1
  use polars::prelude::*;
2
2
 
3
3
  use crate::conversion::Wrap;
4
- use crate::{RbExpr, RbPolarsErr, RbResult};
4
+ use crate::{RbDataTypeExpr, RbExpr, RbPolarsErr, RbResult};
5
5
 
6
6
  impl RbExpr {
7
7
  pub fn str_join(&self, delimiter: String, ignore_nulls: bool) -> Self {
@@ -122,6 +122,14 @@ impl RbExpr {
122
122
  .into()
123
123
  }
124
124
 
125
+ pub fn str_head(&self, n: &Self) -> Self {
126
+ self.inner.clone().str().head(n.inner.clone()).into()
127
+ }
128
+
129
+ pub fn str_tail(&self, n: &Self) -> Self {
130
+ self.inner.clone().str().tail(n.inner.clone()).into()
131
+ }
132
+
125
133
  pub fn str_to_uppercase(&self) -> Self {
126
134
  self.inner.clone().str().to_uppercase().into()
127
135
  }
@@ -159,6 +167,10 @@ impl RbExpr {
159
167
  .into()
160
168
  }
161
169
 
170
+ pub fn str_normalize(&self, form: Wrap<UnicodeForm>) -> Self {
171
+ self.inner.clone().str().normalize(form.0).into()
172
+ }
173
+
162
174
  pub fn str_reverse(&self) -> Self {
163
175
  self.inner.clone().str().reverse().into()
164
176
  }
@@ -200,6 +212,23 @@ impl RbExpr {
200
212
  }
201
213
  }
202
214
 
215
+ pub fn str_find(&self, pat: &Self, literal: Option<bool>, strict: bool) -> Self {
216
+ match literal {
217
+ Some(true) => self
218
+ .inner
219
+ .clone()
220
+ .str()
221
+ .find_literal(pat.inner.clone())
222
+ .into(),
223
+ _ => self
224
+ .inner
225
+ .clone()
226
+ .str()
227
+ .find(pat.inner.clone(), strict)
228
+ .into(),
229
+ }
230
+ }
231
+
203
232
  pub fn str_ends_with(&self, sub: &RbExpr) -> Self {
204
233
  self.inner.clone().str().ends_with(sub.inner.clone()).into()
205
234
  }
@@ -236,16 +265,11 @@ impl RbExpr {
236
265
  .into()
237
266
  }
238
267
 
239
- pub fn str_json_decode(
240
- &self,
241
- dtype: Option<Wrap<DataType>>,
242
- infer_schema_len: Option<usize>,
243
- ) -> Self {
244
- let dtype = dtype.map(|wrap| wrap.0);
268
+ pub fn str_json_decode(&self, dtype: &RbDataTypeExpr) -> Self {
245
269
  self.inner
246
270
  .clone()
247
271
  .str()
248
- .json_decode(dtype, infer_schema_len)
272
+ .json_decode(dtype.inner.clone())
249
273
  .into()
250
274
  }
251
275
 
@@ -323,8 +347,8 @@ impl RbExpr {
323
347
  self.inner.clone().str().splitn(by.inner.clone(), n).into()
324
348
  }
325
349
 
326
- pub fn str_to_decimal(&self, infer_len: usize) -> Self {
327
- self.inner.clone().str().to_decimal(infer_len).into()
350
+ pub fn str_to_decimal(&self, scale: usize) -> Self {
351
+ self.inner.clone().str().to_decimal(scale).into()
328
352
  }
329
353
 
330
354
  pub fn str_contains_any(&self, patterns: &RbExpr, ascii_case_insensitive: bool) -> Self {
@@ -351,4 +375,34 @@ impl RbExpr {
351
375
  )
352
376
  .into()
353
377
  }
378
+
379
+ pub fn str_extract_many(
380
+ &self,
381
+ patterns: &RbExpr,
382
+ ascii_case_insensitive: bool,
383
+ overlapping: bool,
384
+ ) -> Self {
385
+ self.inner
386
+ .clone()
387
+ .str()
388
+ .extract_many(patterns.inner.clone(), ascii_case_insensitive, overlapping)
389
+ .into()
390
+ }
391
+
392
+ pub fn str_find_many(
393
+ &self,
394
+ patterns: &RbExpr,
395
+ ascii_case_insensitive: bool,
396
+ overlapping: bool,
397
+ ) -> Self {
398
+ self.inner
399
+ .clone()
400
+ .str()
401
+ .find_many(patterns.inner.clone(), ascii_case_insensitive, overlapping)
402
+ .into()
403
+ }
404
+
405
+ pub fn str_escape_regex(&self) -> Self {
406
+ self.inner.clone().str().escape_regex().into()
407
+ }
354
408
  }
@@ -1,4 +1,6 @@
1
- use crate::RbExpr;
1
+ use magnus::RArray;
2
+
3
+ use crate::{RbExpr, RbResult, rb_exprs_to_exprs};
2
4
 
3
5
  impl RbExpr {
4
6
  pub fn struct_field_by_index(&self, index: i64) -> Self {
@@ -16,4 +18,10 @@ impl RbExpr {
16
18
  pub fn struct_json_encode(&self) -> Self {
17
19
  self.inner.clone().struct_().json_encode().into()
18
20
  }
21
+
22
+ pub fn struct_with_fields(&self, fields: RArray) -> RbResult<Self> {
23
+ let fields = rb_exprs_to_exprs(fields)?;
24
+ let e = self.inner.clone().struct_().with_fields(fields);
25
+ Ok(e.into())
26
+ }
19
27
  }
@@ -3,7 +3,7 @@ use std::io;
3
3
  use std::io::{Cursor, Read, Seek, SeekFrom, Write};
4
4
  use std::path::PathBuf;
5
5
 
6
- use magnus::{Error, RString, Ruby, Value, exception, prelude::*, value::Opaque};
6
+ use magnus::{Error, RString, Ruby, Value, prelude::*, value::Opaque};
7
7
  use polars::io::cloud::CloudOptions;
8
8
  use polars::io::mmap::MmapBytesReader;
9
9
  use polars::prelude::PlPath;
@@ -67,23 +67,25 @@ impl RbFileLikeObject {
67
67
  /// ruby object has a `read`, `write`, and `seek` methods in respect to parameters.
68
68
  /// Will return a `TypeError` if object does not have `read`, `seek`, and `write` methods.
69
69
  pub fn with_requirements(object: Value, read: bool, write: bool, seek: bool) -> RbResult<Self> {
70
+ let ruby = Ruby::get_with(object);
71
+
70
72
  if read && !object.respond_to("read", false)? {
71
73
  return Err(Error::new(
72
- exception::type_error(),
74
+ ruby.exception_type_error(),
73
75
  "Object does not have a .read() method.",
74
76
  ));
75
77
  }
76
78
 
77
79
  if seek && !object.respond_to("seek", false)? {
78
80
  return Err(Error::new(
79
- exception::type_error(),
81
+ ruby.exception_type_error(),
80
82
  "Object does not have a .seek() method.",
81
83
  ));
82
84
  }
83
85
 
84
86
  if write && !object.respond_to("write", false)? {
85
87
  return Err(Error::new(
86
- exception::type_error(),
88
+ ruby.exception_type_error(),
87
89
  "Object does not have a .write() method.",
88
90
  ));
89
91
  }
@@ -113,10 +115,10 @@ impl Read for RbFileLikeObject {
113
115
 
114
116
  impl Write for RbFileLikeObject {
115
117
  fn write(&mut self, buf: &[u8]) -> Result<usize, io::Error> {
116
- let rbbytes = RString::from_slice(buf);
118
+ let ruby = Ruby::get().unwrap();
119
+ let rbbytes = ruby.str_from_slice(buf);
117
120
 
118
- let number_bytes_written = Ruby::get()
119
- .unwrap()
121
+ let number_bytes_written = ruby
120
122
  .get_inner(self.inner)
121
123
  .funcall::<_, _, usize>("write", (rbbytes,))
122
124
  .map_err(rberr_to_io_err)?;
@@ -265,8 +267,12 @@ pub fn get_mmap_bytes_reader_and_path<'a>(
265
267
  RbReadBytes::Bytes(v) => Ok((Box::new(Cursor::new(unsafe { v.as_slice() })), None)),
266
268
  RbReadBytes::Other(v) => {
267
269
  let path = PathBuf::try_convert(*v)?;
268
- let f = File::open(&path)
269
- .map_err(|e| Error::new(exception::runtime_error(), e.to_string()))?;
270
+ let f = File::open(&path).map_err(|e| {
271
+ Error::new(
272
+ Ruby::get().unwrap().exception_runtime_error(),
273
+ e.to_string(),
274
+ )
275
+ })?;
270
276
  Ok((Box::new(f), Some(path)))
271
277
  }
272
278
  }
@@ -2,7 +2,6 @@ use polars::lazy::dsl;
2
2
 
3
3
  use crate::RbExpr;
4
4
 
5
- // TODO add to Ruby
6
5
  pub fn business_day_count(
7
6
  start: &RbExpr,
8
7
  end: &RbExpr,
@@ -1,6 +1,6 @@
1
1
  use std::io::BufReader;
2
2
 
3
- use magnus::{RHash, Value};
3
+ use magnus::{RHash, Ruby, Value};
4
4
  use polars::prelude::ArrowSchema;
5
5
 
6
6
  use crate::conversion::Wrap;
@@ -16,11 +16,32 @@ pub fn read_ipc_schema(rb_f: Value) -> RbResult<RHash> {
16
16
  EitherRustRubyFile::Rb(mut r) => read_file_metadata(&mut r).map_err(RbPolarsErr::from)?,
17
17
  };
18
18
 
19
- let dict = RHash::new();
19
+ let ruby = Ruby::get_with(rb_f);
20
+ let dict = ruby.hash_new();
20
21
  fields_to_rbdict(&metadata.schema, &dict)?;
21
22
  Ok(dict)
22
23
  }
23
24
 
25
+ pub fn read_parquet_metadata(rb_f: Value) -> RbResult<RHash> {
26
+ use polars_parquet::read::read_metadata;
27
+ use polars_parquet::read::schema::read_custom_key_value_metadata;
28
+
29
+ let metadata = match get_either_file(rb_f, false)? {
30
+ EitherRustRubyFile::Rust(r) => {
31
+ read_metadata(&mut BufReader::new(r)).map_err(RbPolarsErr::from)?
32
+ }
33
+ EitherRustRubyFile::Rb(mut r) => read_metadata(&mut r).map_err(RbPolarsErr::from)?,
34
+ };
35
+
36
+ let key_value_metadata = read_custom_key_value_metadata(metadata.key_value_metadata());
37
+ let ruby = Ruby::get_with(rb_f);
38
+ let dict = ruby.hash_new();
39
+ for (key, value) in key_value_metadata.into_iter() {
40
+ dict.aset(key.as_str(), value.as_str())?;
41
+ }
42
+ Ok(dict)
43
+ }
44
+
24
45
  pub fn read_parquet_schema(rb_f: Value) -> RbResult<RHash> {
25
46
  use polars_parquet::read::{infer_schema, read_metadata};
26
47
 
@@ -32,7 +53,8 @@ pub fn read_parquet_schema(rb_f: Value) -> RbResult<RHash> {
32
53
  };
33
54
  let arrow_schema = infer_schema(&metadata).map_err(RbPolarsErr::from)?;
34
55
 
35
- let dict = RHash::new();
56
+ let ruby = Ruby::get_with(rb_f);
57
+ let dict = ruby.hash_new();
36
58
  fields_to_rbdict(&arrow_schema, &dict)?;
37
59
  Ok(dict)
38
60
  }
@@ -1,6 +1,6 @@
1
- use magnus::encoding::{self, EncodingCapable};
1
+ use magnus::encoding::EncodingCapable;
2
2
  use magnus::{
3
- Float, Integer, RArray, RString, Ruby, Value, class, prelude::*, typed_data::Obj, value::Opaque,
3
+ Float, Integer, RArray, RString, Ruby, Value, prelude::*, typed_data::Obj, value::Opaque,
4
4
  };
5
5
  use polars::lazy::dsl;
6
6
  use polars::prelude::*;
@@ -85,6 +85,10 @@ pub fn as_struct(exprs: RArray) -> RbResult<RbExpr> {
85
85
  Ok(dsl::as_struct(exprs).into())
86
86
  }
87
87
 
88
+ pub fn field(names: Vec<String>) -> RbExpr {
89
+ dsl::Expr::Field(names.into_iter().map(|x| x.into()).collect()).into()
90
+ }
91
+
88
92
  pub fn coalesce(exprs: RArray) -> RbResult<RbExpr> {
89
93
  let exprs = rb_exprs_to_exprs(exprs)?;
90
94
  Ok(dsl::coalesce(&exprs).into())
@@ -94,10 +98,10 @@ pub fn col(name: String) -> RbExpr {
94
98
  dsl::col(&name).into()
95
99
  }
96
100
 
97
- pub fn collect_all(lfs: RArray) -> RbResult<RArray> {
101
+ pub fn collect_all(ruby: &Ruby, lfs: RArray) -> RbResult<RArray> {
98
102
  let lfs = lfs.typecheck::<Obj<RbLazyFrame>>()?;
99
103
 
100
- Ok(RArray::from_iter(lfs.iter().map(|lf| {
104
+ Ok(ruby.ary_from_iter(lfs.iter().map(|lf| {
101
105
  let df = lf.ldf.borrow().clone().collect().unwrap();
102
106
  RbDataFrame::new(df)
103
107
  })))
@@ -279,7 +283,8 @@ pub fn fold(
279
283
  }
280
284
 
281
285
  pub fn lit(value: Value, allow_object: bool, is_scalar: bool) -> RbResult<RbExpr> {
282
- if value.is_kind_of(class::true_class()) || value.is_kind_of(class::false_class()) {
286
+ let ruby = Ruby::get_with(value);
287
+ if value.is_kind_of(ruby.class_true_class()) || value.is_kind_of(ruby.class_false_class()) {
283
288
  Ok(dsl::lit(bool::try_convert(value)?).into())
284
289
  } else if let Some(v) = Integer::from_value(value) {
285
290
  match v.to_i64() {
@@ -298,7 +303,7 @@ pub fn lit(value: Value, allow_object: bool, is_scalar: bool) -> RbResult<RbExpr
298
303
  } else if let Some(v) = Float::from_value(value) {
299
304
  Ok(dsl::lit(v.to_f64()).into())
300
305
  } else if let Some(v) = RString::from_value(value) {
301
- if v.enc_get() == encoding::Index::utf8() {
306
+ if v.enc_get() == ruby.utf8_encindex() {
302
307
  Ok(dsl::lit(v.to_string()?).into())
303
308
  } else {
304
309
  Ok(dsl::lit(unsafe { v.as_slice() }).into())
@@ -1,4 +1,4 @@
1
- use magnus::{IntoValue, Value};
1
+ use magnus::{IntoValue, Ruby, Value};
2
2
  use polars_core;
3
3
  use polars_core::POOL;
4
4
  use polars_core::fmt::FloatFmt;
@@ -7,8 +7,8 @@ use polars_core::prelude::IDX_DTYPE;
7
7
  use crate::conversion::Wrap;
8
8
  use crate::{RbResult, RbValueError};
9
9
 
10
- pub fn get_index_type() -> Value {
11
- Wrap(IDX_DTYPE).into_value()
10
+ pub fn get_index_type(ruby: &Ruby) -> Value {
11
+ Wrap(IDX_DTYPE).into_value_with(ruby)
12
12
  }
13
13
 
14
14
  pub fn thread_pool_size() -> usize {
@@ -1,5 +1,5 @@
1
1
  use crate::RbResult;
2
- use magnus::{RArray, Ruby, Value};
2
+ use magnus::{Ruby, Value};
3
3
 
4
4
  pub fn enable_string_cache() {
5
5
  // The string cache no longer exists.
@@ -18,7 +18,7 @@ pub fn using_string_cache() -> bool {
18
18
  pub struct RbStringCacheHolder {}
19
19
 
20
20
  impl RbStringCacheHolder {
21
- pub fn hold() -> RbResult<Value> {
22
- Ruby::get().unwrap().yield_splat(RArray::new())
21
+ pub fn hold(ruby: &Ruby) -> RbResult<Value> {
22
+ ruby.yield_splat(ruby.ary_new())
23
23
  }
24
24
  }
@@ -1,6 +1,6 @@
1
1
  use arrow::datatypes::ArrowDataType;
2
2
  use arrow::ffi;
3
- use magnus::{IntoValue, Value};
3
+ use magnus::{IntoValue, Ruby, Value};
4
4
  use polars::datatypes::CompatLevel;
5
5
  use polars::frame::DataFrame;
6
6
  use polars::prelude::{ArrayRef, ArrowField, PlSmallStr, PolarsResult, SchemaExt};
@@ -20,11 +20,11 @@ impl RbArrowArrayStream {
20
20
  }
21
21
  }
22
22
 
23
- pub(crate) fn dataframe_to_stream(df: &DataFrame) -> RbResult<Value> {
23
+ pub(crate) fn dataframe_to_stream(df: &DataFrame, ruby: &Ruby) -> RbResult<Value> {
24
24
  let iter = Box::new(DataFrameStreamIterator::new(df));
25
25
  let field = iter.field();
26
26
  let stream = ffi::export_iterator(iter, field);
27
- Ok(RbArrowArrayStream { stream }.into_value())
27
+ Ok(RbArrowArrayStream { stream }.into_value_with(ruby))
28
28
  }
29
29
 
30
30
  pub struct DataFrameStreamIterator {
@@ -1,4 +1,4 @@
1
- use magnus::{IntoValue, Module, RArray, RClass, RModule, Value, class, prelude::*};
1
+ use magnus::{IntoValue, Module, RClass, RModule, Ruby, Value, prelude::*};
2
2
 
3
3
  use crate::RbResult;
4
4
 
@@ -44,9 +44,10 @@ impl<T: Element> RbArray1<T> {
44
44
  where
45
45
  I: IntoIterator<Item = T>,
46
46
  {
47
- class::object()
47
+ let ruby = Ruby::get().unwrap();
48
+ ruby.class_object()
48
49
  .const_get::<_, RModule>("Numo")?
49
50
  .const_get::<_, RClass>(T::class_name())?
50
- .funcall("cast", (RArray::from_iter(values),))
51
+ .funcall("cast", (ruby.ary_from_iter(values),))
51
52
  }
52
53
  }
@@ -1,6 +1,7 @@
1
1
  use std::sync::Arc;
2
2
 
3
3
  use magnus::{TryConvert, Value, value::ReprValue};
4
+ use polars::prelude::default_values::DefaultFieldValues;
4
5
  use polars::prelude::deletion::DeletionFilesList;
5
6
  use polars::prelude::{
6
7
  CastColumnsPolicy, ColumnMapping, ExtraColumnsPolicy, MissingColumnsPolicy, PlSmallStr, Schema,
@@ -48,6 +49,8 @@ impl RbScanOptions {
48
49
  let deletion_files: Option<Wrap<DeletionFilesList>> =
49
50
  self.0.funcall("deletion_files", ())?;
50
51
  let column_mapping: Option<Wrap<ColumnMapping>> = self.0.funcall("column_mapping", ())?;
52
+ let default_values: Option<Wrap<DefaultFieldValues>> =
53
+ self.0.funcall("default_values", ())?;
51
54
 
52
55
  let cloud_options = storage_options;
53
56
 
@@ -95,6 +98,9 @@ impl RbScanOptions {
95
98
  include_file_paths: include_file_paths.map(|x| x.0),
96
99
  deletion_files: DeletionFilesList::filter_empty(deletion_files.map(|x| x.0)),
97
100
  column_mapping: column_mapping.map(|x| x.0),
101
+ default_values: default_values
102
+ .map(|x| x.0)
103
+ .filter(|DefaultFieldValues::Iceberg(v)| !v.is_empty()),
98
104
  };
99
105
 
100
106
  Ok(unified_scan_args)