polars-df 0.21.1 → 0.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +15 -0
  3. data/Cargo.lock +120 -90
  4. data/Cargo.toml +3 -0
  5. data/README.md +20 -7
  6. data/ext/polars/Cargo.toml +18 -12
  7. data/ext/polars/src/batched_csv.rs +4 -4
  8. data/ext/polars/src/catalog/unity.rs +96 -94
  9. data/ext/polars/src/conversion/any_value.rs +39 -37
  10. data/ext/polars/src/conversion/chunked_array.rs +36 -29
  11. data/ext/polars/src/conversion/datetime.rs +11 -0
  12. data/ext/polars/src/conversion/mod.rs +244 -51
  13. data/ext/polars/src/dataframe/construction.rs +5 -17
  14. data/ext/polars/src/dataframe/export.rs +17 -15
  15. data/ext/polars/src/dataframe/general.rs +15 -17
  16. data/ext/polars/src/dataframe/io.rs +1 -2
  17. data/ext/polars/src/dataframe/mod.rs +25 -1
  18. data/ext/polars/src/dataframe/serde.rs +23 -8
  19. data/ext/polars/src/exceptions.rs +8 -5
  20. data/ext/polars/src/expr/datatype.rs +4 -4
  21. data/ext/polars/src/expr/datetime.rs +22 -28
  22. data/ext/polars/src/expr/general.rs +3 -10
  23. data/ext/polars/src/expr/list.rs +8 -24
  24. data/ext/polars/src/expr/meta.rs +4 -6
  25. data/ext/polars/src/expr/mod.rs +2 -0
  26. data/ext/polars/src/expr/name.rs +11 -14
  27. data/ext/polars/src/expr/serde.rs +28 -0
  28. data/ext/polars/src/expr/string.rs +5 -10
  29. data/ext/polars/src/file.rs +20 -14
  30. data/ext/polars/src/functions/business.rs +0 -1
  31. data/ext/polars/src/functions/io.rs +7 -4
  32. data/ext/polars/src/functions/lazy.rs +7 -6
  33. data/ext/polars/src/functions/meta.rs +3 -3
  34. data/ext/polars/src/functions/string_cache.rs +3 -3
  35. data/ext/polars/src/interop/arrow/to_ruby.rs +3 -3
  36. data/ext/polars/src/interop/numo/numo_rs.rs +4 -3
  37. data/ext/polars/src/io/mod.rs +23 -3
  38. data/ext/polars/src/lazyframe/general.rs +35 -50
  39. data/ext/polars/src/lazyframe/mod.rs +16 -1
  40. data/ext/polars/src/lazyframe/optflags.rs +57 -0
  41. data/ext/polars/src/lazyframe/serde.rs +27 -3
  42. data/ext/polars/src/lib.rs +144 -19
  43. data/ext/polars/src/map/dataframe.rs +18 -15
  44. data/ext/polars/src/map/lazy.rs +6 -5
  45. data/ext/polars/src/map/series.rs +7 -6
  46. data/ext/polars/src/on_startup.rs +12 -5
  47. data/ext/polars/src/rb_modules.rs +2 -2
  48. data/ext/polars/src/series/aggregation.rs +49 -29
  49. data/ext/polars/src/series/construction.rs +2 -0
  50. data/ext/polars/src/series/export.rs +38 -33
  51. data/ext/polars/src/series/general.rs +69 -31
  52. data/ext/polars/src/series/mod.rs +29 -4
  53. data/lib/polars/array_expr.rb +1 -1
  54. data/lib/polars/data_frame.rb +119 -15
  55. data/lib/polars/data_types.rb +23 -6
  56. data/lib/polars/date_time_expr.rb +36 -15
  57. data/lib/polars/expr.rb +41 -32
  58. data/lib/polars/functions/business.rb +95 -0
  59. data/lib/polars/functions/lazy.rb +1 -1
  60. data/lib/polars/iceberg_dataset.rb +113 -0
  61. data/lib/polars/io/iceberg.rb +34 -0
  62. data/lib/polars/io/ipc.rb +28 -49
  63. data/lib/polars/io/parquet.rb +7 -4
  64. data/lib/polars/io/scan_options.rb +12 -3
  65. data/lib/polars/io/utils.rb +17 -0
  66. data/lib/polars/lazy_frame.rb +97 -10
  67. data/lib/polars/list_expr.rb +21 -13
  68. data/lib/polars/list_name_space.rb +33 -21
  69. data/lib/polars/meta_expr.rb +25 -0
  70. data/lib/polars/query_opt_flags.rb +50 -0
  71. data/lib/polars/scan_cast_options.rb +23 -1
  72. data/lib/polars/schema.rb +1 -1
  73. data/lib/polars/selectors.rb +8 -8
  74. data/lib/polars/series.rb +26 -2
  75. data/lib/polars/string_expr.rb +27 -28
  76. data/lib/polars/string_name_space.rb +18 -5
  77. data/lib/polars/utils/convert.rb +2 -2
  78. data/lib/polars/utils/serde.rb +17 -0
  79. data/lib/polars/utils/various.rb +4 -0
  80. data/lib/polars/version.rb +1 -1
  81. data/lib/polars.rb +6 -0
  82. metadata +10 -1
@@ -1,10 +1,11 @@
1
- use magnus::{IntoValue, RArray, RString, Ruby, TryConvert, Value, prelude::*};
1
+ use magnus::{IntoValue, RString, Ruby, TryConvert, Value, prelude::*};
2
2
  use polars::prelude::*;
3
+ use polars_compute::decimal::DecimalFmtBuffer;
3
4
 
4
5
  use super::{Wrap, get_rbseq, struct_dict};
5
6
 
6
7
  use crate::RbResult;
7
- use crate::rb_modules::utils;
8
+ use crate::rb_modules::pl_utils;
8
9
 
9
10
  impl TryConvert for Wrap<StringChunked> {
10
11
  fn try_convert(obj: Value) -> RbResult<Self> {
@@ -39,19 +40,19 @@ impl TryConvert for Wrap<BinaryChunked> {
39
40
  }
40
41
 
41
42
  impl IntoValue for Wrap<&StringChunked> {
42
- fn into_value_with(self, _: &Ruby) -> Value {
43
+ fn into_value_with(self, ruby: &Ruby) -> Value {
43
44
  let iter = self.0.into_iter();
44
- RArray::from_iter(iter).into_value()
45
+ ruby.ary_from_iter(iter).as_value()
45
46
  }
46
47
  }
47
48
 
48
49
  impl IntoValue for Wrap<&BinaryChunked> {
49
- fn into_value_with(self, _: &Ruby) -> Value {
50
+ fn into_value_with(self, ruby: &Ruby) -> Value {
50
51
  let iter = self
51
52
  .0
52
53
  .into_iter()
53
- .map(|opt_bytes| opt_bytes.map(RString::from_slice));
54
- RArray::from_iter(iter).into_value()
54
+ .map(|opt_bytes| opt_bytes.map(|v| ruby.str_from_slice(v)));
55
+ ruby.ary_from_iter(iter).as_value()
55
56
  }
56
57
  }
57
58
 
@@ -62,19 +63,19 @@ impl IntoValue for Wrap<&StructChunked> {
62
63
  // make series::iter() accept a chunk index.
63
64
  let s = s.rechunk();
64
65
  let iter = s.iter().map(|av| match av {
65
- AnyValue::Struct(_, _, flds) => struct_dict(av._iter_struct_av(), flds),
66
+ AnyValue::Struct(_, _, flds) => struct_dict(ruby, av._iter_struct_av(), flds),
66
67
  AnyValue::Null => ruby.qnil().as_value(),
67
68
  _ => unreachable!(),
68
69
  });
69
70
 
70
- RArray::from_iter(iter).into_value()
71
+ ruby.ary_from_iter(iter).as_value()
71
72
  }
72
73
  }
73
74
 
74
75
  impl IntoValue for Wrap<&DurationChunked> {
75
- fn into_value_with(self, _: &Ruby) -> Value {
76
- let utils = utils();
77
- let time_unit = Wrap(self.0.time_unit()).into_value();
76
+ fn into_value_with(self, ruby: &Ruby) -> Value {
77
+ let utils = pl_utils();
78
+ let time_unit = Wrap(self.0.time_unit()).into_value_with(ruby);
78
79
  let iter = self.0.physical().into_iter().map(|opt_v| {
79
80
  opt_v.map(|v| {
80
81
  utils
@@ -82,15 +83,19 @@ impl IntoValue for Wrap<&DurationChunked> {
82
83
  .unwrap()
83
84
  })
84
85
  });
85
- RArray::from_iter(iter).into_value()
86
+ ruby.ary_from_iter(iter).as_value()
86
87
  }
87
88
  }
88
89
 
89
90
  impl IntoValue for Wrap<&DatetimeChunked> {
90
- fn into_value_with(self, _: &Ruby) -> Value {
91
- let utils = utils();
92
- let time_unit = Wrap(self.0.time_unit()).into_value();
93
- let time_zone = self.0.time_zone().as_deref().map(|v| v.into_value());
91
+ fn into_value_with(self, ruby: &Ruby) -> Value {
92
+ let utils = pl_utils();
93
+ let time_unit = Wrap(self.0.time_unit()).into_value_with(ruby);
94
+ let time_zone = self
95
+ .0
96
+ .time_zone()
97
+ .as_deref()
98
+ .map(|v| v.into_value_with(ruby));
94
99
  let iter = self.0.physical().into_iter().map(|opt_v| {
95
100
  opt_v.map(|v| {
96
101
  utils
@@ -98,41 +103,43 @@ impl IntoValue for Wrap<&DatetimeChunked> {
98
103
  .unwrap()
99
104
  })
100
105
  });
101
- RArray::from_iter(iter).into_value()
106
+ ruby.ary_from_iter(iter).as_value()
102
107
  }
103
108
  }
104
109
 
105
110
  impl IntoValue for Wrap<&TimeChunked> {
106
- fn into_value_with(self, _: &Ruby) -> Value {
107
- let utils = utils();
111
+ fn into_value_with(self, ruby: &Ruby) -> Value {
112
+ let utils = pl_utils();
108
113
  let iter = self.0.physical().into_iter().map(|opt_v| {
109
114
  opt_v.map(|v| utils.funcall::<_, _, Value>("_to_ruby_time", (v,)).unwrap())
110
115
  });
111
- RArray::from_iter(iter).into_value()
116
+ ruby.ary_from_iter(iter).as_value()
112
117
  }
113
118
  }
114
119
 
115
120
  impl IntoValue for Wrap<&DateChunked> {
116
- fn into_value_with(self, _: &Ruby) -> Value {
117
- let utils = utils();
121
+ fn into_value_with(self, ruby: &Ruby) -> Value {
122
+ let utils = pl_utils();
118
123
  let iter = self.0.physical().into_iter().map(|opt_v| {
119
124
  opt_v.map(|v| utils.funcall::<_, _, Value>("_to_ruby_date", (v,)).unwrap())
120
125
  });
121
- RArray::from_iter(iter).into_value()
126
+ ruby.ary_from_iter(iter).as_value()
122
127
  }
123
128
  }
124
129
 
125
130
  impl IntoValue for Wrap<&DecimalChunked> {
126
- fn into_value_with(self, _: &Ruby) -> Value {
127
- let utils = utils();
128
- let rb_scale = (-(self.0.scale() as i32)).into_value();
131
+ fn into_value_with(self, ruby: &Ruby) -> Value {
132
+ let utils = pl_utils();
133
+ let rb_precision = self.0.precision().into_value_with(ruby);
134
+ let mut buf = DecimalFmtBuffer::new();
129
135
  let iter = self.0.physical().into_iter().map(|opt_v| {
130
136
  opt_v.map(|v| {
137
+ let s = buf.format_dec128(v, self.0.scale(), false, false);
131
138
  utils
132
- .funcall::<_, _, Value>("_to_ruby_decimal", (v.to_string(), rb_scale))
139
+ .funcall::<_, _, Value>("_to_ruby_decimal", (rb_precision, s))
133
140
  .unwrap()
134
141
  })
135
142
  });
136
- RArray::from_iter(iter).into_value()
143
+ ruby.ary_from_iter(iter).as_value()
137
144
  }
138
145
  }
@@ -0,0 +1,11 @@
1
+ use magnus::{Value, prelude::*};
2
+ use polars::prelude::*;
3
+
4
+ use crate::rb_modules::pl_utils;
5
+
6
+ pub fn datetime_to_rb_object(v: i64, tu: TimeUnit, tz: Option<&TimeZone>) -> Value {
7
+ let tu = tu.to_ascii();
8
+ pl_utils()
9
+ .funcall("_to_ruby_datetime", (v, tu, tz.map(|v| v.to_string())))
10
+ .unwrap()
11
+ }
@@ -1,16 +1,17 @@
1
1
  pub(crate) mod any_value;
2
2
  mod categorical;
3
3
  mod chunked_array;
4
+ mod datetime;
4
5
 
6
+ use std::collections::BTreeMap;
5
7
  use std::fmt::{Debug, Display, Formatter};
6
8
  use std::fs::File;
7
9
  use std::hash::{Hash, Hasher};
8
- use std::num::NonZeroUsize;
9
10
 
10
11
  pub use categorical::RbCategories;
11
12
  use magnus::{
12
- IntoValue, Module, RArray, RHash, Ruby, Symbol, TryConvert, Value, class, exception,
13
- prelude::*, r_hash::ForEach, try_convert::TryConvertOwned, value::Opaque,
13
+ IntoValue, Module, RArray, RHash, Ruby, Symbol, TryConvert, Value, prelude::*, r_hash::ForEach,
14
+ try_convert::TryConvertOwned, value::Opaque,
14
15
  };
15
16
  use polars::chunked_array::object::PolarsObjectSafe;
16
17
  use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
@@ -18,9 +19,13 @@ use polars::datatypes::AnyValue;
18
19
  use polars::frame::row::Row;
19
20
  use polars::io::avro::AvroCompression;
20
21
  use polars::io::cloud::CloudOptions;
22
+ use polars::prelude::default_values::{
23
+ DefaultFieldValues, IcebergIdentityTransformedPartitionFields,
24
+ };
21
25
  use polars::prelude::deletion::DeletionFilesList;
22
26
  use polars::prelude::*;
23
27
  use polars::series::ops::NullBehavior;
28
+ use polars_compute::decimal::dec128_verify_prec_scale;
24
29
  use polars_core::schema::iceberg::IcebergSchema;
25
30
  use polars_core::utils::arrow::array::Array;
26
31
  use polars_core::utils::materialize_dyn_int;
@@ -30,7 +35,7 @@ use polars_utils::total_ord::{TotalEq, TotalHash};
30
35
 
31
36
  use crate::file::{RubyScanSourceInput, get_ruby_scan_source_input};
32
37
  use crate::object::OBJECT_NAME;
33
- use crate::rb_modules::series;
38
+ use crate::rb_modules::pl_series;
34
39
  use crate::utils::to_rb_err;
35
40
  use crate::{RbDataFrame, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbTypeError, RbValueError};
36
41
 
@@ -86,7 +91,7 @@ pub(crate) fn get_series(obj: Value) -> RbResult<Series> {
86
91
  }
87
92
 
88
93
  pub(crate) fn to_series(s: RbSeries) -> Value {
89
- let series = series();
94
+ let series = pl_series();
90
95
  series
91
96
  .funcall::<_, _, Value>("_from_rbseries", (s,))
92
97
  .unwrap()
@@ -121,16 +126,16 @@ impl TryConvert for Wrap<NullValues> {
121
126
  }
122
127
  }
123
128
 
124
- fn struct_dict<'a>(vals: impl Iterator<Item = AnyValue<'a>>, flds: &[Field]) -> Value {
125
- let dict = RHash::new();
129
+ fn struct_dict<'a>(ruby: &Ruby, vals: impl Iterator<Item = AnyValue<'a>>, flds: &[Field]) -> Value {
130
+ let dict = ruby.hash_new();
126
131
  for (fld, val) in flds.iter().zip(vals) {
127
132
  dict.aset(fld.name().as_str(), Wrap(val)).unwrap()
128
133
  }
129
- dict.into_value()
134
+ dict.as_value()
130
135
  }
131
136
 
132
137
  impl IntoValue for Wrap<DataType> {
133
- fn into_value_with(self, _: &Ruby) -> Value {
138
+ fn into_value_with(self, ruby: &Ruby) -> Value {
134
139
  let pl = crate::rb_modules::polars();
135
140
 
136
141
  match self.0 {
@@ -170,6 +175,10 @@ impl IntoValue for Wrap<DataType> {
170
175
  let class = pl.const_get::<_, Value>("UInt64").unwrap();
171
176
  class.funcall("new", ()).unwrap()
172
177
  }
178
+ DataType::UInt128 => {
179
+ let class = pl.const_get::<_, Value>("UInt128").unwrap();
180
+ class.funcall("new", ()).unwrap()
181
+ }
173
182
  DataType::Float32 => {
174
183
  let class = pl.const_get::<_, Value>("Float32").unwrap();
175
184
  class.funcall("new", ()).unwrap()
@@ -236,8 +245,10 @@ impl IntoValue for Wrap<DataType> {
236
245
  let categories: Value = categories_class
237
246
  .funcall("_from_rb_categories", (RbCategories::from(cats.clone()),))
238
247
  .unwrap();
239
- let kwargs = RHash::new();
240
- kwargs.aset(Symbol::new("categories"), categories).unwrap();
248
+ let kwargs = ruby.hash_new();
249
+ kwargs
250
+ .aset(ruby.to_symbol("categories"), categories)
251
+ .unwrap();
241
252
  categorical_class.funcall("new", (kwargs,)).unwrap()
242
253
  }
243
254
  DataType::Enum(_, mapping) => {
@@ -264,7 +275,7 @@ impl IntoValue for Wrap<DataType> {
264
275
  .funcall::<_, _, Value>("new", (name, dtype))
265
276
  .unwrap()
266
277
  });
267
- let fields = RArray::from_iter(iter);
278
+ let fields = ruby.ary_from_iter(iter);
268
279
  let struct_class = pl.const_get::<_, Value>("Struct").unwrap();
269
280
  struct_class
270
281
  .funcall::<_, _, Value>("new", (fields,))
@@ -275,7 +286,7 @@ impl IntoValue for Wrap<DataType> {
275
286
  class.funcall("new", ()).unwrap()
276
287
  }
277
288
  DataType::Unknown(UnknownKind::Int(v)) => {
278
- Wrap(materialize_dyn_int(v).dtype()).into_value()
289
+ Wrap(materialize_dyn_int(v).dtype()).into_value_with(ruby)
279
290
  }
280
291
  DataType::Unknown(_) => {
281
292
  let class = pl.const_get::<_, Value>("Unknown").unwrap();
@@ -293,19 +304,19 @@ enum CategoricalOrdering {
293
304
  }
294
305
 
295
306
  impl IntoValue for Wrap<CategoricalOrdering> {
296
- fn into_value_with(self, _: &Ruby) -> Value {
297
- "lexical".into_value()
307
+ fn into_value_with(self, ruby: &Ruby) -> Value {
308
+ "lexical".into_value_with(ruby)
298
309
  }
299
310
  }
300
311
 
301
312
  impl IntoValue for Wrap<TimeUnit> {
302
- fn into_value_with(self, _: &Ruby) -> Value {
313
+ fn into_value_with(self, ruby: &Ruby) -> Value {
303
314
  let tu = match self.0 {
304
315
  TimeUnit::Nanoseconds => "ns",
305
316
  TimeUnit::Microseconds => "us",
306
317
  TimeUnit::Milliseconds => "ms",
307
318
  };
308
- tu.into_value()
319
+ tu.into_value_with(ruby)
309
320
  }
310
321
  }
311
322
 
@@ -319,7 +330,8 @@ impl TryConvert for Wrap<Field> {
319
330
 
320
331
  impl TryConvert for Wrap<DataType> {
321
332
  fn try_convert(ob: Value) -> RbResult<Self> {
322
- let dtype = if ob.is_kind_of(class::class()) {
333
+ let ruby = Ruby::get_with(ob);
334
+ let dtype = if ob.is_kind_of(ruby.class_class()) {
323
335
  let name = ob.funcall::<_, _, String>("name", ())?;
324
336
  match name.as_str() {
325
337
  "Polars::Int8" => DataType::Int8,
@@ -343,7 +355,11 @@ impl TryConvert for Wrap<DataType> {
343
355
  "Polars::Time" => DataType::Time,
344
356
  "Polars::Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
345
357
  "Polars::Duration" => DataType::Duration(TimeUnit::Microseconds),
346
- "Polars::Decimal" => DataType::Decimal(None, None),
358
+ "Polars::Decimal" => {
359
+ return Err(RbTypeError::new_err(
360
+ "Decimal without precision/scale set is not a valid Polars datatype",
361
+ ));
362
+ }
347
363
  "Polars::List" => DataType::List(Box::new(DataType::Null)),
348
364
  "Polars::Array" => DataType::Array(Box::new(DataType::Null), 0),
349
365
  "Polars::Struct" => DataType::Struct(vec![]),
@@ -408,7 +424,8 @@ impl TryConvert for Wrap<DataType> {
408
424
  "Polars::Decimal" => {
409
425
  let precision = ob.funcall("precision", ())?;
410
426
  let scale = ob.funcall("scale", ())?;
411
- DataType::Decimal(precision, Some(scale))
427
+ dec128_verify_prec_scale(precision, scale).map_err(to_rb_err)?;
428
+ DataType::Decimal(precision, scale)
412
429
  }
413
430
  "Polars::List" => {
414
431
  let inner: Value = ob.funcall("inner", ()).unwrap();
@@ -527,8 +544,56 @@ impl TryConvert for Wrap<Schema> {
527
544
  }
528
545
 
529
546
  impl TryConvert for Wrap<ArrowSchema> {
530
- fn try_convert(_ob: Value) -> RbResult<Self> {
531
- todo!();
547
+ fn try_convert(ob: Value) -> RbResult<Self> {
548
+ let ruby = Ruby::get_with(ob);
549
+ // TODO improve
550
+ let ob = RHash::try_convert(ob)?;
551
+ let fields: RArray = ob.aref(ruby.to_symbol("fields"))?;
552
+ let mut arrow_schema = ArrowSchema::with_capacity(fields.len());
553
+ for f in fields {
554
+ let f = RHash::try_convert(f)?;
555
+ let name: String = f.aref(ruby.to_symbol("name"))?;
556
+ let rb_dtype: String = f.aref(ruby.to_symbol("type"))?;
557
+ let dtype = match rb_dtype.as_str() {
558
+ "null" => ArrowDataType::Null,
559
+ "boolean" => ArrowDataType::Boolean,
560
+ "int8" => ArrowDataType::Int8,
561
+ "int16" => ArrowDataType::Int16,
562
+ "int32" => ArrowDataType::Int32,
563
+ "int64" => ArrowDataType::Int64,
564
+ "uint8" => ArrowDataType::UInt8,
565
+ "uint16" => ArrowDataType::UInt16,
566
+ "uint32" => ArrowDataType::UInt32,
567
+ "uint64" => ArrowDataType::UInt64,
568
+ "float16" => ArrowDataType::Float16,
569
+ "float32" => ArrowDataType::Float32,
570
+ "float64" => ArrowDataType::Float64,
571
+ "date32" => ArrowDataType::Date32,
572
+ "date64" => ArrowDataType::Date64,
573
+ "binary" => ArrowDataType::Binary,
574
+ "large_binary" => ArrowDataType::LargeBinary,
575
+ "string" => ArrowDataType::Utf8,
576
+ "large_string" => ArrowDataType::LargeUtf8,
577
+ "binary_view" => ArrowDataType::BinaryView,
578
+ "string_view" => ArrowDataType::Utf8View,
579
+ "unknown" => ArrowDataType::Unknown,
580
+ _ => todo!(),
581
+ };
582
+ let is_nullable = f.aref(ruby.to_symbol("nullable"))?;
583
+ let rb_metadata: RHash = f.aref(ruby.to_symbol("metadata"))?;
584
+ let mut metadata = BTreeMap::new();
585
+ rb_metadata.foreach(|k: String, v: String| {
586
+ metadata.insert(k.into(), v.into());
587
+ Ok(ForEach::Continue)
588
+ })?;
589
+ arrow_schema
590
+ .try_insert(
591
+ name.clone().into(),
592
+ ArrowField::new(name.into(), dtype, is_nullable).with_metadata(metadata),
593
+ )
594
+ .map_err(to_rb_err)?;
595
+ }
596
+ Ok(Wrap(arrow_schema))
532
597
  }
533
598
  }
534
599
 
@@ -673,7 +738,7 @@ impl From<&dyn PolarsObjectSafe> for &ObjectValue {
673
738
 
674
739
  impl ObjectValue {
675
740
  pub fn to_value(&self) -> Value {
676
- self.clone().into_value()
741
+ self.clone().into_value_with(&Ruby::get().unwrap())
677
742
  }
678
743
  }
679
744
 
@@ -827,7 +892,7 @@ impl TryConvert for Wrap<Option<IpcCompression>> {
827
892
  let parsed = match String::try_convert(ob)?.as_str() {
828
893
  "uncompressed" => None,
829
894
  "lz4" => Some(IpcCompression::LZ4),
830
- "zstd" => Some(IpcCompression::ZSTD),
895
+ "zstd" => Some(IpcCompression::ZSTD(Default::default())),
831
896
  v => {
832
897
  return Err(RbValueError::new_err(format!(
833
898
  "compression must be one of {{'uncompressed', 'lz4', 'zstd'}}, got {v}"
@@ -1036,21 +1101,6 @@ impl TryConvert for Wrap<UniqueKeepStrategy> {
1036
1101
  }
1037
1102
  }
1038
1103
 
1039
- impl TryConvert for Wrap<IpcCompression> {
1040
- fn try_convert(ob: Value) -> RbResult<Self> {
1041
- let parsed = match String::try_convert(ob)?.as_str() {
1042
- "lz4" => IpcCompression::LZ4,
1043
- "zstd" => IpcCompression::ZSTD,
1044
- v => {
1045
- return Err(RbValueError::new_err(format!(
1046
- "compression must be one of {{'lz4', 'zstd'}}, got {v}"
1047
- )));
1048
- }
1049
- };
1050
- Ok(Wrap(parsed))
1051
- }
1052
- }
1053
-
1054
1104
  impl TryConvert for Wrap<SearchSortedSide> {
1055
1105
  fn try_convert(ob: Value) -> RbResult<Self> {
1056
1106
  let parsed = match String::try_convert(ob)?.as_str() {
@@ -1153,7 +1203,8 @@ impl TryConvert for Wrap<QuoteStyle> {
1153
1203
  }
1154
1204
 
1155
1205
  pub(crate) fn parse_cloud_options(uri: &str, kv: Vec<(String, String)>) -> RbResult<CloudOptions> {
1156
- let out = CloudOptions::from_untyped_config(uri, kv).map_err(RbPolarsErr::from)?;
1206
+ let out = CloudOptions::from_untyped_config(CloudScheme::from_uri(uri).as_ref(), kv)
1207
+ .map_err(RbPolarsErr::from)?;
1157
1208
  Ok(out)
1158
1209
  }
1159
1210
 
@@ -1180,7 +1231,121 @@ impl TryConvert for Wrap<CastColumnsPolicy> {
1180
1231
  let out = Wrap(CastColumnsPolicy::ERROR_ON_MISMATCH);
1181
1232
  return Ok(out);
1182
1233
  }
1183
- todo!();
1234
+
1235
+ let integer_upcast = match &*ob.funcall::<_, _, String>("integer_cast", ())? {
1236
+ "upcast" => true,
1237
+ "forbid" => false,
1238
+ v => {
1239
+ return Err(RbValueError::new_err(format!(
1240
+ "unknown option for integer_cast: {v}"
1241
+ )));
1242
+ }
1243
+ };
1244
+
1245
+ let mut float_upcast = false;
1246
+ let mut float_downcast = false;
1247
+
1248
+ let float_cast_object: Value = ob.funcall("float_cast", ())?;
1249
+
1250
+ parse_multiple_options("float_cast", float_cast_object, |v| {
1251
+ match v {
1252
+ "forbid" => {}
1253
+ "upcast" => float_upcast = true,
1254
+ "downcast" => float_downcast = true,
1255
+ v => {
1256
+ return Err(RbValueError::new_err(format!(
1257
+ "unknown option for float_cast: {v}"
1258
+ )));
1259
+ }
1260
+ }
1261
+
1262
+ Ok(())
1263
+ })?;
1264
+
1265
+ let mut datetime_nanoseconds_downcast = false;
1266
+ let mut datetime_convert_timezone = false;
1267
+
1268
+ let datetime_cast_object: Value = ob.funcall("datetime_cast", ())?;
1269
+
1270
+ parse_multiple_options("datetime_cast", datetime_cast_object, |v| {
1271
+ match v {
1272
+ "forbid" => {}
1273
+ "nanosecond-downcast" => datetime_nanoseconds_downcast = true,
1274
+ "convert-timezone" => datetime_convert_timezone = true,
1275
+ v => {
1276
+ return Err(RbValueError::new_err(format!(
1277
+ "unknown option for datetime_cast: {v}"
1278
+ )));
1279
+ }
1280
+ };
1281
+
1282
+ Ok(())
1283
+ })?;
1284
+
1285
+ let missing_struct_fields =
1286
+ match &*ob.funcall::<_, _, String>("missing_struct_fields", ())? {
1287
+ "insert" => MissingColumnsPolicy::Insert,
1288
+ "raise" => MissingColumnsPolicy::Raise,
1289
+ v => {
1290
+ return Err(RbValueError::new_err(format!(
1291
+ "unknown option for missing_struct_fields: {v}"
1292
+ )));
1293
+ }
1294
+ };
1295
+
1296
+ let extra_struct_fields = match &*ob.funcall::<_, _, String>("extra_struct_fields", ())? {
1297
+ "ignore" => ExtraColumnsPolicy::Ignore,
1298
+ "raise" => ExtraColumnsPolicy::Raise,
1299
+ v => {
1300
+ return Err(RbValueError::new_err(format!(
1301
+ "unknown option for extra_struct_fields: {v}"
1302
+ )));
1303
+ }
1304
+ };
1305
+
1306
+ let categorical_to_string =
1307
+ match &*ob.funcall::<_, _, String>("categorical_to_string", ())? {
1308
+ "allow" => true,
1309
+ "forbid" => false,
1310
+ v => {
1311
+ return Err(RbValueError::new_err(format!(
1312
+ "unknown option for categorical_to_string: {v}"
1313
+ )));
1314
+ }
1315
+ };
1316
+
1317
+ return Ok(Wrap(CastColumnsPolicy {
1318
+ integer_upcast,
1319
+ float_upcast,
1320
+ float_downcast,
1321
+ datetime_nanoseconds_downcast,
1322
+ datetime_microseconds_downcast: false,
1323
+ datetime_convert_timezone,
1324
+ null_upcast: true,
1325
+ categorical_to_string,
1326
+ missing_struct_fields,
1327
+ extra_struct_fields,
1328
+ }));
1329
+
1330
+ fn parse_multiple_options(
1331
+ parameter_name: &'static str,
1332
+ rb_object: Value,
1333
+ mut parser_func: impl FnMut(&str) -> RbResult<()>,
1334
+ ) -> RbResult<()> {
1335
+ if let Ok(v) = String::try_convert(rb_object) {
1336
+ parser_func(&v)?;
1337
+ } else if let Ok(v) = RArray::try_convert(rb_object) {
1338
+ for v in v {
1339
+ parser_func(&String::try_convert(v)?)?;
1340
+ }
1341
+ } else {
1342
+ return Err(RbValueError::new_err(format!(
1343
+ "unknown type for {parameter_name}: {rb_object}"
1344
+ )));
1345
+ }
1346
+
1347
+ Ok(())
1348
+ }
1184
1349
  }
1185
1350
  }
1186
1351
 
@@ -1198,7 +1363,7 @@ pub fn parse_fill_null_strategy(
1198
1363
  "one" => FillNullStrategy::One,
1199
1364
  e => {
1200
1365
  return Err(magnus::Error::new(
1201
- exception::runtime_error(),
1366
+ Ruby::get().unwrap().exception_runtime_error(),
1202
1367
  format!(
1203
1368
  "strategy must be one of {{'forward', 'backward', 'min', 'max', 'mean', 'zero', 'one'}}, got {e}",
1204
1369
  ),
@@ -1249,15 +1414,6 @@ pub fn parse_parquet_compression(
1249
1414
  Ok(parsed)
1250
1415
  }
1251
1416
 
1252
- impl TryConvert for Wrap<NonZeroUsize> {
1253
- fn try_convert(ob: Value) -> RbResult<Self> {
1254
- let v = usize::try_convert(ob)?;
1255
- NonZeroUsize::new(v)
1256
- .map(Wrap)
1257
- .ok_or(RbValueError::new_err("must be non-zero"))
1258
- }
1259
- }
1260
-
1261
1417
  pub(crate) fn strings_to_pl_smallstr<I, S>(container: I) -> Vec<PlSmallStr>
1262
1418
  where
1263
1419
  I: IntoIterator<Item = S>,
@@ -1411,3 +1567,40 @@ impl TryConvert for Wrap<DeletionFilesList> {
1411
1567
  }))
1412
1568
  }
1413
1569
  }
1570
+
1571
+ impl TryConvert for Wrap<DefaultFieldValues> {
1572
+ fn try_convert(ob: Value) -> RbResult<Self> {
1573
+ let (default_values_type, ob) = <(String, Value)>::try_convert(ob)?;
1574
+
1575
+ Ok(Wrap(match &*default_values_type {
1576
+ "iceberg" => {
1577
+ let dict = RHash::try_convert(ob)?;
1578
+
1579
+ let mut out = PlIndexMap::new();
1580
+
1581
+ dict.foreach(|k: u32, v: Value| {
1582
+ let v: Result<Column, String> = if let Ok(s) = get_series(v) {
1583
+ Ok(s.into_column())
1584
+ } else {
1585
+ let err_msg = String::try_convert(v)?;
1586
+ Err(err_msg)
1587
+ };
1588
+
1589
+ out.insert(k, v);
1590
+
1591
+ Ok(ForEach::Continue)
1592
+ })?;
1593
+
1594
+ DefaultFieldValues::Iceberg(Arc::new(IcebergIdentityTransformedPartitionFields(
1595
+ out,
1596
+ )))
1597
+ }
1598
+
1599
+ v => {
1600
+ return Err(RbValueError::new_err(format!(
1601
+ "unknown deletion file type: {v}"
1602
+ )));
1603
+ }
1604
+ }))
1605
+ }
1606
+ }
@@ -1,4 +1,4 @@
1
- use magnus::{RArray, RHash, Symbol, Value, prelude::*, r_hash::ForEach};
1
+ use magnus::{RArray, RHash, Ruby, Symbol, Value, prelude::*, r_hash::ForEach};
2
2
  use polars::frame::row::{Row, rows_to_schema_supertypes, rows_to_supertypes};
3
3
  use polars::prelude::*;
4
4
 
@@ -54,7 +54,7 @@ fn finish_from_rows(
54
54
  schema_overrides: Option<Schema>,
55
55
  infer_schema_length: Option<usize>,
56
56
  ) -> RbResult<RbDataFrame> {
57
- let mut schema = if let Some(mut schema) = schema {
57
+ let schema = if let Some(mut schema) = schema {
58
58
  resolve_schema_overrides(&mut schema, schema_overrides);
59
59
  update_schema_from_rows(&mut schema, &rows, infer_schema_length)?;
60
60
  schema
@@ -62,11 +62,6 @@ fn finish_from_rows(
62
62
  rows_to_schema_supertypes(&rows, infer_schema_length).map_err(RbPolarsErr::from)?
63
63
  };
64
64
 
65
- // TODO: Remove this step when Decimals are supported properly.
66
- // Erasing the decimal precision/scale here will just require us to infer it again later.
67
- // https://github.com/pola-rs/polars/issues/14427
68
- erase_decimal_precision_scale(&mut schema);
69
-
70
65
  let df = DataFrame::from_rows_and_schema(&rows, &schema).map_err(RbPolarsErr::from)?;
71
66
  Ok(df.into())
72
67
  }
@@ -106,14 +101,6 @@ fn resolve_schema_overrides(schema: &mut Schema, schema_overrides: Option<Schema
106
101
  }
107
102
  }
108
103
 
109
- fn erase_decimal_precision_scale(schema: &mut Schema) {
110
- for dtype in schema.iter_values_mut() {
111
- if let DataType::Decimal(_, _) = dtype {
112
- *dtype = DataType::Decimal(None, None)
113
- }
114
- }
115
- }
116
-
117
104
  fn columns_names_to_empty_schema<'a, I>(column_names: I) -> Schema
118
105
  where
119
106
  I: IntoIterator<Item = &'a str>,
@@ -125,6 +112,7 @@ where
125
112
  }
126
113
 
127
114
  fn dicts_to_rows<'a>(data: &Value, names: &'a [String], _strict: bool) -> RbResult<Vec<Row<'a>>> {
115
+ let ruby = Ruby::get_with(*data);
128
116
  let (data, len) = get_rbseq(*data)?;
129
117
  let mut rows = Vec::with_capacity(len);
130
118
  for d in data.into_iter() {
@@ -132,8 +120,8 @@ fn dicts_to_rows<'a>(data: &Value, names: &'a [String], _strict: bool) -> RbResu
132
120
 
133
121
  let mut row = Vec::with_capacity(names.len());
134
122
  for k in names.iter() {
135
- // TODO improve performance
136
- let val = match d.get(k.clone()).or_else(|| d.get(Symbol::new(k))) {
123
+ // TODO improve performance (must work with GC)
124
+ let val = match d.get(k.clone()).or_else(|| d.get(ruby.to_symbol(k))) {
137
125
  None => AnyValue::Null,
138
126
  Some(val) => Wrap::<AnyValue>::try_convert(val)?.0,
139
127
  };