polars-df 0.21.0 → 0.22.0

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (102)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +27 -0
  3. data/Cargo.lock +55 -48
  4. data/Cargo.toml +3 -0
  5. data/README.md +12 -0
  6. data/ext/polars/Cargo.toml +22 -11
  7. data/ext/polars/src/batched_csv.rs +4 -4
  8. data/ext/polars/src/catalog/unity.rs +96 -94
  9. data/ext/polars/src/conversion/any_value.rs +26 -30
  10. data/ext/polars/src/conversion/chunked_array.rs +32 -28
  11. data/ext/polars/src/conversion/datetime.rs +11 -0
  12. data/ext/polars/src/conversion/mod.rs +307 -34
  13. data/ext/polars/src/dataframe/construction.rs +4 -3
  14. data/ext/polars/src/dataframe/export.rs +17 -15
  15. data/ext/polars/src/dataframe/general.rs +15 -12
  16. data/ext/polars/src/dataframe/io.rs +1 -2
  17. data/ext/polars/src/dataframe/mod.rs +25 -1
  18. data/ext/polars/src/dataframe/serde.rs +23 -8
  19. data/ext/polars/src/exceptions.rs +8 -4
  20. data/ext/polars/src/expr/array.rs +73 -4
  21. data/ext/polars/src/expr/binary.rs +26 -1
  22. data/ext/polars/src/expr/bitwise.rs +39 -0
  23. data/ext/polars/src/expr/categorical.rs +20 -0
  24. data/ext/polars/src/expr/datatype.rs +24 -1
  25. data/ext/polars/src/expr/datetime.rs +58 -14
  26. data/ext/polars/src/expr/general.rs +87 -15
  27. data/ext/polars/src/expr/list.rs +32 -24
  28. data/ext/polars/src/expr/meta.rs +15 -6
  29. data/ext/polars/src/expr/mod.rs +3 -0
  30. data/ext/polars/src/expr/name.rs +19 -14
  31. data/ext/polars/src/expr/rolling.rs +20 -0
  32. data/ext/polars/src/expr/serde.rs +28 -0
  33. data/ext/polars/src/expr/string.rs +64 -10
  34. data/ext/polars/src/expr/struct.rs +9 -1
  35. data/ext/polars/src/file.rs +15 -9
  36. data/ext/polars/src/functions/business.rs +0 -1
  37. data/ext/polars/src/functions/io.rs +25 -3
  38. data/ext/polars/src/functions/lazy.rs +11 -6
  39. data/ext/polars/src/functions/meta.rs +3 -3
  40. data/ext/polars/src/functions/string_cache.rs +3 -3
  41. data/ext/polars/src/interop/arrow/to_ruby.rs +3 -3
  42. data/ext/polars/src/interop/numo/numo_rs.rs +4 -3
  43. data/ext/polars/src/io/mod.rs +6 -0
  44. data/ext/polars/src/lazyframe/general.rs +59 -9
  45. data/ext/polars/src/lazyframe/mod.rs +16 -1
  46. data/ext/polars/src/lazyframe/optflags.rs +58 -0
  47. data/ext/polars/src/lazyframe/serde.rs +27 -3
  48. data/ext/polars/src/lib.rs +261 -19
  49. data/ext/polars/src/map/dataframe.rs +20 -17
  50. data/ext/polars/src/map/lazy.rs +6 -5
  51. data/ext/polars/src/map/series.rs +8 -7
  52. data/ext/polars/src/on_startup.rs +12 -5
  53. data/ext/polars/src/rb_modules.rs +2 -2
  54. data/ext/polars/src/series/aggregation.rs +85 -28
  55. data/ext/polars/src/series/construction.rs +1 -0
  56. data/ext/polars/src/series/export.rs +37 -33
  57. data/ext/polars/src/series/general.rs +120 -21
  58. data/ext/polars/src/series/mod.rs +29 -4
  59. data/lib/polars/array_expr.rb +382 -3
  60. data/lib/polars/array_name_space.rb +281 -0
  61. data/lib/polars/binary_expr.rb +67 -0
  62. data/lib/polars/binary_name_space.rb +43 -0
  63. data/lib/polars/cat_expr.rb +224 -0
  64. data/lib/polars/cat_name_space.rb +138 -0
  65. data/lib/polars/config.rb +2 -2
  66. data/lib/polars/convert.rb +6 -6
  67. data/lib/polars/data_frame.rb +794 -27
  68. data/lib/polars/data_type_expr.rb +52 -0
  69. data/lib/polars/data_types.rb +26 -5
  70. data/lib/polars/date_time_expr.rb +252 -1
  71. data/lib/polars/date_time_name_space.rb +299 -0
  72. data/lib/polars/expr.rb +1248 -206
  73. data/lib/polars/functions/business.rb +95 -0
  74. data/lib/polars/functions/datatype.rb +21 -0
  75. data/lib/polars/functions/lazy.rb +14 -1
  76. data/lib/polars/io/csv.rb +1 -1
  77. data/lib/polars/io/iceberg.rb +27 -0
  78. data/lib/polars/io/json.rb +4 -4
  79. data/lib/polars/io/ndjson.rb +4 -4
  80. data/lib/polars/io/parquet.rb +32 -7
  81. data/lib/polars/io/scan_options.rb +4 -1
  82. data/lib/polars/lazy_frame.rb +1028 -28
  83. data/lib/polars/list_expr.rb +217 -17
  84. data/lib/polars/list_name_space.rb +231 -22
  85. data/lib/polars/meta_expr.rb +89 -0
  86. data/lib/polars/name_expr.rb +36 -0
  87. data/lib/polars/query_opt_flags.rb +50 -0
  88. data/lib/polars/scan_cast_options.rb +20 -1
  89. data/lib/polars/schema.rb +79 -3
  90. data/lib/polars/selector.rb +72 -0
  91. data/lib/polars/selectors.rb +3 -3
  92. data/lib/polars/series.rb +1053 -54
  93. data/lib/polars/string_expr.rb +436 -32
  94. data/lib/polars/string_name_space.rb +736 -50
  95. data/lib/polars/struct_expr.rb +103 -0
  96. data/lib/polars/struct_name_space.rb +19 -1
  97. data/lib/polars/utils/serde.rb +17 -0
  98. data/lib/polars/utils/various.rb +22 -1
  99. data/lib/polars/utils.rb +5 -1
  100. data/lib/polars/version.rb +1 -1
  101. data/lib/polars.rb +6 -0
  102. metadata +11 -1
@@ -1,16 +1,17 @@
1
1
  pub(crate) mod any_value;
2
2
  mod categorical;
3
3
  mod chunked_array;
4
+ mod datetime;
4
5
 
6
+ use std::collections::BTreeMap;
5
7
  use std::fmt::{Debug, Display, Formatter};
6
8
  use std::fs::File;
7
9
  use std::hash::{Hash, Hasher};
8
- use std::num::NonZeroUsize;
9
10
 
10
11
  pub use categorical::RbCategories;
11
12
  use magnus::{
12
- IntoValue, Module, RArray, RHash, Ruby, Symbol, TryConvert, Value, class, exception,
13
- prelude::*, r_hash::ForEach, try_convert::TryConvertOwned, value::Opaque,
13
+ IntoValue, Module, RArray, RHash, Ruby, Symbol, TryConvert, Value, prelude::*, r_hash::ForEach,
14
+ try_convert::TryConvertOwned, value::Opaque,
14
15
  };
15
16
  use polars::chunked_array::object::PolarsObjectSafe;
16
17
  use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
@@ -18,9 +19,13 @@ use polars::datatypes::AnyValue;
18
19
  use polars::frame::row::Row;
19
20
  use polars::io::avro::AvroCompression;
20
21
  use polars::io::cloud::CloudOptions;
22
+ use polars::prelude::default_values::{
23
+ DefaultFieldValues, IcebergIdentityTransformedPartitionFields,
24
+ };
21
25
  use polars::prelude::deletion::DeletionFilesList;
22
26
  use polars::prelude::*;
23
27
  use polars::series::ops::NullBehavior;
28
+ use polars_core::schema::iceberg::IcebergSchema;
24
29
  use polars_core::utils::arrow::array::Array;
25
30
  use polars_core::utils::materialize_dyn_int;
26
31
  use polars_plan::dsl::ScanSources;
@@ -29,7 +34,8 @@ use polars_utils::total_ord::{TotalEq, TotalHash};
29
34
 
30
35
  use crate::file::{RubyScanSourceInput, get_ruby_scan_source_input};
31
36
  use crate::object::OBJECT_NAME;
32
- use crate::rb_modules::series;
37
+ use crate::rb_modules::pl_series;
38
+ use crate::utils::to_rb_err;
33
39
  use crate::{RbDataFrame, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbTypeError, RbValueError};
34
40
 
35
41
  pub(crate) fn slice_extract_wrapped<T>(slice: &[Wrap<T>]) -> &[T] {
@@ -84,7 +90,7 @@ pub(crate) fn get_series(obj: Value) -> RbResult<Series> {
84
90
  }
85
91
 
86
92
  pub(crate) fn to_series(s: RbSeries) -> Value {
87
- let series = series();
93
+ let series = pl_series();
88
94
  series
89
95
  .funcall::<_, _, Value>("_from_rbseries", (s,))
90
96
  .unwrap()
@@ -119,16 +125,16 @@ impl TryConvert for Wrap<NullValues> {
119
125
  }
120
126
  }
121
127
 
122
- fn struct_dict<'a>(vals: impl Iterator<Item = AnyValue<'a>>, flds: &[Field]) -> Value {
123
- let dict = RHash::new();
128
+ fn struct_dict<'a>(ruby: &Ruby, vals: impl Iterator<Item = AnyValue<'a>>, flds: &[Field]) -> Value {
129
+ let dict = ruby.hash_new();
124
130
  for (fld, val) in flds.iter().zip(vals) {
125
131
  dict.aset(fld.name().as_str(), Wrap(val)).unwrap()
126
132
  }
127
- dict.into_value()
133
+ dict.as_value()
128
134
  }
129
135
 
130
136
  impl IntoValue for Wrap<DataType> {
131
- fn into_value_with(self, _: &Ruby) -> Value {
137
+ fn into_value_with(self, ruby: &Ruby) -> Value {
132
138
  let pl = crate::rb_modules::polars();
133
139
 
134
140
  match self.0 {
@@ -234,8 +240,10 @@ impl IntoValue for Wrap<DataType> {
234
240
  let categories: Value = categories_class
235
241
  .funcall("_from_rb_categories", (RbCategories::from(cats.clone()),))
236
242
  .unwrap();
237
- let kwargs = RHash::new();
238
- kwargs.aset(Symbol::new("categories"), categories).unwrap();
243
+ let kwargs = ruby.hash_new();
244
+ kwargs
245
+ .aset(ruby.to_symbol("categories"), categories)
246
+ .unwrap();
239
247
  categorical_class.funcall("new", (kwargs,)).unwrap()
240
248
  }
241
249
  DataType::Enum(_, mapping) => {
@@ -262,7 +270,7 @@ impl IntoValue for Wrap<DataType> {
262
270
  .funcall::<_, _, Value>("new", (name, dtype))
263
271
  .unwrap()
264
272
  });
265
- let fields = RArray::from_iter(iter);
273
+ let fields = ruby.ary_from_iter(iter);
266
274
  let struct_class = pl.const_get::<_, Value>("Struct").unwrap();
267
275
  struct_class
268
276
  .funcall::<_, _, Value>("new", (fields,))
@@ -273,7 +281,7 @@ impl IntoValue for Wrap<DataType> {
273
281
  class.funcall("new", ()).unwrap()
274
282
  }
275
283
  DataType::Unknown(UnknownKind::Int(v)) => {
276
- Wrap(materialize_dyn_int(v).dtype()).into_value()
284
+ Wrap(materialize_dyn_int(v).dtype()).into_value_with(ruby)
277
285
  }
278
286
  DataType::Unknown(_) => {
279
287
  let class = pl.const_get::<_, Value>("Unknown").unwrap();
@@ -291,19 +299,19 @@ enum CategoricalOrdering {
291
299
  }
292
300
 
293
301
  impl IntoValue for Wrap<CategoricalOrdering> {
294
- fn into_value_with(self, _: &Ruby) -> Value {
295
- "lexical".into_value()
302
+ fn into_value_with(self, ruby: &Ruby) -> Value {
303
+ "lexical".into_value_with(ruby)
296
304
  }
297
305
  }
298
306
 
299
307
  impl IntoValue for Wrap<TimeUnit> {
300
- fn into_value_with(self, _: &Ruby) -> Value {
308
+ fn into_value_with(self, ruby: &Ruby) -> Value {
301
309
  let tu = match self.0 {
302
310
  TimeUnit::Nanoseconds => "ns",
303
311
  TimeUnit::Microseconds => "us",
304
312
  TimeUnit::Milliseconds => "ms",
305
313
  };
306
- tu.into_value()
314
+ tu.into_value_with(ruby)
307
315
  }
308
316
  }
309
317
 
@@ -317,7 +325,8 @@ impl TryConvert for Wrap<Field> {
317
325
 
318
326
  impl TryConvert for Wrap<DataType> {
319
327
  fn try_convert(ob: Value) -> RbResult<Self> {
320
- let dtype = if ob.is_kind_of(class::class()) {
328
+ let ruby = Ruby::get_with(ob);
329
+ let dtype = if ob.is_kind_of(ruby.class_class()) {
321
330
  let name = ob.funcall::<_, _, String>("name", ())?;
322
331
  match name.as_str() {
323
332
  "Polars::Int8" => DataType::Int8,
@@ -524,6 +533,60 @@ impl TryConvert for Wrap<Schema> {
524
533
  }
525
534
  }
526
535
 
536
+ impl TryConvert for Wrap<ArrowSchema> {
537
+ fn try_convert(ob: Value) -> RbResult<Self> {
538
+ let ruby = Ruby::get_with(ob);
539
+ // TODO improve
540
+ let ob = RHash::try_convert(ob)?;
541
+ let fields: RArray = ob.aref(ruby.to_symbol("fields"))?;
542
+ let mut arrow_schema = ArrowSchema::with_capacity(fields.len());
543
+ for f in fields {
544
+ let f = RHash::try_convert(f)?;
545
+ let name: String = f.aref(ruby.to_symbol("name"))?;
546
+ let rb_dtype: String = f.aref(ruby.to_symbol("type"))?;
547
+ let dtype = match rb_dtype.as_str() {
548
+ "null" => ArrowDataType::Null,
549
+ "boolean" => ArrowDataType::Boolean,
550
+ "int8" => ArrowDataType::Int8,
551
+ "int16" => ArrowDataType::Int16,
552
+ "int32" => ArrowDataType::Int32,
553
+ "int64" => ArrowDataType::Int64,
554
+ "uint8" => ArrowDataType::UInt8,
555
+ "uint16" => ArrowDataType::UInt16,
556
+ "uint32" => ArrowDataType::UInt32,
557
+ "uint64" => ArrowDataType::UInt64,
558
+ "float16" => ArrowDataType::Float16,
559
+ "float32" => ArrowDataType::Float32,
560
+ "float64" => ArrowDataType::Float64,
561
+ "date32" => ArrowDataType::Date32,
562
+ "date64" => ArrowDataType::Date64,
563
+ "binary" => ArrowDataType::Binary,
564
+ "large_binary" => ArrowDataType::LargeBinary,
565
+ "string" => ArrowDataType::Utf8,
566
+ "large_string" => ArrowDataType::LargeUtf8,
567
+ "binary_view" => ArrowDataType::BinaryView,
568
+ "string_view" => ArrowDataType::Utf8View,
569
+ "unknown" => ArrowDataType::Unknown,
570
+ _ => todo!(),
571
+ };
572
+ let is_nullable = f.aref(ruby.to_symbol("nullable"))?;
573
+ let rb_metadata: RHash = f.aref(ruby.to_symbol("metadata"))?;
574
+ let mut metadata = BTreeMap::new();
575
+ rb_metadata.foreach(|k: String, v: String| {
576
+ metadata.insert(k.into(), v.into());
577
+ Ok(ForEach::Continue)
578
+ })?;
579
+ arrow_schema
580
+ .try_insert(
581
+ name.clone().into(),
582
+ ArrowField::new(name.into(), dtype, is_nullable).with_metadata(metadata),
583
+ )
584
+ .map_err(to_rb_err)?;
585
+ }
586
+ Ok(Wrap(arrow_schema))
587
+ }
588
+ }
589
+
527
590
  impl TryConvert for Wrap<ScanSources> {
528
591
  fn try_convert(ob: Value) -> RbResult<Self> {
529
592
  let list = RArray::try_convert(ob)?;
@@ -665,7 +728,7 @@ impl From<&dyn PolarsObjectSafe> for &ObjectValue {
665
728
 
666
729
  impl ObjectValue {
667
730
  pub fn to_value(&self) -> Value {
668
- self.clone().into_value()
731
+ self.clone().into_value_with(&Ruby::get().unwrap())
669
732
  }
670
733
  }
671
734
 
@@ -979,6 +1042,22 @@ impl TryConvert for Wrap<RankMethod> {
979
1042
  }
980
1043
  }
981
1044
 
1045
+ impl TryConvert for Wrap<Roll> {
1046
+ fn try_convert(ob: Value) -> RbResult<Self> {
1047
+ let parsed = match String::try_convert(ob)?.as_str() {
1048
+ "raise" => Roll::Raise,
1049
+ "forward" => Roll::Forward,
1050
+ "backward" => Roll::Backward,
1051
+ v => {
1052
+ return Err(RbValueError::new_err(format!(
1053
+ "`roll` must be one of {{'raise', 'forward', 'backward'}}, got {v}",
1054
+ )));
1055
+ }
1056
+ };
1057
+ Ok(Wrap(parsed))
1058
+ }
1059
+ }
1060
+
982
1061
  impl TryConvert for Wrap<TimeUnit> {
983
1062
  fn try_convert(ob: Value) -> RbResult<Self> {
984
1063
  let parsed = match String::try_convert(ob)?.as_str() {
@@ -1156,7 +1235,109 @@ impl TryConvert for Wrap<CastColumnsPolicy> {
1156
1235
  let out = Wrap(CastColumnsPolicy::ERROR_ON_MISMATCH);
1157
1236
  return Ok(out);
1158
1237
  }
1159
- todo!();
1238
+
1239
+ let integer_upcast = match &*ob.funcall::<_, _, String>("integer_cast", ())? {
1240
+ "upcast" => true,
1241
+ "forbid" => false,
1242
+ v => {
1243
+ return Err(RbValueError::new_err(format!(
1244
+ "unknown option for integer_cast: {v}"
1245
+ )));
1246
+ }
1247
+ };
1248
+
1249
+ let mut float_upcast = false;
1250
+ let mut float_downcast = false;
1251
+
1252
+ let float_cast_object: Value = ob.funcall("float_cast", ())?;
1253
+
1254
+ parse_multiple_options("float_cast", float_cast_object, |v| {
1255
+ match v {
1256
+ "forbid" => {}
1257
+ "upcast" => float_upcast = true,
1258
+ "downcast" => float_downcast = true,
1259
+ v => {
1260
+ return Err(RbValueError::new_err(format!(
1261
+ "unknown option for float_cast: {v}"
1262
+ )));
1263
+ }
1264
+ }
1265
+
1266
+ Ok(())
1267
+ })?;
1268
+
1269
+ let mut datetime_nanoseconds_downcast = false;
1270
+ let mut datetime_convert_timezone = false;
1271
+
1272
+ let datetime_cast_object: Value = ob.funcall("datetime_cast", ())?;
1273
+
1274
+ parse_multiple_options("datetime_cast", datetime_cast_object, |v| {
1275
+ match v {
1276
+ "forbid" => {}
1277
+ "nanosecond-downcast" => datetime_nanoseconds_downcast = true,
1278
+ "convert-timezone" => datetime_convert_timezone = true,
1279
+ v => {
1280
+ return Err(RbValueError::new_err(format!(
1281
+ "unknown option for datetime_cast: {v}"
1282
+ )));
1283
+ }
1284
+ };
1285
+
1286
+ Ok(())
1287
+ })?;
1288
+
1289
+ let missing_struct_fields =
1290
+ match &*ob.funcall::<_, _, String>("missing_struct_fields", ())? {
1291
+ "insert" => MissingColumnsPolicy::Insert,
1292
+ "raise" => MissingColumnsPolicy::Raise,
1293
+ v => {
1294
+ return Err(RbValueError::new_err(format!(
1295
+ "unknown option for missing_struct_fields: {v}"
1296
+ )));
1297
+ }
1298
+ };
1299
+
1300
+ let extra_struct_fields = match &*ob.funcall::<_, _, String>("extra_struct_fields", ())? {
1301
+ "ignore" => ExtraColumnsPolicy::Ignore,
1302
+ "raise" => ExtraColumnsPolicy::Raise,
1303
+ v => {
1304
+ return Err(RbValueError::new_err(format!(
1305
+ "unknown option for extra_struct_fields: {v}"
1306
+ )));
1307
+ }
1308
+ };
1309
+
1310
+ return Ok(Wrap(CastColumnsPolicy {
1311
+ integer_upcast,
1312
+ float_upcast,
1313
+ float_downcast,
1314
+ datetime_nanoseconds_downcast,
1315
+ datetime_microseconds_downcast: false,
1316
+ datetime_convert_timezone,
1317
+ null_upcast: true,
1318
+ missing_struct_fields,
1319
+ extra_struct_fields,
1320
+ }));
1321
+
1322
+ fn parse_multiple_options(
1323
+ parameter_name: &'static str,
1324
+ rb_object: Value,
1325
+ mut parser_func: impl FnMut(&str) -> RbResult<()>,
1326
+ ) -> RbResult<()> {
1327
+ if let Ok(v) = String::try_convert(rb_object) {
1328
+ parser_func(&v)?;
1329
+ } else if let Ok(v) = RArray::try_convert(rb_object) {
1330
+ for v in v {
1331
+ parser_func(&String::try_convert(v)?)?;
1332
+ }
1333
+ } else {
1334
+ return Err(RbValueError::new_err(format!(
1335
+ "unknown type for {parameter_name}: {rb_object}"
1336
+ )));
1337
+ }
1338
+
1339
+ Ok(())
1340
+ }
1160
1341
  }
1161
1342
  }
1162
1343
 
@@ -1174,7 +1355,7 @@ pub fn parse_fill_null_strategy(
1174
1355
  "one" => FillNullStrategy::One,
1175
1356
  e => {
1176
1357
  return Err(magnus::Error::new(
1177
- exception::runtime_error(),
1358
+ Ruby::get().unwrap().exception_runtime_error(),
1178
1359
  format!(
1179
1360
  "strategy must be one of {{'forward', 'backward', 'min', 'max', 'mean', 'zero', 'one'}}, got {e}",
1180
1361
  ),
@@ -1225,15 +1406,6 @@ pub fn parse_parquet_compression(
1225
1406
  Ok(parsed)
1226
1407
  }
1227
1408
 
1228
- impl TryConvert for Wrap<NonZeroUsize> {
1229
- fn try_convert(ob: Value) -> RbResult<Self> {
1230
- let v = usize::try_convert(ob)?;
1231
- NonZeroUsize::new(v)
1232
- .map(Wrap)
1233
- .ok_or(RbValueError::new_err("must be non-zero"))
1234
- }
1235
- }
1236
-
1237
1409
  pub(crate) fn strings_to_pl_smallstr<I, S>(container: I) -> Vec<PlSmallStr>
1238
1410
  where
1239
1411
  I: IntoIterator<Item = S>,
@@ -1270,6 +1442,23 @@ impl TryConvert for RbCompatLevel {
1270
1442
  }
1271
1443
  }
1272
1444
 
1445
+ impl TryConvert for Wrap<UnicodeForm> {
1446
+ fn try_convert(ob: Value) -> RbResult<Self> {
1447
+ let parsed = match String::try_convert(ob)?.as_str() {
1448
+ "NFC" => UnicodeForm::NFC,
1449
+ "NFKC" => UnicodeForm::NFKC,
1450
+ "NFD" => UnicodeForm::NFD,
1451
+ "NFKD" => UnicodeForm::NFKD,
1452
+ v => {
1453
+ return Err(RbValueError::new_err(format!(
1454
+ "`form` must be one of {{'NFC', 'NFKC', 'NFD', 'NFKD'}}, got {v}",
1455
+ )));
1456
+ }
1457
+ };
1458
+ Ok(Wrap(parsed))
1459
+ }
1460
+ }
1461
+
1273
1462
  impl TryConvert for Wrap<Option<TimeZone>> {
1274
1463
  fn try_convert(ob: Value) -> RbResult<Self> {
1275
1464
  let tz = Option::<Wrap<PlSmallStr>>::try_convert(ob)?;
@@ -1313,13 +1502,97 @@ impl TryConvert for Wrap<MissingColumnsPolicy> {
1313
1502
  }
1314
1503
 
1315
1504
  impl TryConvert for Wrap<ColumnMapping> {
1316
- fn try_convert(_ob: Value) -> RbResult<Self> {
1317
- todo!()
1505
+ fn try_convert(ob: Value) -> RbResult<Self> {
1506
+ let (column_mapping_type, ob) = <(String, Value)>::try_convert(ob)?;
1507
+
1508
+ Ok(Wrap(match column_mapping_type.as_str() {
1509
+ "iceberg-column-mapping" => {
1510
+ let arrow_schema = Wrap::<ArrowSchema>::try_convert(ob)?;
1511
+ ColumnMapping::Iceberg(Arc::new(
1512
+ IcebergSchema::from_arrow_schema(&arrow_schema.0).map_err(to_rb_err)?,
1513
+ ))
1514
+ }
1515
+
1516
+ v => {
1517
+ return Err(RbValueError::new_err(format!(
1518
+ "unknown column mapping type: {v}"
1519
+ )));
1520
+ }
1521
+ }))
1318
1522
  }
1319
1523
  }
1320
1524
 
1321
1525
  impl TryConvert for Wrap<DeletionFilesList> {
1322
- fn try_convert(_ob: Value) -> RbResult<Self> {
1323
- todo!();
1526
+ fn try_convert(ob: Value) -> RbResult<Self> {
1527
+ let (deletion_file_type, ob) = <(String, Value)>::try_convert(ob)?;
1528
+
1529
+ Ok(Wrap(match deletion_file_type.as_str() {
1530
+ "iceberg-position-delete" => {
1531
+ let dict = RHash::try_convert(ob)?;
1532
+
1533
+ let mut out = PlIndexMap::new();
1534
+
1535
+ dict.foreach(|k: usize, v: RArray| {
1536
+ let files = v
1537
+ .into_iter()
1538
+ .map(|x| {
1539
+ let x = String::try_convert(x)?;
1540
+ Ok(x)
1541
+ })
1542
+ .collect::<RbResult<Arc<[String]>>>()?;
1543
+
1544
+ if !files.is_empty() {
1545
+ out.insert(k, files);
1546
+ }
1547
+
1548
+ Ok(ForEach::Continue)
1549
+ })?;
1550
+
1551
+ DeletionFilesList::IcebergPositionDelete(Arc::new(out))
1552
+ }
1553
+
1554
+ v => {
1555
+ return Err(RbValueError::new_err(format!(
1556
+ "unknown deletion file type: {v}"
1557
+ )));
1558
+ }
1559
+ }))
1560
+ }
1561
+ }
1562
+
1563
+ impl TryConvert for Wrap<DefaultFieldValues> {
1564
+ fn try_convert(ob: Value) -> RbResult<Self> {
1565
+ let (default_values_type, ob) = <(String, Value)>::try_convert(ob)?;
1566
+
1567
+ Ok(Wrap(match &*default_values_type {
1568
+ "iceberg" => {
1569
+ let dict = RHash::try_convert(ob)?;
1570
+
1571
+ let mut out = PlIndexMap::new();
1572
+
1573
+ dict.foreach(|k: u32, v: Value| {
1574
+ let v: Result<Column, String> = if let Ok(s) = get_series(v) {
1575
+ Ok(s.into_column())
1576
+ } else {
1577
+ let err_msg = String::try_convert(v)?;
1578
+ Err(err_msg)
1579
+ };
1580
+
1581
+ out.insert(k, v);
1582
+
1583
+ Ok(ForEach::Continue)
1584
+ })?;
1585
+
1586
+ DefaultFieldValues::Iceberg(Arc::new(IcebergIdentityTransformedPartitionFields(
1587
+ out,
1588
+ )))
1589
+ }
1590
+
1591
+ v => {
1592
+ return Err(RbValueError::new_err(format!(
1593
+ "unknown deletion file type: {v}"
1594
+ )));
1595
+ }
1596
+ }))
1324
1597
  }
1325
1598
  }
@@ -1,4 +1,4 @@
1
- use magnus::{RArray, RHash, Symbol, Value, prelude::*, r_hash::ForEach};
1
+ use magnus::{RArray, RHash, Ruby, Symbol, Value, prelude::*, r_hash::ForEach};
2
2
  use polars::frame::row::{Row, rows_to_schema_supertypes, rows_to_supertypes};
3
3
  use polars::prelude::*;
4
4
 
@@ -125,6 +125,7 @@ where
125
125
  }
126
126
 
127
127
  fn dicts_to_rows<'a>(data: &Value, names: &'a [String], _strict: bool) -> RbResult<Vec<Row<'a>>> {
128
+ let ruby = Ruby::get_with(*data);
128
129
  let (data, len) = get_rbseq(*data)?;
129
130
  let mut rows = Vec::with_capacity(len);
130
131
  for d in data.into_iter() {
@@ -132,8 +133,8 @@ fn dicts_to_rows<'a>(data: &Value, names: &'a [String], _strict: bool) -> RbResu
132
133
 
133
134
  let mut row = Vec::with_capacity(names.len());
134
135
  for k in names.iter() {
135
- // TODO improve performance
136
- let val = match d.get(k.clone()).or_else(|| d.get(Symbol::new(k))) {
136
+ // TODO improve performance (must work with GC)
137
+ let val = match d.get(k.clone()).or_else(|| d.get(ruby.to_symbol(k))) {
137
138
  None => AnyValue::Null,
138
139
  Some(val) => Wrap::<AnyValue>::try_convert(val)?.0,
139
140
  };
@@ -1,4 +1,4 @@
1
- use magnus::{IntoValue, RArray, Value, prelude::*};
1
+ use magnus::{IntoValue, Ruby, Value, prelude::*};
2
2
 
3
3
  use super::*;
4
4
  use crate::RbResult;
@@ -6,14 +6,15 @@ use crate::conversion::{ObjectValue, Wrap};
6
6
  use crate::interop::arrow::to_ruby::dataframe_to_stream;
7
7
 
8
8
  impl RbDataFrame {
9
- pub fn row_tuple(&self, idx: i64) -> Value {
9
+ pub fn row_tuple(ruby: &Ruby, rb_self: &Self, idx: i64) -> Value {
10
10
  let idx = if idx < 0 {
11
- (self.df.borrow().height() as i64 + idx) as usize
11
+ (rb_self.df.borrow().height() as i64 + idx) as usize
12
12
  } else {
13
13
  idx as usize
14
14
  };
15
- RArray::from_iter(
16
- self.df
15
+ ruby.ary_from_iter(
16
+ rb_self
17
+ .df
17
18
  .borrow()
18
19
  .get_columns()
19
20
  .iter()
@@ -22,17 +23,18 @@ impl RbDataFrame {
22
23
  let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
23
24
  obj.unwrap().to_value()
24
25
  }
25
- _ => Wrap(s.get(idx).unwrap()).into_value(),
26
+ _ => Wrap(s.get(idx).unwrap()).into_value_with(ruby),
26
27
  }),
27
28
  )
28
29
  .as_value()
29
30
  }
30
31
 
31
- pub fn row_tuples(&self) -> Value {
32
- let df = &self.df;
33
- RArray::from_iter((0..df.borrow().height()).map(|idx| {
34
- RArray::from_iter(
35
- self.df
32
+ pub fn row_tuples(ruby: &Ruby, rb_self: &Self) -> Value {
33
+ let df = &rb_self.df;
34
+ ruby.ary_from_iter((0..df.borrow().height()).map(|idx| {
35
+ ruby.ary_from_iter(
36
+ rb_self
37
+ .df
36
38
  .borrow()
37
39
  .get_columns()
38
40
  .iter()
@@ -41,15 +43,15 @@ impl RbDataFrame {
41
43
  let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
42
44
  obj.unwrap().to_value()
43
45
  }
44
- _ => Wrap(s.get(idx).unwrap()).into_value(),
46
+ _ => Wrap(s.get(idx).unwrap()).into_value_with(ruby),
45
47
  }),
46
48
  )
47
49
  }))
48
50
  .as_value()
49
51
  }
50
52
 
51
- pub fn __arrow_c_stream__(&self) -> RbResult<Value> {
52
- self.df.borrow_mut().align_chunks();
53
- dataframe_to_stream(&self.df.borrow())
53
+ pub fn __arrow_c_stream__(ruby: &Ruby, rb_self: &Self) -> RbResult<Value> {
54
+ rb_self.df.borrow_mut().align_chunks();
55
+ dataframe_to_stream(&rb_self.df.borrow(), ruby)
54
56
  }
55
57
  }
@@ -1,7 +1,7 @@
1
1
  use std::hash::BuildHasher;
2
2
 
3
3
  use either::Either;
4
- use magnus::{IntoValue, RArray, Value, prelude::*, typed_data::Obj};
4
+ use magnus::{IntoValue, RArray, Ruby, Value, prelude::*};
5
5
  use polars::prelude::pivot::{pivot, pivot_stable};
6
6
  use polars::prelude::*;
7
7
 
@@ -149,12 +149,13 @@ impl RbDataFrame {
149
149
  Ok(())
150
150
  }
151
151
 
152
- pub fn dtypes(&self) -> RArray {
153
- RArray::from_iter(
154
- self.df
152
+ pub fn dtypes(ruby: &Ruby, rb_self: &Self) -> RArray {
153
+ ruby.ary_from_iter(
154
+ rb_self
155
+ .df
155
156
  .borrow()
156
157
  .iter()
157
- .map(|s| Wrap(s.dtype().clone()).into_value()),
158
+ .map(|s| Wrap(s.dtype().clone()).into_value_with(ruby)),
158
159
  )
159
160
  }
160
161
 
@@ -393,18 +394,19 @@ impl RbDataFrame {
393
394
  }
394
395
 
395
396
  pub fn partition_by(
396
- &self,
397
+ ruby: &Ruby,
398
+ rb_self: &Self,
397
399
  by: Vec<String>,
398
400
  maintain_order: bool,
399
401
  include_key: bool,
400
402
  ) -> RbResult<RArray> {
401
403
  let out = if maintain_order {
402
- self.df.borrow().partition_by_stable(by, include_key)
404
+ rb_self.df.borrow().partition_by_stable(by, include_key)
403
405
  } else {
404
- self.df.borrow().partition_by(by, include_key)
406
+ rb_self.df.borrow().partition_by(by, include_key)
405
407
  }
406
408
  .map_err(RbPolarsErr::from)?;
407
- Ok(RArray::from_iter(out.into_iter().map(RbDataFrame::new)))
409
+ Ok(ruby.ary_from_iter(out.into_iter().map(RbDataFrame::new)))
408
410
  }
409
411
 
410
412
  pub fn lazy(&self) -> RbLazyFrame {
@@ -440,12 +442,13 @@ impl RbDataFrame {
440
442
  }
441
443
 
442
444
  pub fn map_rows(
443
- &self,
445
+ ruby: &Ruby,
446
+ rb_self: &Self,
444
447
  lambda: Value,
445
448
  output_type: Option<Wrap<DataType>>,
446
449
  inference_size: usize,
447
450
  ) -> RbResult<(Value, bool)> {
448
- let df = &self.df.borrow();
451
+ let df = &rb_self.df.borrow();
449
452
 
450
453
  let output_type = output_type.map(|dt| dt.0);
451
454
  let out = match output_type {
@@ -490,7 +493,7 @@ impl RbDataFrame {
490
493
  _ => return apply_lambda_unknown(df, lambda, inference_size),
491
494
  };
492
495
 
493
- Ok((Obj::wrap(RbSeries::from(out)).as_value(), false))
496
+ Ok((ruby.obj_wrap(RbSeries::from(out)).as_value(), false))
494
497
  }
495
498
 
496
499
  pub fn shrink_to_fit(&self) {
@@ -249,14 +249,13 @@ impl RbDataFrame {
249
249
  include_header: bool,
250
250
  separator: u8,
251
251
  quote_char: u8,
252
- batch_size: Wrap<NonZeroUsize>,
252
+ batch_size: NonZeroUsize,
253
253
  datetime_format: Option<String>,
254
254
  date_format: Option<String>,
255
255
  time_format: Option<String>,
256
256
  float_precision: Option<usize>,
257
257
  null_value: Option<String>,
258
258
  ) -> RbResult<()> {
259
- let batch_size = batch_size.0;
260
259
  let null = null_value.unwrap_or_default();
261
260
  let mut buf = get_file_like(rb_f, true)?;
262
261
  CsvWriter::new(&mut buf)