polars-df 0.20.0 → 0.21.1

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries and as they appear in that public registry. It is provided for informational purposes only.
Files changed (109)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +27 -0
  3. data/Cargo.lock +192 -186
  4. data/LICENSE.txt +1 -1
  5. data/ext/polars/Cargo.toml +19 -9
  6. data/ext/polars/src/batched_csv.rs +2 -2
  7. data/ext/polars/src/catalog/mod.rs +1 -0
  8. data/ext/polars/src/catalog/unity.rs +450 -0
  9. data/ext/polars/src/conversion/any_value.rs +9 -19
  10. data/ext/polars/src/conversion/categorical.rs +30 -0
  11. data/ext/polars/src/conversion/chunked_array.rs +8 -8
  12. data/ext/polars/src/conversion/mod.rs +275 -109
  13. data/ext/polars/src/dataframe/construction.rs +2 -2
  14. data/ext/polars/src/dataframe/export.rs +2 -2
  15. data/ext/polars/src/dataframe/general.rs +4 -2
  16. data/ext/polars/src/dataframe/io.rs +2 -2
  17. data/ext/polars/src/exceptions.rs +2 -1
  18. data/ext/polars/src/expr/array.rs +73 -4
  19. data/ext/polars/src/expr/binary.rs +26 -1
  20. data/ext/polars/src/expr/bitwise.rs +39 -0
  21. data/ext/polars/src/expr/categorical.rs +20 -0
  22. data/ext/polars/src/expr/datatype.rs +37 -0
  23. data/ext/polars/src/expr/datetime.rs +58 -0
  24. data/ext/polars/src/expr/general.rs +106 -22
  25. data/ext/polars/src/expr/list.rs +45 -2
  26. data/ext/polars/src/expr/meta.rs +5 -28
  27. data/ext/polars/src/expr/mod.rs +4 -1
  28. data/ext/polars/src/expr/name.rs +10 -2
  29. data/ext/polars/src/expr/rolling.rs +21 -1
  30. data/ext/polars/src/expr/selector.rs +219 -0
  31. data/ext/polars/src/expr/string.rs +73 -6
  32. data/ext/polars/src/expr/struct.rs +9 -1
  33. data/ext/polars/src/file.rs +11 -5
  34. data/ext/polars/src/functions/io.rs +21 -11
  35. data/ext/polars/src/functions/lazy.rs +26 -54
  36. data/ext/polars/src/functions/meta.rs +2 -2
  37. data/ext/polars/src/functions/misc.rs +1 -1
  38. data/ext/polars/src/functions/string_cache.rs +4 -5
  39. data/ext/polars/src/interop/numo/numo_rs.rs +1 -1
  40. data/ext/polars/src/interop/numo/to_numo_series.rs +1 -1
  41. data/ext/polars/src/io/mod.rs +102 -0
  42. data/ext/polars/src/lazyframe/general.rs +124 -111
  43. data/ext/polars/src/lazyframe/serde.rs +1 -1
  44. data/ext/polars/src/lazyframe/sink.rs +6 -6
  45. data/ext/polars/src/lib.rs +216 -29
  46. data/ext/polars/src/map/dataframe.rs +9 -9
  47. data/ext/polars/src/map/lazy.rs +1 -1
  48. data/ext/polars/src/map/mod.rs +31 -19
  49. data/ext/polars/src/map/series.rs +9 -9
  50. data/ext/polars/src/on_startup.rs +5 -2
  51. data/ext/polars/src/rb_modules.rs +1 -1
  52. data/ext/polars/src/series/aggregation.rs +44 -0
  53. data/ext/polars/src/series/construction.rs +11 -7
  54. data/ext/polars/src/series/export.rs +6 -4
  55. data/ext/polars/src/series/general.rs +75 -210
  56. data/ext/polars/src/series/import.rs +2 -2
  57. data/ext/polars/src/series/map.rs +227 -0
  58. data/ext/polars/src/series/mod.rs +2 -1
  59. data/ext/polars/src/series/scatter.rs +1 -1
  60. data/ext/polars/src/utils.rs +10 -2
  61. data/lib/polars/array_expr.rb +382 -3
  62. data/lib/polars/array_name_space.rb +281 -0
  63. data/lib/polars/binary_expr.rb +67 -0
  64. data/lib/polars/binary_name_space.rb +43 -0
  65. data/lib/polars/cat_expr.rb +224 -0
  66. data/lib/polars/cat_name_space.rb +130 -32
  67. data/lib/polars/catalog/unity/catalog_info.rb +20 -0
  68. data/lib/polars/catalog/unity/column_info.rb +31 -0
  69. data/lib/polars/catalog/unity/namespace_info.rb +21 -0
  70. data/lib/polars/catalog/unity/table_info.rb +50 -0
  71. data/lib/polars/catalog.rb +448 -0
  72. data/lib/polars/config.rb +2 -2
  73. data/lib/polars/convert.rb +12 -2
  74. data/lib/polars/data_frame.rb +834 -48
  75. data/lib/polars/data_type_expr.rb +52 -0
  76. data/lib/polars/data_types.rb +61 -5
  77. data/lib/polars/date_time_expr.rb +251 -0
  78. data/lib/polars/date_time_name_space.rb +299 -0
  79. data/lib/polars/exceptions.rb +7 -2
  80. data/lib/polars/expr.rb +1247 -211
  81. data/lib/polars/functions/col.rb +6 -5
  82. data/lib/polars/functions/datatype.rb +21 -0
  83. data/lib/polars/functions/lazy.rb +127 -15
  84. data/lib/polars/functions/repeat.rb +4 -0
  85. data/lib/polars/io/csv.rb +19 -1
  86. data/lib/polars/io/json.rb +16 -0
  87. data/lib/polars/io/ndjson.rb +13 -0
  88. data/lib/polars/io/parquet.rb +70 -66
  89. data/lib/polars/io/scan_options.rb +47 -0
  90. data/lib/polars/lazy_frame.rb +1099 -95
  91. data/lib/polars/list_expr.rb +400 -11
  92. data/lib/polars/list_name_space.rb +321 -5
  93. data/lib/polars/meta_expr.rb +71 -22
  94. data/lib/polars/name_expr.rb +36 -0
  95. data/lib/polars/scan_cast_options.rb +64 -0
  96. data/lib/polars/schema.rb +84 -3
  97. data/lib/polars/selector.rb +210 -0
  98. data/lib/polars/selectors.rb +932 -203
  99. data/lib/polars/series.rb +1083 -63
  100. data/lib/polars/string_expr.rb +435 -9
  101. data/lib/polars/string_name_space.rb +729 -45
  102. data/lib/polars/struct_expr.rb +103 -0
  103. data/lib/polars/struct_name_space.rb +19 -1
  104. data/lib/polars/utils/parse.rb +40 -0
  105. data/lib/polars/utils/various.rb +18 -1
  106. data/lib/polars/utils.rb +9 -1
  107. data/lib/polars/version.rb +1 -1
  108. data/lib/polars.rb +10 -0
  109. metadata +20 -2
@@ -1,15 +1,16 @@
1
1
  pub(crate) mod any_value;
2
+ mod categorical;
2
3
  mod chunked_array;
3
4
 
4
5
  use std::fmt::{Debug, Display, Formatter};
5
6
  use std::fs::File;
6
7
  use std::hash::{Hash, Hasher};
7
8
  use std::num::NonZeroUsize;
8
- use std::path::PathBuf;
9
9
 
10
+ pub use categorical::RbCategories;
10
11
  use magnus::{
11
- class, exception, prelude::*, r_hash::ForEach, try_convert::TryConvertOwned, value::Opaque,
12
- IntoValue, Module, RArray, RHash, Ruby, Symbol, TryConvert, Value,
12
+ IntoValue, Module, RArray, RHash, Ruby, Symbol, TryConvert, Value, class, exception,
13
+ prelude::*, r_hash::ForEach, try_convert::TryConvertOwned, value::Opaque,
13
14
  };
14
15
  use polars::chunked_array::object::PolarsObjectSafe;
15
16
  use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
@@ -17,17 +18,20 @@ use polars::datatypes::AnyValue;
17
18
  use polars::frame::row::Row;
18
19
  use polars::io::avro::AvroCompression;
19
20
  use polars::io::cloud::CloudOptions;
21
+ use polars::prelude::deletion::DeletionFilesList;
20
22
  use polars::prelude::*;
21
23
  use polars::series::ops::NullBehavior;
24
+ use polars_core::schema::iceberg::IcebergSchema;
22
25
  use polars_core::utils::arrow::array::Array;
23
26
  use polars_core::utils::materialize_dyn_int;
24
27
  use polars_plan::dsl::ScanSources;
25
28
  use polars_utils::mmap::MemSlice;
26
29
  use polars_utils::total_ord::{TotalEq, TotalHash};
27
30
 
28
- use crate::file::{get_ruby_scan_source_input, RubyScanSourceInput};
31
+ use crate::file::{RubyScanSourceInput, get_ruby_scan_source_input};
29
32
  use crate::object::OBJECT_NAME;
30
33
  use crate::rb_modules::series;
34
+ use crate::utils::to_rb_err;
31
35
  use crate::{RbDataFrame, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbTypeError, RbValueError};
32
36
 
33
37
  pub(crate) fn slice_extract_wrapped<T>(slice: &[Wrap<T>]) -> &[T] {
@@ -226,18 +230,25 @@ impl IntoValue for Wrap<DataType> {
226
230
  let class = pl.const_get::<_, Value>("Object").unwrap();
227
231
  class.funcall("new", ()).unwrap()
228
232
  }
229
- DataType::Categorical(_, ordering) => {
230
- let class = pl.const_get::<_, Value>("Categorical").unwrap();
231
- class.funcall("new", (Wrap(ordering),)).unwrap()
232
- }
233
- DataType::Enum(rev_map, _) => {
234
- // we should always have an initialized rev_map coming from rust
235
- let categories = rev_map.as_ref().unwrap().get_categories();
233
+ DataType::Categorical(cats, _) => {
234
+ let categories_class = pl.const_get::<_, Value>("Categories").unwrap();
235
+ let categorical_class = pl.const_get::<_, Value>("Categorical").unwrap();
236
+ let categories: Value = categories_class
237
+ .funcall("_from_rb_categories", (RbCategories::from(cats.clone()),))
238
+ .unwrap();
239
+ let kwargs = RHash::new();
240
+ kwargs.aset(Symbol::new("categories"), categories).unwrap();
241
+ categorical_class.funcall("new", (kwargs,)).unwrap()
242
+ }
243
+ DataType::Enum(_, mapping) => {
244
+ let categories = unsafe {
245
+ StringChunked::from_chunks(
246
+ PlSmallStr::from_static("category"),
247
+ vec![mapping.to_arrow(true)],
248
+ )
249
+ };
236
250
  let class = pl.const_get::<_, Value>("Enum").unwrap();
237
- let s =
238
- Series::from_arrow(PlSmallStr::from_static("category"), categories.to_boxed())
239
- .unwrap();
240
- let series = to_series(s.into());
251
+ let series = to_series(categories.into_series().into());
241
252
  class.funcall::<_, _, Value>("new", (series,)).unwrap()
242
253
  }
243
254
  DataType::Time => {
@@ -277,13 +288,13 @@ impl IntoValue for Wrap<DataType> {
277
288
  }
278
289
  }
279
290
 
291
+ enum CategoricalOrdering {
292
+ Lexical,
293
+ }
294
+
280
295
  impl IntoValue for Wrap<CategoricalOrdering> {
281
296
  fn into_value_with(self, _: &Ruby) -> Value {
282
- let ordering = match self.0 {
283
- CategoricalOrdering::Physical => "physical",
284
- CategoricalOrdering::Lexical => "lexical",
285
- };
286
- ordering.into_value()
297
+ "lexical".into_value()
287
298
  }
288
299
  }
289
300
 
@@ -324,8 +335,10 @@ impl TryConvert for Wrap<DataType> {
324
335
  "Polars::Boolean" => DataType::Boolean,
325
336
  "Polars::String" => DataType::String,
326
337
  "Polars::Binary" => DataType::Binary,
327
- "Polars::Categorical" => DataType::Categorical(None, Default::default()),
328
- "Polars::Enum" => DataType::Enum(None, Default::default()),
338
+ "Polars::Categorical" => DataType::from_categories(Categories::global()),
339
+ "Polars::Enum" => {
340
+ DataType::from_frozen_categories(FrozenCategories::new([]).unwrap())
341
+ }
329
342
  "Polars::Date" => DataType::Date,
330
343
  "Polars::Time" => DataType::Time,
331
344
  "Polars::Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
@@ -340,11 +353,12 @@ impl TryConvert for Wrap<DataType> {
340
353
  dt => {
341
354
  return Err(RbValueError::new_err(format!(
342
355
  "{dt} is not a correct polars DataType.",
343
- )))
356
+ )));
344
357
  }
345
358
  }
346
359
  } else if String::try_convert(ob).is_err() {
347
- let name = unsafe { ob.class().name() }.into_owned();
360
+ let cls = ob.class();
361
+ let name = unsafe { cls.name() }.into_owned();
348
362
  match name.as_str() {
349
363
  "Polars::Int8" => DataType::Int8,
350
364
  "Polars::Int16" => DataType::Int16,
@@ -360,17 +374,20 @@ impl TryConvert for Wrap<DataType> {
360
374
  "Polars::String" => DataType::String,
361
375
  "Polars::Binary" => DataType::Binary,
362
376
  "Polars::Categorical" => {
363
- let ordering = ob
364
- .funcall::<_, _, Wrap<CategoricalOrdering>>("ordering", ())?
365
- .0;
366
- DataType::Categorical(None, ordering)
377
+ let categories: Value = ob.funcall("categories", ()).unwrap();
378
+ let rb_categories: &RbCategories =
379
+ categories.funcall("_categories", ()).unwrap();
380
+ DataType::from_categories(rb_categories.categories().clone())
367
381
  }
368
382
  "Polars::Enum" => {
369
- let categories = ob.funcall("categories", ()).unwrap();
383
+ let categories: Value = ob.funcall("categories", ()).unwrap();
370
384
  let s = get_series(categories)?;
371
385
  let ca = s.str().map_err(RbPolarsErr::from)?;
372
386
  let categories = ca.downcast_iter().next().unwrap().clone();
373
- create_enum_dtype(categories)
387
+ assert!(!categories.has_nulls());
388
+ DataType::from_frozen_categories(
389
+ FrozenCategories::new(categories.values_iter()).unwrap(),
390
+ )
374
391
  }
375
392
  "Polars::Date" => DataType::Date,
376
393
  "Polars::Time" => DataType::Time,
@@ -420,7 +437,7 @@ impl TryConvert for Wrap<DataType> {
420
437
  return Err(RbTypeError::new_err(format!(
421
438
  "A {dt} object is not a correct polars DataType. \
422
439
  Hint: use the class without instantiating it.",
423
- )))
440
+ )));
424
441
  }
425
442
  }
426
443
  } else {
@@ -436,7 +453,7 @@ impl TryConvert for Wrap<DataType> {
436
453
  "str" => DataType::String,
437
454
  "bin" => DataType::Binary,
438
455
  "bool" => DataType::Boolean,
439
- "cat" => DataType::Categorical(None, Default::default()),
456
+ "cat" => DataType::from_categories(Categories::global()),
440
457
  "date" => DataType::Date,
441
458
  "datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
442
459
  "f32" => DataType::Float32,
@@ -449,9 +466,8 @@ impl TryConvert for Wrap<DataType> {
449
466
  "unk" => DataType::Unknown(Default::default()),
450
467
  _ => {
451
468
  return Err(RbValueError::new_err(format!(
452
- "{} is not a supported DataType.",
453
- ob
454
- )))
469
+ "{ob} is not a supported DataType."
470
+ )));
455
471
  }
456
472
  }
457
473
  };
@@ -475,7 +491,7 @@ impl TryConvert for Wrap<StatisticsOptions> {
475
491
  _ => {
476
492
  return Err(RbTypeError::new_err(format!(
477
493
  "'{key}' is not a valid statistic option",
478
- )))
494
+ )));
479
495
  }
480
496
  }
481
497
  Ok(ForEach::Continue)
@@ -510,6 +526,12 @@ impl TryConvert for Wrap<Schema> {
510
526
  }
511
527
  }
512
528
 
529
+ impl TryConvert for Wrap<ArrowSchema> {
530
+ fn try_convert(_ob: Value) -> RbResult<Self> {
531
+ todo!();
532
+ }
533
+ }
534
+
513
535
  impl TryConvert for Wrap<ScanSources> {
514
536
  fn try_convert(ob: Value) -> RbResult<Self> {
515
537
  let list = RArray::try_convert(ob)?;
@@ -519,7 +541,7 @@ impl TryConvert for Wrap<ScanSources> {
519
541
  }
520
542
 
521
543
  enum MutableSources {
522
- Paths(Vec<PathBuf>),
544
+ Paths(Vec<PlPath>),
523
545
  Files(Vec<File>),
524
546
  Buffers(Vec<MemSlice>),
525
547
  }
@@ -560,7 +582,7 @@ impl TryConvert for Wrap<ScanSources> {
560
582
  return Err(RbTypeError::new_err(
561
583
  "Cannot combine in-memory bytes, paths and files for scan sources"
562
584
  .to_string(),
563
- ))
585
+ ));
564
586
  }
565
587
  }
566
588
  }
@@ -678,7 +700,7 @@ impl TryConvert for Wrap<AsofStrategy> {
678
700
  v => {
679
701
  return Err(RbValueError::new_err(format!(
680
702
  "asof `strategy` must be one of {{'backward', 'forward', 'nearest'}}, got {v}",
681
- )))
703
+ )));
682
704
  }
683
705
  };
684
706
  Ok(Wrap(parsed))
@@ -693,7 +715,7 @@ impl TryConvert for Wrap<InterpolationMethod> {
693
715
  v => {
694
716
  return Err(RbValueError::new_err(format!(
695
717
  "method must be one of {{'linear', 'nearest'}}, got {v}",
696
- )))
718
+ )));
697
719
  }
698
720
  };
699
721
  Ok(Wrap(parsed))
@@ -708,9 +730,8 @@ impl TryConvert for Wrap<Option<AvroCompression>> {
708
730
  "deflate" => Some(AvroCompression::Deflate),
709
731
  v => {
710
732
  return Err(RbValueError::new_err(format!(
711
- "compression must be one of {{'uncompressed', 'snappy', 'deflate'}}, got {}",
712
- v
713
- )))
733
+ "compression must be one of {{'uncompressed', 'snappy', 'deflate'}}, got {v}"
734
+ )));
714
735
  }
715
736
  };
716
737
  Ok(Wrap(parsed))
@@ -720,13 +741,18 @@ impl TryConvert for Wrap<Option<AvroCompression>> {
720
741
  impl TryConvert for Wrap<CategoricalOrdering> {
721
742
  fn try_convert(ob: Value) -> RbResult<Self> {
722
743
  let parsed = match String::try_convert(ob)?.as_str() {
723
- "physical" => CategoricalOrdering::Physical,
724
744
  "lexical" => CategoricalOrdering::Lexical,
745
+ "physical" => {
746
+ polars_warn!(
747
+ Deprecation,
748
+ "physical ordering is deprecated, will use lexical ordering instead"
749
+ );
750
+ CategoricalOrdering::Lexical
751
+ }
725
752
  v => {
726
753
  return Err(RbValueError::new_err(format!(
727
- "ordering must be one of {{'physical', 'lexical'}}, got {}",
728
- v
729
- )))
754
+ "ordering must be one of {{'physical', 'lexical'}}, got {v}"
755
+ )));
730
756
  }
731
757
  };
732
758
  Ok(Wrap(parsed))
@@ -742,7 +768,7 @@ impl TryConvert for Wrap<StartBy> {
742
768
  v => {
743
769
  return Err(RbValueError::new_err(format!(
744
770
  "closed must be one of {{'window', 'datapoint', 'monday'}}, got {v}",
745
- )))
771
+ )));
746
772
  }
747
773
  };
748
774
  Ok(Wrap(parsed))
@@ -758,9 +784,8 @@ impl TryConvert for Wrap<ClosedWindow> {
758
784
  "none" => ClosedWindow::None,
759
785
  v => {
760
786
  return Err(RbValueError::new_err(format!(
761
- "closed must be one of {{'left', 'right', 'both', 'none'}}, got {}",
762
- v
763
- )))
787
+ "closed must be one of {{'left', 'right', 'both', 'none'}}, got {v}"
788
+ )));
764
789
  }
765
790
  };
766
791
  Ok(Wrap(parsed))
@@ -789,9 +814,8 @@ impl TryConvert for Wrap<CsvEncoding> {
789
814
  "utf8-lossy" => CsvEncoding::LossyUtf8,
790
815
  v => {
791
816
  return Err(RbValueError::new_err(format!(
792
- "encoding must be one of {{'utf8', 'utf8-lossy'}}, got {}",
793
- v
794
- )))
817
+ "encoding must be one of {{'utf8', 'utf8-lossy'}}, got {v}"
818
+ )));
795
819
  }
796
820
  };
797
821
  Ok(Wrap(parsed))
@@ -806,9 +830,8 @@ impl TryConvert for Wrap<Option<IpcCompression>> {
806
830
  "zstd" => Some(IpcCompression::ZSTD),
807
831
  v => {
808
832
  return Err(RbValueError::new_err(format!(
809
- "compression must be one of {{'uncompressed', 'lz4', 'zstd'}}, got {}",
810
- v
811
- )))
833
+ "compression must be one of {{'uncompressed', 'lz4', 'zstd'}}, got {v}"
834
+ )));
812
835
  }
813
836
  };
814
837
  Ok(Wrap(parsed))
@@ -826,9 +849,8 @@ impl TryConvert for Wrap<JoinType> {
826
849
  "cross" => JoinType::Cross,
827
850
  v => {
828
851
  return Err(RbValueError::new_err(format!(
829
- "how must be one of {{'inner', 'left', 'full', 'semi', 'anti', 'cross'}}, got {}",
830
- v
831
- )))
852
+ "how must be one of {{'inner', 'left', 'full', 'semi', 'anti', 'cross'}}, got {v}"
853
+ )));
832
854
  }
833
855
  };
834
856
  Ok(Wrap(parsed))
@@ -844,7 +866,7 @@ impl TryConvert for Wrap<Label> {
844
866
  v => {
845
867
  return Err(RbValueError::new_err(format!(
846
868
  "`label` must be one of {{'left', 'right', 'datapoint'}}, got {v}",
847
- )))
869
+ )));
848
870
  }
849
871
  };
850
872
  Ok(Wrap(parsed))
@@ -858,9 +880,8 @@ impl TryConvert for Wrap<ListToStructWidthStrategy> {
858
880
  "max_width" => ListToStructWidthStrategy::MaxWidth,
859
881
  v => {
860
882
  return Err(RbValueError::new_err(format!(
861
- "n_field_strategy must be one of {{'first_non_null', 'max_width'}}, got {}",
862
- v
863
- )))
883
+ "n_field_strategy must be one of {{'first_non_null', 'max_width'}}, got {v}"
884
+ )));
864
885
  }
865
886
  };
866
887
  Ok(Wrap(parsed))
@@ -875,7 +896,7 @@ impl TryConvert for Wrap<NonExistent> {
875
896
  v => {
876
897
  return Err(RbValueError::new_err(format!(
877
898
  "`non_existent` must be one of {{'null', 'raise'}}, got {v}",
878
- )))
899
+ )));
879
900
  }
880
901
  };
881
902
  Ok(Wrap(parsed))
@@ -889,9 +910,8 @@ impl TryConvert for Wrap<NullBehavior> {
889
910
  "ignore" => NullBehavior::Ignore,
890
911
  v => {
891
912
  return Err(RbValueError::new_err(format!(
892
- "null behavior must be one of {{'drop', 'ignore'}}, got {}",
893
- v
894
- )))
913
+ "null behavior must be one of {{'drop', 'ignore'}}, got {v}"
914
+ )));
895
915
  }
896
916
  };
897
917
  Ok(Wrap(parsed))
@@ -905,9 +925,8 @@ impl TryConvert for Wrap<NullStrategy> {
905
925
  "propagate" => NullStrategy::Propagate,
906
926
  v => {
907
927
  return Err(RbValueError::new_err(format!(
908
- "null strategy must be one of {{'ignore', 'propagate'}}, got {}",
909
- v
910
- )))
928
+ "null strategy must be one of {{'ignore', 'propagate'}}, got {v}"
929
+ )));
911
930
  }
912
931
  };
913
932
  Ok(Wrap(parsed))
@@ -923,9 +942,8 @@ impl TryConvert for Wrap<ParallelStrategy> {
923
942
  "none" => ParallelStrategy::None,
924
943
  v => {
925
944
  return Err(RbValueError::new_err(format!(
926
- "parallel must be one of {{'auto', 'columns', 'row_groups', 'none'}}, got {}",
927
- v
928
- )))
945
+ "parallel must be one of {{'auto', 'columns', 'row_groups', 'none'}}, got {v}"
946
+ )));
929
947
  }
930
948
  };
931
949
  Ok(Wrap(parsed))
@@ -942,9 +960,8 @@ impl TryConvert for Wrap<QuantileMethod> {
942
960
  "midpoint" => QuantileMethod::Midpoint,
943
961
  v => {
944
962
  return Err(RbValueError::new_err(format!(
945
- "interpolation must be one of {{'lower', 'higher', 'nearest', 'linear', 'midpoint'}}, got {}",
946
- v
947
- )))
963
+ "interpolation must be one of {{'lower', 'higher', 'nearest', 'linear', 'midpoint'}}, got {v}"
964
+ )));
948
965
  }
949
966
  };
950
967
  Ok(Wrap(parsed))
@@ -962,9 +979,24 @@ impl TryConvert for Wrap<RankMethod> {
962
979
  "random" => RankMethod::Random,
963
980
  v => {
964
981
  return Err(RbValueError::new_err(format!(
965
- "method must be one of {{'min', 'max', 'average', 'dense', 'ordinal', 'random'}}, got {}",
966
- v
967
- )))
982
+ "method must be one of {{'min', 'max', 'average', 'dense', 'ordinal', 'random'}}, got {v}"
983
+ )));
984
+ }
985
+ };
986
+ Ok(Wrap(parsed))
987
+ }
988
+ }
989
+
990
+ impl TryConvert for Wrap<Roll> {
991
+ fn try_convert(ob: Value) -> RbResult<Self> {
992
+ let parsed = match String::try_convert(ob)?.as_str() {
993
+ "raise" => Roll::Raise,
994
+ "forward" => Roll::Forward,
995
+ "backward" => Roll::Backward,
996
+ v => {
997
+ return Err(RbValueError::new_err(format!(
998
+ "`roll` must be one of {{'raise', 'forward', 'backward'}}, got {v}",
999
+ )));
968
1000
  }
969
1001
  };
970
1002
  Ok(Wrap(parsed))
@@ -979,15 +1011,16 @@ impl TryConvert for Wrap<TimeUnit> {
979
1011
  "ms" => TimeUnit::Milliseconds,
980
1012
  v => {
981
1013
  return Err(RbValueError::new_err(format!(
982
- "time unit must be one of {{'ns', 'us', 'ms'}}, got {}",
983
- v
984
- )))
1014
+ "time unit must be one of {{'ns', 'us', 'ms'}}, got {v}"
1015
+ )));
985
1016
  }
986
1017
  };
987
1018
  Ok(Wrap(parsed))
988
1019
  }
989
1020
  }
990
1021
 
1022
+ unsafe impl TryConvertOwned for Wrap<TimeUnit> {}
1023
+
991
1024
  impl TryConvert for Wrap<UniqueKeepStrategy> {
992
1025
  fn try_convert(ob: Value) -> RbResult<Self> {
993
1026
  let parsed = match String::try_convert(ob)?.as_str() {
@@ -995,9 +1028,8 @@ impl TryConvert for Wrap<UniqueKeepStrategy> {
995
1028
  "last" => UniqueKeepStrategy::Last,
996
1029
  v => {
997
1030
  return Err(RbValueError::new_err(format!(
998
- "keep must be one of {{'first', 'last'}}, got {}",
999
- v
1000
- )))
1031
+ "keep must be one of {{'first', 'last'}}, got {v}"
1032
+ )));
1001
1033
  }
1002
1034
  };
1003
1035
  Ok(Wrap(parsed))
@@ -1011,9 +1043,8 @@ impl TryConvert for Wrap<IpcCompression> {
1011
1043
  "zstd" => IpcCompression::ZSTD,
1012
1044
  v => {
1013
1045
  return Err(RbValueError::new_err(format!(
1014
- "compression must be one of {{'lz4', 'zstd'}}, got {}",
1015
- v
1016
- )))
1046
+ "compression must be one of {{'lz4', 'zstd'}}, got {v}"
1047
+ )));
1017
1048
  }
1018
1049
  };
1019
1050
  Ok(Wrap(parsed))
@@ -1029,7 +1060,7 @@ impl TryConvert for Wrap<SearchSortedSide> {
1029
1060
  v => {
1030
1061
  return Err(RbValueError::new_err(format!(
1031
1062
  "side must be one of {{'any', 'left', 'right'}}, got {v}",
1032
- )))
1063
+ )));
1033
1064
  }
1034
1065
  };
1035
1066
  Ok(Wrap(parsed))
@@ -1046,7 +1077,7 @@ impl TryConvert for Wrap<ClosedInterval> {
1046
1077
  v => {
1047
1078
  return Err(RbValueError::new_err(format!(
1048
1079
  "`closed` must be one of {{'both', 'left', 'right', 'none'}}, got {v}",
1049
- )))
1080
+ )));
1050
1081
  }
1051
1082
  };
1052
1083
  Ok(Wrap(parsed))
@@ -1061,8 +1092,8 @@ impl TryConvert for Wrap<WindowMapping> {
1061
1092
  "explode" => WindowMapping::Explode,
1062
1093
  v => {
1063
1094
  return Err(RbValueError::new_err(format!(
1064
- "`mapping_strategy` must be one of {{'group_to_rows', 'join', 'explode'}}, got {v}",
1065
- )))
1095
+ "`mapping_strategy` must be one of {{'group_to_rows', 'join', 'explode'}}, got {v}",
1096
+ )));
1066
1097
  }
1067
1098
  };
1068
1099
  Ok(Wrap(parsed))
@@ -1079,7 +1110,7 @@ impl TryConvert for Wrap<JoinValidation> {
1079
1110
  v => {
1080
1111
  return Err(RbValueError::new_err(format!(
1081
1112
  "`validate` must be one of {{'m:m', 'm:1', '1:m', '1:1'}}, got {v}",
1082
- )))
1113
+ )));
1083
1114
  }
1084
1115
  };
1085
1116
  Ok(Wrap(parsed))
@@ -1097,8 +1128,8 @@ impl TryConvert for Wrap<MaintainOrderJoin> {
1097
1128
  v => {
1098
1129
  return Err(RbValueError::new_err(format!(
1099
1130
  "`maintain_order` must be one of {{'none', 'left', 'right', 'left_right', 'right_left'}}, got {v}",
1100
- )))
1101
- },
1131
+ )));
1132
+ }
1102
1133
  };
1103
1134
  Ok(Wrap(parsed))
1104
1135
  }
@@ -1114,8 +1145,8 @@ impl TryConvert for Wrap<QuoteStyle> {
1114
1145
  v => {
1115
1146
  return Err(RbValueError::new_err(format!(
1116
1147
  "`quote_style` must be one of {{'always', 'necessary', 'non_numeric', 'never'}}, got {v}",
1117
- )))
1118
- },
1148
+ )));
1149
+ }
1119
1150
  };
1120
1151
  Ok(Wrap(parsed))
1121
1152
  }
@@ -1126,6 +1157,33 @@ pub(crate) fn parse_cloud_options(uri: &str, kv: Vec<(String, String)>) -> RbRes
1126
1157
  Ok(out)
1127
1158
  }
1128
1159
 
1160
+ impl TryConvert for Wrap<SetOperation> {
1161
+ fn try_convert(ob: Value) -> RbResult<Self> {
1162
+ let parsed = match String::try_convert(ob)?.as_str() {
1163
+ "union" => SetOperation::Union,
1164
+ "difference" => SetOperation::Difference,
1165
+ "intersection" => SetOperation::Intersection,
1166
+ "symmetric_difference" => SetOperation::SymmetricDifference,
1167
+ v => {
1168
+ return Err(RbValueError::new_err(format!(
1169
+ "set operation must be one of {{'union', 'difference', 'intersection', 'symmetric_difference'}}, got {v}",
1170
+ )));
1171
+ }
1172
+ };
1173
+ Ok(Wrap(parsed))
1174
+ }
1175
+ }
1176
+
1177
+ impl TryConvert for Wrap<CastColumnsPolicy> {
1178
+ fn try_convert(ob: Value) -> RbResult<Self> {
1179
+ if ob.is_nil() {
1180
+ let out = Wrap(CastColumnsPolicy::ERROR_ON_MISMATCH);
1181
+ return Ok(out);
1182
+ }
1183
+ todo!();
1184
+ }
1185
+ }
1186
+
1129
1187
  pub fn parse_fill_null_strategy(
1130
1188
  strategy: &str,
1131
1189
  limit: FillNullLimit,
@@ -1139,10 +1197,12 @@ pub fn parse_fill_null_strategy(
1139
1197
  "zero" => FillNullStrategy::Zero,
1140
1198
  "one" => FillNullStrategy::One,
1141
1199
  e => {
1142
- return Err(magnus::Error::new(exception::runtime_error(), format!(
1143
- "strategy must be one of {{'forward', 'backward', 'min', 'max', 'mean', 'zero', 'one'}}, got {}",
1144
- e,
1145
- )))
1200
+ return Err(magnus::Error::new(
1201
+ exception::runtime_error(),
1202
+ format!(
1203
+ "strategy must be one of {{'forward', 'backward', 'min', 'max', 'mean', 'zero', 'one'}}, got {e}",
1204
+ ),
1205
+ ));
1146
1206
  }
1147
1207
  };
1148
1208
  Ok(parsed)
@@ -1159,7 +1219,7 @@ pub fn parse_parquet_compression(
1159
1219
  compression_level
1160
1220
  .map(|lvl| {
1161
1221
  GzipLevel::try_new(lvl as u8)
1162
- .map_err(|e| RbValueError::new_err(format!("{:?}", e)))
1222
+ .map_err(|e| RbValueError::new_err(format!("{e:?}")))
1163
1223
  })
1164
1224
  .transpose()?,
1165
1225
  ),
@@ -1168,7 +1228,7 @@ pub fn parse_parquet_compression(
1168
1228
  compression_level
1169
1229
  .map(|lvl| {
1170
1230
  BrotliLevel::try_new(lvl as u32)
1171
- .map_err(|e| RbValueError::new_err(format!("{:?}", e)))
1231
+ .map_err(|e| RbValueError::new_err(format!("{e:?}")))
1172
1232
  })
1173
1233
  .transpose()?,
1174
1234
  ),
@@ -1176,16 +1236,14 @@ pub fn parse_parquet_compression(
1176
1236
  "zstd" => ParquetCompression::Zstd(
1177
1237
  compression_level
1178
1238
  .map(|lvl| {
1179
- ZstdLevel::try_new(lvl)
1180
- .map_err(|e| RbValueError::new_err(format!("{:?}", e)))
1239
+ ZstdLevel::try_new(lvl).map_err(|e| RbValueError::new_err(format!("{e:?}")))
1181
1240
  })
1182
1241
  .transpose()?,
1183
1242
  ),
1184
1243
  e => {
1185
1244
  return Err(RbValueError::new_err(format!(
1186
- "compression must be one of {{'uncompressed', 'snappy', 'gzip', 'lzo', 'brotli', 'lz4', 'zstd'}}, got {}",
1187
- e
1188
- )))
1245
+ "compression must be one of {{'uncompressed', 'snappy', 'gzip', 'lzo', 'brotli', 'lz4', 'zstd'}}, got {e}"
1246
+ )));
1189
1247
  }
1190
1248
  };
1191
1249
  Ok(parsed)
@@ -1236,6 +1294,23 @@ impl TryConvert for RbCompatLevel {
1236
1294
  }
1237
1295
  }
1238
1296
 
1297
+ impl TryConvert for Wrap<UnicodeForm> {
1298
+ fn try_convert(ob: Value) -> RbResult<Self> {
1299
+ let parsed = match String::try_convert(ob)?.as_str() {
1300
+ "NFC" => UnicodeForm::NFC,
1301
+ "NFKC" => UnicodeForm::NFKC,
1302
+ "NFD" => UnicodeForm::NFD,
1303
+ "NFKD" => UnicodeForm::NFKD,
1304
+ v => {
1305
+ return Err(RbValueError::new_err(format!(
1306
+ "`form` must be one of {{'NFC', 'NFKC', 'NFD', 'NFKD'}}, got {v}",
1307
+ )));
1308
+ }
1309
+ };
1310
+ Ok(Wrap(parsed))
1311
+ }
1312
+ }
1313
+
1239
1314
  impl TryConvert for Wrap<Option<TimeZone>> {
1240
1315
  fn try_convert(ob: Value) -> RbResult<Self> {
1241
1316
  let tz = Option::<Wrap<PlSmallStr>>::try_convert(ob)?;
@@ -1245,3 +1320,94 @@ impl TryConvert for Wrap<Option<TimeZone>> {
1245
1320
  Ok(Wrap(TimeZone::opt_try_new(tz).map_err(RbPolarsErr::from)?))
1246
1321
  }
1247
1322
  }
1323
+
1324
+ unsafe impl TryConvertOwned for Wrap<Option<TimeZone>> {}
1325
+
1326
+ impl TryConvert for Wrap<ExtraColumnsPolicy> {
1327
+ fn try_convert(ob: Value) -> RbResult<Self> {
1328
+ let parsed = match String::try_convert(ob)?.as_str() {
1329
+ "ignore" => ExtraColumnsPolicy::Ignore,
1330
+ "raise" => ExtraColumnsPolicy::Raise,
1331
+ v => {
1332
+ return Err(RbValueError::new_err(format!(
1333
+ "extra column/field parameter must be one of {{'ignore', 'raise'}}, got {v}",
1334
+ )));
1335
+ }
1336
+ };
1337
+ Ok(Wrap(parsed))
1338
+ }
1339
+ }
1340
+
1341
+ impl TryConvert for Wrap<MissingColumnsPolicy> {
1342
+ fn try_convert(ob: Value) -> RbResult<Self> {
1343
+ let parsed = match String::try_convert(ob)?.as_str() {
1344
+ "insert" => MissingColumnsPolicy::Insert,
1345
+ "raise" => MissingColumnsPolicy::Raise,
1346
+ v => {
1347
+ return Err(RbValueError::new_err(format!(
1348
+ "missing column/field parameter must be one of {{'insert', 'raise'}}, got {v}",
1349
+ )));
1350
+ }
1351
+ };
1352
+ Ok(Wrap(parsed))
1353
+ }
1354
+ }
1355
+
1356
+ impl TryConvert for Wrap<ColumnMapping> {
1357
+ fn try_convert(ob: Value) -> RbResult<Self> {
1358
+ let (column_mapping_type, ob) = <(String, Value)>::try_convert(ob)?;
1359
+
1360
+ Ok(Wrap(match column_mapping_type.as_str() {
1361
+ "iceberg-column-mapping" => {
1362
+ let arrow_schema = Wrap::<ArrowSchema>::try_convert(ob)?;
1363
+ ColumnMapping::Iceberg(Arc::new(
1364
+ IcebergSchema::from_arrow_schema(&arrow_schema.0).map_err(to_rb_err)?,
1365
+ ))
1366
+ }
1367
+
1368
+ v => {
1369
+ return Err(RbValueError::new_err(format!(
1370
+ "unknown column mapping type: {v}"
1371
+ )));
1372
+ }
1373
+ }))
1374
+ }
1375
+ }
1376
+
1377
+ impl TryConvert for Wrap<DeletionFilesList> {
1378
+ fn try_convert(ob: Value) -> RbResult<Self> {
1379
+ let (deletion_file_type, ob) = <(String, Value)>::try_convert(ob)?;
1380
+
1381
+ Ok(Wrap(match deletion_file_type.as_str() {
1382
+ "iceberg-position-delete" => {
1383
+ let dict = RHash::try_convert(ob)?;
1384
+
1385
+ let mut out = PlIndexMap::new();
1386
+
1387
+ dict.foreach(|k: usize, v: RArray| {
1388
+ let files = v
1389
+ .into_iter()
1390
+ .map(|x| {
1391
+ let x = String::try_convert(x)?;
1392
+ Ok(x)
1393
+ })
1394
+ .collect::<RbResult<Arc<[String]>>>()?;
1395
+
1396
+ if !files.is_empty() {
1397
+ out.insert(k, files);
1398
+ }
1399
+
1400
+ Ok(ForEach::Continue)
1401
+ })?;
1402
+
1403
+ DeletionFilesList::IcebergPositionDelete(Arc::new(out))
1404
+ }
1405
+
1406
+ v => {
1407
+ return Err(RbValueError::new_err(format!(
1408
+ "unknown deletion file type: {v}"
1409
+ )));
1410
+ }
1411
+ }))
1412
+ }
1413
+ }