polars-df 0.20.0 → 0.21.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +27 -0
- data/Cargo.lock +192 -186
- data/LICENSE.txt +1 -1
- data/ext/polars/Cargo.toml +19 -9
- data/ext/polars/src/batched_csv.rs +2 -2
- data/ext/polars/src/catalog/mod.rs +1 -0
- data/ext/polars/src/catalog/unity.rs +450 -0
- data/ext/polars/src/conversion/any_value.rs +9 -19
- data/ext/polars/src/conversion/categorical.rs +30 -0
- data/ext/polars/src/conversion/chunked_array.rs +8 -8
- data/ext/polars/src/conversion/mod.rs +275 -109
- data/ext/polars/src/dataframe/construction.rs +2 -2
- data/ext/polars/src/dataframe/export.rs +2 -2
- data/ext/polars/src/dataframe/general.rs +4 -2
- data/ext/polars/src/dataframe/io.rs +2 -2
- data/ext/polars/src/exceptions.rs +2 -1
- data/ext/polars/src/expr/array.rs +73 -4
- data/ext/polars/src/expr/binary.rs +26 -1
- data/ext/polars/src/expr/bitwise.rs +39 -0
- data/ext/polars/src/expr/categorical.rs +20 -0
- data/ext/polars/src/expr/datatype.rs +37 -0
- data/ext/polars/src/expr/datetime.rs +58 -0
- data/ext/polars/src/expr/general.rs +106 -22
- data/ext/polars/src/expr/list.rs +45 -2
- data/ext/polars/src/expr/meta.rs +5 -28
- data/ext/polars/src/expr/mod.rs +4 -1
- data/ext/polars/src/expr/name.rs +10 -2
- data/ext/polars/src/expr/rolling.rs +21 -1
- data/ext/polars/src/expr/selector.rs +219 -0
- data/ext/polars/src/expr/string.rs +73 -6
- data/ext/polars/src/expr/struct.rs +9 -1
- data/ext/polars/src/file.rs +11 -5
- data/ext/polars/src/functions/io.rs +21 -11
- data/ext/polars/src/functions/lazy.rs +26 -54
- data/ext/polars/src/functions/meta.rs +2 -2
- data/ext/polars/src/functions/misc.rs +1 -1
- data/ext/polars/src/functions/string_cache.rs +4 -5
- data/ext/polars/src/interop/numo/numo_rs.rs +1 -1
- data/ext/polars/src/interop/numo/to_numo_series.rs +1 -1
- data/ext/polars/src/io/mod.rs +102 -0
- data/ext/polars/src/lazyframe/general.rs +124 -111
- data/ext/polars/src/lazyframe/serde.rs +1 -1
- data/ext/polars/src/lazyframe/sink.rs +6 -6
- data/ext/polars/src/lib.rs +216 -29
- data/ext/polars/src/map/dataframe.rs +9 -9
- data/ext/polars/src/map/lazy.rs +1 -1
- data/ext/polars/src/map/mod.rs +31 -19
- data/ext/polars/src/map/series.rs +9 -9
- data/ext/polars/src/on_startup.rs +5 -2
- data/ext/polars/src/rb_modules.rs +1 -1
- data/ext/polars/src/series/aggregation.rs +44 -0
- data/ext/polars/src/series/construction.rs +11 -7
- data/ext/polars/src/series/export.rs +6 -4
- data/ext/polars/src/series/general.rs +75 -210
- data/ext/polars/src/series/import.rs +2 -2
- data/ext/polars/src/series/map.rs +227 -0
- data/ext/polars/src/series/mod.rs +2 -1
- data/ext/polars/src/series/scatter.rs +1 -1
- data/ext/polars/src/utils.rs +10 -2
- data/lib/polars/array_expr.rb +382 -3
- data/lib/polars/array_name_space.rb +281 -0
- data/lib/polars/binary_expr.rb +67 -0
- data/lib/polars/binary_name_space.rb +43 -0
- data/lib/polars/cat_expr.rb +224 -0
- data/lib/polars/cat_name_space.rb +130 -32
- data/lib/polars/catalog/unity/catalog_info.rb +20 -0
- data/lib/polars/catalog/unity/column_info.rb +31 -0
- data/lib/polars/catalog/unity/namespace_info.rb +21 -0
- data/lib/polars/catalog/unity/table_info.rb +50 -0
- data/lib/polars/catalog.rb +448 -0
- data/lib/polars/config.rb +2 -2
- data/lib/polars/convert.rb +12 -2
- data/lib/polars/data_frame.rb +834 -48
- data/lib/polars/data_type_expr.rb +52 -0
- data/lib/polars/data_types.rb +61 -5
- data/lib/polars/date_time_expr.rb +251 -0
- data/lib/polars/date_time_name_space.rb +299 -0
- data/lib/polars/exceptions.rb +7 -2
- data/lib/polars/expr.rb +1247 -211
- data/lib/polars/functions/col.rb +6 -5
- data/lib/polars/functions/datatype.rb +21 -0
- data/lib/polars/functions/lazy.rb +127 -15
- data/lib/polars/functions/repeat.rb +4 -0
- data/lib/polars/io/csv.rb +19 -1
- data/lib/polars/io/json.rb +16 -0
- data/lib/polars/io/ndjson.rb +13 -0
- data/lib/polars/io/parquet.rb +70 -66
- data/lib/polars/io/scan_options.rb +47 -0
- data/lib/polars/lazy_frame.rb +1099 -95
- data/lib/polars/list_expr.rb +400 -11
- data/lib/polars/list_name_space.rb +321 -5
- data/lib/polars/meta_expr.rb +71 -22
- data/lib/polars/name_expr.rb +36 -0
- data/lib/polars/scan_cast_options.rb +64 -0
- data/lib/polars/schema.rb +84 -3
- data/lib/polars/selector.rb +210 -0
- data/lib/polars/selectors.rb +932 -203
- data/lib/polars/series.rb +1083 -63
- data/lib/polars/string_expr.rb +435 -9
- data/lib/polars/string_name_space.rb +729 -45
- data/lib/polars/struct_expr.rb +103 -0
- data/lib/polars/struct_name_space.rb +19 -1
- data/lib/polars/utils/parse.rb +40 -0
- data/lib/polars/utils/various.rb +18 -1
- data/lib/polars/utils.rb +9 -1
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +10 -0
- metadata +20 -2
@@ -1,15 +1,16 @@
|
|
1
1
|
pub(crate) mod any_value;
|
2
|
+
mod categorical;
|
2
3
|
mod chunked_array;
|
3
4
|
|
4
5
|
use std::fmt::{Debug, Display, Formatter};
|
5
6
|
use std::fs::File;
|
6
7
|
use std::hash::{Hash, Hasher};
|
7
8
|
use std::num::NonZeroUsize;
|
8
|
-
use std::path::PathBuf;
|
9
9
|
|
10
|
+
pub use categorical::RbCategories;
|
10
11
|
use magnus::{
|
11
|
-
|
12
|
-
|
12
|
+
IntoValue, Module, RArray, RHash, Ruby, Symbol, TryConvert, Value, class, exception,
|
13
|
+
prelude::*, r_hash::ForEach, try_convert::TryConvertOwned, value::Opaque,
|
13
14
|
};
|
14
15
|
use polars::chunked_array::object::PolarsObjectSafe;
|
15
16
|
use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
|
@@ -17,17 +18,20 @@ use polars::datatypes::AnyValue;
|
|
17
18
|
use polars::frame::row::Row;
|
18
19
|
use polars::io::avro::AvroCompression;
|
19
20
|
use polars::io::cloud::CloudOptions;
|
21
|
+
use polars::prelude::deletion::DeletionFilesList;
|
20
22
|
use polars::prelude::*;
|
21
23
|
use polars::series::ops::NullBehavior;
|
24
|
+
use polars_core::schema::iceberg::IcebergSchema;
|
22
25
|
use polars_core::utils::arrow::array::Array;
|
23
26
|
use polars_core::utils::materialize_dyn_int;
|
24
27
|
use polars_plan::dsl::ScanSources;
|
25
28
|
use polars_utils::mmap::MemSlice;
|
26
29
|
use polars_utils::total_ord::{TotalEq, TotalHash};
|
27
30
|
|
28
|
-
use crate::file::{
|
31
|
+
use crate::file::{RubyScanSourceInput, get_ruby_scan_source_input};
|
29
32
|
use crate::object::OBJECT_NAME;
|
30
33
|
use crate::rb_modules::series;
|
34
|
+
use crate::utils::to_rb_err;
|
31
35
|
use crate::{RbDataFrame, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbTypeError, RbValueError};
|
32
36
|
|
33
37
|
pub(crate) fn slice_extract_wrapped<T>(slice: &[Wrap<T>]) -> &[T] {
|
@@ -226,18 +230,25 @@ impl IntoValue for Wrap<DataType> {
|
|
226
230
|
let class = pl.const_get::<_, Value>("Object").unwrap();
|
227
231
|
class.funcall("new", ()).unwrap()
|
228
232
|
}
|
229
|
-
DataType::Categorical(
|
230
|
-
let
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
let
|
233
|
+
DataType::Categorical(cats, _) => {
|
234
|
+
let categories_class = pl.const_get::<_, Value>("Categories").unwrap();
|
235
|
+
let categorical_class = pl.const_get::<_, Value>("Categorical").unwrap();
|
236
|
+
let categories: Value = categories_class
|
237
|
+
.funcall("_from_rb_categories", (RbCategories::from(cats.clone()),))
|
238
|
+
.unwrap();
|
239
|
+
let kwargs = RHash::new();
|
240
|
+
kwargs.aset(Symbol::new("categories"), categories).unwrap();
|
241
|
+
categorical_class.funcall("new", (kwargs,)).unwrap()
|
242
|
+
}
|
243
|
+
DataType::Enum(_, mapping) => {
|
244
|
+
let categories = unsafe {
|
245
|
+
StringChunked::from_chunks(
|
246
|
+
PlSmallStr::from_static("category"),
|
247
|
+
vec![mapping.to_arrow(true)],
|
248
|
+
)
|
249
|
+
};
|
236
250
|
let class = pl.const_get::<_, Value>("Enum").unwrap();
|
237
|
-
let
|
238
|
-
Series::from_arrow(PlSmallStr::from_static("category"), categories.to_boxed())
|
239
|
-
.unwrap();
|
240
|
-
let series = to_series(s.into());
|
251
|
+
let series = to_series(categories.into_series().into());
|
241
252
|
class.funcall::<_, _, Value>("new", (series,)).unwrap()
|
242
253
|
}
|
243
254
|
DataType::Time => {
|
@@ -277,13 +288,13 @@ impl IntoValue for Wrap<DataType> {
|
|
277
288
|
}
|
278
289
|
}
|
279
290
|
|
291
|
+
enum CategoricalOrdering {
|
292
|
+
Lexical,
|
293
|
+
}
|
294
|
+
|
280
295
|
impl IntoValue for Wrap<CategoricalOrdering> {
|
281
296
|
fn into_value_with(self, _: &Ruby) -> Value {
|
282
|
-
|
283
|
-
CategoricalOrdering::Physical => "physical",
|
284
|
-
CategoricalOrdering::Lexical => "lexical",
|
285
|
-
};
|
286
|
-
ordering.into_value()
|
297
|
+
"lexical".into_value()
|
287
298
|
}
|
288
299
|
}
|
289
300
|
|
@@ -324,8 +335,10 @@ impl TryConvert for Wrap<DataType> {
|
|
324
335
|
"Polars::Boolean" => DataType::Boolean,
|
325
336
|
"Polars::String" => DataType::String,
|
326
337
|
"Polars::Binary" => DataType::Binary,
|
327
|
-
"Polars::Categorical" => DataType::
|
328
|
-
"Polars::Enum" =>
|
338
|
+
"Polars::Categorical" => DataType::from_categories(Categories::global()),
|
339
|
+
"Polars::Enum" => {
|
340
|
+
DataType::from_frozen_categories(FrozenCategories::new([]).unwrap())
|
341
|
+
}
|
329
342
|
"Polars::Date" => DataType::Date,
|
330
343
|
"Polars::Time" => DataType::Time,
|
331
344
|
"Polars::Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
|
@@ -340,11 +353,12 @@ impl TryConvert for Wrap<DataType> {
|
|
340
353
|
dt => {
|
341
354
|
return Err(RbValueError::new_err(format!(
|
342
355
|
"{dt} is not a correct polars DataType.",
|
343
|
-
)))
|
356
|
+
)));
|
344
357
|
}
|
345
358
|
}
|
346
359
|
} else if String::try_convert(ob).is_err() {
|
347
|
-
let
|
360
|
+
let cls = ob.class();
|
361
|
+
let name = unsafe { cls.name() }.into_owned();
|
348
362
|
match name.as_str() {
|
349
363
|
"Polars::Int8" => DataType::Int8,
|
350
364
|
"Polars::Int16" => DataType::Int16,
|
@@ -360,17 +374,20 @@ impl TryConvert for Wrap<DataType> {
|
|
360
374
|
"Polars::String" => DataType::String,
|
361
375
|
"Polars::Binary" => DataType::Binary,
|
362
376
|
"Polars::Categorical" => {
|
363
|
-
let
|
364
|
-
|
365
|
-
.
|
366
|
-
DataType::
|
377
|
+
let categories: Value = ob.funcall("categories", ()).unwrap();
|
378
|
+
let rb_categories: &RbCategories =
|
379
|
+
categories.funcall("_categories", ()).unwrap();
|
380
|
+
DataType::from_categories(rb_categories.categories().clone())
|
367
381
|
}
|
368
382
|
"Polars::Enum" => {
|
369
|
-
let categories = ob.funcall("categories", ()).unwrap();
|
383
|
+
let categories: Value = ob.funcall("categories", ()).unwrap();
|
370
384
|
let s = get_series(categories)?;
|
371
385
|
let ca = s.str().map_err(RbPolarsErr::from)?;
|
372
386
|
let categories = ca.downcast_iter().next().unwrap().clone();
|
373
|
-
|
387
|
+
assert!(!categories.has_nulls());
|
388
|
+
DataType::from_frozen_categories(
|
389
|
+
FrozenCategories::new(categories.values_iter()).unwrap(),
|
390
|
+
)
|
374
391
|
}
|
375
392
|
"Polars::Date" => DataType::Date,
|
376
393
|
"Polars::Time" => DataType::Time,
|
@@ -420,7 +437,7 @@ impl TryConvert for Wrap<DataType> {
|
|
420
437
|
return Err(RbTypeError::new_err(format!(
|
421
438
|
"A {dt} object is not a correct polars DataType. \
|
422
439
|
Hint: use the class without instantiating it.",
|
423
|
-
)))
|
440
|
+
)));
|
424
441
|
}
|
425
442
|
}
|
426
443
|
} else {
|
@@ -436,7 +453,7 @@ impl TryConvert for Wrap<DataType> {
|
|
436
453
|
"str" => DataType::String,
|
437
454
|
"bin" => DataType::Binary,
|
438
455
|
"bool" => DataType::Boolean,
|
439
|
-
"cat" => DataType::
|
456
|
+
"cat" => DataType::from_categories(Categories::global()),
|
440
457
|
"date" => DataType::Date,
|
441
458
|
"datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
|
442
459
|
"f32" => DataType::Float32,
|
@@ -449,9 +466,8 @@ impl TryConvert for Wrap<DataType> {
|
|
449
466
|
"unk" => DataType::Unknown(Default::default()),
|
450
467
|
_ => {
|
451
468
|
return Err(RbValueError::new_err(format!(
|
452
|
-
"{} is not a supported DataType."
|
453
|
-
|
454
|
-
)))
|
469
|
+
"{ob} is not a supported DataType."
|
470
|
+
)));
|
455
471
|
}
|
456
472
|
}
|
457
473
|
};
|
@@ -475,7 +491,7 @@ impl TryConvert for Wrap<StatisticsOptions> {
|
|
475
491
|
_ => {
|
476
492
|
return Err(RbTypeError::new_err(format!(
|
477
493
|
"'{key}' is not a valid statistic option",
|
478
|
-
)))
|
494
|
+
)));
|
479
495
|
}
|
480
496
|
}
|
481
497
|
Ok(ForEach::Continue)
|
@@ -510,6 +526,12 @@ impl TryConvert for Wrap<Schema> {
|
|
510
526
|
}
|
511
527
|
}
|
512
528
|
|
529
|
+
impl TryConvert for Wrap<ArrowSchema> {
|
530
|
+
fn try_convert(_ob: Value) -> RbResult<Self> {
|
531
|
+
todo!();
|
532
|
+
}
|
533
|
+
}
|
534
|
+
|
513
535
|
impl TryConvert for Wrap<ScanSources> {
|
514
536
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
515
537
|
let list = RArray::try_convert(ob)?;
|
@@ -519,7 +541,7 @@ impl TryConvert for Wrap<ScanSources> {
|
|
519
541
|
}
|
520
542
|
|
521
543
|
enum MutableSources {
|
522
|
-
Paths(Vec<
|
544
|
+
Paths(Vec<PlPath>),
|
523
545
|
Files(Vec<File>),
|
524
546
|
Buffers(Vec<MemSlice>),
|
525
547
|
}
|
@@ -560,7 +582,7 @@ impl TryConvert for Wrap<ScanSources> {
|
|
560
582
|
return Err(RbTypeError::new_err(
|
561
583
|
"Cannot combine in-memory bytes, paths and files for scan sources"
|
562
584
|
.to_string(),
|
563
|
-
))
|
585
|
+
));
|
564
586
|
}
|
565
587
|
}
|
566
588
|
}
|
@@ -678,7 +700,7 @@ impl TryConvert for Wrap<AsofStrategy> {
|
|
678
700
|
v => {
|
679
701
|
return Err(RbValueError::new_err(format!(
|
680
702
|
"asof `strategy` must be one of {{'backward', 'forward', 'nearest'}}, got {v}",
|
681
|
-
)))
|
703
|
+
)));
|
682
704
|
}
|
683
705
|
};
|
684
706
|
Ok(Wrap(parsed))
|
@@ -693,7 +715,7 @@ impl TryConvert for Wrap<InterpolationMethod> {
|
|
693
715
|
v => {
|
694
716
|
return Err(RbValueError::new_err(format!(
|
695
717
|
"method must be one of {{'linear', 'nearest'}}, got {v}",
|
696
|
-
)))
|
718
|
+
)));
|
697
719
|
}
|
698
720
|
};
|
699
721
|
Ok(Wrap(parsed))
|
@@ -708,9 +730,8 @@ impl TryConvert for Wrap<Option<AvroCompression>> {
|
|
708
730
|
"deflate" => Some(AvroCompression::Deflate),
|
709
731
|
v => {
|
710
732
|
return Err(RbValueError::new_err(format!(
|
711
|
-
"compression must be one of {{'uncompressed', 'snappy', 'deflate'}}, got {}"
|
712
|
-
|
713
|
-
)))
|
733
|
+
"compression must be one of {{'uncompressed', 'snappy', 'deflate'}}, got {v}"
|
734
|
+
)));
|
714
735
|
}
|
715
736
|
};
|
716
737
|
Ok(Wrap(parsed))
|
@@ -720,13 +741,18 @@ impl TryConvert for Wrap<Option<AvroCompression>> {
|
|
720
741
|
impl TryConvert for Wrap<CategoricalOrdering> {
|
721
742
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
722
743
|
let parsed = match String::try_convert(ob)?.as_str() {
|
723
|
-
"physical" => CategoricalOrdering::Physical,
|
724
744
|
"lexical" => CategoricalOrdering::Lexical,
|
745
|
+
"physical" => {
|
746
|
+
polars_warn!(
|
747
|
+
Deprecation,
|
748
|
+
"physical ordering is deprecated, will use lexical ordering instead"
|
749
|
+
);
|
750
|
+
CategoricalOrdering::Lexical
|
751
|
+
}
|
725
752
|
v => {
|
726
753
|
return Err(RbValueError::new_err(format!(
|
727
|
-
"ordering must be one of {{'physical', 'lexical'}}, got {}"
|
728
|
-
|
729
|
-
)))
|
754
|
+
"ordering must be one of {{'physical', 'lexical'}}, got {v}"
|
755
|
+
)));
|
730
756
|
}
|
731
757
|
};
|
732
758
|
Ok(Wrap(parsed))
|
@@ -742,7 +768,7 @@ impl TryConvert for Wrap<StartBy> {
|
|
742
768
|
v => {
|
743
769
|
return Err(RbValueError::new_err(format!(
|
744
770
|
"closed must be one of {{'window', 'datapoint', 'monday'}}, got {v}",
|
745
|
-
)))
|
771
|
+
)));
|
746
772
|
}
|
747
773
|
};
|
748
774
|
Ok(Wrap(parsed))
|
@@ -758,9 +784,8 @@ impl TryConvert for Wrap<ClosedWindow> {
|
|
758
784
|
"none" => ClosedWindow::None,
|
759
785
|
v => {
|
760
786
|
return Err(RbValueError::new_err(format!(
|
761
|
-
"closed must be one of {{'left', 'right', 'both', 'none'}}, got {}"
|
762
|
-
|
763
|
-
)))
|
787
|
+
"closed must be one of {{'left', 'right', 'both', 'none'}}, got {v}"
|
788
|
+
)));
|
764
789
|
}
|
765
790
|
};
|
766
791
|
Ok(Wrap(parsed))
|
@@ -789,9 +814,8 @@ impl TryConvert for Wrap<CsvEncoding> {
|
|
789
814
|
"utf8-lossy" => CsvEncoding::LossyUtf8,
|
790
815
|
v => {
|
791
816
|
return Err(RbValueError::new_err(format!(
|
792
|
-
"encoding must be one of {{'utf8', 'utf8-lossy'}}, got {}"
|
793
|
-
|
794
|
-
)))
|
817
|
+
"encoding must be one of {{'utf8', 'utf8-lossy'}}, got {v}"
|
818
|
+
)));
|
795
819
|
}
|
796
820
|
};
|
797
821
|
Ok(Wrap(parsed))
|
@@ -806,9 +830,8 @@ impl TryConvert for Wrap<Option<IpcCompression>> {
|
|
806
830
|
"zstd" => Some(IpcCompression::ZSTD),
|
807
831
|
v => {
|
808
832
|
return Err(RbValueError::new_err(format!(
|
809
|
-
"compression must be one of {{'uncompressed', 'lz4', 'zstd'}}, got {}"
|
810
|
-
|
811
|
-
)))
|
833
|
+
"compression must be one of {{'uncompressed', 'lz4', 'zstd'}}, got {v}"
|
834
|
+
)));
|
812
835
|
}
|
813
836
|
};
|
814
837
|
Ok(Wrap(parsed))
|
@@ -826,9 +849,8 @@ impl TryConvert for Wrap<JoinType> {
|
|
826
849
|
"cross" => JoinType::Cross,
|
827
850
|
v => {
|
828
851
|
return Err(RbValueError::new_err(format!(
|
829
|
-
|
830
|
-
|
831
|
-
)))
|
852
|
+
"how must be one of {{'inner', 'left', 'full', 'semi', 'anti', 'cross'}}, got {v}"
|
853
|
+
)));
|
832
854
|
}
|
833
855
|
};
|
834
856
|
Ok(Wrap(parsed))
|
@@ -844,7 +866,7 @@ impl TryConvert for Wrap<Label> {
|
|
844
866
|
v => {
|
845
867
|
return Err(RbValueError::new_err(format!(
|
846
868
|
"`label` must be one of {{'left', 'right', 'datapoint'}}, got {v}",
|
847
|
-
)))
|
869
|
+
)));
|
848
870
|
}
|
849
871
|
};
|
850
872
|
Ok(Wrap(parsed))
|
@@ -858,9 +880,8 @@ impl TryConvert for Wrap<ListToStructWidthStrategy> {
|
|
858
880
|
"max_width" => ListToStructWidthStrategy::MaxWidth,
|
859
881
|
v => {
|
860
882
|
return Err(RbValueError::new_err(format!(
|
861
|
-
"n_field_strategy must be one of {{'first_non_null', 'max_width'}}, got {}"
|
862
|
-
|
863
|
-
)))
|
883
|
+
"n_field_strategy must be one of {{'first_non_null', 'max_width'}}, got {v}"
|
884
|
+
)));
|
864
885
|
}
|
865
886
|
};
|
866
887
|
Ok(Wrap(parsed))
|
@@ -875,7 +896,7 @@ impl TryConvert for Wrap<NonExistent> {
|
|
875
896
|
v => {
|
876
897
|
return Err(RbValueError::new_err(format!(
|
877
898
|
"`non_existent` must be one of {{'null', 'raise'}}, got {v}",
|
878
|
-
)))
|
899
|
+
)));
|
879
900
|
}
|
880
901
|
};
|
881
902
|
Ok(Wrap(parsed))
|
@@ -889,9 +910,8 @@ impl TryConvert for Wrap<NullBehavior> {
|
|
889
910
|
"ignore" => NullBehavior::Ignore,
|
890
911
|
v => {
|
891
912
|
return Err(RbValueError::new_err(format!(
|
892
|
-
"null behavior must be one of {{'drop', 'ignore'}}, got {}"
|
893
|
-
|
894
|
-
)))
|
913
|
+
"null behavior must be one of {{'drop', 'ignore'}}, got {v}"
|
914
|
+
)));
|
895
915
|
}
|
896
916
|
};
|
897
917
|
Ok(Wrap(parsed))
|
@@ -905,9 +925,8 @@ impl TryConvert for Wrap<NullStrategy> {
|
|
905
925
|
"propagate" => NullStrategy::Propagate,
|
906
926
|
v => {
|
907
927
|
return Err(RbValueError::new_err(format!(
|
908
|
-
"null strategy must be one of {{'ignore', 'propagate'}}, got {}"
|
909
|
-
|
910
|
-
)))
|
928
|
+
"null strategy must be one of {{'ignore', 'propagate'}}, got {v}"
|
929
|
+
)));
|
911
930
|
}
|
912
931
|
};
|
913
932
|
Ok(Wrap(parsed))
|
@@ -923,9 +942,8 @@ impl TryConvert for Wrap<ParallelStrategy> {
|
|
923
942
|
"none" => ParallelStrategy::None,
|
924
943
|
v => {
|
925
944
|
return Err(RbValueError::new_err(format!(
|
926
|
-
"parallel must be one of {{'auto', 'columns', 'row_groups', 'none'}}, got {}"
|
927
|
-
|
928
|
-
)))
|
945
|
+
"parallel must be one of {{'auto', 'columns', 'row_groups', 'none'}}, got {v}"
|
946
|
+
)));
|
929
947
|
}
|
930
948
|
};
|
931
949
|
Ok(Wrap(parsed))
|
@@ -942,9 +960,8 @@ impl TryConvert for Wrap<QuantileMethod> {
|
|
942
960
|
"midpoint" => QuantileMethod::Midpoint,
|
943
961
|
v => {
|
944
962
|
return Err(RbValueError::new_err(format!(
|
945
|
-
"interpolation must be one of {{'lower', 'higher', 'nearest', 'linear', 'midpoint'}}, got {}"
|
946
|
-
|
947
|
-
)))
|
963
|
+
"interpolation must be one of {{'lower', 'higher', 'nearest', 'linear', 'midpoint'}}, got {v}"
|
964
|
+
)));
|
948
965
|
}
|
949
966
|
};
|
950
967
|
Ok(Wrap(parsed))
|
@@ -962,9 +979,24 @@ impl TryConvert for Wrap<RankMethod> {
|
|
962
979
|
"random" => RankMethod::Random,
|
963
980
|
v => {
|
964
981
|
return Err(RbValueError::new_err(format!(
|
965
|
-
"method must be one of {{'min', 'max', 'average', 'dense', 'ordinal', 'random'}}, got {}"
|
966
|
-
|
967
|
-
|
982
|
+
"method must be one of {{'min', 'max', 'average', 'dense', 'ordinal', 'random'}}, got {v}"
|
983
|
+
)));
|
984
|
+
}
|
985
|
+
};
|
986
|
+
Ok(Wrap(parsed))
|
987
|
+
}
|
988
|
+
}
|
989
|
+
|
990
|
+
impl TryConvert for Wrap<Roll> {
|
991
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
992
|
+
let parsed = match String::try_convert(ob)?.as_str() {
|
993
|
+
"raise" => Roll::Raise,
|
994
|
+
"forward" => Roll::Forward,
|
995
|
+
"backward" => Roll::Backward,
|
996
|
+
v => {
|
997
|
+
return Err(RbValueError::new_err(format!(
|
998
|
+
"`roll` must be one of {{'raise', 'forward', 'backward'}}, got {v}",
|
999
|
+
)));
|
968
1000
|
}
|
969
1001
|
};
|
970
1002
|
Ok(Wrap(parsed))
|
@@ -979,15 +1011,16 @@ impl TryConvert for Wrap<TimeUnit> {
|
|
979
1011
|
"ms" => TimeUnit::Milliseconds,
|
980
1012
|
v => {
|
981
1013
|
return Err(RbValueError::new_err(format!(
|
982
|
-
"time unit must be one of {{'ns', 'us', 'ms'}}, got {}"
|
983
|
-
|
984
|
-
)))
|
1014
|
+
"time unit must be one of {{'ns', 'us', 'ms'}}, got {v}"
|
1015
|
+
)));
|
985
1016
|
}
|
986
1017
|
};
|
987
1018
|
Ok(Wrap(parsed))
|
988
1019
|
}
|
989
1020
|
}
|
990
1021
|
|
1022
|
+
unsafe impl TryConvertOwned for Wrap<TimeUnit> {}
|
1023
|
+
|
991
1024
|
impl TryConvert for Wrap<UniqueKeepStrategy> {
|
992
1025
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
993
1026
|
let parsed = match String::try_convert(ob)?.as_str() {
|
@@ -995,9 +1028,8 @@ impl TryConvert for Wrap<UniqueKeepStrategy> {
|
|
995
1028
|
"last" => UniqueKeepStrategy::Last,
|
996
1029
|
v => {
|
997
1030
|
return Err(RbValueError::new_err(format!(
|
998
|
-
"keep must be one of {{'first', 'last'}}, got {}"
|
999
|
-
|
1000
|
-
)))
|
1031
|
+
"keep must be one of {{'first', 'last'}}, got {v}"
|
1032
|
+
)));
|
1001
1033
|
}
|
1002
1034
|
};
|
1003
1035
|
Ok(Wrap(parsed))
|
@@ -1011,9 +1043,8 @@ impl TryConvert for Wrap<IpcCompression> {
|
|
1011
1043
|
"zstd" => IpcCompression::ZSTD,
|
1012
1044
|
v => {
|
1013
1045
|
return Err(RbValueError::new_err(format!(
|
1014
|
-
"compression must be one of {{'lz4', 'zstd'}}, got {}"
|
1015
|
-
|
1016
|
-
)))
|
1046
|
+
"compression must be one of {{'lz4', 'zstd'}}, got {v}"
|
1047
|
+
)));
|
1017
1048
|
}
|
1018
1049
|
};
|
1019
1050
|
Ok(Wrap(parsed))
|
@@ -1029,7 +1060,7 @@ impl TryConvert for Wrap<SearchSortedSide> {
|
|
1029
1060
|
v => {
|
1030
1061
|
return Err(RbValueError::new_err(format!(
|
1031
1062
|
"side must be one of {{'any', 'left', 'right'}}, got {v}",
|
1032
|
-
)))
|
1063
|
+
)));
|
1033
1064
|
}
|
1034
1065
|
};
|
1035
1066
|
Ok(Wrap(parsed))
|
@@ -1046,7 +1077,7 @@ impl TryConvert for Wrap<ClosedInterval> {
|
|
1046
1077
|
v => {
|
1047
1078
|
return Err(RbValueError::new_err(format!(
|
1048
1079
|
"`closed` must be one of {{'both', 'left', 'right', 'none'}}, got {v}",
|
1049
|
-
)))
|
1080
|
+
)));
|
1050
1081
|
}
|
1051
1082
|
};
|
1052
1083
|
Ok(Wrap(parsed))
|
@@ -1061,8 +1092,8 @@ impl TryConvert for Wrap<WindowMapping> {
|
|
1061
1092
|
"explode" => WindowMapping::Explode,
|
1062
1093
|
v => {
|
1063
1094
|
return Err(RbValueError::new_err(format!(
|
1064
|
-
|
1065
|
-
|
1095
|
+
"`mapping_strategy` must be one of {{'group_to_rows', 'join', 'explode'}}, got {v}",
|
1096
|
+
)));
|
1066
1097
|
}
|
1067
1098
|
};
|
1068
1099
|
Ok(Wrap(parsed))
|
@@ -1079,7 +1110,7 @@ impl TryConvert for Wrap<JoinValidation> {
|
|
1079
1110
|
v => {
|
1080
1111
|
return Err(RbValueError::new_err(format!(
|
1081
1112
|
"`validate` must be one of {{'m:m', 'm:1', '1:m', '1:1'}}, got {v}",
|
1082
|
-
)))
|
1113
|
+
)));
|
1083
1114
|
}
|
1084
1115
|
};
|
1085
1116
|
Ok(Wrap(parsed))
|
@@ -1097,8 +1128,8 @@ impl TryConvert for Wrap<MaintainOrderJoin> {
|
|
1097
1128
|
v => {
|
1098
1129
|
return Err(RbValueError::new_err(format!(
|
1099
1130
|
"`maintain_order` must be one of {{'none', 'left', 'right', 'left_right', 'right_left'}}, got {v}",
|
1100
|
-
)))
|
1101
|
-
}
|
1131
|
+
)));
|
1132
|
+
}
|
1102
1133
|
};
|
1103
1134
|
Ok(Wrap(parsed))
|
1104
1135
|
}
|
@@ -1114,8 +1145,8 @@ impl TryConvert for Wrap<QuoteStyle> {
|
|
1114
1145
|
v => {
|
1115
1146
|
return Err(RbValueError::new_err(format!(
|
1116
1147
|
"`quote_style` must be one of {{'always', 'necessary', 'non_numeric', 'never'}}, got {v}",
|
1117
|
-
)))
|
1118
|
-
}
|
1148
|
+
)));
|
1149
|
+
}
|
1119
1150
|
};
|
1120
1151
|
Ok(Wrap(parsed))
|
1121
1152
|
}
|
@@ -1126,6 +1157,33 @@ pub(crate) fn parse_cloud_options(uri: &str, kv: Vec<(String, String)>) -> RbRes
|
|
1126
1157
|
Ok(out)
|
1127
1158
|
}
|
1128
1159
|
|
1160
|
+
impl TryConvert for Wrap<SetOperation> {
|
1161
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
1162
|
+
let parsed = match String::try_convert(ob)?.as_str() {
|
1163
|
+
"union" => SetOperation::Union,
|
1164
|
+
"difference" => SetOperation::Difference,
|
1165
|
+
"intersection" => SetOperation::Intersection,
|
1166
|
+
"symmetric_difference" => SetOperation::SymmetricDifference,
|
1167
|
+
v => {
|
1168
|
+
return Err(RbValueError::new_err(format!(
|
1169
|
+
"set operation must be one of {{'union', 'difference', 'intersection', 'symmetric_difference'}}, got {v}",
|
1170
|
+
)));
|
1171
|
+
}
|
1172
|
+
};
|
1173
|
+
Ok(Wrap(parsed))
|
1174
|
+
}
|
1175
|
+
}
|
1176
|
+
|
1177
|
+
impl TryConvert for Wrap<CastColumnsPolicy> {
|
1178
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
1179
|
+
if ob.is_nil() {
|
1180
|
+
let out = Wrap(CastColumnsPolicy::ERROR_ON_MISMATCH);
|
1181
|
+
return Ok(out);
|
1182
|
+
}
|
1183
|
+
todo!();
|
1184
|
+
}
|
1185
|
+
}
|
1186
|
+
|
1129
1187
|
pub fn parse_fill_null_strategy(
|
1130
1188
|
strategy: &str,
|
1131
1189
|
limit: FillNullLimit,
|
@@ -1139,10 +1197,12 @@ pub fn parse_fill_null_strategy(
|
|
1139
1197
|
"zero" => FillNullStrategy::Zero,
|
1140
1198
|
"one" => FillNullStrategy::One,
|
1141
1199
|
e => {
|
1142
|
-
return Err(magnus::Error::new(
|
1143
|
-
|
1144
|
-
|
1145
|
-
|
1200
|
+
return Err(magnus::Error::new(
|
1201
|
+
exception::runtime_error(),
|
1202
|
+
format!(
|
1203
|
+
"strategy must be one of {{'forward', 'backward', 'min', 'max', 'mean', 'zero', 'one'}}, got {e}",
|
1204
|
+
),
|
1205
|
+
));
|
1146
1206
|
}
|
1147
1207
|
};
|
1148
1208
|
Ok(parsed)
|
@@ -1159,7 +1219,7 @@ pub fn parse_parquet_compression(
|
|
1159
1219
|
compression_level
|
1160
1220
|
.map(|lvl| {
|
1161
1221
|
GzipLevel::try_new(lvl as u8)
|
1162
|
-
.map_err(|e| RbValueError::new_err(format!("{:?}"
|
1222
|
+
.map_err(|e| RbValueError::new_err(format!("{e:?}")))
|
1163
1223
|
})
|
1164
1224
|
.transpose()?,
|
1165
1225
|
),
|
@@ -1168,7 +1228,7 @@ pub fn parse_parquet_compression(
|
|
1168
1228
|
compression_level
|
1169
1229
|
.map(|lvl| {
|
1170
1230
|
BrotliLevel::try_new(lvl as u32)
|
1171
|
-
.map_err(|e| RbValueError::new_err(format!("{:?}"
|
1231
|
+
.map_err(|e| RbValueError::new_err(format!("{e:?}")))
|
1172
1232
|
})
|
1173
1233
|
.transpose()?,
|
1174
1234
|
),
|
@@ -1176,16 +1236,14 @@ pub fn parse_parquet_compression(
|
|
1176
1236
|
"zstd" => ParquetCompression::Zstd(
|
1177
1237
|
compression_level
|
1178
1238
|
.map(|lvl| {
|
1179
|
-
ZstdLevel::try_new(lvl)
|
1180
|
-
.map_err(|e| RbValueError::new_err(format!("{:?}", e)))
|
1239
|
+
ZstdLevel::try_new(lvl).map_err(|e| RbValueError::new_err(format!("{e:?}")))
|
1181
1240
|
})
|
1182
1241
|
.transpose()?,
|
1183
1242
|
),
|
1184
1243
|
e => {
|
1185
1244
|
return Err(RbValueError::new_err(format!(
|
1186
|
-
"compression must be one of {{'uncompressed', 'snappy', 'gzip', 'lzo', 'brotli', 'lz4', 'zstd'}}, got {}"
|
1187
|
-
|
1188
|
-
)))
|
1245
|
+
"compression must be one of {{'uncompressed', 'snappy', 'gzip', 'lzo', 'brotli', 'lz4', 'zstd'}}, got {e}"
|
1246
|
+
)));
|
1189
1247
|
}
|
1190
1248
|
};
|
1191
1249
|
Ok(parsed)
|
@@ -1236,6 +1294,23 @@ impl TryConvert for RbCompatLevel {
|
|
1236
1294
|
}
|
1237
1295
|
}
|
1238
1296
|
|
1297
|
+
impl TryConvert for Wrap<UnicodeForm> {
|
1298
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
1299
|
+
let parsed = match String::try_convert(ob)?.as_str() {
|
1300
|
+
"NFC" => UnicodeForm::NFC,
|
1301
|
+
"NFKC" => UnicodeForm::NFKC,
|
1302
|
+
"NFD" => UnicodeForm::NFD,
|
1303
|
+
"NFKD" => UnicodeForm::NFKD,
|
1304
|
+
v => {
|
1305
|
+
return Err(RbValueError::new_err(format!(
|
1306
|
+
"`form` must be one of {{'NFC', 'NFKC', 'NFD', 'NFKD'}}, got {v}",
|
1307
|
+
)));
|
1308
|
+
}
|
1309
|
+
};
|
1310
|
+
Ok(Wrap(parsed))
|
1311
|
+
}
|
1312
|
+
}
|
1313
|
+
|
1239
1314
|
impl TryConvert for Wrap<Option<TimeZone>> {
|
1240
1315
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
1241
1316
|
let tz = Option::<Wrap<PlSmallStr>>::try_convert(ob)?;
|
@@ -1245,3 +1320,94 @@ impl TryConvert for Wrap<Option<TimeZone>> {
|
|
1245
1320
|
Ok(Wrap(TimeZone::opt_try_new(tz).map_err(RbPolarsErr::from)?))
|
1246
1321
|
}
|
1247
1322
|
}
|
1323
|
+
|
1324
|
+
unsafe impl TryConvertOwned for Wrap<Option<TimeZone>> {}
|
1325
|
+
|
1326
|
+
impl TryConvert for Wrap<ExtraColumnsPolicy> {
|
1327
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
1328
|
+
let parsed = match String::try_convert(ob)?.as_str() {
|
1329
|
+
"ignore" => ExtraColumnsPolicy::Ignore,
|
1330
|
+
"raise" => ExtraColumnsPolicy::Raise,
|
1331
|
+
v => {
|
1332
|
+
return Err(RbValueError::new_err(format!(
|
1333
|
+
"extra column/field parameter must be one of {{'ignore', 'raise'}}, got {v}",
|
1334
|
+
)));
|
1335
|
+
}
|
1336
|
+
};
|
1337
|
+
Ok(Wrap(parsed))
|
1338
|
+
}
|
1339
|
+
}
|
1340
|
+
|
1341
|
+
impl TryConvert for Wrap<MissingColumnsPolicy> {
|
1342
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
1343
|
+
let parsed = match String::try_convert(ob)?.as_str() {
|
1344
|
+
"insert" => MissingColumnsPolicy::Insert,
|
1345
|
+
"raise" => MissingColumnsPolicy::Raise,
|
1346
|
+
v => {
|
1347
|
+
return Err(RbValueError::new_err(format!(
|
1348
|
+
"missing column/field parameter must be one of {{'insert', 'raise'}}, got {v}",
|
1349
|
+
)));
|
1350
|
+
}
|
1351
|
+
};
|
1352
|
+
Ok(Wrap(parsed))
|
1353
|
+
}
|
1354
|
+
}
|
1355
|
+
|
1356
|
+
impl TryConvert for Wrap<ColumnMapping> {
|
1357
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
1358
|
+
let (column_mapping_type, ob) = <(String, Value)>::try_convert(ob)?;
|
1359
|
+
|
1360
|
+
Ok(Wrap(match column_mapping_type.as_str() {
|
1361
|
+
"iceberg-column-mapping" => {
|
1362
|
+
let arrow_schema = Wrap::<ArrowSchema>::try_convert(ob)?;
|
1363
|
+
ColumnMapping::Iceberg(Arc::new(
|
1364
|
+
IcebergSchema::from_arrow_schema(&arrow_schema.0).map_err(to_rb_err)?,
|
1365
|
+
))
|
1366
|
+
}
|
1367
|
+
|
1368
|
+
v => {
|
1369
|
+
return Err(RbValueError::new_err(format!(
|
1370
|
+
"unknown column mapping type: {v}"
|
1371
|
+
)));
|
1372
|
+
}
|
1373
|
+
}))
|
1374
|
+
}
|
1375
|
+
}
|
1376
|
+
|
1377
|
+
impl TryConvert for Wrap<DeletionFilesList> {
|
1378
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
1379
|
+
let (deletion_file_type, ob) = <(String, Value)>::try_convert(ob)?;
|
1380
|
+
|
1381
|
+
Ok(Wrap(match deletion_file_type.as_str() {
|
1382
|
+
"iceberg-position-delete" => {
|
1383
|
+
let dict = RHash::try_convert(ob)?;
|
1384
|
+
|
1385
|
+
let mut out = PlIndexMap::new();
|
1386
|
+
|
1387
|
+
dict.foreach(|k: usize, v: RArray| {
|
1388
|
+
let files = v
|
1389
|
+
.into_iter()
|
1390
|
+
.map(|x| {
|
1391
|
+
let x = String::try_convert(x)?;
|
1392
|
+
Ok(x)
|
1393
|
+
})
|
1394
|
+
.collect::<RbResult<Arc<[String]>>>()?;
|
1395
|
+
|
1396
|
+
if !files.is_empty() {
|
1397
|
+
out.insert(k, files);
|
1398
|
+
}
|
1399
|
+
|
1400
|
+
Ok(ForEach::Continue)
|
1401
|
+
})?;
|
1402
|
+
|
1403
|
+
DeletionFilesList::IcebergPositionDelete(Arc::new(out))
|
1404
|
+
}
|
1405
|
+
|
1406
|
+
v => {
|
1407
|
+
return Err(RbValueError::new_err(format!(
|
1408
|
+
"unknown deletion file type: {v}"
|
1409
|
+
)));
|
1410
|
+
}
|
1411
|
+
}))
|
1412
|
+
}
|
1413
|
+
}
|