polars-df 0.19.0 → 0.21.0
This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +15 -0
- data/Cargo.lock +211 -320
- data/LICENSE.txt +1 -1
- data/ext/polars/Cargo.toml +13 -9
- data/ext/polars/src/batched_csv.rs +2 -2
- data/ext/polars/src/catalog/mod.rs +1 -0
- data/ext/polars/src/catalog/unity.rs +450 -0
- data/ext/polars/src/conversion/any_value.rs +9 -19
- data/ext/polars/src/conversion/categorical.rs +30 -0
- data/ext/polars/src/conversion/chunked_array.rs +8 -8
- data/ext/polars/src/conversion/mod.rs +187 -109
- data/ext/polars/src/dataframe/construction.rs +2 -2
- data/ext/polars/src/dataframe/export.rs +2 -2
- data/ext/polars/src/dataframe/general.rs +4 -2
- data/ext/polars/src/dataframe/io.rs +2 -2
- data/ext/polars/src/exceptions.rs +1 -1
- data/ext/polars/src/expr/datatype.rs +14 -0
- data/ext/polars/src/expr/general.rs +36 -44
- data/ext/polars/src/expr/list.rs +27 -17
- data/ext/polars/src/expr/meta.rs +18 -41
- data/ext/polars/src/expr/mod.rs +3 -1
- data/ext/polars/src/expr/name.rs +2 -2
- data/ext/polars/src/expr/rolling.rs +1 -1
- data/ext/polars/src/expr/selector.rs +219 -0
- data/ext/polars/src/expr/string.rs +14 -7
- data/ext/polars/src/file.rs +12 -6
- data/ext/polars/src/functions/io.rs +2 -11
- data/ext/polars/src/functions/lazy.rs +22 -54
- data/ext/polars/src/functions/meta.rs +2 -2
- data/ext/polars/src/functions/misc.rs +1 -1
- data/ext/polars/src/functions/range.rs +14 -10
- data/ext/polars/src/functions/string_cache.rs +4 -5
- data/ext/polars/src/interop/numo/numo_rs.rs +1 -1
- data/ext/polars/src/interop/numo/to_numo_series.rs +1 -1
- data/ext/polars/src/io/mod.rs +102 -0
- data/ext/polars/src/lazyframe/general.rs +75 -113
- data/ext/polars/src/lazyframe/serde.rs +1 -1
- data/ext/polars/src/lazyframe/sink.rs +6 -6
- data/ext/polars/src/lib.rs +104 -26
- data/ext/polars/src/map/dataframe.rs +7 -7
- data/ext/polars/src/map/lazy.rs +1 -1
- data/ext/polars/src/map/mod.rs +31 -19
- data/ext/polars/src/map/series.rs +8 -8
- data/ext/polars/src/on_startup.rs +5 -2
- data/ext/polars/src/rb_modules.rs +1 -1
- data/ext/polars/src/series/construction.rs +11 -7
- data/ext/polars/src/series/export.rs +6 -4
- data/ext/polars/src/series/general.rs +12 -207
- data/ext/polars/src/series/import.rs +2 -2
- data/ext/polars/src/series/map.rs +227 -0
- data/ext/polars/src/series/mod.rs +2 -1
- data/ext/polars/src/series/scatter.rs +1 -1
- data/ext/polars/src/utils.rs +10 -2
- data/lib/polars/cat_name_space.rb +3 -43
- data/lib/polars/catalog/unity/catalog_info.rb +20 -0
- data/lib/polars/catalog/unity/column_info.rb +31 -0
- data/lib/polars/catalog/unity/namespace_info.rb +21 -0
- data/lib/polars/catalog/unity/table_info.rb +50 -0
- data/lib/polars/catalog.rb +448 -0
- data/lib/polars/convert.rb +10 -0
- data/lib/polars/data_frame.rb +151 -30
- data/lib/polars/data_types.rb +47 -3
- data/lib/polars/exceptions.rb +7 -2
- data/lib/polars/expr.rb +48 -39
- data/lib/polars/functions/col.rb +6 -5
- data/lib/polars/functions/eager.rb +1 -1
- data/lib/polars/functions/lazy.rb +114 -15
- data/lib/polars/functions/repeat.rb +4 -0
- data/lib/polars/io/csv.rb +18 -0
- data/lib/polars/io/json.rb +16 -0
- data/lib/polars/io/ndjson.rb +13 -0
- data/lib/polars/io/parquet.rb +45 -63
- data/lib/polars/io/scan_options.rb +47 -0
- data/lib/polars/lazy_frame.rb +163 -75
- data/lib/polars/list_expr.rb +213 -17
- data/lib/polars/list_name_space.rb +121 -8
- data/lib/polars/meta_expr.rb +14 -29
- data/lib/polars/scan_cast_options.rb +64 -0
- data/lib/polars/schema.rb +6 -1
- data/lib/polars/selector.rb +138 -0
- data/lib/polars/selectors.rb +931 -202
- data/lib/polars/series.rb +46 -19
- data/lib/polars/string_expr.rb +24 -3
- data/lib/polars/string_name_space.rb +12 -1
- data/lib/polars/utils/parse.rb +40 -0
- data/lib/polars/utils.rb +5 -1
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +8 -0
- metadata +17 -2
@@ -1,15 +1,16 @@
|
|
1
1
|
pub(crate) mod any_value;
|
2
|
+
mod categorical;
|
2
3
|
mod chunked_array;
|
3
4
|
|
4
5
|
use std::fmt::{Debug, Display, Formatter};
|
5
6
|
use std::fs::File;
|
6
7
|
use std::hash::{Hash, Hasher};
|
7
8
|
use std::num::NonZeroUsize;
|
8
|
-
use std::path::PathBuf;
|
9
9
|
|
10
|
+
pub use categorical::RbCategories;
|
10
11
|
use magnus::{
|
11
|
-
|
12
|
-
|
12
|
+
IntoValue, Module, RArray, RHash, Ruby, Symbol, TryConvert, Value, class, exception,
|
13
|
+
prelude::*, r_hash::ForEach, try_convert::TryConvertOwned, value::Opaque,
|
13
14
|
};
|
14
15
|
use polars::chunked_array::object::PolarsObjectSafe;
|
15
16
|
use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
|
@@ -17,6 +18,7 @@ use polars::datatypes::AnyValue;
|
|
17
18
|
use polars::frame::row::Row;
|
18
19
|
use polars::io::avro::AvroCompression;
|
19
20
|
use polars::io::cloud::CloudOptions;
|
21
|
+
use polars::prelude::deletion::DeletionFilesList;
|
20
22
|
use polars::prelude::*;
|
21
23
|
use polars::series::ops::NullBehavior;
|
22
24
|
use polars_core::utils::arrow::array::Array;
|
@@ -25,7 +27,7 @@ use polars_plan::dsl::ScanSources;
|
|
25
27
|
use polars_utils::mmap::MemSlice;
|
26
28
|
use polars_utils::total_ord::{TotalEq, TotalHash};
|
27
29
|
|
28
|
-
use crate::file::{
|
30
|
+
use crate::file::{RubyScanSourceInput, get_ruby_scan_source_input};
|
29
31
|
use crate::object::OBJECT_NAME;
|
30
32
|
use crate::rb_modules::series;
|
31
33
|
use crate::{RbDataFrame, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbTypeError, RbValueError};
|
@@ -226,18 +228,25 @@ impl IntoValue for Wrap<DataType> {
|
|
226
228
|
let class = pl.const_get::<_, Value>("Object").unwrap();
|
227
229
|
class.funcall("new", ()).unwrap()
|
228
230
|
}
|
229
|
-
DataType::Categorical(
|
230
|
-
let
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
let
|
231
|
+
DataType::Categorical(cats, _) => {
|
232
|
+
let categories_class = pl.const_get::<_, Value>("Categories").unwrap();
|
233
|
+
let categorical_class = pl.const_get::<_, Value>("Categorical").unwrap();
|
234
|
+
let categories: Value = categories_class
|
235
|
+
.funcall("_from_rb_categories", (RbCategories::from(cats.clone()),))
|
236
|
+
.unwrap();
|
237
|
+
let kwargs = RHash::new();
|
238
|
+
kwargs.aset(Symbol::new("categories"), categories).unwrap();
|
239
|
+
categorical_class.funcall("new", (kwargs,)).unwrap()
|
240
|
+
}
|
241
|
+
DataType::Enum(_, mapping) => {
|
242
|
+
let categories = unsafe {
|
243
|
+
StringChunked::from_chunks(
|
244
|
+
PlSmallStr::from_static("category"),
|
245
|
+
vec![mapping.to_arrow(true)],
|
246
|
+
)
|
247
|
+
};
|
236
248
|
let class = pl.const_get::<_, Value>("Enum").unwrap();
|
237
|
-
let
|
238
|
-
Series::from_arrow(PlSmallStr::from_static("category"), categories.to_boxed())
|
239
|
-
.unwrap();
|
240
|
-
let series = to_series(s.into());
|
249
|
+
let series = to_series(categories.into_series().into());
|
241
250
|
class.funcall::<_, _, Value>("new", (series,)).unwrap()
|
242
251
|
}
|
243
252
|
DataType::Time => {
|
@@ -277,13 +286,13 @@ impl IntoValue for Wrap<DataType> {
|
|
277
286
|
}
|
278
287
|
}
|
279
288
|
|
289
|
+
enum CategoricalOrdering {
|
290
|
+
Lexical,
|
291
|
+
}
|
292
|
+
|
280
293
|
impl IntoValue for Wrap<CategoricalOrdering> {
|
281
294
|
fn into_value_with(self, _: &Ruby) -> Value {
|
282
|
-
|
283
|
-
CategoricalOrdering::Physical => "physical",
|
284
|
-
CategoricalOrdering::Lexical => "lexical",
|
285
|
-
};
|
286
|
-
ordering.into_value()
|
295
|
+
"lexical".into_value()
|
287
296
|
}
|
288
297
|
}
|
289
298
|
|
@@ -324,8 +333,10 @@ impl TryConvert for Wrap<DataType> {
|
|
324
333
|
"Polars::Boolean" => DataType::Boolean,
|
325
334
|
"Polars::String" => DataType::String,
|
326
335
|
"Polars::Binary" => DataType::Binary,
|
327
|
-
"Polars::Categorical" => DataType::
|
328
|
-
"Polars::Enum" =>
|
336
|
+
"Polars::Categorical" => DataType::from_categories(Categories::global()),
|
337
|
+
"Polars::Enum" => {
|
338
|
+
DataType::from_frozen_categories(FrozenCategories::new([]).unwrap())
|
339
|
+
}
|
329
340
|
"Polars::Date" => DataType::Date,
|
330
341
|
"Polars::Time" => DataType::Time,
|
331
342
|
"Polars::Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
|
@@ -340,11 +351,12 @@ impl TryConvert for Wrap<DataType> {
|
|
340
351
|
dt => {
|
341
352
|
return Err(RbValueError::new_err(format!(
|
342
353
|
"{dt} is not a correct polars DataType.",
|
343
|
-
)))
|
354
|
+
)));
|
344
355
|
}
|
345
356
|
}
|
346
357
|
} else if String::try_convert(ob).is_err() {
|
347
|
-
let
|
358
|
+
let cls = ob.class();
|
359
|
+
let name = unsafe { cls.name() }.into_owned();
|
348
360
|
match name.as_str() {
|
349
361
|
"Polars::Int8" => DataType::Int8,
|
350
362
|
"Polars::Int16" => DataType::Int16,
|
@@ -360,17 +372,20 @@ impl TryConvert for Wrap<DataType> {
|
|
360
372
|
"Polars::String" => DataType::String,
|
361
373
|
"Polars::Binary" => DataType::Binary,
|
362
374
|
"Polars::Categorical" => {
|
363
|
-
let
|
364
|
-
|
365
|
-
.
|
366
|
-
DataType::
|
375
|
+
let categories: Value = ob.funcall("categories", ()).unwrap();
|
376
|
+
let rb_categories: &RbCategories =
|
377
|
+
categories.funcall("_categories", ()).unwrap();
|
378
|
+
DataType::from_categories(rb_categories.categories().clone())
|
367
379
|
}
|
368
380
|
"Polars::Enum" => {
|
369
|
-
let categories = ob.funcall("categories", ()).unwrap();
|
381
|
+
let categories: Value = ob.funcall("categories", ()).unwrap();
|
370
382
|
let s = get_series(categories)?;
|
371
383
|
let ca = s.str().map_err(RbPolarsErr::from)?;
|
372
384
|
let categories = ca.downcast_iter().next().unwrap().clone();
|
373
|
-
|
385
|
+
assert!(!categories.has_nulls());
|
386
|
+
DataType::from_frozen_categories(
|
387
|
+
FrozenCategories::new(categories.values_iter()).unwrap(),
|
388
|
+
)
|
374
389
|
}
|
375
390
|
"Polars::Date" => DataType::Date,
|
376
391
|
"Polars::Time" => DataType::Time,
|
@@ -420,7 +435,7 @@ impl TryConvert for Wrap<DataType> {
|
|
420
435
|
return Err(RbTypeError::new_err(format!(
|
421
436
|
"A {dt} object is not a correct polars DataType. \
|
422
437
|
Hint: use the class without instantiating it.",
|
423
|
-
)))
|
438
|
+
)));
|
424
439
|
}
|
425
440
|
}
|
426
441
|
} else {
|
@@ -436,7 +451,7 @@ impl TryConvert for Wrap<DataType> {
|
|
436
451
|
"str" => DataType::String,
|
437
452
|
"bin" => DataType::Binary,
|
438
453
|
"bool" => DataType::Boolean,
|
439
|
-
"cat" => DataType::
|
454
|
+
"cat" => DataType::from_categories(Categories::global()),
|
440
455
|
"date" => DataType::Date,
|
441
456
|
"datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
|
442
457
|
"f32" => DataType::Float32,
|
@@ -449,9 +464,8 @@ impl TryConvert for Wrap<DataType> {
|
|
449
464
|
"unk" => DataType::Unknown(Default::default()),
|
450
465
|
_ => {
|
451
466
|
return Err(RbValueError::new_err(format!(
|
452
|
-
"{} is not a supported DataType."
|
453
|
-
|
454
|
-
)))
|
467
|
+
"{ob} is not a supported DataType."
|
468
|
+
)));
|
455
469
|
}
|
456
470
|
}
|
457
471
|
};
|
@@ -475,7 +489,7 @@ impl TryConvert for Wrap<StatisticsOptions> {
|
|
475
489
|
_ => {
|
476
490
|
return Err(RbTypeError::new_err(format!(
|
477
491
|
"'{key}' is not a valid statistic option",
|
478
|
-
)))
|
492
|
+
)));
|
479
493
|
}
|
480
494
|
}
|
481
495
|
Ok(ForEach::Continue)
|
@@ -519,7 +533,7 @@ impl TryConvert for Wrap<ScanSources> {
|
|
519
533
|
}
|
520
534
|
|
521
535
|
enum MutableSources {
|
522
|
-
Paths(Vec<
|
536
|
+
Paths(Vec<PlPath>),
|
523
537
|
Files(Vec<File>),
|
524
538
|
Buffers(Vec<MemSlice>),
|
525
539
|
}
|
@@ -560,7 +574,7 @@ impl TryConvert for Wrap<ScanSources> {
|
|
560
574
|
return Err(RbTypeError::new_err(
|
561
575
|
"Cannot combine in-memory bytes, paths and files for scan sources"
|
562
576
|
.to_string(),
|
563
|
-
))
|
577
|
+
));
|
564
578
|
}
|
565
579
|
}
|
566
580
|
}
|
@@ -678,7 +692,7 @@ impl TryConvert for Wrap<AsofStrategy> {
|
|
678
692
|
v => {
|
679
693
|
return Err(RbValueError::new_err(format!(
|
680
694
|
"asof `strategy` must be one of {{'backward', 'forward', 'nearest'}}, got {v}",
|
681
|
-
)))
|
695
|
+
)));
|
682
696
|
}
|
683
697
|
};
|
684
698
|
Ok(Wrap(parsed))
|
@@ -693,7 +707,7 @@ impl TryConvert for Wrap<InterpolationMethod> {
|
|
693
707
|
v => {
|
694
708
|
return Err(RbValueError::new_err(format!(
|
695
709
|
"method must be one of {{'linear', 'nearest'}}, got {v}",
|
696
|
-
)))
|
710
|
+
)));
|
697
711
|
}
|
698
712
|
};
|
699
713
|
Ok(Wrap(parsed))
|
@@ -708,9 +722,8 @@ impl TryConvert for Wrap<Option<AvroCompression>> {
|
|
708
722
|
"deflate" => Some(AvroCompression::Deflate),
|
709
723
|
v => {
|
710
724
|
return Err(RbValueError::new_err(format!(
|
711
|
-
"compression must be one of {{'uncompressed', 'snappy', 'deflate'}}, got {}"
|
712
|
-
|
713
|
-
)))
|
725
|
+
"compression must be one of {{'uncompressed', 'snappy', 'deflate'}}, got {v}"
|
726
|
+
)));
|
714
727
|
}
|
715
728
|
};
|
716
729
|
Ok(Wrap(parsed))
|
@@ -720,13 +733,18 @@ impl TryConvert for Wrap<Option<AvroCompression>> {
|
|
720
733
|
impl TryConvert for Wrap<CategoricalOrdering> {
|
721
734
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
722
735
|
let parsed = match String::try_convert(ob)?.as_str() {
|
723
|
-
"physical" => CategoricalOrdering::Physical,
|
724
736
|
"lexical" => CategoricalOrdering::Lexical,
|
737
|
+
"physical" => {
|
738
|
+
polars_warn!(
|
739
|
+
Deprecation,
|
740
|
+
"physical ordering is deprecated, will use lexical ordering instead"
|
741
|
+
);
|
742
|
+
CategoricalOrdering::Lexical
|
743
|
+
}
|
725
744
|
v => {
|
726
745
|
return Err(RbValueError::new_err(format!(
|
727
|
-
"ordering must be one of {{'physical', 'lexical'}}, got {}"
|
728
|
-
|
729
|
-
)))
|
746
|
+
"ordering must be one of {{'physical', 'lexical'}}, got {v}"
|
747
|
+
)));
|
730
748
|
}
|
731
749
|
};
|
732
750
|
Ok(Wrap(parsed))
|
@@ -742,7 +760,7 @@ impl TryConvert for Wrap<StartBy> {
|
|
742
760
|
v => {
|
743
761
|
return Err(RbValueError::new_err(format!(
|
744
762
|
"closed must be one of {{'window', 'datapoint', 'monday'}}, got {v}",
|
745
|
-
)))
|
763
|
+
)));
|
746
764
|
}
|
747
765
|
};
|
748
766
|
Ok(Wrap(parsed))
|
@@ -758,9 +776,8 @@ impl TryConvert for Wrap<ClosedWindow> {
|
|
758
776
|
"none" => ClosedWindow::None,
|
759
777
|
v => {
|
760
778
|
return Err(RbValueError::new_err(format!(
|
761
|
-
"closed must be one of {{'left', 'right', 'both', 'none'}}, got {}"
|
762
|
-
|
763
|
-
)))
|
779
|
+
"closed must be one of {{'left', 'right', 'both', 'none'}}, got {v}"
|
780
|
+
)));
|
764
781
|
}
|
765
782
|
};
|
766
783
|
Ok(Wrap(parsed))
|
@@ -789,9 +806,8 @@ impl TryConvert for Wrap<CsvEncoding> {
|
|
789
806
|
"utf8-lossy" => CsvEncoding::LossyUtf8,
|
790
807
|
v => {
|
791
808
|
return Err(RbValueError::new_err(format!(
|
792
|
-
"encoding must be one of {{'utf8', 'utf8-lossy'}}, got {}"
|
793
|
-
|
794
|
-
)))
|
809
|
+
"encoding must be one of {{'utf8', 'utf8-lossy'}}, got {v}"
|
810
|
+
)));
|
795
811
|
}
|
796
812
|
};
|
797
813
|
Ok(Wrap(parsed))
|
@@ -806,9 +822,8 @@ impl TryConvert for Wrap<Option<IpcCompression>> {
|
|
806
822
|
"zstd" => Some(IpcCompression::ZSTD),
|
807
823
|
v => {
|
808
824
|
return Err(RbValueError::new_err(format!(
|
809
|
-
"compression must be one of {{'uncompressed', 'lz4', 'zstd'}}, got {}"
|
810
|
-
|
811
|
-
)))
|
825
|
+
"compression must be one of {{'uncompressed', 'lz4', 'zstd'}}, got {v}"
|
826
|
+
)));
|
812
827
|
}
|
813
828
|
};
|
814
829
|
Ok(Wrap(parsed))
|
@@ -826,9 +841,8 @@ impl TryConvert for Wrap<JoinType> {
|
|
826
841
|
"cross" => JoinType::Cross,
|
827
842
|
v => {
|
828
843
|
return Err(RbValueError::new_err(format!(
|
829
|
-
|
830
|
-
|
831
|
-
)))
|
844
|
+
"how must be one of {{'inner', 'left', 'full', 'semi', 'anti', 'cross'}}, got {v}"
|
845
|
+
)));
|
832
846
|
}
|
833
847
|
};
|
834
848
|
Ok(Wrap(parsed))
|
@@ -844,7 +858,7 @@ impl TryConvert for Wrap<Label> {
|
|
844
858
|
v => {
|
845
859
|
return Err(RbValueError::new_err(format!(
|
846
860
|
"`label` must be one of {{'left', 'right', 'datapoint'}}, got {v}",
|
847
|
-
)))
|
861
|
+
)));
|
848
862
|
}
|
849
863
|
};
|
850
864
|
Ok(Wrap(parsed))
|
@@ -858,9 +872,8 @@ impl TryConvert for Wrap<ListToStructWidthStrategy> {
|
|
858
872
|
"max_width" => ListToStructWidthStrategy::MaxWidth,
|
859
873
|
v => {
|
860
874
|
return Err(RbValueError::new_err(format!(
|
861
|
-
"n_field_strategy must be one of {{'first_non_null', 'max_width'}}, got {}"
|
862
|
-
|
863
|
-
)))
|
875
|
+
"n_field_strategy must be one of {{'first_non_null', 'max_width'}}, got {v}"
|
876
|
+
)));
|
864
877
|
}
|
865
878
|
};
|
866
879
|
Ok(Wrap(parsed))
|
@@ -875,7 +888,7 @@ impl TryConvert for Wrap<NonExistent> {
|
|
875
888
|
v => {
|
876
889
|
return Err(RbValueError::new_err(format!(
|
877
890
|
"`non_existent` must be one of {{'null', 'raise'}}, got {v}",
|
878
|
-
)))
|
891
|
+
)));
|
879
892
|
}
|
880
893
|
};
|
881
894
|
Ok(Wrap(parsed))
|
@@ -889,9 +902,8 @@ impl TryConvert for Wrap<NullBehavior> {
|
|
889
902
|
"ignore" => NullBehavior::Ignore,
|
890
903
|
v => {
|
891
904
|
return Err(RbValueError::new_err(format!(
|
892
|
-
"null behavior must be one of {{'drop', 'ignore'}}, got {}"
|
893
|
-
|
894
|
-
)))
|
905
|
+
"null behavior must be one of {{'drop', 'ignore'}}, got {v}"
|
906
|
+
)));
|
895
907
|
}
|
896
908
|
};
|
897
909
|
Ok(Wrap(parsed))
|
@@ -905,9 +917,8 @@ impl TryConvert for Wrap<NullStrategy> {
|
|
905
917
|
"propagate" => NullStrategy::Propagate,
|
906
918
|
v => {
|
907
919
|
return Err(RbValueError::new_err(format!(
|
908
|
-
"null strategy must be one of {{'ignore', 'propagate'}}, got {}"
|
909
|
-
|
910
|
-
)))
|
920
|
+
"null strategy must be one of {{'ignore', 'propagate'}}, got {v}"
|
921
|
+
)));
|
911
922
|
}
|
912
923
|
};
|
913
924
|
Ok(Wrap(parsed))
|
@@ -923,9 +934,8 @@ impl TryConvert for Wrap<ParallelStrategy> {
|
|
923
934
|
"none" => ParallelStrategy::None,
|
924
935
|
v => {
|
925
936
|
return Err(RbValueError::new_err(format!(
|
926
|
-
"parallel must be one of {{'auto', 'columns', 'row_groups', 'none'}}, got {}"
|
927
|
-
|
928
|
-
)))
|
937
|
+
"parallel must be one of {{'auto', 'columns', 'row_groups', 'none'}}, got {v}"
|
938
|
+
)));
|
929
939
|
}
|
930
940
|
};
|
931
941
|
Ok(Wrap(parsed))
|
@@ -942,9 +952,8 @@ impl TryConvert for Wrap<QuantileMethod> {
|
|
942
952
|
"midpoint" => QuantileMethod::Midpoint,
|
943
953
|
v => {
|
944
954
|
return Err(RbValueError::new_err(format!(
|
945
|
-
"interpolation must be one of {{'lower', 'higher', 'nearest', 'linear', 'midpoint'}}, got {}"
|
946
|
-
|
947
|
-
)))
|
955
|
+
"interpolation must be one of {{'lower', 'higher', 'nearest', 'linear', 'midpoint'}}, got {v}"
|
956
|
+
)));
|
948
957
|
}
|
949
958
|
};
|
950
959
|
Ok(Wrap(parsed))
|
@@ -962,9 +971,8 @@ impl TryConvert for Wrap<RankMethod> {
|
|
962
971
|
"random" => RankMethod::Random,
|
963
972
|
v => {
|
964
973
|
return Err(RbValueError::new_err(format!(
|
965
|
-
"method must be one of {{'min', 'max', 'average', 'dense', 'ordinal', 'random'}}, got {}"
|
966
|
-
|
967
|
-
)))
|
974
|
+
"method must be one of {{'min', 'max', 'average', 'dense', 'ordinal', 'random'}}, got {v}"
|
975
|
+
)));
|
968
976
|
}
|
969
977
|
};
|
970
978
|
Ok(Wrap(parsed))
|
@@ -979,15 +987,16 @@ impl TryConvert for Wrap<TimeUnit> {
|
|
979
987
|
"ms" => TimeUnit::Milliseconds,
|
980
988
|
v => {
|
981
989
|
return Err(RbValueError::new_err(format!(
|
982
|
-
"time unit must be one of {{'ns', 'us', 'ms'}}, got {}"
|
983
|
-
|
984
|
-
)))
|
990
|
+
"time unit must be one of {{'ns', 'us', 'ms'}}, got {v}"
|
991
|
+
)));
|
985
992
|
}
|
986
993
|
};
|
987
994
|
Ok(Wrap(parsed))
|
988
995
|
}
|
989
996
|
}
|
990
997
|
|
998
|
+
unsafe impl TryConvertOwned for Wrap<TimeUnit> {}
|
999
|
+
|
991
1000
|
impl TryConvert for Wrap<UniqueKeepStrategy> {
|
992
1001
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
993
1002
|
let parsed = match String::try_convert(ob)?.as_str() {
|
@@ -995,9 +1004,8 @@ impl TryConvert for Wrap<UniqueKeepStrategy> {
|
|
995
1004
|
"last" => UniqueKeepStrategy::Last,
|
996
1005
|
v => {
|
997
1006
|
return Err(RbValueError::new_err(format!(
|
998
|
-
"keep must be one of {{'first', 'last'}}, got {}"
|
999
|
-
|
1000
|
-
)))
|
1007
|
+
"keep must be one of {{'first', 'last'}}, got {v}"
|
1008
|
+
)));
|
1001
1009
|
}
|
1002
1010
|
};
|
1003
1011
|
Ok(Wrap(parsed))
|
@@ -1011,9 +1019,8 @@ impl TryConvert for Wrap<IpcCompression> {
|
|
1011
1019
|
"zstd" => IpcCompression::ZSTD,
|
1012
1020
|
v => {
|
1013
1021
|
return Err(RbValueError::new_err(format!(
|
1014
|
-
"compression must be one of {{'lz4', 'zstd'}}, got {}"
|
1015
|
-
|
1016
|
-
)))
|
1022
|
+
"compression must be one of {{'lz4', 'zstd'}}, got {v}"
|
1023
|
+
)));
|
1017
1024
|
}
|
1018
1025
|
};
|
1019
1026
|
Ok(Wrap(parsed))
|
@@ -1029,7 +1036,7 @@ impl TryConvert for Wrap<SearchSortedSide> {
|
|
1029
1036
|
v => {
|
1030
1037
|
return Err(RbValueError::new_err(format!(
|
1031
1038
|
"side must be one of {{'any', 'left', 'right'}}, got {v}",
|
1032
|
-
)))
|
1039
|
+
)));
|
1033
1040
|
}
|
1034
1041
|
};
|
1035
1042
|
Ok(Wrap(parsed))
|
@@ -1046,7 +1053,7 @@ impl TryConvert for Wrap<ClosedInterval> {
|
|
1046
1053
|
v => {
|
1047
1054
|
return Err(RbValueError::new_err(format!(
|
1048
1055
|
"`closed` must be one of {{'both', 'left', 'right', 'none'}}, got {v}",
|
1049
|
-
)))
|
1056
|
+
)));
|
1050
1057
|
}
|
1051
1058
|
};
|
1052
1059
|
Ok(Wrap(parsed))
|
@@ -1061,8 +1068,8 @@ impl TryConvert for Wrap<WindowMapping> {
|
|
1061
1068
|
"explode" => WindowMapping::Explode,
|
1062
1069
|
v => {
|
1063
1070
|
return Err(RbValueError::new_err(format!(
|
1064
|
-
|
1065
|
-
|
1071
|
+
"`mapping_strategy` must be one of {{'group_to_rows', 'join', 'explode'}}, got {v}",
|
1072
|
+
)));
|
1066
1073
|
}
|
1067
1074
|
};
|
1068
1075
|
Ok(Wrap(parsed))
|
@@ -1079,7 +1086,7 @@ impl TryConvert for Wrap<JoinValidation> {
|
|
1079
1086
|
v => {
|
1080
1087
|
return Err(RbValueError::new_err(format!(
|
1081
1088
|
"`validate` must be one of {{'m:m', 'm:1', '1:m', '1:1'}}, got {v}",
|
1082
|
-
)))
|
1089
|
+
)));
|
1083
1090
|
}
|
1084
1091
|
};
|
1085
1092
|
Ok(Wrap(parsed))
|
@@ -1097,8 +1104,8 @@ impl TryConvert for Wrap<MaintainOrderJoin> {
|
|
1097
1104
|
v => {
|
1098
1105
|
return Err(RbValueError::new_err(format!(
|
1099
1106
|
"`maintain_order` must be one of {{'none', 'left', 'right', 'left_right', 'right_left'}}, got {v}",
|
1100
|
-
)))
|
1101
|
-
}
|
1107
|
+
)));
|
1108
|
+
}
|
1102
1109
|
};
|
1103
1110
|
Ok(Wrap(parsed))
|
1104
1111
|
}
|
@@ -1114,8 +1121,8 @@ impl TryConvert for Wrap<QuoteStyle> {
|
|
1114
1121
|
v => {
|
1115
1122
|
return Err(RbValueError::new_err(format!(
|
1116
1123
|
"`quote_style` must be one of {{'always', 'necessary', 'non_numeric', 'never'}}, got {v}",
|
1117
|
-
)))
|
1118
|
-
}
|
1124
|
+
)));
|
1125
|
+
}
|
1119
1126
|
};
|
1120
1127
|
Ok(Wrap(parsed))
|
1121
1128
|
}
|
@@ -1126,6 +1133,33 @@ pub(crate) fn parse_cloud_options(uri: &str, kv: Vec<(String, String)>) -> RbRes
|
|
1126
1133
|
Ok(out)
|
1127
1134
|
}
|
1128
1135
|
|
1136
|
+
impl TryConvert for Wrap<SetOperation> {
|
1137
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
1138
|
+
let parsed = match String::try_convert(ob)?.as_str() {
|
1139
|
+
"union" => SetOperation::Union,
|
1140
|
+
"difference" => SetOperation::Difference,
|
1141
|
+
"intersection" => SetOperation::Intersection,
|
1142
|
+
"symmetric_difference" => SetOperation::SymmetricDifference,
|
1143
|
+
v => {
|
1144
|
+
return Err(RbValueError::new_err(format!(
|
1145
|
+
"set operation must be one of {{'union', 'difference', 'intersection', 'symmetric_difference'}}, got {v}",
|
1146
|
+
)));
|
1147
|
+
}
|
1148
|
+
};
|
1149
|
+
Ok(Wrap(parsed))
|
1150
|
+
}
|
1151
|
+
}
|
1152
|
+
|
1153
|
+
impl TryConvert for Wrap<CastColumnsPolicy> {
|
1154
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
1155
|
+
if ob.is_nil() {
|
1156
|
+
let out = Wrap(CastColumnsPolicy::ERROR_ON_MISMATCH);
|
1157
|
+
return Ok(out);
|
1158
|
+
}
|
1159
|
+
todo!();
|
1160
|
+
}
|
1161
|
+
}
|
1162
|
+
|
1129
1163
|
pub fn parse_fill_null_strategy(
|
1130
1164
|
strategy: &str,
|
1131
1165
|
limit: FillNullLimit,
|
@@ -1139,10 +1173,12 @@ pub fn parse_fill_null_strategy(
|
|
1139
1173
|
"zero" => FillNullStrategy::Zero,
|
1140
1174
|
"one" => FillNullStrategy::One,
|
1141
1175
|
e => {
|
1142
|
-
return Err(magnus::Error::new(
|
1143
|
-
|
1144
|
-
|
1145
|
-
|
1176
|
+
return Err(magnus::Error::new(
|
1177
|
+
exception::runtime_error(),
|
1178
|
+
format!(
|
1179
|
+
"strategy must be one of {{'forward', 'backward', 'min', 'max', 'mean', 'zero', 'one'}}, got {e}",
|
1180
|
+
),
|
1181
|
+
));
|
1146
1182
|
}
|
1147
1183
|
};
|
1148
1184
|
Ok(parsed)
|
@@ -1159,7 +1195,7 @@ pub fn parse_parquet_compression(
|
|
1159
1195
|
compression_level
|
1160
1196
|
.map(|lvl| {
|
1161
1197
|
GzipLevel::try_new(lvl as u8)
|
1162
|
-
.map_err(|e| RbValueError::new_err(format!("{:?}"
|
1198
|
+
.map_err(|e| RbValueError::new_err(format!("{e:?}")))
|
1163
1199
|
})
|
1164
1200
|
.transpose()?,
|
1165
1201
|
),
|
@@ -1168,7 +1204,7 @@ pub fn parse_parquet_compression(
|
|
1168
1204
|
compression_level
|
1169
1205
|
.map(|lvl| {
|
1170
1206
|
BrotliLevel::try_new(lvl as u32)
|
1171
|
-
.map_err(|e| RbValueError::new_err(format!("{:?}"
|
1207
|
+
.map_err(|e| RbValueError::new_err(format!("{e:?}")))
|
1172
1208
|
})
|
1173
1209
|
.transpose()?,
|
1174
1210
|
),
|
@@ -1176,16 +1212,14 @@ pub fn parse_parquet_compression(
|
|
1176
1212
|
"zstd" => ParquetCompression::Zstd(
|
1177
1213
|
compression_level
|
1178
1214
|
.map(|lvl| {
|
1179
|
-
ZstdLevel::try_new(lvl)
|
1180
|
-
.map_err(|e| RbValueError::new_err(format!("{:?}", e)))
|
1215
|
+
ZstdLevel::try_new(lvl).map_err(|e| RbValueError::new_err(format!("{e:?}")))
|
1181
1216
|
})
|
1182
1217
|
.transpose()?,
|
1183
1218
|
),
|
1184
1219
|
e => {
|
1185
1220
|
return Err(RbValueError::new_err(format!(
|
1186
|
-
"compression must be one of {{'uncompressed', 'snappy', 'gzip', 'lzo', 'brotli', 'lz4', 'zstd'}}, got {}"
|
1187
|
-
|
1188
|
-
)))
|
1221
|
+
"compression must be one of {{'uncompressed', 'snappy', 'gzip', 'lzo', 'brotli', 'lz4', 'zstd'}}, got {e}"
|
1222
|
+
)));
|
1189
1223
|
}
|
1190
1224
|
};
|
1191
1225
|
Ok(parsed)
|
@@ -1245,3 +1279,47 @@ impl TryConvert for Wrap<Option<TimeZone>> {
|
|
1245
1279
|
Ok(Wrap(TimeZone::opt_try_new(tz).map_err(RbPolarsErr::from)?))
|
1246
1280
|
}
|
1247
1281
|
}
|
1282
|
+
|
1283
|
+
unsafe impl TryConvertOwned for Wrap<Option<TimeZone>> {}
|
1284
|
+
|
1285
|
+
impl TryConvert for Wrap<ExtraColumnsPolicy> {
|
1286
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
1287
|
+
let parsed = match String::try_convert(ob)?.as_str() {
|
1288
|
+
"ignore" => ExtraColumnsPolicy::Ignore,
|
1289
|
+
"raise" => ExtraColumnsPolicy::Raise,
|
1290
|
+
v => {
|
1291
|
+
return Err(RbValueError::new_err(format!(
|
1292
|
+
"extra column/field parameter must be one of {{'ignore', 'raise'}}, got {v}",
|
1293
|
+
)));
|
1294
|
+
}
|
1295
|
+
};
|
1296
|
+
Ok(Wrap(parsed))
|
1297
|
+
}
|
1298
|
+
}
|
1299
|
+
|
1300
|
+
impl TryConvert for Wrap<MissingColumnsPolicy> {
|
1301
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
1302
|
+
let parsed = match String::try_convert(ob)?.as_str() {
|
1303
|
+
"insert" => MissingColumnsPolicy::Insert,
|
1304
|
+
"raise" => MissingColumnsPolicy::Raise,
|
1305
|
+
v => {
|
1306
|
+
return Err(RbValueError::new_err(format!(
|
1307
|
+
"missing column/field parameter must be one of {{'insert', 'raise'}}, got {v}",
|
1308
|
+
)));
|
1309
|
+
}
|
1310
|
+
};
|
1311
|
+
Ok(Wrap(parsed))
|
1312
|
+
}
|
1313
|
+
}
|
1314
|
+
|
1315
|
+
impl TryConvert for Wrap<ColumnMapping> {
|
1316
|
+
fn try_convert(_ob: Value) -> RbResult<Self> {
|
1317
|
+
todo!()
|
1318
|
+
}
|
1319
|
+
}
|
1320
|
+
|
1321
|
+
impl TryConvert for Wrap<DeletionFilesList> {
|
1322
|
+
fn try_convert(_ob: Value) -> RbResult<Self> {
|
1323
|
+
todo!();
|
1324
|
+
}
|
1325
|
+
}
|
@@ -1,5 +1,5 @@
|
|
1
|
-
use magnus::{
|
2
|
-
use polars::frame::row::{rows_to_schema_supertypes, rows_to_supertypes
|
1
|
+
use magnus::{RArray, RHash, Symbol, Value, prelude::*, r_hash::ForEach};
|
2
|
+
use polars::frame::row::{Row, rows_to_schema_supertypes, rows_to_supertypes};
|
3
3
|
use polars::prelude::*;
|
4
4
|
|
5
5
|
use super::*;
|
@@ -1,9 +1,9 @@
|
|
1
|
-
use magnus::{
|
1
|
+
use magnus::{IntoValue, RArray, Value, prelude::*};
|
2
2
|
|
3
3
|
use super::*;
|
4
|
+
use crate::RbResult;
|
4
5
|
use crate::conversion::{ObjectValue, Wrap};
|
5
6
|
use crate::interop::arrow::to_ruby::dataframe_to_stream;
|
6
|
-
use crate::RbResult;
|
7
7
|
|
8
8
|
impl RbDataFrame {
|
9
9
|
pub fn row_tuple(&self, idx: i64) -> Value {
|