polars-df 0.19.0 → 0.21.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +15 -0
- data/Cargo.lock +211 -320
- data/LICENSE.txt +1 -1
- data/ext/polars/Cargo.toml +13 -9
- data/ext/polars/src/batched_csv.rs +2 -2
- data/ext/polars/src/catalog/mod.rs +1 -0
- data/ext/polars/src/catalog/unity.rs +450 -0
- data/ext/polars/src/conversion/any_value.rs +9 -19
- data/ext/polars/src/conversion/categorical.rs +30 -0
- data/ext/polars/src/conversion/chunked_array.rs +8 -8
- data/ext/polars/src/conversion/mod.rs +187 -109
- data/ext/polars/src/dataframe/construction.rs +2 -2
- data/ext/polars/src/dataframe/export.rs +2 -2
- data/ext/polars/src/dataframe/general.rs +4 -2
- data/ext/polars/src/dataframe/io.rs +2 -2
- data/ext/polars/src/exceptions.rs +1 -1
- data/ext/polars/src/expr/datatype.rs +14 -0
- data/ext/polars/src/expr/general.rs +36 -44
- data/ext/polars/src/expr/list.rs +27 -17
- data/ext/polars/src/expr/meta.rs +18 -41
- data/ext/polars/src/expr/mod.rs +3 -1
- data/ext/polars/src/expr/name.rs +2 -2
- data/ext/polars/src/expr/rolling.rs +1 -1
- data/ext/polars/src/expr/selector.rs +219 -0
- data/ext/polars/src/expr/string.rs +14 -7
- data/ext/polars/src/file.rs +12 -6
- data/ext/polars/src/functions/io.rs +2 -11
- data/ext/polars/src/functions/lazy.rs +22 -54
- data/ext/polars/src/functions/meta.rs +2 -2
- data/ext/polars/src/functions/misc.rs +1 -1
- data/ext/polars/src/functions/range.rs +14 -10
- data/ext/polars/src/functions/string_cache.rs +4 -5
- data/ext/polars/src/interop/numo/numo_rs.rs +1 -1
- data/ext/polars/src/interop/numo/to_numo_series.rs +1 -1
- data/ext/polars/src/io/mod.rs +102 -0
- data/ext/polars/src/lazyframe/general.rs +75 -113
- data/ext/polars/src/lazyframe/serde.rs +1 -1
- data/ext/polars/src/lazyframe/sink.rs +6 -6
- data/ext/polars/src/lib.rs +104 -26
- data/ext/polars/src/map/dataframe.rs +7 -7
- data/ext/polars/src/map/lazy.rs +1 -1
- data/ext/polars/src/map/mod.rs +31 -19
- data/ext/polars/src/map/series.rs +8 -8
- data/ext/polars/src/on_startup.rs +5 -2
- data/ext/polars/src/rb_modules.rs +1 -1
- data/ext/polars/src/series/construction.rs +11 -7
- data/ext/polars/src/series/export.rs +6 -4
- data/ext/polars/src/series/general.rs +12 -207
- data/ext/polars/src/series/import.rs +2 -2
- data/ext/polars/src/series/map.rs +227 -0
- data/ext/polars/src/series/mod.rs +2 -1
- data/ext/polars/src/series/scatter.rs +1 -1
- data/ext/polars/src/utils.rs +10 -2
- data/lib/polars/cat_name_space.rb +3 -43
- data/lib/polars/catalog/unity/catalog_info.rb +20 -0
- data/lib/polars/catalog/unity/column_info.rb +31 -0
- data/lib/polars/catalog/unity/namespace_info.rb +21 -0
- data/lib/polars/catalog/unity/table_info.rb +50 -0
- data/lib/polars/catalog.rb +448 -0
- data/lib/polars/convert.rb +10 -0
- data/lib/polars/data_frame.rb +151 -30
- data/lib/polars/data_types.rb +47 -3
- data/lib/polars/exceptions.rb +7 -2
- data/lib/polars/expr.rb +48 -39
- data/lib/polars/functions/col.rb +6 -5
- data/lib/polars/functions/eager.rb +1 -1
- data/lib/polars/functions/lazy.rb +114 -15
- data/lib/polars/functions/repeat.rb +4 -0
- data/lib/polars/io/csv.rb +18 -0
- data/lib/polars/io/json.rb +16 -0
- data/lib/polars/io/ndjson.rb +13 -0
- data/lib/polars/io/parquet.rb +45 -63
- data/lib/polars/io/scan_options.rb +47 -0
- data/lib/polars/lazy_frame.rb +163 -75
- data/lib/polars/list_expr.rb +213 -17
- data/lib/polars/list_name_space.rb +121 -8
- data/lib/polars/meta_expr.rb +14 -29
- data/lib/polars/scan_cast_options.rb +64 -0
- data/lib/polars/schema.rb +6 -1
- data/lib/polars/selector.rb +138 -0
- data/lib/polars/selectors.rb +931 -202
- data/lib/polars/series.rb +46 -19
- data/lib/polars/string_expr.rb +24 -3
- data/lib/polars/string_name_space.rb +12 -1
- data/lib/polars/utils/parse.rb +40 -0
- data/lib/polars/utils.rb +5 -1
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +8 -0
- metadata +17 -2
data/ext/polars/src/dataframe/general.rs
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
use std::hash::BuildHasher;
|
2
2
|
|
3
3
|
use either::Either;
|
4
|
-
use magnus::{
|
4
|
+
use magnus::{IntoValue, RArray, Value, prelude::*, typed_data::Obj};
|
5
5
|
use polars::prelude::pivot::{pivot, pivot_stable};
|
6
6
|
use polars::prelude::*;
|
7
7
|
|
@@ -416,17 +416,19 @@ impl RbDataFrame {
|
|
416
416
|
columns: Option<Vec<String>>,
|
417
417
|
separator: Option<String>,
|
418
418
|
drop_first: bool,
|
419
|
+
drop_nulls: bool,
|
419
420
|
) -> RbResult<Self> {
|
420
421
|
let df = match columns {
|
421
422
|
Some(cols) => self.df.borrow().columns_to_dummies(
|
422
423
|
cols.iter().map(|x| x as &str).collect(),
|
423
424
|
separator.as_deref(),
|
424
425
|
drop_first,
|
426
|
+
drop_nulls,
|
425
427
|
),
|
426
428
|
None => self
|
427
429
|
.df
|
428
430
|
.borrow()
|
429
|
-
.to_dummies(separator.as_deref(), drop_first),
|
431
|
+
.to_dummies(separator.as_deref(), drop_first, drop_nulls),
|
430
432
|
}
|
431
433
|
.map_err(RbPolarsErr::from)?;
|
432
434
|
Ok(df.into())
|
data/ext/polars/src/expr/datatype.rs
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
use polars::prelude::DataTypeExpr;
|
2
|
+
|
3
|
+
#[magnus::wrap(class = "Polars::RbDataTypeExpr")]
|
4
|
+
#[repr(transparent)]
|
5
|
+
#[derive(Clone)]
|
6
|
+
pub struct RbDataTypeExpr {
|
7
|
+
pub inner: DataTypeExpr,
|
8
|
+
}
|
9
|
+
|
10
|
+
impl From<DataTypeExpr> for RbDataTypeExpr {
|
11
|
+
fn from(expr: DataTypeExpr) -> Self {
|
12
|
+
RbDataTypeExpr { inner: expr }
|
13
|
+
}
|
14
|
+
}
|
data/ext/polars/src/expr/general.rs
CHANGED
@@ -1,15 +1,16 @@
|
|
1
1
|
use std::ops::Neg;
|
2
2
|
|
3
|
-
use magnus::{
|
3
|
+
use magnus::{RArray, Value};
|
4
4
|
use polars::lazy::dsl;
|
5
5
|
use polars::prelude::*;
|
6
6
|
use polars::series::ops::NullBehavior;
|
7
7
|
use polars_core::series::IsSorted;
|
8
8
|
|
9
|
-
use
|
9
|
+
use super::selector::RbSelector;
|
10
|
+
use crate::conversion::{Wrap, parse_fill_null_strategy};
|
10
11
|
use crate::map::lazy::map_single;
|
11
12
|
use crate::rb_exprs_to_exprs;
|
12
|
-
use crate::{RbExpr, RbResult};
|
13
|
+
use crate::{RbExpr, RbPolarsErr, RbResult};
|
13
14
|
|
14
15
|
impl RbExpr {
|
15
16
|
pub fn add(&self, rhs: &Self) -> RbResult<Self> {
|
@@ -276,17 +277,8 @@ impl RbExpr {
|
|
276
277
|
.into()
|
277
278
|
}
|
278
279
|
|
279
|
-
pub fn arg_sort(&self,
|
280
|
-
self.clone()
|
281
|
-
.inner
|
282
|
-
.arg_sort(SortOptions {
|
283
|
-
descending: reverse,
|
284
|
-
nulls_last,
|
285
|
-
multithreaded: true,
|
286
|
-
maintain_order: false,
|
287
|
-
limit: None,
|
288
|
-
})
|
289
|
-
.into()
|
280
|
+
pub fn arg_sort(&self, descending: bool, nulls_last: bool) -> Self {
|
281
|
+
self.inner.clone().arg_sort(descending, nulls_last).into()
|
290
282
|
}
|
291
283
|
|
292
284
|
pub fn top_k(&self, k: &Self) -> Self {
|
@@ -331,10 +323,15 @@ impl RbExpr {
|
|
331
323
|
self.inner.clone().arg_min().into()
|
332
324
|
}
|
333
325
|
|
334
|
-
pub fn search_sorted(
|
326
|
+
pub fn search_sorted(
|
327
|
+
&self,
|
328
|
+
element: &Self,
|
329
|
+
side: Wrap<SearchSortedSide>,
|
330
|
+
descending: bool,
|
331
|
+
) -> Self {
|
335
332
|
self.inner
|
336
333
|
.clone()
|
337
|
-
.search_sorted(element.inner.clone(), side.0)
|
334
|
+
.search_sorted(element.inner.clone(), side.0, descending)
|
338
335
|
.into()
|
339
336
|
}
|
340
337
|
|
@@ -389,16 +386,8 @@ impl RbExpr {
|
|
389
386
|
strategy: String,
|
390
387
|
limit: FillNullLimit,
|
391
388
|
) -> RbResult<Self> {
|
392
|
-
let
|
393
|
-
Ok(self
|
394
|
-
.inner
|
395
|
-
.clone()
|
396
|
-
.apply(
|
397
|
-
move |s| s.fill_null(strat).map(Some),
|
398
|
-
GetOutput::same_type(),
|
399
|
-
)
|
400
|
-
.with_fmt("fill_null_with_strategy")
|
401
|
-
.into())
|
389
|
+
let strategy = parse_fill_null_strategy(&strategy, limit)?;
|
390
|
+
Ok(self.inner.clone().fill_null_with_strategy(strategy).into())
|
402
391
|
}
|
403
392
|
|
404
393
|
pub fn fill_nan(&self, expr: &Self) -> Self {
|
@@ -658,10 +647,6 @@ impl RbExpr {
|
|
658
647
|
self.inner.clone().mode().into()
|
659
648
|
}
|
660
649
|
|
661
|
-
pub fn exclude(&self, columns: Vec<String>) -> Self {
|
662
|
-
self.inner.clone().exclude(columns).into()
|
663
|
-
}
|
664
|
-
|
665
650
|
pub fn interpolate(&self, method: Wrap<InterpolationMethod>) -> Self {
|
666
651
|
self.inner.clone().interpolate(method.0).into()
|
667
652
|
}
|
@@ -678,10 +663,10 @@ impl RbExpr {
|
|
678
663
|
self.inner.clone().upper_bound().into()
|
679
664
|
}
|
680
665
|
|
681
|
-
pub fn cumulative_eval(&self, expr: &Self,
|
666
|
+
pub fn cumulative_eval(&self, expr: &Self, min_samples: usize) -> Self {
|
682
667
|
self.inner
|
683
668
|
.clone()
|
684
|
-
.cumulative_eval(expr.inner.clone(),
|
669
|
+
.cumulative_eval(expr.inner.clone(), min_samples)
|
685
670
|
.into()
|
686
671
|
}
|
687
672
|
|
@@ -807,20 +792,10 @@ impl RbExpr {
|
|
807
792
|
self.inner.clone().ewm_var(options).into()
|
808
793
|
}
|
809
794
|
|
810
|
-
pub fn extend_constant(&self, value:
|
811
|
-
let value = value.into_value();
|
812
|
-
let value = Opaque::from(value);
|
795
|
+
pub fn extend_constant(&self, value: &Self, n: &Self) -> Self {
|
813
796
|
self.inner
|
814
797
|
.clone()
|
815
|
-
.
|
816
|
-
move |s| {
|
817
|
-
let value = Ruby::get().unwrap().get_inner(value);
|
818
|
-
let value = Wrap::<AnyValue>::try_convert(value).unwrap().0;
|
819
|
-
s.extend_constant(value, n).map(Some)
|
820
|
-
},
|
821
|
-
GetOutput::same_type(),
|
822
|
-
)
|
823
|
-
.with_fmt("extend")
|
798
|
+
.extend_constant(value.inner.clone(), n.inner.clone())
|
824
799
|
.into()
|
825
800
|
}
|
826
801
|
|
@@ -881,4 +856,21 @@ impl RbExpr {
|
|
881
856
|
)
|
882
857
|
.into()
|
883
858
|
}
|
859
|
+
|
860
|
+
#[allow(clippy::wrong_self_convention)]
|
861
|
+
pub fn into_selector(&self) -> RbResult<RbSelector> {
|
862
|
+
Ok(self
|
863
|
+
.inner
|
864
|
+
.clone()
|
865
|
+
.into_selector()
|
866
|
+
.ok_or_else(
|
867
|
+
|| polars_err!(InvalidOperation: "expr `{}` is not a selector", &self.inner),
|
868
|
+
)
|
869
|
+
.map_err(RbPolarsErr::from)?
|
870
|
+
.into())
|
871
|
+
}
|
872
|
+
|
873
|
+
pub fn new_selector(selector: &RbSelector) -> Self {
|
874
|
+
Expr::Selector(selector.inner.clone()).into()
|
875
|
+
}
|
884
876
|
}
|
data/ext/polars/src/expr/list.rs
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
use magnus::{prelude::*, value::Opaque
|
1
|
+
use magnus::{Ruby, Value, prelude::*, value::Opaque};
|
2
2
|
use polars::lazy::dsl::lit;
|
3
3
|
use polars::prelude::*;
|
4
4
|
use polars::series::ops::NullBehavior;
|
@@ -43,11 +43,15 @@ impl RbExpr {
|
|
43
43
|
Ok(self.inner.clone().list().diff(n, null_behavior.0).into())
|
44
44
|
}
|
45
45
|
|
46
|
-
pub fn list_eval(&self, expr: &RbExpr
|
46
|
+
pub fn list_eval(&self, expr: &RbExpr) -> Self {
|
47
|
+
self.inner.clone().list().eval(expr.inner.clone()).into()
|
48
|
+
}
|
49
|
+
|
50
|
+
pub fn list_filter(&self, predicate: &RbExpr) -> Self {
|
47
51
|
self.inner
|
48
52
|
.clone()
|
49
53
|
.list()
|
50
|
-
.eval(
|
54
|
+
.eval(Expr::Column(PlSmallStr::EMPTY).filter(predicate.inner.clone()))
|
51
55
|
.into()
|
52
56
|
}
|
53
57
|
|
@@ -76,12 +80,7 @@ impl RbExpr {
|
|
76
80
|
}
|
77
81
|
|
78
82
|
pub fn list_mean(&self) -> Self {
|
79
|
-
self.inner
|
80
|
-
.clone()
|
81
|
-
.list()
|
82
|
-
.mean()
|
83
|
-
.with_fmt("list.mean")
|
84
|
-
.into()
|
83
|
+
self.inner.clone().list().mean().into()
|
85
84
|
}
|
86
85
|
|
87
86
|
pub fn list_min(&self) -> Self {
|
@@ -116,20 +115,20 @@ impl RbExpr {
|
|
116
115
|
self.inner.clone().list().tail(n.inner.clone()).into()
|
117
116
|
}
|
118
117
|
|
119
|
-
pub fn list_sort(&self,
|
118
|
+
pub fn list_sort(&self, descending: bool, nulls_last: bool) -> Self {
|
120
119
|
self.inner
|
121
120
|
.clone()
|
122
121
|
.list()
|
123
|
-
.sort(
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
122
|
+
.sort(
|
123
|
+
SortOptions::default()
|
124
|
+
.with_order_descending(descending)
|
125
|
+
.with_nulls_last(nulls_last),
|
126
|
+
)
|
128
127
|
.into()
|
129
128
|
}
|
130
129
|
|
131
130
|
pub fn list_sum(&self) -> Self {
|
132
|
-
self.inner.clone().list().sum().
|
131
|
+
self.inner.clone().list().sum().into()
|
133
132
|
}
|
134
133
|
|
135
134
|
pub fn list_drop_nulls(&self) -> Self {
|
@@ -198,7 +197,7 @@ impl RbExpr {
|
|
198
197
|
.inner
|
199
198
|
.clone()
|
200
199
|
.list()
|
201
|
-
.to_struct(
|
200
|
+
.to_struct(ListToStruct::InferWidth {
|
202
201
|
infer_field_strategy: width_strat.0,
|
203
202
|
get_index_name: name_gen,
|
204
203
|
max_fields: upper_bound,
|
@@ -215,4 +214,15 @@ impl RbExpr {
|
|
215
214
|
e.list().unique().into()
|
216
215
|
}
|
217
216
|
}
|
217
|
+
|
218
|
+
pub fn list_set_operation(&self, other: &RbExpr, operation: Wrap<SetOperation>) -> Self {
|
219
|
+
let e = self.inner.clone().list();
|
220
|
+
match operation.0 {
|
221
|
+
SetOperation::Intersection => e.set_intersection(other.inner.clone()),
|
222
|
+
SetOperation::Difference => e.set_difference(other.inner.clone()),
|
223
|
+
SetOperation::Union => e.union(other.inner.clone()),
|
224
|
+
SetOperation::SymmetricDifference => e.set_symmetric_difference(other.inner.clone()),
|
225
|
+
}
|
226
|
+
.into()
|
227
|
+
}
|
218
228
|
}
|
data/ext/polars/src/expr/meta.rs
CHANGED
@@ -1,14 +1,21 @@
|
|
1
1
|
use magnus::RArray;
|
2
|
+
use polars::prelude::Schema;
|
2
3
|
|
3
|
-
use crate::{RbExpr, RbPolarsErr, RbResult};
|
4
|
+
use crate::{RbExpr, RbPolarsErr, RbResult, Wrap};
|
4
5
|
|
5
6
|
impl RbExpr {
|
6
7
|
pub fn meta_eq(&self, other: &RbExpr) -> bool {
|
7
8
|
self.inner == other.inner
|
8
9
|
}
|
9
10
|
|
10
|
-
pub fn meta_pop(&self) -> RbResult<RArray> {
|
11
|
-
let
|
11
|
+
pub fn meta_pop(&self, schema: Option<Wrap<Schema>>) -> RbResult<RArray> {
|
12
|
+
let schema = schema.as_ref().map(|s| &s.0);
|
13
|
+
let exprs = self
|
14
|
+
.inner
|
15
|
+
.clone()
|
16
|
+
.meta()
|
17
|
+
.pop(schema)
|
18
|
+
.map_err(RbPolarsErr::from)?;
|
12
19
|
Ok(RArray::from_iter(
|
13
20
|
exprs.iter().map(|e| RbExpr::from(e.clone())),
|
14
21
|
))
|
@@ -50,51 +57,21 @@ impl RbExpr {
|
|
50
57
|
self.inner.clone().meta().is_regex_projection()
|
51
58
|
}
|
52
59
|
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
._selector_add(other.inner.clone())
|
59
|
-
.map_err(RbPolarsErr::from)?;
|
60
|
-
Ok(out.into())
|
61
|
-
}
|
62
|
-
|
63
|
-
pub fn _meta_selector_sub(&self, other: &RbExpr) -> RbResult<RbExpr> {
|
64
|
-
let out = self
|
65
|
-
.inner
|
66
|
-
.clone()
|
67
|
-
.meta()
|
68
|
-
._selector_sub(other.inner.clone())
|
69
|
-
.map_err(RbPolarsErr::from)?;
|
70
|
-
Ok(out.into())
|
71
|
-
}
|
72
|
-
|
73
|
-
pub fn _meta_selector_and(&self, other: &RbExpr) -> RbResult<RbExpr> {
|
74
|
-
let out = self
|
75
|
-
.inner
|
76
|
-
.clone()
|
77
|
-
.meta()
|
78
|
-
._selector_and(other.inner.clone())
|
79
|
-
.map_err(RbPolarsErr::from)?;
|
80
|
-
Ok(out.into())
|
81
|
-
}
|
82
|
-
|
83
|
-
pub fn _meta_as_selector(&self) -> RbExpr {
|
84
|
-
self.inner.clone().meta()._into_selector().into()
|
85
|
-
}
|
86
|
-
|
87
|
-
fn compute_tree_format(&self, display_as_dot: bool) -> RbResult<String> {
|
60
|
+
fn compute_tree_format(
|
61
|
+
&self,
|
62
|
+
display_as_dot: bool,
|
63
|
+
schema: Option<Wrap<Schema>>,
|
64
|
+
) -> RbResult<String> {
|
88
65
|
let e = self
|
89
66
|
.inner
|
90
67
|
.clone()
|
91
68
|
.meta()
|
92
|
-
.into_tree_formatter(display_as_dot)
|
69
|
+
.into_tree_formatter(display_as_dot, schema.as_ref().map(|s| &s.0))
|
93
70
|
.map_err(RbPolarsErr::from)?;
|
94
71
|
Ok(format!("{e}"))
|
95
72
|
}
|
96
73
|
|
97
|
-
pub fn meta_tree_format(&self) -> RbResult<String> {
|
98
|
-
self.compute_tree_format(false)
|
74
|
+
pub fn meta_tree_format(&self, schema: Option<Wrap<Schema>>) -> RbResult<String> {
|
75
|
+
self.compute_tree_format(false, schema)
|
99
76
|
}
|
100
77
|
}
|
data/ext/polars/src/expr/mod.rs
CHANGED
@@ -1,16 +1,18 @@
|
|
1
1
|
mod array;
|
2
2
|
mod binary;
|
3
3
|
mod categorical;
|
4
|
+
pub mod datatype;
|
4
5
|
mod datetime;
|
5
6
|
mod general;
|
6
7
|
mod list;
|
7
8
|
mod meta;
|
8
9
|
mod name;
|
9
10
|
mod rolling;
|
11
|
+
pub mod selector;
|
10
12
|
mod string;
|
11
13
|
mod r#struct;
|
12
14
|
|
13
|
-
use magnus::{prelude
|
15
|
+
use magnus::{RArray, prelude::*};
|
14
16
|
use polars::lazy::dsl::Expr;
|
15
17
|
|
16
18
|
use crate::RbResult;
|
data/ext/polars/src/expr/name.rs
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
use magnus::{block::Proc, value::Opaque
|
1
|
+
use magnus::{Ruby, block::Proc, value::Opaque};
|
2
2
|
use polars::prelude::*;
|
3
3
|
use polars_utils::format_pl_smallstr;
|
4
4
|
|
@@ -20,7 +20,7 @@ impl RbExpr {
|
|
20
20
|
match out {
|
21
21
|
Ok(out) => Ok(format_pl_smallstr!("{}", out)),
|
22
22
|
Err(e) => Err(PolarsError::ComputeError(
|
23
|
-
format!("Ruby function in 'name.map' produced an error: {}."
|
23
|
+
format!("Ruby function in 'name.map' produced an error: {e}.").into(),
|
24
24
|
)),
|
25
25
|
}
|
26
26
|
})
|
data/ext/polars/src/expr/selector.rs
ADDED
@@ -0,0 +1,219 @@
|
|
1
|
+
use std::hash::{Hash, Hasher};
|
2
|
+
use std::sync::Arc;
|
3
|
+
|
4
|
+
use polars::prelude::{
|
5
|
+
DataType, DataTypeSelector, Selector, TimeUnit, TimeUnitSet, TimeZone, TimeZoneSet,
|
6
|
+
};
|
7
|
+
use polars_plan::dsl;
|
8
|
+
|
9
|
+
use crate::prelude::Wrap;
|
10
|
+
use crate::{RbResult, RbTypeError};
|
11
|
+
|
12
|
+
#[magnus::wrap(class = "Polars::RbSelector")]
|
13
|
+
#[repr(transparent)]
|
14
|
+
#[derive(Clone)]
|
15
|
+
pub struct RbSelector {
|
16
|
+
pub inner: Selector,
|
17
|
+
}
|
18
|
+
|
19
|
+
impl From<Selector> for RbSelector {
|
20
|
+
fn from(inner: Selector) -> Self {
|
21
|
+
Self { inner }
|
22
|
+
}
|
23
|
+
}
|
24
|
+
|
25
|
+
fn parse_time_unit_set(time_units: Vec<Wrap<TimeUnit>>) -> TimeUnitSet {
|
26
|
+
let mut tu = TimeUnitSet::empty();
|
27
|
+
for v in time_units {
|
28
|
+
match v.0 {
|
29
|
+
TimeUnit::Nanoseconds => tu |= TimeUnitSet::NANO_SECONDS,
|
30
|
+
TimeUnit::Microseconds => tu |= TimeUnitSet::MICRO_SECONDS,
|
31
|
+
TimeUnit::Milliseconds => tu |= TimeUnitSet::MILLI_SECONDS,
|
32
|
+
}
|
33
|
+
}
|
34
|
+
tu
|
35
|
+
}
|
36
|
+
|
37
|
+
pub fn parse_datatype_selector(selector: &RbSelector) -> RbResult<DataTypeSelector> {
|
38
|
+
selector.inner.clone().to_dtype_selector().ok_or_else(|| {
|
39
|
+
RbTypeError::new_err(format!(
|
40
|
+
"expected datatype based expression got '{}'",
|
41
|
+
selector.inner
|
42
|
+
))
|
43
|
+
})
|
44
|
+
}
|
45
|
+
|
46
|
+
impl RbSelector {
|
47
|
+
pub fn union(&self, other: &Self) -> Self {
|
48
|
+
Self {
|
49
|
+
inner: self.inner.clone() | other.inner.clone(),
|
50
|
+
}
|
51
|
+
}
|
52
|
+
|
53
|
+
pub fn difference(&self, other: &Self) -> Self {
|
54
|
+
Self {
|
55
|
+
inner: self.inner.clone() - other.inner.clone(),
|
56
|
+
}
|
57
|
+
}
|
58
|
+
|
59
|
+
pub fn exclusive_or(&self, other: &Self) -> Self {
|
60
|
+
Self {
|
61
|
+
inner: self.inner.clone() ^ other.inner.clone(),
|
62
|
+
}
|
63
|
+
}
|
64
|
+
|
65
|
+
pub fn intersect(&self, other: &Self) -> Self {
|
66
|
+
Self {
|
67
|
+
inner: self.inner.clone() & other.inner.clone(),
|
68
|
+
}
|
69
|
+
}
|
70
|
+
|
71
|
+
pub fn by_dtype(dtypes: Vec<Wrap<DataType>>) -> Self {
|
72
|
+
let dtypes = dtypes.into_iter().map(|x| x.0).collect::<Vec<_>>();
|
73
|
+
dsl::dtype_cols(dtypes).as_selector().into()
|
74
|
+
}
|
75
|
+
|
76
|
+
pub fn by_name(names: Vec<String>, strict: bool) -> Self {
|
77
|
+
dsl::by_name(names, strict).into()
|
78
|
+
}
|
79
|
+
|
80
|
+
pub fn by_index(indices: Vec<i64>, strict: bool) -> Self {
|
81
|
+
Selector::ByIndex {
|
82
|
+
indices: indices.into(),
|
83
|
+
strict,
|
84
|
+
}
|
85
|
+
.into()
|
86
|
+
}
|
87
|
+
|
88
|
+
pub fn first(strict: bool) -> Self {
|
89
|
+
Selector::ByIndex {
|
90
|
+
indices: [0].into(),
|
91
|
+
strict,
|
92
|
+
}
|
93
|
+
.into()
|
94
|
+
}
|
95
|
+
|
96
|
+
pub fn last(strict: bool) -> Self {
|
97
|
+
Selector::ByIndex {
|
98
|
+
indices: [-1].into(),
|
99
|
+
strict,
|
100
|
+
}
|
101
|
+
.into()
|
102
|
+
}
|
103
|
+
|
104
|
+
pub fn matches(pattern: String) -> Self {
|
105
|
+
Selector::Matches(pattern.into()).into()
|
106
|
+
}
|
107
|
+
|
108
|
+
pub fn enum_() -> Self {
|
109
|
+
DataTypeSelector::Enum.as_selector().into()
|
110
|
+
}
|
111
|
+
|
112
|
+
pub fn categorical() -> Self {
|
113
|
+
DataTypeSelector::Categorical.as_selector().into()
|
114
|
+
}
|
115
|
+
|
116
|
+
pub fn nested() -> Self {
|
117
|
+
DataTypeSelector::Nested.as_selector().into()
|
118
|
+
}
|
119
|
+
|
120
|
+
pub fn list(inner_dst: Option<&Self>) -> RbResult<Self> {
|
121
|
+
let inner_dst = match inner_dst {
|
122
|
+
None => None,
|
123
|
+
Some(inner_dst) => Some(Arc::new(parse_datatype_selector(inner_dst)?)),
|
124
|
+
};
|
125
|
+
Ok(DataTypeSelector::List(inner_dst).as_selector().into())
|
126
|
+
}
|
127
|
+
|
128
|
+
pub fn array(inner_dst: Option<&Self>, width: Option<usize>) -> RbResult<Self> {
|
129
|
+
let inner_dst = match inner_dst {
|
130
|
+
None => None,
|
131
|
+
Some(inner_dst) => Some(Arc::new(parse_datatype_selector(inner_dst)?)),
|
132
|
+
};
|
133
|
+
Ok(DataTypeSelector::Array(inner_dst, width)
|
134
|
+
.as_selector()
|
135
|
+
.into())
|
136
|
+
}
|
137
|
+
|
138
|
+
pub fn struct_() -> Self {
|
139
|
+
DataTypeSelector::Struct.as_selector().into()
|
140
|
+
}
|
141
|
+
|
142
|
+
pub fn integer() -> Self {
|
143
|
+
DataTypeSelector::Integer.as_selector().into()
|
144
|
+
}
|
145
|
+
|
146
|
+
pub fn signed_integer() -> Self {
|
147
|
+
DataTypeSelector::SignedInteger.as_selector().into()
|
148
|
+
}
|
149
|
+
|
150
|
+
pub fn unsigned_integer() -> Self {
|
151
|
+
DataTypeSelector::UnsignedInteger.as_selector().into()
|
152
|
+
}
|
153
|
+
|
154
|
+
pub fn float() -> Self {
|
155
|
+
DataTypeSelector::Float.as_selector().into()
|
156
|
+
}
|
157
|
+
|
158
|
+
pub fn decimal() -> Self {
|
159
|
+
DataTypeSelector::Decimal.as_selector().into()
|
160
|
+
}
|
161
|
+
|
162
|
+
pub fn numeric() -> Self {
|
163
|
+
DataTypeSelector::Numeric.as_selector().into()
|
164
|
+
}
|
165
|
+
|
166
|
+
pub fn temporal() -> Self {
|
167
|
+
DataTypeSelector::Temporal.as_selector().into()
|
168
|
+
}
|
169
|
+
|
170
|
+
pub fn datetime(tu: Vec<Wrap<TimeUnit>>, tz: Vec<Wrap<Option<TimeZone>>>) -> Self {
|
171
|
+
use TimeZoneSet as TZS;
|
172
|
+
|
173
|
+
let mut allow_unset = false;
|
174
|
+
let mut allow_set = false;
|
175
|
+
let mut any_of: Vec<TimeZone> = Vec::new();
|
176
|
+
|
177
|
+
let tu = parse_time_unit_set(tu);
|
178
|
+
for t in tz {
|
179
|
+
let t = t.0;
|
180
|
+
match t {
|
181
|
+
None => allow_unset = true,
|
182
|
+
Some(s) if s.as_str() == "*" => allow_set = true,
|
183
|
+
Some(t) => any_of.push(t),
|
184
|
+
}
|
185
|
+
}
|
186
|
+
|
187
|
+
let tzs = match (allow_unset, allow_set) {
|
188
|
+
(true, true) => TZS::Any,
|
189
|
+
(false, true) => TZS::AnySet,
|
190
|
+
(true, false) if any_of.is_empty() => TZS::Unset,
|
191
|
+
(true, false) => TZS::UnsetOrAnyOf(any_of.into()),
|
192
|
+
(false, false) => TZS::AnyOf(any_of.into()),
|
193
|
+
};
|
194
|
+
DataTypeSelector::Datetime(tu, tzs).as_selector().into()
|
195
|
+
}
|
196
|
+
|
197
|
+
pub fn duration(tu: Vec<Wrap<TimeUnit>>) -> Self {
|
198
|
+
let tu = parse_time_unit_set(tu);
|
199
|
+
DataTypeSelector::Duration(tu).as_selector().into()
|
200
|
+
}
|
201
|
+
|
202
|
+
pub fn object() -> Self {
|
203
|
+
DataTypeSelector::Object.as_selector().into()
|
204
|
+
}
|
205
|
+
|
206
|
+
pub fn empty() -> Self {
|
207
|
+
dsl::empty().into()
|
208
|
+
}
|
209
|
+
|
210
|
+
pub fn all() -> Self {
|
211
|
+
dsl::all().into()
|
212
|
+
}
|
213
|
+
|
214
|
+
pub fn hash(&self) -> u64 {
|
215
|
+
let mut hasher = std::hash::DefaultHasher::default();
|
216
|
+
self.inner.hash(&mut hasher);
|
217
|
+
hasher.finish()
|
218
|
+
}
|
219
|
+
}
|
data/ext/polars/src/expr/string.rs
CHANGED
@@ -163,12 +163,20 @@ impl RbExpr {
|
|
163
163
|
self.inner.clone().str().reverse().into()
|
164
164
|
}
|
165
165
|
|
166
|
-
pub fn str_pad_start(&self, length:
|
167
|
-
self.clone()
|
166
|
+
pub fn str_pad_start(&self, length: &RbExpr, fillchar: char) -> Self {
|
167
|
+
self.clone()
|
168
|
+
.inner
|
169
|
+
.str()
|
170
|
+
.pad_start(length.inner.clone(), fillchar)
|
171
|
+
.into()
|
168
172
|
}
|
169
173
|
|
170
|
-
pub fn str_pad_end(&self, length:
|
171
|
-
self.clone()
|
174
|
+
pub fn str_pad_end(&self, length: &RbExpr, fillchar: char) -> Self {
|
175
|
+
self.clone()
|
176
|
+
.inner
|
177
|
+
.str()
|
178
|
+
.pad_end(length.inner.clone(), fillchar)
|
179
|
+
.into()
|
172
180
|
}
|
173
181
|
|
174
182
|
pub fn str_zfill(&self, length: &Self) -> Self {
|
@@ -220,12 +228,11 @@ impl RbExpr {
|
|
220
228
|
self.inner.clone().str().base64_decode(strict).into()
|
221
229
|
}
|
222
230
|
|
223
|
-
pub fn str_to_integer(&self, base: &Self, strict: bool) -> Self {
|
231
|
+
pub fn str_to_integer(&self, base: &Self, dtype: Option<Wrap<DataType>>, strict: bool) -> Self {
|
224
232
|
self.inner
|
225
233
|
.clone()
|
226
234
|
.str()
|
227
|
-
.to_integer(base.inner.clone(), strict)
|
228
|
-
.with_fmt("str.to_integer")
|
235
|
+
.to_integer(base.inner.clone(), dtype.map(|wrap| wrap.0), strict)
|
229
236
|
.into()
|
230
237
|
}
|
231
238
|
|