polars-df 0.20.0 → 0.21.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +27 -0
- data/Cargo.lock +192 -186
- data/LICENSE.txt +1 -1
- data/ext/polars/Cargo.toml +19 -9
- data/ext/polars/src/batched_csv.rs +2 -2
- data/ext/polars/src/catalog/mod.rs +1 -0
- data/ext/polars/src/catalog/unity.rs +450 -0
- data/ext/polars/src/conversion/any_value.rs +9 -19
- data/ext/polars/src/conversion/categorical.rs +30 -0
- data/ext/polars/src/conversion/chunked_array.rs +8 -8
- data/ext/polars/src/conversion/mod.rs +275 -109
- data/ext/polars/src/dataframe/construction.rs +2 -2
- data/ext/polars/src/dataframe/export.rs +2 -2
- data/ext/polars/src/dataframe/general.rs +4 -2
- data/ext/polars/src/dataframe/io.rs +2 -2
- data/ext/polars/src/exceptions.rs +2 -1
- data/ext/polars/src/expr/array.rs +73 -4
- data/ext/polars/src/expr/binary.rs +26 -1
- data/ext/polars/src/expr/bitwise.rs +39 -0
- data/ext/polars/src/expr/categorical.rs +20 -0
- data/ext/polars/src/expr/datatype.rs +37 -0
- data/ext/polars/src/expr/datetime.rs +58 -0
- data/ext/polars/src/expr/general.rs +106 -22
- data/ext/polars/src/expr/list.rs +45 -2
- data/ext/polars/src/expr/meta.rs +5 -28
- data/ext/polars/src/expr/mod.rs +4 -1
- data/ext/polars/src/expr/name.rs +10 -2
- data/ext/polars/src/expr/rolling.rs +21 -1
- data/ext/polars/src/expr/selector.rs +219 -0
- data/ext/polars/src/expr/string.rs +73 -6
- data/ext/polars/src/expr/struct.rs +9 -1
- data/ext/polars/src/file.rs +11 -5
- data/ext/polars/src/functions/io.rs +21 -11
- data/ext/polars/src/functions/lazy.rs +26 -54
- data/ext/polars/src/functions/meta.rs +2 -2
- data/ext/polars/src/functions/misc.rs +1 -1
- data/ext/polars/src/functions/string_cache.rs +4 -5
- data/ext/polars/src/interop/numo/numo_rs.rs +1 -1
- data/ext/polars/src/interop/numo/to_numo_series.rs +1 -1
- data/ext/polars/src/io/mod.rs +102 -0
- data/ext/polars/src/lazyframe/general.rs +124 -111
- data/ext/polars/src/lazyframe/serde.rs +1 -1
- data/ext/polars/src/lazyframe/sink.rs +6 -6
- data/ext/polars/src/lib.rs +216 -29
- data/ext/polars/src/map/dataframe.rs +9 -9
- data/ext/polars/src/map/lazy.rs +1 -1
- data/ext/polars/src/map/mod.rs +31 -19
- data/ext/polars/src/map/series.rs +9 -9
- data/ext/polars/src/on_startup.rs +5 -2
- data/ext/polars/src/rb_modules.rs +1 -1
- data/ext/polars/src/series/aggregation.rs +44 -0
- data/ext/polars/src/series/construction.rs +11 -7
- data/ext/polars/src/series/export.rs +6 -4
- data/ext/polars/src/series/general.rs +75 -210
- data/ext/polars/src/series/import.rs +2 -2
- data/ext/polars/src/series/map.rs +227 -0
- data/ext/polars/src/series/mod.rs +2 -1
- data/ext/polars/src/series/scatter.rs +1 -1
- data/ext/polars/src/utils.rs +10 -2
- data/lib/polars/array_expr.rb +382 -3
- data/lib/polars/array_name_space.rb +281 -0
- data/lib/polars/binary_expr.rb +67 -0
- data/lib/polars/binary_name_space.rb +43 -0
- data/lib/polars/cat_expr.rb +224 -0
- data/lib/polars/cat_name_space.rb +130 -32
- data/lib/polars/catalog/unity/catalog_info.rb +20 -0
- data/lib/polars/catalog/unity/column_info.rb +31 -0
- data/lib/polars/catalog/unity/namespace_info.rb +21 -0
- data/lib/polars/catalog/unity/table_info.rb +50 -0
- data/lib/polars/catalog.rb +448 -0
- data/lib/polars/config.rb +2 -2
- data/lib/polars/convert.rb +12 -2
- data/lib/polars/data_frame.rb +834 -48
- data/lib/polars/data_type_expr.rb +52 -0
- data/lib/polars/data_types.rb +61 -5
- data/lib/polars/date_time_expr.rb +251 -0
- data/lib/polars/date_time_name_space.rb +299 -0
- data/lib/polars/exceptions.rb +7 -2
- data/lib/polars/expr.rb +1247 -211
- data/lib/polars/functions/col.rb +6 -5
- data/lib/polars/functions/datatype.rb +21 -0
- data/lib/polars/functions/lazy.rb +127 -15
- data/lib/polars/functions/repeat.rb +4 -0
- data/lib/polars/io/csv.rb +19 -1
- data/lib/polars/io/json.rb +16 -0
- data/lib/polars/io/ndjson.rb +13 -0
- data/lib/polars/io/parquet.rb +70 -66
- data/lib/polars/io/scan_options.rb +47 -0
- data/lib/polars/lazy_frame.rb +1099 -95
- data/lib/polars/list_expr.rb +400 -11
- data/lib/polars/list_name_space.rb +321 -5
- data/lib/polars/meta_expr.rb +71 -22
- data/lib/polars/name_expr.rb +36 -0
- data/lib/polars/scan_cast_options.rb +64 -0
- data/lib/polars/schema.rb +84 -3
- data/lib/polars/selector.rb +210 -0
- data/lib/polars/selectors.rb +932 -203
- data/lib/polars/series.rb +1083 -63
- data/lib/polars/string_expr.rb +435 -9
- data/lib/polars/string_name_space.rb +729 -45
- data/lib/polars/struct_expr.rb +103 -0
- data/lib/polars/struct_name_space.rb +19 -1
- data/lib/polars/utils/parse.rb +40 -0
- data/lib/polars/utils/various.rb +18 -1
- data/lib/polars/utils.rb +9 -1
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +10 -0
- metadata +20 -2
data/ext/polars/src/expr/list.rs
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
use magnus::{prelude::*, value::Opaque
|
1
|
+
use magnus::{Ruby, Value, prelude::*, value::Opaque};
|
2
2
|
use polars::lazy::dsl::lit;
|
3
3
|
use polars::prelude::*;
|
4
4
|
use polars::series::ops::NullBehavior;
|
@@ -47,6 +47,14 @@ impl RbExpr {
|
|
47
47
|
self.inner.clone().list().eval(expr.inner.clone()).into()
|
48
48
|
}
|
49
49
|
|
50
|
+
pub fn list_filter(&self, predicate: &RbExpr) -> Self {
|
51
|
+
self.inner
|
52
|
+
.clone()
|
53
|
+
.list()
|
54
|
+
.eval(Expr::Column(PlSmallStr::EMPTY).filter(predicate.inner.clone()))
|
55
|
+
.into()
|
56
|
+
}
|
57
|
+
|
50
58
|
pub fn list_get(&self, index: &RbExpr, null_on_oob: bool) -> Self {
|
51
59
|
self.inner
|
52
60
|
.clone()
|
@@ -75,6 +83,18 @@ impl RbExpr {
|
|
75
83
|
self.inner.clone().list().mean().into()
|
76
84
|
}
|
77
85
|
|
86
|
+
pub fn list_median(&self) -> Self {
|
87
|
+
self.inner.clone().list().median().into()
|
88
|
+
}
|
89
|
+
|
90
|
+
pub fn list_std(&self, ddof: u8) -> Self {
|
91
|
+
self.inner.clone().list().std(ddof).into()
|
92
|
+
}
|
93
|
+
|
94
|
+
pub fn list_var(&self, ddof: u8) -> Self {
|
95
|
+
self.inner.clone().list().var(ddof).into()
|
96
|
+
}
|
97
|
+
|
78
98
|
pub fn list_min(&self) -> Self {
|
79
99
|
self.inner.clone().list().min().into()
|
80
100
|
}
|
@@ -163,6 +183,14 @@ impl RbExpr {
|
|
163
183
|
.into()
|
164
184
|
}
|
165
185
|
|
186
|
+
pub fn list_gather_every(&self, n: &RbExpr, offset: &RbExpr) -> Self {
|
187
|
+
self.inner
|
188
|
+
.clone()
|
189
|
+
.list()
|
190
|
+
.gather_every(n.inner.clone(), offset.inner.clone())
|
191
|
+
.into()
|
192
|
+
}
|
193
|
+
|
166
194
|
pub fn list_to_array(&self, width: usize) -> Self {
|
167
195
|
self.inner.clone().list().to_array(width).into()
|
168
196
|
}
|
@@ -189,7 +217,7 @@ impl RbExpr {
|
|
189
217
|
.inner
|
190
218
|
.clone()
|
191
219
|
.list()
|
192
|
-
.to_struct(
|
220
|
+
.to_struct(ListToStruct::InferWidth {
|
193
221
|
infer_field_strategy: width_strat.0,
|
194
222
|
get_index_name: name_gen,
|
195
223
|
max_fields: upper_bound,
|
@@ -197,6 +225,10 @@ impl RbExpr {
|
|
197
225
|
.into())
|
198
226
|
}
|
199
227
|
|
228
|
+
pub fn list_n_unique(&self) -> Self {
|
229
|
+
self.inner.clone().list().n_unique().into()
|
230
|
+
}
|
231
|
+
|
200
232
|
pub fn list_unique(&self, maintain_order: bool) -> Self {
|
201
233
|
let e = self.inner.clone();
|
202
234
|
|
@@ -206,4 +238,15 @@ impl RbExpr {
|
|
206
238
|
e.list().unique().into()
|
207
239
|
}
|
208
240
|
}
|
241
|
+
|
242
|
+
pub fn list_set_operation(&self, other: &RbExpr, operation: Wrap<SetOperation>) -> Self {
|
243
|
+
let e = self.inner.clone().list();
|
244
|
+
match operation.0 {
|
245
|
+
SetOperation::Intersection => e.set_intersection(other.inner.clone()),
|
246
|
+
SetOperation::Difference => e.set_difference(other.inner.clone()),
|
247
|
+
SetOperation::Union => e.union(other.inner.clone()),
|
248
|
+
SetOperation::SymmetricDifference => e.set_symmetric_difference(other.inner.clone()),
|
249
|
+
}
|
250
|
+
.into()
|
251
|
+
}
|
209
252
|
}
|
data/ext/polars/src/expr/meta.rs
CHANGED
@@ -57,38 +57,15 @@ impl RbExpr {
|
|
57
57
|
self.inner.clone().meta().is_regex_projection()
|
58
58
|
}
|
59
59
|
|
60
|
-
pub fn
|
61
|
-
|
62
|
-
.inner
|
63
|
-
.clone()
|
64
|
-
.meta()
|
65
|
-
._selector_add(other.inner.clone())
|
66
|
-
.map_err(RbPolarsErr::from)?;
|
67
|
-
Ok(out.into())
|
68
|
-
}
|
69
|
-
|
70
|
-
pub fn _meta_selector_sub(&self, other: &RbExpr) -> RbResult<RbExpr> {
|
71
|
-
let out = self
|
72
|
-
.inner
|
73
|
-
.clone()
|
74
|
-
.meta()
|
75
|
-
._selector_sub(other.inner.clone())
|
76
|
-
.map_err(RbPolarsErr::from)?;
|
77
|
-
Ok(out.into())
|
78
|
-
}
|
79
|
-
|
80
|
-
pub fn _meta_selector_and(&self, other: &RbExpr) -> RbResult<RbExpr> {
|
81
|
-
let out = self
|
82
|
-
.inner
|
60
|
+
pub fn meta_is_column_selection(&self, allow_aliasing: bool) -> bool {
|
61
|
+
self.inner
|
83
62
|
.clone()
|
84
63
|
.meta()
|
85
|
-
.
|
86
|
-
.map_err(RbPolarsErr::from)?;
|
87
|
-
Ok(out.into())
|
64
|
+
.is_column_selection(allow_aliasing)
|
88
65
|
}
|
89
66
|
|
90
|
-
pub fn
|
91
|
-
self.inner.clone().meta().
|
67
|
+
pub fn meta_is_literal(&self, allow_aliasing: bool) -> bool {
|
68
|
+
self.inner.clone().meta().is_literal(allow_aliasing)
|
92
69
|
}
|
93
70
|
|
94
71
|
fn compute_tree_format(
|
data/ext/polars/src/expr/mod.rs
CHANGED
@@ -1,16 +1,19 @@
|
|
1
1
|
mod array;
|
2
2
|
mod binary;
|
3
|
+
mod bitwise;
|
3
4
|
mod categorical;
|
5
|
+
pub mod datatype;
|
4
6
|
mod datetime;
|
5
7
|
mod general;
|
6
8
|
mod list;
|
7
9
|
mod meta;
|
8
10
|
mod name;
|
9
11
|
mod rolling;
|
12
|
+
pub mod selector;
|
10
13
|
mod string;
|
11
14
|
mod r#struct;
|
12
15
|
|
13
|
-
use magnus::{prelude
|
16
|
+
use magnus::{RArray, prelude::*};
|
14
17
|
use polars::lazy::dsl::Expr;
|
15
18
|
|
16
19
|
use crate::RbResult;
|
data/ext/polars/src/expr/name.rs
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
use magnus::{block::Proc, value::Opaque
|
1
|
+
use magnus::{Ruby, block::Proc, value::Opaque};
|
2
2
|
use polars::prelude::*;
|
3
3
|
use polars_utils::format_pl_smallstr;
|
4
4
|
|
@@ -20,7 +20,7 @@ impl RbExpr {
|
|
20
20
|
match out {
|
21
21
|
Ok(out) => Ok(format_pl_smallstr!("{}", out)),
|
22
22
|
Err(e) => Err(PolarsError::ComputeError(
|
23
|
-
format!("Ruby function in 'name.map' produced an error: {}."
|
23
|
+
format!("Ruby function in 'name.map' produced an error: {e}.").into(),
|
24
24
|
)),
|
25
25
|
}
|
26
26
|
})
|
@@ -42,4 +42,12 @@ impl RbExpr {
|
|
42
42
|
pub fn name_to_uppercase(&self) -> Self {
|
43
43
|
self.inner.clone().name().to_uppercase().into()
|
44
44
|
}
|
45
|
+
|
46
|
+
pub fn name_prefix_fields(&self, prefix: String) -> Self {
|
47
|
+
self.inner.clone().name().prefix_fields(&prefix).into()
|
48
|
+
}
|
49
|
+
|
50
|
+
pub fn name_suffix_fields(&self, suffix: String) -> Self {
|
51
|
+
self.inner.clone().name().suffix_fields(&suffix).into()
|
52
|
+
}
|
45
53
|
}
|
data/ext/polars/src/expr/rolling.rs
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
use polars::prelude::*;
|
2
2
|
|
3
|
-
use crate::conversion::Wrap;
|
4
3
|
use crate::RbExpr;
|
4
|
+
use crate::conversion::Wrap;
|
5
5
|
|
6
6
|
impl RbExpr {
|
7
7
|
pub fn rolling_sum(
|
@@ -337,4 +337,24 @@ impl RbExpr {
|
|
337
337
|
|
338
338
|
self.inner.clone().rolling_skew(options).into()
|
339
339
|
}
|
340
|
+
|
341
|
+
pub fn rolling_kurtosis(
|
342
|
+
&self,
|
343
|
+
window_size: usize,
|
344
|
+
fisher: bool,
|
345
|
+
bias: bool,
|
346
|
+
min_periods: Option<usize>,
|
347
|
+
center: bool,
|
348
|
+
) -> Self {
|
349
|
+
let min_periods = min_periods.unwrap_or(window_size);
|
350
|
+
let options = RollingOptionsFixedWindow {
|
351
|
+
window_size,
|
352
|
+
weights: None,
|
353
|
+
min_periods,
|
354
|
+
center,
|
355
|
+
fn_params: Some(RollingFnParams::Kurtosis { fisher, bias }),
|
356
|
+
};
|
357
|
+
|
358
|
+
self.inner.clone().rolling_kurtosis(options).into()
|
359
|
+
}
|
340
360
|
}
|
data/ext/polars/src/expr/selector.rs
ADDED
@@ -0,0 +1,219 @@
|
|
1
|
+
use std::hash::{Hash, Hasher};
|
2
|
+
use std::sync::Arc;
|
3
|
+
|
4
|
+
use polars::prelude::{
|
5
|
+
DataType, DataTypeSelector, Selector, TimeUnit, TimeUnitSet, TimeZone, TimeZoneSet,
|
6
|
+
};
|
7
|
+
use polars_plan::dsl;
|
8
|
+
|
9
|
+
use crate::prelude::Wrap;
|
10
|
+
use crate::{RbResult, RbTypeError};
|
11
|
+
|
12
|
+
#[magnus::wrap(class = "Polars::RbSelector")]
|
13
|
+
#[repr(transparent)]
|
14
|
+
#[derive(Clone)]
|
15
|
+
pub struct RbSelector {
|
16
|
+
pub inner: Selector,
|
17
|
+
}
|
18
|
+
|
19
|
+
impl From<Selector> for RbSelector {
|
20
|
+
fn from(inner: Selector) -> Self {
|
21
|
+
Self { inner }
|
22
|
+
}
|
23
|
+
}
|
24
|
+
|
25
|
+
fn parse_time_unit_set(time_units: Vec<Wrap<TimeUnit>>) -> TimeUnitSet {
|
26
|
+
let mut tu = TimeUnitSet::empty();
|
27
|
+
for v in time_units {
|
28
|
+
match v.0 {
|
29
|
+
TimeUnit::Nanoseconds => tu |= TimeUnitSet::NANO_SECONDS,
|
30
|
+
TimeUnit::Microseconds => tu |= TimeUnitSet::MICRO_SECONDS,
|
31
|
+
TimeUnit::Milliseconds => tu |= TimeUnitSet::MILLI_SECONDS,
|
32
|
+
}
|
33
|
+
}
|
34
|
+
tu
|
35
|
+
}
|
36
|
+
|
37
|
+
pub fn parse_datatype_selector(selector: &RbSelector) -> RbResult<DataTypeSelector> {
|
38
|
+
selector.inner.clone().to_dtype_selector().ok_or_else(|| {
|
39
|
+
RbTypeError::new_err(format!(
|
40
|
+
"expected datatype based expression got '{}'",
|
41
|
+
selector.inner
|
42
|
+
))
|
43
|
+
})
|
44
|
+
}
|
45
|
+
|
46
|
+
impl RbSelector {
|
47
|
+
pub fn union(&self, other: &Self) -> Self {
|
48
|
+
Self {
|
49
|
+
inner: self.inner.clone() | other.inner.clone(),
|
50
|
+
}
|
51
|
+
}
|
52
|
+
|
53
|
+
pub fn difference(&self, other: &Self) -> Self {
|
54
|
+
Self {
|
55
|
+
inner: self.inner.clone() - other.inner.clone(),
|
56
|
+
}
|
57
|
+
}
|
58
|
+
|
59
|
+
pub fn exclusive_or(&self, other: &Self) -> Self {
|
60
|
+
Self {
|
61
|
+
inner: self.inner.clone() ^ other.inner.clone(),
|
62
|
+
}
|
63
|
+
}
|
64
|
+
|
65
|
+
pub fn intersect(&self, other: &Self) -> Self {
|
66
|
+
Self {
|
67
|
+
inner: self.inner.clone() & other.inner.clone(),
|
68
|
+
}
|
69
|
+
}
|
70
|
+
|
71
|
+
pub fn by_dtype(dtypes: Vec<Wrap<DataType>>) -> Self {
|
72
|
+
let dtypes = dtypes.into_iter().map(|x| x.0).collect::<Vec<_>>();
|
73
|
+
dsl::dtype_cols(dtypes).as_selector().into()
|
74
|
+
}
|
75
|
+
|
76
|
+
pub fn by_name(names: Vec<String>, strict: bool) -> Self {
|
77
|
+
dsl::by_name(names, strict).into()
|
78
|
+
}
|
79
|
+
|
80
|
+
pub fn by_index(indices: Vec<i64>, strict: bool) -> Self {
|
81
|
+
Selector::ByIndex {
|
82
|
+
indices: indices.into(),
|
83
|
+
strict,
|
84
|
+
}
|
85
|
+
.into()
|
86
|
+
}
|
87
|
+
|
88
|
+
pub fn first(strict: bool) -> Self {
|
89
|
+
Selector::ByIndex {
|
90
|
+
indices: [0].into(),
|
91
|
+
strict,
|
92
|
+
}
|
93
|
+
.into()
|
94
|
+
}
|
95
|
+
|
96
|
+
pub fn last(strict: bool) -> Self {
|
97
|
+
Selector::ByIndex {
|
98
|
+
indices: [-1].into(),
|
99
|
+
strict,
|
100
|
+
}
|
101
|
+
.into()
|
102
|
+
}
|
103
|
+
|
104
|
+
pub fn matches(pattern: String) -> Self {
|
105
|
+
Selector::Matches(pattern.into()).into()
|
106
|
+
}
|
107
|
+
|
108
|
+
pub fn enum_() -> Self {
|
109
|
+
DataTypeSelector::Enum.as_selector().into()
|
110
|
+
}
|
111
|
+
|
112
|
+
pub fn categorical() -> Self {
|
113
|
+
DataTypeSelector::Categorical.as_selector().into()
|
114
|
+
}
|
115
|
+
|
116
|
+
pub fn nested() -> Self {
|
117
|
+
DataTypeSelector::Nested.as_selector().into()
|
118
|
+
}
|
119
|
+
|
120
|
+
pub fn list(inner_dst: Option<&Self>) -> RbResult<Self> {
|
121
|
+
let inner_dst = match inner_dst {
|
122
|
+
None => None,
|
123
|
+
Some(inner_dst) => Some(Arc::new(parse_datatype_selector(inner_dst)?)),
|
124
|
+
};
|
125
|
+
Ok(DataTypeSelector::List(inner_dst).as_selector().into())
|
126
|
+
}
|
127
|
+
|
128
|
+
pub fn array(inner_dst: Option<&Self>, width: Option<usize>) -> RbResult<Self> {
|
129
|
+
let inner_dst = match inner_dst {
|
130
|
+
None => None,
|
131
|
+
Some(inner_dst) => Some(Arc::new(parse_datatype_selector(inner_dst)?)),
|
132
|
+
};
|
133
|
+
Ok(DataTypeSelector::Array(inner_dst, width)
|
134
|
+
.as_selector()
|
135
|
+
.into())
|
136
|
+
}
|
137
|
+
|
138
|
+
pub fn struct_() -> Self {
|
139
|
+
DataTypeSelector::Struct.as_selector().into()
|
140
|
+
}
|
141
|
+
|
142
|
+
pub fn integer() -> Self {
|
143
|
+
DataTypeSelector::Integer.as_selector().into()
|
144
|
+
}
|
145
|
+
|
146
|
+
pub fn signed_integer() -> Self {
|
147
|
+
DataTypeSelector::SignedInteger.as_selector().into()
|
148
|
+
}
|
149
|
+
|
150
|
+
pub fn unsigned_integer() -> Self {
|
151
|
+
DataTypeSelector::UnsignedInteger.as_selector().into()
|
152
|
+
}
|
153
|
+
|
154
|
+
pub fn float() -> Self {
|
155
|
+
DataTypeSelector::Float.as_selector().into()
|
156
|
+
}
|
157
|
+
|
158
|
+
pub fn decimal() -> Self {
|
159
|
+
DataTypeSelector::Decimal.as_selector().into()
|
160
|
+
}
|
161
|
+
|
162
|
+
pub fn numeric() -> Self {
|
163
|
+
DataTypeSelector::Numeric.as_selector().into()
|
164
|
+
}
|
165
|
+
|
166
|
+
pub fn temporal() -> Self {
|
167
|
+
DataTypeSelector::Temporal.as_selector().into()
|
168
|
+
}
|
169
|
+
|
170
|
+
pub fn datetime(tu: Vec<Wrap<TimeUnit>>, tz: Vec<Wrap<Option<TimeZone>>>) -> Self {
|
171
|
+
use TimeZoneSet as TZS;
|
172
|
+
|
173
|
+
let mut allow_unset = false;
|
174
|
+
let mut allow_set = false;
|
175
|
+
let mut any_of: Vec<TimeZone> = Vec::new();
|
176
|
+
|
177
|
+
let tu = parse_time_unit_set(tu);
|
178
|
+
for t in tz {
|
179
|
+
let t = t.0;
|
180
|
+
match t {
|
181
|
+
None => allow_unset = true,
|
182
|
+
Some(s) if s.as_str() == "*" => allow_set = true,
|
183
|
+
Some(t) => any_of.push(t),
|
184
|
+
}
|
185
|
+
}
|
186
|
+
|
187
|
+
let tzs = match (allow_unset, allow_set) {
|
188
|
+
(true, true) => TZS::Any,
|
189
|
+
(false, true) => TZS::AnySet,
|
190
|
+
(true, false) if any_of.is_empty() => TZS::Unset,
|
191
|
+
(true, false) => TZS::UnsetOrAnyOf(any_of.into()),
|
192
|
+
(false, false) => TZS::AnyOf(any_of.into()),
|
193
|
+
};
|
194
|
+
DataTypeSelector::Datetime(tu, tzs).as_selector().into()
|
195
|
+
}
|
196
|
+
|
197
|
+
pub fn duration(tu: Vec<Wrap<TimeUnit>>) -> Self {
|
198
|
+
let tu = parse_time_unit_set(tu);
|
199
|
+
DataTypeSelector::Duration(tu).as_selector().into()
|
200
|
+
}
|
201
|
+
|
202
|
+
pub fn object() -> Self {
|
203
|
+
DataTypeSelector::Object.as_selector().into()
|
204
|
+
}
|
205
|
+
|
206
|
+
pub fn empty() -> Self {
|
207
|
+
dsl::empty().into()
|
208
|
+
}
|
209
|
+
|
210
|
+
pub fn all() -> Self {
|
211
|
+
dsl::all().into()
|
212
|
+
}
|
213
|
+
|
214
|
+
pub fn hash(&self) -> u64 {
|
215
|
+
let mut hasher = std::hash::DefaultHasher::default();
|
216
|
+
self.inner.hash(&mut hasher);
|
217
|
+
hasher.finish()
|
218
|
+
}
|
219
|
+
}
|
data/ext/polars/src/expr/string.rs
CHANGED
@@ -122,6 +122,14 @@ impl RbExpr {
|
|
122
122
|
.into()
|
123
123
|
}
|
124
124
|
|
125
|
+
pub fn str_head(&self, n: &Self) -> Self {
|
126
|
+
self.inner.clone().str().head(n.inner.clone()).into()
|
127
|
+
}
|
128
|
+
|
129
|
+
pub fn str_tail(&self, n: &Self) -> Self {
|
130
|
+
self.inner.clone().str().tail(n.inner.clone()).into()
|
131
|
+
}
|
132
|
+
|
125
133
|
pub fn str_to_uppercase(&self) -> Self {
|
126
134
|
self.inner.clone().str().to_uppercase().into()
|
127
135
|
}
|
@@ -159,16 +167,28 @@ impl RbExpr {
|
|
159
167
|
.into()
|
160
168
|
}
|
161
169
|
|
170
|
+
pub fn str_normalize(&self, form: Wrap<UnicodeForm>) -> Self {
|
171
|
+
self.inner.clone().str().normalize(form.0).into()
|
172
|
+
}
|
173
|
+
|
162
174
|
pub fn str_reverse(&self) -> Self {
|
163
175
|
self.inner.clone().str().reverse().into()
|
164
176
|
}
|
165
177
|
|
166
|
-
pub fn str_pad_start(&self, length:
|
167
|
-
self.clone()
|
178
|
+
pub fn str_pad_start(&self, length: &RbExpr, fillchar: char) -> Self {
|
179
|
+
self.clone()
|
180
|
+
.inner
|
181
|
+
.str()
|
182
|
+
.pad_start(length.inner.clone(), fillchar)
|
183
|
+
.into()
|
168
184
|
}
|
169
185
|
|
170
|
-
pub fn str_pad_end(&self, length:
|
171
|
-
self.clone()
|
186
|
+
pub fn str_pad_end(&self, length: &RbExpr, fillchar: char) -> Self {
|
187
|
+
self.clone()
|
188
|
+
.inner
|
189
|
+
.str()
|
190
|
+
.pad_end(length.inner.clone(), fillchar)
|
191
|
+
.into()
|
172
192
|
}
|
173
193
|
|
174
194
|
pub fn str_zfill(&self, length: &Self) -> Self {
|
@@ -192,6 +212,23 @@ impl RbExpr {
|
|
192
212
|
}
|
193
213
|
}
|
194
214
|
|
215
|
+
pub fn str_find(&self, pat: &Self, literal: Option<bool>, strict: bool) -> Self {
|
216
|
+
match literal {
|
217
|
+
Some(true) => self
|
218
|
+
.inner
|
219
|
+
.clone()
|
220
|
+
.str()
|
221
|
+
.find_literal(pat.inner.clone())
|
222
|
+
.into(),
|
223
|
+
_ => self
|
224
|
+
.inner
|
225
|
+
.clone()
|
226
|
+
.str()
|
227
|
+
.find(pat.inner.clone(), strict)
|
228
|
+
.into(),
|
229
|
+
}
|
230
|
+
}
|
231
|
+
|
195
232
|
pub fn str_ends_with(&self, sub: &RbExpr) -> Self {
|
196
233
|
self.inner.clone().str().ends_with(sub.inner.clone()).into()
|
197
234
|
}
|
@@ -220,11 +257,11 @@ impl RbExpr {
|
|
220
257
|
self.inner.clone().str().base64_decode(strict).into()
|
221
258
|
}
|
222
259
|
|
223
|
-
pub fn str_to_integer(&self, base: &Self, strict: bool) -> Self {
|
260
|
+
pub fn str_to_integer(&self, base: &Self, dtype: Option<Wrap<DataType>>, strict: bool) -> Self {
|
224
261
|
self.inner
|
225
262
|
.clone()
|
226
263
|
.str()
|
227
|
-
.to_integer(base.inner.clone(), strict)
|
264
|
+
.to_integer(base.inner.clone(), dtype.map(|wrap| wrap.0), strict)
|
228
265
|
.into()
|
229
266
|
}
|
230
267
|
|
@@ -343,4 +380,34 @@ impl RbExpr {
|
|
343
380
|
)
|
344
381
|
.into()
|
345
382
|
}
|
383
|
+
|
384
|
+
pub fn str_extract_many(
|
385
|
+
&self,
|
386
|
+
patterns: &RbExpr,
|
387
|
+
ascii_case_insensitive: bool,
|
388
|
+
overlapping: bool,
|
389
|
+
) -> Self {
|
390
|
+
self.inner
|
391
|
+
.clone()
|
392
|
+
.str()
|
393
|
+
.extract_many(patterns.inner.clone(), ascii_case_insensitive, overlapping)
|
394
|
+
.into()
|
395
|
+
}
|
396
|
+
|
397
|
+
pub fn str_find_many(
|
398
|
+
&self,
|
399
|
+
patterns: &RbExpr,
|
400
|
+
ascii_case_insensitive: bool,
|
401
|
+
overlapping: bool,
|
402
|
+
) -> Self {
|
403
|
+
self.inner
|
404
|
+
.clone()
|
405
|
+
.str()
|
406
|
+
.find_many(patterns.inner.clone(), ascii_case_insensitive, overlapping)
|
407
|
+
.into()
|
408
|
+
}
|
409
|
+
|
410
|
+
pub fn str_escape_regex(&self) -> Self {
|
411
|
+
self.inner.clone().str().escape_regex().into()
|
412
|
+
}
|
346
413
|
}
|
data/ext/polars/src/expr/struct.rs
CHANGED
@@ -1,4 +1,6 @@
|
|
1
|
-
use
|
1
|
+
use magnus::RArray;
|
2
|
+
|
3
|
+
use crate::{RbExpr, RbResult, rb_exprs_to_exprs};
|
2
4
|
|
3
5
|
impl RbExpr {
|
4
6
|
pub fn struct_field_by_index(&self, index: i64) -> Self {
|
@@ -16,4 +18,10 @@ impl RbExpr {
|
|
16
18
|
pub fn struct_json_encode(&self) -> Self {
|
17
19
|
self.inner.clone().struct_().json_encode().into()
|
18
20
|
}
|
21
|
+
|
22
|
+
pub fn struct_with_fields(&self, fields: RArray) -> RbResult<Self> {
|
23
|
+
let fields = rb_exprs_to_exprs(fields)?;
|
24
|
+
let e = self.inner.clone().struct_().with_fields(fields);
|
25
|
+
Ok(e.into())
|
26
|
+
}
|
19
27
|
}
|
data/ext/polars/src/file.rs
CHANGED
@@ -3,17 +3,18 @@ use std::io;
|
|
3
3
|
use std::io::{Cursor, Read, Seek, SeekFrom, Write};
|
4
4
|
use std::path::PathBuf;
|
5
5
|
|
6
|
-
use magnus::{
|
6
|
+
use magnus::{Error, RString, Ruby, Value, exception, prelude::*, value::Opaque};
|
7
7
|
use polars::io::cloud::CloudOptions;
|
8
8
|
use polars::io::mmap::MmapBytesReader;
|
9
|
+
use polars::prelude::PlPath;
|
9
10
|
use polars::prelude::file::DynWriteable;
|
10
11
|
use polars::prelude::sync_on_close::SyncOnCloseType;
|
11
12
|
use polars_utils::file::ClosableFile;
|
12
13
|
use polars_utils::mmap::MemSlice;
|
13
14
|
|
15
|
+
use crate::RbResult;
|
14
16
|
use crate::error::RbPolarsErr;
|
15
17
|
use crate::prelude::resolve_homedir;
|
16
|
-
use crate::RbResult;
|
17
18
|
|
18
19
|
#[derive(Clone)]
|
19
20
|
pub struct RbFileLikeObject {
|
@@ -188,7 +189,7 @@ impl EitherRustRubyFile {
|
|
188
189
|
|
189
190
|
pub enum RubyScanSourceInput {
|
190
191
|
Buffer(MemSlice),
|
191
|
-
Path(
|
192
|
+
Path(PlPath),
|
192
193
|
#[allow(dead_code)]
|
193
194
|
File(File),
|
194
195
|
}
|
@@ -202,8 +203,13 @@ pub(crate) fn try_get_rbfile(
|
|
202
203
|
}
|
203
204
|
|
204
205
|
pub fn get_ruby_scan_source_input(rb_f: Value, write: bool) -> RbResult<RubyScanSourceInput> {
|
205
|
-
if let Ok(
|
206
|
-
|
206
|
+
if let Ok(s) = String::try_convert(rb_f) {
|
207
|
+
let mut file_path = PlPath::new(&s);
|
208
|
+
if let Some(p) = file_path.as_ref().as_local_path() {
|
209
|
+
if p.starts_with("~/") {
|
210
|
+
file_path = PlPath::Local(resolve_homedir(&p).into());
|
211
|
+
}
|
212
|
+
}
|
207
213
|
Ok(RubyScanSourceInput::Path(file_path))
|
208
214
|
} else {
|
209
215
|
let f = RbFileLikeObject::with_requirements(rb_f, !write, write, !write)?;
|
data/ext/polars/src/functions/io.rs
CHANGED
@@ -1,13 +1,10 @@
|
|
1
1
|
use std::io::BufReader;
|
2
2
|
|
3
|
-
use arrow::array::Utf8ViewArray;
|
4
3
|
use magnus::{RHash, Value};
|
5
4
|
use polars::prelude::ArrowSchema;
|
6
|
-
use polars_core::datatypes::create_enum_dtype;
|
7
5
|
|
8
6
|
use crate::conversion::Wrap;
|
9
|
-
use crate::file::{
|
10
|
-
use crate::prelude::ArrowDataType;
|
7
|
+
use crate::file::{EitherRustRubyFile, get_either_file};
|
11
8
|
use crate::{RbPolarsErr, RbResult};
|
12
9
|
|
13
10
|
pub fn read_ipc_schema(rb_f: Value) -> RbResult<RHash> {
|
@@ -24,6 +21,25 @@ pub fn read_ipc_schema(rb_f: Value) -> RbResult<RHash> {
|
|
24
21
|
Ok(dict)
|
25
22
|
}
|
26
23
|
|
24
|
+
pub fn read_parquet_metadata(rb_f: Value) -> RbResult<RHash> {
|
25
|
+
use polars_parquet::read::read_metadata;
|
26
|
+
use polars_parquet::read::schema::read_custom_key_value_metadata;
|
27
|
+
|
28
|
+
let metadata = match get_either_file(rb_f, false)? {
|
29
|
+
EitherRustRubyFile::Rust(r) => {
|
30
|
+
read_metadata(&mut BufReader::new(r)).map_err(RbPolarsErr::from)?
|
31
|
+
}
|
32
|
+
EitherRustRubyFile::Rb(mut r) => read_metadata(&mut r).map_err(RbPolarsErr::from)?,
|
33
|
+
};
|
34
|
+
|
35
|
+
let key_value_metadata = read_custom_key_value_metadata(metadata.key_value_metadata());
|
36
|
+
let dict = RHash::new();
|
37
|
+
for (key, value) in key_value_metadata.into_iter() {
|
38
|
+
dict.aset(key.as_str(), value.as_str())?;
|
39
|
+
}
|
40
|
+
Ok(dict)
|
41
|
+
}
|
42
|
+
|
27
43
|
pub fn read_parquet_schema(rb_f: Value) -> RbResult<RHash> {
|
28
44
|
use polars_parquet::read::{infer_schema, read_metadata};
|
29
45
|
|
@@ -42,13 +58,7 @@ pub fn read_parquet_schema(rb_f: Value) -> RbResult<RHash> {
|
|
42
58
|
|
43
59
|
fn fields_to_rbdict(schema: &ArrowSchema, dict: &RHash) -> RbResult<()> {
|
44
60
|
for field in schema.iter_values() {
|
45
|
-
let dt =
|
46
|
-
Wrap(create_enum_dtype(Utf8ViewArray::new_empty(
|
47
|
-
ArrowDataType::Utf8View,
|
48
|
-
)))
|
49
|
-
} else {
|
50
|
-
Wrap(polars::prelude::DataType::from_arrow_field(field))
|
51
|
-
};
|
61
|
+
let dt = Wrap(polars::prelude::DataType::from_arrow_field(field));
|
52
62
|
dict.aset(field.name.as_str(), dt)?;
|
53
63
|
}
|
54
64
|
Ok(())
|