polars-df 0.20.0 → 0.21.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/Cargo.lock +192 -186
- data/LICENSE.txt +1 -1
- data/ext/polars/Cargo.toml +13 -9
- data/ext/polars/src/batched_csv.rs +2 -2
- data/ext/polars/src/catalog/mod.rs +1 -0
- data/ext/polars/src/catalog/unity.rs +450 -0
- data/ext/polars/src/conversion/any_value.rs +9 -19
- data/ext/polars/src/conversion/categorical.rs +30 -0
- data/ext/polars/src/conversion/chunked_array.rs +8 -8
- data/ext/polars/src/conversion/mod.rs +187 -109
- data/ext/polars/src/dataframe/construction.rs +2 -2
- data/ext/polars/src/dataframe/export.rs +2 -2
- data/ext/polars/src/dataframe/general.rs +4 -2
- data/ext/polars/src/dataframe/io.rs +2 -2
- data/ext/polars/src/exceptions.rs +1 -1
- data/ext/polars/src/expr/datatype.rs +14 -0
- data/ext/polars/src/expr/general.rs +22 -17
- data/ext/polars/src/expr/list.rs +21 -2
- data/ext/polars/src/expr/meta.rs +0 -34
- data/ext/polars/src/expr/mod.rs +3 -1
- data/ext/polars/src/expr/name.rs +2 -2
- data/ext/polars/src/expr/rolling.rs +1 -1
- data/ext/polars/src/expr/selector.rs +219 -0
- data/ext/polars/src/expr/string.rs +14 -6
- data/ext/polars/src/file.rs +11 -5
- data/ext/polars/src/functions/io.rs +2 -11
- data/ext/polars/src/functions/lazy.rs +22 -54
- data/ext/polars/src/functions/meta.rs +2 -2
- data/ext/polars/src/functions/misc.rs +1 -1
- data/ext/polars/src/functions/string_cache.rs +4 -5
- data/ext/polars/src/interop/numo/numo_rs.rs +1 -1
- data/ext/polars/src/interop/numo/to_numo_series.rs +1 -1
- data/ext/polars/src/io/mod.rs +102 -0
- data/ext/polars/src/lazyframe/general.rs +74 -112
- data/ext/polars/src/lazyframe/serde.rs +1 -1
- data/ext/polars/src/lazyframe/sink.rs +6 -6
- data/ext/polars/src/lib.rs +98 -20
- data/ext/polars/src/map/dataframe.rs +7 -7
- data/ext/polars/src/map/lazy.rs +1 -1
- data/ext/polars/src/map/mod.rs +31 -19
- data/ext/polars/src/map/series.rs +8 -8
- data/ext/polars/src/on_startup.rs +5 -2
- data/ext/polars/src/rb_modules.rs +1 -1
- data/ext/polars/src/series/construction.rs +11 -7
- data/ext/polars/src/series/export.rs +6 -4
- data/ext/polars/src/series/general.rs +12 -207
- data/ext/polars/src/series/import.rs +2 -2
- data/ext/polars/src/series/map.rs +227 -0
- data/ext/polars/src/series/mod.rs +2 -1
- data/ext/polars/src/series/scatter.rs +1 -1
- data/ext/polars/src/utils.rs +10 -2
- data/lib/polars/cat_name_space.rb +3 -43
- data/lib/polars/catalog/unity/catalog_info.rb +20 -0
- data/lib/polars/catalog/unity/column_info.rb +31 -0
- data/lib/polars/catalog/unity/namespace_info.rb +21 -0
- data/lib/polars/catalog/unity/table_info.rb +50 -0
- data/lib/polars/catalog.rb +448 -0
- data/lib/polars/convert.rb +10 -0
- data/lib/polars/data_frame.rb +151 -30
- data/lib/polars/data_types.rb +47 -3
- data/lib/polars/exceptions.rb +7 -2
- data/lib/polars/expr.rb +34 -31
- data/lib/polars/functions/col.rb +6 -5
- data/lib/polars/functions/lazy.rb +114 -15
- data/lib/polars/functions/repeat.rb +4 -0
- data/lib/polars/io/csv.rb +18 -0
- data/lib/polars/io/json.rb +16 -0
- data/lib/polars/io/ndjson.rb +13 -0
- data/lib/polars/io/parquet.rb +45 -63
- data/lib/polars/io/scan_options.rb +47 -0
- data/lib/polars/lazy_frame.rb +163 -75
- data/lib/polars/list_expr.rb +204 -7
- data/lib/polars/list_name_space.rb +120 -1
- data/lib/polars/meta_expr.rb +7 -22
- data/lib/polars/scan_cast_options.rb +64 -0
- data/lib/polars/schema.rb +6 -1
- data/lib/polars/selector.rb +138 -0
- data/lib/polars/selectors.rb +931 -202
- data/lib/polars/series.rb +34 -11
- data/lib/polars/string_expr.rb +24 -3
- data/lib/polars/string_name_space.rb +11 -0
- data/lib/polars/utils/parse.rb +40 -0
- data/lib/polars/utils.rb +5 -1
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +8 -0
- metadata +17 -2
@@ -1,7 +1,7 @@
|
|
1
1
|
use std::hash::BuildHasher;
|
2
2
|
|
3
3
|
use either::Either;
|
4
|
-
use magnus::{
|
4
|
+
use magnus::{IntoValue, RArray, Value, prelude::*, typed_data::Obj};
|
5
5
|
use polars::prelude::pivot::{pivot, pivot_stable};
|
6
6
|
use polars::prelude::*;
|
7
7
|
|
@@ -416,17 +416,19 @@ impl RbDataFrame {
|
|
416
416
|
columns: Option<Vec<String>>,
|
417
417
|
separator: Option<String>,
|
418
418
|
drop_first: bool,
|
419
|
+
drop_nulls: bool,
|
419
420
|
) -> RbResult<Self> {
|
420
421
|
let df = match columns {
|
421
422
|
Some(cols) => self.df.borrow().columns_to_dummies(
|
422
423
|
cols.iter().map(|x| x as &str).collect(),
|
423
424
|
separator.as_deref(),
|
424
425
|
drop_first,
|
426
|
+
drop_nulls,
|
425
427
|
),
|
426
428
|
None => self
|
427
429
|
.df
|
428
430
|
.borrow()
|
429
|
-
.to_dummies(separator.as_deref(), drop_first),
|
431
|
+
.to_dummies(separator.as_deref(), drop_first, drop_nulls),
|
430
432
|
}
|
431
433
|
.map_err(RbPolarsErr::from)?;
|
432
434
|
Ok(df.into())
|
@@ -0,0 +1,14 @@
|
|
1
|
+
use polars::prelude::DataTypeExpr;
|
2
|
+
|
3
|
+
#[magnus::wrap(class = "Polars::RbDataTypeExpr")]
|
4
|
+
#[repr(transparent)]
|
5
|
+
#[derive(Clone)]
|
6
|
+
pub struct RbDataTypeExpr {
|
7
|
+
pub inner: DataTypeExpr,
|
8
|
+
}
|
9
|
+
|
10
|
+
impl From<DataTypeExpr> for RbDataTypeExpr {
|
11
|
+
fn from(expr: DataTypeExpr) -> Self {
|
12
|
+
RbDataTypeExpr { inner: expr }
|
13
|
+
}
|
14
|
+
}
|
@@ -6,10 +6,11 @@ use polars::prelude::*;
|
|
6
6
|
use polars::series::ops::NullBehavior;
|
7
7
|
use polars_core::series::IsSorted;
|
8
8
|
|
9
|
-
use
|
9
|
+
use super::selector::RbSelector;
|
10
|
+
use crate::conversion::{Wrap, parse_fill_null_strategy};
|
10
11
|
use crate::map::lazy::map_single;
|
11
12
|
use crate::rb_exprs_to_exprs;
|
12
|
-
use crate::{RbExpr, RbResult};
|
13
|
+
use crate::{RbExpr, RbPolarsErr, RbResult};
|
13
14
|
|
14
15
|
impl RbExpr {
|
15
16
|
pub fn add(&self, rhs: &Self) -> RbResult<Self> {
|
@@ -276,17 +277,8 @@ impl RbExpr {
|
|
276
277
|
.into()
|
277
278
|
}
|
278
279
|
|
279
|
-
pub fn arg_sort(&self,
|
280
|
-
self.clone()
|
281
|
-
.inner
|
282
|
-
.arg_sort(SortOptions {
|
283
|
-
descending: reverse,
|
284
|
-
nulls_last,
|
285
|
-
multithreaded: true,
|
286
|
-
maintain_order: false,
|
287
|
-
limit: None,
|
288
|
-
})
|
289
|
-
.into()
|
280
|
+
pub fn arg_sort(&self, descending: bool, nulls_last: bool) -> Self {
|
281
|
+
self.inner.clone().arg_sort(descending, nulls_last).into()
|
290
282
|
}
|
291
283
|
|
292
284
|
pub fn top_k(&self, k: &Self) -> Self {
|
@@ -655,10 +647,6 @@ impl RbExpr {
|
|
655
647
|
self.inner.clone().mode().into()
|
656
648
|
}
|
657
649
|
|
658
|
-
pub fn exclude(&self, columns: Vec<String>) -> Self {
|
659
|
-
self.inner.clone().exclude(columns).into()
|
660
|
-
}
|
661
|
-
|
662
650
|
pub fn interpolate(&self, method: Wrap<InterpolationMethod>) -> Self {
|
663
651
|
self.inner.clone().interpolate(method.0).into()
|
664
652
|
}
|
@@ -868,4 +856,21 @@ impl RbExpr {
|
|
868
856
|
)
|
869
857
|
.into()
|
870
858
|
}
|
859
|
+
|
860
|
+
#[allow(clippy::wrong_self_convention)]
|
861
|
+
pub fn into_selector(&self) -> RbResult<RbSelector> {
|
862
|
+
Ok(self
|
863
|
+
.inner
|
864
|
+
.clone()
|
865
|
+
.into_selector()
|
866
|
+
.ok_or_else(
|
867
|
+
|| polars_err!(InvalidOperation: "expr `{}` is not a selector", &self.inner),
|
868
|
+
)
|
869
|
+
.map_err(RbPolarsErr::from)?
|
870
|
+
.into())
|
871
|
+
}
|
872
|
+
|
873
|
+
pub fn new_selector(selector: &RbSelector) -> Self {
|
874
|
+
Expr::Selector(selector.inner.clone()).into()
|
875
|
+
}
|
871
876
|
}
|
data/ext/polars/src/expr/list.rs
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
use magnus::{prelude::*, value::Opaque
|
1
|
+
use magnus::{Ruby, Value, prelude::*, value::Opaque};
|
2
2
|
use polars::lazy::dsl::lit;
|
3
3
|
use polars::prelude::*;
|
4
4
|
use polars::series::ops::NullBehavior;
|
@@ -47,6 +47,14 @@ impl RbExpr {
|
|
47
47
|
self.inner.clone().list().eval(expr.inner.clone()).into()
|
48
48
|
}
|
49
49
|
|
50
|
+
pub fn list_filter(&self, predicate: &RbExpr) -> Self {
|
51
|
+
self.inner
|
52
|
+
.clone()
|
53
|
+
.list()
|
54
|
+
.eval(Expr::Column(PlSmallStr::EMPTY).filter(predicate.inner.clone()))
|
55
|
+
.into()
|
56
|
+
}
|
57
|
+
|
50
58
|
pub fn list_get(&self, index: &RbExpr, null_on_oob: bool) -> Self {
|
51
59
|
self.inner
|
52
60
|
.clone()
|
@@ -189,7 +197,7 @@ impl RbExpr {
|
|
189
197
|
.inner
|
190
198
|
.clone()
|
191
199
|
.list()
|
192
|
-
.to_struct(
|
200
|
+
.to_struct(ListToStruct::InferWidth {
|
193
201
|
infer_field_strategy: width_strat.0,
|
194
202
|
get_index_name: name_gen,
|
195
203
|
max_fields: upper_bound,
|
@@ -206,4 +214,15 @@ impl RbExpr {
|
|
206
214
|
e.list().unique().into()
|
207
215
|
}
|
208
216
|
}
|
217
|
+
|
218
|
+
pub fn list_set_operation(&self, other: &RbExpr, operation: Wrap<SetOperation>) -> Self {
|
219
|
+
let e = self.inner.clone().list();
|
220
|
+
match operation.0 {
|
221
|
+
SetOperation::Intersection => e.set_intersection(other.inner.clone()),
|
222
|
+
SetOperation::Difference => e.set_difference(other.inner.clone()),
|
223
|
+
SetOperation::Union => e.union(other.inner.clone()),
|
224
|
+
SetOperation::SymmetricDifference => e.set_symmetric_difference(other.inner.clone()),
|
225
|
+
}
|
226
|
+
.into()
|
227
|
+
}
|
209
228
|
}
|
data/ext/polars/src/expr/meta.rs
CHANGED
@@ -57,40 +57,6 @@ impl RbExpr {
|
|
57
57
|
self.inner.clone().meta().is_regex_projection()
|
58
58
|
}
|
59
59
|
|
60
|
-
pub fn _meta_selector_add(&self, other: &RbExpr) -> RbResult<RbExpr> {
|
61
|
-
let out = self
|
62
|
-
.inner
|
63
|
-
.clone()
|
64
|
-
.meta()
|
65
|
-
._selector_add(other.inner.clone())
|
66
|
-
.map_err(RbPolarsErr::from)?;
|
67
|
-
Ok(out.into())
|
68
|
-
}
|
69
|
-
|
70
|
-
pub fn _meta_selector_sub(&self, other: &RbExpr) -> RbResult<RbExpr> {
|
71
|
-
let out = self
|
72
|
-
.inner
|
73
|
-
.clone()
|
74
|
-
.meta()
|
75
|
-
._selector_sub(other.inner.clone())
|
76
|
-
.map_err(RbPolarsErr::from)?;
|
77
|
-
Ok(out.into())
|
78
|
-
}
|
79
|
-
|
80
|
-
pub fn _meta_selector_and(&self, other: &RbExpr) -> RbResult<RbExpr> {
|
81
|
-
let out = self
|
82
|
-
.inner
|
83
|
-
.clone()
|
84
|
-
.meta()
|
85
|
-
._selector_and(other.inner.clone())
|
86
|
-
.map_err(RbPolarsErr::from)?;
|
87
|
-
Ok(out.into())
|
88
|
-
}
|
89
|
-
|
90
|
-
pub fn _meta_as_selector(&self) -> RbExpr {
|
91
|
-
self.inner.clone().meta()._into_selector().into()
|
92
|
-
}
|
93
|
-
|
94
60
|
fn compute_tree_format(
|
95
61
|
&self,
|
96
62
|
display_as_dot: bool,
|
data/ext/polars/src/expr/mod.rs
CHANGED
@@ -1,16 +1,18 @@
|
|
1
1
|
mod array;
|
2
2
|
mod binary;
|
3
3
|
mod categorical;
|
4
|
+
pub mod datatype;
|
4
5
|
mod datetime;
|
5
6
|
mod general;
|
6
7
|
mod list;
|
7
8
|
mod meta;
|
8
9
|
mod name;
|
9
10
|
mod rolling;
|
11
|
+
pub mod selector;
|
10
12
|
mod string;
|
11
13
|
mod r#struct;
|
12
14
|
|
13
|
-
use magnus::{prelude
|
15
|
+
use magnus::{RArray, prelude::*};
|
14
16
|
use polars::lazy::dsl::Expr;
|
15
17
|
|
16
18
|
use crate::RbResult;
|
data/ext/polars/src/expr/name.rs
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
use magnus::{block::Proc, value::Opaque
|
1
|
+
use magnus::{Ruby, block::Proc, value::Opaque};
|
2
2
|
use polars::prelude::*;
|
3
3
|
use polars_utils::format_pl_smallstr;
|
4
4
|
|
@@ -20,7 +20,7 @@ impl RbExpr {
|
|
20
20
|
match out {
|
21
21
|
Ok(out) => Ok(format_pl_smallstr!("{}", out)),
|
22
22
|
Err(e) => Err(PolarsError::ComputeError(
|
23
|
-
format!("Ruby function in 'name.map' produced an error: {}."
|
23
|
+
format!("Ruby function in 'name.map' produced an error: {e}.").into(),
|
24
24
|
)),
|
25
25
|
}
|
26
26
|
})
|
@@ -0,0 +1,219 @@
|
|
1
|
+
use std::hash::{Hash, Hasher};
|
2
|
+
use std::sync::Arc;
|
3
|
+
|
4
|
+
use polars::prelude::{
|
5
|
+
DataType, DataTypeSelector, Selector, TimeUnit, TimeUnitSet, TimeZone, TimeZoneSet,
|
6
|
+
};
|
7
|
+
use polars_plan::dsl;
|
8
|
+
|
9
|
+
use crate::prelude::Wrap;
|
10
|
+
use crate::{RbResult, RbTypeError};
|
11
|
+
|
12
|
+
#[magnus::wrap(class = "Polars::RbSelector")]
|
13
|
+
#[repr(transparent)]
|
14
|
+
#[derive(Clone)]
|
15
|
+
pub struct RbSelector {
|
16
|
+
pub inner: Selector,
|
17
|
+
}
|
18
|
+
|
19
|
+
impl From<Selector> for RbSelector {
|
20
|
+
fn from(inner: Selector) -> Self {
|
21
|
+
Self { inner }
|
22
|
+
}
|
23
|
+
}
|
24
|
+
|
25
|
+
fn parse_time_unit_set(time_units: Vec<Wrap<TimeUnit>>) -> TimeUnitSet {
|
26
|
+
let mut tu = TimeUnitSet::empty();
|
27
|
+
for v in time_units {
|
28
|
+
match v.0 {
|
29
|
+
TimeUnit::Nanoseconds => tu |= TimeUnitSet::NANO_SECONDS,
|
30
|
+
TimeUnit::Microseconds => tu |= TimeUnitSet::MICRO_SECONDS,
|
31
|
+
TimeUnit::Milliseconds => tu |= TimeUnitSet::MILLI_SECONDS,
|
32
|
+
}
|
33
|
+
}
|
34
|
+
tu
|
35
|
+
}
|
36
|
+
|
37
|
+
pub fn parse_datatype_selector(selector: &RbSelector) -> RbResult<DataTypeSelector> {
|
38
|
+
selector.inner.clone().to_dtype_selector().ok_or_else(|| {
|
39
|
+
RbTypeError::new_err(format!(
|
40
|
+
"expected datatype based expression got '{}'",
|
41
|
+
selector.inner
|
42
|
+
))
|
43
|
+
})
|
44
|
+
}
|
45
|
+
|
46
|
+
impl RbSelector {
|
47
|
+
pub fn union(&self, other: &Self) -> Self {
|
48
|
+
Self {
|
49
|
+
inner: self.inner.clone() | other.inner.clone(),
|
50
|
+
}
|
51
|
+
}
|
52
|
+
|
53
|
+
pub fn difference(&self, other: &Self) -> Self {
|
54
|
+
Self {
|
55
|
+
inner: self.inner.clone() - other.inner.clone(),
|
56
|
+
}
|
57
|
+
}
|
58
|
+
|
59
|
+
pub fn exclusive_or(&self, other: &Self) -> Self {
|
60
|
+
Self {
|
61
|
+
inner: self.inner.clone() ^ other.inner.clone(),
|
62
|
+
}
|
63
|
+
}
|
64
|
+
|
65
|
+
pub fn intersect(&self, other: &Self) -> Self {
|
66
|
+
Self {
|
67
|
+
inner: self.inner.clone() & other.inner.clone(),
|
68
|
+
}
|
69
|
+
}
|
70
|
+
|
71
|
+
pub fn by_dtype(dtypes: Vec<Wrap<DataType>>) -> Self {
|
72
|
+
let dtypes = dtypes.into_iter().map(|x| x.0).collect::<Vec<_>>();
|
73
|
+
dsl::dtype_cols(dtypes).as_selector().into()
|
74
|
+
}
|
75
|
+
|
76
|
+
pub fn by_name(names: Vec<String>, strict: bool) -> Self {
|
77
|
+
dsl::by_name(names, strict).into()
|
78
|
+
}
|
79
|
+
|
80
|
+
pub fn by_index(indices: Vec<i64>, strict: bool) -> Self {
|
81
|
+
Selector::ByIndex {
|
82
|
+
indices: indices.into(),
|
83
|
+
strict,
|
84
|
+
}
|
85
|
+
.into()
|
86
|
+
}
|
87
|
+
|
88
|
+
pub fn first(strict: bool) -> Self {
|
89
|
+
Selector::ByIndex {
|
90
|
+
indices: [0].into(),
|
91
|
+
strict,
|
92
|
+
}
|
93
|
+
.into()
|
94
|
+
}
|
95
|
+
|
96
|
+
pub fn last(strict: bool) -> Self {
|
97
|
+
Selector::ByIndex {
|
98
|
+
indices: [-1].into(),
|
99
|
+
strict,
|
100
|
+
}
|
101
|
+
.into()
|
102
|
+
}
|
103
|
+
|
104
|
+
pub fn matches(pattern: String) -> Self {
|
105
|
+
Selector::Matches(pattern.into()).into()
|
106
|
+
}
|
107
|
+
|
108
|
+
pub fn enum_() -> Self {
|
109
|
+
DataTypeSelector::Enum.as_selector().into()
|
110
|
+
}
|
111
|
+
|
112
|
+
pub fn categorical() -> Self {
|
113
|
+
DataTypeSelector::Categorical.as_selector().into()
|
114
|
+
}
|
115
|
+
|
116
|
+
pub fn nested() -> Self {
|
117
|
+
DataTypeSelector::Nested.as_selector().into()
|
118
|
+
}
|
119
|
+
|
120
|
+
pub fn list(inner_dst: Option<&Self>) -> RbResult<Self> {
|
121
|
+
let inner_dst = match inner_dst {
|
122
|
+
None => None,
|
123
|
+
Some(inner_dst) => Some(Arc::new(parse_datatype_selector(inner_dst)?)),
|
124
|
+
};
|
125
|
+
Ok(DataTypeSelector::List(inner_dst).as_selector().into())
|
126
|
+
}
|
127
|
+
|
128
|
+
pub fn array(inner_dst: Option<&Self>, width: Option<usize>) -> RbResult<Self> {
|
129
|
+
let inner_dst = match inner_dst {
|
130
|
+
None => None,
|
131
|
+
Some(inner_dst) => Some(Arc::new(parse_datatype_selector(inner_dst)?)),
|
132
|
+
};
|
133
|
+
Ok(DataTypeSelector::Array(inner_dst, width)
|
134
|
+
.as_selector()
|
135
|
+
.into())
|
136
|
+
}
|
137
|
+
|
138
|
+
pub fn struct_() -> Self {
|
139
|
+
DataTypeSelector::Struct.as_selector().into()
|
140
|
+
}
|
141
|
+
|
142
|
+
pub fn integer() -> Self {
|
143
|
+
DataTypeSelector::Integer.as_selector().into()
|
144
|
+
}
|
145
|
+
|
146
|
+
pub fn signed_integer() -> Self {
|
147
|
+
DataTypeSelector::SignedInteger.as_selector().into()
|
148
|
+
}
|
149
|
+
|
150
|
+
pub fn unsigned_integer() -> Self {
|
151
|
+
DataTypeSelector::UnsignedInteger.as_selector().into()
|
152
|
+
}
|
153
|
+
|
154
|
+
pub fn float() -> Self {
|
155
|
+
DataTypeSelector::Float.as_selector().into()
|
156
|
+
}
|
157
|
+
|
158
|
+
pub fn decimal() -> Self {
|
159
|
+
DataTypeSelector::Decimal.as_selector().into()
|
160
|
+
}
|
161
|
+
|
162
|
+
pub fn numeric() -> Self {
|
163
|
+
DataTypeSelector::Numeric.as_selector().into()
|
164
|
+
}
|
165
|
+
|
166
|
+
pub fn temporal() -> Self {
|
167
|
+
DataTypeSelector::Temporal.as_selector().into()
|
168
|
+
}
|
169
|
+
|
170
|
+
pub fn datetime(tu: Vec<Wrap<TimeUnit>>, tz: Vec<Wrap<Option<TimeZone>>>) -> Self {
|
171
|
+
use TimeZoneSet as TZS;
|
172
|
+
|
173
|
+
let mut allow_unset = false;
|
174
|
+
let mut allow_set = false;
|
175
|
+
let mut any_of: Vec<TimeZone> = Vec::new();
|
176
|
+
|
177
|
+
let tu = parse_time_unit_set(tu);
|
178
|
+
for t in tz {
|
179
|
+
let t = t.0;
|
180
|
+
match t {
|
181
|
+
None => allow_unset = true,
|
182
|
+
Some(s) if s.as_str() == "*" => allow_set = true,
|
183
|
+
Some(t) => any_of.push(t),
|
184
|
+
}
|
185
|
+
}
|
186
|
+
|
187
|
+
let tzs = match (allow_unset, allow_set) {
|
188
|
+
(true, true) => TZS::Any,
|
189
|
+
(false, true) => TZS::AnySet,
|
190
|
+
(true, false) if any_of.is_empty() => TZS::Unset,
|
191
|
+
(true, false) => TZS::UnsetOrAnyOf(any_of.into()),
|
192
|
+
(false, false) => TZS::AnyOf(any_of.into()),
|
193
|
+
};
|
194
|
+
DataTypeSelector::Datetime(tu, tzs).as_selector().into()
|
195
|
+
}
|
196
|
+
|
197
|
+
pub fn duration(tu: Vec<Wrap<TimeUnit>>) -> Self {
|
198
|
+
let tu = parse_time_unit_set(tu);
|
199
|
+
DataTypeSelector::Duration(tu).as_selector().into()
|
200
|
+
}
|
201
|
+
|
202
|
+
pub fn object() -> Self {
|
203
|
+
DataTypeSelector::Object.as_selector().into()
|
204
|
+
}
|
205
|
+
|
206
|
+
pub fn empty() -> Self {
|
207
|
+
dsl::empty().into()
|
208
|
+
}
|
209
|
+
|
210
|
+
pub fn all() -> Self {
|
211
|
+
dsl::all().into()
|
212
|
+
}
|
213
|
+
|
214
|
+
pub fn hash(&self) -> u64 {
|
215
|
+
let mut hasher = std::hash::DefaultHasher::default();
|
216
|
+
self.inner.hash(&mut hasher);
|
217
|
+
hasher.finish()
|
218
|
+
}
|
219
|
+
}
|
@@ -163,12 +163,20 @@ impl RbExpr {
|
|
163
163
|
self.inner.clone().str().reverse().into()
|
164
164
|
}
|
165
165
|
|
166
|
-
pub fn str_pad_start(&self, length:
|
167
|
-
self.clone()
|
166
|
+
pub fn str_pad_start(&self, length: &RbExpr, fillchar: char) -> Self {
|
167
|
+
self.clone()
|
168
|
+
.inner
|
169
|
+
.str()
|
170
|
+
.pad_start(length.inner.clone(), fillchar)
|
171
|
+
.into()
|
168
172
|
}
|
169
173
|
|
170
|
-
pub fn str_pad_end(&self, length:
|
171
|
-
self.clone()
|
174
|
+
pub fn str_pad_end(&self, length: &RbExpr, fillchar: char) -> Self {
|
175
|
+
self.clone()
|
176
|
+
.inner
|
177
|
+
.str()
|
178
|
+
.pad_end(length.inner.clone(), fillchar)
|
179
|
+
.into()
|
172
180
|
}
|
173
181
|
|
174
182
|
pub fn str_zfill(&self, length: &Self) -> Self {
|
@@ -220,11 +228,11 @@ impl RbExpr {
|
|
220
228
|
self.inner.clone().str().base64_decode(strict).into()
|
221
229
|
}
|
222
230
|
|
223
|
-
pub fn str_to_integer(&self, base: &Self, strict: bool) -> Self {
|
231
|
+
pub fn str_to_integer(&self, base: &Self, dtype: Option<Wrap<DataType>>, strict: bool) -> Self {
|
224
232
|
self.inner
|
225
233
|
.clone()
|
226
234
|
.str()
|
227
|
-
.to_integer(base.inner.clone(), strict)
|
235
|
+
.to_integer(base.inner.clone(), dtype.map(|wrap| wrap.0), strict)
|
228
236
|
.into()
|
229
237
|
}
|
230
238
|
|
data/ext/polars/src/file.rs
CHANGED
@@ -3,17 +3,18 @@ use std::io;
|
|
3
3
|
use std::io::{Cursor, Read, Seek, SeekFrom, Write};
|
4
4
|
use std::path::PathBuf;
|
5
5
|
|
6
|
-
use magnus::{
|
6
|
+
use magnus::{Error, RString, Ruby, Value, exception, prelude::*, value::Opaque};
|
7
7
|
use polars::io::cloud::CloudOptions;
|
8
8
|
use polars::io::mmap::MmapBytesReader;
|
9
|
+
use polars::prelude::PlPath;
|
9
10
|
use polars::prelude::file::DynWriteable;
|
10
11
|
use polars::prelude::sync_on_close::SyncOnCloseType;
|
11
12
|
use polars_utils::file::ClosableFile;
|
12
13
|
use polars_utils::mmap::MemSlice;
|
13
14
|
|
15
|
+
use crate::RbResult;
|
14
16
|
use crate::error::RbPolarsErr;
|
15
17
|
use crate::prelude::resolve_homedir;
|
16
|
-
use crate::RbResult;
|
17
18
|
|
18
19
|
#[derive(Clone)]
|
19
20
|
pub struct RbFileLikeObject {
|
@@ -188,7 +189,7 @@ impl EitherRustRubyFile {
|
|
188
189
|
|
189
190
|
pub enum RubyScanSourceInput {
|
190
191
|
Buffer(MemSlice),
|
191
|
-
Path(
|
192
|
+
Path(PlPath),
|
192
193
|
#[allow(dead_code)]
|
193
194
|
File(File),
|
194
195
|
}
|
@@ -202,8 +203,13 @@ pub(crate) fn try_get_rbfile(
|
|
202
203
|
}
|
203
204
|
|
204
205
|
pub fn get_ruby_scan_source_input(rb_f: Value, write: bool) -> RbResult<RubyScanSourceInput> {
|
205
|
-
if let Ok(
|
206
|
-
|
206
|
+
if let Ok(s) = String::try_convert(rb_f) {
|
207
|
+
let mut file_path = PlPath::new(&s);
|
208
|
+
if let Some(p) = file_path.as_ref().as_local_path() {
|
209
|
+
if p.starts_with("~/") {
|
210
|
+
file_path = PlPath::Local(resolve_homedir(&p).into());
|
211
|
+
}
|
212
|
+
}
|
207
213
|
Ok(RubyScanSourceInput::Path(file_path))
|
208
214
|
} else {
|
209
215
|
let f = RbFileLikeObject::with_requirements(rb_f, !write, write, !write)?;
|
@@ -1,13 +1,10 @@
|
|
1
1
|
use std::io::BufReader;
|
2
2
|
|
3
|
-
use arrow::array::Utf8ViewArray;
|
4
3
|
use magnus::{RHash, Value};
|
5
4
|
use polars::prelude::ArrowSchema;
|
6
|
-
use polars_core::datatypes::create_enum_dtype;
|
7
5
|
|
8
6
|
use crate::conversion::Wrap;
|
9
|
-
use crate::file::{
|
10
|
-
use crate::prelude::ArrowDataType;
|
7
|
+
use crate::file::{EitherRustRubyFile, get_either_file};
|
11
8
|
use crate::{RbPolarsErr, RbResult};
|
12
9
|
|
13
10
|
pub fn read_ipc_schema(rb_f: Value) -> RbResult<RHash> {
|
@@ -42,13 +39,7 @@ pub fn read_parquet_schema(rb_f: Value) -> RbResult<RHash> {
|
|
42
39
|
|
43
40
|
fn fields_to_rbdict(schema: &ArrowSchema, dict: &RHash) -> RbResult<()> {
|
44
41
|
for field in schema.iter_values() {
|
45
|
-
let dt =
|
46
|
-
Wrap(create_enum_dtype(Utf8ViewArray::new_empty(
|
47
|
-
ArrowDataType::Utf8View,
|
48
|
-
)))
|
49
|
-
} else {
|
50
|
-
Wrap(polars::prelude::DataType::from_arrow_field(field))
|
51
|
-
};
|
42
|
+
let dt = Wrap(polars::prelude::DataType::from_arrow_field(field));
|
52
43
|
dict.aset(field.name.as_str(), dt)?;
|
53
44
|
}
|
54
45
|
Ok(())
|