polars-df 0.13.0 → 0.14.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/Cargo.lock +177 -141
- data/ext/polars/Cargo.toml +5 -6
- data/ext/polars/src/batched_csv.rs +3 -3
- data/ext/polars/src/conversion/any_value.rs +10 -4
- data/ext/polars/src/conversion/chunked_array.rs +3 -3
- data/ext/polars/src/conversion/mod.rs +36 -20
- data/ext/polars/src/dataframe/construction.rs +4 -4
- data/ext/polars/src/dataframe/general.rs +6 -5
- data/ext/polars/src/dataframe/io.rs +6 -6
- data/ext/polars/src/expr/datetime.rs +11 -3
- data/ext/polars/src/expr/general.rs +1 -1
- data/ext/polars/src/expr/name.rs +3 -2
- data/ext/polars/src/expr/string.rs +8 -1
- data/ext/polars/src/functions/io.rs +6 -6
- data/ext/polars/src/functions/range.rs +4 -2
- data/ext/polars/src/lazyframe/mod.rs +18 -16
- data/ext/polars/src/lib.rs +1 -1
- data/ext/polars/src/map/dataframe.rs +36 -8
- data/ext/polars/src/map/mod.rs +8 -8
- data/ext/polars/src/map/series.rs +106 -64
- data/ext/polars/src/on_startup.rs +1 -1
- data/ext/polars/src/series/construction.rs +50 -23
- data/ext/polars/src/series/mod.rs +4 -4
- data/lib/polars/data_frame.rb +10 -10
- data/lib/polars/expr.rb +6 -6
- data/lib/polars/io/ipc.rb +0 -8
- data/lib/polars/series.rb +5 -5
- data/lib/polars/version.rb +1 -1
- metadata +3 -3
data/ext/polars/Cargo.toml
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
[package]
|
2
2
|
name = "polars"
|
3
|
-
version = "0.13.0"
|
3
|
+
version = "0.14.0"
|
4
4
|
license = "MIT"
|
5
5
|
authors = ["Andrew Kane <andrew@ankane.org>"]
|
6
6
|
edition = "2021"
|
@@ -15,14 +15,13 @@ ahash = "0.8"
|
|
15
15
|
chrono = "0.4"
|
16
16
|
either = "1.8"
|
17
17
|
magnus = "0.7"
|
18
|
-
polars-core = "=0.
|
19
|
-
polars-parquet = "=0.
|
20
|
-
polars-utils = "=0.
|
18
|
+
polars-core = "=0.43.1"
|
19
|
+
polars-parquet = "=0.43.1"
|
20
|
+
polars-utils = "=0.43.1"
|
21
21
|
serde_json = "1"
|
22
|
-
smartstring = "1"
|
23
22
|
|
24
23
|
[dependencies.polars]
|
25
|
-
version = "=0.
|
24
|
+
version = "=0.43.1"
|
26
25
|
features = [
|
27
26
|
"abs",
|
28
27
|
"approx_unique",
|
@@ -54,7 +54,7 @@ impl RbBatchedCsv {
|
|
54
54
|
let null_values = null_values.map(|w| w.0);
|
55
55
|
let eol_char = eol_char.as_bytes()[0];
|
56
56
|
let row_index = row_index.map(|(name, offset)| RowIndex {
|
57
|
-
name:
|
57
|
+
name: name.into(),
|
58
58
|
offset,
|
59
59
|
});
|
60
60
|
let quote_char = if let Some(s) = quote_char {
|
@@ -72,7 +72,7 @@ impl RbBatchedCsv {
|
|
72
72
|
.iter()
|
73
73
|
.map(|(name, dtype)| {
|
74
74
|
let dtype = dtype.0.clone();
|
75
|
-
Field::new(name, dtype)
|
75
|
+
Field::new((&**name).into(), dtype)
|
76
76
|
})
|
77
77
|
.collect::<Schema>()
|
78
78
|
});
|
@@ -95,7 +95,7 @@ impl RbBatchedCsv {
|
|
95
95
|
.with_projection(projection.map(Arc::new))
|
96
96
|
.with_rechunk(rechunk)
|
97
97
|
.with_chunk_size(chunk_size)
|
98
|
-
.with_columns(columns.map(
|
98
|
+
.with_columns(columns.map(|x| x.into_iter().map(PlSmallStr::from_string).collect()))
|
99
99
|
.with_n_threads(n_threads)
|
100
100
|
.with_dtype_overwrite(overwrite_dtype_slice.map(Arc::new))
|
101
101
|
.with_low_memory(low_memory)
|
@@ -51,7 +51,10 @@ pub(crate) fn any_value_into_rb_object(av: AnyValue, ruby: &Ruby) -> Value {
|
|
51
51
|
AnyValue::Datetime(v, time_unit, time_zone) => {
|
52
52
|
let time_unit = time_unit.to_ascii();
|
53
53
|
utils()
|
54
|
-
.funcall(
|
54
|
+
.funcall(
|
55
|
+
"_to_ruby_datetime",
|
56
|
+
(v, time_unit, time_zone.as_ref().map(|v| v.to_string())),
|
57
|
+
)
|
55
58
|
.unwrap()
|
56
59
|
}
|
57
60
|
AnyValue::Duration(v, time_unit) => {
|
@@ -122,7 +125,10 @@ pub(crate) fn rb_object_to_any_value<'s>(ob: Value, strict: bool) -> RbResult<An
|
|
122
125
|
fn get_list(ob: Value, _strict: bool) -> RbResult<AnyValue<'static>> {
|
123
126
|
let v = RArray::from_value(ob).unwrap();
|
124
127
|
if v.is_empty() {
|
125
|
-
Ok(AnyValue::List(Series::new_empty(
|
128
|
+
Ok(AnyValue::List(Series::new_empty(
|
129
|
+
PlSmallStr::EMPTY,
|
130
|
+
&DataType::Null,
|
131
|
+
)))
|
126
132
|
} else {
|
127
133
|
let list = v;
|
128
134
|
|
@@ -142,7 +148,7 @@ pub(crate) fn rb_object_to_any_value<'s>(ob: Value, strict: bool) -> RbResult<An
|
|
142
148
|
avs.push(Wrap::<AnyValue>::try_convert(item)?.0)
|
143
149
|
}
|
144
150
|
|
145
|
-
let s = Series::from_any_values_and_dtype(
|
151
|
+
let s = Series::from_any_values_and_dtype(PlSmallStr::EMPTY, &avs, &dtype, true)
|
146
152
|
.map_err(RbPolarsErr::from)?;
|
147
153
|
Ok(AnyValue::List(s))
|
148
154
|
}
|
@@ -162,7 +168,7 @@ pub(crate) fn rb_object_to_any_value<'s>(ob: Value, strict: bool) -> RbResult<An
|
|
162
168
|
let key = String::try_convert(k)?;
|
163
169
|
let val = Wrap::<AnyValue>::try_convert(v)?.0;
|
164
170
|
let dtype = DataType::from(&val);
|
165
|
-
keys.push(Field::new(
|
171
|
+
keys.push(Field::new(key.into(), dtype));
|
166
172
|
vals.push(val);
|
167
173
|
Ok(ForEach::Continue)
|
168
174
|
})?;
|
@@ -9,7 +9,7 @@ use crate::RbResult;
|
|
9
9
|
impl TryConvert for Wrap<StringChunked> {
|
10
10
|
fn try_convert(obj: Value) -> RbResult<Self> {
|
11
11
|
let (seq, len) = get_rbseq(obj)?;
|
12
|
-
let mut builder = StringChunkedBuilder::new(
|
12
|
+
let mut builder = StringChunkedBuilder::new(PlSmallStr::EMPTY, len);
|
13
13
|
|
14
14
|
for res in seq.into_iter() {
|
15
15
|
let item = res;
|
@@ -25,7 +25,7 @@ impl TryConvert for Wrap<StringChunked> {
|
|
25
25
|
impl TryConvert for Wrap<BinaryChunked> {
|
26
26
|
fn try_convert(obj: Value) -> RbResult<Self> {
|
27
27
|
let (seq, len) = get_rbseq(obj)?;
|
28
|
-
let mut builder = BinaryChunkedBuilder::new(
|
28
|
+
let mut builder = BinaryChunkedBuilder::new(PlSmallStr::EMPTY, len);
|
29
29
|
|
30
30
|
for res in seq.into_iter() {
|
31
31
|
let item = res;
|
@@ -90,7 +90,7 @@ impl IntoValue for Wrap<&DatetimeChunked> {
|
|
90
90
|
fn into_value_with(self, _: &Ruby) -> Value {
|
91
91
|
let utils = utils();
|
92
92
|
let time_unit = Wrap(self.0.time_unit()).into_value();
|
93
|
-
let time_zone = self.0.time_zone().
|
93
|
+
let time_zone = self.0.time_zone().as_deref().map(|v| v.into_value());
|
94
94
|
let iter = self.0.into_iter().map(|opt_v| {
|
95
95
|
opt_v.map(|v| {
|
96
96
|
utils
|
@@ -20,7 +20,6 @@ use polars::series::ops::NullBehavior;
|
|
20
20
|
use polars_core::utils::arrow::array::Array;
|
21
21
|
use polars_core::utils::materialize_dyn_int;
|
22
22
|
use polars_utils::total_ord::{TotalEq, TotalHash};
|
23
|
-
use smartstring::alias::String as SmartString;
|
24
23
|
|
25
24
|
use crate::object::OBJECT_NAME;
|
26
25
|
use crate::rb_modules::series;
|
@@ -84,14 +83,26 @@ pub(crate) fn to_series(s: RbSeries) -> Value {
|
|
84
83
|
.unwrap()
|
85
84
|
}
|
86
85
|
|
86
|
+
impl TryConvert for Wrap<PlSmallStr> {
|
87
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
88
|
+
Ok(Wrap((&*String::try_convert(ob)?).into()))
|
89
|
+
}
|
90
|
+
}
|
91
|
+
|
87
92
|
impl TryConvert for Wrap<NullValues> {
|
88
93
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
89
94
|
if let Ok(s) = String::try_convert(ob) {
|
90
|
-
Ok(Wrap(NullValues::AllColumnsSingle(s)))
|
95
|
+
Ok(Wrap(NullValues::AllColumnsSingle((&*s).into())))
|
91
96
|
} else if let Ok(s) = Vec::<String>::try_convert(ob) {
|
92
|
-
Ok(Wrap(NullValues::AllColumns(
|
97
|
+
Ok(Wrap(NullValues::AllColumns(
|
98
|
+
s.into_iter().map(|x| (&*x).into()).collect(),
|
99
|
+
)))
|
93
100
|
} else if let Ok(s) = Vec::<(String, String)>::try_convert(ob) {
|
94
|
-
Ok(Wrap(NullValues::Named(
|
101
|
+
Ok(Wrap(NullValues::Named(
|
102
|
+
s.into_iter()
|
103
|
+
.map(|(a, b)| ((&*a).into(), (&*b).into()))
|
104
|
+
.collect(),
|
105
|
+
)))
|
95
106
|
} else {
|
96
107
|
Err(RbPolarsErr::other(
|
97
108
|
"could not extract value from null_values argument".into(),
|
@@ -189,7 +200,7 @@ impl IntoValue for Wrap<DataType> {
|
|
189
200
|
DataType::Datetime(tu, tz) => {
|
190
201
|
let datetime_class = pl.const_get::<_, Value>("Datetime").unwrap();
|
191
202
|
datetime_class
|
192
|
-
.funcall::<_, _, Value>("new", (tu.to_ascii(), tz))
|
203
|
+
.funcall::<_, _, Value>("new", (tu.to_ascii(), tz.as_deref()))
|
193
204
|
.unwrap()
|
194
205
|
}
|
195
206
|
DataType::Duration(tu) => {
|
@@ -210,7 +221,9 @@ impl IntoValue for Wrap<DataType> {
|
|
210
221
|
// we should always have an initialized rev_map coming from rust
|
211
222
|
let categories = rev_map.as_ref().unwrap().get_categories();
|
212
223
|
let class = pl.const_get::<_, Value>("Enum").unwrap();
|
213
|
-
let s =
|
224
|
+
let s =
|
225
|
+
Series::from_arrow(PlSmallStr::from_static("category"), categories.to_boxed())
|
226
|
+
.unwrap();
|
214
227
|
let series = to_series(s.into());
|
215
228
|
class.funcall::<_, _, Value>("new", (series,)).unwrap()
|
216
229
|
}
|
@@ -222,7 +235,7 @@ impl IntoValue for Wrap<DataType> {
|
|
222
235
|
let field_class = pl.const_get::<_, Value>("Field").unwrap();
|
223
236
|
let iter = fields.iter().map(|fld| {
|
224
237
|
let name = fld.name().as_str();
|
225
|
-
let dtype = Wrap(fld.
|
238
|
+
let dtype = Wrap(fld.dtype().clone());
|
226
239
|
field_class
|
227
240
|
.funcall::<_, _, Value>("new", (name, dtype))
|
228
241
|
.unwrap()
|
@@ -276,7 +289,7 @@ impl TryConvert for Wrap<Field> {
|
|
276
289
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
277
290
|
let name: String = ob.funcall("name", ())?;
|
278
291
|
let dtype: Wrap<DataType> = ob.funcall("dtype", ())?;
|
279
|
-
Ok(Wrap(Field::new(
|
292
|
+
Ok(Wrap(Field::new((&*name).into(), dtype.0)))
|
280
293
|
}
|
281
294
|
}
|
282
295
|
|
@@ -341,7 +354,7 @@ impl TryConvert for Wrap<DataType> {
|
|
341
354
|
let s = get_series(categories)?;
|
342
355
|
let ca = s.str().map_err(RbPolarsErr::from)?;
|
343
356
|
let categories = ca.downcast_iter().next().unwrap().clone();
|
344
|
-
|
357
|
+
create_enum_dtype(categories)
|
345
358
|
}
|
346
359
|
"Polars::Date" => DataType::Date,
|
347
360
|
"Polars::Time" => DataType::Time,
|
@@ -357,8 +370,8 @@ impl TryConvert for Wrap<DataType> {
|
|
357
370
|
"Polars::Datetime" => {
|
358
371
|
let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
|
359
372
|
let time_unit = Wrap::<TimeUnit>::try_convert(time_unit)?.0;
|
360
|
-
let time_zone = ob.funcall("time_zone", ())?;
|
361
|
-
DataType::Datetime(time_unit, time_zone)
|
373
|
+
let time_zone: Option<String> = ob.funcall("time_zone", ())?;
|
374
|
+
DataType::Datetime(time_unit, time_zone.as_deref().map(|x| x.into()))
|
362
375
|
}
|
363
376
|
"Polars::Decimal" => {
|
364
377
|
let precision = ob.funcall("precision", ())?;
|
@@ -463,7 +476,7 @@ impl TryConvert for Wrap<Schema> {
|
|
463
476
|
|
464
477
|
let mut schema = Vec::new();
|
465
478
|
dict.foreach(|key: String, val: Wrap<DataType>| {
|
466
|
-
schema.push(Ok(Field::new(
|
479
|
+
schema.push(Ok(Field::new((&*key).into(), val.0)));
|
467
480
|
Ok(ForEach::Continue)
|
468
481
|
})
|
469
482
|
.unwrap();
|
@@ -1053,14 +1066,6 @@ pub fn parse_parquet_compression(
|
|
1053
1066
|
Ok(parsed)
|
1054
1067
|
}
|
1055
1068
|
|
1056
|
-
pub(crate) fn strings_to_smartstrings<I, S>(container: I) -> Vec<SmartString>
|
1057
|
-
where
|
1058
|
-
I: IntoIterator<Item = S>,
|
1059
|
-
S: AsRef<str>,
|
1060
|
-
{
|
1061
|
-
container.into_iter().map(|s| s.as_ref().into()).collect()
|
1062
|
-
}
|
1063
|
-
|
1064
1069
|
impl TryConvert for Wrap<NonZeroUsize> {
|
1065
1070
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
1066
1071
|
let v = usize::try_convert(ob)?;
|
@@ -1069,3 +1074,14 @@ impl TryConvert for Wrap<NonZeroUsize> {
|
|
1069
1074
|
.ok_or(RbValueError::new_err("must be non-zero".into()))
|
1070
1075
|
}
|
1071
1076
|
}
|
1077
|
+
|
1078
|
+
pub(crate) fn strings_to_pl_smallstr<I, S>(container: I) -> Vec<PlSmallStr>
|
1079
|
+
where
|
1080
|
+
I: IntoIterator<Item = S>,
|
1081
|
+
S: AsRef<str>,
|
1082
|
+
{
|
1083
|
+
container
|
1084
|
+
.into_iter()
|
1085
|
+
.map(|s| PlSmallStr::from_str(s.as_ref()))
|
1086
|
+
.collect()
|
1087
|
+
}
|
@@ -79,7 +79,7 @@ fn update_schema_from_rows(
|
|
79
79
|
rows: &[Row],
|
80
80
|
infer_schema_length: Option<usize>,
|
81
81
|
) -> RbResult<()> {
|
82
|
-
let schema_is_complete = schema.
|
82
|
+
let schema_is_complete = schema.iter_values().all(|dtype| dtype.is_known());
|
83
83
|
if schema_is_complete {
|
84
84
|
return Ok(());
|
85
85
|
}
|
@@ -89,7 +89,7 @@ fn update_schema_from_rows(
|
|
89
89
|
rows_to_supertypes(rows, infer_schema_length).map_err(RbPolarsErr::from)?;
|
90
90
|
let inferred_dtypes_slice = inferred_dtypes.as_slice();
|
91
91
|
|
92
|
-
for (i, dtype) in schema.
|
92
|
+
for (i, dtype) in schema.iter_values_mut().enumerate() {
|
93
93
|
if !dtype.is_known() {
|
94
94
|
*dtype = inferred_dtypes_slice.get(i).ok_or_else(|| {
|
95
95
|
polars_err!(SchemaMismatch: "the number of columns in the schema does not match the data")
|
@@ -110,7 +110,7 @@ fn resolve_schema_overrides(schema: &mut Schema, schema_overrides: Option<Schema
|
|
110
110
|
}
|
111
111
|
|
112
112
|
fn erase_decimal_precision_scale(schema: &mut Schema) {
|
113
|
-
for dtype in schema.
|
113
|
+
for dtype in schema.iter_values_mut() {
|
114
114
|
if let DataType::Decimal(_, _) = dtype {
|
115
115
|
*dtype = DataType::Decimal(None, None)
|
116
116
|
}
|
@@ -123,7 +123,7 @@ where
|
|
123
123
|
{
|
124
124
|
let fields = column_names
|
125
125
|
.into_iter()
|
126
|
-
.map(|c| Field::new(c, DataType::Unknown(Default::default())));
|
126
|
+
.map(|c| Field::new(c.into(), DataType::Unknown(Default::default())));
|
127
127
|
Schema::from_iter(fields)
|
128
128
|
}
|
129
129
|
|
@@ -9,6 +9,7 @@ use crate::map::dataframe::{
|
|
9
9
|
apply_lambda_unknown, apply_lambda_with_bool_out_type, apply_lambda_with_primitive_out_type,
|
10
10
|
apply_lambda_with_utf8_out_type,
|
11
11
|
};
|
12
|
+
use crate::prelude::strings_to_pl_smallstr;
|
12
13
|
use crate::series::{to_rbseries_collection, to_series_collection};
|
13
14
|
use crate::{RbDataFrame, RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
|
14
15
|
|
@@ -254,7 +255,7 @@ impl RbDataFrame {
|
|
254
255
|
}
|
255
256
|
|
256
257
|
pub fn gather(&self, indices: Vec<IdxSize>) -> RbResult<Self> {
|
257
|
-
let indices = IdxCa::from_vec("", indices);
|
258
|
+
let indices = IdxCa::from_vec("".into(), indices);
|
258
259
|
let df = self.df.borrow().take(&indices).map_err(RbPolarsErr::from)?;
|
259
260
|
Ok(RbDataFrame::new(df))
|
260
261
|
}
|
@@ -332,7 +333,7 @@ impl RbDataFrame {
|
|
332
333
|
let df = self
|
333
334
|
.df
|
334
335
|
.borrow()
|
335
|
-
.with_row_index(
|
336
|
+
.with_row_index(name.into(), offset)
|
336
337
|
.map_err(RbPolarsErr::from)?;
|
337
338
|
Ok(df.into())
|
338
339
|
}
|
@@ -349,8 +350,8 @@ impl RbDataFrame {
|
|
349
350
|
variable_name: Option<String>,
|
350
351
|
) -> RbResult<Self> {
|
351
352
|
let args = UnpivotArgsIR {
|
352
|
-
on:
|
353
|
-
index:
|
353
|
+
on: strings_to_pl_smallstr(on),
|
354
|
+
index: strings_to_pl_smallstr(index),
|
354
355
|
value_name: value_name.map(|s| s.into()),
|
355
356
|
variable_name: variable_name.map(|s| s.into()),
|
356
357
|
};
|
@@ -581,7 +582,7 @@ impl RbDataFrame {
|
|
581
582
|
}
|
582
583
|
|
583
584
|
pub fn to_struct(&self, name: String) -> RbSeries {
|
584
|
-
let s = self.df.borrow().clone().into_struct(
|
585
|
+
let s = self.df.borrow().clone().into_struct(name.into());
|
585
586
|
s.into_series().into()
|
586
587
|
}
|
587
588
|
|
@@ -50,7 +50,7 @@ impl RbDataFrame {
|
|
50
50
|
let null_values = null_values.map(|w| w.0);
|
51
51
|
let eol_char = eol_char.as_bytes()[0];
|
52
52
|
let row_index = row_index.map(|(name, offset)| RowIndex {
|
53
|
-
name:
|
53
|
+
name: name.into(),
|
54
54
|
offset,
|
55
55
|
});
|
56
56
|
let quote_char = if let Some(s) = quote_char {
|
@@ -68,7 +68,7 @@ impl RbDataFrame {
|
|
68
68
|
.iter()
|
69
69
|
.map(|(name, dtype)| {
|
70
70
|
let dtype = dtype.0.clone();
|
71
|
-
Field::new(name, dtype)
|
71
|
+
Field::new((&**name).into(), dtype)
|
72
72
|
})
|
73
73
|
.collect::<Schema>()
|
74
74
|
});
|
@@ -91,7 +91,7 @@ impl RbDataFrame {
|
|
91
91
|
.with_projection(projection.map(Arc::new))
|
92
92
|
.with_rechunk(rechunk)
|
93
93
|
.with_chunk_size(chunk_size)
|
94
|
-
.with_columns(columns.map(
|
94
|
+
.with_columns(columns.map(|x| x.into_iter().map(|x| x.into()).collect()))
|
95
95
|
.with_n_threads(n_threads)
|
96
96
|
.with_schema_overwrite(overwrite_dtype.map(Arc::new))
|
97
97
|
.with_dtype_overwrite(overwrite_dtype_slice.map(Arc::new))
|
@@ -135,7 +135,7 @@ impl RbDataFrame {
|
|
135
135
|
use EitherRustRubyFile::*;
|
136
136
|
|
137
137
|
let row_index = row_index.map(|(name, offset)| RowIndex {
|
138
|
-
name:
|
138
|
+
name: name.into(),
|
139
139
|
offset,
|
140
140
|
});
|
141
141
|
let result = match get_either_file(rb_f, false)? {
|
@@ -225,7 +225,7 @@ impl RbDataFrame {
|
|
225
225
|
_memory_map: bool,
|
226
226
|
) -> RbResult<Self> {
|
227
227
|
let row_index = row_index.map(|(name, offset)| RowIndex {
|
228
|
-
name:
|
228
|
+
name: name.into(),
|
229
229
|
offset,
|
230
230
|
});
|
231
231
|
let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
|
@@ -252,7 +252,7 @@ impl RbDataFrame {
|
|
252
252
|
rechunk: bool,
|
253
253
|
) -> RbResult<Self> {
|
254
254
|
let row_index = row_index.map(|(name, offset)| RowIndex {
|
255
|
-
name:
|
255
|
+
name: name.into(),
|
256
256
|
offset,
|
257
257
|
});
|
258
258
|
// rb_f = read_if_bytesio(rb_f);
|
@@ -29,8 +29,12 @@ impl RbExpr {
|
|
29
29
|
self.inner.clone().dt().with_time_unit(tu.0).into()
|
30
30
|
}
|
31
31
|
|
32
|
-
pub fn dt_convert_time_zone(&self,
|
33
|
-
self.inner
|
32
|
+
pub fn dt_convert_time_zone(&self, time_zone: String) -> Self {
|
33
|
+
self.inner
|
34
|
+
.clone()
|
35
|
+
.dt()
|
36
|
+
.convert_time_zone(time_zone.into())
|
37
|
+
.into()
|
34
38
|
}
|
35
39
|
|
36
40
|
pub fn dt_cast_time_unit(&self, tu: Wrap<TimeUnit>) -> Self {
|
@@ -46,7 +50,11 @@ impl RbExpr {
|
|
46
50
|
self.inner
|
47
51
|
.clone()
|
48
52
|
.dt()
|
49
|
-
.replace_time_zone(
|
53
|
+
.replace_time_zone(
|
54
|
+
time_zone.map(|x| x.into()),
|
55
|
+
ambiguous.inner.clone(),
|
56
|
+
non_existent.0,
|
57
|
+
)
|
50
58
|
.into()
|
51
59
|
}
|
52
60
|
|
@@ -242,7 +242,7 @@ impl RbExpr {
|
|
242
242
|
pub fn value_counts(&self, sort: bool, parallel: bool, name: String, normalize: bool) -> Self {
|
243
243
|
self.inner
|
244
244
|
.clone()
|
245
|
-
.value_counts(sort, parallel, name, normalize)
|
245
|
+
.value_counts(sort, parallel, name.as_str(), normalize)
|
246
246
|
.into()
|
247
247
|
}
|
248
248
|
|
data/ext/polars/src/expr/name.rs
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
use magnus::{block::Proc, value::Opaque, Ruby};
|
2
2
|
use polars::prelude::*;
|
3
|
+
use polars_utils::format_pl_smallstr;
|
3
4
|
|
4
5
|
use crate::RbExpr;
|
5
6
|
|
@@ -15,9 +16,9 @@ impl RbExpr {
|
|
15
16
|
.name()
|
16
17
|
.map(move |name| {
|
17
18
|
let lambda = Ruby::get().unwrap().get_inner(lambda);
|
18
|
-
let out = lambda.call::<_, String>((name,));
|
19
|
+
let out = lambda.call::<_, String>((name.as_str(),));
|
19
20
|
match out {
|
20
|
-
Ok(out) => Ok(out),
|
21
|
+
Ok(out) => Ok(format_pl_smallstr!("{}", out)),
|
21
22
|
Err(e) => Err(PolarsError::ComputeError(
|
22
23
|
format!("Ruby function in 'name.map' produced an error: {}.", e).into(),
|
23
24
|
)),
|
@@ -19,6 +19,8 @@ impl RbExpr {
|
|
19
19
|
exact: bool,
|
20
20
|
cache: bool,
|
21
21
|
) -> Self {
|
22
|
+
let format = format.map(|x| x.into());
|
23
|
+
|
22
24
|
let options = StrptimeOptions {
|
23
25
|
format,
|
24
26
|
strict,
|
@@ -33,12 +35,15 @@ impl RbExpr {
|
|
33
35
|
&self,
|
34
36
|
format: Option<String>,
|
35
37
|
time_unit: Option<Wrap<TimeUnit>>,
|
36
|
-
time_zone: Option<TimeZone>,
|
38
|
+
time_zone: Option<Wrap<TimeZone>>,
|
37
39
|
strict: bool,
|
38
40
|
exact: bool,
|
39
41
|
cache: bool,
|
40
42
|
ambiguous: &Self,
|
41
43
|
) -> Self {
|
44
|
+
let format = format.map(|x| x.into());
|
45
|
+
let time_zone = time_zone.map(|x| x.0);
|
46
|
+
|
42
47
|
let options = StrptimeOptions {
|
43
48
|
format,
|
44
49
|
strict,
|
@@ -58,6 +63,8 @@ impl RbExpr {
|
|
58
63
|
}
|
59
64
|
|
60
65
|
pub fn str_to_time(&self, format: Option<String>, strict: bool, cache: bool) -> Self {
|
66
|
+
let format = format.map(|x| x.into());
|
67
|
+
|
61
68
|
let options = StrptimeOptions {
|
62
69
|
format,
|
63
70
|
strict,
|
@@ -11,9 +11,9 @@ pub fn read_ipc_schema(rb_f: Value) -> RbResult<RHash> {
|
|
11
11
|
let metadata = read_file_metadata(&mut r).map_err(RbPolarsErr::from)?;
|
12
12
|
|
13
13
|
let dict = RHash::new();
|
14
|
-
for field in
|
15
|
-
let dt: Wrap<DataType> = Wrap((&field.
|
16
|
-
dict.aset(field.name.
|
14
|
+
for field in metadata.schema.iter_values() {
|
15
|
+
let dt: Wrap<DataType> = Wrap((&field.dtype).into());
|
16
|
+
dict.aset(field.name.as_str(), dt)?;
|
17
17
|
}
|
18
18
|
Ok(dict)
|
19
19
|
}
|
@@ -26,9 +26,9 @@ pub fn read_parquet_schema(rb_f: Value) -> RbResult<RHash> {
|
|
26
26
|
let arrow_schema = infer_schema(&metadata).map_err(RbPolarsErr::from)?;
|
27
27
|
|
28
28
|
let dict = RHash::new();
|
29
|
-
for field in arrow_schema.
|
30
|
-
let dt: Wrap<DataType> = Wrap((&field.
|
31
|
-
dict.aset(field.name, dt)?;
|
29
|
+
for field in arrow_schema.iter_values() {
|
30
|
+
let dt: Wrap<DataType> = Wrap((&field.dtype).into());
|
31
|
+
dict.aset(field.name.as_str(), dt)?;
|
32
32
|
}
|
33
33
|
Ok(dict)
|
34
34
|
}
|
@@ -56,13 +56,14 @@ pub fn datetime_range(
|
|
56
56
|
every: String,
|
57
57
|
closed: Wrap<ClosedWindow>,
|
58
58
|
time_unit: Option<Wrap<TimeUnit>>,
|
59
|
-
time_zone: Option<TimeZone>,
|
59
|
+
time_zone: Option<Wrap<TimeZone>>,
|
60
60
|
) -> RbExpr {
|
61
61
|
let start = start.inner.clone();
|
62
62
|
let end = end.inner.clone();
|
63
63
|
let every = Duration::parse(&every);
|
64
64
|
let closed = closed.0;
|
65
65
|
let time_unit = time_unit.map(|x| x.0);
|
66
|
+
let time_zone = time_zone.map(|x| x.0);
|
66
67
|
dsl::datetime_range(start, end, every, closed, time_unit, time_zone).into()
|
67
68
|
}
|
68
69
|
|
@@ -72,13 +73,14 @@ pub fn datetime_ranges(
|
|
72
73
|
every: String,
|
73
74
|
closed: Wrap<ClosedWindow>,
|
74
75
|
time_unit: Option<Wrap<TimeUnit>>,
|
75
|
-
time_zone: Option<TimeZone>,
|
76
|
+
time_zone: Option<Wrap<TimeZone>>,
|
76
77
|
) -> RbExpr {
|
77
78
|
let start = start.inner.clone();
|
78
79
|
let end = end.inner.clone();
|
79
80
|
let every = Duration::parse(&every);
|
80
81
|
let closed = closed.0;
|
81
82
|
let time_unit = time_unit.map(|x| x.0);
|
83
|
+
let time_zone = time_zone.map(|x| x.0);
|
82
84
|
dsl::datetime_ranges(start, end, every, closed, time_unit, time_zone).into()
|
83
85
|
}
|
84
86
|
|
@@ -59,7 +59,7 @@ impl RbLazyFrame {
|
|
59
59
|
) -> RbResult<Self> {
|
60
60
|
let batch_size = batch_size.map(|v| v.0);
|
61
61
|
let row_index = row_index.map(|(name, offset)| RowIndex {
|
62
|
-
name:
|
62
|
+
name: name.into(),
|
63
63
|
offset,
|
64
64
|
});
|
65
65
|
|
@@ -106,14 +106,14 @@ impl RbLazyFrame {
|
|
106
106
|
let separator = separator.as_bytes()[0];
|
107
107
|
let eol_char = eol_char.as_bytes()[0];
|
108
108
|
let row_index = row_index.map(|(name, offset)| RowIndex {
|
109
|
-
name:
|
109
|
+
name: name.into(),
|
110
110
|
offset,
|
111
111
|
});
|
112
112
|
|
113
113
|
let overwrite_dtype = overwrite_dtype.map(|overwrite_dtype| {
|
114
114
|
overwrite_dtype
|
115
115
|
.into_iter()
|
116
|
-
.map(|(name, dtype)| Field::new(
|
116
|
+
.map(|(name, dtype)| Field::new((&*name).into(), dtype.0))
|
117
117
|
.collect::<Schema>()
|
118
118
|
});
|
119
119
|
|
@@ -128,7 +128,7 @@ impl RbLazyFrame {
|
|
128
128
|
.with_dtype_overwrite(overwrite_dtype.map(Arc::new))
|
129
129
|
// TODO add with_schema
|
130
130
|
.with_low_memory(low_memory)
|
131
|
-
.with_comment_prefix(comment_prefix.
|
131
|
+
.with_comment_prefix(comment_prefix.map(|x| x.into()))
|
132
132
|
.with_quote_char(quote_char)
|
133
133
|
.with_eol_char(eol_char)
|
134
134
|
.with_rechunk(rechunk)
|
@@ -176,7 +176,7 @@ impl RbLazyFrame {
|
|
176
176
|
};
|
177
177
|
|
178
178
|
let row_index = row_index.map(|(name, offset)| RowIndex {
|
179
|
-
name:
|
179
|
+
name: name.into(),
|
180
180
|
offset,
|
181
181
|
});
|
182
182
|
let hive_options = HiveOptions {
|
@@ -197,7 +197,7 @@ impl RbLazyFrame {
|
|
197
197
|
use_statistics,
|
198
198
|
hive_options,
|
199
199
|
glob,
|
200
|
-
include_file_paths: include_file_paths.map(
|
200
|
+
include_file_paths: include_file_paths.map(|x| x.into()),
|
201
201
|
};
|
202
202
|
|
203
203
|
let lf = if path.is_some() {
|
@@ -216,14 +216,13 @@ impl RbLazyFrame {
|
|
216
216
|
cache: bool,
|
217
217
|
rechunk: bool,
|
218
218
|
row_index: Option<(String, IdxSize)>,
|
219
|
-
memory_map: bool,
|
220
219
|
hive_partitioning: Option<bool>,
|
221
220
|
hive_schema: Option<Wrap<Schema>>,
|
222
221
|
try_parse_hive_dates: bool,
|
223
222
|
include_file_paths: Option<String>,
|
224
223
|
) -> RbResult<Self> {
|
225
224
|
let row_index = row_index.map(|(name, offset)| RowIndex {
|
226
|
-
name:
|
225
|
+
name: name.into(),
|
227
226
|
offset,
|
228
227
|
});
|
229
228
|
|
@@ -239,10 +238,9 @@ impl RbLazyFrame {
|
|
239
238
|
cache,
|
240
239
|
rechunk,
|
241
240
|
row_index,
|
242
|
-
memory_map,
|
243
241
|
cloud_options: None,
|
244
242
|
hive_options,
|
245
|
-
include_file_paths: include_file_paths.map(
|
243
|
+
include_file_paths: include_file_paths.map(|x| x.into()),
|
246
244
|
};
|
247
245
|
let lf = LazyFrame::scan_ipc(path, args).map_err(RbPolarsErr::from)?;
|
248
246
|
Ok(lf.into())
|
@@ -593,8 +591,8 @@ impl RbLazyFrame {
|
|
593
591
|
.force_parallel(force_parallel)
|
594
592
|
.how(JoinType::AsOf(AsOfOptions {
|
595
593
|
strategy: strategy.0,
|
596
|
-
left_by: left_by.map(
|
597
|
-
right_by: right_by.map(
|
594
|
+
left_by: left_by.map(strings_to_pl_smallstr),
|
595
|
+
right_by: right_by.map(strings_to_pl_smallstr),
|
598
596
|
tolerance: tolerance.map(|t| t.0.into_static().unwrap()),
|
599
597
|
tolerance_str: tolerance_str.map(|s| s.into()),
|
600
598
|
}))
|
@@ -744,8 +742,8 @@ impl RbLazyFrame {
|
|
744
742
|
) -> RbResult<Self> {
|
745
743
|
let ldf = self.ldf.borrow().clone();
|
746
744
|
Ok(match maintain_order {
|
747
|
-
true => ldf.
|
748
|
-
false => ldf.
|
745
|
+
true => ldf.unique_stable_generic(subset, keep.0),
|
746
|
+
false => ldf.unique_generic(subset, keep.0),
|
749
747
|
}
|
750
748
|
.into())
|
751
749
|
}
|
@@ -805,7 +803,11 @@ impl RbLazyFrame {
|
|
805
803
|
}
|
806
804
|
|
807
805
|
pub fn collect_schema(&self) -> RbResult<RHash> {
|
808
|
-
let schema = self
|
806
|
+
let schema = self
|
807
|
+
.ldf
|
808
|
+
.borrow_mut()
|
809
|
+
.collect_schema()
|
810
|
+
.map_err(RbPolarsErr::from)?;
|
809
811
|
|
810
812
|
let schema_dict = RHash::new();
|
811
813
|
schema.iter_fields().for_each(|fld| {
|
@@ -813,7 +815,7 @@ impl RbLazyFrame {
|
|
813
815
|
schema_dict
|
814
816
|
.aset::<String, Value>(
|
815
817
|
fld.name().to_string(),
|
816
|
-
Wrap(fld.
|
818
|
+
Wrap(fld.dtype().clone()).into_value(),
|
817
819
|
)
|
818
820
|
.unwrap();
|
819
821
|
});
|
data/ext/polars/src/lib.rs
CHANGED
@@ -711,7 +711,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
711
711
|
"new_from_parquet",
|
712
712
|
function!(RbLazyFrame::new_from_parquet, 14),
|
713
713
|
)?;
|
714
|
-
class.define_singleton_method("new_from_ipc", function!(RbLazyFrame::new_from_ipc, 10))?;
|
714
|
+
class.define_singleton_method("new_from_ipc", function!(RbLazyFrame::new_from_ipc, 9))?;
|
715
715
|
class.define_method("write_json", method!(RbLazyFrame::write_json, 1))?;
|
716
716
|
class.define_method("describe_plan", method!(RbLazyFrame::describe_plan, 0))?;
|
717
717
|
class.define_method(
|