polars-df 0.13.0 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/Cargo.lock +177 -141
- data/ext/polars/Cargo.toml +5 -6
- data/ext/polars/src/batched_csv.rs +3 -3
- data/ext/polars/src/conversion/any_value.rs +10 -4
- data/ext/polars/src/conversion/chunked_array.rs +3 -3
- data/ext/polars/src/conversion/mod.rs +36 -20
- data/ext/polars/src/dataframe/construction.rs +4 -4
- data/ext/polars/src/dataframe/general.rs +6 -5
- data/ext/polars/src/dataframe/io.rs +6 -6
- data/ext/polars/src/expr/datetime.rs +11 -3
- data/ext/polars/src/expr/general.rs +1 -1
- data/ext/polars/src/expr/name.rs +3 -2
- data/ext/polars/src/expr/string.rs +8 -1
- data/ext/polars/src/functions/io.rs +6 -6
- data/ext/polars/src/functions/range.rs +4 -2
- data/ext/polars/src/lazyframe/mod.rs +18 -16
- data/ext/polars/src/lib.rs +1 -1
- data/ext/polars/src/map/dataframe.rs +36 -8
- data/ext/polars/src/map/mod.rs +8 -8
- data/ext/polars/src/map/series.rs +106 -64
- data/ext/polars/src/on_startup.rs +1 -1
- data/ext/polars/src/series/construction.rs +50 -23
- data/ext/polars/src/series/mod.rs +4 -4
- data/lib/polars/data_frame.rb +10 -10
- data/lib/polars/expr.rb +6 -6
- data/lib/polars/io/ipc.rb +0 -8
- data/lib/polars/series.rb +5 -5
- data/lib/polars/version.rb +1 -1
- metadata +3 -3
data/ext/polars/Cargo.toml
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
[package]
|
2
2
|
name = "polars"
|
3
|
-
version = "0.13.0"
|
3
|
+
version = "0.14.0"
|
4
4
|
license = "MIT"
|
5
5
|
authors = ["Andrew Kane <andrew@ankane.org>"]
|
6
6
|
edition = "2021"
|
@@ -15,14 +15,13 @@ ahash = "0.8"
|
|
15
15
|
chrono = "0.4"
|
16
16
|
either = "1.8"
|
17
17
|
magnus = "0.7"
|
18
|
-
polars-core = "=0.
|
19
|
-
polars-parquet = "=0.
|
20
|
-
polars-utils = "=0.
|
18
|
+
polars-core = "=0.43.1"
|
19
|
+
polars-parquet = "=0.43.1"
|
20
|
+
polars-utils = "=0.43.1"
|
21
21
|
serde_json = "1"
|
22
|
-
smartstring = "1"
|
23
22
|
|
24
23
|
[dependencies.polars]
|
25
|
-
version = "=0.
|
24
|
+
version = "=0.43.1"
|
26
25
|
features = [
|
27
26
|
"abs",
|
28
27
|
"approx_unique",
|
@@ -54,7 +54,7 @@ impl RbBatchedCsv {
|
|
54
54
|
let null_values = null_values.map(|w| w.0);
|
55
55
|
let eol_char = eol_char.as_bytes()[0];
|
56
56
|
let row_index = row_index.map(|(name, offset)| RowIndex {
|
57
|
-
name:
|
57
|
+
name: name.into(),
|
58
58
|
offset,
|
59
59
|
});
|
60
60
|
let quote_char = if let Some(s) = quote_char {
|
@@ -72,7 +72,7 @@ impl RbBatchedCsv {
|
|
72
72
|
.iter()
|
73
73
|
.map(|(name, dtype)| {
|
74
74
|
let dtype = dtype.0.clone();
|
75
|
-
Field::new(name, dtype)
|
75
|
+
Field::new((&**name).into(), dtype)
|
76
76
|
})
|
77
77
|
.collect::<Schema>()
|
78
78
|
});
|
@@ -95,7 +95,7 @@ impl RbBatchedCsv {
|
|
95
95
|
.with_projection(projection.map(Arc::new))
|
96
96
|
.with_rechunk(rechunk)
|
97
97
|
.with_chunk_size(chunk_size)
|
98
|
-
.with_columns(columns.map(
|
98
|
+
.with_columns(columns.map(|x| x.into_iter().map(PlSmallStr::from_string).collect()))
|
99
99
|
.with_n_threads(n_threads)
|
100
100
|
.with_dtype_overwrite(overwrite_dtype_slice.map(Arc::new))
|
101
101
|
.with_low_memory(low_memory)
|
@@ -51,7 +51,10 @@ pub(crate) fn any_value_into_rb_object(av: AnyValue, ruby: &Ruby) -> Value {
|
|
51
51
|
AnyValue::Datetime(v, time_unit, time_zone) => {
|
52
52
|
let time_unit = time_unit.to_ascii();
|
53
53
|
utils()
|
54
|
-
.funcall(
|
54
|
+
.funcall(
|
55
|
+
"_to_ruby_datetime",
|
56
|
+
(v, time_unit, time_zone.as_ref().map(|v| v.to_string())),
|
57
|
+
)
|
55
58
|
.unwrap()
|
56
59
|
}
|
57
60
|
AnyValue::Duration(v, time_unit) => {
|
@@ -122,7 +125,10 @@ pub(crate) fn rb_object_to_any_value<'s>(ob: Value, strict: bool) -> RbResult<An
|
|
122
125
|
fn get_list(ob: Value, _strict: bool) -> RbResult<AnyValue<'static>> {
|
123
126
|
let v = RArray::from_value(ob).unwrap();
|
124
127
|
if v.is_empty() {
|
125
|
-
Ok(AnyValue::List(Series::new_empty(
|
128
|
+
Ok(AnyValue::List(Series::new_empty(
|
129
|
+
PlSmallStr::EMPTY,
|
130
|
+
&DataType::Null,
|
131
|
+
)))
|
126
132
|
} else {
|
127
133
|
let list = v;
|
128
134
|
|
@@ -142,7 +148,7 @@ pub(crate) fn rb_object_to_any_value<'s>(ob: Value, strict: bool) -> RbResult<An
|
|
142
148
|
avs.push(Wrap::<AnyValue>::try_convert(item)?.0)
|
143
149
|
}
|
144
150
|
|
145
|
-
let s = Series::from_any_values_and_dtype(
|
151
|
+
let s = Series::from_any_values_and_dtype(PlSmallStr::EMPTY, &avs, &dtype, true)
|
146
152
|
.map_err(RbPolarsErr::from)?;
|
147
153
|
Ok(AnyValue::List(s))
|
148
154
|
}
|
@@ -162,7 +168,7 @@ pub(crate) fn rb_object_to_any_value<'s>(ob: Value, strict: bool) -> RbResult<An
|
|
162
168
|
let key = String::try_convert(k)?;
|
163
169
|
let val = Wrap::<AnyValue>::try_convert(v)?.0;
|
164
170
|
let dtype = DataType::from(&val);
|
165
|
-
keys.push(Field::new(
|
171
|
+
keys.push(Field::new(key.into(), dtype));
|
166
172
|
vals.push(val);
|
167
173
|
Ok(ForEach::Continue)
|
168
174
|
})?;
|
@@ -9,7 +9,7 @@ use crate::RbResult;
|
|
9
9
|
impl TryConvert for Wrap<StringChunked> {
|
10
10
|
fn try_convert(obj: Value) -> RbResult<Self> {
|
11
11
|
let (seq, len) = get_rbseq(obj)?;
|
12
|
-
let mut builder = StringChunkedBuilder::new(
|
12
|
+
let mut builder = StringChunkedBuilder::new(PlSmallStr::EMPTY, len);
|
13
13
|
|
14
14
|
for res in seq.into_iter() {
|
15
15
|
let item = res;
|
@@ -25,7 +25,7 @@ impl TryConvert for Wrap<StringChunked> {
|
|
25
25
|
impl TryConvert for Wrap<BinaryChunked> {
|
26
26
|
fn try_convert(obj: Value) -> RbResult<Self> {
|
27
27
|
let (seq, len) = get_rbseq(obj)?;
|
28
|
-
let mut builder = BinaryChunkedBuilder::new(
|
28
|
+
let mut builder = BinaryChunkedBuilder::new(PlSmallStr::EMPTY, len);
|
29
29
|
|
30
30
|
for res in seq.into_iter() {
|
31
31
|
let item = res;
|
@@ -90,7 +90,7 @@ impl IntoValue for Wrap<&DatetimeChunked> {
|
|
90
90
|
fn into_value_with(self, _: &Ruby) -> Value {
|
91
91
|
let utils = utils();
|
92
92
|
let time_unit = Wrap(self.0.time_unit()).into_value();
|
93
|
-
let time_zone = self.0.time_zone().
|
93
|
+
let time_zone = self.0.time_zone().as_deref().map(|v| v.into_value());
|
94
94
|
let iter = self.0.into_iter().map(|opt_v| {
|
95
95
|
opt_v.map(|v| {
|
96
96
|
utils
|
@@ -20,7 +20,6 @@ use polars::series::ops::NullBehavior;
|
|
20
20
|
use polars_core::utils::arrow::array::Array;
|
21
21
|
use polars_core::utils::materialize_dyn_int;
|
22
22
|
use polars_utils::total_ord::{TotalEq, TotalHash};
|
23
|
-
use smartstring::alias::String as SmartString;
|
24
23
|
|
25
24
|
use crate::object::OBJECT_NAME;
|
26
25
|
use crate::rb_modules::series;
|
@@ -84,14 +83,26 @@ pub(crate) fn to_series(s: RbSeries) -> Value {
|
|
84
83
|
.unwrap()
|
85
84
|
}
|
86
85
|
|
86
|
+
impl TryConvert for Wrap<PlSmallStr> {
|
87
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
88
|
+
Ok(Wrap((&*String::try_convert(ob)?).into()))
|
89
|
+
}
|
90
|
+
}
|
91
|
+
|
87
92
|
impl TryConvert for Wrap<NullValues> {
|
88
93
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
89
94
|
if let Ok(s) = String::try_convert(ob) {
|
90
|
-
Ok(Wrap(NullValues::AllColumnsSingle(s)))
|
95
|
+
Ok(Wrap(NullValues::AllColumnsSingle((&*s).into())))
|
91
96
|
} else if let Ok(s) = Vec::<String>::try_convert(ob) {
|
92
|
-
Ok(Wrap(NullValues::AllColumns(
|
97
|
+
Ok(Wrap(NullValues::AllColumns(
|
98
|
+
s.into_iter().map(|x| (&*x).into()).collect(),
|
99
|
+
)))
|
93
100
|
} else if let Ok(s) = Vec::<(String, String)>::try_convert(ob) {
|
94
|
-
Ok(Wrap(NullValues::Named(
|
101
|
+
Ok(Wrap(NullValues::Named(
|
102
|
+
s.into_iter()
|
103
|
+
.map(|(a, b)| ((&*a).into(), (&*b).into()))
|
104
|
+
.collect(),
|
105
|
+
)))
|
95
106
|
} else {
|
96
107
|
Err(RbPolarsErr::other(
|
97
108
|
"could not extract value from null_values argument".into(),
|
@@ -189,7 +200,7 @@ impl IntoValue for Wrap<DataType> {
|
|
189
200
|
DataType::Datetime(tu, tz) => {
|
190
201
|
let datetime_class = pl.const_get::<_, Value>("Datetime").unwrap();
|
191
202
|
datetime_class
|
192
|
-
.funcall::<_, _, Value>("new", (tu.to_ascii(), tz))
|
203
|
+
.funcall::<_, _, Value>("new", (tu.to_ascii(), tz.as_deref()))
|
193
204
|
.unwrap()
|
194
205
|
}
|
195
206
|
DataType::Duration(tu) => {
|
@@ -210,7 +221,9 @@ impl IntoValue for Wrap<DataType> {
|
|
210
221
|
// we should always have an initialized rev_map coming from rust
|
211
222
|
let categories = rev_map.as_ref().unwrap().get_categories();
|
212
223
|
let class = pl.const_get::<_, Value>("Enum").unwrap();
|
213
|
-
let s =
|
224
|
+
let s =
|
225
|
+
Series::from_arrow(PlSmallStr::from_static("category"), categories.to_boxed())
|
226
|
+
.unwrap();
|
214
227
|
let series = to_series(s.into());
|
215
228
|
class.funcall::<_, _, Value>("new", (series,)).unwrap()
|
216
229
|
}
|
@@ -222,7 +235,7 @@ impl IntoValue for Wrap<DataType> {
|
|
222
235
|
let field_class = pl.const_get::<_, Value>("Field").unwrap();
|
223
236
|
let iter = fields.iter().map(|fld| {
|
224
237
|
let name = fld.name().as_str();
|
225
|
-
let dtype = Wrap(fld.
|
238
|
+
let dtype = Wrap(fld.dtype().clone());
|
226
239
|
field_class
|
227
240
|
.funcall::<_, _, Value>("new", (name, dtype))
|
228
241
|
.unwrap()
|
@@ -276,7 +289,7 @@ impl TryConvert for Wrap<Field> {
|
|
276
289
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
277
290
|
let name: String = ob.funcall("name", ())?;
|
278
291
|
let dtype: Wrap<DataType> = ob.funcall("dtype", ())?;
|
279
|
-
Ok(Wrap(Field::new(
|
292
|
+
Ok(Wrap(Field::new((&*name).into(), dtype.0)))
|
280
293
|
}
|
281
294
|
}
|
282
295
|
|
@@ -341,7 +354,7 @@ impl TryConvert for Wrap<DataType> {
|
|
341
354
|
let s = get_series(categories)?;
|
342
355
|
let ca = s.str().map_err(RbPolarsErr::from)?;
|
343
356
|
let categories = ca.downcast_iter().next().unwrap().clone();
|
344
|
-
|
357
|
+
create_enum_dtype(categories)
|
345
358
|
}
|
346
359
|
"Polars::Date" => DataType::Date,
|
347
360
|
"Polars::Time" => DataType::Time,
|
@@ -357,8 +370,8 @@ impl TryConvert for Wrap<DataType> {
|
|
357
370
|
"Polars::Datetime" => {
|
358
371
|
let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
|
359
372
|
let time_unit = Wrap::<TimeUnit>::try_convert(time_unit)?.0;
|
360
|
-
let time_zone = ob.funcall("time_zone", ())?;
|
361
|
-
DataType::Datetime(time_unit, time_zone)
|
373
|
+
let time_zone: Option<String> = ob.funcall("time_zone", ())?;
|
374
|
+
DataType::Datetime(time_unit, time_zone.as_deref().map(|x| x.into()))
|
362
375
|
}
|
363
376
|
"Polars::Decimal" => {
|
364
377
|
let precision = ob.funcall("precision", ())?;
|
@@ -463,7 +476,7 @@ impl TryConvert for Wrap<Schema> {
|
|
463
476
|
|
464
477
|
let mut schema = Vec::new();
|
465
478
|
dict.foreach(|key: String, val: Wrap<DataType>| {
|
466
|
-
schema.push(Ok(Field::new(
|
479
|
+
schema.push(Ok(Field::new((&*key).into(), val.0)));
|
467
480
|
Ok(ForEach::Continue)
|
468
481
|
})
|
469
482
|
.unwrap();
|
@@ -1053,14 +1066,6 @@ pub fn parse_parquet_compression(
|
|
1053
1066
|
Ok(parsed)
|
1054
1067
|
}
|
1055
1068
|
|
1056
|
-
pub(crate) fn strings_to_smartstrings<I, S>(container: I) -> Vec<SmartString>
|
1057
|
-
where
|
1058
|
-
I: IntoIterator<Item = S>,
|
1059
|
-
S: AsRef<str>,
|
1060
|
-
{
|
1061
|
-
container.into_iter().map(|s| s.as_ref().into()).collect()
|
1062
|
-
}
|
1063
|
-
|
1064
1069
|
impl TryConvert for Wrap<NonZeroUsize> {
|
1065
1070
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
1066
1071
|
let v = usize::try_convert(ob)?;
|
@@ -1069,3 +1074,14 @@ impl TryConvert for Wrap<NonZeroUsize> {
|
|
1069
1074
|
.ok_or(RbValueError::new_err("must be non-zero".into()))
|
1070
1075
|
}
|
1071
1076
|
}
|
1077
|
+
|
1078
|
+
pub(crate) fn strings_to_pl_smallstr<I, S>(container: I) -> Vec<PlSmallStr>
|
1079
|
+
where
|
1080
|
+
I: IntoIterator<Item = S>,
|
1081
|
+
S: AsRef<str>,
|
1082
|
+
{
|
1083
|
+
container
|
1084
|
+
.into_iter()
|
1085
|
+
.map(|s| PlSmallStr::from_str(s.as_ref()))
|
1086
|
+
.collect()
|
1087
|
+
}
|
@@ -79,7 +79,7 @@ fn update_schema_from_rows(
|
|
79
79
|
rows: &[Row],
|
80
80
|
infer_schema_length: Option<usize>,
|
81
81
|
) -> RbResult<()> {
|
82
|
-
let schema_is_complete = schema.
|
82
|
+
let schema_is_complete = schema.iter_values().all(|dtype| dtype.is_known());
|
83
83
|
if schema_is_complete {
|
84
84
|
return Ok(());
|
85
85
|
}
|
@@ -89,7 +89,7 @@ fn update_schema_from_rows(
|
|
89
89
|
rows_to_supertypes(rows, infer_schema_length).map_err(RbPolarsErr::from)?;
|
90
90
|
let inferred_dtypes_slice = inferred_dtypes.as_slice();
|
91
91
|
|
92
|
-
for (i, dtype) in schema.
|
92
|
+
for (i, dtype) in schema.iter_values_mut().enumerate() {
|
93
93
|
if !dtype.is_known() {
|
94
94
|
*dtype = inferred_dtypes_slice.get(i).ok_or_else(|| {
|
95
95
|
polars_err!(SchemaMismatch: "the number of columns in the schema does not match the data")
|
@@ -110,7 +110,7 @@ fn resolve_schema_overrides(schema: &mut Schema, schema_overrides: Option<Schema
|
|
110
110
|
}
|
111
111
|
|
112
112
|
fn erase_decimal_precision_scale(schema: &mut Schema) {
|
113
|
-
for dtype in schema.
|
113
|
+
for dtype in schema.iter_values_mut() {
|
114
114
|
if let DataType::Decimal(_, _) = dtype {
|
115
115
|
*dtype = DataType::Decimal(None, None)
|
116
116
|
}
|
@@ -123,7 +123,7 @@ where
|
|
123
123
|
{
|
124
124
|
let fields = column_names
|
125
125
|
.into_iter()
|
126
|
-
.map(|c| Field::new(c, DataType::Unknown(Default::default())));
|
126
|
+
.map(|c| Field::new(c.into(), DataType::Unknown(Default::default())));
|
127
127
|
Schema::from_iter(fields)
|
128
128
|
}
|
129
129
|
|
@@ -9,6 +9,7 @@ use crate::map::dataframe::{
|
|
9
9
|
apply_lambda_unknown, apply_lambda_with_bool_out_type, apply_lambda_with_primitive_out_type,
|
10
10
|
apply_lambda_with_utf8_out_type,
|
11
11
|
};
|
12
|
+
use crate::prelude::strings_to_pl_smallstr;
|
12
13
|
use crate::series::{to_rbseries_collection, to_series_collection};
|
13
14
|
use crate::{RbDataFrame, RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
|
14
15
|
|
@@ -254,7 +255,7 @@ impl RbDataFrame {
|
|
254
255
|
}
|
255
256
|
|
256
257
|
pub fn gather(&self, indices: Vec<IdxSize>) -> RbResult<Self> {
|
257
|
-
let indices = IdxCa::from_vec("", indices);
|
258
|
+
let indices = IdxCa::from_vec("".into(), indices);
|
258
259
|
let df = self.df.borrow().take(&indices).map_err(RbPolarsErr::from)?;
|
259
260
|
Ok(RbDataFrame::new(df))
|
260
261
|
}
|
@@ -332,7 +333,7 @@ impl RbDataFrame {
|
|
332
333
|
let df = self
|
333
334
|
.df
|
334
335
|
.borrow()
|
335
|
-
.with_row_index(
|
336
|
+
.with_row_index(name.into(), offset)
|
336
337
|
.map_err(RbPolarsErr::from)?;
|
337
338
|
Ok(df.into())
|
338
339
|
}
|
@@ -349,8 +350,8 @@ impl RbDataFrame {
|
|
349
350
|
variable_name: Option<String>,
|
350
351
|
) -> RbResult<Self> {
|
351
352
|
let args = UnpivotArgsIR {
|
352
|
-
on:
|
353
|
-
index:
|
353
|
+
on: strings_to_pl_smallstr(on),
|
354
|
+
index: strings_to_pl_smallstr(index),
|
354
355
|
value_name: value_name.map(|s| s.into()),
|
355
356
|
variable_name: variable_name.map(|s| s.into()),
|
356
357
|
};
|
@@ -581,7 +582,7 @@ impl RbDataFrame {
|
|
581
582
|
}
|
582
583
|
|
583
584
|
pub fn to_struct(&self, name: String) -> RbSeries {
|
584
|
-
let s = self.df.borrow().clone().into_struct(
|
585
|
+
let s = self.df.borrow().clone().into_struct(name.into());
|
585
586
|
s.into_series().into()
|
586
587
|
}
|
587
588
|
|
@@ -50,7 +50,7 @@ impl RbDataFrame {
|
|
50
50
|
let null_values = null_values.map(|w| w.0);
|
51
51
|
let eol_char = eol_char.as_bytes()[0];
|
52
52
|
let row_index = row_index.map(|(name, offset)| RowIndex {
|
53
|
-
name:
|
53
|
+
name: name.into(),
|
54
54
|
offset,
|
55
55
|
});
|
56
56
|
let quote_char = if let Some(s) = quote_char {
|
@@ -68,7 +68,7 @@ impl RbDataFrame {
|
|
68
68
|
.iter()
|
69
69
|
.map(|(name, dtype)| {
|
70
70
|
let dtype = dtype.0.clone();
|
71
|
-
Field::new(name, dtype)
|
71
|
+
Field::new((&**name).into(), dtype)
|
72
72
|
})
|
73
73
|
.collect::<Schema>()
|
74
74
|
});
|
@@ -91,7 +91,7 @@ impl RbDataFrame {
|
|
91
91
|
.with_projection(projection.map(Arc::new))
|
92
92
|
.with_rechunk(rechunk)
|
93
93
|
.with_chunk_size(chunk_size)
|
94
|
-
.with_columns(columns.map(
|
94
|
+
.with_columns(columns.map(|x| x.into_iter().map(|x| x.into()).collect()))
|
95
95
|
.with_n_threads(n_threads)
|
96
96
|
.with_schema_overwrite(overwrite_dtype.map(Arc::new))
|
97
97
|
.with_dtype_overwrite(overwrite_dtype_slice.map(Arc::new))
|
@@ -135,7 +135,7 @@ impl RbDataFrame {
|
|
135
135
|
use EitherRustRubyFile::*;
|
136
136
|
|
137
137
|
let row_index = row_index.map(|(name, offset)| RowIndex {
|
138
|
-
name:
|
138
|
+
name: name.into(),
|
139
139
|
offset,
|
140
140
|
});
|
141
141
|
let result = match get_either_file(rb_f, false)? {
|
@@ -225,7 +225,7 @@ impl RbDataFrame {
|
|
225
225
|
_memory_map: bool,
|
226
226
|
) -> RbResult<Self> {
|
227
227
|
let row_index = row_index.map(|(name, offset)| RowIndex {
|
228
|
-
name:
|
228
|
+
name: name.into(),
|
229
229
|
offset,
|
230
230
|
});
|
231
231
|
let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
|
@@ -252,7 +252,7 @@ impl RbDataFrame {
|
|
252
252
|
rechunk: bool,
|
253
253
|
) -> RbResult<Self> {
|
254
254
|
let row_index = row_index.map(|(name, offset)| RowIndex {
|
255
|
-
name:
|
255
|
+
name: name.into(),
|
256
256
|
offset,
|
257
257
|
});
|
258
258
|
// rb_f = read_if_bytesio(rb_f);
|
@@ -29,8 +29,12 @@ impl RbExpr {
|
|
29
29
|
self.inner.clone().dt().with_time_unit(tu.0).into()
|
30
30
|
}
|
31
31
|
|
32
|
-
pub fn dt_convert_time_zone(&self,
|
33
|
-
self.inner
|
32
|
+
pub fn dt_convert_time_zone(&self, time_zone: String) -> Self {
|
33
|
+
self.inner
|
34
|
+
.clone()
|
35
|
+
.dt()
|
36
|
+
.convert_time_zone(time_zone.into())
|
37
|
+
.into()
|
34
38
|
}
|
35
39
|
|
36
40
|
pub fn dt_cast_time_unit(&self, tu: Wrap<TimeUnit>) -> Self {
|
@@ -46,7 +50,11 @@ impl RbExpr {
|
|
46
50
|
self.inner
|
47
51
|
.clone()
|
48
52
|
.dt()
|
49
|
-
.replace_time_zone(
|
53
|
+
.replace_time_zone(
|
54
|
+
time_zone.map(|x| x.into()),
|
55
|
+
ambiguous.inner.clone(),
|
56
|
+
non_existent.0,
|
57
|
+
)
|
50
58
|
.into()
|
51
59
|
}
|
52
60
|
|
@@ -242,7 +242,7 @@ impl RbExpr {
|
|
242
242
|
pub fn value_counts(&self, sort: bool, parallel: bool, name: String, normalize: bool) -> Self {
|
243
243
|
self.inner
|
244
244
|
.clone()
|
245
|
-
.value_counts(sort, parallel, name, normalize)
|
245
|
+
.value_counts(sort, parallel, name.as_str(), normalize)
|
246
246
|
.into()
|
247
247
|
}
|
248
248
|
|
data/ext/polars/src/expr/name.rs
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
use magnus::{block::Proc, value::Opaque, Ruby};
|
2
2
|
use polars::prelude::*;
|
3
|
+
use polars_utils::format_pl_smallstr;
|
3
4
|
|
4
5
|
use crate::RbExpr;
|
5
6
|
|
@@ -15,9 +16,9 @@ impl RbExpr {
|
|
15
16
|
.name()
|
16
17
|
.map(move |name| {
|
17
18
|
let lambda = Ruby::get().unwrap().get_inner(lambda);
|
18
|
-
let out = lambda.call::<_, String>((name,));
|
19
|
+
let out = lambda.call::<_, String>((name.as_str(),));
|
19
20
|
match out {
|
20
|
-
Ok(out) => Ok(out),
|
21
|
+
Ok(out) => Ok(format_pl_smallstr!("{}", out)),
|
21
22
|
Err(e) => Err(PolarsError::ComputeError(
|
22
23
|
format!("Ruby function in 'name.map' produced an error: {}.", e).into(),
|
23
24
|
)),
|
@@ -19,6 +19,8 @@ impl RbExpr {
|
|
19
19
|
exact: bool,
|
20
20
|
cache: bool,
|
21
21
|
) -> Self {
|
22
|
+
let format = format.map(|x| x.into());
|
23
|
+
|
22
24
|
let options = StrptimeOptions {
|
23
25
|
format,
|
24
26
|
strict,
|
@@ -33,12 +35,15 @@ impl RbExpr {
|
|
33
35
|
&self,
|
34
36
|
format: Option<String>,
|
35
37
|
time_unit: Option<Wrap<TimeUnit>>,
|
36
|
-
time_zone: Option<TimeZone>,
|
38
|
+
time_zone: Option<Wrap<TimeZone>>,
|
37
39
|
strict: bool,
|
38
40
|
exact: bool,
|
39
41
|
cache: bool,
|
40
42
|
ambiguous: &Self,
|
41
43
|
) -> Self {
|
44
|
+
let format = format.map(|x| x.into());
|
45
|
+
let time_zone = time_zone.map(|x| x.0);
|
46
|
+
|
42
47
|
let options = StrptimeOptions {
|
43
48
|
format,
|
44
49
|
strict,
|
@@ -58,6 +63,8 @@ impl RbExpr {
|
|
58
63
|
}
|
59
64
|
|
60
65
|
pub fn str_to_time(&self, format: Option<String>, strict: bool, cache: bool) -> Self {
|
66
|
+
let format = format.map(|x| x.into());
|
67
|
+
|
61
68
|
let options = StrptimeOptions {
|
62
69
|
format,
|
63
70
|
strict,
|
@@ -11,9 +11,9 @@ pub fn read_ipc_schema(rb_f: Value) -> RbResult<RHash> {
|
|
11
11
|
let metadata = read_file_metadata(&mut r).map_err(RbPolarsErr::from)?;
|
12
12
|
|
13
13
|
let dict = RHash::new();
|
14
|
-
for field in
|
15
|
-
let dt: Wrap<DataType> = Wrap((&field.
|
16
|
-
dict.aset(field.name.
|
14
|
+
for field in metadata.schema.iter_values() {
|
15
|
+
let dt: Wrap<DataType> = Wrap((&field.dtype).into());
|
16
|
+
dict.aset(field.name.as_str(), dt)?;
|
17
17
|
}
|
18
18
|
Ok(dict)
|
19
19
|
}
|
@@ -26,9 +26,9 @@ pub fn read_parquet_schema(rb_f: Value) -> RbResult<RHash> {
|
|
26
26
|
let arrow_schema = infer_schema(&metadata).map_err(RbPolarsErr::from)?;
|
27
27
|
|
28
28
|
let dict = RHash::new();
|
29
|
-
for field in arrow_schema.
|
30
|
-
let dt: Wrap<DataType> = Wrap((&field.
|
31
|
-
dict.aset(field.name, dt)?;
|
29
|
+
for field in arrow_schema.iter_values() {
|
30
|
+
let dt: Wrap<DataType> = Wrap((&field.dtype).into());
|
31
|
+
dict.aset(field.name.as_str(), dt)?;
|
32
32
|
}
|
33
33
|
Ok(dict)
|
34
34
|
}
|
@@ -56,13 +56,14 @@ pub fn datetime_range(
|
|
56
56
|
every: String,
|
57
57
|
closed: Wrap<ClosedWindow>,
|
58
58
|
time_unit: Option<Wrap<TimeUnit>>,
|
59
|
-
time_zone: Option<TimeZone>,
|
59
|
+
time_zone: Option<Wrap<TimeZone>>,
|
60
60
|
) -> RbExpr {
|
61
61
|
let start = start.inner.clone();
|
62
62
|
let end = end.inner.clone();
|
63
63
|
let every = Duration::parse(&every);
|
64
64
|
let closed = closed.0;
|
65
65
|
let time_unit = time_unit.map(|x| x.0);
|
66
|
+
let time_zone = time_zone.map(|x| x.0);
|
66
67
|
dsl::datetime_range(start, end, every, closed, time_unit, time_zone).into()
|
67
68
|
}
|
68
69
|
|
@@ -72,13 +73,14 @@ pub fn datetime_ranges(
|
|
72
73
|
every: String,
|
73
74
|
closed: Wrap<ClosedWindow>,
|
74
75
|
time_unit: Option<Wrap<TimeUnit>>,
|
75
|
-
time_zone: Option<TimeZone>,
|
76
|
+
time_zone: Option<Wrap<TimeZone>>,
|
76
77
|
) -> RbExpr {
|
77
78
|
let start = start.inner.clone();
|
78
79
|
let end = end.inner.clone();
|
79
80
|
let every = Duration::parse(&every);
|
80
81
|
let closed = closed.0;
|
81
82
|
let time_unit = time_unit.map(|x| x.0);
|
83
|
+
let time_zone = time_zone.map(|x| x.0);
|
82
84
|
dsl::datetime_ranges(start, end, every, closed, time_unit, time_zone).into()
|
83
85
|
}
|
84
86
|
|
@@ -59,7 +59,7 @@ impl RbLazyFrame {
|
|
59
59
|
) -> RbResult<Self> {
|
60
60
|
let batch_size = batch_size.map(|v| v.0);
|
61
61
|
let row_index = row_index.map(|(name, offset)| RowIndex {
|
62
|
-
name:
|
62
|
+
name: name.into(),
|
63
63
|
offset,
|
64
64
|
});
|
65
65
|
|
@@ -106,14 +106,14 @@ impl RbLazyFrame {
|
|
106
106
|
let separator = separator.as_bytes()[0];
|
107
107
|
let eol_char = eol_char.as_bytes()[0];
|
108
108
|
let row_index = row_index.map(|(name, offset)| RowIndex {
|
109
|
-
name:
|
109
|
+
name: name.into(),
|
110
110
|
offset,
|
111
111
|
});
|
112
112
|
|
113
113
|
let overwrite_dtype = overwrite_dtype.map(|overwrite_dtype| {
|
114
114
|
overwrite_dtype
|
115
115
|
.into_iter()
|
116
|
-
.map(|(name, dtype)| Field::new(
|
116
|
+
.map(|(name, dtype)| Field::new((&*name).into(), dtype.0))
|
117
117
|
.collect::<Schema>()
|
118
118
|
});
|
119
119
|
|
@@ -128,7 +128,7 @@ impl RbLazyFrame {
|
|
128
128
|
.with_dtype_overwrite(overwrite_dtype.map(Arc::new))
|
129
129
|
// TODO add with_schema
|
130
130
|
.with_low_memory(low_memory)
|
131
|
-
.with_comment_prefix(comment_prefix.
|
131
|
+
.with_comment_prefix(comment_prefix.map(|x| x.into()))
|
132
132
|
.with_quote_char(quote_char)
|
133
133
|
.with_eol_char(eol_char)
|
134
134
|
.with_rechunk(rechunk)
|
@@ -176,7 +176,7 @@ impl RbLazyFrame {
|
|
176
176
|
};
|
177
177
|
|
178
178
|
let row_index = row_index.map(|(name, offset)| RowIndex {
|
179
|
-
name:
|
179
|
+
name: name.into(),
|
180
180
|
offset,
|
181
181
|
});
|
182
182
|
let hive_options = HiveOptions {
|
@@ -197,7 +197,7 @@ impl RbLazyFrame {
|
|
197
197
|
use_statistics,
|
198
198
|
hive_options,
|
199
199
|
glob,
|
200
|
-
include_file_paths: include_file_paths.map(
|
200
|
+
include_file_paths: include_file_paths.map(|x| x.into()),
|
201
201
|
};
|
202
202
|
|
203
203
|
let lf = if path.is_some() {
|
@@ -216,14 +216,13 @@ impl RbLazyFrame {
|
|
216
216
|
cache: bool,
|
217
217
|
rechunk: bool,
|
218
218
|
row_index: Option<(String, IdxSize)>,
|
219
|
-
memory_map: bool,
|
220
219
|
hive_partitioning: Option<bool>,
|
221
220
|
hive_schema: Option<Wrap<Schema>>,
|
222
221
|
try_parse_hive_dates: bool,
|
223
222
|
include_file_paths: Option<String>,
|
224
223
|
) -> RbResult<Self> {
|
225
224
|
let row_index = row_index.map(|(name, offset)| RowIndex {
|
226
|
-
name:
|
225
|
+
name: name.into(),
|
227
226
|
offset,
|
228
227
|
});
|
229
228
|
|
@@ -239,10 +238,9 @@ impl RbLazyFrame {
|
|
239
238
|
cache,
|
240
239
|
rechunk,
|
241
240
|
row_index,
|
242
|
-
memory_map,
|
243
241
|
cloud_options: None,
|
244
242
|
hive_options,
|
245
|
-
include_file_paths: include_file_paths.map(
|
243
|
+
include_file_paths: include_file_paths.map(|x| x.into()),
|
246
244
|
};
|
247
245
|
let lf = LazyFrame::scan_ipc(path, args).map_err(RbPolarsErr::from)?;
|
248
246
|
Ok(lf.into())
|
@@ -593,8 +591,8 @@ impl RbLazyFrame {
|
|
593
591
|
.force_parallel(force_parallel)
|
594
592
|
.how(JoinType::AsOf(AsOfOptions {
|
595
593
|
strategy: strategy.0,
|
596
|
-
left_by: left_by.map(
|
597
|
-
right_by: right_by.map(
|
594
|
+
left_by: left_by.map(strings_to_pl_smallstr),
|
595
|
+
right_by: right_by.map(strings_to_pl_smallstr),
|
598
596
|
tolerance: tolerance.map(|t| t.0.into_static().unwrap()),
|
599
597
|
tolerance_str: tolerance_str.map(|s| s.into()),
|
600
598
|
}))
|
@@ -744,8 +742,8 @@ impl RbLazyFrame {
|
|
744
742
|
) -> RbResult<Self> {
|
745
743
|
let ldf = self.ldf.borrow().clone();
|
746
744
|
Ok(match maintain_order {
|
747
|
-
true => ldf.
|
748
|
-
false => ldf.
|
745
|
+
true => ldf.unique_stable_generic(subset, keep.0),
|
746
|
+
false => ldf.unique_generic(subset, keep.0),
|
749
747
|
}
|
750
748
|
.into())
|
751
749
|
}
|
@@ -805,7 +803,11 @@ impl RbLazyFrame {
|
|
805
803
|
}
|
806
804
|
|
807
805
|
pub fn collect_schema(&self) -> RbResult<RHash> {
|
808
|
-
let schema = self
|
806
|
+
let schema = self
|
807
|
+
.ldf
|
808
|
+
.borrow_mut()
|
809
|
+
.collect_schema()
|
810
|
+
.map_err(RbPolarsErr::from)?;
|
809
811
|
|
810
812
|
let schema_dict = RHash::new();
|
811
813
|
schema.iter_fields().for_each(|fld| {
|
@@ -813,7 +815,7 @@ impl RbLazyFrame {
|
|
813
815
|
schema_dict
|
814
816
|
.aset::<String, Value>(
|
815
817
|
fld.name().to_string(),
|
816
|
-
Wrap(fld.
|
818
|
+
Wrap(fld.dtype().clone()).into_value(),
|
817
819
|
)
|
818
820
|
.unwrap();
|
819
821
|
});
|
data/ext/polars/src/lib.rs
CHANGED
@@ -711,7 +711,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
711
711
|
"new_from_parquet",
|
712
712
|
function!(RbLazyFrame::new_from_parquet, 14),
|
713
713
|
)?;
|
714
|
-
class.define_singleton_method("new_from_ipc", function!(RbLazyFrame::new_from_ipc, 10))?;
|
714
|
+
class.define_singleton_method("new_from_ipc", function!(RbLazyFrame::new_from_ipc, 9))?;
|
715
715
|
class.define_method("write_json", method!(RbLazyFrame::write_json, 1))?;
|
716
716
|
class.define_method("describe_plan", method!(RbLazyFrame::describe_plan, 0))?;
|
717
717
|
class.define_method(
|