polars-df 0.21.1 → 0.23.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +15 -0
- data/Cargo.lock +120 -90
- data/Cargo.toml +3 -0
- data/README.md +20 -7
- data/ext/polars/Cargo.toml +18 -12
- data/ext/polars/src/batched_csv.rs +4 -4
- data/ext/polars/src/catalog/unity.rs +96 -94
- data/ext/polars/src/conversion/any_value.rs +39 -37
- data/ext/polars/src/conversion/chunked_array.rs +36 -29
- data/ext/polars/src/conversion/datetime.rs +11 -0
- data/ext/polars/src/conversion/mod.rs +244 -51
- data/ext/polars/src/dataframe/construction.rs +5 -17
- data/ext/polars/src/dataframe/export.rs +17 -15
- data/ext/polars/src/dataframe/general.rs +15 -17
- data/ext/polars/src/dataframe/io.rs +1 -2
- data/ext/polars/src/dataframe/mod.rs +25 -1
- data/ext/polars/src/dataframe/serde.rs +23 -8
- data/ext/polars/src/exceptions.rs +8 -5
- data/ext/polars/src/expr/datatype.rs +4 -4
- data/ext/polars/src/expr/datetime.rs +22 -28
- data/ext/polars/src/expr/general.rs +3 -10
- data/ext/polars/src/expr/list.rs +8 -24
- data/ext/polars/src/expr/meta.rs +4 -6
- data/ext/polars/src/expr/mod.rs +2 -0
- data/ext/polars/src/expr/name.rs +11 -14
- data/ext/polars/src/expr/serde.rs +28 -0
- data/ext/polars/src/expr/string.rs +5 -10
- data/ext/polars/src/file.rs +20 -14
- data/ext/polars/src/functions/business.rs +0 -1
- data/ext/polars/src/functions/io.rs +7 -4
- data/ext/polars/src/functions/lazy.rs +7 -6
- data/ext/polars/src/functions/meta.rs +3 -3
- data/ext/polars/src/functions/string_cache.rs +3 -3
- data/ext/polars/src/interop/arrow/to_ruby.rs +3 -3
- data/ext/polars/src/interop/numo/numo_rs.rs +4 -3
- data/ext/polars/src/io/mod.rs +23 -3
- data/ext/polars/src/lazyframe/general.rs +35 -50
- data/ext/polars/src/lazyframe/mod.rs +16 -1
- data/ext/polars/src/lazyframe/optflags.rs +57 -0
- data/ext/polars/src/lazyframe/serde.rs +27 -3
- data/ext/polars/src/lib.rs +144 -19
- data/ext/polars/src/map/dataframe.rs +18 -15
- data/ext/polars/src/map/lazy.rs +6 -5
- data/ext/polars/src/map/series.rs +7 -6
- data/ext/polars/src/on_startup.rs +12 -5
- data/ext/polars/src/rb_modules.rs +2 -2
- data/ext/polars/src/series/aggregation.rs +49 -29
- data/ext/polars/src/series/construction.rs +2 -0
- data/ext/polars/src/series/export.rs +38 -33
- data/ext/polars/src/series/general.rs +69 -31
- data/ext/polars/src/series/mod.rs +29 -4
- data/lib/polars/array_expr.rb +1 -1
- data/lib/polars/data_frame.rb +119 -15
- data/lib/polars/data_types.rb +23 -6
- data/lib/polars/date_time_expr.rb +36 -15
- data/lib/polars/expr.rb +41 -32
- data/lib/polars/functions/business.rb +95 -0
- data/lib/polars/functions/lazy.rb +1 -1
- data/lib/polars/iceberg_dataset.rb +113 -0
- data/lib/polars/io/iceberg.rb +34 -0
- data/lib/polars/io/ipc.rb +28 -49
- data/lib/polars/io/parquet.rb +7 -4
- data/lib/polars/io/scan_options.rb +12 -3
- data/lib/polars/io/utils.rb +17 -0
- data/lib/polars/lazy_frame.rb +97 -10
- data/lib/polars/list_expr.rb +21 -13
- data/lib/polars/list_name_space.rb +33 -21
- data/lib/polars/meta_expr.rb +25 -0
- data/lib/polars/query_opt_flags.rb +50 -0
- data/lib/polars/scan_cast_options.rb +23 -1
- data/lib/polars/schema.rb +1 -1
- data/lib/polars/selectors.rb +8 -8
- data/lib/polars/series.rb +26 -2
- data/lib/polars/string_expr.rb +27 -28
- data/lib/polars/string_name_space.rb +18 -5
- data/lib/polars/utils/convert.rb +2 -2
- data/lib/polars/utils/serde.rb +17 -0
- data/lib/polars/utils/various.rb +4 -0
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +6 -0
- metadata +10 -1
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
use magnus::{IntoValue,
|
|
1
|
+
use magnus::{IntoValue, Ruby, Value, prelude::*};
|
|
2
2
|
|
|
3
3
|
use super::*;
|
|
4
4
|
use crate::RbResult;
|
|
@@ -6,14 +6,15 @@ use crate::conversion::{ObjectValue, Wrap};
|
|
|
6
6
|
use crate::interop::arrow::to_ruby::dataframe_to_stream;
|
|
7
7
|
|
|
8
8
|
impl RbDataFrame {
|
|
9
|
-
pub fn row_tuple(&
|
|
9
|
+
pub fn row_tuple(ruby: &Ruby, rb_self: &Self, idx: i64) -> Value {
|
|
10
10
|
let idx = if idx < 0 {
|
|
11
|
-
(
|
|
11
|
+
(rb_self.df.borrow().height() as i64 + idx) as usize
|
|
12
12
|
} else {
|
|
13
13
|
idx as usize
|
|
14
14
|
};
|
|
15
|
-
|
|
16
|
-
|
|
15
|
+
ruby.ary_from_iter(
|
|
16
|
+
rb_self
|
|
17
|
+
.df
|
|
17
18
|
.borrow()
|
|
18
19
|
.get_columns()
|
|
19
20
|
.iter()
|
|
@@ -22,17 +23,18 @@ impl RbDataFrame {
|
|
|
22
23
|
let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
|
|
23
24
|
obj.unwrap().to_value()
|
|
24
25
|
}
|
|
25
|
-
_ => Wrap(s.get(idx).unwrap()).
|
|
26
|
+
_ => Wrap(s.get(idx).unwrap()).into_value_with(ruby),
|
|
26
27
|
}),
|
|
27
28
|
)
|
|
28
29
|
.as_value()
|
|
29
30
|
}
|
|
30
31
|
|
|
31
|
-
pub fn row_tuples(&
|
|
32
|
-
let df = &
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
32
|
+
pub fn row_tuples(ruby: &Ruby, rb_self: &Self) -> Value {
|
|
33
|
+
let df = &rb_self.df;
|
|
34
|
+
ruby.ary_from_iter((0..df.borrow().height()).map(|idx| {
|
|
35
|
+
ruby.ary_from_iter(
|
|
36
|
+
rb_self
|
|
37
|
+
.df
|
|
36
38
|
.borrow()
|
|
37
39
|
.get_columns()
|
|
38
40
|
.iter()
|
|
@@ -41,15 +43,15 @@ impl RbDataFrame {
|
|
|
41
43
|
let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
|
|
42
44
|
obj.unwrap().to_value()
|
|
43
45
|
}
|
|
44
|
-
_ => Wrap(s.get(idx).unwrap()).
|
|
46
|
+
_ => Wrap(s.get(idx).unwrap()).into_value_with(ruby),
|
|
45
47
|
}),
|
|
46
48
|
)
|
|
47
49
|
}))
|
|
48
50
|
.as_value()
|
|
49
51
|
}
|
|
50
52
|
|
|
51
|
-
pub fn __arrow_c_stream__(&
|
|
52
|
-
|
|
53
|
-
dataframe_to_stream(&
|
|
53
|
+
pub fn __arrow_c_stream__(ruby: &Ruby, rb_self: &Self) -> RbResult<Value> {
|
|
54
|
+
rb_self.df.borrow_mut().align_chunks();
|
|
55
|
+
dataframe_to_stream(&rb_self.df.borrow(), ruby)
|
|
54
56
|
}
|
|
55
57
|
}
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
use std::hash::BuildHasher;
|
|
2
2
|
|
|
3
3
|
use either::Either;
|
|
4
|
-
use magnus::{IntoValue, RArray, Value, prelude
|
|
4
|
+
use magnus::{IntoValue, RArray, Ruby, Value, prelude::*};
|
|
5
5
|
use polars::prelude::pivot::{pivot, pivot_stable};
|
|
6
6
|
use polars::prelude::*;
|
|
7
7
|
|
|
@@ -149,12 +149,13 @@ impl RbDataFrame {
|
|
|
149
149
|
Ok(())
|
|
150
150
|
}
|
|
151
151
|
|
|
152
|
-
pub fn dtypes(&
|
|
153
|
-
|
|
154
|
-
|
|
152
|
+
pub fn dtypes(ruby: &Ruby, rb_self: &Self) -> RArray {
|
|
153
|
+
ruby.ary_from_iter(
|
|
154
|
+
rb_self
|
|
155
|
+
.df
|
|
155
156
|
.borrow()
|
|
156
157
|
.iter()
|
|
157
|
-
.map(|s| Wrap(s.dtype().clone()).
|
|
158
|
+
.map(|s| Wrap(s.dtype().clone()).into_value_with(ruby)),
|
|
158
159
|
)
|
|
159
160
|
}
|
|
160
161
|
|
|
@@ -393,18 +394,19 @@ impl RbDataFrame {
|
|
|
393
394
|
}
|
|
394
395
|
|
|
395
396
|
pub fn partition_by(
|
|
396
|
-
&
|
|
397
|
+
ruby: &Ruby,
|
|
398
|
+
rb_self: &Self,
|
|
397
399
|
by: Vec<String>,
|
|
398
400
|
maintain_order: bool,
|
|
399
401
|
include_key: bool,
|
|
400
402
|
) -> RbResult<RArray> {
|
|
401
403
|
let out = if maintain_order {
|
|
402
|
-
|
|
404
|
+
rb_self.df.borrow().partition_by_stable(by, include_key)
|
|
403
405
|
} else {
|
|
404
|
-
|
|
406
|
+
rb_self.df.borrow().partition_by(by, include_key)
|
|
405
407
|
}
|
|
406
408
|
.map_err(RbPolarsErr::from)?;
|
|
407
|
-
Ok(
|
|
409
|
+
Ok(ruby.ary_from_iter(out.into_iter().map(RbDataFrame::new)))
|
|
408
410
|
}
|
|
409
411
|
|
|
410
412
|
pub fn lazy(&self) -> RbLazyFrame {
|
|
@@ -440,12 +442,13 @@ impl RbDataFrame {
|
|
|
440
442
|
}
|
|
441
443
|
|
|
442
444
|
pub fn map_rows(
|
|
443
|
-
&
|
|
445
|
+
ruby: &Ruby,
|
|
446
|
+
rb_self: &Self,
|
|
444
447
|
lambda: Value,
|
|
445
448
|
output_type: Option<Wrap<DataType>>,
|
|
446
449
|
inference_size: usize,
|
|
447
450
|
) -> RbResult<(Value, bool)> {
|
|
448
|
-
let df = &
|
|
451
|
+
let df = &rb_self.df.borrow();
|
|
449
452
|
|
|
450
453
|
let output_type = output_type.map(|dt| dt.0);
|
|
451
454
|
let out = match output_type {
|
|
@@ -490,7 +493,7 @@ impl RbDataFrame {
|
|
|
490
493
|
_ => return apply_lambda_unknown(df, lambda, inference_size),
|
|
491
494
|
};
|
|
492
495
|
|
|
493
|
-
Ok((
|
|
496
|
+
Ok((ruby.obj_wrap(RbSeries::from(out)).as_value(), false))
|
|
494
497
|
}
|
|
495
498
|
|
|
496
499
|
pub fn shrink_to_fit(&self) {
|
|
@@ -549,11 +552,6 @@ impl RbDataFrame {
|
|
|
549
552
|
s.into_series().into()
|
|
550
553
|
}
|
|
551
554
|
|
|
552
|
-
pub fn unnest(&self, names: Vec<String>) -> RbResult<Self> {
|
|
553
|
-
let df = self.df.borrow().unnest(names).map_err(RbPolarsErr::from)?;
|
|
554
|
-
Ok(df.into())
|
|
555
|
-
}
|
|
556
|
-
|
|
557
555
|
pub fn clear(&self) -> Self {
|
|
558
556
|
self.df.borrow().clear().into()
|
|
559
557
|
}
|
|
@@ -249,14 +249,13 @@ impl RbDataFrame {
|
|
|
249
249
|
include_header: bool,
|
|
250
250
|
separator: u8,
|
|
251
251
|
quote_char: u8,
|
|
252
|
-
batch_size:
|
|
252
|
+
batch_size: NonZeroUsize,
|
|
253
253
|
datetime_format: Option<String>,
|
|
254
254
|
date_format: Option<String>,
|
|
255
255
|
time_format: Option<String>,
|
|
256
256
|
float_precision: Option<usize>,
|
|
257
257
|
null_value: Option<String>,
|
|
258
258
|
) -> RbResult<()> {
|
|
259
|
-
let batch_size = batch_size.0;
|
|
260
259
|
let null = null_value.unwrap_or_default();
|
|
261
260
|
let mut buf = get_file_like(rb_f, true)?;
|
|
262
261
|
CsvWriter::new(&mut buf)
|
|
@@ -4,10 +4,14 @@ mod general;
|
|
|
4
4
|
mod io;
|
|
5
5
|
mod serde;
|
|
6
6
|
|
|
7
|
+
use magnus::{DataTypeFunctions, TypedData, gc};
|
|
7
8
|
use polars::prelude::*;
|
|
8
9
|
use std::cell::RefCell;
|
|
9
10
|
|
|
10
|
-
|
|
11
|
+
use crate::series::mark_series;
|
|
12
|
+
|
|
13
|
+
#[derive(TypedData)]
|
|
14
|
+
#[magnus(class = "Polars::RbDataFrame", mark)]
|
|
11
15
|
pub struct RbDataFrame {
|
|
12
16
|
pub df: RefCell<DataFrame>,
|
|
13
17
|
}
|
|
@@ -25,3 +29,23 @@ impl RbDataFrame {
|
|
|
25
29
|
}
|
|
26
30
|
}
|
|
27
31
|
}
|
|
32
|
+
|
|
33
|
+
impl DataTypeFunctions for RbDataFrame {
|
|
34
|
+
fn mark(&self, marker: &gc::Marker) {
|
|
35
|
+
// this is really, really not ideal, as objects will not be marked if unable to borrow
|
|
36
|
+
// currently, this should only happen for write_* methods,
|
|
37
|
+
// which should refuse to write Object datatype, and therefore be safe,
|
|
38
|
+
// since GC will not have a chance to run
|
|
39
|
+
if let Ok(df) = self.df.try_borrow() {
|
|
40
|
+
for column in df.get_columns() {
|
|
41
|
+
if let DataType::Object(_) = column.dtype() {
|
|
42
|
+
match column {
|
|
43
|
+
Column::Series(s) => mark_series(marker, s),
|
|
44
|
+
Column::Partitioned(s) => mark_series(marker, s.partitions()),
|
|
45
|
+
Column::Scalar(s) => mark_series(marker, &s.as_single_value_series()),
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
}
|
|
@@ -1,15 +1,30 @@
|
|
|
1
|
-
use
|
|
1
|
+
use std::io::{BufReader, BufWriter};
|
|
2
|
+
|
|
3
|
+
use polars::prelude::*;
|
|
4
|
+
|
|
2
5
|
use crate::file::get_file_like;
|
|
3
|
-
use crate::
|
|
6
|
+
use crate::utils::to_rb_err;
|
|
7
|
+
use crate::{RbDataFrame, RbPolarsErr, RbResult};
|
|
4
8
|
use magnus::Value;
|
|
5
|
-
use std::io::BufWriter;
|
|
6
9
|
|
|
7
10
|
impl RbDataFrame {
|
|
8
|
-
|
|
9
|
-
pub fn serialize_json(&self, rb_f: Value) -> RbResult<()> {
|
|
11
|
+
pub fn serialize_binary(&self, rb_f: Value) -> RbResult<()> {
|
|
10
12
|
let file = get_file_like(rb_f, true)?;
|
|
11
|
-
let writer = BufWriter::new(file);
|
|
12
|
-
|
|
13
|
-
|
|
13
|
+
let mut writer = BufWriter::new(file);
|
|
14
|
+
|
|
15
|
+
Ok(self
|
|
16
|
+
.df
|
|
17
|
+
.borrow_mut()
|
|
18
|
+
.serialize_into_writer(&mut writer)
|
|
19
|
+
.map_err(RbPolarsErr::from)?)
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
pub fn deserialize_binary(rb_f: Value) -> RbResult<Self> {
|
|
23
|
+
let file = get_file_like(rb_f, false)?;
|
|
24
|
+
let mut file = BufReader::new(file);
|
|
25
|
+
|
|
26
|
+
DataFrame::deserialize_from_reader(&mut file)
|
|
27
|
+
.map(|v| v.into())
|
|
28
|
+
.map_err(to_rb_err)
|
|
14
29
|
}
|
|
15
30
|
}
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
use crate::rb_modules;
|
|
2
|
-
use magnus::{Error,
|
|
2
|
+
use magnus::{Error, Ruby};
|
|
3
3
|
use std::borrow::Cow;
|
|
4
4
|
|
|
5
5
|
macro_rules! create_exception {
|
|
@@ -17,9 +17,12 @@ macro_rules! create_exception {
|
|
|
17
17
|
};
|
|
18
18
|
}
|
|
19
19
|
|
|
20
|
-
create_exception!(RbTypeError,
|
|
21
|
-
create_exception!(RbValueError,
|
|
22
|
-
create_exception!(
|
|
23
|
-
|
|
20
|
+
create_exception!(RbTypeError, Ruby::get().unwrap().exception_type_error());
|
|
21
|
+
create_exception!(RbValueError, Ruby::get().unwrap().exception_arg_error());
|
|
22
|
+
create_exception!(
|
|
23
|
+
RbOverflowError,
|
|
24
|
+
Ruby::get().unwrap().exception_range_error()
|
|
25
|
+
);
|
|
26
|
+
create_exception!(RbIndexError, Ruby::get().unwrap().exception_index_error());
|
|
24
27
|
create_exception!(ComputeError, rb_modules::compute_error());
|
|
25
28
|
create_exception!(InvalidOperationError, rb_modules::invalid_operation_error());
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
use magnus::{IntoValue, Value};
|
|
1
|
+
use magnus::{IntoValue, Ruby, Value};
|
|
2
2
|
use polars::prelude::{DataType, DataTypeExpr, Schema};
|
|
3
3
|
|
|
4
4
|
use crate::prelude::Wrap;
|
|
@@ -26,12 +26,12 @@ impl RbDataTypeExpr {
|
|
|
26
26
|
DataTypeExpr::OfExpr(Box::new(expr.inner.clone())).into()
|
|
27
27
|
}
|
|
28
28
|
|
|
29
|
-
pub fn collect_dtype(&
|
|
30
|
-
let dtype =
|
|
29
|
+
pub fn collect_dtype(ruby: &Ruby, rb_self: &Self, schema: Wrap<Schema>) -> RbResult<Value> {
|
|
30
|
+
let dtype = rb_self
|
|
31
31
|
.clone()
|
|
32
32
|
.inner
|
|
33
33
|
.into_datatype(&schema.0)
|
|
34
34
|
.map_err(RbPolarsErr::from)?;
|
|
35
|
-
Ok(Wrap(dtype).
|
|
35
|
+
Ok(Wrap(dtype).into_value_with(ruby))
|
|
36
36
|
}
|
|
37
37
|
}
|
|
@@ -26,20 +26,6 @@ impl RbExpr {
|
|
|
26
26
|
self.inner.clone().dt().offset_by(by.inner.clone()).into()
|
|
27
27
|
}
|
|
28
28
|
|
|
29
|
-
pub fn dt_epoch_seconds(&self) -> Self {
|
|
30
|
-
self.inner
|
|
31
|
-
.clone()
|
|
32
|
-
.map(
|
|
33
|
-
|s| {
|
|
34
|
-
s.take_materialized_series()
|
|
35
|
-
.timestamp(TimeUnit::Milliseconds)
|
|
36
|
-
.map(|ca| Some((ca / 1000).into_column()))
|
|
37
|
-
},
|
|
38
|
-
GetOutput::from_type(DataType::Int64),
|
|
39
|
-
)
|
|
40
|
-
.into()
|
|
41
|
-
}
|
|
42
|
-
|
|
43
29
|
pub fn dt_with_time_unit(&self, tu: Wrap<TimeUnit>) -> Self {
|
|
44
30
|
self.inner.clone().dt().with_time_unit(tu.0).into()
|
|
45
31
|
}
|
|
@@ -232,31 +218,39 @@ impl RbExpr {
|
|
|
232
218
|
self.inner.clone().dt().timestamp(tu.0).into()
|
|
233
219
|
}
|
|
234
220
|
|
|
235
|
-
pub fn dt_total_days(&self) -> Self {
|
|
236
|
-
self.inner.clone().dt().total_days().into()
|
|
221
|
+
pub fn dt_total_days(&self, fractional: bool) -> Self {
|
|
222
|
+
self.inner.clone().dt().total_days(fractional).into()
|
|
237
223
|
}
|
|
238
224
|
|
|
239
|
-
pub fn dt_total_hours(&self) -> Self {
|
|
240
|
-
self.inner.clone().dt().total_hours().into()
|
|
225
|
+
pub fn dt_total_hours(&self, fractional: bool) -> Self {
|
|
226
|
+
self.inner.clone().dt().total_hours(fractional).into()
|
|
241
227
|
}
|
|
242
228
|
|
|
243
|
-
pub fn dt_total_minutes(&self) -> Self {
|
|
244
|
-
self.inner.clone().dt().total_minutes().into()
|
|
229
|
+
pub fn dt_total_minutes(&self, fractional: bool) -> Self {
|
|
230
|
+
self.inner.clone().dt().total_minutes(fractional).into()
|
|
245
231
|
}
|
|
246
232
|
|
|
247
|
-
pub fn dt_total_seconds(&self) -> Self {
|
|
248
|
-
self.inner.clone().dt().total_seconds().into()
|
|
233
|
+
pub fn dt_total_seconds(&self, fractional: bool) -> Self {
|
|
234
|
+
self.inner.clone().dt().total_seconds(fractional).into()
|
|
249
235
|
}
|
|
250
236
|
|
|
251
|
-
pub fn dt_total_milliseconds(&self) -> Self {
|
|
252
|
-
self.inner
|
|
237
|
+
pub fn dt_total_milliseconds(&self, fractional: bool) -> Self {
|
|
238
|
+
self.inner
|
|
239
|
+
.clone()
|
|
240
|
+
.dt()
|
|
241
|
+
.total_milliseconds(fractional)
|
|
242
|
+
.into()
|
|
253
243
|
}
|
|
254
244
|
|
|
255
|
-
pub fn dt_total_microseconds(&self) -> Self {
|
|
256
|
-
self.inner
|
|
245
|
+
pub fn dt_total_microseconds(&self, fractional: bool) -> Self {
|
|
246
|
+
self.inner
|
|
247
|
+
.clone()
|
|
248
|
+
.dt()
|
|
249
|
+
.total_microseconds(fractional)
|
|
250
|
+
.into()
|
|
257
251
|
}
|
|
258
252
|
|
|
259
|
-
pub fn dt_total_nanoseconds(&self) -> Self {
|
|
260
|
-
self.inner.clone().dt().total_nanoseconds().into()
|
|
253
|
+
pub fn dt_total_nanoseconds(&self, fractional: bool) -> Self {
|
|
254
|
+
self.inner.clone().dt().total_nanoseconds(fractional).into()
|
|
261
255
|
}
|
|
262
256
|
}
|
|
@@ -483,10 +483,7 @@ impl RbExpr {
|
|
|
483
483
|
}
|
|
484
484
|
|
|
485
485
|
pub fn rechunk(&self) -> Self {
|
|
486
|
-
self.inner
|
|
487
|
-
.clone()
|
|
488
|
-
.map(|s| Ok(Some(s.rechunk())), GetOutput::same_type())
|
|
489
|
-
.into()
|
|
486
|
+
self.inner.clone().rechunk().into()
|
|
490
487
|
}
|
|
491
488
|
|
|
492
489
|
pub fn round(&self, decimals: u32, mode: Wrap<RoundMode>) -> Self {
|
|
@@ -665,10 +662,6 @@ impl RbExpr {
|
|
|
665
662
|
self.inner.clone().product().into()
|
|
666
663
|
}
|
|
667
664
|
|
|
668
|
-
pub fn shrink_dtype(&self) -> Self {
|
|
669
|
-
self.inner.clone().shrink_dtype().into()
|
|
670
|
-
}
|
|
671
|
-
|
|
672
665
|
pub fn map_batches(
|
|
673
666
|
&self,
|
|
674
667
|
lambda: Value,
|
|
@@ -868,8 +861,8 @@ impl RbExpr {
|
|
|
868
861
|
self.inner.clone().all(drop_nulls).into()
|
|
869
862
|
}
|
|
870
863
|
|
|
871
|
-
pub fn log(&self, base:
|
|
872
|
-
self.inner.clone().log(base).into()
|
|
864
|
+
pub fn log(&self, base: &RbExpr) -> Self {
|
|
865
|
+
self.inner.clone().log(base.inner.clone()).into()
|
|
873
866
|
}
|
|
874
867
|
|
|
875
868
|
pub fn log1p(&self) -> Self {
|
data/ext/polars/src/expr/list.rs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
use magnus::{
|
|
1
|
+
use magnus::{RArray, prelude::*};
|
|
2
2
|
use polars::lazy::dsl::lit;
|
|
3
3
|
use polars::prelude::*;
|
|
4
4
|
use polars::series::ops::NullBehavior;
|
|
@@ -195,33 +195,17 @@ impl RbExpr {
|
|
|
195
195
|
self.inner.clone().list().to_array(width).into()
|
|
196
196
|
}
|
|
197
197
|
|
|
198
|
-
pub fn list_to_struct(
|
|
199
|
-
&self,
|
|
200
|
-
width_strat: Wrap<ListToStructWidthStrategy>,
|
|
201
|
-
name_gen: Option<Value>,
|
|
202
|
-
upper_bound: Option<usize>,
|
|
203
|
-
) -> RbResult<Self> {
|
|
204
|
-
let name_gen = name_gen.map(|lambda| {
|
|
205
|
-
let lambda = Opaque::from(lambda);
|
|
206
|
-
Arc::new(move |idx: usize| {
|
|
207
|
-
let lambda = Ruby::get().unwrap().get_inner(lambda);
|
|
208
|
-
let out: String = lambda.funcall("call", (idx,)).unwrap();
|
|
209
|
-
PlSmallStr::from_string(out)
|
|
210
|
-
});
|
|
211
|
-
|
|
212
|
-
// non-Ruby thread
|
|
213
|
-
todo!();
|
|
214
|
-
});
|
|
215
|
-
|
|
198
|
+
pub fn list_to_struct(&self, names: RArray) -> RbResult<Self> {
|
|
216
199
|
Ok(self
|
|
217
200
|
.inner
|
|
218
201
|
.clone()
|
|
219
202
|
.list()
|
|
220
|
-
.to_struct(
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
203
|
+
.to_struct(
|
|
204
|
+
names
|
|
205
|
+
.into_iter()
|
|
206
|
+
.map(|x| Ok(Wrap::<PlSmallStr>::try_convert(x)?.0))
|
|
207
|
+
.collect::<RbResult<Arc<[_]>>>()?,
|
|
208
|
+
)
|
|
225
209
|
.into())
|
|
226
210
|
}
|
|
227
211
|
|
data/ext/polars/src/expr/meta.rs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
use magnus::RArray;
|
|
1
|
+
use magnus::{RArray, Ruby};
|
|
2
2
|
use polars::prelude::Schema;
|
|
3
3
|
|
|
4
4
|
use crate::{RbExpr, RbPolarsErr, RbResult, Wrap};
|
|
@@ -8,17 +8,15 @@ impl RbExpr {
|
|
|
8
8
|
self.inner == other.inner
|
|
9
9
|
}
|
|
10
10
|
|
|
11
|
-
pub fn meta_pop(&
|
|
11
|
+
pub fn meta_pop(ruby: &Ruby, rb_self: &Self, schema: Option<Wrap<Schema>>) -> RbResult<RArray> {
|
|
12
12
|
let schema = schema.as_ref().map(|s| &s.0);
|
|
13
|
-
let exprs =
|
|
13
|
+
let exprs = rb_self
|
|
14
14
|
.inner
|
|
15
15
|
.clone()
|
|
16
16
|
.meta()
|
|
17
17
|
.pop(schema)
|
|
18
18
|
.map_err(RbPolarsErr::from)?;
|
|
19
|
-
Ok(
|
|
20
|
-
exprs.iter().map(|e| RbExpr::from(e.clone())),
|
|
21
|
-
))
|
|
19
|
+
Ok(ruby.ary_from_iter(exprs.iter().map(|e| RbExpr::from(e.clone()))))
|
|
22
20
|
}
|
|
23
21
|
|
|
24
22
|
pub fn meta_root_names(&self) -> Vec<String> {
|
data/ext/polars/src/expr/mod.rs
CHANGED
data/ext/polars/src/expr/name.rs
CHANGED
|
@@ -11,20 +11,17 @@ impl RbExpr {
|
|
|
11
11
|
|
|
12
12
|
pub fn name_map(&self, lambda: Proc) -> Self {
|
|
13
13
|
let lambda = Opaque::from(lambda);
|
|
14
|
-
|
|
15
|
-
.
|
|
16
|
-
.name()
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
}
|
|
26
|
-
})
|
|
27
|
-
.into()
|
|
14
|
+
let func = PlanCallback::new(move |name: PlSmallStr| {
|
|
15
|
+
let lambda = Ruby::get().unwrap().get_inner(lambda);
|
|
16
|
+
let out = lambda.call::<_, String>((name.as_str(),));
|
|
17
|
+
match out {
|
|
18
|
+
Ok(out) => Ok(format_pl_smallstr!("{}", out)),
|
|
19
|
+
Err(e) => Err(PolarsError::ComputeError(
|
|
20
|
+
format!("Ruby function in 'name.map' produced an error: {e}.").into(),
|
|
21
|
+
)),
|
|
22
|
+
}
|
|
23
|
+
});
|
|
24
|
+
self.inner.clone().name().map(func).into()
|
|
28
25
|
}
|
|
29
26
|
|
|
30
27
|
pub fn name_prefix(&self, prefix: String) -> Self {
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
use std::io::{BufReader, BufWriter};
|
|
2
|
+
|
|
3
|
+
use magnus::Value;
|
|
4
|
+
use polars::lazy::prelude::Expr;
|
|
5
|
+
use polars_utils::pl_serialize;
|
|
6
|
+
|
|
7
|
+
use crate::exceptions::ComputeError;
|
|
8
|
+
use crate::file::get_file_like;
|
|
9
|
+
use crate::{RbExpr, RbResult};
|
|
10
|
+
|
|
11
|
+
impl RbExpr {
|
|
12
|
+
pub fn serialize_binary(&self, rb_f: Value) -> RbResult<()> {
|
|
13
|
+
let file = get_file_like(rb_f, true)?;
|
|
14
|
+
let writer = BufWriter::new(file);
|
|
15
|
+
pl_serialize::SerializeOptions::default()
|
|
16
|
+
.serialize_into_writer::<_, _, true>(writer, &self.inner)
|
|
17
|
+
.map_err(|err| ComputeError::new_err(err.to_string()))
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
pub fn deserialize_binary(rb_f: Value) -> RbResult<RbExpr> {
|
|
21
|
+
let file = get_file_like(rb_f, false)?;
|
|
22
|
+
let reader = BufReader::new(file);
|
|
23
|
+
let expr: Expr = pl_serialize::SerializeOptions::default()
|
|
24
|
+
.deserialize_from_reader::<_, _, true>(reader)
|
|
25
|
+
.map_err(|err| ComputeError::new_err(err.to_string()))?;
|
|
26
|
+
Ok(expr.into())
|
|
27
|
+
}
|
|
28
|
+
}
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
use polars::prelude::*;
|
|
2
2
|
|
|
3
3
|
use crate::conversion::Wrap;
|
|
4
|
-
use crate::{RbExpr, RbPolarsErr, RbResult};
|
|
4
|
+
use crate::{RbDataTypeExpr, RbExpr, RbPolarsErr, RbResult};
|
|
5
5
|
|
|
6
6
|
impl RbExpr {
|
|
7
7
|
pub fn str_join(&self, delimiter: String, ignore_nulls: bool) -> Self {
|
|
@@ -265,16 +265,11 @@ impl RbExpr {
|
|
|
265
265
|
.into()
|
|
266
266
|
}
|
|
267
267
|
|
|
268
|
-
pub fn str_json_decode(
|
|
269
|
-
&self,
|
|
270
|
-
dtype: Option<Wrap<DataType>>,
|
|
271
|
-
infer_schema_len: Option<usize>,
|
|
272
|
-
) -> Self {
|
|
273
|
-
let dtype = dtype.map(|wrap| wrap.0);
|
|
268
|
+
pub fn str_json_decode(&self, dtype: &RbDataTypeExpr) -> Self {
|
|
274
269
|
self.inner
|
|
275
270
|
.clone()
|
|
276
271
|
.str()
|
|
277
|
-
.json_decode(dtype
|
|
272
|
+
.json_decode(dtype.inner.clone())
|
|
278
273
|
.into()
|
|
279
274
|
}
|
|
280
275
|
|
|
@@ -352,8 +347,8 @@ impl RbExpr {
|
|
|
352
347
|
self.inner.clone().str().splitn(by.inner.clone(), n).into()
|
|
353
348
|
}
|
|
354
349
|
|
|
355
|
-
pub fn str_to_decimal(&self,
|
|
356
|
-
self.inner.clone().str().to_decimal(
|
|
350
|
+
pub fn str_to_decimal(&self, scale: usize) -> Self {
|
|
351
|
+
self.inner.clone().str().to_decimal(scale).into()
|
|
357
352
|
}
|
|
358
353
|
|
|
359
354
|
pub fn str_contains_any(&self, patterns: &RbExpr, ascii_case_insensitive: bool) -> Self {
|