polars-df 0.25.0 → 0.26.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +33 -0
- data/Cargo.lock +270 -97
- data/LICENSE.txt +1 -1
- data/README.md +1 -3
- data/ext/polars/Cargo.toml +19 -18
- data/ext/polars/src/catalog/unity.rs +15 -20
- data/ext/polars/src/conversion/any_value.rs +53 -29
- data/ext/polars/src/conversion/chunked_array.rs +58 -56
- data/ext/polars/src/conversion/datetime.rs +58 -7
- data/ext/polars/src/conversion/mod.rs +200 -150
- data/ext/polars/src/dataframe/export.rs +15 -12
- data/ext/polars/src/dataframe/general.rs +25 -7
- data/ext/polars/src/dataframe/map.rs +6 -4
- data/ext/polars/src/error.rs +1 -1
- data/ext/polars/src/expr/array.rs +0 -24
- data/ext/polars/src/expr/datatype.rs +13 -3
- data/ext/polars/src/expr/datetime.rs +4 -4
- data/ext/polars/src/expr/general.rs +35 -15
- data/ext/polars/src/expr/list.rs +0 -26
- data/ext/polars/src/expr/rolling.rs +24 -0
- data/ext/polars/src/functions/business.rs +2 -2
- data/ext/polars/src/functions/io.rs +4 -3
- data/ext/polars/src/functions/lazy.rs +65 -46
- data/ext/polars/src/functions/meta.rs +6 -5
- data/ext/polars/src/functions/mod.rs +0 -1
- data/ext/polars/src/functions/range.rs +13 -0
- data/ext/polars/src/functions/utils.rs +4 -2
- data/ext/polars/src/interop/arrow/mod.rs +4 -2
- data/ext/polars/src/interop/arrow/to_rb.rs +1 -1
- data/ext/polars/src/interop/numo/to_numo_series.rs +26 -25
- data/ext/polars/src/io/scan_options.rs +6 -3
- data/ext/polars/src/io/sink_options.rs +2 -0
- data/ext/polars/src/lazyframe/general.rs +243 -17
- data/ext/polars/src/lazyframe/optflags.rs +2 -1
- data/ext/polars/src/lib.rs +39 -35
- data/ext/polars/src/map/lazy.rs +5 -2
- data/ext/polars/src/map/series.rs +19 -18
- data/ext/polars/src/on_startup.rs +25 -6
- data/ext/polars/src/ruby/numo.rs +3 -4
- data/ext/polars/src/ruby/plan_callback.rs +1 -4
- data/ext/polars/src/ruby/rb_modules.rs +2 -4
- data/ext/polars/src/ruby/ruby_udf.rs +7 -9
- data/ext/polars/src/ruby/utils.rs +12 -1
- data/ext/polars/src/series/aggregation.rs +13 -1
- data/ext/polars/src/series/construction.rs +31 -50
- data/ext/polars/src/series/export.rs +33 -38
- data/ext/polars/src/series/general.rs +6 -6
- data/ext/polars/src/series/map.rs +3 -2
- data/ext/polars/src/series/scatter.rs +4 -4
- data/ext/polars/src/utils.rs +31 -7
- data/lib/polars/array_expr.rb +23 -7
- data/lib/polars/array_name_space.rb +16 -2
- data/lib/polars/binary_name_space.rb +32 -0
- data/lib/polars/collect_batches.rb +4 -0
- data/lib/polars/data_frame.rb +144 -11
- data/lib/polars/data_type_group.rb +5 -0
- data/lib/polars/date_time_expr.rb +91 -3
- data/lib/polars/date_time_name_space.rb +7 -1
- data/lib/polars/expr.rb +247 -44
- data/lib/polars/functions/business.rb +2 -2
- data/lib/polars/functions/datatype.rb +30 -0
- data/lib/polars/functions/eager.rb +80 -7
- data/lib/polars/functions/lazy.rb +97 -2
- data/lib/polars/functions/range/linear_space.rb +118 -0
- data/lib/polars/io/csv.rb +27 -5
- data/lib/polars/io/database.rb +2 -3
- data/lib/polars/io/ipc.rb +2 -2
- data/lib/polars/io/lines.rb +172 -0
- data/lib/polars/io/parquet.rb +1 -1
- data/lib/polars/io/sink_options.rb +5 -2
- data/lib/polars/lazy_frame.rb +517 -14
- data/lib/polars/list_expr.rb +21 -7
- data/lib/polars/list_name_space.rb +16 -2
- data/lib/polars/query_opt_flags.rb +23 -5
- data/lib/polars/selectors.rb +2 -2
- data/lib/polars/series.rb +176 -19
- data/lib/polars/sql_context.rb +2 -2
- data/lib/polars/string_cache.rb +19 -72
- data/lib/polars/string_expr.rb +1 -7
- data/lib/polars/string_name_space.rb +1 -7
- data/lib/polars/utils/construction/series.rb +24 -39
- data/lib/polars/utils/convert.rb +16 -6
- data/lib/polars/utils/parse.rb +7 -0
- data/lib/polars/utils/reduce_balanced.rb +43 -0
- data/lib/polars/utils/various.rb +5 -0
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +2 -1
- metadata +4 -17
- data/ext/polars/src/functions/string_cache.rs +0 -24
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
use std::hash::BuildHasher;
|
|
2
2
|
|
|
3
|
+
use arrow::bitmap::MutableBitmap;
|
|
3
4
|
use either::Either;
|
|
4
|
-
use magnus::{
|
|
5
|
+
use magnus::{RArray, Ruby, Value, prelude::*, value::Opaque};
|
|
5
6
|
use polars::prelude::*;
|
|
6
7
|
|
|
7
8
|
use crate::conversion::*;
|
|
@@ -9,6 +10,7 @@ use crate::prelude::strings_to_pl_smallstr;
|
|
|
9
10
|
use crate::rb_modules::pl_utils;
|
|
10
11
|
use crate::ruby::exceptions::RbIndexError;
|
|
11
12
|
use crate::ruby::gvl::GvlExt;
|
|
13
|
+
use crate::ruby::utils::TryIntoValue;
|
|
12
14
|
use crate::series::ToRbSeries;
|
|
13
15
|
use crate::series::to_series;
|
|
14
16
|
use crate::utils::EnterPolarsExt;
|
|
@@ -143,13 +145,13 @@ impl RbDataFrame {
|
|
|
143
145
|
Ok(())
|
|
144
146
|
}
|
|
145
147
|
|
|
146
|
-
pub fn dtypes(ruby: &Ruby, self_: &Self) -> RArray {
|
|
148
|
+
pub fn dtypes(ruby: &Ruby, self_: &Self) -> RbResult<RArray> {
|
|
147
149
|
let df = self_.df.read();
|
|
148
150
|
let iter = df
|
|
149
151
|
.columns()
|
|
150
152
|
.iter()
|
|
151
|
-
.map(|s| Wrap(s.dtype().clone()).
|
|
152
|
-
ruby.
|
|
153
|
+
.map(|s| Wrap(s.dtype().clone()).try_into_value_with(ruby));
|
|
154
|
+
ruby.ary_try_from_iter(iter)
|
|
153
155
|
}
|
|
154
156
|
|
|
155
157
|
pub fn n_chunks(&self) -> usize {
|
|
@@ -503,10 +505,26 @@ impl RbDataFrame {
|
|
|
503
505
|
})
|
|
504
506
|
}
|
|
505
507
|
|
|
506
|
-
pub fn to_struct(
|
|
508
|
+
pub fn to_struct(
|
|
509
|
+
rb: &Ruby,
|
|
510
|
+
self_: &Self,
|
|
511
|
+
name: String,
|
|
512
|
+
invalid_indices: Vec<usize>,
|
|
513
|
+
) -> RbResult<RbSeries> {
|
|
507
514
|
rb.enter_polars_series(|| {
|
|
508
|
-
let ca = self_.df.read().clone().into_struct(name.into());
|
|
509
|
-
|
|
515
|
+
let mut ca = self_.df.read().clone().into_struct(name.into());
|
|
516
|
+
|
|
517
|
+
if !invalid_indices.is_empty() {
|
|
518
|
+
let mut validity = MutableBitmap::with_capacity(ca.len());
|
|
519
|
+
validity.extend_constant(ca.len(), true);
|
|
520
|
+
for i in invalid_indices {
|
|
521
|
+
validity.set(i, false);
|
|
522
|
+
}
|
|
523
|
+
ca.rechunk_mut();
|
|
524
|
+
Ok(ca.with_outer_validity(Some(validity.freeze())))
|
|
525
|
+
} else {
|
|
526
|
+
Ok(ca)
|
|
527
|
+
}
|
|
510
528
|
})
|
|
511
529
|
}
|
|
512
530
|
|
|
@@ -5,7 +5,7 @@ use polars_core::utils::CustomIterTools;
|
|
|
5
5
|
use super::*;
|
|
6
6
|
use crate::error::RbPolarsErr;
|
|
7
7
|
use crate::prelude::*;
|
|
8
|
-
use crate::ruby::utils::to_pl_err;
|
|
8
|
+
use crate::ruby::utils::{TryIntoValue, to_pl_err};
|
|
9
9
|
use crate::series::construction::series_from_objects;
|
|
10
10
|
use crate::{RbResult, RbSeries, raise_err};
|
|
11
11
|
|
|
@@ -28,9 +28,11 @@ impl RbDataFrame {
|
|
|
28
28
|
drop(df); // Release lock before calling lambda.
|
|
29
29
|
|
|
30
30
|
let lambda_result_iter = (0..height).map(move |_| {
|
|
31
|
-
let iter = iters
|
|
32
|
-
|
|
33
|
-
|
|
31
|
+
let iter = iters
|
|
32
|
+
.iter_mut()
|
|
33
|
+
.map(|it| Wrap(it.next().unwrap()).try_into_value_with(rb));
|
|
34
|
+
rb.ary_try_from_iter(iter)
|
|
35
|
+
.and_then(|tpl| lambda.funcall::<_, _, Value>("call", (tpl,)))
|
|
34
36
|
});
|
|
35
37
|
|
|
36
38
|
// Simple case: return type set.
|
data/ext/polars/src/error.rs
CHANGED
|
@@ -63,7 +63,7 @@ impl From<RbPolarsErr> for Error {
|
|
|
63
63
|
PolarsError::StructFieldNotFound(name) => {
|
|
64
64
|
StructFieldNotFoundError::new_err(name.to_string())
|
|
65
65
|
}
|
|
66
|
-
PolarsError::Context { .. } => {
|
|
66
|
+
PolarsError::Context { .. } | PolarsError::ExprContext { .. } => {
|
|
67
67
|
let tmp = RbPolarsErr::Polars(err.context_trace());
|
|
68
68
|
RbErr::from(tmp)
|
|
69
69
|
}
|
|
@@ -38,30 +38,10 @@ impl RbExpr {
|
|
|
38
38
|
self.inner.clone().arr().median().into()
|
|
39
39
|
}
|
|
40
40
|
|
|
41
|
-
pub fn arr_unique(&self, maintain_order: bool) -> Self {
|
|
42
|
-
if maintain_order {
|
|
43
|
-
self.inner.clone().arr().unique_stable().into()
|
|
44
|
-
} else {
|
|
45
|
-
self.inner.clone().arr().unique().into()
|
|
46
|
-
}
|
|
47
|
-
}
|
|
48
|
-
|
|
49
|
-
pub fn arr_n_unique(&self) -> Self {
|
|
50
|
-
self.inner.clone().arr().n_unique().into()
|
|
51
|
-
}
|
|
52
|
-
|
|
53
41
|
pub fn arr_to_list(&self) -> Self {
|
|
54
42
|
self.inner.clone().arr().to_list().into()
|
|
55
43
|
}
|
|
56
44
|
|
|
57
|
-
pub fn arr_all(&self) -> Self {
|
|
58
|
-
self.inner.clone().arr().all().into()
|
|
59
|
-
}
|
|
60
|
-
|
|
61
|
-
pub fn arr_any(&self) -> Self {
|
|
62
|
-
self.inner.clone().arr().any().into()
|
|
63
|
-
}
|
|
64
|
-
|
|
65
45
|
pub fn arr_sort(&self, descending: bool, nulls_last: bool) -> Self {
|
|
66
46
|
self.inner
|
|
67
47
|
.clone()
|
|
@@ -74,10 +54,6 @@ impl RbExpr {
|
|
|
74
54
|
.into()
|
|
75
55
|
}
|
|
76
56
|
|
|
77
|
-
pub fn arr_reverse(&self) -> Self {
|
|
78
|
-
self.inner.clone().arr().reverse().into()
|
|
79
|
-
}
|
|
80
|
-
|
|
81
57
|
pub fn arr_arg_min(&self) -> Self {
|
|
82
58
|
self.inner.clone().arr().arg_min().into()
|
|
83
59
|
}
|
|
@@ -1,7 +1,8 @@
|
|
|
1
|
-
use magnus::{
|
|
2
|
-
use polars::prelude::{DataType, DataTypeExpr, Schema};
|
|
1
|
+
use magnus::{RArray, Ruby, TryConvert, Value};
|
|
2
|
+
use polars::prelude::{DataType, DataTypeExpr, PlSmallStr, Schema};
|
|
3
3
|
|
|
4
4
|
use crate::prelude::Wrap;
|
|
5
|
+
use crate::ruby::utils::TryIntoValue;
|
|
5
6
|
use crate::{RbExpr, RbPolarsErr, RbResult};
|
|
6
7
|
|
|
7
8
|
#[magnus::wrap(class = "Polars::RbDataTypeExpr")]
|
|
@@ -36,6 +37,15 @@ impl RbDataTypeExpr {
|
|
|
36
37
|
.inner
|
|
37
38
|
.into_datatype(&schema.0)
|
|
38
39
|
.map_err(RbPolarsErr::from)?;
|
|
39
|
-
|
|
40
|
+
Wrap(dtype).try_into_value_with(ruby)
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
pub fn struct_with_fields(rb_fields: RArray) -> RbResult<Self> {
|
|
44
|
+
let mut fields = Vec::new();
|
|
45
|
+
for v in rb_fields.into_iter() {
|
|
46
|
+
let (name, dt_expr) = <(String, &RbDataTypeExpr)>::try_convert(v)?;
|
|
47
|
+
fields.push((PlSmallStr::from_string(name), dt_expr.inner.clone()));
|
|
48
|
+
}
|
|
49
|
+
Ok(DataTypeExpr::StructWithFields(fields).into())
|
|
40
50
|
}
|
|
41
51
|
}
|
|
@@ -8,13 +8,13 @@ impl RbExpr {
|
|
|
8
8
|
&self,
|
|
9
9
|
n: &RbExpr,
|
|
10
10
|
week_mask: [bool; 7],
|
|
11
|
-
holidays:
|
|
11
|
+
holidays: &RbExpr,
|
|
12
12
|
roll: Wrap<Roll>,
|
|
13
13
|
) -> Self {
|
|
14
14
|
self.inner
|
|
15
15
|
.clone()
|
|
16
16
|
.dt()
|
|
17
|
-
.add_business_days(n.inner.clone(), week_mask, holidays, roll.0)
|
|
17
|
+
.add_business_days(n.inner.clone(), week_mask, holidays.inner.clone(), roll.0)
|
|
18
18
|
.into()
|
|
19
19
|
}
|
|
20
20
|
|
|
@@ -133,11 +133,11 @@ impl RbExpr {
|
|
|
133
133
|
self.clone().inner.dt().year().into()
|
|
134
134
|
}
|
|
135
135
|
|
|
136
|
-
pub fn dt_is_business_day(&self, week_mask: [bool; 7], holidays:
|
|
136
|
+
pub fn dt_is_business_day(&self, week_mask: [bool; 7], holidays: &RbExpr) -> Self {
|
|
137
137
|
self.inner
|
|
138
138
|
.clone()
|
|
139
139
|
.dt()
|
|
140
|
-
.is_business_day(week_mask, holidays)
|
|
140
|
+
.is_business_day(week_mask, holidays.inner.clone())
|
|
141
141
|
.into()
|
|
142
142
|
}
|
|
143
143
|
|
|
@@ -5,13 +5,13 @@ use polars::lazy::dsl;
|
|
|
5
5
|
use polars::prelude::*;
|
|
6
6
|
use polars::series::ops::NullBehavior;
|
|
7
7
|
use polars_core::chunked_array::cast::CastOptions;
|
|
8
|
-
use
|
|
8
|
+
use polars_plan::plans::AExprSorted;
|
|
9
9
|
|
|
10
10
|
use super::datatype::RbDataTypeExpr;
|
|
11
11
|
use super::selector::RbSelector;
|
|
12
12
|
use crate::conversion::{Wrap, parse_fill_null_strategy};
|
|
13
13
|
use crate::expr::ToExprs;
|
|
14
|
-
use crate::{RbExpr, RbPolarsErr, RbResult};
|
|
14
|
+
use crate::{RbDataType, RbExpr, RbPolarsErr, RbResult};
|
|
15
15
|
|
|
16
16
|
impl RbExpr {
|
|
17
17
|
pub fn add(&self, rhs: &Self) -> RbResult<Self> {
|
|
@@ -118,6 +118,14 @@ impl RbExpr {
|
|
|
118
118
|
self.inner.clone().max().into()
|
|
119
119
|
}
|
|
120
120
|
|
|
121
|
+
pub fn min_by(&self, by: &Self) -> Self {
|
|
122
|
+
self.inner.clone().min_by(by.inner.clone()).into()
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
pub fn max_by(&self, by: &Self) -> Self {
|
|
126
|
+
self.inner.clone().max_by(by.inner.clone()).into()
|
|
127
|
+
}
|
|
128
|
+
|
|
121
129
|
pub fn nan_max(&self) -> Self {
|
|
122
130
|
self.inner.clone().nan_max().into()
|
|
123
131
|
}
|
|
@@ -174,8 +182,8 @@ impl RbExpr {
|
|
|
174
182
|
self.inner.clone().item(allow_empty).into()
|
|
175
183
|
}
|
|
176
184
|
|
|
177
|
-
pub fn implode(&self) -> Self {
|
|
178
|
-
self.inner.clone().implode().into()
|
|
185
|
+
pub fn implode(&self, maintain_order: bool) -> Self {
|
|
186
|
+
self.inner.clone().implode(maintain_order).into()
|
|
179
187
|
}
|
|
180
188
|
|
|
181
189
|
pub fn quantile(&self, quantile: &Self, interpolation: Wrap<QuantileMethod>) -> Self {
|
|
@@ -358,8 +366,11 @@ impl RbExpr {
|
|
|
358
366
|
.into()
|
|
359
367
|
}
|
|
360
368
|
|
|
361
|
-
pub fn gather(&self, idx: &Self) -> Self {
|
|
362
|
-
self.inner
|
|
369
|
+
pub fn gather(&self, idx: &Self, null_on_oob: bool) -> Self {
|
|
370
|
+
self.inner
|
|
371
|
+
.clone()
|
|
372
|
+
.gather(idx.inner.clone(), null_on_oob)
|
|
373
|
+
.into()
|
|
363
374
|
}
|
|
364
375
|
|
|
365
376
|
pub fn get(&self, idx: &Self, null_on_oob: bool) -> Self {
|
|
@@ -522,6 +533,10 @@ impl RbExpr {
|
|
|
522
533
|
self.clone().inner.round_sig_figs(digits).into()
|
|
523
534
|
}
|
|
524
535
|
|
|
536
|
+
pub fn truncate(&self, decimals: u32) -> Self {
|
|
537
|
+
self.inner.clone().truncate(decimals).into()
|
|
538
|
+
}
|
|
539
|
+
|
|
525
540
|
pub fn floor(&self) -> Self {
|
|
526
541
|
self.inner.clone().floor().into()
|
|
527
542
|
}
|
|
@@ -719,8 +734,11 @@ impl RbExpr {
|
|
|
719
734
|
self.inner.clone().dot(other.inner.clone()).into()
|
|
720
735
|
}
|
|
721
736
|
|
|
722
|
-
pub fn reinterpret(&self, signed: bool) -> Self {
|
|
723
|
-
self.inner
|
|
737
|
+
pub fn reinterpret(&self, signed: Option<bool>, dtype: Option<RbDataType>) -> Self {
|
|
738
|
+
self.inner
|
|
739
|
+
.clone()
|
|
740
|
+
.reinterpret(signed, dtype.map(|dt| dt.0))
|
|
741
|
+
.into()
|
|
724
742
|
}
|
|
725
743
|
|
|
726
744
|
pub fn mode(&self, maintain_order: bool) -> Self {
|
|
@@ -896,6 +914,10 @@ impl RbExpr {
|
|
|
896
914
|
self.inner.clone().all(drop_nulls).into()
|
|
897
915
|
}
|
|
898
916
|
|
|
917
|
+
pub fn is_empty(&self, ignore_nulls: bool) -> Self {
|
|
918
|
+
self.inner.clone().is_empty(ignore_nulls).into()
|
|
919
|
+
}
|
|
920
|
+
|
|
899
921
|
pub fn log(&self, base: &RbExpr) -> Self {
|
|
900
922
|
self.inner.clone().log(base.inner.clone()).into()
|
|
901
923
|
}
|
|
@@ -916,13 +938,11 @@ impl RbExpr {
|
|
|
916
938
|
self.inner.clone().hash(seed, seed_1, seed_2, seed_3).into()
|
|
917
939
|
}
|
|
918
940
|
|
|
919
|
-
pub fn set_sorted_flag(&self, descending: bool) -> Self {
|
|
920
|
-
let
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
};
|
|
925
|
-
self.inner.clone().set_sorted_flag(is_sorted).into()
|
|
941
|
+
pub fn set_sorted_flag(&self, descending: bool, nulls_last: bool) -> Self {
|
|
942
|
+
let sortedness = AExprSorted::default()
|
|
943
|
+
.with_desc(Some(descending))
|
|
944
|
+
.with_nulls_last(Some(nulls_last));
|
|
945
|
+
self.inner.clone().set_sorted_flag(sortedness).into()
|
|
926
946
|
}
|
|
927
947
|
|
|
928
948
|
pub fn replace(&self, old: &Self, new: &Self) -> Self {
|
data/ext/polars/src/expr/list.rs
CHANGED
|
@@ -7,14 +7,6 @@ use crate::conversion::Wrap;
|
|
|
7
7
|
use crate::{RbExpr, RbResult};
|
|
8
8
|
|
|
9
9
|
impl RbExpr {
|
|
10
|
-
pub fn list_all(&self) -> Self {
|
|
11
|
-
self.inner.clone().list().all().into()
|
|
12
|
-
}
|
|
13
|
-
|
|
14
|
-
pub fn list_any(&self) -> Self {
|
|
15
|
-
self.inner.clone().list().any().into()
|
|
16
|
-
}
|
|
17
|
-
|
|
18
10
|
pub fn list_arg_max(&self) -> Self {
|
|
19
11
|
self.inner.clone().list().arg_max().into()
|
|
20
12
|
}
|
|
@@ -103,10 +95,6 @@ impl RbExpr {
|
|
|
103
95
|
self.inner.clone().list().min().into()
|
|
104
96
|
}
|
|
105
97
|
|
|
106
|
-
pub fn list_reverse(&self) -> Self {
|
|
107
|
-
self.inner.clone().list().reverse().into()
|
|
108
|
-
}
|
|
109
|
-
|
|
110
98
|
pub fn list_shift(&self, periods: &RbExpr) -> Self {
|
|
111
99
|
self.inner
|
|
112
100
|
.clone()
|
|
@@ -213,20 +201,6 @@ impl RbExpr {
|
|
|
213
201
|
.into())
|
|
214
202
|
}
|
|
215
203
|
|
|
216
|
-
pub fn list_n_unique(&self) -> Self {
|
|
217
|
-
self.inner.clone().list().n_unique().into()
|
|
218
|
-
}
|
|
219
|
-
|
|
220
|
-
pub fn list_unique(&self, maintain_order: bool) -> Self {
|
|
221
|
-
let e = self.inner.clone();
|
|
222
|
-
|
|
223
|
-
if maintain_order {
|
|
224
|
-
e.list().unique_stable().into()
|
|
225
|
-
} else {
|
|
226
|
-
e.list().unique().into()
|
|
227
|
-
}
|
|
228
|
-
}
|
|
229
|
-
|
|
230
204
|
pub fn list_set_operation(&self, other: &RbExpr, operation: Wrap<SetOperation>) -> Self {
|
|
231
205
|
let e = self.inner.clone().list();
|
|
232
206
|
match operation.0 {
|
|
@@ -1,6 +1,9 @@
|
|
|
1
|
+
use magnus::Value;
|
|
1
2
|
use polars::prelude::*;
|
|
2
3
|
|
|
3
4
|
use crate::conversion::Wrap;
|
|
5
|
+
use crate::ruby::plan_callback::PlanCallbackExt;
|
|
6
|
+
use crate::ruby::ruby_function::RubyObject;
|
|
4
7
|
use crate::{RbExpr, RbPolarsErr, RbResult};
|
|
5
8
|
|
|
6
9
|
impl RbExpr {
|
|
@@ -406,4 +409,25 @@ impl RbExpr {
|
|
|
406
409
|
|
|
407
410
|
self.inner.clone().rolling_kurtosis(options).into()
|
|
408
411
|
}
|
|
412
|
+
|
|
413
|
+
pub fn rolling_map(
|
|
414
|
+
&self,
|
|
415
|
+
lambda: Value,
|
|
416
|
+
window_size: usize,
|
|
417
|
+
weights: Option<Vec<f64>>,
|
|
418
|
+
min_periods: Option<usize>,
|
|
419
|
+
center: bool,
|
|
420
|
+
) -> Self {
|
|
421
|
+
let min_periods = min_periods.unwrap_or(window_size);
|
|
422
|
+
let options = RollingOptionsFixedWindow {
|
|
423
|
+
window_size,
|
|
424
|
+
weights,
|
|
425
|
+
min_periods,
|
|
426
|
+
center,
|
|
427
|
+
..Default::default()
|
|
428
|
+
};
|
|
429
|
+
let function = PlanCallback::new_ruby(RubyObject::from(lambda));
|
|
430
|
+
|
|
431
|
+
self.inner.clone().rolling_map(function, options).into()
|
|
432
|
+
}
|
|
409
433
|
}
|
|
@@ -6,9 +6,9 @@ pub fn business_day_count(
|
|
|
6
6
|
start: &RbExpr,
|
|
7
7
|
end: &RbExpr,
|
|
8
8
|
week_mask: [bool; 7],
|
|
9
|
-
holidays:
|
|
9
|
+
holidays: &RbExpr,
|
|
10
10
|
) -> RbExpr {
|
|
11
11
|
let start = start.inner.clone();
|
|
12
12
|
let end = end.inner.clone();
|
|
13
|
-
dsl::business_day_count(start, end, week_mask, holidays).into()
|
|
13
|
+
dsl::business_day_count(start, end, week_mask, holidays.inner.clone()).into()
|
|
14
14
|
}
|
|
@@ -8,6 +8,7 @@ use crate::conversion::Wrap;
|
|
|
8
8
|
use crate::file::{EitherRustRubyFile, get_either_file};
|
|
9
9
|
use crate::io::cloud_options::OptRbCloudOptions;
|
|
10
10
|
use crate::ruby::gvl::GvlExt;
|
|
11
|
+
use crate::ruby::utils::TryIntoValue;
|
|
11
12
|
use crate::{RbPolarsErr, RbResult};
|
|
12
13
|
|
|
13
14
|
pub fn read_ipc_schema(rb: &Ruby, rb_f: Value) -> RbResult<RHash> {
|
|
@@ -32,7 +33,6 @@ pub fn read_parquet_metadata(
|
|
|
32
33
|
) -> RbResult<RHash> {
|
|
33
34
|
use std::io::Cursor;
|
|
34
35
|
|
|
35
|
-
use polars_io::pl_async::get_runtime;
|
|
36
36
|
use polars_parquet::read::read_metadata;
|
|
37
37
|
use polars_parquet::read::schema::read_custom_key_value_metadata;
|
|
38
38
|
|
|
@@ -53,7 +53,7 @@ pub fn read_parquet_metadata(
|
|
|
53
53
|
use polars_error::PolarsResult;
|
|
54
54
|
|
|
55
55
|
rb.detach(|| {
|
|
56
|
-
|
|
56
|
+
polars_core::runtime::ASYNC.block_on(async {
|
|
57
57
|
let mut reader =
|
|
58
58
|
ParquetObjectStore::from_uri(p, cloud_options.as_ref(), None).await?;
|
|
59
59
|
let result = reader.get_metadata().await?;
|
|
@@ -97,9 +97,10 @@ pub fn read_parquet_schema(rb_f: Value) -> RbResult<RHash> {
|
|
|
97
97
|
}
|
|
98
98
|
|
|
99
99
|
fn fields_to_rbdict(schema: &ArrowSchema, dict: &RHash) -> RbResult<()> {
|
|
100
|
+
let ruby = &Ruby::get_with(*dict);
|
|
100
101
|
for field in schema.iter_values() {
|
|
101
102
|
let dt = Wrap(polars::prelude::DataType::from_arrow_field(field));
|
|
102
|
-
dict.aset(field.name.as_str(), dt)?;
|
|
103
|
+
dict.aset(field.name.as_str(), dt.try_into_value_with(ruby)?)?;
|
|
103
104
|
}
|
|
104
105
|
Ok(())
|
|
105
106
|
}
|
|
@@ -1,17 +1,23 @@
|
|
|
1
1
|
use magnus::encoding::EncodingCapable;
|
|
2
|
-
use magnus::{
|
|
2
|
+
use magnus::{
|
|
3
|
+
Float, Integer, RArray, RString, Ruby, Value, prelude::*, typed_data::Obj, value::Qfalse,
|
|
4
|
+
value::Qtrue,
|
|
5
|
+
};
|
|
3
6
|
use polars::lazy::dsl;
|
|
4
7
|
use polars::prelude::*;
|
|
8
|
+
use polars_plan::plans::DynLiteralValue;
|
|
5
9
|
|
|
10
|
+
use crate::conversion::any_value::rb_object_to_any_value;
|
|
6
11
|
use crate::conversion::{Wrap, get_lf, get_rbseq};
|
|
7
12
|
use crate::expr::ToExprs;
|
|
8
13
|
use crate::expr::datatype::RbDataTypeExpr;
|
|
9
14
|
use crate::lazyframe::RbOptFlags;
|
|
15
|
+
use crate::ruby::exceptions::{RbTypeError, RbValueError};
|
|
10
16
|
use crate::ruby::plan_callback::PlanCallbackExt;
|
|
11
17
|
use crate::ruby::ruby_function::RubyObject;
|
|
12
18
|
use crate::ruby::thread::start_background_ruby_thread;
|
|
13
19
|
use crate::utils::EnterPolarsExt;
|
|
14
|
-
use crate::{RbDataFrame, RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries,
|
|
20
|
+
use crate::{RbDataFrame, RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, map};
|
|
15
21
|
|
|
16
22
|
macro_rules! set_unwrapped_or_0 {
|
|
17
23
|
($($var:ident),+ $(,)?) => {
|
|
@@ -130,6 +136,13 @@ pub fn collect_all(
|
|
|
130
136
|
Ok(ruby.ary_from_iter(dfs.into_iter().map(Into::<RbDataFrame>::into)))
|
|
131
137
|
}
|
|
132
138
|
|
|
139
|
+
pub fn explain_all(rb: &Ruby, lfs: RArray, optflags: &RbOptFlags) -> RbResult<String> {
|
|
140
|
+
let plans = lfs_to_plans(lfs)?;
|
|
141
|
+
let explained =
|
|
142
|
+
rb.enter_polars(|| LazyFrame::explain_all(plans, optflags.clone().inner.into_inner()))?;
|
|
143
|
+
Ok(explained)
|
|
144
|
+
}
|
|
145
|
+
|
|
133
146
|
pub fn collect_all_lazy(lfs: RArray, optflags: &RbOptFlags) -> RbResult<RbLazyFrame> {
|
|
134
147
|
let plans = lfs_to_plans(lfs)?;
|
|
135
148
|
|
|
@@ -371,34 +384,28 @@ pub fn fold(
|
|
|
371
384
|
.into())
|
|
372
385
|
}
|
|
373
386
|
|
|
374
|
-
pub fn lit(value: Value, allow_object: bool, is_scalar: bool) -> RbResult<RbExpr> {
|
|
387
|
+
pub fn lit(rb: &Ruby, value: Value, allow_object: bool, is_scalar: bool) -> RbResult<RbExpr> {
|
|
375
388
|
let ruby = Ruby::get_with(value);
|
|
376
|
-
if value
|
|
377
|
-
Ok(dsl::lit(
|
|
378
|
-
} else if
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
}
|
|
392
|
-
} else if let Some(v) = Float::from_value(value) {
|
|
393
|
-
Ok(dsl::lit(v.to_f64()).into())
|
|
394
|
-
} else if let Some(v) = RString::from_value(value) {
|
|
395
|
-
if v.enc_get() == ruby.utf8_encindex() {
|
|
396
|
-
Ok(dsl::lit(v.to_string()?).into())
|
|
389
|
+
if Qtrue::from_value(value).is_some() {
|
|
390
|
+
Ok(dsl::lit(true).into())
|
|
391
|
+
} else if Qfalse::from_value(value).is_some() {
|
|
392
|
+
Ok(dsl::lit(false).into())
|
|
393
|
+
} else if let Some(int) = Integer::from_value(value) {
|
|
394
|
+
let v = i128::try_convert(int.as_value())
|
|
395
|
+
.map_err(|e| polars_err!(InvalidOperation: "integer too large for Polars: {e}"))
|
|
396
|
+
.map_err(RbPolarsErr::from)?;
|
|
397
|
+
Ok(Expr::Literal(LiteralValue::Dyn(DynLiteralValue::Int(v))).into())
|
|
398
|
+
} else if let Some(float) = Float::from_value(value) {
|
|
399
|
+
let val = f64::try_convert(float.as_value())?;
|
|
400
|
+
Ok(Expr::Literal(LiteralValue::Dyn(DynLiteralValue::Float(val))).into())
|
|
401
|
+
} else if let Some(rbstr) = RString::from_value(value) {
|
|
402
|
+
if rbstr.enc_get() == ruby.utf8_encindex() {
|
|
403
|
+
Ok(dsl::lit(rbstr.to_string()?).into())
|
|
397
404
|
} else {
|
|
398
|
-
Ok(dsl::lit(unsafe {
|
|
405
|
+
Ok(dsl::lit(unsafe { rbstr.as_slice() }).into())
|
|
399
406
|
}
|
|
400
|
-
} else if let Ok(series) =
|
|
401
|
-
let s = series.series.
|
|
407
|
+
} else if let Ok(series) = <&RbSeries>::try_convert(value) {
|
|
408
|
+
let s = series.clone().series.into_inner();
|
|
402
409
|
if is_scalar {
|
|
403
410
|
let av = s
|
|
404
411
|
.get(0)
|
|
@@ -406,17 +413,39 @@ pub fn lit(value: Value, allow_object: bool, is_scalar: bool) -> RbResult<RbExpr
|
|
|
406
413
|
let av = av.into_static();
|
|
407
414
|
Ok(dsl::lit(Scalar::new(s.dtype().clone(), av)).into())
|
|
408
415
|
} else {
|
|
409
|
-
Ok(dsl::lit(s
|
|
416
|
+
Ok(dsl::lit(s).into())
|
|
410
417
|
}
|
|
411
418
|
} else if value.is_nil() {
|
|
412
419
|
Ok(dsl::lit(Null {}).into())
|
|
413
|
-
} else if allow_object {
|
|
414
|
-
todo!()
|
|
415
420
|
} else {
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
421
|
+
let raise = || {
|
|
422
|
+
RbTypeError::new_err(format!(
|
|
423
|
+
"cannot create expression literal for value of type {}.\
|
|
424
|
+
\n\nHint: Pass `allow_object: true` to accept any value and create a literal of type Object.",
|
|
425
|
+
unsafe { value.classname() },
|
|
426
|
+
))
|
|
427
|
+
};
|
|
428
|
+
|
|
429
|
+
let av = rb_object_to_any_value(value, true, allow_object).map_err(|_| raise())?;
|
|
430
|
+
match av {
|
|
431
|
+
AnyValue::ObjectOwned(_) => {
|
|
432
|
+
// Check again for object allowance as for cached addresses this is not checked.
|
|
433
|
+
if allow_object {
|
|
434
|
+
let s = RbSeries::new_object(
|
|
435
|
+
rb,
|
|
436
|
+
"".to_string(),
|
|
437
|
+
rb.ary_new_from_values(&[value]),
|
|
438
|
+
false,
|
|
439
|
+
)?
|
|
440
|
+
.series
|
|
441
|
+
.into_inner();
|
|
442
|
+
Ok(dsl::lit(s).into())
|
|
443
|
+
} else {
|
|
444
|
+
Err(raise())
|
|
445
|
+
}
|
|
446
|
+
}
|
|
447
|
+
_ => Ok(Expr::Literal(LiteralValue::from(av)).into()),
|
|
448
|
+
}
|
|
420
449
|
}
|
|
421
450
|
}
|
|
422
451
|
|
|
@@ -454,7 +483,7 @@ pub fn reduce(
|
|
|
454
483
|
.into())
|
|
455
484
|
}
|
|
456
485
|
|
|
457
|
-
pub fn repeat(value: &RbExpr, n: &RbExpr, dtype: Option<Wrap<DataType>>) ->
|
|
486
|
+
pub fn repeat(value: &RbExpr, n: &RbExpr, dtype: Option<Wrap<DataType>>) -> RbExpr {
|
|
458
487
|
let mut value = value.inner.clone();
|
|
459
488
|
let n = n.inner.clone();
|
|
460
489
|
|
|
@@ -462,17 +491,7 @@ pub fn repeat(value: &RbExpr, n: &RbExpr, dtype: Option<Wrap<DataType>>) -> RbRe
|
|
|
462
491
|
value = value.cast(dtype.0);
|
|
463
492
|
}
|
|
464
493
|
|
|
465
|
-
|
|
466
|
-
let av = lv.to_any_value().unwrap();
|
|
467
|
-
// Integer inputs that fit in Int32 are parsed as such
|
|
468
|
-
if let DataType::Int64 = av.dtype() {
|
|
469
|
-
let int_value = av.try_extract::<i64>().unwrap();
|
|
470
|
-
if int_value >= i32::MIN as i64 && int_value <= i32::MAX as i64 {
|
|
471
|
-
value = value.cast(DataType::Int32);
|
|
472
|
-
}
|
|
473
|
-
}
|
|
474
|
-
}
|
|
475
|
-
Ok(dsl::repeat(value, n).into())
|
|
494
|
+
dsl::repeat(value, n).into()
|
|
476
495
|
}
|
|
477
496
|
|
|
478
497
|
pub fn spearman_rank_corr(a: &RbExpr, b: &RbExpr, propagate_nans: bool) -> RbExpr {
|
|
@@ -1,18 +1,19 @@
|
|
|
1
|
-
use magnus::{
|
|
1
|
+
use magnus::{Ruby, Value};
|
|
2
2
|
use polars_core;
|
|
3
|
-
use polars_core::POOL;
|
|
4
3
|
use polars_core::fmt::FloatFmt;
|
|
5
4
|
use polars_core::prelude::IDX_DTYPE;
|
|
5
|
+
use polars_core::runtime::RAYON;
|
|
6
6
|
|
|
7
7
|
use crate::conversion::Wrap;
|
|
8
|
+
use crate::ruby::utils::TryIntoValue;
|
|
8
9
|
use crate::{RbResult, RbValueError};
|
|
9
10
|
|
|
10
|
-
pub fn get_index_type(ruby: &Ruby) -> Value {
|
|
11
|
-
Wrap(IDX_DTYPE).
|
|
11
|
+
pub fn get_index_type(ruby: &Ruby) -> RbResult<Value> {
|
|
12
|
+
Wrap(IDX_DTYPE).try_into_value_with(ruby)
|
|
12
13
|
}
|
|
13
14
|
|
|
14
15
|
pub fn thread_pool_size() -> usize {
|
|
15
|
-
|
|
16
|
+
RAYON.current_num_threads()
|
|
16
17
|
}
|
|
17
18
|
|
|
18
19
|
pub fn set_float_fmt(fmt: String) -> RbResult<()> {
|
|
@@ -138,6 +138,19 @@ pub fn time_ranges(
|
|
|
138
138
|
Ok(dsl::time_ranges(start, end, every, closed).into())
|
|
139
139
|
}
|
|
140
140
|
|
|
141
|
+
pub fn linear_space(
|
|
142
|
+
start: &RbExpr,
|
|
143
|
+
end: &RbExpr,
|
|
144
|
+
num_samples: &RbExpr,
|
|
145
|
+
closed: Wrap<ClosedInterval>,
|
|
146
|
+
) -> RbResult<RbExpr> {
|
|
147
|
+
let start = start.inner.clone();
|
|
148
|
+
let end = end.inner.clone();
|
|
149
|
+
let num_samples = num_samples.inner.clone();
|
|
150
|
+
let closed = closed.0;
|
|
151
|
+
Ok(dsl::linear_space(start, end, num_samples, closed).into())
|
|
152
|
+
}
|
|
153
|
+
|
|
141
154
|
pub fn linear_spaces(
|
|
142
155
|
start: &RbExpr,
|
|
143
156
|
end: &RbExpr,
|