polars-df 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/Cargo.lock +70 -9
- data/Cargo.toml +2 -0
- data/ext/polars/Cargo.toml +6 -1
- data/ext/polars/src/apply/dataframe.rs +292 -0
- data/ext/polars/src/apply/mod.rs +254 -0
- data/ext/polars/src/apply/series.rs +1173 -0
- data/ext/polars/src/conversion.rs +100 -5
- data/ext/polars/src/dataframe.rs +146 -1
- data/ext/polars/src/error.rs +8 -0
- data/ext/polars/src/lazy/apply.rs +34 -2
- data/ext/polars/src/lazy/dataframe.rs +72 -1
- data/ext/polars/src/lazy/dsl.rs +38 -0
- data/ext/polars/src/lib.rs +165 -1
- data/ext/polars/src/series.rs +296 -0
- data/ext/polars/src/utils.rs +25 -0
- data/lib/polars/convert.rb +100 -0
- data/lib/polars/data_frame.rb +1457 -56
- data/lib/polars/dynamic_group_by.rb +49 -0
- data/lib/polars/expr.rb +258 -9
- data/lib/polars/functions.rb +192 -3
- data/lib/polars/group_by.rb +43 -3
- data/lib/polars/io.rb +19 -3
- data/lib/polars/lazy_frame.rb +792 -22
- data/lib/polars/lazy_functions.rb +561 -27
- data/lib/polars/rolling_group_by.rb +35 -0
- data/lib/polars/series.rb +132 -10
- data/lib/polars/utils.rb +16 -0
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +9 -1
- metadata +9 -3
data/ext/polars/src/conversion.rs
CHANGED
@@ -1,35 +1,58 @@
|
|
1
|
-
use magnus::{class, RArray, Symbol, TryConvert, Value, QNIL};
|
1
|
+
use magnus::{class, r_hash::ForEach, RArray, RHash, Symbol, TryConvert, Value, QNIL};
|
2
2
|
use polars::chunked_array::object::PolarsObjectSafe;
|
3
3
|
use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
|
4
4
|
use polars::datatypes::AnyValue;
|
5
|
+
use polars::frame::row::Row;
|
5
6
|
use polars::frame::DataFrame;
|
7
|
+
use polars::io::avro::AvroCompression;
|
6
8
|
use polars::prelude::*;
|
7
9
|
use polars::series::ops::NullBehavior;
|
8
10
|
use std::fmt::{Display, Formatter};
|
9
11
|
use std::hash::{Hash, Hasher};
|
10
12
|
|
11
|
-
use crate::{RbDataFrame, RbPolarsErr, RbResult, RbSeries, RbValueError};
|
13
|
+
use crate::{RbDataFrame, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbValueError};
|
12
14
|
|
15
|
+
pub(crate) fn slice_to_wrapped<T>(slice: &[T]) -> &[Wrap<T>] {
|
16
|
+
// Safety:
|
17
|
+
// Wrap is transparent.
|
18
|
+
unsafe { std::mem::transmute(slice) }
|
19
|
+
}
|
20
|
+
|
21
|
+
#[repr(transparent)]
|
13
22
|
pub struct Wrap<T>(pub T);
|
14
23
|
|
24
|
+
impl<T> Clone for Wrap<T>
|
25
|
+
where
|
26
|
+
T: Clone,
|
27
|
+
{
|
28
|
+
fn clone(&self) -> Self {
|
29
|
+
Wrap(self.0.clone())
|
30
|
+
}
|
31
|
+
}
|
32
|
+
|
15
33
|
impl<T> From<T> for Wrap<T> {
|
16
34
|
fn from(t: T) -> Self {
|
17
35
|
Wrap(t)
|
18
36
|
}
|
19
37
|
}
|
20
38
|
|
21
|
-
pub fn get_rbseq(obj: Value) -> RbResult<(RArray, usize)> {
|
39
|
+
pub(crate) fn get_rbseq(obj: Value) -> RbResult<(RArray, usize)> {
|
22
40
|
let seq: RArray = obj.try_convert()?;
|
23
41
|
let len = seq.len();
|
24
42
|
Ok((seq, len))
|
25
43
|
}
|
26
44
|
|
27
|
-
pub fn get_df(obj: Value) -> RbResult<DataFrame> {
|
45
|
+
pub(crate) fn get_df(obj: Value) -> RbResult<DataFrame> {
|
28
46
|
let rbdf = obj.funcall::<_, _, &RbDataFrame>("_df", ())?;
|
29
47
|
Ok(rbdf.df.borrow().clone())
|
30
48
|
}
|
31
49
|
|
32
|
-
pub fn get_series(obj: Value) -> RbResult<Series> {
|
50
|
+
pub(crate) fn get_lf(obj: Value) -> RbResult<LazyFrame> {
|
51
|
+
let rbdf = obj.funcall::<_, _, &RbLazyFrame>("_ldf", ())?;
|
52
|
+
Ok(rbdf.ldf.clone())
|
53
|
+
}
|
54
|
+
|
55
|
+
pub(crate) fn get_series(obj: Value) -> RbResult<Series> {
|
33
56
|
let rbs = obj.funcall::<_, _, &RbSeries>("_s", ())?;
|
34
57
|
Ok(rbs.series.borrow().clone())
|
35
58
|
}
|
@@ -171,6 +194,39 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
|
|
171
194
|
}
|
172
195
|
}
|
173
196
|
|
197
|
+
impl TryConvert for Wrap<AsofStrategy> {
|
198
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
199
|
+
let parsed = match ob.try_convert::<String>()?.as_str() {
|
200
|
+
"backward" => AsofStrategy::Backward,
|
201
|
+
"forward" => AsofStrategy::Forward,
|
202
|
+
v => {
|
203
|
+
return Err(RbValueError::new_err(format!(
|
204
|
+
"strategy must be one of {{'backward', 'forward'}}, got {}",
|
205
|
+
v
|
206
|
+
)))
|
207
|
+
}
|
208
|
+
};
|
209
|
+
Ok(Wrap(parsed))
|
210
|
+
}
|
211
|
+
}
|
212
|
+
|
213
|
+
impl TryConvert for Wrap<Option<AvroCompression>> {
|
214
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
215
|
+
let parsed = match ob.try_convert::<String>()?.as_str() {
|
216
|
+
"uncompressed" => None,
|
217
|
+
"snappy" => Some(AvroCompression::Snappy),
|
218
|
+
"deflate" => Some(AvroCompression::Deflate),
|
219
|
+
v => {
|
220
|
+
return Err(RbValueError::new_err(format!(
|
221
|
+
"compression must be one of {{'uncompressed', 'snappy', 'deflate'}}, got {}",
|
222
|
+
v
|
223
|
+
)))
|
224
|
+
}
|
225
|
+
};
|
226
|
+
Ok(Wrap(parsed))
|
227
|
+
}
|
228
|
+
}
|
229
|
+
|
174
230
|
impl TryConvert for Wrap<CategoricalOrdering> {
|
175
231
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
176
232
|
let parsed = match ob.try_convert::<String>()?.as_str() {
|
@@ -462,6 +518,32 @@ pub fn parse_parquet_compression(
|
|
462
518
|
Ok(parsed)
|
463
519
|
}
|
464
520
|
|
521
|
+
impl<'s> TryConvert for Wrap<Row<'s>> {
|
522
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
523
|
+
let mut vals: Vec<Wrap<AnyValue<'s>>> = Vec::new();
|
524
|
+
for item in ob.try_convert::<RArray>()?.each() {
|
525
|
+
vals.push(item?.try_convert::<Wrap<AnyValue<'s>>>()?);
|
526
|
+
}
|
527
|
+
let vals: Vec<AnyValue> = unsafe { std::mem::transmute(vals) };
|
528
|
+
Ok(Wrap(Row(vals)))
|
529
|
+
}
|
530
|
+
}
|
531
|
+
|
532
|
+
impl TryConvert for Wrap<Schema> {
|
533
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
534
|
+
let dict = ob.try_convert::<RHash>()?;
|
535
|
+
|
536
|
+
let mut schema = Vec::new();
|
537
|
+
dict.foreach(|key: String, val: Wrap<DataType>| {
|
538
|
+
schema.push(Field::new(&key, val.0));
|
539
|
+
Ok(ForEach::Continue)
|
540
|
+
})
|
541
|
+
.unwrap();
|
542
|
+
|
543
|
+
Ok(Wrap(schema.into_iter().into()))
|
544
|
+
}
|
545
|
+
}
|
546
|
+
|
465
547
|
#[derive(Clone, Debug)]
|
466
548
|
pub struct ObjectValue {
|
467
549
|
pub inner: Value,
|
@@ -503,18 +585,31 @@ impl From<Value> for ObjectValue {
|
|
503
585
|
}
|
504
586
|
}
|
505
587
|
|
588
|
+
impl TryConvert for ObjectValue {
|
589
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
590
|
+
Ok(ObjectValue { inner: ob })
|
591
|
+
}
|
592
|
+
}
|
593
|
+
|
506
594
|
impl From<&dyn PolarsObjectSafe> for &ObjectValue {
|
507
595
|
fn from(val: &dyn PolarsObjectSafe) -> Self {
|
508
596
|
unsafe { &*(val as *const dyn PolarsObjectSafe as *const ObjectValue) }
|
509
597
|
}
|
510
598
|
}
|
511
599
|
|
600
|
+
// TODO remove
|
512
601
|
impl ObjectValue {
|
513
602
|
pub fn to_object(&self) -> Value {
|
514
603
|
self.inner
|
515
604
|
}
|
516
605
|
}
|
517
606
|
|
607
|
+
impl From<ObjectValue> for Value {
|
608
|
+
fn from(val: ObjectValue) -> Self {
|
609
|
+
val.inner
|
610
|
+
}
|
611
|
+
}
|
612
|
+
|
518
613
|
impl Default for ObjectValue {
|
519
614
|
fn default() -> Self {
|
520
615
|
ObjectValue { inner: *QNIL }
|
data/ext/polars/src/dataframe.rs
CHANGED
@@ -1,15 +1,21 @@
|
|
1
1
|
use magnus::{r_hash::ForEach, RArray, RHash, RString, Value};
|
2
|
+
use polars::io::avro::AvroCompression;
|
2
3
|
use polars::io::mmap::ReaderBytes;
|
3
4
|
use polars::io::RowCount;
|
5
|
+
use polars::prelude::pivot::{pivot, pivot_stable};
|
4
6
|
use polars::prelude::*;
|
5
7
|
use std::cell::RefCell;
|
6
8
|
use std::io::{BufWriter, Cursor};
|
7
9
|
use std::ops::Deref;
|
8
10
|
|
11
|
+
use crate::apply::dataframe::{
|
12
|
+
apply_lambda_unknown, apply_lambda_with_bool_out_type, apply_lambda_with_primitive_out_type,
|
13
|
+
apply_lambda_with_utf8_out_type,
|
14
|
+
};
|
9
15
|
use crate::conversion::*;
|
10
16
|
use crate::file::{get_file_like, get_mmap_bytes_reader};
|
11
17
|
use crate::series::{to_rbseries_collection, to_series_collection};
|
12
|
-
use crate::{series, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
|
18
|
+
use crate::{series, RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
|
13
19
|
|
14
20
|
#[magnus::wrap(class = "Polars::RbDataFrame")]
|
15
21
|
pub struct RbDataFrame {
|
@@ -179,6 +185,48 @@ impl RbDataFrame {
|
|
179
185
|
Ok(RbDataFrame::new(df))
|
180
186
|
}
|
181
187
|
|
188
|
+
pub fn read_avro(
|
189
|
+
rb_f: Value,
|
190
|
+
columns: Option<Vec<String>>,
|
191
|
+
projection: Option<Vec<usize>>,
|
192
|
+
n_rows: Option<usize>,
|
193
|
+
) -> RbResult<Self> {
|
194
|
+
use polars::io::avro::AvroReader;
|
195
|
+
|
196
|
+
let file = get_file_like(rb_f, false)?;
|
197
|
+
let df = AvroReader::new(file)
|
198
|
+
.with_projection(projection)
|
199
|
+
.with_columns(columns)
|
200
|
+
.with_n_rows(n_rows)
|
201
|
+
.finish()
|
202
|
+
.map_err(RbPolarsErr::from)?;
|
203
|
+
Ok(RbDataFrame::new(df))
|
204
|
+
}
|
205
|
+
|
206
|
+
pub fn write_avro(
|
207
|
+
&self,
|
208
|
+
rb_f: Value,
|
209
|
+
compression: Wrap<Option<AvroCompression>>,
|
210
|
+
) -> RbResult<()> {
|
211
|
+
use polars::io::avro::AvroWriter;
|
212
|
+
|
213
|
+
if let Ok(s) = rb_f.try_convert::<String>() {
|
214
|
+
let f = std::fs::File::create(&s).unwrap();
|
215
|
+
AvroWriter::new(f)
|
216
|
+
.with_compression(compression.0)
|
217
|
+
.finish(&mut self.df.borrow_mut())
|
218
|
+
.map_err(RbPolarsErr::from)?;
|
219
|
+
} else {
|
220
|
+
let mut buf = get_file_like(rb_f, true)?;
|
221
|
+
AvroWriter::new(&mut buf)
|
222
|
+
.with_compression(compression.0)
|
223
|
+
.finish(&mut self.df.borrow_mut())
|
224
|
+
.map_err(RbPolarsErr::from)?;
|
225
|
+
}
|
226
|
+
|
227
|
+
Ok(())
|
228
|
+
}
|
229
|
+
|
182
230
|
pub fn read_json(rb_f: Value) -> RbResult<Self> {
|
183
231
|
// memmap the file first
|
184
232
|
let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
|
@@ -238,6 +286,14 @@ impl RbDataFrame {
|
|
238
286
|
Ok(())
|
239
287
|
}
|
240
288
|
|
289
|
+
pub fn read_hashes(
|
290
|
+
_dicts: Value,
|
291
|
+
_infer_schema_length: Option<usize>,
|
292
|
+
_schema_overwrite: Option<Wrap<Schema>>,
|
293
|
+
) -> RbResult<Self> {
|
294
|
+
Err(RbPolarsErr::todo())
|
295
|
+
}
|
296
|
+
|
241
297
|
pub fn read_hash(data: RHash) -> RbResult<Self> {
|
242
298
|
let mut cols: Vec<Series> = Vec::new();
|
243
299
|
data.foreach(|name: String, values: Value| {
|
@@ -751,6 +807,31 @@ impl RbDataFrame {
|
|
751
807
|
Ok(RbDataFrame::new(df))
|
752
808
|
}
|
753
809
|
|
810
|
+
pub fn pivot_expr(
|
811
|
+
&self,
|
812
|
+
values: Vec<String>,
|
813
|
+
index: Vec<String>,
|
814
|
+
columns: Vec<String>,
|
815
|
+
aggregate_expr: &RbExpr,
|
816
|
+
maintain_order: bool,
|
817
|
+
sort_columns: bool,
|
818
|
+
) -> RbResult<Self> {
|
819
|
+
let fun = match maintain_order {
|
820
|
+
true => pivot_stable,
|
821
|
+
false => pivot,
|
822
|
+
};
|
823
|
+
let df = fun(
|
824
|
+
&self.df.borrow(),
|
825
|
+
values,
|
826
|
+
index,
|
827
|
+
columns,
|
828
|
+
aggregate_expr.inner.clone(),
|
829
|
+
sort_columns,
|
830
|
+
)
|
831
|
+
.map_err(RbPolarsErr::from)?;
|
832
|
+
Ok(RbDataFrame::new(df))
|
833
|
+
}
|
834
|
+
|
754
835
|
pub fn partition_by(&self, groups: Vec<String>, stable: bool) -> RbResult<Vec<Self>> {
|
755
836
|
let out = if stable {
|
756
837
|
self.df.borrow().partition_by_stable(groups)
|
@@ -870,10 +951,74 @@ impl RbDataFrame {
|
|
870
951
|
df.into()
|
871
952
|
}
|
872
953
|
|
954
|
+
pub fn apply(
|
955
|
+
&self,
|
956
|
+
lambda: Value,
|
957
|
+
output_type: Option<Wrap<DataType>>,
|
958
|
+
inference_size: usize,
|
959
|
+
) -> RbResult<(Value, bool)> {
|
960
|
+
let df = &self.df.borrow();
|
961
|
+
|
962
|
+
let output_type = output_type.map(|dt| dt.0);
|
963
|
+
let out = match output_type {
|
964
|
+
Some(DataType::Int32) => {
|
965
|
+
apply_lambda_with_primitive_out_type::<Int32Type>(df, lambda, 0, None).into_series()
|
966
|
+
}
|
967
|
+
Some(DataType::Int64) => {
|
968
|
+
apply_lambda_with_primitive_out_type::<Int64Type>(df, lambda, 0, None).into_series()
|
969
|
+
}
|
970
|
+
Some(DataType::UInt32) => {
|
971
|
+
apply_lambda_with_primitive_out_type::<UInt32Type>(df, lambda, 0, None)
|
972
|
+
.into_series()
|
973
|
+
}
|
974
|
+
Some(DataType::UInt64) => {
|
975
|
+
apply_lambda_with_primitive_out_type::<UInt64Type>(df, lambda, 0, None)
|
976
|
+
.into_series()
|
977
|
+
}
|
978
|
+
Some(DataType::Float32) => {
|
979
|
+
apply_lambda_with_primitive_out_type::<Float32Type>(df, lambda, 0, None)
|
980
|
+
.into_series()
|
981
|
+
}
|
982
|
+
Some(DataType::Float64) => {
|
983
|
+
apply_lambda_with_primitive_out_type::<Float64Type>(df, lambda, 0, None)
|
984
|
+
.into_series()
|
985
|
+
}
|
986
|
+
Some(DataType::Boolean) => {
|
987
|
+
apply_lambda_with_bool_out_type(df, lambda, 0, None).into_series()
|
988
|
+
}
|
989
|
+
Some(DataType::Date) => {
|
990
|
+
apply_lambda_with_primitive_out_type::<Int32Type>(df, lambda, 0, None)
|
991
|
+
.into_date()
|
992
|
+
.into_series()
|
993
|
+
}
|
994
|
+
Some(DataType::Datetime(tu, tz)) => {
|
995
|
+
apply_lambda_with_primitive_out_type::<Int64Type>(df, lambda, 0, None)
|
996
|
+
.into_datetime(tu, tz)
|
997
|
+
.into_series()
|
998
|
+
}
|
999
|
+
Some(DataType::Utf8) => {
|
1000
|
+
apply_lambda_with_utf8_out_type(df, lambda, 0, None).into_series()
|
1001
|
+
}
|
1002
|
+
_ => return apply_lambda_unknown(df, lambda, inference_size),
|
1003
|
+
};
|
1004
|
+
|
1005
|
+
Ok((RbSeries::from(out).into(), false))
|
1006
|
+
}
|
1007
|
+
|
873
1008
|
pub fn shrink_to_fit(&self) {
|
874
1009
|
self.df.borrow_mut().shrink_to_fit();
|
875
1010
|
}
|
876
1011
|
|
1012
|
+
pub fn hash_rows(&self, k0: u64, k1: u64, k2: u64, k3: u64) -> RbResult<RbSeries> {
|
1013
|
+
let hb = ahash::RandomState::with_seeds(k0, k1, k2, k3);
|
1014
|
+
let hash = self
|
1015
|
+
.df
|
1016
|
+
.borrow_mut()
|
1017
|
+
.hash_rows(Some(hb))
|
1018
|
+
.map_err(RbPolarsErr::from)?;
|
1019
|
+
Ok(hash.into_series().into())
|
1020
|
+
}
|
1021
|
+
|
877
1022
|
pub fn transpose(&self, include_header: bool, names: String) -> RbResult<Self> {
|
878
1023
|
let mut df = self.df.borrow().transpose().map_err(RbPolarsErr::from)?;
|
879
1024
|
if include_header {
|
data/ext/polars/src/error.rs
CHANGED
(diff body not included in this extract)
data/ext/polars/src/lazy/apply.rs
CHANGED
@@ -1,7 +1,39 @@
|
|
1
1
|
use magnus::Value;
|
2
|
-
use polars::
|
3
|
-
|
2
|
+
use polars::prelude::*;
|
3
|
+
|
4
|
+
use crate::lazy::dsl::RbExpr;
|
5
|
+
use crate::Wrap;
|
4
6
|
|
5
7
|
pub fn binary_lambda(_lambda: Value, _a: Series, _b: Series) -> PolarsResult<Series> {
|
6
8
|
todo!();
|
7
9
|
}
|
10
|
+
|
11
|
+
pub fn map_single(
|
12
|
+
rbexpr: &RbExpr,
|
13
|
+
_lambda: Value,
|
14
|
+
output_type: Option<Wrap<DataType>>,
|
15
|
+
agg_list: bool,
|
16
|
+
) -> RbExpr {
|
17
|
+
let output_type = output_type.map(|wrap| wrap.0);
|
18
|
+
|
19
|
+
let output_type2 = output_type.clone();
|
20
|
+
let function = move |_s: Series| {
|
21
|
+
let _output_type = output_type2.clone().unwrap_or(DataType::Unknown);
|
22
|
+
|
23
|
+
todo!();
|
24
|
+
};
|
25
|
+
|
26
|
+
let output_map = GetOutput::map_field(move |fld| match output_type {
|
27
|
+
Some(ref dt) => Field::new(fld.name(), dt.clone()),
|
28
|
+
None => {
|
29
|
+
let mut fld = fld.clone();
|
30
|
+
fld.coerce(DataType::Unknown);
|
31
|
+
fld
|
32
|
+
}
|
33
|
+
});
|
34
|
+
if agg_list {
|
35
|
+
rbexpr.clone().inner.map_list(function, output_map).into()
|
36
|
+
} else {
|
37
|
+
rbexpr.clone().inner.map(function, output_map).into()
|
38
|
+
}
|
39
|
+
}
|
data/ext/polars/src/lazy/dataframe.rs
CHANGED
@@ -3,7 +3,7 @@ use polars::io::RowCount;
|
|
3
3
|
use polars::lazy::frame::{LazyFrame, LazyGroupBy};
|
4
4
|
use polars::prelude::*;
|
5
5
|
use std::cell::RefCell;
|
6
|
-
use std::io::BufWriter;
|
6
|
+
use std::io::{BufWriter, Read};
|
7
7
|
|
8
8
|
use crate::conversion::*;
|
9
9
|
use crate::file::get_file_like;
|
@@ -53,6 +53,27 @@ impl From<LazyFrame> for RbLazyFrame {
|
|
53
53
|
}
|
54
54
|
|
55
55
|
impl RbLazyFrame {
|
56
|
+
pub fn read_json(rb_f: Value) -> RbResult<Self> {
|
57
|
+
// it is faster to first read to memory and then parse: https://github.com/serde-rs/json/issues/160
|
58
|
+
// so don't bother with files.
|
59
|
+
let mut json = String::new();
|
60
|
+
let _ = get_file_like(rb_f, false)?
|
61
|
+
.read_to_string(&mut json)
|
62
|
+
.unwrap();
|
63
|
+
|
64
|
+
// Safety
|
65
|
+
// we skipped the serializing/deserializing of the static in lifetime in `DataType`
|
66
|
+
// so we actually don't have a lifetime at all when serializing.
|
67
|
+
|
68
|
+
// &str still has a lifetime. But it's ok, because we drop it immediately
|
69
|
+
// in this scope
|
70
|
+
let json = unsafe { std::mem::transmute::<&'_ str, &'static str>(json.as_str()) };
|
71
|
+
|
72
|
+
let lp = serde_json::from_str::<LogicalPlan>(json)
|
73
|
+
.map_err(|err| RbValueError::new_err(format!("{:?}", err)))?;
|
74
|
+
Ok(LazyFrame::from(lp).into())
|
75
|
+
}
|
76
|
+
|
56
77
|
pub fn new_from_ndjson(
|
57
78
|
path: String,
|
58
79
|
infer_schema_length: Option<usize>,
|
@@ -349,6 +370,56 @@ impl RbLazyFrame {
|
|
349
370
|
})
|
350
371
|
}
|
351
372
|
|
373
|
+
pub fn with_context(&self, contexts: RArray) -> RbResult<Self> {
|
374
|
+
let contexts = contexts
|
375
|
+
.each()
|
376
|
+
.map(|v| v.unwrap().try_convert())
|
377
|
+
.collect::<RbResult<Vec<&RbLazyFrame>>>()?;
|
378
|
+
let contexts = contexts
|
379
|
+
.into_iter()
|
380
|
+
.map(|ldf| ldf.ldf.clone())
|
381
|
+
.collect::<Vec<_>>();
|
382
|
+
Ok(self.ldf.clone().with_context(contexts).into())
|
383
|
+
}
|
384
|
+
|
385
|
+
#[allow(clippy::too_many_arguments)]
|
386
|
+
pub fn join_asof(
|
387
|
+
&self,
|
388
|
+
other: &RbLazyFrame,
|
389
|
+
left_on: &RbExpr,
|
390
|
+
right_on: &RbExpr,
|
391
|
+
left_by: Option<Vec<String>>,
|
392
|
+
right_by: Option<Vec<String>>,
|
393
|
+
allow_parallel: bool,
|
394
|
+
force_parallel: bool,
|
395
|
+
suffix: String,
|
396
|
+
strategy: Wrap<AsofStrategy>,
|
397
|
+
tolerance: Option<Wrap<AnyValue<'_>>>,
|
398
|
+
tolerance_str: Option<String>,
|
399
|
+
) -> RbResult<Self> {
|
400
|
+
let ldf = self.ldf.clone();
|
401
|
+
let other = other.ldf.clone();
|
402
|
+
let left_on = left_on.inner.clone();
|
403
|
+
let right_on = right_on.inner.clone();
|
404
|
+
Ok(ldf
|
405
|
+
.join_builder()
|
406
|
+
.with(other)
|
407
|
+
.left_on([left_on])
|
408
|
+
.right_on([right_on])
|
409
|
+
.allow_parallel(allow_parallel)
|
410
|
+
.force_parallel(force_parallel)
|
411
|
+
.how(JoinType::AsOf(AsOfOptions {
|
412
|
+
strategy: strategy.0,
|
413
|
+
left_by,
|
414
|
+
right_by,
|
415
|
+
tolerance: tolerance.map(|t| t.0.into_static().unwrap()),
|
416
|
+
tolerance_str,
|
417
|
+
}))
|
418
|
+
.suffix(suffix)
|
419
|
+
.finish()
|
420
|
+
.into())
|
421
|
+
}
|
422
|
+
|
352
423
|
#[allow(clippy::too_many_arguments)]
|
353
424
|
pub fn join(
|
354
425
|
&self,
|
data/ext/polars/src/lazy/dsl.rs
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
use magnus::block::Proc;
|
1
2
|
use magnus::{class, RArray, RString, Value};
|
2
3
|
use polars::chunked_array::ops::SortOptions;
|
3
4
|
use polars::lazy::dsl;
|
@@ -946,6 +947,10 @@ impl RbExpr {
|
|
946
947
|
self.inner.clone().dt().round(&every, &offset).into()
|
947
948
|
}
|
948
949
|
|
950
|
+
pub fn map(&self, lambda: Value, output_type: Option<Wrap<DataType>>, agg_list: bool) -> Self {
|
951
|
+
map_single(self, lambda, output_type, agg_list)
|
952
|
+
}
|
953
|
+
|
949
954
|
pub fn dot(&self, other: &RbExpr) -> Self {
|
950
955
|
self.inner.clone().dot(other.inner.clone()).into()
|
951
956
|
}
|
@@ -979,6 +984,23 @@ impl RbExpr {
|
|
979
984
|
self.inner.clone().suffix(&suffix).into()
|
980
985
|
}
|
981
986
|
|
987
|
+
pub fn map_alias(&self, lambda: Proc) -> Self {
|
988
|
+
self.inner
|
989
|
+
.clone()
|
990
|
+
.map_alias(move |name| {
|
991
|
+
let out = lambda.call::<_, String>((name,));
|
992
|
+
// TODO switch to match
|
993
|
+
out.unwrap()
|
994
|
+
// match out {
|
995
|
+
// Ok(out) => Ok(out.to_string()),
|
996
|
+
// Err(e) => Err(PolarsError::ComputeError(
|
997
|
+
// format!("Ruby function in 'map_alias' produced an error: {}.", e).into(),
|
998
|
+
// )),
|
999
|
+
// }
|
1000
|
+
})
|
1001
|
+
.into()
|
1002
|
+
}
|
1003
|
+
|
982
1004
|
pub fn exclude(&self, columns: Vec<String>) -> Self {
|
983
1005
|
self.inner.clone().exclude(columns).into()
|
984
1006
|
}
|
@@ -1450,6 +1472,10 @@ impl RbExpr {
|
|
1450
1472
|
pub fn entropy(&self, base: f64, normalize: bool) -> Self {
|
1451
1473
|
self.inner.clone().entropy(base, normalize).into()
|
1452
1474
|
}
|
1475
|
+
|
1476
|
+
pub fn hash(&self, seed: u64, seed_1: u64, seed_2: u64, seed_3: u64) -> Self {
|
1477
|
+
self.inner.clone().hash(seed, seed_1, seed_2, seed_3).into()
|
1478
|
+
}
|
1453
1479
|
}
|
1454
1480
|
|
1455
1481
|
pub fn col(name: String) -> RbExpr {
|
@@ -1479,6 +1505,13 @@ pub fn fold(acc: &RbExpr, lambda: Value, exprs: RArray) -> RbResult<RbExpr> {
|
|
1479
1505
|
Ok(polars::lazy::dsl::fold_exprs(acc.inner.clone(), func, exprs).into())
|
1480
1506
|
}
|
1481
1507
|
|
1508
|
+
pub fn cumfold(acc: &RbExpr, lambda: Value, exprs: RArray, include_init: bool) -> RbResult<RbExpr> {
|
1509
|
+
let exprs = rb_exprs_to_exprs(exprs)?;
|
1510
|
+
|
1511
|
+
let func = move |a: Series, b: Series| binary_lambda(lambda, a, b);
|
1512
|
+
Ok(polars::lazy::dsl::cumfold_exprs(acc.inner.clone(), func, exprs, include_init).into())
|
1513
|
+
}
|
1514
|
+
|
1482
1515
|
// TODO improve
|
1483
1516
|
pub fn lit(value: Value) -> RbResult<RbExpr> {
|
1484
1517
|
if value.is_nil() {
|
@@ -1531,6 +1564,11 @@ pub fn cov(a: &RbExpr, b: &RbExpr) -> RbExpr {
|
|
1531
1564
|
polars::lazy::dsl::cov(a.inner.clone(), b.inner.clone()).into()
|
1532
1565
|
}
|
1533
1566
|
|
1567
|
+
pub fn argsort_by(by: RArray, reverse: Vec<bool>) -> RbResult<RbExpr> {
|
1568
|
+
let by = rb_exprs_to_exprs(by)?;
|
1569
|
+
Ok(polars::lazy::dsl::argsort_by(by, &reverse).into())
|
1570
|
+
}
|
1571
|
+
|
1534
1572
|
#[magnus::wrap(class = "Polars::RbWhen")]
|
1535
1573
|
#[derive(Clone)]
|
1536
1574
|
pub struct RbWhen {
|