polars-df 0.1.4 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +13 -0
- data/Cargo.lock +430 -217
- data/Cargo.toml +2 -0
- data/LICENSE.txt +1 -1
- data/README.md +0 -2
- data/ext/polars/Cargo.toml +9 -3
- data/ext/polars/src/apply/dataframe.rs +303 -0
- data/ext/polars/src/apply/mod.rs +253 -0
- data/ext/polars/src/apply/series.rs +1173 -0
- data/ext/polars/src/conversion.rs +254 -35
- data/ext/polars/src/dataframe.rs +151 -6
- data/ext/polars/src/error.rs +8 -0
- data/ext/polars/src/lazy/apply.rs +34 -2
- data/ext/polars/src/lazy/dataframe.rs +80 -3
- data/ext/polars/src/lazy/dsl.rs +84 -10
- data/ext/polars/src/lib.rs +180 -8
- data/ext/polars/src/series.rs +328 -10
- data/ext/polars/src/utils.rs +25 -0
- data/lib/polars/convert.rb +100 -0
- data/lib/polars/data_frame.rb +1480 -77
- data/lib/polars/data_types.rb +122 -0
- data/lib/polars/date_time_expr.rb +10 -10
- data/lib/polars/date_time_name_space.rb +8 -8
- data/lib/polars/dynamic_group_by.rb +52 -0
- data/lib/polars/expr.rb +262 -12
- data/lib/polars/functions.rb +194 -5
- data/lib/polars/group_by.rb +76 -36
- data/lib/polars/io.rb +19 -3
- data/lib/polars/lazy_frame.rb +798 -25
- data/lib/polars/lazy_functions.rb +569 -30
- data/lib/polars/list_expr.rb +1 -1
- data/lib/polars/rolling_group_by.rb +35 -0
- data/lib/polars/series.rb +192 -27
- data/lib/polars/string_expr.rb +6 -5
- data/lib/polars/string_name_space.rb +1 -1
- data/lib/polars/utils.rb +25 -8
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +38 -29
- metadata +11 -4
@@ -3,7 +3,7 @@ use polars::io::RowCount;
|
|
3
3
|
use polars::lazy::frame::{LazyFrame, LazyGroupBy};
|
4
4
|
use polars::prelude::*;
|
5
5
|
use std::cell::RefCell;
|
6
|
-
use std::io::BufWriter;
|
6
|
+
use std::io::{BufWriter, Read};
|
7
7
|
|
8
8
|
use crate::conversion::*;
|
9
9
|
use crate::file::get_file_like;
|
@@ -53,6 +53,27 @@ impl From<LazyFrame> for RbLazyFrame {
|
|
53
53
|
}
|
54
54
|
|
55
55
|
impl RbLazyFrame {
|
56
|
+
pub fn read_json(rb_f: Value) -> RbResult<Self> {
|
57
|
+
// it is faster to first read to memory and then parse: https://github.com/serde-rs/json/issues/160
|
58
|
+
// so don't bother with files.
|
59
|
+
let mut json = String::new();
|
60
|
+
let _ = get_file_like(rb_f, false)?
|
61
|
+
.read_to_string(&mut json)
|
62
|
+
.unwrap();
|
63
|
+
|
64
|
+
// Safety
|
65
|
+
// we skipped the serializing/deserializing of the static in lifetime in `DataType`
|
66
|
+
// so we actually don't have a lifetime at all when serializing.
|
67
|
+
|
68
|
+
// &str still has a lifetime. But it's ok, because we drop it immediately
|
69
|
+
// in this scope
|
70
|
+
let json = unsafe { std::mem::transmute::<&'_ str, &'static str>(json.as_str()) };
|
71
|
+
|
72
|
+
let lp = serde_json::from_str::<LogicalPlan>(json)
|
73
|
+
.map_err(|err| RbValueError::new_err(format!("{:?}", err)))?;
|
74
|
+
Ok(LazyFrame::from(lp).into())
|
75
|
+
}
|
76
|
+
|
56
77
|
pub fn new_from_ndjson(
|
57
78
|
path: String,
|
58
79
|
infer_schema_length: Option<usize>,
|
@@ -327,6 +348,7 @@ impl RbLazyFrame {
|
|
327
348
|
include_boundaries: bool,
|
328
349
|
closed: Wrap<ClosedWindow>,
|
329
350
|
by: RArray,
|
351
|
+
start_by: Wrap<StartBy>,
|
330
352
|
) -> RbResult<RbLazyGroupBy> {
|
331
353
|
let closed_window = closed.0;
|
332
354
|
let by = rb_exprs_to_exprs(by)?;
|
@@ -341,6 +363,7 @@ impl RbLazyFrame {
|
|
341
363
|
truncate,
|
342
364
|
include_boundaries,
|
343
365
|
closed_window,
|
366
|
+
start_by: start_by.0,
|
344
367
|
},
|
345
368
|
);
|
346
369
|
|
@@ -349,6 +372,56 @@ impl RbLazyFrame {
|
|
349
372
|
})
|
350
373
|
}
|
351
374
|
|
375
|
+
pub fn with_context(&self, contexts: RArray) -> RbResult<Self> {
|
376
|
+
let contexts = contexts
|
377
|
+
.each()
|
378
|
+
.map(|v| v.unwrap().try_convert())
|
379
|
+
.collect::<RbResult<Vec<&RbLazyFrame>>>()?;
|
380
|
+
let contexts = contexts
|
381
|
+
.into_iter()
|
382
|
+
.map(|ldf| ldf.ldf.clone())
|
383
|
+
.collect::<Vec<_>>();
|
384
|
+
Ok(self.ldf.clone().with_context(contexts).into())
|
385
|
+
}
|
386
|
+
|
387
|
+
#[allow(clippy::too_many_arguments)]
|
388
|
+
pub fn join_asof(
|
389
|
+
&self,
|
390
|
+
other: &RbLazyFrame,
|
391
|
+
left_on: &RbExpr,
|
392
|
+
right_on: &RbExpr,
|
393
|
+
left_by: Option<Vec<String>>,
|
394
|
+
right_by: Option<Vec<String>>,
|
395
|
+
allow_parallel: bool,
|
396
|
+
force_parallel: bool,
|
397
|
+
suffix: String,
|
398
|
+
strategy: Wrap<AsofStrategy>,
|
399
|
+
tolerance: Option<Wrap<AnyValue<'_>>>,
|
400
|
+
tolerance_str: Option<String>,
|
401
|
+
) -> RbResult<Self> {
|
402
|
+
let ldf = self.ldf.clone();
|
403
|
+
let other = other.ldf.clone();
|
404
|
+
let left_on = left_on.inner.clone();
|
405
|
+
let right_on = right_on.inner.clone();
|
406
|
+
Ok(ldf
|
407
|
+
.join_builder()
|
408
|
+
.with(other)
|
409
|
+
.left_on([left_on])
|
410
|
+
.right_on([right_on])
|
411
|
+
.allow_parallel(allow_parallel)
|
412
|
+
.force_parallel(force_parallel)
|
413
|
+
.how(JoinType::AsOf(AsOfOptions {
|
414
|
+
strategy: strategy.0,
|
415
|
+
left_by,
|
416
|
+
right_by,
|
417
|
+
tolerance: tolerance.map(|t| t.0.into_static().unwrap()),
|
418
|
+
tolerance_str,
|
419
|
+
}))
|
420
|
+
.suffix(suffix)
|
421
|
+
.finish()
|
422
|
+
.into())
|
423
|
+
}
|
424
|
+
|
352
425
|
#[allow(clippy::too_many_arguments)]
|
353
426
|
pub fn join(
|
354
427
|
&self,
|
@@ -443,9 +516,13 @@ impl RbLazyFrame {
|
|
443
516
|
ldf.median().into()
|
444
517
|
}
|
445
518
|
|
446
|
-
pub fn quantile(
|
519
|
+
pub fn quantile(
|
520
|
+
&self,
|
521
|
+
quantile: &RbExpr,
|
522
|
+
interpolation: Wrap<QuantileInterpolOptions>,
|
523
|
+
) -> Self {
|
447
524
|
let ldf = self.ldf.clone();
|
448
|
-
ldf.quantile(quantile, interpolation.0).into()
|
525
|
+
ldf.quantile(quantile.inner.clone(), interpolation.0).into()
|
449
526
|
}
|
450
527
|
|
451
528
|
pub fn explode(&self, column: RArray) -> RbResult<Self> {
|
data/ext/polars/src/lazy/dsl.rs
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
use magnus::block::Proc;
|
1
2
|
use magnus::{class, RArray, RString, Value};
|
2
3
|
use polars::chunked_array::ops::SortOptions;
|
3
4
|
use polars::lazy::dsl;
|
@@ -163,10 +164,14 @@ impl RbExpr {
|
|
163
164
|
self.clone().inner.list().into()
|
164
165
|
}
|
165
166
|
|
166
|
-
pub fn quantile(
|
167
|
+
pub fn quantile(
|
168
|
+
&self,
|
169
|
+
quantile: &RbExpr,
|
170
|
+
interpolation: Wrap<QuantileInterpolOptions>,
|
171
|
+
) -> Self {
|
167
172
|
self.clone()
|
168
173
|
.inner
|
169
|
-
.quantile(quantile, interpolation.0)
|
174
|
+
.quantile(quantile.inner.clone(), interpolation.0)
|
170
175
|
.into()
|
171
176
|
}
|
172
177
|
|
@@ -505,7 +510,13 @@ impl RbExpr {
|
|
505
510
|
self.inner.clone().shrink_dtype().into()
|
506
511
|
}
|
507
512
|
|
508
|
-
pub fn str_parse_date(&self, fmt: Option<String>, strict: bool, exact: bool) -> Self {
|
513
|
+
pub fn str_parse_date(
|
514
|
+
&self,
|
515
|
+
fmt: Option<String>,
|
516
|
+
strict: bool,
|
517
|
+
exact: bool,
|
518
|
+
cache: bool,
|
519
|
+
) -> Self {
|
509
520
|
self.inner
|
510
521
|
.clone()
|
511
522
|
.str()
|
@@ -514,11 +525,20 @@ impl RbExpr {
|
|
514
525
|
fmt,
|
515
526
|
strict,
|
516
527
|
exact,
|
528
|
+
cache,
|
529
|
+
tz_aware: false,
|
517
530
|
})
|
518
531
|
.into()
|
519
532
|
}
|
520
533
|
|
521
|
-
pub fn str_parse_datetime(&self, fmt: Option<String>, strict: bool, exact: bool) -> Self {
|
534
|
+
pub fn str_parse_datetime(
|
535
|
+
&self,
|
536
|
+
fmt: Option<String>,
|
537
|
+
strict: bool,
|
538
|
+
exact: bool,
|
539
|
+
cache: bool,
|
540
|
+
tz_aware: bool,
|
541
|
+
) -> Self {
|
522
542
|
let tu = match fmt {
|
523
543
|
Some(ref fmt) => {
|
524
544
|
if fmt.contains("%.9f")
|
@@ -543,11 +563,19 @@ impl RbExpr {
|
|
543
563
|
fmt,
|
544
564
|
strict,
|
545
565
|
exact,
|
566
|
+
cache,
|
567
|
+
tz_aware,
|
546
568
|
})
|
547
569
|
.into()
|
548
570
|
}
|
549
571
|
|
550
|
-
pub fn str_parse_time(&self, fmt: Option<String>, strict: bool, exact: bool) -> Self {
|
572
|
+
pub fn str_parse_time(
|
573
|
+
&self,
|
574
|
+
fmt: Option<String>,
|
575
|
+
strict: bool,
|
576
|
+
exact: bool,
|
577
|
+
cache: bool,
|
578
|
+
) -> Self {
|
551
579
|
self.inner
|
552
580
|
.clone()
|
553
581
|
.str()
|
@@ -556,6 +584,8 @@ impl RbExpr {
|
|
556
584
|
fmt,
|
557
585
|
strict,
|
558
586
|
exact,
|
587
|
+
cache,
|
588
|
+
tz_aware: false,
|
559
589
|
})
|
560
590
|
.into()
|
561
591
|
}
|
@@ -722,8 +752,12 @@ impl RbExpr {
|
|
722
752
|
self.inner.clone().str().extract(&pat, group_index).into()
|
723
753
|
}
|
724
754
|
|
725
|
-
pub fn str_extract_all(&self, pat:
|
726
|
-
self.inner
|
755
|
+
pub fn str_extract_all(&self, pat: &RbExpr) -> Self {
|
756
|
+
self.inner
|
757
|
+
.clone()
|
758
|
+
.str()
|
759
|
+
.extract_all(pat.inner.clone())
|
760
|
+
.into()
|
727
761
|
}
|
728
762
|
|
729
763
|
pub fn count_match(&self, pat: String) -> Self {
|
@@ -946,6 +980,10 @@ impl RbExpr {
|
|
946
980
|
self.inner.clone().dt().round(&every, &offset).into()
|
947
981
|
}
|
948
982
|
|
983
|
+
pub fn map(&self, lambda: Value, output_type: Option<Wrap<DataType>>, agg_list: bool) -> Self {
|
984
|
+
map_single(self, lambda, output_type, agg_list)
|
985
|
+
}
|
986
|
+
|
949
987
|
pub fn dot(&self, other: &RbExpr) -> Self {
|
950
988
|
self.inner.clone().dot(other.inner.clone()).into()
|
951
989
|
}
|
@@ -979,12 +1017,27 @@ impl RbExpr {
|
|
979
1017
|
self.inner.clone().suffix(&suffix).into()
|
980
1018
|
}
|
981
1019
|
|
1020
|
+
pub fn map_alias(&self, lambda: Proc) -> Self {
|
1021
|
+
self.inner
|
1022
|
+
.clone()
|
1023
|
+
.map_alias(move |name| {
|
1024
|
+
let out = lambda.call::<_, String>((name,));
|
1025
|
+
match out {
|
1026
|
+
Ok(out) => Ok(out),
|
1027
|
+
Err(e) => Err(PolarsError::ComputeError(
|
1028
|
+
format!("Ruby function in 'map_alias' produced an error: {}.", e).into(),
|
1029
|
+
)),
|
1030
|
+
}
|
1031
|
+
})
|
1032
|
+
.into()
|
1033
|
+
}
|
1034
|
+
|
982
1035
|
pub fn exclude(&self, columns: Vec<String>) -> Self {
|
983
1036
|
self.inner.clone().exclude(columns).into()
|
984
1037
|
}
|
985
1038
|
|
986
|
-
pub fn interpolate(&self) -> Self {
|
987
|
-
self.inner.clone().interpolate().into()
|
1039
|
+
pub fn interpolate(&self, method: Wrap<InterpolationMethod>) -> Self {
|
1040
|
+
self.inner.clone().interpolate(method.0).into()
|
988
1041
|
}
|
989
1042
|
|
990
1043
|
pub fn rolling_sum(
|
@@ -1275,6 +1328,7 @@ impl RbExpr {
|
|
1275
1328
|
&self,
|
1276
1329
|
width_strat: Wrap<ListToStructWidthStrategy>,
|
1277
1330
|
_name_gen: Option<Value>,
|
1331
|
+
upper_bound: usize,
|
1278
1332
|
) -> RbResult<Self> {
|
1279
1333
|
// TODO fix
|
1280
1334
|
let name_gen = None;
|
@@ -1289,7 +1343,7 @@ impl RbExpr {
|
|
1289
1343
|
.inner
|
1290
1344
|
.clone()
|
1291
1345
|
.arr()
|
1292
|
-
.to_struct(width_strat.0, name_gen)
|
1346
|
+
.to_struct(width_strat.0, name_gen, upper_bound)
|
1293
1347
|
.into())
|
1294
1348
|
}
|
1295
1349
|
|
@@ -1450,6 +1504,10 @@ impl RbExpr {
|
|
1450
1504
|
pub fn entropy(&self, base: f64, normalize: bool) -> Self {
|
1451
1505
|
self.inner.clone().entropy(base, normalize).into()
|
1452
1506
|
}
|
1507
|
+
|
1508
|
+
pub fn hash(&self, seed: u64, seed_1: u64, seed_2: u64, seed_3: u64) -> Self {
|
1509
|
+
self.inner.clone().hash(seed, seed_1, seed_2, seed_3).into()
|
1510
|
+
}
|
1453
1511
|
}
|
1454
1512
|
|
1455
1513
|
pub fn col(name: String) -> RbExpr {
|
@@ -1472,6 +1530,10 @@ pub fn cols(names: Vec<String>) -> RbExpr {
|
|
1472
1530
|
dsl::cols(names).into()
|
1473
1531
|
}
|
1474
1532
|
|
1533
|
+
pub fn dtype_cols(dtypes: Vec<DataType>) -> RbExpr {
|
1534
|
+
dsl::dtype_cols(dtypes).into()
|
1535
|
+
}
|
1536
|
+
|
1475
1537
|
pub fn fold(acc: &RbExpr, lambda: Value, exprs: RArray) -> RbResult<RbExpr> {
|
1476
1538
|
let exprs = rb_exprs_to_exprs(exprs)?;
|
1477
1539
|
|
@@ -1479,6 +1541,13 @@ pub fn fold(acc: &RbExpr, lambda: Value, exprs: RArray) -> RbResult<RbExpr> {
|
|
1479
1541
|
Ok(polars::lazy::dsl::fold_exprs(acc.inner.clone(), func, exprs).into())
|
1480
1542
|
}
|
1481
1543
|
|
1544
|
+
pub fn cumfold(acc: &RbExpr, lambda: Value, exprs: RArray, include_init: bool) -> RbResult<RbExpr> {
|
1545
|
+
let exprs = rb_exprs_to_exprs(exprs)?;
|
1546
|
+
|
1547
|
+
let func = move |a: Series, b: Series| binary_lambda(lambda, a, b);
|
1548
|
+
Ok(polars::lazy::dsl::cumfold_exprs(acc.inner.clone(), func, exprs, include_init).into())
|
1549
|
+
}
|
1550
|
+
|
1482
1551
|
// TODO improve
|
1483
1552
|
pub fn lit(value: Value) -> RbResult<RbExpr> {
|
1484
1553
|
if value.is_nil() {
|
@@ -1531,6 +1600,11 @@ pub fn cov(a: &RbExpr, b: &RbExpr) -> RbExpr {
|
|
1531
1600
|
polars::lazy::dsl::cov(a.inner.clone(), b.inner.clone()).into()
|
1532
1601
|
}
|
1533
1602
|
|
1603
|
+
pub fn argsort_by(by: RArray, reverse: Vec<bool>) -> RbResult<RbExpr> {
|
1604
|
+
let by = rb_exprs_to_exprs(by)?;
|
1605
|
+
Ok(polars::lazy::dsl::argsort_by(by, &reverse).into())
|
1606
|
+
}
|
1607
|
+
|
1534
1608
|
#[magnus::wrap(class = "Polars::RbWhen")]
|
1535
1609
|
#[derive(Clone)]
|
1536
1610
|
pub struct RbWhen {
|
data/ext/polars/src/lib.rs
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
mod apply;
|
1
2
|
mod batched_csv;
|
2
3
|
mod conversion;
|
3
4
|
mod dataframe;
|
@@ -25,7 +26,7 @@ use polars::datatypes::{DataType, TimeUnit};
|
|
25
26
|
use polars::error::PolarsResult;
|
26
27
|
use polars::frame::DataFrame;
|
27
28
|
use polars::functions::{diag_concat_df, hor_concat_df};
|
28
|
-
use polars::prelude::{ClosedWindow, Duration, IntoSeries, TimeZone};
|
29
|
+
use polars::prelude::{ClosedWindow, Duration, DurationArgs, IntoSeries, TimeZone};
|
29
30
|
use series::RbSeries;
|
30
31
|
|
31
32
|
#[cfg(target_os = "linux")]
|
@@ -55,13 +56,19 @@ fn series() -> RClass {
|
|
55
56
|
#[magnus::init]
|
56
57
|
fn init() -> RbResult<()> {
|
57
58
|
let module = module();
|
59
|
+
module.define_singleton_method("_dtype_cols", function!(dtype_cols, 1))?;
|
60
|
+
module.define_singleton_method("_rb_duration", function!(rb_duration, 8))?;
|
58
61
|
module.define_singleton_method("_concat_df", function!(concat_df, 1))?;
|
62
|
+
module.define_singleton_method("_concat_lf", function!(concat_lf, 3))?;
|
59
63
|
module.define_singleton_method("_diag_concat_df", function!(rb_diag_concat_df, 1))?;
|
60
64
|
module.define_singleton_method("_hor_concat_df", function!(rb_hor_concat_df, 1))?;
|
61
65
|
module.define_singleton_method("_concat_series", function!(concat_series, 1))?;
|
62
66
|
module.define_singleton_method("_ipc_schema", function!(ipc_schema, 1))?;
|
63
67
|
module.define_singleton_method("_parquet_schema", function!(parquet_schema, 1))?;
|
68
|
+
module.define_singleton_method("_collect_all", function!(collect_all, 1))?;
|
64
69
|
module.define_singleton_method("_rb_date_range", function!(rb_date_range, 7))?;
|
70
|
+
module.define_singleton_method("_coalesce_exprs", function!(coalesce_exprs, 1))?;
|
71
|
+
module.define_singleton_method("_sum_exprs", function!(sum_exprs, 1))?;
|
65
72
|
module.define_singleton_method("_as_struct", function!(as_struct, 1))?;
|
66
73
|
module.define_singleton_method("_arg_where", function!(arg_where, 1))?;
|
67
74
|
|
@@ -74,10 +81,13 @@ fn init() -> RbResult<()> {
|
|
74
81
|
class.define_singleton_method("read_csv", function!(RbDataFrame::read_csv, -1))?;
|
75
82
|
class.define_singleton_method("read_parquet", function!(RbDataFrame::read_parquet, 7))?;
|
76
83
|
class.define_singleton_method("read_ipc", function!(RbDataFrame::read_ipc, 6))?;
|
84
|
+
class.define_singleton_method("read_avro", function!(RbDataFrame::read_avro, 4))?;
|
85
|
+
class.define_singleton_method("read_hashes", function!(RbDataFrame::read_hashes, 3))?;
|
77
86
|
class.define_singleton_method("read_hash", function!(RbDataFrame::read_hash, 1))?;
|
78
87
|
class.define_singleton_method("read_json", function!(RbDataFrame::read_json, 1))?;
|
79
88
|
class.define_singleton_method("read_ndjson", function!(RbDataFrame::read_ndjson, 1))?;
|
80
89
|
class.define_method("estimated_size", method!(RbDataFrame::estimated_size, 0))?;
|
90
|
+
class.define_method("write_avro", method!(RbDataFrame::write_avro, 2))?;
|
81
91
|
class.define_method("write_json", method!(RbDataFrame::write_json, 3))?;
|
82
92
|
class.define_method("write_ndjson", method!(RbDataFrame::write_ndjson, 1))?;
|
83
93
|
class.define_method("write_csv", method!(RbDataFrame::write_csv, 10))?;
|
@@ -143,6 +153,7 @@ fn init() -> RbResult<()> {
|
|
143
153
|
class.define_method("with_row_count", method!(RbDataFrame::with_row_count, 2))?;
|
144
154
|
class.define_method("_clone", method!(RbDataFrame::clone, 0))?;
|
145
155
|
class.define_method("melt", method!(RbDataFrame::melt, 4))?;
|
156
|
+
class.define_method("pivot_expr", method!(RbDataFrame::pivot_expr, 6))?;
|
146
157
|
class.define_method("partition_by", method!(RbDataFrame::partition_by, 2))?;
|
147
158
|
class.define_method("shift", method!(RbDataFrame::shift, 1))?;
|
148
159
|
class.define_method("unique", method!(RbDataFrame::unique, 3))?;
|
@@ -161,7 +172,9 @@ fn init() -> RbResult<()> {
|
|
161
172
|
class.define_method("quantile", method!(RbDataFrame::quantile, 2))?;
|
162
173
|
class.define_method("to_dummies", method!(RbDataFrame::to_dummies, 1))?;
|
163
174
|
class.define_method("null_count", method!(RbDataFrame::null_count, 0))?;
|
175
|
+
class.define_method("apply", method!(RbDataFrame::apply, 3))?;
|
164
176
|
class.define_method("shrink_to_fit", method!(RbDataFrame::shrink_to_fit, 0))?;
|
177
|
+
class.define_method("hash_rows", method!(RbDataFrame::hash_rows, 4))?;
|
165
178
|
class.define_method("transpose", method!(RbDataFrame::transpose, 2))?;
|
166
179
|
class.define_method("upsample", method!(RbDataFrame::upsample, 5))?;
|
167
180
|
class.define_method("to_struct", method!(RbDataFrame::to_struct, 1))?;
|
@@ -277,9 +290,9 @@ fn init() -> RbResult<()> {
|
|
277
290
|
class.define_method("cumprod", method!(RbExpr::cumprod, 1))?;
|
278
291
|
class.define_method("product", method!(RbExpr::product, 0))?;
|
279
292
|
class.define_method("shrink_dtype", method!(RbExpr::shrink_dtype, 0))?;
|
280
|
-
class.define_method("str_parse_date", method!(RbExpr::str_parse_date, 3))?;
|
281
|
-
class.define_method("str_parse_datetime", method!(RbExpr::str_parse_datetime, 3))?;
|
282
|
-
class.define_method("str_parse_time", method!(RbExpr::str_parse_time, 3))?;
|
293
|
+
class.define_method("str_parse_date", method!(RbExpr::str_parse_date, 4))?;
|
294
|
+
class.define_method("str_parse_datetime", method!(RbExpr::str_parse_datetime, 5))?;
|
295
|
+
class.define_method("str_parse_time", method!(RbExpr::str_parse_time, 4))?;
|
283
296
|
class.define_method("str_strip", method!(RbExpr::str_strip, 1))?;
|
284
297
|
class.define_method("str_rstrip", method!(RbExpr::str_rstrip, 1))?;
|
285
298
|
class.define_method("str_lstrip", method!(RbExpr::str_lstrip, 1))?;
|
@@ -361,14 +374,16 @@ fn init() -> RbResult<()> {
|
|
361
374
|
class.define_method("dt_tz_localize", method!(RbExpr::dt_tz_localize, 1))?;
|
362
375
|
class.define_method("dt_truncate", method!(RbExpr::dt_truncate, 2))?;
|
363
376
|
class.define_method("dt_round", method!(RbExpr::dt_round, 2))?;
|
377
|
+
class.define_method("map", method!(RbExpr::map, 3))?;
|
364
378
|
class.define_method("dot", method!(RbExpr::dot, 1))?;
|
365
379
|
class.define_method("reinterpret", method!(RbExpr::reinterpret, 1))?;
|
366
380
|
class.define_method("mode", method!(RbExpr::mode, 0))?;
|
367
381
|
class.define_method("keep_name", method!(RbExpr::keep_name, 0))?;
|
368
382
|
class.define_method("prefix", method!(RbExpr::prefix, 1))?;
|
369
383
|
class.define_method("suffix", method!(RbExpr::suffix, 1))?;
|
384
|
+
class.define_method("map_alias", method!(RbExpr::map_alias, 1))?;
|
370
385
|
class.define_method("exclude", method!(RbExpr::exclude, 1))?;
|
371
|
-
class.define_method("interpolate", method!(RbExpr::interpolate, 0))?;
|
386
|
+
class.define_method("interpolate", method!(RbExpr::interpolate, 1))?;
|
372
387
|
class.define_method("rolling_sum", method!(RbExpr::rolling_sum, 6))?;
|
373
388
|
class.define_method("rolling_min", method!(RbExpr::rolling_min, 6))?;
|
374
389
|
class.define_method("rolling_max", method!(RbExpr::rolling_max, 6))?;
|
@@ -396,7 +411,7 @@ fn init() -> RbResult<()> {
|
|
396
411
|
class.define_method("lst_slice", method!(RbExpr::lst_slice, 2))?;
|
397
412
|
class.define_method("lst_eval", method!(RbExpr::lst_eval, 2))?;
|
398
413
|
class.define_method("cumulative_eval", method!(RbExpr::cumulative_eval, 3))?;
|
399
|
-
class.define_method("lst_to_struct", method!(RbExpr::lst_to_struct, 2))?;
|
414
|
+
class.define_method("lst_to_struct", method!(RbExpr::lst_to_struct, 3))?;
|
400
415
|
class.define_method("rank", method!(RbExpr::rank, 2))?;
|
401
416
|
class.define_method("diff", method!(RbExpr::diff, 2))?;
|
402
417
|
class.define_method("pct_change", method!(RbExpr::pct_change, 1))?;
|
@@ -431,6 +446,7 @@ fn init() -> RbResult<()> {
|
|
431
446
|
class.define_method("log", method!(RbExpr::log, 1))?;
|
432
447
|
class.define_method("exp", method!(RbExpr::exp, 0))?;
|
433
448
|
class.define_method("entropy", method!(RbExpr::entropy, 2))?;
|
449
|
+
class.define_method("_hash", method!(RbExpr::hash, 4))?;
|
434
450
|
|
435
451
|
// meta
|
436
452
|
class.define_method("meta_pop", method!(RbExpr::meta_pop, 0))?;
|
@@ -446,6 +462,7 @@ fn init() -> RbResult<()> {
|
|
446
462
|
class.define_singleton_method("last", function!(crate::lazy::dsl::last, 0))?;
|
447
463
|
class.define_singleton_method("cols", function!(crate::lazy::dsl::cols, 1))?;
|
448
464
|
class.define_singleton_method("fold", function!(crate::lazy::dsl::fold, 3))?;
|
465
|
+
class.define_singleton_method("cumfold", function!(crate::lazy::dsl::cumfold, 4))?;
|
449
466
|
class.define_singleton_method("lit", function!(crate::lazy::dsl::lit, 1))?;
|
450
467
|
class.define_singleton_method("arange", function!(crate::lazy::dsl::arange, 3))?;
|
451
468
|
class.define_singleton_method("repeat", function!(crate::lazy::dsl::repeat, 2))?;
|
@@ -455,11 +472,13 @@ fn init() -> RbResult<()> {
|
|
455
472
|
function!(crate::lazy::dsl::spearman_rank_corr, 4),
|
456
473
|
)?;
|
457
474
|
class.define_singleton_method("cov", function!(crate::lazy::dsl::cov, 2))?;
|
475
|
+
class.define_singleton_method("argsort_by", function!(crate::lazy::dsl::argsort_by, 2))?;
|
458
476
|
class.define_singleton_method("when", function!(crate::lazy::dsl::when, 1))?;
|
459
477
|
class.define_singleton_method("concat_str", function!(crate::lazy::dsl::concat_str, 2))?;
|
460
478
|
class.define_singleton_method("concat_lst", function!(crate::lazy::dsl::concat_lst, 1))?;
|
461
479
|
|
462
480
|
let class = module.define_class("RbLazyFrame", Default::default())?;
|
481
|
+
class.define_singleton_method("read_json", function!(RbLazyFrame::read_json, 1))?;
|
463
482
|
class.define_singleton_method(
|
464
483
|
"new_from_ndjson",
|
465
484
|
function!(RbLazyFrame::new_from_ndjson, 7),
|
@@ -489,7 +508,9 @@ fn init() -> RbResult<()> {
|
|
489
508
|
class.define_method("select", method!(RbLazyFrame::select, 1))?;
|
490
509
|
class.define_method("groupby", method!(RbLazyFrame::groupby, 2))?;
|
491
510
|
class.define_method("groupby_rolling", method!(RbLazyFrame::groupby_rolling, 5))?;
|
492
|
-
class.define_method("groupby_dynamic", method!(RbLazyFrame::groupby_dynamic, 8))?;
|
511
|
+
class.define_method("groupby_dynamic", method!(RbLazyFrame::groupby_dynamic, 9))?;
|
512
|
+
class.define_method("with_context", method!(RbLazyFrame::with_context, 1))?;
|
513
|
+
class.define_method("join_asof", method!(RbLazyFrame::join_asof, 11))?;
|
493
514
|
class.define_method("join", method!(RbLazyFrame::join, 7))?;
|
494
515
|
class.define_method("with_columns", method!(RbLazyFrame::with_columns, 1))?;
|
495
516
|
class.define_method("rename", method!(RbLazyFrame::rename, 2))?;
|
@@ -597,6 +618,7 @@ fn init() -> RbResult<()> {
|
|
597
618
|
class.define_method("median", method!(RbSeries::median, 0))?;
|
598
619
|
class.define_method("quantile", method!(RbSeries::quantile, 2))?;
|
599
620
|
class.define_method("_clone", method!(RbSeries::clone, 0))?;
|
621
|
+
class.define_method("apply_lambda", method!(RbSeries::apply_lambda, 3))?;
|
600
622
|
class.define_method("zip_with", method!(RbSeries::zip_with, 2))?;
|
601
623
|
class.define_method("to_dummies", method!(RbSeries::to_dummies, 0))?;
|
602
624
|
class.define_method("peak_max", method!(RbSeries::peak_max, 0))?;
|
@@ -611,6 +633,79 @@ fn init() -> RbResult<()> {
|
|
611
633
|
class.define_method("time_unit", method!(RbSeries::time_unit, 0))?;
|
612
634
|
class.define_method("set_at_idx", method!(RbSeries::set_at_idx, 2))?;
|
613
635
|
|
636
|
+
// set
|
637
|
+
// class.define_method("set_with_mask_str", method!(RbSeries::set_with_mask_str, 2))?;
|
638
|
+
class.define_method("set_with_mask_f64", method!(RbSeries::set_with_mask_f64, 2))?;
|
639
|
+
class.define_method("set_with_mask_f32", method!(RbSeries::set_with_mask_f32, 2))?;
|
640
|
+
class.define_method("set_with_mask_u8", method!(RbSeries::set_with_mask_u8, 2))?;
|
641
|
+
class.define_method("set_with_mask_u16", method!(RbSeries::set_with_mask_u16, 2))?;
|
642
|
+
class.define_method("set_with_mask_u32", method!(RbSeries::set_with_mask_u32, 2))?;
|
643
|
+
class.define_method("set_with_mask_u64", method!(RbSeries::set_with_mask_u64, 2))?;
|
644
|
+
class.define_method("set_with_mask_i8", method!(RbSeries::set_with_mask_i8, 2))?;
|
645
|
+
class.define_method("set_with_mask_i16", method!(RbSeries::set_with_mask_i16, 2))?;
|
646
|
+
class.define_method("set_with_mask_i32", method!(RbSeries::set_with_mask_i32, 2))?;
|
647
|
+
class.define_method("set_with_mask_i64", method!(RbSeries::set_with_mask_i64, 2))?;
|
648
|
+
class.define_method(
|
649
|
+
"set_with_mask_bool",
|
650
|
+
method!(RbSeries::set_with_mask_bool, 2),
|
651
|
+
)?;
|
652
|
+
|
653
|
+
// arithmetic
|
654
|
+
class.define_method("add_u8", method!(RbSeries::add_u8, 1))?;
|
655
|
+
class.define_method("add_u16", method!(RbSeries::add_u16, 1))?;
|
656
|
+
class.define_method("add_u32", method!(RbSeries::add_u32, 1))?;
|
657
|
+
class.define_method("add_u64", method!(RbSeries::add_u64, 1))?;
|
658
|
+
class.define_method("add_i8", method!(RbSeries::add_i8, 1))?;
|
659
|
+
class.define_method("add_i16", method!(RbSeries::add_i16, 1))?;
|
660
|
+
class.define_method("add_i32", method!(RbSeries::add_i32, 1))?;
|
661
|
+
class.define_method("add_i64", method!(RbSeries::add_i64, 1))?;
|
662
|
+
class.define_method("add_datetime", method!(RbSeries::add_datetime, 1))?;
|
663
|
+
class.define_method("add_duration", method!(RbSeries::add_duration, 1))?;
|
664
|
+
class.define_method("add_f32", method!(RbSeries::add_f32, 1))?;
|
665
|
+
class.define_method("add_f64", method!(RbSeries::add_f64, 1))?;
|
666
|
+
class.define_method("sub_u8", method!(RbSeries::sub_u8, 1))?;
|
667
|
+
class.define_method("sub_u16", method!(RbSeries::sub_u16, 1))?;
|
668
|
+
class.define_method("sub_u32", method!(RbSeries::sub_u32, 1))?;
|
669
|
+
class.define_method("sub_u64", method!(RbSeries::sub_u64, 1))?;
|
670
|
+
class.define_method("sub_i8", method!(RbSeries::sub_i8, 1))?;
|
671
|
+
class.define_method("sub_i16", method!(RbSeries::sub_i16, 1))?;
|
672
|
+
class.define_method("sub_i32", method!(RbSeries::sub_i32, 1))?;
|
673
|
+
class.define_method("sub_i64", method!(RbSeries::sub_i64, 1))?;
|
674
|
+
class.define_method("sub_datetime", method!(RbSeries::sub_datetime, 1))?;
|
675
|
+
class.define_method("sub_duration", method!(RbSeries::sub_duration, 1))?;
|
676
|
+
class.define_method("sub_f32", method!(RbSeries::sub_f32, 1))?;
|
677
|
+
class.define_method("sub_f64", method!(RbSeries::sub_f64, 1))?;
|
678
|
+
class.define_method("div_u8", method!(RbSeries::div_u8, 1))?;
|
679
|
+
class.define_method("div_u16", method!(RbSeries::div_u16, 1))?;
|
680
|
+
class.define_method("div_u32", method!(RbSeries::div_u32, 1))?;
|
681
|
+
class.define_method("div_u64", method!(RbSeries::div_u64, 1))?;
|
682
|
+
class.define_method("div_i8", method!(RbSeries::div_i8, 1))?;
|
683
|
+
class.define_method("div_i16", method!(RbSeries::div_i16, 1))?;
|
684
|
+
class.define_method("div_i32", method!(RbSeries::div_i32, 1))?;
|
685
|
+
class.define_method("div_i64", method!(RbSeries::div_i64, 1))?;
|
686
|
+
class.define_method("div_f32", method!(RbSeries::div_f32, 1))?;
|
687
|
+
class.define_method("div_f64", method!(RbSeries::div_f64, 1))?;
|
688
|
+
class.define_method("mul_u8", method!(RbSeries::mul_u8, 1))?;
|
689
|
+
class.define_method("mul_u16", method!(RbSeries::mul_u16, 1))?;
|
690
|
+
class.define_method("mul_u32", method!(RbSeries::mul_u32, 1))?;
|
691
|
+
class.define_method("mul_u64", method!(RbSeries::mul_u64, 1))?;
|
692
|
+
class.define_method("mul_i8", method!(RbSeries::mul_i8, 1))?;
|
693
|
+
class.define_method("mul_i16", method!(RbSeries::mul_i16, 1))?;
|
694
|
+
class.define_method("mul_i32", method!(RbSeries::mul_i32, 1))?;
|
695
|
+
class.define_method("mul_i64", method!(RbSeries::mul_i64, 1))?;
|
696
|
+
class.define_method("mul_f32", method!(RbSeries::mul_f32, 1))?;
|
697
|
+
class.define_method("mul_f64", method!(RbSeries::mul_f64, 1))?;
|
698
|
+
class.define_method("rem_u8", method!(RbSeries::rem_u8, 1))?;
|
699
|
+
class.define_method("rem_u16", method!(RbSeries::rem_u16, 1))?;
|
700
|
+
class.define_method("rem_u32", method!(RbSeries::rem_u32, 1))?;
|
701
|
+
class.define_method("rem_u64", method!(RbSeries::rem_u64, 1))?;
|
702
|
+
class.define_method("rem_i8", method!(RbSeries::rem_i8, 1))?;
|
703
|
+
class.define_method("rem_i16", method!(RbSeries::rem_i16, 1))?;
|
704
|
+
class.define_method("rem_i32", method!(RbSeries::rem_i32, 1))?;
|
705
|
+
class.define_method("rem_i64", method!(RbSeries::rem_i64, 1))?;
|
706
|
+
class.define_method("rem_f32", method!(RbSeries::rem_f32, 1))?;
|
707
|
+
class.define_method("rem_f64", method!(RbSeries::rem_f64, 1))?;
|
708
|
+
|
614
709
|
// eq
|
615
710
|
class.define_method("eq_u8", method!(RbSeries::eq_u8, 1))?;
|
616
711
|
class.define_method("eq_u16", method!(RbSeries::eq_u16, 1))?;
|
@@ -698,6 +793,40 @@ fn init() -> RbResult<()> {
|
|
698
793
|
Ok(())
|
699
794
|
}
|
700
795
|
|
796
|
+
fn dtype_cols(dtypes: RArray) -> RbResult<RbExpr> {
|
797
|
+
let dtypes = dtypes
|
798
|
+
.each()
|
799
|
+
.map(|v| v?.try_convert::<Wrap<DataType>>())
|
800
|
+
.collect::<RbResult<Vec<Wrap<DataType>>>>()?;
|
801
|
+
let dtypes = vec_extract_wrapped(dtypes);
|
802
|
+
Ok(crate::lazy::dsl::dtype_cols(dtypes))
|
803
|
+
}
|
804
|
+
|
805
|
+
#[allow(clippy::too_many_arguments)]
|
806
|
+
fn rb_duration(
|
807
|
+
days: Option<&RbExpr>,
|
808
|
+
seconds: Option<&RbExpr>,
|
809
|
+
nanoseconds: Option<&RbExpr>,
|
810
|
+
microseconds: Option<&RbExpr>,
|
811
|
+
milliseconds: Option<&RbExpr>,
|
812
|
+
minutes: Option<&RbExpr>,
|
813
|
+
hours: Option<&RbExpr>,
|
814
|
+
weeks: Option<&RbExpr>,
|
815
|
+
) -> RbExpr {
|
816
|
+
let args = DurationArgs {
|
817
|
+
days: days.map(|e| e.inner.clone()),
|
818
|
+
seconds: seconds.map(|e| e.inner.clone()),
|
819
|
+
nanoseconds: nanoseconds.map(|e| e.inner.clone()),
|
820
|
+
microseconds: microseconds.map(|e| e.inner.clone()),
|
821
|
+
milliseconds: milliseconds.map(|e| e.inner.clone()),
|
822
|
+
minutes: minutes.map(|e| e.inner.clone()),
|
823
|
+
hours: hours.map(|e| e.inner.clone()),
|
824
|
+
weeks: weeks.map(|e| e.inner.clone()),
|
825
|
+
};
|
826
|
+
|
827
|
+
polars::lazy::dsl::duration(args).into()
|
828
|
+
}
|
829
|
+
|
701
830
|
fn concat_df(seq: RArray) -> RbResult<RbDataFrame> {
|
702
831
|
let mut iter = seq.each();
|
703
832
|
let first = iter.next().unwrap()?;
|
@@ -726,6 +855,20 @@ fn concat_df(seq: RArray) -> RbResult<RbDataFrame> {
|
|
726
855
|
Ok(df.into())
|
727
856
|
}
|
728
857
|
|
858
|
+
fn concat_lf(lfs: Value, rechunk: bool, parallel: bool) -> RbResult<RbLazyFrame> {
|
859
|
+
let (seq, len) = get_rbseq(lfs)?;
|
860
|
+
let mut lfs = Vec::with_capacity(len);
|
861
|
+
|
862
|
+
for res in seq.each() {
|
863
|
+
let item = res?;
|
864
|
+
let lf = get_lf(item)?;
|
865
|
+
lfs.push(lf);
|
866
|
+
}
|
867
|
+
|
868
|
+
let lf = polars::lazy::dsl::concat(lfs, rechunk, parallel).map_err(RbPolarsErr::from)?;
|
869
|
+
Ok(lf.into())
|
870
|
+
}
|
871
|
+
|
729
872
|
fn rb_diag_concat_df(seq: RArray) -> RbResult<RbDataFrame> {
|
730
873
|
let mut dfs = Vec::new();
|
731
874
|
for item in seq.each() {
|
@@ -786,6 +929,25 @@ fn parquet_schema(rb_f: Value) -> RbResult<Value> {
|
|
786
929
|
Ok(dict.into())
|
787
930
|
}
|
788
931
|
|
932
|
+
fn collect_all(lfs: RArray) -> RbResult<Vec<RbDataFrame>> {
|
933
|
+
use polars_core::utils::rayon::prelude::*;
|
934
|
+
|
935
|
+
let lfs = lfs
|
936
|
+
.each()
|
937
|
+
.map(|v| v?.try_convert::<&RbLazyFrame>())
|
938
|
+
.collect::<RbResult<Vec<&RbLazyFrame>>>()?;
|
939
|
+
|
940
|
+
polars_core::POOL.install(|| {
|
941
|
+
lfs.par_iter()
|
942
|
+
.map(|lf| {
|
943
|
+
let df = lf.ldf.clone().collect()?;
|
944
|
+
Ok(RbDataFrame::new(df))
|
945
|
+
})
|
946
|
+
.collect::<polars_core::error::PolarsResult<Vec<_>>>()
|
947
|
+
.map_err(RbPolarsErr::from)
|
948
|
+
})
|
949
|
+
}
|
950
|
+
|
789
951
|
fn rb_date_range(
|
790
952
|
start: i64,
|
791
953
|
stop: i64,
|
@@ -802,12 +964,22 @@ fn rb_date_range(
|
|
802
964
|
Duration::parse(&every),
|
803
965
|
closed.0,
|
804
966
|
tu.0,
|
805
|
-
tz,
|
967
|
+
tz.as_ref(),
|
806
968
|
)
|
807
969
|
.into_series()
|
808
970
|
.into()
|
809
971
|
}
|
810
972
|
|
973
|
+
fn coalesce_exprs(exprs: RArray) -> RbResult<RbExpr> {
|
974
|
+
let exprs = rb_exprs_to_exprs(exprs)?;
|
975
|
+
Ok(polars::lazy::dsl::coalesce(&exprs).into())
|
976
|
+
}
|
977
|
+
|
978
|
+
fn sum_exprs(exprs: RArray) -> RbResult<RbExpr> {
|
979
|
+
let exprs = rb_exprs_to_exprs(exprs)?;
|
980
|
+
Ok(polars::lazy::dsl::sum_exprs(exprs).into())
|
981
|
+
}
|
982
|
+
|
811
983
|
fn as_struct(exprs: RArray) -> RbResult<RbExpr> {
|
812
984
|
let exprs = rb_exprs_to_exprs(exprs)?;
|
813
985
|
Ok(polars::lazy::dsl::as_struct(&exprs).into())
|