polars-df 0.1.4 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +13 -0
- data/Cargo.lock +430 -217
- data/Cargo.toml +2 -0
- data/LICENSE.txt +1 -1
- data/README.md +0 -2
- data/ext/polars/Cargo.toml +9 -3
- data/ext/polars/src/apply/dataframe.rs +303 -0
- data/ext/polars/src/apply/mod.rs +253 -0
- data/ext/polars/src/apply/series.rs +1173 -0
- data/ext/polars/src/conversion.rs +254 -35
- data/ext/polars/src/dataframe.rs +151 -6
- data/ext/polars/src/error.rs +8 -0
- data/ext/polars/src/lazy/apply.rs +34 -2
- data/ext/polars/src/lazy/dataframe.rs +80 -3
- data/ext/polars/src/lazy/dsl.rs +84 -10
- data/ext/polars/src/lib.rs +180 -8
- data/ext/polars/src/series.rs +328 -10
- data/ext/polars/src/utils.rs +25 -0
- data/lib/polars/convert.rb +100 -0
- data/lib/polars/data_frame.rb +1480 -77
- data/lib/polars/data_types.rb +122 -0
- data/lib/polars/date_time_expr.rb +10 -10
- data/lib/polars/date_time_name_space.rb +8 -8
- data/lib/polars/dynamic_group_by.rb +52 -0
- data/lib/polars/expr.rb +262 -12
- data/lib/polars/functions.rb +194 -5
- data/lib/polars/group_by.rb +76 -36
- data/lib/polars/io.rb +19 -3
- data/lib/polars/lazy_frame.rb +798 -25
- data/lib/polars/lazy_functions.rb +569 -30
- data/lib/polars/list_expr.rb +1 -1
- data/lib/polars/rolling_group_by.rb +35 -0
- data/lib/polars/series.rb +192 -27
- data/lib/polars/string_expr.rb +6 -5
- data/lib/polars/string_name_space.rb +1 -1
- data/lib/polars/utils.rb +25 -8
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +38 -29
- metadata +11 -4
@@ -3,7 +3,7 @@ use polars::io::RowCount;
|
|
3
3
|
use polars::lazy::frame::{LazyFrame, LazyGroupBy};
|
4
4
|
use polars::prelude::*;
|
5
5
|
use std::cell::RefCell;
|
6
|
-
use std::io::BufWriter;
|
6
|
+
use std::io::{BufWriter, Read};
|
7
7
|
|
8
8
|
use crate::conversion::*;
|
9
9
|
use crate::file::get_file_like;
|
@@ -53,6 +53,27 @@ impl From<LazyFrame> for RbLazyFrame {
|
|
53
53
|
}
|
54
54
|
|
55
55
|
impl RbLazyFrame {
|
56
|
+
pub fn read_json(rb_f: Value) -> RbResult<Self> {
|
57
|
+
// it is faster to first read to memory and then parse: https://github.com/serde-rs/json/issues/160
|
58
|
+
// so don't bother with files.
|
59
|
+
let mut json = String::new();
|
60
|
+
let _ = get_file_like(rb_f, false)?
|
61
|
+
.read_to_string(&mut json)
|
62
|
+
.unwrap();
|
63
|
+
|
64
|
+
// Safety
|
65
|
+
// we skipped the serializing/deserializing of the `'static` lifetime in `DataType`
|
66
|
+
// so we actually don't have a lifetime at all when serializing.
|
67
|
+
|
68
|
+
// &str still has a lifetime. But it's ok, because we drop it immediately
|
69
|
+
// in this scope
|
70
|
+
let json = unsafe { std::mem::transmute::<&'_ str, &'static str>(json.as_str()) };
|
71
|
+
|
72
|
+
let lp = serde_json::from_str::<LogicalPlan>(json)
|
73
|
+
.map_err(|err| RbValueError::new_err(format!("{:?}", err)))?;
|
74
|
+
Ok(LazyFrame::from(lp).into())
|
75
|
+
}
|
76
|
+
|
56
77
|
pub fn new_from_ndjson(
|
57
78
|
path: String,
|
58
79
|
infer_schema_length: Option<usize>,
|
@@ -327,6 +348,7 @@ impl RbLazyFrame {
|
|
327
348
|
include_boundaries: bool,
|
328
349
|
closed: Wrap<ClosedWindow>,
|
329
350
|
by: RArray,
|
351
|
+
start_by: Wrap<StartBy>,
|
330
352
|
) -> RbResult<RbLazyGroupBy> {
|
331
353
|
let closed_window = closed.0;
|
332
354
|
let by = rb_exprs_to_exprs(by)?;
|
@@ -341,6 +363,7 @@ impl RbLazyFrame {
|
|
341
363
|
truncate,
|
342
364
|
include_boundaries,
|
343
365
|
closed_window,
|
366
|
+
start_by: start_by.0,
|
344
367
|
},
|
345
368
|
);
|
346
369
|
|
@@ -349,6 +372,56 @@ impl RbLazyFrame {
|
|
349
372
|
})
|
350
373
|
}
|
351
374
|
|
375
|
+
pub fn with_context(&self, contexts: RArray) -> RbResult<Self> {
|
376
|
+
let contexts = contexts
|
377
|
+
.each()
|
378
|
+
.map(|v| v.unwrap().try_convert())
|
379
|
+
.collect::<RbResult<Vec<&RbLazyFrame>>>()?;
|
380
|
+
let contexts = contexts
|
381
|
+
.into_iter()
|
382
|
+
.map(|ldf| ldf.ldf.clone())
|
383
|
+
.collect::<Vec<_>>();
|
384
|
+
Ok(self.ldf.clone().with_context(contexts).into())
|
385
|
+
}
|
386
|
+
|
387
|
+
#[allow(clippy::too_many_arguments)]
|
388
|
+
pub fn join_asof(
|
389
|
+
&self,
|
390
|
+
other: &RbLazyFrame,
|
391
|
+
left_on: &RbExpr,
|
392
|
+
right_on: &RbExpr,
|
393
|
+
left_by: Option<Vec<String>>,
|
394
|
+
right_by: Option<Vec<String>>,
|
395
|
+
allow_parallel: bool,
|
396
|
+
force_parallel: bool,
|
397
|
+
suffix: String,
|
398
|
+
strategy: Wrap<AsofStrategy>,
|
399
|
+
tolerance: Option<Wrap<AnyValue<'_>>>,
|
400
|
+
tolerance_str: Option<String>,
|
401
|
+
) -> RbResult<Self> {
|
402
|
+
let ldf = self.ldf.clone();
|
403
|
+
let other = other.ldf.clone();
|
404
|
+
let left_on = left_on.inner.clone();
|
405
|
+
let right_on = right_on.inner.clone();
|
406
|
+
Ok(ldf
|
407
|
+
.join_builder()
|
408
|
+
.with(other)
|
409
|
+
.left_on([left_on])
|
410
|
+
.right_on([right_on])
|
411
|
+
.allow_parallel(allow_parallel)
|
412
|
+
.force_parallel(force_parallel)
|
413
|
+
.how(JoinType::AsOf(AsOfOptions {
|
414
|
+
strategy: strategy.0,
|
415
|
+
left_by,
|
416
|
+
right_by,
|
417
|
+
tolerance: tolerance.map(|t| t.0.into_static().unwrap()),
|
418
|
+
tolerance_str,
|
419
|
+
}))
|
420
|
+
.suffix(suffix)
|
421
|
+
.finish()
|
422
|
+
.into())
|
423
|
+
}
|
424
|
+
|
352
425
|
#[allow(clippy::too_many_arguments)]
|
353
426
|
pub fn join(
|
354
427
|
&self,
|
@@ -443,9 +516,13 @@ impl RbLazyFrame {
|
|
443
516
|
ldf.median().into()
|
444
517
|
}
|
445
518
|
|
446
|
-
pub fn quantile(
|
519
|
+
pub fn quantile(
|
520
|
+
&self,
|
521
|
+
quantile: &RbExpr,
|
522
|
+
interpolation: Wrap<QuantileInterpolOptions>,
|
523
|
+
) -> Self {
|
447
524
|
let ldf = self.ldf.clone();
|
448
|
-
ldf.quantile(quantile, interpolation.0).into()
|
525
|
+
ldf.quantile(quantile.inner.clone(), interpolation.0).into()
|
449
526
|
}
|
450
527
|
|
451
528
|
pub fn explode(&self, column: RArray) -> RbResult<Self> {
|
data/ext/polars/src/lazy/dsl.rs
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
use magnus::block::Proc;
|
1
2
|
use magnus::{class, RArray, RString, Value};
|
2
3
|
use polars::chunked_array::ops::SortOptions;
|
3
4
|
use polars::lazy::dsl;
|
@@ -163,10 +164,14 @@ impl RbExpr {
|
|
163
164
|
self.clone().inner.list().into()
|
164
165
|
}
|
165
166
|
|
166
|
-
pub fn quantile(
|
167
|
+
pub fn quantile(
|
168
|
+
&self,
|
169
|
+
quantile: &RbExpr,
|
170
|
+
interpolation: Wrap<QuantileInterpolOptions>,
|
171
|
+
) -> Self {
|
167
172
|
self.clone()
|
168
173
|
.inner
|
169
|
-
.quantile(quantile, interpolation.0)
|
174
|
+
.quantile(quantile.inner.clone(), interpolation.0)
|
170
175
|
.into()
|
171
176
|
}
|
172
177
|
|
@@ -505,7 +510,13 @@ impl RbExpr {
|
|
505
510
|
self.inner.clone().shrink_dtype().into()
|
506
511
|
}
|
507
512
|
|
508
|
-
pub fn str_parse_date(
|
513
|
+
pub fn str_parse_date(
|
514
|
+
&self,
|
515
|
+
fmt: Option<String>,
|
516
|
+
strict: bool,
|
517
|
+
exact: bool,
|
518
|
+
cache: bool,
|
519
|
+
) -> Self {
|
509
520
|
self.inner
|
510
521
|
.clone()
|
511
522
|
.str()
|
@@ -514,11 +525,20 @@ impl RbExpr {
|
|
514
525
|
fmt,
|
515
526
|
strict,
|
516
527
|
exact,
|
528
|
+
cache,
|
529
|
+
tz_aware: false,
|
517
530
|
})
|
518
531
|
.into()
|
519
532
|
}
|
520
533
|
|
521
|
-
pub fn str_parse_datetime(
|
534
|
+
pub fn str_parse_datetime(
|
535
|
+
&self,
|
536
|
+
fmt: Option<String>,
|
537
|
+
strict: bool,
|
538
|
+
exact: bool,
|
539
|
+
cache: bool,
|
540
|
+
tz_aware: bool,
|
541
|
+
) -> Self {
|
522
542
|
let tu = match fmt {
|
523
543
|
Some(ref fmt) => {
|
524
544
|
if fmt.contains("%.9f")
|
@@ -543,11 +563,19 @@ impl RbExpr {
|
|
543
563
|
fmt,
|
544
564
|
strict,
|
545
565
|
exact,
|
566
|
+
cache,
|
567
|
+
tz_aware,
|
546
568
|
})
|
547
569
|
.into()
|
548
570
|
}
|
549
571
|
|
550
|
-
pub fn str_parse_time(
|
572
|
+
pub fn str_parse_time(
|
573
|
+
&self,
|
574
|
+
fmt: Option<String>,
|
575
|
+
strict: bool,
|
576
|
+
exact: bool,
|
577
|
+
cache: bool,
|
578
|
+
) -> Self {
|
551
579
|
self.inner
|
552
580
|
.clone()
|
553
581
|
.str()
|
@@ -556,6 +584,8 @@ impl RbExpr {
|
|
556
584
|
fmt,
|
557
585
|
strict,
|
558
586
|
exact,
|
587
|
+
cache,
|
588
|
+
tz_aware: false,
|
559
589
|
})
|
560
590
|
.into()
|
561
591
|
}
|
@@ -722,8 +752,12 @@ impl RbExpr {
|
|
722
752
|
self.inner.clone().str().extract(&pat, group_index).into()
|
723
753
|
}
|
724
754
|
|
725
|
-
pub fn str_extract_all(&self, pat:
|
726
|
-
self.inner
|
755
|
+
pub fn str_extract_all(&self, pat: &RbExpr) -> Self {
|
756
|
+
self.inner
|
757
|
+
.clone()
|
758
|
+
.str()
|
759
|
+
.extract_all(pat.inner.clone())
|
760
|
+
.into()
|
727
761
|
}
|
728
762
|
|
729
763
|
pub fn count_match(&self, pat: String) -> Self {
|
@@ -946,6 +980,10 @@ impl RbExpr {
|
|
946
980
|
self.inner.clone().dt().round(&every, &offset).into()
|
947
981
|
}
|
948
982
|
|
983
|
+
pub fn map(&self, lambda: Value, output_type: Option<Wrap<DataType>>, agg_list: bool) -> Self {
|
984
|
+
map_single(self, lambda, output_type, agg_list)
|
985
|
+
}
|
986
|
+
|
949
987
|
pub fn dot(&self, other: &RbExpr) -> Self {
|
950
988
|
self.inner.clone().dot(other.inner.clone()).into()
|
951
989
|
}
|
@@ -979,12 +1017,27 @@ impl RbExpr {
|
|
979
1017
|
self.inner.clone().suffix(&suffix).into()
|
980
1018
|
}
|
981
1019
|
|
1020
|
+
pub fn map_alias(&self, lambda: Proc) -> Self {
|
1021
|
+
self.inner
|
1022
|
+
.clone()
|
1023
|
+
.map_alias(move |name| {
|
1024
|
+
let out = lambda.call::<_, String>((name,));
|
1025
|
+
match out {
|
1026
|
+
Ok(out) => Ok(out),
|
1027
|
+
Err(e) => Err(PolarsError::ComputeError(
|
1028
|
+
format!("Ruby function in 'map_alias' produced an error: {}.", e).into(),
|
1029
|
+
)),
|
1030
|
+
}
|
1031
|
+
})
|
1032
|
+
.into()
|
1033
|
+
}
|
1034
|
+
|
982
1035
|
pub fn exclude(&self, columns: Vec<String>) -> Self {
|
983
1036
|
self.inner.clone().exclude(columns).into()
|
984
1037
|
}
|
985
1038
|
|
986
|
-
pub fn interpolate(&self) -> Self {
|
987
|
-
self.inner.clone().interpolate().into()
|
1039
|
+
pub fn interpolate(&self, method: Wrap<InterpolationMethod>) -> Self {
|
1040
|
+
self.inner.clone().interpolate(method.0).into()
|
988
1041
|
}
|
989
1042
|
|
990
1043
|
pub fn rolling_sum(
|
@@ -1275,6 +1328,7 @@ impl RbExpr {
|
|
1275
1328
|
&self,
|
1276
1329
|
width_strat: Wrap<ListToStructWidthStrategy>,
|
1277
1330
|
_name_gen: Option<Value>,
|
1331
|
+
upper_bound: usize,
|
1278
1332
|
) -> RbResult<Self> {
|
1279
1333
|
// TODO fix
|
1280
1334
|
let name_gen = None;
|
@@ -1289,7 +1343,7 @@ impl RbExpr {
|
|
1289
1343
|
.inner
|
1290
1344
|
.clone()
|
1291
1345
|
.arr()
|
1292
|
-
.to_struct(width_strat.0, name_gen)
|
1346
|
+
.to_struct(width_strat.0, name_gen, upper_bound)
|
1293
1347
|
.into())
|
1294
1348
|
}
|
1295
1349
|
|
@@ -1450,6 +1504,10 @@ impl RbExpr {
|
|
1450
1504
|
pub fn entropy(&self, base: f64, normalize: bool) -> Self {
|
1451
1505
|
self.inner.clone().entropy(base, normalize).into()
|
1452
1506
|
}
|
1507
|
+
|
1508
|
+
pub fn hash(&self, seed: u64, seed_1: u64, seed_2: u64, seed_3: u64) -> Self {
|
1509
|
+
self.inner.clone().hash(seed, seed_1, seed_2, seed_3).into()
|
1510
|
+
}
|
1453
1511
|
}
|
1454
1512
|
|
1455
1513
|
pub fn col(name: String) -> RbExpr {
|
@@ -1472,6 +1530,10 @@ pub fn cols(names: Vec<String>) -> RbExpr {
|
|
1472
1530
|
dsl::cols(names).into()
|
1473
1531
|
}
|
1474
1532
|
|
1533
|
+
pub fn dtype_cols(dtypes: Vec<DataType>) -> RbExpr {
|
1534
|
+
dsl::dtype_cols(dtypes).into()
|
1535
|
+
}
|
1536
|
+
|
1475
1537
|
pub fn fold(acc: &RbExpr, lambda: Value, exprs: RArray) -> RbResult<RbExpr> {
|
1476
1538
|
let exprs = rb_exprs_to_exprs(exprs)?;
|
1477
1539
|
|
@@ -1479,6 +1541,13 @@ pub fn fold(acc: &RbExpr, lambda: Value, exprs: RArray) -> RbResult<RbExpr> {
|
|
1479
1541
|
Ok(polars::lazy::dsl::fold_exprs(acc.inner.clone(), func, exprs).into())
|
1480
1542
|
}
|
1481
1543
|
|
1544
|
+
pub fn cumfold(acc: &RbExpr, lambda: Value, exprs: RArray, include_init: bool) -> RbResult<RbExpr> {
|
1545
|
+
let exprs = rb_exprs_to_exprs(exprs)?;
|
1546
|
+
|
1547
|
+
let func = move |a: Series, b: Series| binary_lambda(lambda, a, b);
|
1548
|
+
Ok(polars::lazy::dsl::cumfold_exprs(acc.inner.clone(), func, exprs, include_init).into())
|
1549
|
+
}
|
1550
|
+
|
1482
1551
|
// TODO improve
|
1483
1552
|
pub fn lit(value: Value) -> RbResult<RbExpr> {
|
1484
1553
|
if value.is_nil() {
|
@@ -1531,6 +1600,11 @@ pub fn cov(a: &RbExpr, b: &RbExpr) -> RbExpr {
|
|
1531
1600
|
polars::lazy::dsl::cov(a.inner.clone(), b.inner.clone()).into()
|
1532
1601
|
}
|
1533
1602
|
|
1603
|
+
pub fn argsort_by(by: RArray, reverse: Vec<bool>) -> RbResult<RbExpr> {
|
1604
|
+
let by = rb_exprs_to_exprs(by)?;
|
1605
|
+
Ok(polars::lazy::dsl::argsort_by(by, &reverse).into())
|
1606
|
+
}
|
1607
|
+
|
1534
1608
|
#[magnus::wrap(class = "Polars::RbWhen")]
|
1535
1609
|
#[derive(Clone)]
|
1536
1610
|
pub struct RbWhen {
|
data/ext/polars/src/lib.rs
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
mod apply;
|
1
2
|
mod batched_csv;
|
2
3
|
mod conversion;
|
3
4
|
mod dataframe;
|
@@ -25,7 +26,7 @@ use polars::datatypes::{DataType, TimeUnit};
|
|
25
26
|
use polars::error::PolarsResult;
|
26
27
|
use polars::frame::DataFrame;
|
27
28
|
use polars::functions::{diag_concat_df, hor_concat_df};
|
28
|
-
use polars::prelude::{ClosedWindow, Duration, IntoSeries, TimeZone};
|
29
|
+
use polars::prelude::{ClosedWindow, Duration, DurationArgs, IntoSeries, TimeZone};
|
29
30
|
use series::RbSeries;
|
30
31
|
|
31
32
|
#[cfg(target_os = "linux")]
|
@@ -55,13 +56,19 @@ fn series() -> RClass {
|
|
55
56
|
#[magnus::init]
|
56
57
|
fn init() -> RbResult<()> {
|
57
58
|
let module = module();
|
59
|
+
module.define_singleton_method("_dtype_cols", function!(dtype_cols, 1))?;
|
60
|
+
module.define_singleton_method("_rb_duration", function!(rb_duration, 8))?;
|
58
61
|
module.define_singleton_method("_concat_df", function!(concat_df, 1))?;
|
62
|
+
module.define_singleton_method("_concat_lf", function!(concat_lf, 3))?;
|
59
63
|
module.define_singleton_method("_diag_concat_df", function!(rb_diag_concat_df, 1))?;
|
60
64
|
module.define_singleton_method("_hor_concat_df", function!(rb_hor_concat_df, 1))?;
|
61
65
|
module.define_singleton_method("_concat_series", function!(concat_series, 1))?;
|
62
66
|
module.define_singleton_method("_ipc_schema", function!(ipc_schema, 1))?;
|
63
67
|
module.define_singleton_method("_parquet_schema", function!(parquet_schema, 1))?;
|
68
|
+
module.define_singleton_method("_collect_all", function!(collect_all, 1))?;
|
64
69
|
module.define_singleton_method("_rb_date_range", function!(rb_date_range, 7))?;
|
70
|
+
module.define_singleton_method("_coalesce_exprs", function!(coalesce_exprs, 1))?;
|
71
|
+
module.define_singleton_method("_sum_exprs", function!(sum_exprs, 1))?;
|
65
72
|
module.define_singleton_method("_as_struct", function!(as_struct, 1))?;
|
66
73
|
module.define_singleton_method("_arg_where", function!(arg_where, 1))?;
|
67
74
|
|
@@ -74,10 +81,13 @@ fn init() -> RbResult<()> {
|
|
74
81
|
class.define_singleton_method("read_csv", function!(RbDataFrame::read_csv, -1))?;
|
75
82
|
class.define_singleton_method("read_parquet", function!(RbDataFrame::read_parquet, 7))?;
|
76
83
|
class.define_singleton_method("read_ipc", function!(RbDataFrame::read_ipc, 6))?;
|
84
|
+
class.define_singleton_method("read_avro", function!(RbDataFrame::read_avro, 4))?;
|
85
|
+
class.define_singleton_method("read_hashes", function!(RbDataFrame::read_hashes, 3))?;
|
77
86
|
class.define_singleton_method("read_hash", function!(RbDataFrame::read_hash, 1))?;
|
78
87
|
class.define_singleton_method("read_json", function!(RbDataFrame::read_json, 1))?;
|
79
88
|
class.define_singleton_method("read_ndjson", function!(RbDataFrame::read_ndjson, 1))?;
|
80
89
|
class.define_method("estimated_size", method!(RbDataFrame::estimated_size, 0))?;
|
90
|
+
class.define_method("write_avro", method!(RbDataFrame::write_avro, 2))?;
|
81
91
|
class.define_method("write_json", method!(RbDataFrame::write_json, 3))?;
|
82
92
|
class.define_method("write_ndjson", method!(RbDataFrame::write_ndjson, 1))?;
|
83
93
|
class.define_method("write_csv", method!(RbDataFrame::write_csv, 10))?;
|
@@ -143,6 +153,7 @@ fn init() -> RbResult<()> {
|
|
143
153
|
class.define_method("with_row_count", method!(RbDataFrame::with_row_count, 2))?;
|
144
154
|
class.define_method("_clone", method!(RbDataFrame::clone, 0))?;
|
145
155
|
class.define_method("melt", method!(RbDataFrame::melt, 4))?;
|
156
|
+
class.define_method("pivot_expr", method!(RbDataFrame::pivot_expr, 6))?;
|
146
157
|
class.define_method("partition_by", method!(RbDataFrame::partition_by, 2))?;
|
147
158
|
class.define_method("shift", method!(RbDataFrame::shift, 1))?;
|
148
159
|
class.define_method("unique", method!(RbDataFrame::unique, 3))?;
|
@@ -161,7 +172,9 @@ fn init() -> RbResult<()> {
|
|
161
172
|
class.define_method("quantile", method!(RbDataFrame::quantile, 2))?;
|
162
173
|
class.define_method("to_dummies", method!(RbDataFrame::to_dummies, 1))?;
|
163
174
|
class.define_method("null_count", method!(RbDataFrame::null_count, 0))?;
|
175
|
+
class.define_method("apply", method!(RbDataFrame::apply, 3))?;
|
164
176
|
class.define_method("shrink_to_fit", method!(RbDataFrame::shrink_to_fit, 0))?;
|
177
|
+
class.define_method("hash_rows", method!(RbDataFrame::hash_rows, 4))?;
|
165
178
|
class.define_method("transpose", method!(RbDataFrame::transpose, 2))?;
|
166
179
|
class.define_method("upsample", method!(RbDataFrame::upsample, 5))?;
|
167
180
|
class.define_method("to_struct", method!(RbDataFrame::to_struct, 1))?;
|
@@ -277,9 +290,9 @@ fn init() -> RbResult<()> {
|
|
277
290
|
class.define_method("cumprod", method!(RbExpr::cumprod, 1))?;
|
278
291
|
class.define_method("product", method!(RbExpr::product, 0))?;
|
279
292
|
class.define_method("shrink_dtype", method!(RbExpr::shrink_dtype, 0))?;
|
280
|
-
class.define_method("str_parse_date", method!(RbExpr::str_parse_date,
|
281
|
-
class.define_method("str_parse_datetime", method!(RbExpr::str_parse_datetime,
|
282
|
-
class.define_method("str_parse_time", method!(RbExpr::str_parse_time,
|
293
|
+
class.define_method("str_parse_date", method!(RbExpr::str_parse_date, 4))?;
|
294
|
+
class.define_method("str_parse_datetime", method!(RbExpr::str_parse_datetime, 5))?;
|
295
|
+
class.define_method("str_parse_time", method!(RbExpr::str_parse_time, 4))?;
|
283
296
|
class.define_method("str_strip", method!(RbExpr::str_strip, 1))?;
|
284
297
|
class.define_method("str_rstrip", method!(RbExpr::str_rstrip, 1))?;
|
285
298
|
class.define_method("str_lstrip", method!(RbExpr::str_lstrip, 1))?;
|
@@ -361,14 +374,16 @@ fn init() -> RbResult<()> {
|
|
361
374
|
class.define_method("dt_tz_localize", method!(RbExpr::dt_tz_localize, 1))?;
|
362
375
|
class.define_method("dt_truncate", method!(RbExpr::dt_truncate, 2))?;
|
363
376
|
class.define_method("dt_round", method!(RbExpr::dt_round, 2))?;
|
377
|
+
class.define_method("map", method!(RbExpr::map, 3))?;
|
364
378
|
class.define_method("dot", method!(RbExpr::dot, 1))?;
|
365
379
|
class.define_method("reinterpret", method!(RbExpr::reinterpret, 1))?;
|
366
380
|
class.define_method("mode", method!(RbExpr::mode, 0))?;
|
367
381
|
class.define_method("keep_name", method!(RbExpr::keep_name, 0))?;
|
368
382
|
class.define_method("prefix", method!(RbExpr::prefix, 1))?;
|
369
383
|
class.define_method("suffix", method!(RbExpr::suffix, 1))?;
|
384
|
+
class.define_method("map_alias", method!(RbExpr::map_alias, 1))?;
|
370
385
|
class.define_method("exclude", method!(RbExpr::exclude, 1))?;
|
371
|
-
class.define_method("interpolate", method!(RbExpr::interpolate,
|
386
|
+
class.define_method("interpolate", method!(RbExpr::interpolate, 1))?;
|
372
387
|
class.define_method("rolling_sum", method!(RbExpr::rolling_sum, 6))?;
|
373
388
|
class.define_method("rolling_min", method!(RbExpr::rolling_min, 6))?;
|
374
389
|
class.define_method("rolling_max", method!(RbExpr::rolling_max, 6))?;
|
@@ -396,7 +411,7 @@ fn init() -> RbResult<()> {
|
|
396
411
|
class.define_method("lst_slice", method!(RbExpr::lst_slice, 2))?;
|
397
412
|
class.define_method("lst_eval", method!(RbExpr::lst_eval, 2))?;
|
398
413
|
class.define_method("cumulative_eval", method!(RbExpr::cumulative_eval, 3))?;
|
399
|
-
class.define_method("lst_to_struct", method!(RbExpr::lst_to_struct,
|
414
|
+
class.define_method("lst_to_struct", method!(RbExpr::lst_to_struct, 3))?;
|
400
415
|
class.define_method("rank", method!(RbExpr::rank, 2))?;
|
401
416
|
class.define_method("diff", method!(RbExpr::diff, 2))?;
|
402
417
|
class.define_method("pct_change", method!(RbExpr::pct_change, 1))?;
|
@@ -431,6 +446,7 @@ fn init() -> RbResult<()> {
|
|
431
446
|
class.define_method("log", method!(RbExpr::log, 1))?;
|
432
447
|
class.define_method("exp", method!(RbExpr::exp, 0))?;
|
433
448
|
class.define_method("entropy", method!(RbExpr::entropy, 2))?;
|
449
|
+
class.define_method("_hash", method!(RbExpr::hash, 4))?;
|
434
450
|
|
435
451
|
// meta
|
436
452
|
class.define_method("meta_pop", method!(RbExpr::meta_pop, 0))?;
|
@@ -446,6 +462,7 @@ fn init() -> RbResult<()> {
|
|
446
462
|
class.define_singleton_method("last", function!(crate::lazy::dsl::last, 0))?;
|
447
463
|
class.define_singleton_method("cols", function!(crate::lazy::dsl::cols, 1))?;
|
448
464
|
class.define_singleton_method("fold", function!(crate::lazy::dsl::fold, 3))?;
|
465
|
+
class.define_singleton_method("cumfold", function!(crate::lazy::dsl::cumfold, 4))?;
|
449
466
|
class.define_singleton_method("lit", function!(crate::lazy::dsl::lit, 1))?;
|
450
467
|
class.define_singleton_method("arange", function!(crate::lazy::dsl::arange, 3))?;
|
451
468
|
class.define_singleton_method("repeat", function!(crate::lazy::dsl::repeat, 2))?;
|
@@ -455,11 +472,13 @@ fn init() -> RbResult<()> {
|
|
455
472
|
function!(crate::lazy::dsl::spearman_rank_corr, 4),
|
456
473
|
)?;
|
457
474
|
class.define_singleton_method("cov", function!(crate::lazy::dsl::cov, 2))?;
|
475
|
+
class.define_singleton_method("argsort_by", function!(crate::lazy::dsl::argsort_by, 2))?;
|
458
476
|
class.define_singleton_method("when", function!(crate::lazy::dsl::when, 1))?;
|
459
477
|
class.define_singleton_method("concat_str", function!(crate::lazy::dsl::concat_str, 2))?;
|
460
478
|
class.define_singleton_method("concat_lst", function!(crate::lazy::dsl::concat_lst, 1))?;
|
461
479
|
|
462
480
|
let class = module.define_class("RbLazyFrame", Default::default())?;
|
481
|
+
class.define_singleton_method("read_json", function!(RbLazyFrame::read_json, 1))?;
|
463
482
|
class.define_singleton_method(
|
464
483
|
"new_from_ndjson",
|
465
484
|
function!(RbLazyFrame::new_from_ndjson, 7),
|
@@ -489,7 +508,9 @@ fn init() -> RbResult<()> {
|
|
489
508
|
class.define_method("select", method!(RbLazyFrame::select, 1))?;
|
490
509
|
class.define_method("groupby", method!(RbLazyFrame::groupby, 2))?;
|
491
510
|
class.define_method("groupby_rolling", method!(RbLazyFrame::groupby_rolling, 5))?;
|
492
|
-
class.define_method("groupby_dynamic", method!(RbLazyFrame::groupby_dynamic,
|
511
|
+
class.define_method("groupby_dynamic", method!(RbLazyFrame::groupby_dynamic, 9))?;
|
512
|
+
class.define_method("with_context", method!(RbLazyFrame::with_context, 1))?;
|
513
|
+
class.define_method("join_asof", method!(RbLazyFrame::join_asof, 11))?;
|
493
514
|
class.define_method("join", method!(RbLazyFrame::join, 7))?;
|
494
515
|
class.define_method("with_columns", method!(RbLazyFrame::with_columns, 1))?;
|
495
516
|
class.define_method("rename", method!(RbLazyFrame::rename, 2))?;
|
@@ -597,6 +618,7 @@ fn init() -> RbResult<()> {
|
|
597
618
|
class.define_method("median", method!(RbSeries::median, 0))?;
|
598
619
|
class.define_method("quantile", method!(RbSeries::quantile, 2))?;
|
599
620
|
class.define_method("_clone", method!(RbSeries::clone, 0))?;
|
621
|
+
class.define_method("apply_lambda", method!(RbSeries::apply_lambda, 3))?;
|
600
622
|
class.define_method("zip_with", method!(RbSeries::zip_with, 2))?;
|
601
623
|
class.define_method("to_dummies", method!(RbSeries::to_dummies, 0))?;
|
602
624
|
class.define_method("peak_max", method!(RbSeries::peak_max, 0))?;
|
@@ -611,6 +633,79 @@ fn init() -> RbResult<()> {
|
|
611
633
|
class.define_method("time_unit", method!(RbSeries::time_unit, 0))?;
|
612
634
|
class.define_method("set_at_idx", method!(RbSeries::set_at_idx, 2))?;
|
613
635
|
|
636
|
+
// set
|
637
|
+
// class.define_method("set_with_mask_str", method!(RbSeries::set_with_mask_str, 2))?;
|
638
|
+
class.define_method("set_with_mask_f64", method!(RbSeries::set_with_mask_f64, 2))?;
|
639
|
+
class.define_method("set_with_mask_f32", method!(RbSeries::set_with_mask_f32, 2))?;
|
640
|
+
class.define_method("set_with_mask_u8", method!(RbSeries::set_with_mask_u8, 2))?;
|
641
|
+
class.define_method("set_with_mask_u16", method!(RbSeries::set_with_mask_u16, 2))?;
|
642
|
+
class.define_method("set_with_mask_u32", method!(RbSeries::set_with_mask_u32, 2))?;
|
643
|
+
class.define_method("set_with_mask_u64", method!(RbSeries::set_with_mask_u64, 2))?;
|
644
|
+
class.define_method("set_with_mask_i8", method!(RbSeries::set_with_mask_i8, 2))?;
|
645
|
+
class.define_method("set_with_mask_i16", method!(RbSeries::set_with_mask_i16, 2))?;
|
646
|
+
class.define_method("set_with_mask_i32", method!(RbSeries::set_with_mask_i32, 2))?;
|
647
|
+
class.define_method("set_with_mask_i64", method!(RbSeries::set_with_mask_i64, 2))?;
|
648
|
+
class.define_method(
|
649
|
+
"set_with_mask_bool",
|
650
|
+
method!(RbSeries::set_with_mask_bool, 2),
|
651
|
+
)?;
|
652
|
+
|
653
|
+
// arithmetic
|
654
|
+
class.define_method("add_u8", method!(RbSeries::add_u8, 1))?;
|
655
|
+
class.define_method("add_u16", method!(RbSeries::add_u16, 1))?;
|
656
|
+
class.define_method("add_u32", method!(RbSeries::add_u32, 1))?;
|
657
|
+
class.define_method("add_u64", method!(RbSeries::add_u64, 1))?;
|
658
|
+
class.define_method("add_i8", method!(RbSeries::add_i8, 1))?;
|
659
|
+
class.define_method("add_i16", method!(RbSeries::add_i16, 1))?;
|
660
|
+
class.define_method("add_i32", method!(RbSeries::add_i32, 1))?;
|
661
|
+
class.define_method("add_i64", method!(RbSeries::add_i64, 1))?;
|
662
|
+
class.define_method("add_datetime", method!(RbSeries::add_datetime, 1))?;
|
663
|
+
class.define_method("add_duration", method!(RbSeries::add_duration, 1))?;
|
664
|
+
class.define_method("add_f32", method!(RbSeries::add_f32, 1))?;
|
665
|
+
class.define_method("add_f64", method!(RbSeries::add_f64, 1))?;
|
666
|
+
class.define_method("sub_u8", method!(RbSeries::sub_u8, 1))?;
|
667
|
+
class.define_method("sub_u16", method!(RbSeries::sub_u16, 1))?;
|
668
|
+
class.define_method("sub_u32", method!(RbSeries::sub_u32, 1))?;
|
669
|
+
class.define_method("sub_u64", method!(RbSeries::sub_u64, 1))?;
|
670
|
+
class.define_method("sub_i8", method!(RbSeries::sub_i8, 1))?;
|
671
|
+
class.define_method("sub_i16", method!(RbSeries::sub_i16, 1))?;
|
672
|
+
class.define_method("sub_i32", method!(RbSeries::sub_i32, 1))?;
|
673
|
+
class.define_method("sub_i64", method!(RbSeries::sub_i64, 1))?;
|
674
|
+
class.define_method("sub_datetime", method!(RbSeries::sub_datetime, 1))?;
|
675
|
+
class.define_method("sub_duration", method!(RbSeries::sub_duration, 1))?;
|
676
|
+
class.define_method("sub_f32", method!(RbSeries::sub_f32, 1))?;
|
677
|
+
class.define_method("sub_f64", method!(RbSeries::sub_f64, 1))?;
|
678
|
+
class.define_method("div_u8", method!(RbSeries::div_u8, 1))?;
|
679
|
+
class.define_method("div_u16", method!(RbSeries::div_u16, 1))?;
|
680
|
+
class.define_method("div_u32", method!(RbSeries::div_u32, 1))?;
|
681
|
+
class.define_method("div_u64", method!(RbSeries::div_u64, 1))?;
|
682
|
+
class.define_method("div_i8", method!(RbSeries::div_i8, 1))?;
|
683
|
+
class.define_method("div_i16", method!(RbSeries::div_i16, 1))?;
|
684
|
+
class.define_method("div_i32", method!(RbSeries::div_i32, 1))?;
|
685
|
+
class.define_method("div_i64", method!(RbSeries::div_i64, 1))?;
|
686
|
+
class.define_method("div_f32", method!(RbSeries::div_f32, 1))?;
|
687
|
+
class.define_method("div_f64", method!(RbSeries::div_f64, 1))?;
|
688
|
+
class.define_method("mul_u8", method!(RbSeries::mul_u8, 1))?;
|
689
|
+
class.define_method("mul_u16", method!(RbSeries::mul_u16, 1))?;
|
690
|
+
class.define_method("mul_u32", method!(RbSeries::mul_u32, 1))?;
|
691
|
+
class.define_method("mul_u64", method!(RbSeries::mul_u64, 1))?;
|
692
|
+
class.define_method("mul_i8", method!(RbSeries::mul_i8, 1))?;
|
693
|
+
class.define_method("mul_i16", method!(RbSeries::mul_i16, 1))?;
|
694
|
+
class.define_method("mul_i32", method!(RbSeries::mul_i32, 1))?;
|
695
|
+
class.define_method("mul_i64", method!(RbSeries::mul_i64, 1))?;
|
696
|
+
class.define_method("mul_f32", method!(RbSeries::mul_f32, 1))?;
|
697
|
+
class.define_method("mul_f64", method!(RbSeries::mul_f64, 1))?;
|
698
|
+
class.define_method("rem_u8", method!(RbSeries::rem_u8, 1))?;
|
699
|
+
class.define_method("rem_u16", method!(RbSeries::rem_u16, 1))?;
|
700
|
+
class.define_method("rem_u32", method!(RbSeries::rem_u32, 1))?;
|
701
|
+
class.define_method("rem_u64", method!(RbSeries::rem_u64, 1))?;
|
702
|
+
class.define_method("rem_i8", method!(RbSeries::rem_i8, 1))?;
|
703
|
+
class.define_method("rem_i16", method!(RbSeries::rem_i16, 1))?;
|
704
|
+
class.define_method("rem_i32", method!(RbSeries::rem_i32, 1))?;
|
705
|
+
class.define_method("rem_i64", method!(RbSeries::rem_i64, 1))?;
|
706
|
+
class.define_method("rem_f32", method!(RbSeries::rem_f32, 1))?;
|
707
|
+
class.define_method("rem_f64", method!(RbSeries::rem_f64, 1))?;
|
708
|
+
|
614
709
|
// eq
|
615
710
|
class.define_method("eq_u8", method!(RbSeries::eq_u8, 1))?;
|
616
711
|
class.define_method("eq_u16", method!(RbSeries::eq_u16, 1))?;
|
@@ -698,6 +793,40 @@ fn init() -> RbResult<()> {
|
|
698
793
|
Ok(())
|
699
794
|
}
|
700
795
|
|
796
|
+
fn dtype_cols(dtypes: RArray) -> RbResult<RbExpr> {
|
797
|
+
let dtypes = dtypes
|
798
|
+
.each()
|
799
|
+
.map(|v| v?.try_convert::<Wrap<DataType>>())
|
800
|
+
.collect::<RbResult<Vec<Wrap<DataType>>>>()?;
|
801
|
+
let dtypes = vec_extract_wrapped(dtypes);
|
802
|
+
Ok(crate::lazy::dsl::dtype_cols(dtypes))
|
803
|
+
}
|
804
|
+
|
805
|
+
#[allow(clippy::too_many_arguments)]
|
806
|
+
fn rb_duration(
|
807
|
+
days: Option<&RbExpr>,
|
808
|
+
seconds: Option<&RbExpr>,
|
809
|
+
nanoseconds: Option<&RbExpr>,
|
810
|
+
microseconds: Option<&RbExpr>,
|
811
|
+
milliseconds: Option<&RbExpr>,
|
812
|
+
minutes: Option<&RbExpr>,
|
813
|
+
hours: Option<&RbExpr>,
|
814
|
+
weeks: Option<&RbExpr>,
|
815
|
+
) -> RbExpr {
|
816
|
+
let args = DurationArgs {
|
817
|
+
days: days.map(|e| e.inner.clone()),
|
818
|
+
seconds: seconds.map(|e| e.inner.clone()),
|
819
|
+
nanoseconds: nanoseconds.map(|e| e.inner.clone()),
|
820
|
+
microseconds: microseconds.map(|e| e.inner.clone()),
|
821
|
+
milliseconds: milliseconds.map(|e| e.inner.clone()),
|
822
|
+
minutes: minutes.map(|e| e.inner.clone()),
|
823
|
+
hours: hours.map(|e| e.inner.clone()),
|
824
|
+
weeks: weeks.map(|e| e.inner.clone()),
|
825
|
+
};
|
826
|
+
|
827
|
+
polars::lazy::dsl::duration(args).into()
|
828
|
+
}
|
829
|
+
|
701
830
|
fn concat_df(seq: RArray) -> RbResult<RbDataFrame> {
|
702
831
|
let mut iter = seq.each();
|
703
832
|
let first = iter.next().unwrap()?;
|
@@ -726,6 +855,20 @@ fn concat_df(seq: RArray) -> RbResult<RbDataFrame> {
|
|
726
855
|
Ok(df.into())
|
727
856
|
}
|
728
857
|
|
858
|
+
fn concat_lf(lfs: Value, rechunk: bool, parallel: bool) -> RbResult<RbLazyFrame> {
|
859
|
+
let (seq, len) = get_rbseq(lfs)?;
|
860
|
+
let mut lfs = Vec::with_capacity(len);
|
861
|
+
|
862
|
+
for res in seq.each() {
|
863
|
+
let item = res?;
|
864
|
+
let lf = get_lf(item)?;
|
865
|
+
lfs.push(lf);
|
866
|
+
}
|
867
|
+
|
868
|
+
let lf = polars::lazy::dsl::concat(lfs, rechunk, parallel).map_err(RbPolarsErr::from)?;
|
869
|
+
Ok(lf.into())
|
870
|
+
}
|
871
|
+
|
729
872
|
fn rb_diag_concat_df(seq: RArray) -> RbResult<RbDataFrame> {
|
730
873
|
let mut dfs = Vec::new();
|
731
874
|
for item in seq.each() {
|
@@ -786,6 +929,25 @@ fn parquet_schema(rb_f: Value) -> RbResult<Value> {
|
|
786
929
|
Ok(dict.into())
|
787
930
|
}
|
788
931
|
|
932
|
+
fn collect_all(lfs: RArray) -> RbResult<Vec<RbDataFrame>> {
|
933
|
+
use polars_core::utils::rayon::prelude::*;
|
934
|
+
|
935
|
+
let lfs = lfs
|
936
|
+
.each()
|
937
|
+
.map(|v| v?.try_convert::<&RbLazyFrame>())
|
938
|
+
.collect::<RbResult<Vec<&RbLazyFrame>>>()?;
|
939
|
+
|
940
|
+
polars_core::POOL.install(|| {
|
941
|
+
lfs.par_iter()
|
942
|
+
.map(|lf| {
|
943
|
+
let df = lf.ldf.clone().collect()?;
|
944
|
+
Ok(RbDataFrame::new(df))
|
945
|
+
})
|
946
|
+
.collect::<polars_core::error::PolarsResult<Vec<_>>>()
|
947
|
+
.map_err(RbPolarsErr::from)
|
948
|
+
})
|
949
|
+
}
|
950
|
+
|
789
951
|
fn rb_date_range(
|
790
952
|
start: i64,
|
791
953
|
stop: i64,
|
@@ -802,12 +964,22 @@ fn rb_date_range(
|
|
802
964
|
Duration::parse(&every),
|
803
965
|
closed.0,
|
804
966
|
tu.0,
|
805
|
-
tz,
|
967
|
+
tz.as_ref(),
|
806
968
|
)
|
807
969
|
.into_series()
|
808
970
|
.into()
|
809
971
|
}
|
810
972
|
|
973
|
+
fn coalesce_exprs(exprs: RArray) -> RbResult<RbExpr> {
|
974
|
+
let exprs = rb_exprs_to_exprs(exprs)?;
|
975
|
+
Ok(polars::lazy::dsl::coalesce(&exprs).into())
|
976
|
+
}
|
977
|
+
|
978
|
+
fn sum_exprs(exprs: RArray) -> RbResult<RbExpr> {
|
979
|
+
let exprs = rb_exprs_to_exprs(exprs)?;
|
980
|
+
Ok(polars::lazy::dsl::sum_exprs(exprs).into())
|
981
|
+
}
|
982
|
+
|
811
983
|
fn as_struct(exprs: RArray) -> RbResult<RbExpr> {
|
812
984
|
let exprs = rb_exprs_to_exprs(exprs)?;
|
813
985
|
Ok(polars::lazy::dsl::as_struct(&exprs).into())
|