polars-df 0.1.3 → 0.1.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/Cargo.lock +142 -11
- data/Cargo.toml +5 -0
- data/ext/polars/Cargo.toml +17 -1
- data/ext/polars/src/apply/dataframe.rs +292 -0
- data/ext/polars/src/apply/mod.rs +254 -0
- data/ext/polars/src/apply/series.rs +1173 -0
- data/ext/polars/src/conversion.rs +180 -5
- data/ext/polars/src/dataframe.rs +146 -1
- data/ext/polars/src/error.rs +12 -0
- data/ext/polars/src/lazy/apply.rs +34 -2
- data/ext/polars/src/lazy/dataframe.rs +74 -3
- data/ext/polars/src/lazy/dsl.rs +136 -0
- data/ext/polars/src/lib.rs +199 -1
- data/ext/polars/src/list_construction.rs +100 -0
- data/ext/polars/src/series.rs +331 -0
- data/ext/polars/src/utils.rs +25 -0
- data/lib/polars/cat_name_space.rb +54 -0
- data/lib/polars/convert.rb +100 -0
- data/lib/polars/data_frame.rb +1558 -60
- data/lib/polars/date_time_expr.rb +2 -2
- data/lib/polars/date_time_name_space.rb +1484 -0
- data/lib/polars/dynamic_group_by.rb +49 -0
- data/lib/polars/expr.rb +4072 -107
- data/lib/polars/expr_dispatch.rb +8 -0
- data/lib/polars/functions.rb +192 -3
- data/lib/polars/group_by.rb +44 -3
- data/lib/polars/io.rb +20 -4
- data/lib/polars/lazy_frame.rb +800 -26
- data/lib/polars/lazy_functions.rb +687 -43
- data/lib/polars/lazy_group_by.rb +1 -0
- data/lib/polars/list_expr.rb +502 -5
- data/lib/polars/list_name_space.rb +346 -0
- data/lib/polars/rolling_group_by.rb +35 -0
- data/lib/polars/series.rb +934 -62
- data/lib/polars/string_expr.rb +189 -13
- data/lib/polars/string_name_space.rb +690 -0
- data/lib/polars/struct_name_space.rb +64 -0
- data/lib/polars/utils.rb +44 -0
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +14 -1
- metadata +15 -3
data/ext/polars/src/lazy/dsl.rs
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
use magnus::block::Proc;
|
1
2
|
use magnus::{class, RArray, RString, Value};
|
2
3
|
use polars::chunked_array::ops::SortOptions;
|
3
4
|
use polars::lazy::dsl;
|
@@ -659,6 +660,65 @@ impl RbExpr {
|
|
659
660
|
self.inner.clone().str().starts_with(sub).into()
|
660
661
|
}
|
661
662
|
|
663
|
+
pub fn str_hex_encode(&self) -> Self {
|
664
|
+
self.clone()
|
665
|
+
.inner
|
666
|
+
.map(
|
667
|
+
move |s| s.utf8().map(|s| s.hex_encode().into_series()),
|
668
|
+
GetOutput::same_type(),
|
669
|
+
)
|
670
|
+
.with_fmt("str.hex_encode")
|
671
|
+
.into()
|
672
|
+
}
|
673
|
+
|
674
|
+
pub fn str_hex_decode(&self, strict: Option<bool>) -> Self {
|
675
|
+
self.clone()
|
676
|
+
.inner
|
677
|
+
.map(
|
678
|
+
move |s| s.utf8()?.hex_decode(strict).map(|s| s.into_series()),
|
679
|
+
GetOutput::same_type(),
|
680
|
+
)
|
681
|
+
.with_fmt("str.hex_decode")
|
682
|
+
.into()
|
683
|
+
}
|
684
|
+
|
685
|
+
pub fn str_base64_encode(&self) -> Self {
|
686
|
+
self.clone()
|
687
|
+
.inner
|
688
|
+
.map(
|
689
|
+
move |s| s.utf8().map(|s| s.base64_encode().into_series()),
|
690
|
+
GetOutput::same_type(),
|
691
|
+
)
|
692
|
+
.with_fmt("str.base64_encode")
|
693
|
+
.into()
|
694
|
+
}
|
695
|
+
|
696
|
+
pub fn str_base64_decode(&self, strict: Option<bool>) -> Self {
|
697
|
+
self.clone()
|
698
|
+
.inner
|
699
|
+
.map(
|
700
|
+
move |s| s.utf8()?.base64_decode(strict).map(|s| s.into_series()),
|
701
|
+
GetOutput::same_type(),
|
702
|
+
)
|
703
|
+
.with_fmt("str.base64_decode")
|
704
|
+
.into()
|
705
|
+
}
|
706
|
+
|
707
|
+
pub fn str_json_path_match(&self, pat: String) -> Self {
|
708
|
+
let function = move |s: Series| {
|
709
|
+
let ca = s.utf8()?;
|
710
|
+
match ca.json_path_match(&pat) {
|
711
|
+
Ok(ca) => Ok(ca.into_series()),
|
712
|
+
Err(e) => Err(PolarsError::ComputeError(format!("{:?}", e).into())),
|
713
|
+
}
|
714
|
+
};
|
715
|
+
self.clone()
|
716
|
+
.inner
|
717
|
+
.map(function, GetOutput::from_type(DataType::Utf8))
|
718
|
+
.with_fmt("str.json_path_match")
|
719
|
+
.into()
|
720
|
+
}
|
721
|
+
|
662
722
|
/// Forwards to the string namespace's `extract` with the given pattern and
/// capture-group index.
pub fn str_extract(&self, pat: String, group_index: usize) -> Self {
    let strings = self.inner.clone().str();
    strings.extract(&pat, group_index).into()
}
|
@@ -887,6 +947,14 @@ impl RbExpr {
|
|
887
947
|
self.inner.clone().dt().round(&every, &offset).into()
|
888
948
|
}
|
889
949
|
|
950
|
+
pub fn map(&self, lambda: Value, output_type: Option<Wrap<DataType>>, agg_list: bool) -> Self {
|
951
|
+
map_single(self, lambda, output_type, agg_list)
|
952
|
+
}
|
953
|
+
|
954
|
+
/// Builds the `dot` expression of this expression with `other`.
pub fn dot(&self, other: &RbExpr) -> Self {
    let lhs = self.inner.clone();
    let rhs = other.inner.clone();
    lhs.dot(rhs).into()
}
|
957
|
+
|
890
958
|
pub fn reinterpret(&self, signed: bool) -> Self {
|
891
959
|
let function = move |s: Series| reinterpret(&s, signed);
|
892
960
|
let dt = if signed {
|
@@ -916,6 +984,23 @@ impl RbExpr {
|
|
916
984
|
self.inner.clone().suffix(&suffix).into()
|
917
985
|
}
|
918
986
|
|
987
|
+
/// Renames the expression's output by calling the Ruby proc `lambda` with the
/// current name and using the `String` it returns.
///
/// # Panics
///
/// Panics if the proc call fails or its result cannot be converted to a
/// `String`, because the result is unwrapped.
pub fn map_alias(&self, lambda: Proc) -> Self {
    let expr = self.inner.clone();
    expr.map_alias(move |name| {
        // TODO: switch to a `match` that turns a failed proc call into
        // `PolarsError::ComputeError("Ruby function in 'map_alias' produced an error: ...")`
        // instead of unwrapping.
        lambda.call::<_, String>((name,)).unwrap()
    })
    .into()
}
|
1003
|
+
|
919
1004
|
/// Forwards to `exclude` on the inner expression with the given column names.
pub fn exclude(&self, columns: Vec<String>) -> Self {
    let expr = self.inner.clone();
    expr.exclude(columns).into()
}
|
@@ -1208,6 +1293,28 @@ impl RbExpr {
|
|
1208
1293
|
.into()
|
1209
1294
|
}
|
1210
1295
|
|
1296
|
+
pub fn lst_to_struct(
|
1297
|
+
&self,
|
1298
|
+
width_strat: Wrap<ListToStructWidthStrategy>,
|
1299
|
+
_name_gen: Option<Value>,
|
1300
|
+
) -> RbResult<Self> {
|
1301
|
+
// TODO fix
|
1302
|
+
let name_gen = None;
|
1303
|
+
// let name_gen = name_gen.map(|lambda| {
|
1304
|
+
// Arc::new(move |idx: usize| {
|
1305
|
+
// let out: Value = lambda.funcall("call", (idx,)).unwrap();
|
1306
|
+
// out.try_convert::<String>().unwrap()
|
1307
|
+
// }) as NameGenerator
|
1308
|
+
// });
|
1309
|
+
|
1310
|
+
Ok(self
|
1311
|
+
.inner
|
1312
|
+
.clone()
|
1313
|
+
.arr()
|
1314
|
+
.to_struct(width_strat.0, name_gen)
|
1315
|
+
.into())
|
1316
|
+
}
|
1317
|
+
|
1211
1318
|
pub fn rank(&self, method: Wrap<RankMethod>, reverse: bool) -> Self {
|
1212
1319
|
let options = RankOptions {
|
1213
1320
|
method: method.0,
|
@@ -1365,6 +1472,10 @@ impl RbExpr {
|
|
1365
1472
|
/// Forwards to `entropy` on the inner expression with the given `base` and
/// `normalize` flag.
pub fn entropy(&self, base: f64, normalize: bool) -> Self {
    let expr = self.inner.clone();
    expr.entropy(base, normalize).into()
}
|
1475
|
+
|
1476
|
+
/// Forwards to `hash` on the inner expression, passing the four seed values
/// through in order.
pub fn hash(&self, seed: u64, seed_1: u64, seed_2: u64, seed_3: u64) -> Self {
    let expr = self.inner.clone();
    expr.hash(seed, seed_1, seed_2, seed_3).into()
}
|
1368
1479
|
}
|
1369
1480
|
|
1370
1481
|
pub fn col(name: String) -> RbExpr {
|
@@ -1394,6 +1505,13 @@ pub fn fold(acc: &RbExpr, lambda: Value, exprs: RArray) -> RbResult<RbExpr> {
|
|
1394
1505
|
Ok(polars::lazy::dsl::fold_exprs(acc.inner.clone(), func, exprs).into())
|
1395
1506
|
}
|
1396
1507
|
|
1508
|
+
pub fn cumfold(acc: &RbExpr, lambda: Value, exprs: RArray, include_init: bool) -> RbResult<RbExpr> {
|
1509
|
+
let exprs = rb_exprs_to_exprs(exprs)?;
|
1510
|
+
|
1511
|
+
let func = move |a: Series, b: Series| binary_lambda(lambda, a, b);
|
1512
|
+
Ok(polars::lazy::dsl::cumfold_exprs(acc.inner.clone(), func, exprs, include_init).into())
|
1513
|
+
}
|
1514
|
+
|
1397
1515
|
// TODO improve
|
1398
1516
|
pub fn lit(value: Value) -> RbResult<RbExpr> {
|
1399
1517
|
if value.is_nil() {
|
@@ -1433,6 +1551,24 @@ pub fn repeat(value: Value, n_times: &RbExpr) -> RbResult<RbExpr> {
|
|
1433
1551
|
}
|
1434
1552
|
}
|
1435
1553
|
|
1554
|
+
pub fn pearson_corr(a: &RbExpr, b: &RbExpr, ddof: u8) -> RbExpr {
|
1555
|
+
polars::lazy::dsl::pearson_corr(a.inner.clone(), b.inner.clone(), ddof).into()
|
1556
|
+
}
|
1557
|
+
|
1558
|
+
pub fn spearman_rank_corr(a: &RbExpr, b: &RbExpr, ddof: u8, propagate_nans: bool) -> RbExpr {
|
1559
|
+
polars::lazy::dsl::spearman_rank_corr(a.inner.clone(), b.inner.clone(), ddof, propagate_nans)
|
1560
|
+
.into()
|
1561
|
+
}
|
1562
|
+
|
1563
|
+
pub fn cov(a: &RbExpr, b: &RbExpr) -> RbExpr {
|
1564
|
+
polars::lazy::dsl::cov(a.inner.clone(), b.inner.clone()).into()
|
1565
|
+
}
|
1566
|
+
|
1567
|
+
pub fn argsort_by(by: RArray, reverse: Vec<bool>) -> RbResult<RbExpr> {
|
1568
|
+
let by = rb_exprs_to_exprs(by)?;
|
1569
|
+
Ok(polars::lazy::dsl::argsort_by(by, &reverse).into())
|
1570
|
+
}
|
1571
|
+
|
1436
1572
|
#[magnus::wrap(class = "Polars::RbWhen")]
|
1437
1573
|
#[derive(Clone)]
|
1438
1574
|
pub struct RbWhen {
|
data/ext/polars/src/lib.rs
CHANGED
@@ -1,9 +1,11 @@
|
|
1
|
+
mod apply;
|
1
2
|
mod batched_csv;
|
2
3
|
mod conversion;
|
3
4
|
mod dataframe;
|
4
5
|
mod error;
|
5
6
|
mod file;
|
6
7
|
mod lazy;
|
8
|
+
mod list_construction;
|
7
9
|
mod series;
|
8
10
|
mod set;
|
9
11
|
mod utils;
|
@@ -24,9 +26,23 @@ use polars::datatypes::{DataType, TimeUnit};
|
|
24
26
|
use polars::error::PolarsResult;
|
25
27
|
use polars::frame::DataFrame;
|
26
28
|
use polars::functions::{diag_concat_df, hor_concat_df};
|
27
|
-
use polars::prelude::{ClosedWindow, Duration, IntoSeries, TimeZone};
|
29
|
+
use polars::prelude::{ClosedWindow, Duration, DurationArgs, IntoSeries, TimeZone};
|
28
30
|
use series::RbSeries;
|
29
31
|
|
32
|
+
#[cfg(target_os = "linux")]
|
33
|
+
use jemallocator::Jemalloc;
|
34
|
+
|
35
|
+
#[cfg(not(target_os = "linux"))]
|
36
|
+
use mimalloc::MiMalloc;
|
37
|
+
|
38
|
+
#[global_allocator]
|
39
|
+
#[cfg(target_os = "linux")]
|
40
|
+
static GLOBAL: Jemalloc = Jemalloc;
|
41
|
+
|
42
|
+
#[global_allocator]
|
43
|
+
#[cfg(not(target_os = "linux"))]
|
44
|
+
static GLOBAL: MiMalloc = MiMalloc;
|
45
|
+
|
30
46
|
type RbResult<T> = Result<T, Error>;
|
31
47
|
|
32
48
|
fn module() -> RModule {
|
@@ -40,13 +56,18 @@ fn series() -> RClass {
|
|
40
56
|
#[magnus::init]
|
41
57
|
fn init() -> RbResult<()> {
|
42
58
|
let module = module();
|
59
|
+
module.define_singleton_method("_rb_duration", function!(rb_duration, 8))?;
|
43
60
|
module.define_singleton_method("_concat_df", function!(concat_df, 1))?;
|
61
|
+
module.define_singleton_method("_concat_lf", function!(concat_lf, 3))?;
|
44
62
|
module.define_singleton_method("_diag_concat_df", function!(rb_diag_concat_df, 1))?;
|
45
63
|
module.define_singleton_method("_hor_concat_df", function!(rb_hor_concat_df, 1))?;
|
46
64
|
module.define_singleton_method("_concat_series", function!(concat_series, 1))?;
|
47
65
|
module.define_singleton_method("_ipc_schema", function!(ipc_schema, 1))?;
|
48
66
|
module.define_singleton_method("_parquet_schema", function!(parquet_schema, 1))?;
|
67
|
+
module.define_singleton_method("_collect_all", function!(collect_all, 1))?;
|
49
68
|
module.define_singleton_method("_rb_date_range", function!(rb_date_range, 7))?;
|
69
|
+
module.define_singleton_method("_coalesce_exprs", function!(coalesce_exprs, 1))?;
|
70
|
+
module.define_singleton_method("_sum_exprs", function!(sum_exprs, 1))?;
|
50
71
|
module.define_singleton_method("_as_struct", function!(as_struct, 1))?;
|
51
72
|
module.define_singleton_method("_arg_where", function!(arg_where, 1))?;
|
52
73
|
|
@@ -59,10 +80,13 @@ fn init() -> RbResult<()> {
|
|
59
80
|
class.define_singleton_method("read_csv", function!(RbDataFrame::read_csv, -1))?;
|
60
81
|
class.define_singleton_method("read_parquet", function!(RbDataFrame::read_parquet, 7))?;
|
61
82
|
class.define_singleton_method("read_ipc", function!(RbDataFrame::read_ipc, 6))?;
|
83
|
+
class.define_singleton_method("read_avro", function!(RbDataFrame::read_avro, 4))?;
|
84
|
+
class.define_singleton_method("read_hashes", function!(RbDataFrame::read_hashes, 3))?;
|
62
85
|
class.define_singleton_method("read_hash", function!(RbDataFrame::read_hash, 1))?;
|
63
86
|
class.define_singleton_method("read_json", function!(RbDataFrame::read_json, 1))?;
|
64
87
|
class.define_singleton_method("read_ndjson", function!(RbDataFrame::read_ndjson, 1))?;
|
65
88
|
class.define_method("estimated_size", method!(RbDataFrame::estimated_size, 0))?;
|
89
|
+
class.define_method("write_avro", method!(RbDataFrame::write_avro, 2))?;
|
66
90
|
class.define_method("write_json", method!(RbDataFrame::write_json, 3))?;
|
67
91
|
class.define_method("write_ndjson", method!(RbDataFrame::write_ndjson, 1))?;
|
68
92
|
class.define_method("write_csv", method!(RbDataFrame::write_csv, 10))?;
|
@@ -128,6 +152,7 @@ fn init() -> RbResult<()> {
|
|
128
152
|
class.define_method("with_row_count", method!(RbDataFrame::with_row_count, 2))?;
|
129
153
|
class.define_method("_clone", method!(RbDataFrame::clone, 0))?;
|
130
154
|
class.define_method("melt", method!(RbDataFrame::melt, 4))?;
|
155
|
+
class.define_method("pivot_expr", method!(RbDataFrame::pivot_expr, 6))?;
|
131
156
|
class.define_method("partition_by", method!(RbDataFrame::partition_by, 2))?;
|
132
157
|
class.define_method("shift", method!(RbDataFrame::shift, 1))?;
|
133
158
|
class.define_method("unique", method!(RbDataFrame::unique, 3))?;
|
@@ -146,7 +171,9 @@ fn init() -> RbResult<()> {
|
|
146
171
|
class.define_method("quantile", method!(RbDataFrame::quantile, 2))?;
|
147
172
|
class.define_method("to_dummies", method!(RbDataFrame::to_dummies, 1))?;
|
148
173
|
class.define_method("null_count", method!(RbDataFrame::null_count, 0))?;
|
174
|
+
class.define_method("apply", method!(RbDataFrame::apply, 3))?;
|
149
175
|
class.define_method("shrink_to_fit", method!(RbDataFrame::shrink_to_fit, 0))?;
|
176
|
+
class.define_method("hash_rows", method!(RbDataFrame::hash_rows, 4))?;
|
150
177
|
class.define_method("transpose", method!(RbDataFrame::transpose, 2))?;
|
151
178
|
class.define_method("upsample", method!(RbDataFrame::upsample, 5))?;
|
152
179
|
class.define_method("to_struct", method!(RbDataFrame::to_struct, 1))?;
|
@@ -281,6 +308,14 @@ fn init() -> RbResult<()> {
|
|
281
308
|
class.define_method("str_contains", method!(RbExpr::str_contains, 2))?;
|
282
309
|
class.define_method("str_ends_with", method!(RbExpr::str_ends_with, 1))?;
|
283
310
|
class.define_method("str_starts_with", method!(RbExpr::str_starts_with, 1))?;
|
311
|
+
class.define_method("str_hex_encode", method!(RbExpr::str_hex_encode, 0))?;
|
312
|
+
class.define_method("str_hex_decode", method!(RbExpr::str_hex_decode, 1))?;
|
313
|
+
class.define_method("str_base64_encode", method!(RbExpr::str_base64_encode, 0))?;
|
314
|
+
class.define_method("str_base64_decode", method!(RbExpr::str_base64_decode, 1))?;
|
315
|
+
class.define_method(
|
316
|
+
"str_json_path_match",
|
317
|
+
method!(RbExpr::str_json_path_match, 1),
|
318
|
+
)?;
|
284
319
|
class.define_method("str_extract", method!(RbExpr::str_extract, 2))?;
|
285
320
|
class.define_method("str_extract_all", method!(RbExpr::str_extract_all, 1))?;
|
286
321
|
class.define_method("count_match", method!(RbExpr::count_match, 1))?;
|
@@ -338,11 +373,14 @@ fn init() -> RbResult<()> {
|
|
338
373
|
class.define_method("dt_tz_localize", method!(RbExpr::dt_tz_localize, 1))?;
|
339
374
|
class.define_method("dt_truncate", method!(RbExpr::dt_truncate, 2))?;
|
340
375
|
class.define_method("dt_round", method!(RbExpr::dt_round, 2))?;
|
376
|
+
class.define_method("map", method!(RbExpr::map, 3))?;
|
377
|
+
class.define_method("dot", method!(RbExpr::dot, 1))?;
|
341
378
|
class.define_method("reinterpret", method!(RbExpr::reinterpret, 1))?;
|
342
379
|
class.define_method("mode", method!(RbExpr::mode, 0))?;
|
343
380
|
class.define_method("keep_name", method!(RbExpr::keep_name, 0))?;
|
344
381
|
class.define_method("prefix", method!(RbExpr::prefix, 1))?;
|
345
382
|
class.define_method("suffix", method!(RbExpr::suffix, 1))?;
|
383
|
+
class.define_method("map_alias", method!(RbExpr::map_alias, 1))?;
|
346
384
|
class.define_method("exclude", method!(RbExpr::exclude, 1))?;
|
347
385
|
class.define_method("interpolate", method!(RbExpr::interpolate, 0))?;
|
348
386
|
class.define_method("rolling_sum", method!(RbExpr::rolling_sum, 6))?;
|
@@ -372,6 +410,7 @@ fn init() -> RbResult<()> {
|
|
372
410
|
class.define_method("lst_slice", method!(RbExpr::lst_slice, 2))?;
|
373
411
|
class.define_method("lst_eval", method!(RbExpr::lst_eval, 2))?;
|
374
412
|
class.define_method("cumulative_eval", method!(RbExpr::cumulative_eval, 3))?;
|
413
|
+
class.define_method("lst_to_struct", method!(RbExpr::lst_to_struct, 2))?;
|
375
414
|
class.define_method("rank", method!(RbExpr::rank, 2))?;
|
376
415
|
class.define_method("diff", method!(RbExpr::diff, 2))?;
|
377
416
|
class.define_method("pct_change", method!(RbExpr::pct_change, 1))?;
|
@@ -406,6 +445,7 @@ fn init() -> RbResult<()> {
|
|
406
445
|
class.define_method("log", method!(RbExpr::log, 1))?;
|
407
446
|
class.define_method("exp", method!(RbExpr::exp, 0))?;
|
408
447
|
class.define_method("entropy", method!(RbExpr::entropy, 2))?;
|
448
|
+
class.define_method("_hash", method!(RbExpr::hash, 4))?;
|
409
449
|
|
410
450
|
// meta
|
411
451
|
class.define_method("meta_pop", method!(RbExpr::meta_pop, 0))?;
|
@@ -421,14 +461,23 @@ fn init() -> RbResult<()> {
|
|
421
461
|
class.define_singleton_method("last", function!(crate::lazy::dsl::last, 0))?;
|
422
462
|
class.define_singleton_method("cols", function!(crate::lazy::dsl::cols, 1))?;
|
423
463
|
class.define_singleton_method("fold", function!(crate::lazy::dsl::fold, 3))?;
|
464
|
+
class.define_singleton_method("cumfold", function!(crate::lazy::dsl::cumfold, 4))?;
|
424
465
|
class.define_singleton_method("lit", function!(crate::lazy::dsl::lit, 1))?;
|
425
466
|
class.define_singleton_method("arange", function!(crate::lazy::dsl::arange, 3))?;
|
426
467
|
class.define_singleton_method("repeat", function!(crate::lazy::dsl::repeat, 2))?;
|
468
|
+
class.define_singleton_method("pearson_corr", function!(crate::lazy::dsl::pearson_corr, 3))?;
|
469
|
+
class.define_singleton_method(
|
470
|
+
"spearman_rank_corr",
|
471
|
+
function!(crate::lazy::dsl::spearman_rank_corr, 4),
|
472
|
+
)?;
|
473
|
+
class.define_singleton_method("cov", function!(crate::lazy::dsl::cov, 2))?;
|
474
|
+
class.define_singleton_method("argsort_by", function!(crate::lazy::dsl::argsort_by, 2))?;
|
427
475
|
class.define_singleton_method("when", function!(crate::lazy::dsl::when, 1))?;
|
428
476
|
class.define_singleton_method("concat_str", function!(crate::lazy::dsl::concat_str, 2))?;
|
429
477
|
class.define_singleton_method("concat_lst", function!(crate::lazy::dsl::concat_lst, 1))?;
|
430
478
|
|
431
479
|
let class = module.define_class("RbLazyFrame", Default::default())?;
|
480
|
+
class.define_singleton_method("read_json", function!(RbLazyFrame::read_json, 1))?;
|
432
481
|
class.define_singleton_method(
|
433
482
|
"new_from_ndjson",
|
434
483
|
function!(RbLazyFrame::new_from_ndjson, 7),
|
@@ -459,6 +508,8 @@ fn init() -> RbResult<()> {
|
|
459
508
|
class.define_method("groupby", method!(RbLazyFrame::groupby, 2))?;
|
460
509
|
class.define_method("groupby_rolling", method!(RbLazyFrame::groupby_rolling, 5))?;
|
461
510
|
class.define_method("groupby_dynamic", method!(RbLazyFrame::groupby_dynamic, 8))?;
|
511
|
+
class.define_method("with_context", method!(RbLazyFrame::with_context, 1))?;
|
512
|
+
class.define_method("join_asof", method!(RbLazyFrame::join_asof, 11))?;
|
462
513
|
class.define_method("join", method!(RbLazyFrame::join, 7))?;
|
463
514
|
class.define_method("with_columns", method!(RbLazyFrame::with_columns, 1))?;
|
464
515
|
class.define_method("rename", method!(RbLazyFrame::rename, 2))?;
|
@@ -507,7 +558,10 @@ fn init() -> RbResult<()> {
|
|
507
558
|
class.define_singleton_method("new_opt_f32", function!(RbSeries::new_opt_f32, 3))?;
|
508
559
|
class.define_singleton_method("new_opt_f64", function!(RbSeries::new_opt_f64, 3))?;
|
509
560
|
class.define_singleton_method("new_str", function!(RbSeries::new_str, 3))?;
|
561
|
+
class.define_singleton_method("new_object", function!(RbSeries::new_object, 3))?;
|
562
|
+
class.define_singleton_method("new_list", function!(RbSeries::new_list, 3))?;
|
510
563
|
class.define_singleton_method("new_opt_date", function!(RbSeries::new_opt_date, 3))?;
|
564
|
+
class.define_singleton_method("new_opt_datetime", function!(RbSeries::new_opt_datetime, 3))?;
|
511
565
|
class.define_method("is_sorted_flag", method!(RbSeries::is_sorted_flag, 0))?;
|
512
566
|
class.define_method(
|
513
567
|
"is_sorted_reverse_flag",
|
@@ -563,6 +617,7 @@ fn init() -> RbResult<()> {
|
|
563
617
|
class.define_method("median", method!(RbSeries::median, 0))?;
|
564
618
|
class.define_method("quantile", method!(RbSeries::quantile, 2))?;
|
565
619
|
class.define_method("_clone", method!(RbSeries::clone, 0))?;
|
620
|
+
class.define_method("apply_lambda", method!(RbSeries::apply_lambda, 3))?;
|
566
621
|
class.define_method("zip_with", method!(RbSeries::zip_with, 2))?;
|
567
622
|
class.define_method("to_dummies", method!(RbSeries::to_dummies, 0))?;
|
568
623
|
class.define_method("peak_max", method!(RbSeries::peak_max, 0))?;
|
@@ -577,6 +632,79 @@ fn init() -> RbResult<()> {
|
|
577
632
|
class.define_method("time_unit", method!(RbSeries::time_unit, 0))?;
|
578
633
|
class.define_method("set_at_idx", method!(RbSeries::set_at_idx, 2))?;
|
579
634
|
|
635
|
+
// set
|
636
|
+
// class.define_method("set_with_mask_str", method!(RbSeries::set_with_mask_str, 2))?;
|
637
|
+
class.define_method("set_with_mask_f64", method!(RbSeries::set_with_mask_f64, 2))?;
|
638
|
+
class.define_method("set_with_mask_f32", method!(RbSeries::set_with_mask_f32, 2))?;
|
639
|
+
class.define_method("set_with_mask_u8", method!(RbSeries::set_with_mask_u8, 2))?;
|
640
|
+
class.define_method("set_with_mask_u16", method!(RbSeries::set_with_mask_u16, 2))?;
|
641
|
+
class.define_method("set_with_mask_u32", method!(RbSeries::set_with_mask_u32, 2))?;
|
642
|
+
class.define_method("set_with_mask_u64", method!(RbSeries::set_with_mask_u64, 2))?;
|
643
|
+
class.define_method("set_with_mask_i8", method!(RbSeries::set_with_mask_i8, 2))?;
|
644
|
+
class.define_method("set_with_mask_i16", method!(RbSeries::set_with_mask_i16, 2))?;
|
645
|
+
class.define_method("set_with_mask_i32", method!(RbSeries::set_with_mask_i32, 2))?;
|
646
|
+
class.define_method("set_with_mask_i64", method!(RbSeries::set_with_mask_i64, 2))?;
|
647
|
+
class.define_method(
|
648
|
+
"set_with_mask_bool",
|
649
|
+
method!(RbSeries::set_with_mask_bool, 2),
|
650
|
+
)?;
|
651
|
+
|
652
|
+
// arithmetic
|
653
|
+
class.define_method("add_u8", method!(RbSeries::add_u8, 1))?;
|
654
|
+
class.define_method("add_u16", method!(RbSeries::add_u16, 1))?;
|
655
|
+
class.define_method("add_u32", method!(RbSeries::add_u32, 1))?;
|
656
|
+
class.define_method("add_u64", method!(RbSeries::add_u64, 1))?;
|
657
|
+
class.define_method("add_i8", method!(RbSeries::add_i8, 1))?;
|
658
|
+
class.define_method("add_i16", method!(RbSeries::add_i16, 1))?;
|
659
|
+
class.define_method("add_i32", method!(RbSeries::add_i32, 1))?;
|
660
|
+
class.define_method("add_i64", method!(RbSeries::add_i64, 1))?;
|
661
|
+
class.define_method("add_datetime", method!(RbSeries::add_datetime, 1))?;
|
662
|
+
class.define_method("add_duration", method!(RbSeries::add_duration, 1))?;
|
663
|
+
class.define_method("add_f32", method!(RbSeries::add_f32, 1))?;
|
664
|
+
class.define_method("add_f64", method!(RbSeries::add_f64, 1))?;
|
665
|
+
class.define_method("sub_u8", method!(RbSeries::sub_u8, 1))?;
|
666
|
+
class.define_method("sub_u16", method!(RbSeries::sub_u16, 1))?;
|
667
|
+
class.define_method("sub_u32", method!(RbSeries::sub_u32, 1))?;
|
668
|
+
class.define_method("sub_u64", method!(RbSeries::sub_u64, 1))?;
|
669
|
+
class.define_method("sub_i8", method!(RbSeries::sub_i8, 1))?;
|
670
|
+
class.define_method("sub_i16", method!(RbSeries::sub_i16, 1))?;
|
671
|
+
class.define_method("sub_i32", method!(RbSeries::sub_i32, 1))?;
|
672
|
+
class.define_method("sub_i64", method!(RbSeries::sub_i64, 1))?;
|
673
|
+
class.define_method("sub_datetime", method!(RbSeries::sub_datetime, 1))?;
|
674
|
+
class.define_method("sub_duration", method!(RbSeries::sub_duration, 1))?;
|
675
|
+
class.define_method("sub_f32", method!(RbSeries::sub_f32, 1))?;
|
676
|
+
class.define_method("sub_f64", method!(RbSeries::sub_f64, 1))?;
|
677
|
+
class.define_method("div_u8", method!(RbSeries::div_u8, 1))?;
|
678
|
+
class.define_method("div_u16", method!(RbSeries::div_u16, 1))?;
|
679
|
+
class.define_method("div_u32", method!(RbSeries::div_u32, 1))?;
|
680
|
+
class.define_method("div_u64", method!(RbSeries::div_u64, 1))?;
|
681
|
+
class.define_method("div_i8", method!(RbSeries::div_i8, 1))?;
|
682
|
+
class.define_method("div_i16", method!(RbSeries::div_i16, 1))?;
|
683
|
+
class.define_method("div_i32", method!(RbSeries::div_i32, 1))?;
|
684
|
+
class.define_method("div_i64", method!(RbSeries::div_i64, 1))?;
|
685
|
+
class.define_method("div_f32", method!(RbSeries::div_f32, 1))?;
|
686
|
+
class.define_method("div_f64", method!(RbSeries::div_f64, 1))?;
|
687
|
+
class.define_method("mul_u8", method!(RbSeries::mul_u8, 1))?;
|
688
|
+
class.define_method("mul_u16", method!(RbSeries::mul_u16, 1))?;
|
689
|
+
class.define_method("mul_u32", method!(RbSeries::mul_u32, 1))?;
|
690
|
+
class.define_method("mul_u64", method!(RbSeries::mul_u64, 1))?;
|
691
|
+
class.define_method("mul_i8", method!(RbSeries::mul_i8, 1))?;
|
692
|
+
class.define_method("mul_i16", method!(RbSeries::mul_i16, 1))?;
|
693
|
+
class.define_method("mul_i32", method!(RbSeries::mul_i32, 1))?;
|
694
|
+
class.define_method("mul_i64", method!(RbSeries::mul_i64, 1))?;
|
695
|
+
class.define_method("mul_f32", method!(RbSeries::mul_f32, 1))?;
|
696
|
+
class.define_method("mul_f64", method!(RbSeries::mul_f64, 1))?;
|
697
|
+
class.define_method("rem_u8", method!(RbSeries::rem_u8, 1))?;
|
698
|
+
class.define_method("rem_u16", method!(RbSeries::rem_u16, 1))?;
|
699
|
+
class.define_method("rem_u32", method!(RbSeries::rem_u32, 1))?;
|
700
|
+
class.define_method("rem_u64", method!(RbSeries::rem_u64, 1))?;
|
701
|
+
class.define_method("rem_i8", method!(RbSeries::rem_i8, 1))?;
|
702
|
+
class.define_method("rem_i16", method!(RbSeries::rem_i16, 1))?;
|
703
|
+
class.define_method("rem_i32", method!(RbSeries::rem_i32, 1))?;
|
704
|
+
class.define_method("rem_i64", method!(RbSeries::rem_i64, 1))?;
|
705
|
+
class.define_method("rem_f32", method!(RbSeries::rem_f32, 1))?;
|
706
|
+
class.define_method("rem_f64", method!(RbSeries::rem_f64, 1))?;
|
707
|
+
|
580
708
|
// eq
|
581
709
|
class.define_method("eq_u8", method!(RbSeries::eq_u8, 1))?;
|
582
710
|
class.define_method("eq_u16", method!(RbSeries::eq_u16, 1))?;
|
@@ -664,6 +792,31 @@ fn init() -> RbResult<()> {
|
|
664
792
|
Ok(())
|
665
793
|
}
|
666
794
|
|
795
|
+
#[allow(clippy::too_many_arguments)]
|
796
|
+
fn rb_duration(
|
797
|
+
days: Option<&RbExpr>,
|
798
|
+
seconds: Option<&RbExpr>,
|
799
|
+
nanoseconds: Option<&RbExpr>,
|
800
|
+
microseconds: Option<&RbExpr>,
|
801
|
+
milliseconds: Option<&RbExpr>,
|
802
|
+
minutes: Option<&RbExpr>,
|
803
|
+
hours: Option<&RbExpr>,
|
804
|
+
weeks: Option<&RbExpr>,
|
805
|
+
) -> RbExpr {
|
806
|
+
let args = DurationArgs {
|
807
|
+
days: days.map(|e| e.inner.clone()),
|
808
|
+
seconds: seconds.map(|e| e.inner.clone()),
|
809
|
+
nanoseconds: nanoseconds.map(|e| e.inner.clone()),
|
810
|
+
microseconds: microseconds.map(|e| e.inner.clone()),
|
811
|
+
milliseconds: milliseconds.map(|e| e.inner.clone()),
|
812
|
+
minutes: minutes.map(|e| e.inner.clone()),
|
813
|
+
hours: hours.map(|e| e.inner.clone()),
|
814
|
+
weeks: weeks.map(|e| e.inner.clone()),
|
815
|
+
};
|
816
|
+
|
817
|
+
polars::lazy::dsl::duration(args).into()
|
818
|
+
}
|
819
|
+
|
667
820
|
fn concat_df(seq: RArray) -> RbResult<RbDataFrame> {
|
668
821
|
let mut iter = seq.each();
|
669
822
|
let first = iter.next().unwrap()?;
|
@@ -692,6 +845,20 @@ fn concat_df(seq: RArray) -> RbResult<RbDataFrame> {
|
|
692
845
|
Ok(df.into())
|
693
846
|
}
|
694
847
|
|
848
|
+
fn concat_lf(lfs: Value, rechunk: bool, parallel: bool) -> RbResult<RbLazyFrame> {
|
849
|
+
let (seq, len) = get_rbseq(lfs)?;
|
850
|
+
let mut lfs = Vec::with_capacity(len);
|
851
|
+
|
852
|
+
for res in seq.each() {
|
853
|
+
let item = res?;
|
854
|
+
let lf = get_lf(item)?;
|
855
|
+
lfs.push(lf);
|
856
|
+
}
|
857
|
+
|
858
|
+
let lf = polars::lazy::dsl::concat(lfs, rechunk, parallel).map_err(RbPolarsErr::from)?;
|
859
|
+
Ok(lf.into())
|
860
|
+
}
|
861
|
+
|
695
862
|
fn rb_diag_concat_df(seq: RArray) -> RbResult<RbDataFrame> {
|
696
863
|
let mut dfs = Vec::new();
|
697
864
|
for item in seq.each() {
|
@@ -752,6 +919,27 @@ fn parquet_schema(rb_f: Value) -> RbResult<Value> {
|
|
752
919
|
Ok(dict.into())
|
753
920
|
}
|
754
921
|
|
922
|
+
fn collect_all(lfs: RArray) -> RbResult<Vec<RbDataFrame>> {
|
923
|
+
use polars_core::utils::rayon::prelude::*;
|
924
|
+
|
925
|
+
let lfs = lfs
|
926
|
+
.each()
|
927
|
+
.map(|v| v?.try_convert::<&RbLazyFrame>())
|
928
|
+
.collect::<RbResult<Vec<&RbLazyFrame>>>()?;
|
929
|
+
|
930
|
+
let out = polars_core::POOL.install(|| {
|
931
|
+
lfs.par_iter()
|
932
|
+
.map(|lf| {
|
933
|
+
let df = lf.ldf.clone().collect()?;
|
934
|
+
Ok(RbDataFrame::new(df))
|
935
|
+
})
|
936
|
+
.collect::<polars_core::error::PolarsResult<Vec<_>>>()
|
937
|
+
.map_err(RbPolarsErr::from)
|
938
|
+
});
|
939
|
+
|
940
|
+
Ok(out?)
|
941
|
+
}
|
942
|
+
|
755
943
|
fn rb_date_range(
|
756
944
|
start: i64,
|
757
945
|
stop: i64,
|
@@ -774,6 +962,16 @@ fn rb_date_range(
|
|
774
962
|
.into()
|
775
963
|
}
|
776
964
|
|
965
|
+
fn coalesce_exprs(exprs: RArray) -> RbResult<RbExpr> {
|
966
|
+
let exprs = rb_exprs_to_exprs(exprs)?;
|
967
|
+
Ok(polars::lazy::dsl::coalesce(&exprs).into())
|
968
|
+
}
|
969
|
+
|
970
|
+
fn sum_exprs(exprs: RArray) -> RbResult<RbExpr> {
|
971
|
+
let exprs = rb_exprs_to_exprs(exprs)?;
|
972
|
+
Ok(polars::lazy::dsl::sum_exprs(exprs).into())
|
973
|
+
}
|
974
|
+
|
777
975
|
fn as_struct(exprs: RArray) -> RbResult<RbExpr> {
|
778
976
|
let exprs = rb_exprs_to_exprs(exprs)?;
|
779
977
|
Ok(polars::lazy::dsl::as_struct(&exprs).into())
|
data/ext/polars/src/list_construction.rs
ADDED
@@ -0,0 +1,100 @@
|
|
1
|
+
use magnus::Value;
|
2
|
+
use polars::prelude::*;
|
3
|
+
use polars_core::utils::CustomIterTools;
|
4
|
+
|
5
|
+
use crate::conversion::get_rbseq;
|
6
|
+
use crate::{RbPolarsErr, RbResult};
|
7
|
+
|
8
|
+
pub fn rb_seq_to_list(name: &str, seq: Value, dtype: &DataType) -> RbResult<Series> {
|
9
|
+
let (seq, len) = get_rbseq(seq)?;
|
10
|
+
|
11
|
+
let s = match dtype {
|
12
|
+
DataType::Int64 => {
|
13
|
+
let mut builder =
|
14
|
+
ListPrimitiveChunkedBuilder::<Int64Type>::new(name, len, len * 5, DataType::Int64);
|
15
|
+
for sub_seq in seq.each() {
|
16
|
+
let sub_seq = sub_seq?;
|
17
|
+
let (sub_seq, len) = get_rbseq(sub_seq)?;
|
18
|
+
|
19
|
+
// safety: we know the iterators len
|
20
|
+
let iter = unsafe {
|
21
|
+
sub_seq
|
22
|
+
.each()
|
23
|
+
.map(|v| {
|
24
|
+
let v = v.unwrap();
|
25
|
+
if v.is_nil() {
|
26
|
+
None
|
27
|
+
} else {
|
28
|
+
Some(v.try_convert::<i64>().unwrap())
|
29
|
+
}
|
30
|
+
})
|
31
|
+
.trust_my_length(len)
|
32
|
+
};
|
33
|
+
builder.append_iter(iter)
|
34
|
+
}
|
35
|
+
builder.finish().into_series()
|
36
|
+
}
|
37
|
+
DataType::Float64 => {
|
38
|
+
let mut builder = ListPrimitiveChunkedBuilder::<Float64Type>::new(
|
39
|
+
name,
|
40
|
+
len,
|
41
|
+
len * 5,
|
42
|
+
DataType::Float64,
|
43
|
+
);
|
44
|
+
for sub_seq in seq.each() {
|
45
|
+
let sub_seq = sub_seq?;
|
46
|
+
let (sub_seq, len) = get_rbseq(sub_seq)?;
|
47
|
+
// safety: we know the iterators len
|
48
|
+
let iter = unsafe {
|
49
|
+
sub_seq
|
50
|
+
.each()
|
51
|
+
.map(|v| {
|
52
|
+
let v = v.unwrap();
|
53
|
+
if v.is_nil() {
|
54
|
+
None
|
55
|
+
} else {
|
56
|
+
Some(v.try_convert::<f64>().unwrap())
|
57
|
+
}
|
58
|
+
})
|
59
|
+
.trust_my_length(len)
|
60
|
+
};
|
61
|
+
builder.append_iter(iter)
|
62
|
+
}
|
63
|
+
builder.finish().into_series()
|
64
|
+
}
|
65
|
+
DataType::Boolean => {
|
66
|
+
let mut builder = ListBooleanChunkedBuilder::new(name, len, len * 5);
|
67
|
+
for sub_seq in seq.each() {
|
68
|
+
let sub_seq = sub_seq?;
|
69
|
+
let (sub_seq, len) = get_rbseq(sub_seq)?;
|
70
|
+
// safety: we know the iterators len
|
71
|
+
let iter = unsafe {
|
72
|
+
sub_seq
|
73
|
+
.each()
|
74
|
+
.map(|v| {
|
75
|
+
let v = v.unwrap();
|
76
|
+
if v.is_nil() {
|
77
|
+
None
|
78
|
+
} else {
|
79
|
+
Some(v.try_convert::<bool>().unwrap())
|
80
|
+
}
|
81
|
+
})
|
82
|
+
.trust_my_length(len)
|
83
|
+
};
|
84
|
+
builder.append_iter(iter)
|
85
|
+
}
|
86
|
+
builder.finish().into_series()
|
87
|
+
}
|
88
|
+
DataType::Utf8 => {
|
89
|
+
return Err(RbPolarsErr::todo());
|
90
|
+
}
|
91
|
+
dt => {
|
92
|
+
return Err(RbPolarsErr::other(format!(
|
93
|
+
"cannot create list array from {:?}",
|
94
|
+
dt
|
95
|
+
)));
|
96
|
+
}
|
97
|
+
};
|
98
|
+
|
99
|
+
Ok(s)
|
100
|
+
}
|