polars-df 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/Cargo.lock +73 -3
- data/Cargo.toml +3 -0
- data/ext/polars/Cargo.toml +12 -1
- data/ext/polars/src/conversion.rs +80 -0
- data/ext/polars/src/error.rs +4 -0
- data/ext/polars/src/lazy/dataframe.rs +2 -2
- data/ext/polars/src/lazy/dsl.rs +98 -0
- data/ext/polars/src/lib.rs +34 -0
- data/ext/polars/src/list_construction.rs +100 -0
- data/ext/polars/src/series.rs +35 -0
- data/lib/polars/cat_name_space.rb +54 -0
- data/lib/polars/data_frame.rb +101 -4
- data/lib/polars/date_time_expr.rb +2 -2
- data/lib/polars/date_time_name_space.rb +1484 -0
- data/lib/polars/expr.rb +3774 -58
- data/lib/polars/expr_dispatch.rb +8 -0
- data/lib/polars/group_by.rb +1 -0
- data/lib/polars/io.rb +1 -1
- data/lib/polars/lazy_frame.rb +8 -4
- data/lib/polars/lazy_functions.rb +126 -16
- data/lib/polars/lazy_group_by.rb +1 -0
- data/lib/polars/list_expr.rb +502 -5
- data/lib/polars/list_name_space.rb +346 -0
- data/lib/polars/series.rb +802 -52
- data/lib/polars/string_expr.rb +189 -13
- data/lib/polars/string_name_space.rb +690 -0
- data/lib/polars/struct_name_space.rb +64 -0
- data/lib/polars/utils.rb +28 -0
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +5 -0
- metadata +8 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6abc9619a425d8aaa0255864b063c41835349063aa4919df133ac5a4ceb972f2
|
4
|
+
data.tar.gz: 78372a2a9eeddb3a8080b1d615991415b9ef7752752319e250f143841bfa67f3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e6fb27a50908c07e5f2f72c81171f07bfdf0999b5148421bdeb1ad7dc69cee1f0bae02021fa18fdad6d1740ea9273464daec513db5e3c7906d5839e77b7d6a66
|
7
|
+
data.tar.gz: 2eb9df841575711a057dd1ca2986403667306ead52cf540491899ffaa184d4878c1bdfc1015e3f5831c12c668de0d4126cbab7c63d1770684e10012f3d28183f
|
data/CHANGELOG.md
CHANGED
data/Cargo.lock
CHANGED
@@ -483,6 +483,12 @@ version = "0.1.0"
|
|
483
483
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
484
484
|
checksum = "ee1b05cbd864bcaecbd3455d6d967862d446e4ebfc3c2e5e5b9841e53cba6673"
|
485
485
|
|
486
|
+
[[package]]
|
487
|
+
name = "fs_extra"
|
488
|
+
version = "1.2.0"
|
489
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
490
|
+
checksum = "2022715d62ab30faffd124d40b76f4134a550a87792276512b18d63272333394"
|
491
|
+
|
486
492
|
[[package]]
|
487
493
|
name = "futures"
|
488
494
|
version = "0.3.25"
|
@@ -646,6 +652,12 @@ dependencies = [
|
|
646
652
|
"libc",
|
647
653
|
]
|
648
654
|
|
655
|
+
[[package]]
|
656
|
+
name = "hex"
|
657
|
+
version = "0.4.3"
|
658
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
659
|
+
checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"
|
660
|
+
|
649
661
|
[[package]]
|
650
662
|
name = "indexmap"
|
651
663
|
version = "1.8.0"
|
@@ -663,6 +675,27 @@ version = "1.0.4"
|
|
663
675
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
664
676
|
checksum = "4217ad341ebadf8d8e724e264f13e593e0648f5b3e94b3896a5df283be015ecc"
|
665
677
|
|
678
|
+
[[package]]
|
679
|
+
name = "jemalloc-sys"
|
680
|
+
version = "0.5.2+5.3.0-patched"
|
681
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
682
|
+
checksum = "134163979b6eed9564c98637b710b40979939ba351f59952708234ea11b5f3f8"
|
683
|
+
dependencies = [
|
684
|
+
"cc",
|
685
|
+
"fs_extra",
|
686
|
+
"libc",
|
687
|
+
]
|
688
|
+
|
689
|
+
[[package]]
|
690
|
+
name = "jemallocator"
|
691
|
+
version = "0.5.0"
|
692
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
693
|
+
checksum = "16c2514137880c52b0b4822b563fadd38257c1f380858addb74a400889696ea6"
|
694
|
+
dependencies = [
|
695
|
+
"jemalloc-sys",
|
696
|
+
"libc",
|
697
|
+
]
|
698
|
+
|
666
699
|
[[package]]
|
667
700
|
name = "jobserver"
|
668
701
|
version = "0.1.25"
|
@@ -683,13 +716,23 @@ dependencies = [
|
|
683
716
|
|
684
717
|
[[package]]
|
685
718
|
name = "json-deserializer"
|
686
|
-
version = "0.4.
|
719
|
+
version = "0.4.3"
|
687
720
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
688
|
-
checksum = "
|
721
|
+
checksum = "daba674f7eecf80fe8bbbf196340908ad1a22510fe71fd6111bb50f441b26440"
|
689
722
|
dependencies = [
|
690
723
|
"indexmap",
|
691
724
|
]
|
692
725
|
|
726
|
+
[[package]]
|
727
|
+
name = "jsonpath_lib"
|
728
|
+
version = "0.3.0"
|
729
|
+
source = "git+https://github.com/ritchie46/jsonpath?rev=24eaf0b4416edff38a4d1b6b17bc4b9f3f047b4b#24eaf0b4416edff38a4d1b6b17bc4b9f3f047b4b"
|
730
|
+
dependencies = [
|
731
|
+
"log",
|
732
|
+
"serde",
|
733
|
+
"serde_json",
|
734
|
+
]
|
735
|
+
|
693
736
|
[[package]]
|
694
737
|
name = "lazy_static"
|
695
738
|
version = "1.4.0"
|
@@ -797,6 +840,16 @@ version = "0.2.6"
|
|
797
840
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
798
841
|
checksum = "348108ab3fba42ec82ff6e9564fc4ca0247bdccdc68dd8af9764bbc79c3c8ffb"
|
799
842
|
|
843
|
+
[[package]]
|
844
|
+
name = "libmimalloc-sys"
|
845
|
+
version = "0.1.28"
|
846
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
847
|
+
checksum = "04d1c67deb83e6b75fa4fe3309e09cfeade12e7721d95322af500d3814ea60c9"
|
848
|
+
dependencies = [
|
849
|
+
"cc",
|
850
|
+
"libc",
|
851
|
+
]
|
852
|
+
|
800
853
|
[[package]]
|
801
854
|
name = "lock_api"
|
802
855
|
version = "0.4.9"
|
@@ -882,6 +935,15 @@ dependencies = [
|
|
882
935
|
"autocfg",
|
883
936
|
]
|
884
937
|
|
938
|
+
[[package]]
|
939
|
+
name = "mimalloc"
|
940
|
+
version = "0.1.32"
|
941
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
942
|
+
checksum = "9b2374e2999959a7b583e1811a1ddbf1d3a4b9496eceb9746f1192a59d871eca"
|
943
|
+
dependencies = [
|
944
|
+
"libmimalloc-sys",
|
945
|
+
]
|
946
|
+
|
885
947
|
[[package]]
|
886
948
|
name = "minimal-lexical"
|
887
949
|
version = "0.2.1"
|
@@ -1160,10 +1222,13 @@ dependencies = [
|
|
1160
1222
|
|
1161
1223
|
[[package]]
|
1162
1224
|
name = "polars"
|
1163
|
-
version = "0.1.
|
1225
|
+
version = "0.1.4"
|
1164
1226
|
dependencies = [
|
1227
|
+
"jemallocator",
|
1165
1228
|
"magnus",
|
1229
|
+
"mimalloc",
|
1166
1230
|
"polars 0.25.1",
|
1231
|
+
"polars-core",
|
1167
1232
|
"serde_json",
|
1168
1233
|
]
|
1169
1234
|
|
@@ -1202,11 +1267,13 @@ dependencies = [
|
|
1202
1267
|
"ahash 0.8.2",
|
1203
1268
|
"anyhow",
|
1204
1269
|
"arrow2",
|
1270
|
+
"base64",
|
1205
1271
|
"bitflags",
|
1206
1272
|
"chrono",
|
1207
1273
|
"chrono-tz",
|
1208
1274
|
"comfy-table",
|
1209
1275
|
"hashbrown 0.12.3",
|
1276
|
+
"hex",
|
1210
1277
|
"indexmap",
|
1211
1278
|
"num",
|
1212
1279
|
"once_cell",
|
@@ -1277,9 +1344,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1277
1344
|
checksum = "bfd3f6552b3e9539634c35047f372db331b6227f75c36fcbe4670ab58bbcbeb3"
|
1278
1345
|
dependencies = [
|
1279
1346
|
"arrow2",
|
1347
|
+
"jsonpath_lib",
|
1280
1348
|
"polars-arrow",
|
1281
1349
|
"polars-core",
|
1282
1350
|
"polars-utils",
|
1351
|
+
"serde_json",
|
1283
1352
|
]
|
1284
1353
|
|
1285
1354
|
[[package]]
|
@@ -1548,6 +1617,7 @@ version = "1.0.88"
|
|
1548
1617
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1549
1618
|
checksum = "8e8b3801309262e8184d9687fb697586833e939767aea0dda89f5a8e650e8bd7"
|
1550
1619
|
dependencies = [
|
1620
|
+
"indexmap",
|
1551
1621
|
"itoa",
|
1552
1622
|
"ryu",
|
1553
1623
|
"serde",
|
data/Cargo.toml
CHANGED
data/ext/polars/Cargo.toml
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
[package]
|
2
2
|
name = "polars"
|
3
|
-
version = "0.1.
|
3
|
+
version = "0.1.4"
|
4
4
|
authors = ["Andrew Kane <andrew@ankane.org>"]
|
5
5
|
edition = "2021"
|
6
6
|
publish = false
|
@@ -10,6 +10,7 @@ crate-type = ["cdylib"]
|
|
10
10
|
|
11
11
|
[dependencies]
|
12
12
|
magnus = "0.4"
|
13
|
+
polars-core = "0.25.1"
|
13
14
|
serde_json = "1"
|
14
15
|
|
15
16
|
[dependencies.polars]
|
@@ -19,6 +20,7 @@ features = [
|
|
19
20
|
"arange",
|
20
21
|
"arg_where",
|
21
22
|
"concat_str",
|
23
|
+
"cse",
|
22
24
|
"csv-file",
|
23
25
|
"cum_agg",
|
24
26
|
"cumulative_eval",
|
@@ -30,6 +32,7 @@ features = [
|
|
30
32
|
"dtype-full",
|
31
33
|
"dynamic_groupby",
|
32
34
|
"ewma",
|
35
|
+
"extract_jsonpath",
|
33
36
|
"fmt",
|
34
37
|
"horizontal_concat",
|
35
38
|
"interpolate",
|
@@ -49,6 +52,7 @@ features = [
|
|
49
52
|
"parquet",
|
50
53
|
"partition_by",
|
51
54
|
"pct_change",
|
55
|
+
"performant",
|
52
56
|
"product",
|
53
57
|
"propagate_nans",
|
54
58
|
"random",
|
@@ -61,6 +65,7 @@ features = [
|
|
61
65
|
"semi_anti_join",
|
62
66
|
"serde-lazy",
|
63
67
|
"sign",
|
68
|
+
"string_encoding",
|
64
69
|
"string_justify",
|
65
70
|
"strings",
|
66
71
|
"timezones",
|
@@ -69,3 +74,9 @@ features = [
|
|
69
74
|
"trigonometry",
|
70
75
|
"unique_counts",
|
71
76
|
]
|
77
|
+
|
78
|
+
[target.'cfg(target_os = "linux")'.dependencies]
|
79
|
+
jemallocator = { version = "0.5", features = ["disable_initial_exec_tls"] }
|
80
|
+
|
81
|
+
[target.'cfg(not(target_os = "linux"))'.dependencies]
|
82
|
+
mimalloc = { version = "0.1", default-features = false }
|
@@ -5,6 +5,8 @@ use polars::datatypes::AnyValue;
|
|
5
5
|
use polars::frame::DataFrame;
|
6
6
|
use polars::prelude::*;
|
7
7
|
use polars::series::ops::NullBehavior;
|
8
|
+
use std::fmt::{Display, Formatter};
|
9
|
+
use std::hash::{Hash, Hasher};
|
8
10
|
|
9
11
|
use crate::{RbDataFrame, RbPolarsErr, RbResult, RbSeries, RbValueError};
|
10
12
|
|
@@ -87,6 +89,25 @@ impl From<Wrap<AnyValue<'_>>> for Value {
|
|
87
89
|
.unwrap()
|
88
90
|
.funcall::<_, _, Value>("to_date", ())
|
89
91
|
.unwrap(),
|
92
|
+
AnyValue::Datetime(v, tu, tz) => {
|
93
|
+
let t = match tu {
|
94
|
+
TimeUnit::Nanoseconds => todo!(),
|
95
|
+
TimeUnit::Microseconds => {
|
96
|
+
let sec = v / 1000000;
|
97
|
+
let subsec = v % 1000000;
|
98
|
+
class::time()
|
99
|
+
.funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("usec")))
|
100
|
+
.unwrap()
|
101
|
+
}
|
102
|
+
TimeUnit::Milliseconds => todo!(),
|
103
|
+
};
|
104
|
+
|
105
|
+
if tz.is_some() {
|
106
|
+
todo!();
|
107
|
+
} else {
|
108
|
+
t.funcall::<_, _, Value>("utc", ()).unwrap()
|
109
|
+
}
|
110
|
+
}
|
90
111
|
_ => todo!(),
|
91
112
|
}
|
92
113
|
}
|
@@ -238,6 +259,22 @@ impl TryConvert for Wrap<JoinType> {
|
|
238
259
|
}
|
239
260
|
}
|
240
261
|
|
262
|
+
impl TryConvert for Wrap<ListToStructWidthStrategy> {
|
263
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
264
|
+
let parsed = match ob.try_convert::<String>()?.as_str() {
|
265
|
+
"first_non_null" => ListToStructWidthStrategy::FirstNonNull,
|
266
|
+
"max_width" => ListToStructWidthStrategy::MaxWidth,
|
267
|
+
v => {
|
268
|
+
return Err(RbValueError::new_err(format!(
|
269
|
+
"n_field_strategy must be one of {{'first_non_null', 'max_width'}}, got {}",
|
270
|
+
v
|
271
|
+
)))
|
272
|
+
}
|
273
|
+
};
|
274
|
+
Ok(Wrap(parsed))
|
275
|
+
}
|
276
|
+
}
|
277
|
+
|
241
278
|
impl TryConvert for Wrap<NullBehavior> {
|
242
279
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
243
280
|
let parsed = match ob.try_convert::<String>()?.as_str() {
|
@@ -425,10 +462,47 @@ pub fn parse_parquet_compression(
|
|
425
462
|
Ok(parsed)
|
426
463
|
}
|
427
464
|
|
465
|
+
#[derive(Clone, Debug)]
|
428
466
|
pub struct ObjectValue {
|
429
467
|
pub inner: Value,
|
430
468
|
}
|
431
469
|
|
470
|
+
impl Hash for ObjectValue {
|
471
|
+
fn hash<H: Hasher>(&self, state: &mut H) {
|
472
|
+
let h = self
|
473
|
+
.inner
|
474
|
+
.funcall::<_, _, isize>("hash", ())
|
475
|
+
.expect("should be hashable");
|
476
|
+
state.write_isize(h)
|
477
|
+
}
|
478
|
+
}
|
479
|
+
|
480
|
+
impl Eq for ObjectValue {}
|
481
|
+
|
482
|
+
impl PartialEq for ObjectValue {
|
483
|
+
fn eq(&self, other: &Self) -> bool {
|
484
|
+
self.inner.eql(&other.inner).unwrap_or(false)
|
485
|
+
}
|
486
|
+
}
|
487
|
+
|
488
|
+
impl Display for ObjectValue {
|
489
|
+
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
490
|
+
write!(f, "{}", self.inner)
|
491
|
+
}
|
492
|
+
}
|
493
|
+
|
494
|
+
impl PolarsObject for ObjectValue {
|
495
|
+
fn type_name() -> &'static str {
|
496
|
+
"object"
|
497
|
+
}
|
498
|
+
}
|
499
|
+
|
500
|
+
impl From<Value> for ObjectValue {
|
501
|
+
fn from(v: Value) -> Self {
|
502
|
+
Self { inner: v }
|
503
|
+
}
|
504
|
+
}
|
505
|
+
|
432
506
|
impl From<&dyn PolarsObjectSafe> for &ObjectValue {
|
433
507
|
fn from(val: &dyn PolarsObjectSafe) -> Self {
|
434
508
|
unsafe { &*(val as *const dyn PolarsObjectSafe as *const ObjectValue) }
|
@@ -440,3 +514,9 @@ impl ObjectValue {
|
|
440
514
|
self.inner
|
441
515
|
}
|
442
516
|
}
|
517
|
+
|
518
|
+
impl Default for ObjectValue {
|
519
|
+
fn default() -> Self {
|
520
|
+
ObjectValue { inner: *QNIL }
|
521
|
+
}
|
522
|
+
}
|
data/ext/polars/src/error.rs
CHANGED
@@ -211,7 +211,7 @@ impl RbLazyFrame {
|
|
211
211
|
projection_pushdown: bool,
|
212
212
|
simplify_expr: bool,
|
213
213
|
slice_pushdown: bool,
|
214
|
-
|
214
|
+
cse: bool,
|
215
215
|
allow_streaming: bool,
|
216
216
|
) -> RbLazyFrame {
|
217
217
|
let ldf = self.ldf.clone();
|
@@ -220,7 +220,7 @@ impl RbLazyFrame {
|
|
220
220
|
.with_predicate_pushdown(predicate_pushdown)
|
221
221
|
.with_simplify_expr(simplify_expr)
|
222
222
|
.with_slice_pushdown(slice_pushdown)
|
223
|
-
|
223
|
+
.with_common_subplan_elimination(cse)
|
224
224
|
.with_streaming(allow_streaming)
|
225
225
|
.with_projection_pushdown(projection_pushdown);
|
226
226
|
ldf.into()
|
data/ext/polars/src/lazy/dsl.rs
CHANGED
@@ -659,6 +659,65 @@ impl RbExpr {
|
|
659
659
|
self.inner.clone().str().starts_with(sub).into()
|
660
660
|
}
|
661
661
|
|
662
|
+
pub fn str_hex_encode(&self) -> Self {
|
663
|
+
self.clone()
|
664
|
+
.inner
|
665
|
+
.map(
|
666
|
+
move |s| s.utf8().map(|s| s.hex_encode().into_series()),
|
667
|
+
GetOutput::same_type(),
|
668
|
+
)
|
669
|
+
.with_fmt("str.hex_encode")
|
670
|
+
.into()
|
671
|
+
}
|
672
|
+
|
673
|
+
pub fn str_hex_decode(&self, strict: Option<bool>) -> Self {
|
674
|
+
self.clone()
|
675
|
+
.inner
|
676
|
+
.map(
|
677
|
+
move |s| s.utf8()?.hex_decode(strict).map(|s| s.into_series()),
|
678
|
+
GetOutput::same_type(),
|
679
|
+
)
|
680
|
+
.with_fmt("str.hex_decode")
|
681
|
+
.into()
|
682
|
+
}
|
683
|
+
|
684
|
+
pub fn str_base64_encode(&self) -> Self {
|
685
|
+
self.clone()
|
686
|
+
.inner
|
687
|
+
.map(
|
688
|
+
move |s| s.utf8().map(|s| s.base64_encode().into_series()),
|
689
|
+
GetOutput::same_type(),
|
690
|
+
)
|
691
|
+
.with_fmt("str.base64_encode")
|
692
|
+
.into()
|
693
|
+
}
|
694
|
+
|
695
|
+
pub fn str_base64_decode(&self, strict: Option<bool>) -> Self {
|
696
|
+
self.clone()
|
697
|
+
.inner
|
698
|
+
.map(
|
699
|
+
move |s| s.utf8()?.base64_decode(strict).map(|s| s.into_series()),
|
700
|
+
GetOutput::same_type(),
|
701
|
+
)
|
702
|
+
.with_fmt("str.base64_decode")
|
703
|
+
.into()
|
704
|
+
}
|
705
|
+
|
706
|
+
pub fn str_json_path_match(&self, pat: String) -> Self {
|
707
|
+
let function = move |s: Series| {
|
708
|
+
let ca = s.utf8()?;
|
709
|
+
match ca.json_path_match(&pat) {
|
710
|
+
Ok(ca) => Ok(ca.into_series()),
|
711
|
+
Err(e) => Err(PolarsError::ComputeError(format!("{:?}", e).into())),
|
712
|
+
}
|
713
|
+
};
|
714
|
+
self.clone()
|
715
|
+
.inner
|
716
|
+
.map(function, GetOutput::from_type(DataType::Utf8))
|
717
|
+
.with_fmt("str.json_path_match")
|
718
|
+
.into()
|
719
|
+
}
|
720
|
+
|
662
721
|
pub fn str_extract(&self, pat: String, group_index: usize) -> Self {
|
663
722
|
self.inner.clone().str().extract(&pat, group_index).into()
|
664
723
|
}
|
@@ -887,6 +946,10 @@ impl RbExpr {
|
|
887
946
|
self.inner.clone().dt().round(&every, &offset).into()
|
888
947
|
}
|
889
948
|
|
949
|
+
pub fn dot(&self, other: &RbExpr) -> Self {
|
950
|
+
self.inner.clone().dot(other.inner.clone()).into()
|
951
|
+
}
|
952
|
+
|
890
953
|
pub fn reinterpret(&self, signed: bool) -> Self {
|
891
954
|
let function = move |s: Series| reinterpret(&s, signed);
|
892
955
|
let dt = if signed {
|
@@ -1208,6 +1271,28 @@ impl RbExpr {
|
|
1208
1271
|
.into()
|
1209
1272
|
}
|
1210
1273
|
|
1274
|
+
pub fn lst_to_struct(
|
1275
|
+
&self,
|
1276
|
+
width_strat: Wrap<ListToStructWidthStrategy>,
|
1277
|
+
_name_gen: Option<Value>,
|
1278
|
+
) -> RbResult<Self> {
|
1279
|
+
// TODO fix
|
1280
|
+
let name_gen = None;
|
1281
|
+
// let name_gen = name_gen.map(|lambda| {
|
1282
|
+
// Arc::new(move |idx: usize| {
|
1283
|
+
// let out: Value = lambda.funcall("call", (idx,)).unwrap();
|
1284
|
+
// out.try_convert::<String>().unwrap()
|
1285
|
+
// }) as NameGenerator
|
1286
|
+
// });
|
1287
|
+
|
1288
|
+
Ok(self
|
1289
|
+
.inner
|
1290
|
+
.clone()
|
1291
|
+
.arr()
|
1292
|
+
.to_struct(width_strat.0, name_gen)
|
1293
|
+
.into())
|
1294
|
+
}
|
1295
|
+
|
1211
1296
|
pub fn rank(&self, method: Wrap<RankMethod>, reverse: bool) -> Self {
|
1212
1297
|
let options = RankOptions {
|
1213
1298
|
method: method.0,
|
@@ -1433,6 +1518,19 @@ pub fn repeat(value: Value, n_times: &RbExpr) -> RbResult<RbExpr> {
|
|
1433
1518
|
}
|
1434
1519
|
}
|
1435
1520
|
|
1521
|
+
pub fn pearson_corr(a: &RbExpr, b: &RbExpr, ddof: u8) -> RbExpr {
|
1522
|
+
polars::lazy::dsl::pearson_corr(a.inner.clone(), b.inner.clone(), ddof).into()
|
1523
|
+
}
|
1524
|
+
|
1525
|
+
pub fn spearman_rank_corr(a: &RbExpr, b: &RbExpr, ddof: u8, propagate_nans: bool) -> RbExpr {
|
1526
|
+
polars::lazy::dsl::spearman_rank_corr(a.inner.clone(), b.inner.clone(), ddof, propagate_nans)
|
1527
|
+
.into()
|
1528
|
+
}
|
1529
|
+
|
1530
|
+
pub fn cov(a: &RbExpr, b: &RbExpr) -> RbExpr {
|
1531
|
+
polars::lazy::dsl::cov(a.inner.clone(), b.inner.clone()).into()
|
1532
|
+
}
|
1533
|
+
|
1436
1534
|
#[magnus::wrap(class = "Polars::RbWhen")]
|
1437
1535
|
#[derive(Clone)]
|
1438
1536
|
pub struct RbWhen {
|
data/ext/polars/src/lib.rs
CHANGED
@@ -4,6 +4,7 @@ mod dataframe;
|
|
4
4
|
mod error;
|
5
5
|
mod file;
|
6
6
|
mod lazy;
|
7
|
+
mod list_construction;
|
7
8
|
mod series;
|
8
9
|
mod set;
|
9
10
|
mod utils;
|
@@ -27,6 +28,20 @@ use polars::functions::{diag_concat_df, hor_concat_df};
|
|
27
28
|
use polars::prelude::{ClosedWindow, Duration, IntoSeries, TimeZone};
|
28
29
|
use series::RbSeries;
|
29
30
|
|
31
|
+
#[cfg(target_os = "linux")]
|
32
|
+
use jemallocator::Jemalloc;
|
33
|
+
|
34
|
+
#[cfg(not(target_os = "linux"))]
|
35
|
+
use mimalloc::MiMalloc;
|
36
|
+
|
37
|
+
#[global_allocator]
|
38
|
+
#[cfg(target_os = "linux")]
|
39
|
+
static GLOBAL: Jemalloc = Jemalloc;
|
40
|
+
|
41
|
+
#[global_allocator]
|
42
|
+
#[cfg(not(target_os = "linux"))]
|
43
|
+
static GLOBAL: MiMalloc = MiMalloc;
|
44
|
+
|
30
45
|
type RbResult<T> = Result<T, Error>;
|
31
46
|
|
32
47
|
fn module() -> RModule {
|
@@ -281,6 +296,14 @@ fn init() -> RbResult<()> {
|
|
281
296
|
class.define_method("str_contains", method!(RbExpr::str_contains, 2))?;
|
282
297
|
class.define_method("str_ends_with", method!(RbExpr::str_ends_with, 1))?;
|
283
298
|
class.define_method("str_starts_with", method!(RbExpr::str_starts_with, 1))?;
|
299
|
+
class.define_method("str_hex_encode", method!(RbExpr::str_hex_encode, 0))?;
|
300
|
+
class.define_method("str_hex_decode", method!(RbExpr::str_hex_decode, 1))?;
|
301
|
+
class.define_method("str_base64_encode", method!(RbExpr::str_base64_encode, 0))?;
|
302
|
+
class.define_method("str_base64_decode", method!(RbExpr::str_base64_decode, 1))?;
|
303
|
+
class.define_method(
|
304
|
+
"str_json_path_match",
|
305
|
+
method!(RbExpr::str_json_path_match, 1),
|
306
|
+
)?;
|
284
307
|
class.define_method("str_extract", method!(RbExpr::str_extract, 2))?;
|
285
308
|
class.define_method("str_extract_all", method!(RbExpr::str_extract_all, 1))?;
|
286
309
|
class.define_method("count_match", method!(RbExpr::count_match, 1))?;
|
@@ -338,6 +361,7 @@ fn init() -> RbResult<()> {
|
|
338
361
|
class.define_method("dt_tz_localize", method!(RbExpr::dt_tz_localize, 1))?;
|
339
362
|
class.define_method("dt_truncate", method!(RbExpr::dt_truncate, 2))?;
|
340
363
|
class.define_method("dt_round", method!(RbExpr::dt_round, 2))?;
|
364
|
+
class.define_method("dot", method!(RbExpr::dot, 1))?;
|
341
365
|
class.define_method("reinterpret", method!(RbExpr::reinterpret, 1))?;
|
342
366
|
class.define_method("mode", method!(RbExpr::mode, 0))?;
|
343
367
|
class.define_method("keep_name", method!(RbExpr::keep_name, 0))?;
|
@@ -372,6 +396,7 @@ fn init() -> RbResult<()> {
|
|
372
396
|
class.define_method("lst_slice", method!(RbExpr::lst_slice, 2))?;
|
373
397
|
class.define_method("lst_eval", method!(RbExpr::lst_eval, 2))?;
|
374
398
|
class.define_method("cumulative_eval", method!(RbExpr::cumulative_eval, 3))?;
|
399
|
+
class.define_method("lst_to_struct", method!(RbExpr::lst_to_struct, 2))?;
|
375
400
|
class.define_method("rank", method!(RbExpr::rank, 2))?;
|
376
401
|
class.define_method("diff", method!(RbExpr::diff, 2))?;
|
377
402
|
class.define_method("pct_change", method!(RbExpr::pct_change, 1))?;
|
@@ -424,6 +449,12 @@ fn init() -> RbResult<()> {
|
|
424
449
|
class.define_singleton_method("lit", function!(crate::lazy::dsl::lit, 1))?;
|
425
450
|
class.define_singleton_method("arange", function!(crate::lazy::dsl::arange, 3))?;
|
426
451
|
class.define_singleton_method("repeat", function!(crate::lazy::dsl::repeat, 2))?;
|
452
|
+
class.define_singleton_method("pearson_corr", function!(crate::lazy::dsl::pearson_corr, 3))?;
|
453
|
+
class.define_singleton_method(
|
454
|
+
"spearman_rank_corr",
|
455
|
+
function!(crate::lazy::dsl::spearman_rank_corr, 4),
|
456
|
+
)?;
|
457
|
+
class.define_singleton_method("cov", function!(crate::lazy::dsl::cov, 2))?;
|
427
458
|
class.define_singleton_method("when", function!(crate::lazy::dsl::when, 1))?;
|
428
459
|
class.define_singleton_method("concat_str", function!(crate::lazy::dsl::concat_str, 2))?;
|
429
460
|
class.define_singleton_method("concat_lst", function!(crate::lazy::dsl::concat_lst, 1))?;
|
@@ -507,7 +538,10 @@ fn init() -> RbResult<()> {
|
|
507
538
|
class.define_singleton_method("new_opt_f32", function!(RbSeries::new_opt_f32, 3))?;
|
508
539
|
class.define_singleton_method("new_opt_f64", function!(RbSeries::new_opt_f64, 3))?;
|
509
540
|
class.define_singleton_method("new_str", function!(RbSeries::new_str, 3))?;
|
541
|
+
class.define_singleton_method("new_object", function!(RbSeries::new_object, 3))?;
|
542
|
+
class.define_singleton_method("new_list", function!(RbSeries::new_list, 3))?;
|
510
543
|
class.define_singleton_method("new_opt_date", function!(RbSeries::new_opt_date, 3))?;
|
544
|
+
class.define_singleton_method("new_opt_datetime", function!(RbSeries::new_opt_datetime, 3))?;
|
511
545
|
class.define_method("is_sorted_flag", method!(RbSeries::is_sorted_flag, 0))?;
|
512
546
|
class.define_method(
|
513
547
|
"is_sorted_reverse_flag",
|