polars-df 0.1.2 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.yardopts +3 -0
- data/CHANGELOG.md +9 -0
- data/Cargo.lock +74 -3
- data/Cargo.toml +3 -0
- data/README.md +1 -1
- data/ext/polars/Cargo.toml +18 -1
- data/ext/polars/src/conversion.rs +115 -2
- data/ext/polars/src/dataframe.rs +228 -11
- data/ext/polars/src/error.rs +4 -0
- data/ext/polars/src/lazy/dataframe.rs +5 -5
- data/ext/polars/src/lazy/dsl.rs +157 -2
- data/ext/polars/src/lib.rs +185 -10
- data/ext/polars/src/list_construction.rs +100 -0
- data/ext/polars/src/series.rs +217 -29
- data/ext/polars/src/set.rs +91 -0
- data/ext/polars/src/utils.rs +19 -0
- data/lib/polars/batched_csv_reader.rb +1 -0
- data/lib/polars/cat_expr.rb +39 -0
- data/lib/polars/cat_name_space.rb +54 -0
- data/lib/polars/data_frame.rb +2384 -140
- data/lib/polars/date_time_expr.rb +1282 -7
- data/lib/polars/date_time_name_space.rb +1484 -0
- data/lib/polars/exceptions.rb +20 -0
- data/lib/polars/expr.rb +4374 -53
- data/lib/polars/expr_dispatch.rb +22 -0
- data/lib/polars/functions.rb +219 -0
- data/lib/polars/group_by.rb +518 -0
- data/lib/polars/io.rb +421 -2
- data/lib/polars/lazy_frame.rb +1267 -69
- data/lib/polars/lazy_functions.rb +412 -24
- data/lib/polars/lazy_group_by.rb +80 -0
- data/lib/polars/list_expr.rb +507 -5
- data/lib/polars/list_name_space.rb +346 -0
- data/lib/polars/meta_expr.rb +21 -0
- data/lib/polars/series.rb +2256 -242
- data/lib/polars/slice.rb +104 -0
- data/lib/polars/string_expr.rb +847 -10
- data/lib/polars/string_name_space.rb +690 -0
- data/lib/polars/struct_expr.rb +73 -0
- data/lib/polars/struct_name_space.rb +64 -0
- data/lib/polars/utils.rb +71 -3
- data/lib/polars/version.rb +2 -1
- data/lib/polars/when.rb +1 -0
- data/lib/polars/when_then.rb +1 -0
- data/lib/polars.rb +12 -10
- metadata +15 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6abc9619a425d8aaa0255864b063c41835349063aa4919df133ac5a4ceb972f2
|
4
|
+
data.tar.gz: 78372a2a9eeddb3a8080b1d615991415b9ef7752752319e250f143841bfa67f3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e6fb27a50908c07e5f2f72c81171f07bfdf0999b5148421bdeb1ad7dc69cee1f0bae02021fa18fdad6d1740ea9273464daec513db5e3c7906d5839e77b7d6a66
|
7
|
+
data.tar.gz: 2eb9df841575711a057dd1ca2986403667306ead52cf540491899ffaa184d4878c1bdfc1015e3f5831c12c668de0d4126cbab7c63d1770684e10012f3d28183f
|
data/.yardopts
ADDED
data/CHANGELOG.md
CHANGED
data/Cargo.lock
CHANGED
@@ -483,6 +483,12 @@ version = "0.1.0"
|
|
483
483
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
484
484
|
checksum = "ee1b05cbd864bcaecbd3455d6d967862d446e4ebfc3c2e5e5b9841e53cba6673"
|
485
485
|
|
486
|
+
[[package]]
|
487
|
+
name = "fs_extra"
|
488
|
+
version = "1.2.0"
|
489
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
490
|
+
checksum = "2022715d62ab30faffd124d40b76f4134a550a87792276512b18d63272333394"
|
491
|
+
|
486
492
|
[[package]]
|
487
493
|
name = "futures"
|
488
494
|
version = "0.3.25"
|
@@ -646,6 +652,12 @@ dependencies = [
|
|
646
652
|
"libc",
|
647
653
|
]
|
648
654
|
|
655
|
+
[[package]]
|
656
|
+
name = "hex"
|
657
|
+
version = "0.4.3"
|
658
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
659
|
+
checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"
|
660
|
+
|
649
661
|
[[package]]
|
650
662
|
name = "indexmap"
|
651
663
|
version = "1.8.0"
|
@@ -663,6 +675,27 @@ version = "1.0.4"
|
|
663
675
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
664
676
|
checksum = "4217ad341ebadf8d8e724e264f13e593e0648f5b3e94b3896a5df283be015ecc"
|
665
677
|
|
678
|
+
[[package]]
|
679
|
+
name = "jemalloc-sys"
|
680
|
+
version = "0.5.2+5.3.0-patched"
|
681
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
682
|
+
checksum = "134163979b6eed9564c98637b710b40979939ba351f59952708234ea11b5f3f8"
|
683
|
+
dependencies = [
|
684
|
+
"cc",
|
685
|
+
"fs_extra",
|
686
|
+
"libc",
|
687
|
+
]
|
688
|
+
|
689
|
+
[[package]]
|
690
|
+
name = "jemallocator"
|
691
|
+
version = "0.5.0"
|
692
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
693
|
+
checksum = "16c2514137880c52b0b4822b563fadd38257c1f380858addb74a400889696ea6"
|
694
|
+
dependencies = [
|
695
|
+
"jemalloc-sys",
|
696
|
+
"libc",
|
697
|
+
]
|
698
|
+
|
666
699
|
[[package]]
|
667
700
|
name = "jobserver"
|
668
701
|
version = "0.1.25"
|
@@ -683,13 +716,23 @@ dependencies = [
|
|
683
716
|
|
684
717
|
[[package]]
|
685
718
|
name = "json-deserializer"
|
686
|
-
version = "0.4.
|
719
|
+
version = "0.4.3"
|
687
720
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
688
|
-
checksum = "
|
721
|
+
checksum = "daba674f7eecf80fe8bbbf196340908ad1a22510fe71fd6111bb50f441b26440"
|
689
722
|
dependencies = [
|
690
723
|
"indexmap",
|
691
724
|
]
|
692
725
|
|
726
|
+
[[package]]
|
727
|
+
name = "jsonpath_lib"
|
728
|
+
version = "0.3.0"
|
729
|
+
source = "git+https://github.com/ritchie46/jsonpath?rev=24eaf0b4416edff38a4d1b6b17bc4b9f3f047b4b#24eaf0b4416edff38a4d1b6b17bc4b9f3f047b4b"
|
730
|
+
dependencies = [
|
731
|
+
"log",
|
732
|
+
"serde",
|
733
|
+
"serde_json",
|
734
|
+
]
|
735
|
+
|
693
736
|
[[package]]
|
694
737
|
name = "lazy_static"
|
695
738
|
version = "1.4.0"
|
@@ -797,6 +840,16 @@ version = "0.2.6"
|
|
797
840
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
798
841
|
checksum = "348108ab3fba42ec82ff6e9564fc4ca0247bdccdc68dd8af9764bbc79c3c8ffb"
|
799
842
|
|
843
|
+
[[package]]
|
844
|
+
name = "libmimalloc-sys"
|
845
|
+
version = "0.1.28"
|
846
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
847
|
+
checksum = "04d1c67deb83e6b75fa4fe3309e09cfeade12e7721d95322af500d3814ea60c9"
|
848
|
+
dependencies = [
|
849
|
+
"cc",
|
850
|
+
"libc",
|
851
|
+
]
|
852
|
+
|
800
853
|
[[package]]
|
801
854
|
name = "lock_api"
|
802
855
|
version = "0.4.9"
|
@@ -882,6 +935,15 @@ dependencies = [
|
|
882
935
|
"autocfg",
|
883
936
|
]
|
884
937
|
|
938
|
+
[[package]]
|
939
|
+
name = "mimalloc"
|
940
|
+
version = "0.1.32"
|
941
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
942
|
+
checksum = "9b2374e2999959a7b583e1811a1ddbf1d3a4b9496eceb9746f1192a59d871eca"
|
943
|
+
dependencies = [
|
944
|
+
"libmimalloc-sys",
|
945
|
+
]
|
946
|
+
|
885
947
|
[[package]]
|
886
948
|
name = "minimal-lexical"
|
887
949
|
version = "0.2.1"
|
@@ -1160,10 +1222,13 @@ dependencies = [
|
|
1160
1222
|
|
1161
1223
|
[[package]]
|
1162
1224
|
name = "polars"
|
1163
|
-
version = "0.1.2"
|
1225
|
+
version = "0.1.4"
|
1164
1226
|
dependencies = [
|
1227
|
+
"jemallocator",
|
1165
1228
|
"magnus",
|
1229
|
+
"mimalloc",
|
1166
1230
|
"polars 0.25.1",
|
1231
|
+
"polars-core",
|
1167
1232
|
"serde_json",
|
1168
1233
|
]
|
1169
1234
|
|
@@ -1202,11 +1267,13 @@ dependencies = [
|
|
1202
1267
|
"ahash 0.8.2",
|
1203
1268
|
"anyhow",
|
1204
1269
|
"arrow2",
|
1270
|
+
"base64",
|
1205
1271
|
"bitflags",
|
1206
1272
|
"chrono",
|
1207
1273
|
"chrono-tz",
|
1208
1274
|
"comfy-table",
|
1209
1275
|
"hashbrown 0.12.3",
|
1276
|
+
"hex",
|
1210
1277
|
"indexmap",
|
1211
1278
|
"num",
|
1212
1279
|
"once_cell",
|
@@ -1217,6 +1284,7 @@ dependencies = [
|
|
1217
1284
|
"rayon",
|
1218
1285
|
"regex",
|
1219
1286
|
"serde",
|
1287
|
+
"serde_json",
|
1220
1288
|
"smartstring",
|
1221
1289
|
"thiserror",
|
1222
1290
|
]
|
@@ -1276,9 +1344,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1276
1344
|
checksum = "bfd3f6552b3e9539634c35047f372db331b6227f75c36fcbe4670ab58bbcbeb3"
|
1277
1345
|
dependencies = [
|
1278
1346
|
"arrow2",
|
1347
|
+
"jsonpath_lib",
|
1279
1348
|
"polars-arrow",
|
1280
1349
|
"polars-core",
|
1281
1350
|
"polars-utils",
|
1351
|
+
"serde_json",
|
1282
1352
|
]
|
1283
1353
|
|
1284
1354
|
[[package]]
|
@@ -1547,6 +1617,7 @@ version = "1.0.88"
|
|
1547
1617
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1548
1618
|
checksum = "8e8b3801309262e8184d9687fb697586833e939767aea0dda89f5a8e650e8bd7"
|
1549
1619
|
dependencies = [
|
1620
|
+
"indexmap",
|
1550
1621
|
"itoa",
|
1551
1622
|
"ryu",
|
1552
1623
|
"serde",
|
data/Cargo.toml
CHANGED
data/README.md
CHANGED
@@ -27,7 +27,7 @@ Polars.read_csv("iris.csv")
|
|
27
27
|
.collect
|
28
28
|
```
|
29
29
|
|
30
|
-
You can follow [Polars tutorials](https://pola-rs.github.io/polars-book/user-guide/introduction.html) and convert the code to Ruby in many cases. Feel free to open an issue if you run into problems.
|
30
|
+
You can follow [Polars tutorials](https://pola-rs.github.io/polars-book/user-guide/introduction.html) and convert the code to Ruby in many cases. Feel free to open an issue if you run into problems. Some methods are missing at the moment.
|
31
31
|
|
32
32
|
## Examples
|
33
33
|
|
data/ext/polars/Cargo.toml
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
[package]
|
2
2
|
name = "polars"
|
3
|
-
version = "0.1.2"
|
3
|
+
version = "0.1.4"
|
4
4
|
authors = ["Andrew Kane <andrew@ankane.org>"]
|
5
5
|
edition = "2021"
|
6
6
|
publish = false
|
@@ -10,6 +10,7 @@ crate-type = ["cdylib"]
|
|
10
10
|
|
11
11
|
[dependencies]
|
12
12
|
magnus = "0.4"
|
13
|
+
polars-core = "0.25.1"
|
13
14
|
serde_json = "1"
|
14
15
|
|
15
16
|
[dependencies.polars]
|
@@ -17,10 +18,13 @@ version = "0.25.1"
|
|
17
18
|
features = [
|
18
19
|
"abs",
|
19
20
|
"arange",
|
21
|
+
"arg_where",
|
20
22
|
"concat_str",
|
23
|
+
"cse",
|
21
24
|
"csv-file",
|
22
25
|
"cum_agg",
|
23
26
|
"cumulative_eval",
|
27
|
+
"dataframe_arithmetic",
|
24
28
|
"date_offset",
|
25
29
|
"diagonal_concat",
|
26
30
|
"diff",
|
@@ -28,6 +32,7 @@ features = [
|
|
28
32
|
"dtype-full",
|
29
33
|
"dynamic_groupby",
|
30
34
|
"ewma",
|
35
|
+
"extract_jsonpath",
|
31
36
|
"fmt",
|
32
37
|
"horizontal_concat",
|
33
38
|
"interpolate",
|
@@ -38,16 +43,21 @@ features = [
|
|
38
43
|
"lazy",
|
39
44
|
"lazy_regex",
|
40
45
|
"list_eval",
|
46
|
+
"list_to_struct",
|
41
47
|
"log",
|
42
48
|
"meta",
|
43
49
|
"mode",
|
44
50
|
"moment",
|
51
|
+
"object",
|
45
52
|
"parquet",
|
46
53
|
"partition_by",
|
47
54
|
"pct_change",
|
55
|
+
"performant",
|
48
56
|
"product",
|
57
|
+
"propagate_nans",
|
49
58
|
"random",
|
50
59
|
"rank",
|
60
|
+
"reinterpret",
|
51
61
|
"repeat_by",
|
52
62
|
"rolling_window",
|
53
63
|
"round_series",
|
@@ -55,6 +65,7 @@ features = [
|
|
55
65
|
"semi_anti_join",
|
56
66
|
"serde-lazy",
|
57
67
|
"sign",
|
68
|
+
"string_encoding",
|
58
69
|
"string_justify",
|
59
70
|
"strings",
|
60
71
|
"timezones",
|
@@ -63,3 +74,9 @@ features = [
|
|
63
74
|
"trigonometry",
|
64
75
|
"unique_counts",
|
65
76
|
]
|
77
|
+
|
78
|
+
[target.'cfg(target_os = "linux")'.dependencies]
|
79
|
+
jemallocator = { version = "0.5", features = ["disable_initial_exec_tls"] }
|
80
|
+
|
81
|
+
[target.'cfg(not(target_os = "linux"))'.dependencies]
|
82
|
+
mimalloc = { version = "0.1", default-features = false }
|
@@ -1,9 +1,12 @@
|
|
1
|
-
use magnus::{RArray, Symbol, TryConvert, Value, QNIL};
|
1
|
+
use magnus::{class, RArray, Symbol, TryConvert, Value, QNIL};
|
2
|
+
use polars::chunked_array::object::PolarsObjectSafe;
|
2
3
|
use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
|
3
4
|
use polars::datatypes::AnyValue;
|
4
5
|
use polars::frame::DataFrame;
|
5
6
|
use polars::prelude::*;
|
6
7
|
use polars::series::ops::NullBehavior;
|
8
|
+
use std::fmt::{Display, Formatter};
|
9
|
+
use std::hash::{Hash, Hasher};
|
7
10
|
|
8
11
|
use crate::{RbDataFrame, RbPolarsErr, RbResult, RbSeries, RbValueError};
|
9
12
|
|
@@ -79,6 +82,32 @@ impl From<Wrap<AnyValue<'_>>> for Value {
|
|
79
82
|
AnyValue::Null => *QNIL,
|
80
83
|
AnyValue::Boolean(v) => Value::from(v),
|
81
84
|
AnyValue::Utf8(v) => Value::from(v),
|
85
|
+
AnyValue::Date(v) => class::time()
|
86
|
+
.funcall::<_, _, Value>("at", (v * 86400,))
|
87
|
+
.unwrap()
|
88
|
+
.funcall::<_, _, Value>("utc", ())
|
89
|
+
.unwrap()
|
90
|
+
.funcall::<_, _, Value>("to_date", ())
|
91
|
+
.unwrap(),
|
92
|
+
AnyValue::Datetime(v, tu, tz) => {
|
93
|
+
let t = match tu {
|
94
|
+
TimeUnit::Nanoseconds => todo!(),
|
95
|
+
TimeUnit::Microseconds => {
|
96
|
+
let sec = v / 1000000;
|
97
|
+
let subsec = v % 1000000;
|
98
|
+
class::time()
|
99
|
+
.funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("usec")))
|
100
|
+
.unwrap()
|
101
|
+
}
|
102
|
+
TimeUnit::Milliseconds => todo!(),
|
103
|
+
};
|
104
|
+
|
105
|
+
if tz.is_some() {
|
106
|
+
todo!();
|
107
|
+
} else {
|
108
|
+
t.funcall::<_, _, Value>("utc", ()).unwrap()
|
109
|
+
}
|
110
|
+
}
|
82
111
|
_ => todo!(),
|
83
112
|
}
|
84
113
|
}
|
@@ -102,10 +131,19 @@ impl TryConvert for Wrap<DataType> {
|
|
102
131
|
"i32" => DataType::Int32,
|
103
132
|
"i64" => DataType::Int64,
|
104
133
|
"str" => DataType::Utf8,
|
134
|
+
"bin" => DataType::Binary,
|
105
135
|
"bool" => DataType::Boolean,
|
136
|
+
"cat" => DataType::Categorical(None),
|
137
|
+
"date" => DataType::Date,
|
138
|
+
"datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
|
106
139
|
"f32" => DataType::Float32,
|
140
|
+
"time" => DataType::Time,
|
141
|
+
"dur" => DataType::Duration(TimeUnit::Microseconds),
|
107
142
|
"f64" => DataType::Float64,
|
108
|
-
"
|
143
|
+
// "obj" => DataType::Object(OBJECT_NAME),
|
144
|
+
"list" => DataType::List(Box::new(DataType::Boolean)),
|
145
|
+
"null" => DataType::Null,
|
146
|
+
"unk" => DataType::Unknown,
|
109
147
|
_ => {
|
110
148
|
return Err(RbValueError::new_err(format!(
|
111
149
|
"{} is not a supported DataType.",
|
@@ -221,6 +259,22 @@ impl TryConvert for Wrap<JoinType> {
|
|
221
259
|
}
|
222
260
|
}
|
223
261
|
|
262
|
+
impl TryConvert for Wrap<ListToStructWidthStrategy> {
|
263
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
264
|
+
let parsed = match ob.try_convert::<String>()?.as_str() {
|
265
|
+
"first_non_null" => ListToStructWidthStrategy::FirstNonNull,
|
266
|
+
"max_width" => ListToStructWidthStrategy::MaxWidth,
|
267
|
+
v => {
|
268
|
+
return Err(RbValueError::new_err(format!(
|
269
|
+
"n_field_strategy must be one of {{'first_non_null', 'max_width'}}, got {}",
|
270
|
+
v
|
271
|
+
)))
|
272
|
+
}
|
273
|
+
};
|
274
|
+
Ok(Wrap(parsed))
|
275
|
+
}
|
276
|
+
}
|
277
|
+
|
224
278
|
impl TryConvert for Wrap<NullBehavior> {
|
225
279
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
226
280
|
let parsed = match ob.try_convert::<String>()?.as_str() {
|
@@ -407,3 +461,62 @@ pub fn parse_parquet_compression(
|
|
407
461
|
};
|
408
462
|
Ok(parsed)
|
409
463
|
}
|
464
|
+
|
465
|
+
#[derive(Clone, Debug)]
|
466
|
+
pub struct ObjectValue {
|
467
|
+
pub inner: Value,
|
468
|
+
}
|
469
|
+
|
470
|
+
impl Hash for ObjectValue {
|
471
|
+
fn hash<H: Hasher>(&self, state: &mut H) {
|
472
|
+
let h = self
|
473
|
+
.inner
|
474
|
+
.funcall::<_, _, isize>("hash", ())
|
475
|
+
.expect("should be hashable");
|
476
|
+
state.write_isize(h)
|
477
|
+
}
|
478
|
+
}
|
479
|
+
|
480
|
+
impl Eq for ObjectValue {}
|
481
|
+
|
482
|
+
impl PartialEq for ObjectValue {
|
483
|
+
fn eq(&self, other: &Self) -> bool {
|
484
|
+
self.inner.eql(&other.inner).unwrap_or(false)
|
485
|
+
}
|
486
|
+
}
|
487
|
+
|
488
|
+
impl Display for ObjectValue {
|
489
|
+
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
490
|
+
write!(f, "{}", self.inner)
|
491
|
+
}
|
492
|
+
}
|
493
|
+
|
494
|
+
impl PolarsObject for ObjectValue {
|
495
|
+
fn type_name() -> &'static str {
|
496
|
+
"object"
|
497
|
+
}
|
498
|
+
}
|
499
|
+
|
500
|
+
impl From<Value> for ObjectValue {
|
501
|
+
fn from(v: Value) -> Self {
|
502
|
+
Self { inner: v }
|
503
|
+
}
|
504
|
+
}
|
505
|
+
|
506
|
+
impl From<&dyn PolarsObjectSafe> for &ObjectValue {
|
507
|
+
fn from(val: &dyn PolarsObjectSafe) -> Self {
|
508
|
+
unsafe { &*(val as *const dyn PolarsObjectSafe as *const ObjectValue) }
|
509
|
+
}
|
510
|
+
}
|
511
|
+
|
512
|
+
impl ObjectValue {
|
513
|
+
pub fn to_object(&self) -> Value {
|
514
|
+
self.inner
|
515
|
+
}
|
516
|
+
}
|
517
|
+
|
518
|
+
impl Default for ObjectValue {
|
519
|
+
fn default() -> Self {
|
520
|
+
ObjectValue { inner: *QNIL }
|
521
|
+
}
|
522
|
+
}
|