polars-df 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.yardopts +3 -0
- data/CHANGELOG.md +9 -0
- data/Cargo.lock +74 -3
- data/Cargo.toml +3 -0
- data/README.md +1 -1
- data/ext/polars/Cargo.toml +18 -1
- data/ext/polars/src/conversion.rs +115 -2
- data/ext/polars/src/dataframe.rs +228 -11
- data/ext/polars/src/error.rs +4 -0
- data/ext/polars/src/lazy/dataframe.rs +5 -5
- data/ext/polars/src/lazy/dsl.rs +157 -2
- data/ext/polars/src/lib.rs +185 -10
- data/ext/polars/src/list_construction.rs +100 -0
- data/ext/polars/src/series.rs +217 -29
- data/ext/polars/src/set.rs +91 -0
- data/ext/polars/src/utils.rs +19 -0
- data/lib/polars/batched_csv_reader.rb +1 -0
- data/lib/polars/cat_expr.rb +39 -0
- data/lib/polars/cat_name_space.rb +54 -0
- data/lib/polars/data_frame.rb +2384 -140
- data/lib/polars/date_time_expr.rb +1282 -7
- data/lib/polars/date_time_name_space.rb +1484 -0
- data/lib/polars/exceptions.rb +20 -0
- data/lib/polars/expr.rb +4374 -53
- data/lib/polars/expr_dispatch.rb +22 -0
- data/lib/polars/functions.rb +219 -0
- data/lib/polars/group_by.rb +518 -0
- data/lib/polars/io.rb +421 -2
- data/lib/polars/lazy_frame.rb +1267 -69
- data/lib/polars/lazy_functions.rb +412 -24
- data/lib/polars/lazy_group_by.rb +80 -0
- data/lib/polars/list_expr.rb +507 -5
- data/lib/polars/list_name_space.rb +346 -0
- data/lib/polars/meta_expr.rb +21 -0
- data/lib/polars/series.rb +2256 -242
- data/lib/polars/slice.rb +104 -0
- data/lib/polars/string_expr.rb +847 -10
- data/lib/polars/string_name_space.rb +690 -0
- data/lib/polars/struct_expr.rb +73 -0
- data/lib/polars/struct_name_space.rb +64 -0
- data/lib/polars/utils.rb +71 -3
- data/lib/polars/version.rb +2 -1
- data/lib/polars/when.rb +1 -0
- data/lib/polars/when_then.rb +1 -0
- data/lib/polars.rb +12 -10
- metadata +15 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6abc9619a425d8aaa0255864b063c41835349063aa4919df133ac5a4ceb972f2
|
4
|
+
data.tar.gz: 78372a2a9eeddb3a8080b1d615991415b9ef7752752319e250f143841bfa67f3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e6fb27a50908c07e5f2f72c81171f07bfdf0999b5148421bdeb1ad7dc69cee1f0bae02021fa18fdad6d1740ea9273464daec513db5e3c7906d5839e77b7d6a66
|
7
|
+
data.tar.gz: 2eb9df841575711a057dd1ca2986403667306ead52cf540491899ffaa184d4878c1bdfc1015e3f5831c12c668de0d4126cbab7c63d1770684e10012f3d28183f
|
data/.yardopts
ADDED
data/CHANGELOG.md
CHANGED
data/Cargo.lock
CHANGED
@@ -483,6 +483,12 @@ version = "0.1.0"
|
|
483
483
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
484
484
|
checksum = "ee1b05cbd864bcaecbd3455d6d967862d446e4ebfc3c2e5e5b9841e53cba6673"
|
485
485
|
|
486
|
+
[[package]]
|
487
|
+
name = "fs_extra"
|
488
|
+
version = "1.2.0"
|
489
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
490
|
+
checksum = "2022715d62ab30faffd124d40b76f4134a550a87792276512b18d63272333394"
|
491
|
+
|
486
492
|
[[package]]
|
487
493
|
name = "futures"
|
488
494
|
version = "0.3.25"
|
@@ -646,6 +652,12 @@ dependencies = [
|
|
646
652
|
"libc",
|
647
653
|
]
|
648
654
|
|
655
|
+
[[package]]
|
656
|
+
name = "hex"
|
657
|
+
version = "0.4.3"
|
658
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
659
|
+
checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"
|
660
|
+
|
649
661
|
[[package]]
|
650
662
|
name = "indexmap"
|
651
663
|
version = "1.8.0"
|
@@ -663,6 +675,27 @@ version = "1.0.4"
|
|
663
675
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
664
676
|
checksum = "4217ad341ebadf8d8e724e264f13e593e0648f5b3e94b3896a5df283be015ecc"
|
665
677
|
|
678
|
+
[[package]]
|
679
|
+
name = "jemalloc-sys"
|
680
|
+
version = "0.5.2+5.3.0-patched"
|
681
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
682
|
+
checksum = "134163979b6eed9564c98637b710b40979939ba351f59952708234ea11b5f3f8"
|
683
|
+
dependencies = [
|
684
|
+
"cc",
|
685
|
+
"fs_extra",
|
686
|
+
"libc",
|
687
|
+
]
|
688
|
+
|
689
|
+
[[package]]
|
690
|
+
name = "jemallocator"
|
691
|
+
version = "0.5.0"
|
692
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
693
|
+
checksum = "16c2514137880c52b0b4822b563fadd38257c1f380858addb74a400889696ea6"
|
694
|
+
dependencies = [
|
695
|
+
"jemalloc-sys",
|
696
|
+
"libc",
|
697
|
+
]
|
698
|
+
|
666
699
|
[[package]]
|
667
700
|
name = "jobserver"
|
668
701
|
version = "0.1.25"
|
@@ -683,13 +716,23 @@ dependencies = [
|
|
683
716
|
|
684
717
|
[[package]]
|
685
718
|
name = "json-deserializer"
|
686
|
-
version = "0.4.
|
719
|
+
version = "0.4.3"
|
687
720
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
688
|
-
checksum = "
|
721
|
+
checksum = "daba674f7eecf80fe8bbbf196340908ad1a22510fe71fd6111bb50f441b26440"
|
689
722
|
dependencies = [
|
690
723
|
"indexmap",
|
691
724
|
]
|
692
725
|
|
726
|
+
[[package]]
|
727
|
+
name = "jsonpath_lib"
|
728
|
+
version = "0.3.0"
|
729
|
+
source = "git+https://github.com/ritchie46/jsonpath?rev=24eaf0b4416edff38a4d1b6b17bc4b9f3f047b4b#24eaf0b4416edff38a4d1b6b17bc4b9f3f047b4b"
|
730
|
+
dependencies = [
|
731
|
+
"log",
|
732
|
+
"serde",
|
733
|
+
"serde_json",
|
734
|
+
]
|
735
|
+
|
693
736
|
[[package]]
|
694
737
|
name = "lazy_static"
|
695
738
|
version = "1.4.0"
|
@@ -797,6 +840,16 @@ version = "0.2.6"
|
|
797
840
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
798
841
|
checksum = "348108ab3fba42ec82ff6e9564fc4ca0247bdccdc68dd8af9764bbc79c3c8ffb"
|
799
842
|
|
843
|
+
[[package]]
|
844
|
+
name = "libmimalloc-sys"
|
845
|
+
version = "0.1.28"
|
846
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
847
|
+
checksum = "04d1c67deb83e6b75fa4fe3309e09cfeade12e7721d95322af500d3814ea60c9"
|
848
|
+
dependencies = [
|
849
|
+
"cc",
|
850
|
+
"libc",
|
851
|
+
]
|
852
|
+
|
800
853
|
[[package]]
|
801
854
|
name = "lock_api"
|
802
855
|
version = "0.4.9"
|
@@ -882,6 +935,15 @@ dependencies = [
|
|
882
935
|
"autocfg",
|
883
936
|
]
|
884
937
|
|
938
|
+
[[package]]
|
939
|
+
name = "mimalloc"
|
940
|
+
version = "0.1.32"
|
941
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
942
|
+
checksum = "9b2374e2999959a7b583e1811a1ddbf1d3a4b9496eceb9746f1192a59d871eca"
|
943
|
+
dependencies = [
|
944
|
+
"libmimalloc-sys",
|
945
|
+
]
|
946
|
+
|
885
947
|
[[package]]
|
886
948
|
name = "minimal-lexical"
|
887
949
|
version = "0.2.1"
|
@@ -1160,10 +1222,13 @@ dependencies = [
|
|
1160
1222
|
|
1161
1223
|
[[package]]
|
1162
1224
|
name = "polars"
|
1163
|
-
version = "0.1.2"
|
1225
|
+
version = "0.1.4"
|
1164
1226
|
dependencies = [
|
1227
|
+
"jemallocator",
|
1165
1228
|
"magnus",
|
1229
|
+
"mimalloc",
|
1166
1230
|
"polars 0.25.1",
|
1231
|
+
"polars-core",
|
1167
1232
|
"serde_json",
|
1168
1233
|
]
|
1169
1234
|
|
@@ -1202,11 +1267,13 @@ dependencies = [
|
|
1202
1267
|
"ahash 0.8.2",
|
1203
1268
|
"anyhow",
|
1204
1269
|
"arrow2",
|
1270
|
+
"base64",
|
1205
1271
|
"bitflags",
|
1206
1272
|
"chrono",
|
1207
1273
|
"chrono-tz",
|
1208
1274
|
"comfy-table",
|
1209
1275
|
"hashbrown 0.12.3",
|
1276
|
+
"hex",
|
1210
1277
|
"indexmap",
|
1211
1278
|
"num",
|
1212
1279
|
"once_cell",
|
@@ -1217,6 +1284,7 @@ dependencies = [
|
|
1217
1284
|
"rayon",
|
1218
1285
|
"regex",
|
1219
1286
|
"serde",
|
1287
|
+
"serde_json",
|
1220
1288
|
"smartstring",
|
1221
1289
|
"thiserror",
|
1222
1290
|
]
|
@@ -1276,9 +1344,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1276
1344
|
checksum = "bfd3f6552b3e9539634c35047f372db331b6227f75c36fcbe4670ab58bbcbeb3"
|
1277
1345
|
dependencies = [
|
1278
1346
|
"arrow2",
|
1347
|
+
"jsonpath_lib",
|
1279
1348
|
"polars-arrow",
|
1280
1349
|
"polars-core",
|
1281
1350
|
"polars-utils",
|
1351
|
+
"serde_json",
|
1282
1352
|
]
|
1283
1353
|
|
1284
1354
|
[[package]]
|
@@ -1547,6 +1617,7 @@ version = "1.0.88"
|
|
1547
1617
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
1548
1618
|
checksum = "8e8b3801309262e8184d9687fb697586833e939767aea0dda89f5a8e650e8bd7"
|
1549
1619
|
dependencies = [
|
1620
|
+
"indexmap",
|
1550
1621
|
"itoa",
|
1551
1622
|
"ryu",
|
1552
1623
|
"serde",
|
data/Cargo.toml
CHANGED
data/README.md
CHANGED
@@ -27,7 +27,7 @@ Polars.read_csv("iris.csv")
|
|
27
27
|
.collect
|
28
28
|
```
|
29
29
|
|
30
|
-
You can follow [Polars tutorials](https://pola-rs.github.io/polars-book/user-guide/introduction.html) and convert the code to Ruby in many cases. Feel free to open an issue if you run into problems.
|
30
|
+
You can follow [Polars tutorials](https://pola-rs.github.io/polars-book/user-guide/introduction.html) and convert the code to Ruby in many cases. Feel free to open an issue if you run into problems. Some methods are missing at the moment.
|
31
31
|
|
32
32
|
## Examples
|
33
33
|
|
data/ext/polars/Cargo.toml
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
[package]
|
2
2
|
name = "polars"
|
3
|
-
version = "0.1.2"
|
3
|
+
version = "0.1.4"
|
4
4
|
authors = ["Andrew Kane <andrew@ankane.org>"]
|
5
5
|
edition = "2021"
|
6
6
|
publish = false
|
@@ -10,6 +10,7 @@ crate-type = ["cdylib"]
|
|
10
10
|
|
11
11
|
[dependencies]
|
12
12
|
magnus = "0.4"
|
13
|
+
polars-core = "0.25.1"
|
13
14
|
serde_json = "1"
|
14
15
|
|
15
16
|
[dependencies.polars]
|
@@ -17,10 +18,13 @@ version = "0.25.1"
|
|
17
18
|
features = [
|
18
19
|
"abs",
|
19
20
|
"arange",
|
21
|
+
"arg_where",
|
20
22
|
"concat_str",
|
23
|
+
"cse",
|
21
24
|
"csv-file",
|
22
25
|
"cum_agg",
|
23
26
|
"cumulative_eval",
|
27
|
+
"dataframe_arithmetic",
|
24
28
|
"date_offset",
|
25
29
|
"diagonal_concat",
|
26
30
|
"diff",
|
@@ -28,6 +32,7 @@ features = [
|
|
28
32
|
"dtype-full",
|
29
33
|
"dynamic_groupby",
|
30
34
|
"ewma",
|
35
|
+
"extract_jsonpath",
|
31
36
|
"fmt",
|
32
37
|
"horizontal_concat",
|
33
38
|
"interpolate",
|
@@ -38,16 +43,21 @@ features = [
|
|
38
43
|
"lazy",
|
39
44
|
"lazy_regex",
|
40
45
|
"list_eval",
|
46
|
+
"list_to_struct",
|
41
47
|
"log",
|
42
48
|
"meta",
|
43
49
|
"mode",
|
44
50
|
"moment",
|
51
|
+
"object",
|
45
52
|
"parquet",
|
46
53
|
"partition_by",
|
47
54
|
"pct_change",
|
55
|
+
"performant",
|
48
56
|
"product",
|
57
|
+
"propagate_nans",
|
49
58
|
"random",
|
50
59
|
"rank",
|
60
|
+
"reinterpret",
|
51
61
|
"repeat_by",
|
52
62
|
"rolling_window",
|
53
63
|
"round_series",
|
@@ -55,6 +65,7 @@ features = [
|
|
55
65
|
"semi_anti_join",
|
56
66
|
"serde-lazy",
|
57
67
|
"sign",
|
68
|
+
"string_encoding",
|
58
69
|
"string_justify",
|
59
70
|
"strings",
|
60
71
|
"timezones",
|
@@ -63,3 +74,9 @@ features = [
|
|
63
74
|
"trigonometry",
|
64
75
|
"unique_counts",
|
65
76
|
]
|
77
|
+
|
78
|
+
[target.'cfg(target_os = "linux")'.dependencies]
|
79
|
+
jemallocator = { version = "0.5", features = ["disable_initial_exec_tls"] }
|
80
|
+
|
81
|
+
[target.'cfg(not(target_os = "linux"))'.dependencies]
|
82
|
+
mimalloc = { version = "0.1", default-features = false }
|
data/ext/polars/src/conversion.rs
CHANGED
@@ -1,9 +1,12 @@
|
|
1
|
-
use magnus::{RArray, Symbol, TryConvert, Value, QNIL};
|
1
|
+
use magnus::{class, RArray, Symbol, TryConvert, Value, QNIL};
|
2
|
+
use polars::chunked_array::object::PolarsObjectSafe;
|
2
3
|
use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
|
3
4
|
use polars::datatypes::AnyValue;
|
4
5
|
use polars::frame::DataFrame;
|
5
6
|
use polars::prelude::*;
|
6
7
|
use polars::series::ops::NullBehavior;
|
8
|
+
use std::fmt::{Display, Formatter};
|
9
|
+
use std::hash::{Hash, Hasher};
|
7
10
|
|
8
11
|
use crate::{RbDataFrame, RbPolarsErr, RbResult, RbSeries, RbValueError};
|
9
12
|
|
@@ -79,6 +82,32 @@ impl From<Wrap<AnyValue<'_>>> for Value {
|
|
79
82
|
AnyValue::Null => *QNIL,
|
80
83
|
AnyValue::Boolean(v) => Value::from(v),
|
81
84
|
AnyValue::Utf8(v) => Value::from(v),
|
85
|
+
AnyValue::Date(v) => class::time()
|
86
|
+
.funcall::<_, _, Value>("at", (v * 86400,))
|
87
|
+
.unwrap()
|
88
|
+
.funcall::<_, _, Value>("utc", ())
|
89
|
+
.unwrap()
|
90
|
+
.funcall::<_, _, Value>("to_date", ())
|
91
|
+
.unwrap(),
|
92
|
+
AnyValue::Datetime(v, tu, tz) => {
|
93
|
+
let t = match tu {
|
94
|
+
TimeUnit::Nanoseconds => todo!(),
|
95
|
+
TimeUnit::Microseconds => {
|
96
|
+
let sec = v / 1000000;
|
97
|
+
let subsec = v % 1000000;
|
98
|
+
class::time()
|
99
|
+
.funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("usec")))
|
100
|
+
.unwrap()
|
101
|
+
}
|
102
|
+
TimeUnit::Milliseconds => todo!(),
|
103
|
+
};
|
104
|
+
|
105
|
+
if tz.is_some() {
|
106
|
+
todo!();
|
107
|
+
} else {
|
108
|
+
t.funcall::<_, _, Value>("utc", ()).unwrap()
|
109
|
+
}
|
110
|
+
}
|
82
111
|
_ => todo!(),
|
83
112
|
}
|
84
113
|
}
|
@@ -102,10 +131,19 @@ impl TryConvert for Wrap<DataType> {
|
|
102
131
|
"i32" => DataType::Int32,
|
103
132
|
"i64" => DataType::Int64,
|
104
133
|
"str" => DataType::Utf8,
|
134
|
+
"bin" => DataType::Binary,
|
105
135
|
"bool" => DataType::Boolean,
|
136
|
+
"cat" => DataType::Categorical(None),
|
137
|
+
"date" => DataType::Date,
|
138
|
+
"datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
|
106
139
|
"f32" => DataType::Float32,
|
140
|
+
"time" => DataType::Time,
|
141
|
+
"dur" => DataType::Duration(TimeUnit::Microseconds),
|
107
142
|
"f64" => DataType::Float64,
|
108
|
-
"
|
143
|
+
// "obj" => DataType::Object(OBJECT_NAME),
|
144
|
+
"list" => DataType::List(Box::new(DataType::Boolean)),
|
145
|
+
"null" => DataType::Null,
|
146
|
+
"unk" => DataType::Unknown,
|
109
147
|
_ => {
|
110
148
|
return Err(RbValueError::new_err(format!(
|
111
149
|
"{} is not a supported DataType.",
|
@@ -221,6 +259,22 @@ impl TryConvert for Wrap<JoinType> {
|
|
221
259
|
}
|
222
260
|
}
|
223
261
|
|
262
|
+
impl TryConvert for Wrap<ListToStructWidthStrategy> {
|
263
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
264
|
+
let parsed = match ob.try_convert::<String>()?.as_str() {
|
265
|
+
"first_non_null" => ListToStructWidthStrategy::FirstNonNull,
|
266
|
+
"max_width" => ListToStructWidthStrategy::MaxWidth,
|
267
|
+
v => {
|
268
|
+
return Err(RbValueError::new_err(format!(
|
269
|
+
"n_field_strategy must be one of {{'first_non_null', 'max_width'}}, got {}",
|
270
|
+
v
|
271
|
+
)))
|
272
|
+
}
|
273
|
+
};
|
274
|
+
Ok(Wrap(parsed))
|
275
|
+
}
|
276
|
+
}
|
277
|
+
|
224
278
|
impl TryConvert for Wrap<NullBehavior> {
|
225
279
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
226
280
|
let parsed = match ob.try_convert::<String>()?.as_str() {
|
@@ -407,3 +461,62 @@ pub fn parse_parquet_compression(
|
|
407
461
|
};
|
408
462
|
Ok(parsed)
|
409
463
|
}
|
464
|
+
|
465
|
+
#[derive(Clone, Debug)]
|
466
|
+
pub struct ObjectValue {
|
467
|
+
pub inner: Value,
|
468
|
+
}
|
469
|
+
|
470
|
+
impl Hash for ObjectValue {
|
471
|
+
fn hash<H: Hasher>(&self, state: &mut H) {
|
472
|
+
let h = self
|
473
|
+
.inner
|
474
|
+
.funcall::<_, _, isize>("hash", ())
|
475
|
+
.expect("should be hashable");
|
476
|
+
state.write_isize(h)
|
477
|
+
}
|
478
|
+
}
|
479
|
+
|
480
|
+
impl Eq for ObjectValue {}
|
481
|
+
|
482
|
+
impl PartialEq for ObjectValue {
|
483
|
+
fn eq(&self, other: &Self) -> bool {
|
484
|
+
self.inner.eql(&other.inner).unwrap_or(false)
|
485
|
+
}
|
486
|
+
}
|
487
|
+
|
488
|
+
impl Display for ObjectValue {
|
489
|
+
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
490
|
+
write!(f, "{}", self.inner)
|
491
|
+
}
|
492
|
+
}
|
493
|
+
|
494
|
+
impl PolarsObject for ObjectValue {
|
495
|
+
fn type_name() -> &'static str {
|
496
|
+
"object"
|
497
|
+
}
|
498
|
+
}
|
499
|
+
|
500
|
+
impl From<Value> for ObjectValue {
|
501
|
+
fn from(v: Value) -> Self {
|
502
|
+
Self { inner: v }
|
503
|
+
}
|
504
|
+
}
|
505
|
+
|
506
|
+
impl From<&dyn PolarsObjectSafe> for &ObjectValue {
|
507
|
+
fn from(val: &dyn PolarsObjectSafe) -> Self {
|
508
|
+
unsafe { &*(val as *const dyn PolarsObjectSafe as *const ObjectValue) }
|
509
|
+
}
|
510
|
+
}
|
511
|
+
|
512
|
+
impl ObjectValue {
|
513
|
+
pub fn to_object(&self) -> Value {
|
514
|
+
self.inner
|
515
|
+
}
|
516
|
+
}
|
517
|
+
|
518
|
+
impl Default for ObjectValue {
|
519
|
+
fn default() -> Self {
|
520
|
+
ObjectValue { inner: *QNIL }
|
521
|
+
}
|
522
|
+
}
|