polars-df 0.1.2 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (47) hide show
  1. checksums.yaml +4 -4
  2. data/.yardopts +3 -0
  3. data/CHANGELOG.md +9 -0
  4. data/Cargo.lock +74 -3
  5. data/Cargo.toml +3 -0
  6. data/README.md +1 -1
  7. data/ext/polars/Cargo.toml +18 -1
  8. data/ext/polars/src/conversion.rs +115 -2
  9. data/ext/polars/src/dataframe.rs +228 -11
  10. data/ext/polars/src/error.rs +4 -0
  11. data/ext/polars/src/lazy/dataframe.rs +5 -5
  12. data/ext/polars/src/lazy/dsl.rs +157 -2
  13. data/ext/polars/src/lib.rs +185 -10
  14. data/ext/polars/src/list_construction.rs +100 -0
  15. data/ext/polars/src/series.rs +217 -29
  16. data/ext/polars/src/set.rs +91 -0
  17. data/ext/polars/src/utils.rs +19 -0
  18. data/lib/polars/batched_csv_reader.rb +1 -0
  19. data/lib/polars/cat_expr.rb +39 -0
  20. data/lib/polars/cat_name_space.rb +54 -0
  21. data/lib/polars/data_frame.rb +2384 -140
  22. data/lib/polars/date_time_expr.rb +1282 -7
  23. data/lib/polars/date_time_name_space.rb +1484 -0
  24. data/lib/polars/exceptions.rb +20 -0
  25. data/lib/polars/expr.rb +4374 -53
  26. data/lib/polars/expr_dispatch.rb +22 -0
  27. data/lib/polars/functions.rb +219 -0
  28. data/lib/polars/group_by.rb +518 -0
  29. data/lib/polars/io.rb +421 -2
  30. data/lib/polars/lazy_frame.rb +1267 -69
  31. data/lib/polars/lazy_functions.rb +412 -24
  32. data/lib/polars/lazy_group_by.rb +80 -0
  33. data/lib/polars/list_expr.rb +507 -5
  34. data/lib/polars/list_name_space.rb +346 -0
  35. data/lib/polars/meta_expr.rb +21 -0
  36. data/lib/polars/series.rb +2256 -242
  37. data/lib/polars/slice.rb +104 -0
  38. data/lib/polars/string_expr.rb +847 -10
  39. data/lib/polars/string_name_space.rb +690 -0
  40. data/lib/polars/struct_expr.rb +73 -0
  41. data/lib/polars/struct_name_space.rb +64 -0
  42. data/lib/polars/utils.rb +71 -3
  43. data/lib/polars/version.rb +2 -1
  44. data/lib/polars/when.rb +1 -0
  45. data/lib/polars/when_then.rb +1 -0
  46. data/lib/polars.rb +12 -10
  47. metadata +15 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 510c7a761553fb3a49919add842d520da1518c5df6dd37f93af338e928c7a207
4
- data.tar.gz: 3e724d3c2553bb6b4a587f056a04977990b85103ea9ae231166746407e1c0b1e
3
+ metadata.gz: 6abc9619a425d8aaa0255864b063c41835349063aa4919df133ac5a4ceb972f2
4
+ data.tar.gz: 78372a2a9eeddb3a8080b1d615991415b9ef7752752319e250f143841bfa67f3
5
5
  SHA512:
6
- metadata.gz: 59391ecd7a3d14372ad24693240ebeae4d83c59d55b37876fc6f216dd576897113607f7b5f6f6a3d7a5d75f96def72b1f2659f3de71e6a975a2753719879fa33
7
- data.tar.gz: 5a2e8cdf3aed01f16823be62067b0ec01e4e0dc3bfa4dc27929dfbe42d7c92e62f6e7a09bc78533d220a3bfc9890f9f0fb069482a338fbd0b55603aa42fddc57
6
+ metadata.gz: e6fb27a50908c07e5f2f72c81171f07bfdf0999b5148421bdeb1ad7dc69cee1f0bae02021fa18fdad6d1740ea9273464daec513db5e3c7906d5839e77b7d6a66
7
+ data.tar.gz: 2eb9df841575711a057dd1ca2986403667306ead52cf540491899ffaa184d4878c1bdfc1015e3f5831c12c668de0d4126cbab7c63d1770684e10012f3d28183f
data/.yardopts ADDED
@@ -0,0 +1,3 @@
1
+ --no-private
2
+ --markup markdown
3
+ --embed-mixins
data/CHANGELOG.md CHANGED
@@ -1,3 +1,12 @@
1
+ ## 0.1.4 (2022-12-02)
2
+
3
+ - Added more methods
4
+ - Improved performance
5
+
6
+ ## 0.1.3 (2022-11-27)
7
+
8
+ - Added more methods
9
+
1
10
  ## 0.1.2 (2022-11-25)
2
11
 
3
12
  - Added more methods
data/Cargo.lock CHANGED
@@ -483,6 +483,12 @@ version = "0.1.0"
483
483
  source = "registry+https://github.com/rust-lang/crates.io-index"
484
484
  checksum = "ee1b05cbd864bcaecbd3455d6d967862d446e4ebfc3c2e5e5b9841e53cba6673"
485
485
 
486
+ [[package]]
487
+ name = "fs_extra"
488
+ version = "1.2.0"
489
+ source = "registry+https://github.com/rust-lang/crates.io-index"
490
+ checksum = "2022715d62ab30faffd124d40b76f4134a550a87792276512b18d63272333394"
491
+
486
492
  [[package]]
487
493
  name = "futures"
488
494
  version = "0.3.25"
@@ -646,6 +652,12 @@ dependencies = [
646
652
  "libc",
647
653
  ]
648
654
 
655
+ [[package]]
656
+ name = "hex"
657
+ version = "0.4.3"
658
+ source = "registry+https://github.com/rust-lang/crates.io-index"
659
+ checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"
660
+
649
661
  [[package]]
650
662
  name = "indexmap"
651
663
  version = "1.8.0"
@@ -663,6 +675,27 @@ version = "1.0.4"
663
675
  source = "registry+https://github.com/rust-lang/crates.io-index"
664
676
  checksum = "4217ad341ebadf8d8e724e264f13e593e0648f5b3e94b3896a5df283be015ecc"
665
677
 
678
+ [[package]]
679
+ name = "jemalloc-sys"
680
+ version = "0.5.2+5.3.0-patched"
681
+ source = "registry+https://github.com/rust-lang/crates.io-index"
682
+ checksum = "134163979b6eed9564c98637b710b40979939ba351f59952708234ea11b5f3f8"
683
+ dependencies = [
684
+ "cc",
685
+ "fs_extra",
686
+ "libc",
687
+ ]
688
+
689
+ [[package]]
690
+ name = "jemallocator"
691
+ version = "0.5.0"
692
+ source = "registry+https://github.com/rust-lang/crates.io-index"
693
+ checksum = "16c2514137880c52b0b4822b563fadd38257c1f380858addb74a400889696ea6"
694
+ dependencies = [
695
+ "jemalloc-sys",
696
+ "libc",
697
+ ]
698
+
666
699
  [[package]]
667
700
  name = "jobserver"
668
701
  version = "0.1.25"
@@ -683,13 +716,23 @@ dependencies = [
683
716
 
684
717
  [[package]]
685
718
  name = "json-deserializer"
686
- version = "0.4.2"
719
+ version = "0.4.3"
687
720
  source = "registry+https://github.com/rust-lang/crates.io-index"
688
- checksum = "d784d2d481d0bace3450572391d6076dd6d10c66c0ebc1a0be037b3b420664bd"
721
+ checksum = "daba674f7eecf80fe8bbbf196340908ad1a22510fe71fd6111bb50f441b26440"
689
722
  dependencies = [
690
723
  "indexmap",
691
724
  ]
692
725
 
726
+ [[package]]
727
+ name = "jsonpath_lib"
728
+ version = "0.3.0"
729
+ source = "git+https://github.com/ritchie46/jsonpath?rev=24eaf0b4416edff38a4d1b6b17bc4b9f3f047b4b#24eaf0b4416edff38a4d1b6b17bc4b9f3f047b4b"
730
+ dependencies = [
731
+ "log",
732
+ "serde",
733
+ "serde_json",
734
+ ]
735
+
693
736
  [[package]]
694
737
  name = "lazy_static"
695
738
  version = "1.4.0"
@@ -797,6 +840,16 @@ version = "0.2.6"
797
840
  source = "registry+https://github.com/rust-lang/crates.io-index"
798
841
  checksum = "348108ab3fba42ec82ff6e9564fc4ca0247bdccdc68dd8af9764bbc79c3c8ffb"
799
842
 
843
+ [[package]]
844
+ name = "libmimalloc-sys"
845
+ version = "0.1.28"
846
+ source = "registry+https://github.com/rust-lang/crates.io-index"
847
+ checksum = "04d1c67deb83e6b75fa4fe3309e09cfeade12e7721d95322af500d3814ea60c9"
848
+ dependencies = [
849
+ "cc",
850
+ "libc",
851
+ ]
852
+
800
853
  [[package]]
801
854
  name = "lock_api"
802
855
  version = "0.4.9"
@@ -882,6 +935,15 @@ dependencies = [
882
935
  "autocfg",
883
936
  ]
884
937
 
938
+ [[package]]
939
+ name = "mimalloc"
940
+ version = "0.1.32"
941
+ source = "registry+https://github.com/rust-lang/crates.io-index"
942
+ checksum = "9b2374e2999959a7b583e1811a1ddbf1d3a4b9496eceb9746f1192a59d871eca"
943
+ dependencies = [
944
+ "libmimalloc-sys",
945
+ ]
946
+
885
947
  [[package]]
886
948
  name = "minimal-lexical"
887
949
  version = "0.2.1"
@@ -1160,10 +1222,13 @@ dependencies = [
1160
1222
 
1161
1223
  [[package]]
1162
1224
  name = "polars"
1163
- version = "0.1.2"
1225
+ version = "0.1.4"
1164
1226
  dependencies = [
1227
+ "jemallocator",
1165
1228
  "magnus",
1229
+ "mimalloc",
1166
1230
  "polars 0.25.1",
1231
+ "polars-core",
1167
1232
  "serde_json",
1168
1233
  ]
1169
1234
 
@@ -1202,11 +1267,13 @@ dependencies = [
1202
1267
  "ahash 0.8.2",
1203
1268
  "anyhow",
1204
1269
  "arrow2",
1270
+ "base64",
1205
1271
  "bitflags",
1206
1272
  "chrono",
1207
1273
  "chrono-tz",
1208
1274
  "comfy-table",
1209
1275
  "hashbrown 0.12.3",
1276
+ "hex",
1210
1277
  "indexmap",
1211
1278
  "num",
1212
1279
  "once_cell",
@@ -1217,6 +1284,7 @@ dependencies = [
1217
1284
  "rayon",
1218
1285
  "regex",
1219
1286
  "serde",
1287
+ "serde_json",
1220
1288
  "smartstring",
1221
1289
  "thiserror",
1222
1290
  ]
@@ -1276,9 +1344,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
1276
1344
  checksum = "bfd3f6552b3e9539634c35047f372db331b6227f75c36fcbe4670ab58bbcbeb3"
1277
1345
  dependencies = [
1278
1346
  "arrow2",
1347
+ "jsonpath_lib",
1279
1348
  "polars-arrow",
1280
1349
  "polars-core",
1281
1350
  "polars-utils",
1351
+ "serde_json",
1282
1352
  ]
1283
1353
 
1284
1354
  [[package]]
@@ -1547,6 +1617,7 @@ version = "1.0.88"
1547
1617
  source = "registry+https://github.com/rust-lang/crates.io-index"
1548
1618
  checksum = "8e8b3801309262e8184d9687fb697586833e939767aea0dda89f5a8e650e8bd7"
1549
1619
  dependencies = [
1620
+ "indexmap",
1550
1621
  "itoa",
1551
1622
  "ryu",
1552
1623
  "serde",
data/Cargo.toml CHANGED
@@ -1,5 +1,8 @@
1
1
  [workspace]
2
2
  members = ["ext/polars"]
3
3
 
4
+ [patch.crates-io]
5
+ jsonpath_lib = { git = "https://github.com/ritchie46/jsonpath", rev = "24eaf0b4416edff38a4d1b6b17bc4b9f3f047b4b" }
6
+
4
7
  [profile.release]
5
8
  strip = true
data/README.md CHANGED
@@ -27,7 +27,7 @@ Polars.read_csv("iris.csv")
27
27
  .collect
28
28
  ```
29
29
 
30
- You can follow [Polars tutorials](https://pola-rs.github.io/polars-book/user-guide/introduction.html) and convert the code to Ruby in many cases. Feel free to open an issue if you run into problems. Note that many methods and options are missing at the moment.
30
+ You can follow [Polars tutorials](https://pola-rs.github.io/polars-book/user-guide/introduction.html) and convert the code to Ruby in many cases. Feel free to open an issue if you run into problems. Some methods are missing at the moment.
31
31
 
32
32
  ## Examples
33
33
 
data/ext/polars/Cargo.toml CHANGED
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "polars"
3
- version = "0.1.2"
3
+ version = "0.1.4"
4
4
  authors = ["Andrew Kane <andrew@ankane.org>"]
5
5
  edition = "2021"
6
6
  publish = false
@@ -10,6 +10,7 @@ crate-type = ["cdylib"]
10
10
 
11
11
  [dependencies]
12
12
  magnus = "0.4"
13
+ polars-core = "0.25.1"
13
14
  serde_json = "1"
14
15
 
15
16
  [dependencies.polars]
@@ -17,10 +18,13 @@ version = "0.25.1"
17
18
  features = [
18
19
  "abs",
19
20
  "arange",
21
+ "arg_where",
20
22
  "concat_str",
23
+ "cse",
21
24
  "csv-file",
22
25
  "cum_agg",
23
26
  "cumulative_eval",
27
+ "dataframe_arithmetic",
24
28
  "date_offset",
25
29
  "diagonal_concat",
26
30
  "diff",
@@ -28,6 +32,7 @@ features = [
28
32
  "dtype-full",
29
33
  "dynamic_groupby",
30
34
  "ewma",
35
+ "extract_jsonpath",
31
36
  "fmt",
32
37
  "horizontal_concat",
33
38
  "interpolate",
@@ -38,16 +43,21 @@ features = [
38
43
  "lazy",
39
44
  "lazy_regex",
40
45
  "list_eval",
46
+ "list_to_struct",
41
47
  "log",
42
48
  "meta",
43
49
  "mode",
44
50
  "moment",
51
+ "object",
45
52
  "parquet",
46
53
  "partition_by",
47
54
  "pct_change",
55
+ "performant",
48
56
  "product",
57
+ "propagate_nans",
49
58
  "random",
50
59
  "rank",
60
+ "reinterpret",
51
61
  "repeat_by",
52
62
  "rolling_window",
53
63
  "round_series",
@@ -55,6 +65,7 @@ features = [
55
65
  "semi_anti_join",
56
66
  "serde-lazy",
57
67
  "sign",
68
+ "string_encoding",
58
69
  "string_justify",
59
70
  "strings",
60
71
  "timezones",
@@ -63,3 +74,9 @@ features = [
63
74
  "trigonometry",
64
75
  "unique_counts",
65
76
  ]
77
+
78
+ [target.'cfg(target_os = "linux")'.dependencies]
79
+ jemallocator = { version = "0.5", features = ["disable_initial_exec_tls"] }
80
+
81
+ [target.'cfg(not(target_os = "linux"))'.dependencies]
82
+ mimalloc = { version = "0.1", default-features = false }
data/ext/polars/src/conversion.rs CHANGED
@@ -1,9 +1,12 @@
1
- use magnus::{RArray, Symbol, TryConvert, Value, QNIL};
1
+ use magnus::{class, RArray, Symbol, TryConvert, Value, QNIL};
2
+ use polars::chunked_array::object::PolarsObjectSafe;
2
3
  use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
3
4
  use polars::datatypes::AnyValue;
4
5
  use polars::frame::DataFrame;
5
6
  use polars::prelude::*;
6
7
  use polars::series::ops::NullBehavior;
8
+ use std::fmt::{Display, Formatter};
9
+ use std::hash::{Hash, Hasher};
7
10
 
8
11
  use crate::{RbDataFrame, RbPolarsErr, RbResult, RbSeries, RbValueError};
9
12
 
@@ -79,6 +82,32 @@ impl From<Wrap<AnyValue<'_>>> for Value {
79
82
  AnyValue::Null => *QNIL,
80
83
  AnyValue::Boolean(v) => Value::from(v),
81
84
  AnyValue::Utf8(v) => Value::from(v),
85
+ AnyValue::Date(v) => class::time()
86
+ .funcall::<_, _, Value>("at", (v * 86400,))
87
+ .unwrap()
88
+ .funcall::<_, _, Value>("utc", ())
89
+ .unwrap()
90
+ .funcall::<_, _, Value>("to_date", ())
91
+ .unwrap(),
92
+ AnyValue::Datetime(v, tu, tz) => {
93
+ let t = match tu {
94
+ TimeUnit::Nanoseconds => todo!(),
95
+ TimeUnit::Microseconds => {
96
+ let sec = v / 1000000;
97
+ let subsec = v % 1000000;
98
+ class::time()
99
+ .funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("usec")))
100
+ .unwrap()
101
+ }
102
+ TimeUnit::Milliseconds => todo!(),
103
+ };
104
+
105
+ if tz.is_some() {
106
+ todo!();
107
+ } else {
108
+ t.funcall::<_, _, Value>("utc", ()).unwrap()
109
+ }
110
+ }
82
111
  _ => todo!(),
83
112
  }
84
113
  }
@@ -102,10 +131,19 @@ impl TryConvert for Wrap<DataType> {
102
131
  "i32" => DataType::Int32,
103
132
  "i64" => DataType::Int64,
104
133
  "str" => DataType::Utf8,
134
+ "bin" => DataType::Binary,
105
135
  "bool" => DataType::Boolean,
136
+ "cat" => DataType::Categorical(None),
137
+ "date" => DataType::Date,
138
+ "datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
106
139
  "f32" => DataType::Float32,
140
+ "time" => DataType::Time,
141
+ "dur" => DataType::Duration(TimeUnit::Microseconds),
107
142
  "f64" => DataType::Float64,
108
- "date" => DataType::Date,
143
+ // "obj" => DataType::Object(OBJECT_NAME),
144
+ "list" => DataType::List(Box::new(DataType::Boolean)),
145
+ "null" => DataType::Null,
146
+ "unk" => DataType::Unknown,
109
147
  _ => {
110
148
  return Err(RbValueError::new_err(format!(
111
149
  "{} is not a supported DataType.",
@@ -221,6 +259,22 @@ impl TryConvert for Wrap<JoinType> {
221
259
  }
222
260
  }
223
261
 
262
+ impl TryConvert for Wrap<ListToStructWidthStrategy> {
263
+ fn try_convert(ob: Value) -> RbResult<Self> {
264
+ let parsed = match ob.try_convert::<String>()?.as_str() {
265
+ "first_non_null" => ListToStructWidthStrategy::FirstNonNull,
266
+ "max_width" => ListToStructWidthStrategy::MaxWidth,
267
+ v => {
268
+ return Err(RbValueError::new_err(format!(
269
+ "n_field_strategy must be one of {{'first_non_null', 'max_width'}}, got {}",
270
+ v
271
+ )))
272
+ }
273
+ };
274
+ Ok(Wrap(parsed))
275
+ }
276
+ }
277
+
224
278
  impl TryConvert for Wrap<NullBehavior> {
225
279
  fn try_convert(ob: Value) -> RbResult<Self> {
226
280
  let parsed = match ob.try_convert::<String>()?.as_str() {
@@ -407,3 +461,62 @@ pub fn parse_parquet_compression(
407
461
  };
408
462
  Ok(parsed)
409
463
  }
464
+
465
+ #[derive(Clone, Debug)]
466
+ pub struct ObjectValue {
467
+ pub inner: Value,
468
+ }
469
+
470
+ impl Hash for ObjectValue {
471
+ fn hash<H: Hasher>(&self, state: &mut H) {
472
+ let h = self
473
+ .inner
474
+ .funcall::<_, _, isize>("hash", ())
475
+ .expect("should be hashable");
476
+ state.write_isize(h)
477
+ }
478
+ }
479
+
480
+ impl Eq for ObjectValue {}
481
+
482
+ impl PartialEq for ObjectValue {
483
+ fn eq(&self, other: &Self) -> bool {
484
+ self.inner.eql(&other.inner).unwrap_or(false)
485
+ }
486
+ }
487
+
488
+ impl Display for ObjectValue {
489
+ fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
490
+ write!(f, "{}", self.inner)
491
+ }
492
+ }
493
+
494
+ impl PolarsObject for ObjectValue {
495
+ fn type_name() -> &'static str {
496
+ "object"
497
+ }
498
+ }
499
+
500
+ impl From<Value> for ObjectValue {
501
+ fn from(v: Value) -> Self {
502
+ Self { inner: v }
503
+ }
504
+ }
505
+
506
+ impl From<&dyn PolarsObjectSafe> for &ObjectValue {
507
+ fn from(val: &dyn PolarsObjectSafe) -> Self {
508
+ unsafe { &*(val as *const dyn PolarsObjectSafe as *const ObjectValue) }
509
+ }
510
+ }
511
+
512
+ impl ObjectValue {
513
+ pub fn to_object(&self) -> Value {
514
+ self.inner
515
+ }
516
+ }
517
+
518
+ impl Default for ObjectValue {
519
+ fn default() -> Self {
520
+ ObjectValue { inner: *QNIL }
521
+ }
522
+ }