polars-df 0.21.0 → 0.21.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -0
  3. data/Cargo.lock +1 -1
  4. data/ext/polars/Cargo.toml +7 -1
  5. data/ext/polars/src/conversion/mod.rs +92 -4
  6. data/ext/polars/src/exceptions.rs +1 -0
  7. data/ext/polars/src/expr/array.rs +73 -4
  8. data/ext/polars/src/expr/binary.rs +26 -1
  9. data/ext/polars/src/expr/bitwise.rs +39 -0
  10. data/ext/polars/src/expr/categorical.rs +20 -0
  11. data/ext/polars/src/expr/datatype.rs +24 -1
  12. data/ext/polars/src/expr/datetime.rs +58 -0
  13. data/ext/polars/src/expr/general.rs +84 -5
  14. data/ext/polars/src/expr/list.rs +24 -0
  15. data/ext/polars/src/expr/meta.rs +11 -0
  16. data/ext/polars/src/expr/mod.rs +1 -0
  17. data/ext/polars/src/expr/name.rs +8 -0
  18. data/ext/polars/src/expr/rolling.rs +20 -0
  19. data/ext/polars/src/expr/string.rs +59 -0
  20. data/ext/polars/src/expr/struct.rs +9 -1
  21. data/ext/polars/src/functions/io.rs +19 -0
  22. data/ext/polars/src/functions/lazy.rs +4 -0
  23. data/ext/polars/src/lazyframe/general.rs +51 -0
  24. data/ext/polars/src/lib.rs +119 -10
  25. data/ext/polars/src/map/dataframe.rs +2 -2
  26. data/ext/polars/src/map/series.rs +1 -1
  27. data/ext/polars/src/series/aggregation.rs +44 -0
  28. data/ext/polars/src/series/general.rs +64 -4
  29. data/lib/polars/array_expr.rb +382 -3
  30. data/lib/polars/array_name_space.rb +281 -0
  31. data/lib/polars/binary_expr.rb +67 -0
  32. data/lib/polars/binary_name_space.rb +43 -0
  33. data/lib/polars/cat_expr.rb +224 -0
  34. data/lib/polars/cat_name_space.rb +138 -0
  35. data/lib/polars/config.rb +2 -2
  36. data/lib/polars/convert.rb +6 -6
  37. data/lib/polars/data_frame.rb +684 -19
  38. data/lib/polars/data_type_expr.rb +52 -0
  39. data/lib/polars/data_types.rb +14 -2
  40. data/lib/polars/date_time_expr.rb +251 -0
  41. data/lib/polars/date_time_name_space.rb +299 -0
  42. data/lib/polars/expr.rb +1213 -180
  43. data/lib/polars/functions/datatype.rb +21 -0
  44. data/lib/polars/functions/lazy.rb +13 -0
  45. data/lib/polars/io/csv.rb +1 -1
  46. data/lib/polars/io/json.rb +4 -4
  47. data/lib/polars/io/ndjson.rb +4 -4
  48. data/lib/polars/io/parquet.rb +27 -5
  49. data/lib/polars/lazy_frame.rb +936 -20
  50. data/lib/polars/list_expr.rb +196 -4
  51. data/lib/polars/list_name_space.rb +201 -4
  52. data/lib/polars/meta_expr.rb +64 -0
  53. data/lib/polars/name_expr.rb +36 -0
  54. data/lib/polars/schema.rb +79 -3
  55. data/lib/polars/selector.rb +72 -0
  56. data/lib/polars/selectors.rb +3 -3
  57. data/lib/polars/series.rb +1051 -54
  58. data/lib/polars/string_expr.rb +411 -6
  59. data/lib/polars/string_name_space.rb +722 -49
  60. data/lib/polars/struct_expr.rb +103 -0
  61. data/lib/polars/struct_name_space.rb +19 -1
  62. data/lib/polars/utils/various.rb +18 -1
  63. data/lib/polars/utils.rb +5 -1
  64. data/lib/polars/version.rb +1 -1
  65. data/lib/polars.rb +2 -0
  66. metadata +4 -1
@@ -323,6 +323,10 @@ impl RbExpr {
323
323
  self.inner.clone().arg_min().into()
324
324
  }
325
325
 
326
+ pub fn index_of(&self, element: &Self) -> Self {
327
+ self.inner.clone().index_of(element.inner.clone()).into()
328
+ }
329
+
326
330
  pub fn search_sorted(
327
331
  &self,
328
332
  element: &Self,
@@ -429,6 +433,13 @@ impl RbExpr {
429
433
  .into()
430
434
  }
431
435
 
436
+ pub fn is_close(&self, other: &Self, abs_tol: f64, rel_tol: f64, nans_equal: bool) -> Self {
437
+ self.inner
438
+ .clone()
439
+ .is_close(other.inner.clone(), abs_tol, rel_tol, nans_equal)
440
+ .into()
441
+ }
442
+
432
443
  pub fn approx_n_unique(&self) -> Self {
433
444
  self.inner.clone().approx_n_unique().into()
434
445
  }
@@ -482,6 +493,10 @@ impl RbExpr {
482
493
  self.inner.clone().round(decimals, mode.0).into()
483
494
  }
484
495
 
496
+ pub fn round_sig_figs(&self, digits: i32) -> Self {
497
+ self.clone().inner.round_sig_figs(digits).into()
498
+ }
499
+
485
500
  pub fn floor(&self) -> Self {
486
501
  self.inner.clone().floor().into()
487
502
  }
@@ -517,6 +532,10 @@ impl RbExpr {
517
532
  self.inner.clone().tan().into()
518
533
  }
519
534
 
535
+ pub fn cot(&self) -> Self {
536
+ self.inner.clone().cot().into()
537
+ }
538
+
520
539
  pub fn arcsin(&self) -> Self {
521
540
  self.inner.clone().arcsin().into()
522
541
  }
@@ -553,6 +572,14 @@ impl RbExpr {
553
572
  self.inner.clone().arctanh().into()
554
573
  }
555
574
 
575
+ pub fn degrees(&self) -> Self {
576
+ self.inner.clone().degrees().into()
577
+ }
578
+
579
+ pub fn radians(&self) -> Self {
580
+ self.inner.clone().radians().into()
581
+ }
582
+
556
583
  pub fn sign(&self) -> Self {
557
584
  self.inner.clone().sign().into()
558
585
  }
@@ -566,18 +593,35 @@ impl RbExpr {
566
593
  Ok(self.inner.clone().over(partition_by).into())
567
594
  }
568
595
 
569
- pub fn _and(&self, expr: &Self) -> Self {
570
- self.inner.clone().and(expr.inner.clone()).into()
596
+ pub fn rolling(
597
+ &self,
598
+ index_column: String,
599
+ period: String,
600
+ offset: String,
601
+ closed: Wrap<ClosedWindow>,
602
+ ) -> RbResult<Self> {
603
+ let options = RollingGroupOptions {
604
+ index_column: index_column.into(),
605
+ period: Duration::try_parse(&period).map_err(RbPolarsErr::from)?,
606
+ offset: Duration::try_parse(&offset).map_err(RbPolarsErr::from)?,
607
+ closed_window: closed.0,
608
+ };
609
+
610
+ Ok(self.inner.clone().rolling(options).into())
571
611
  }
572
612
 
573
- pub fn _xor(&self, expr: &Self) -> Self {
574
- self.inner.clone().xor(expr.inner.clone()).into()
613
+ pub fn and_(&self, expr: &Self) -> Self {
614
+ self.inner.clone().and(expr.inner.clone()).into()
575
615
  }
576
616
 
577
- pub fn _or(&self, expr: &Self) -> Self {
617
+ pub fn or_(&self, expr: &Self) -> Self {
578
618
  self.inner.clone().or(expr.inner.clone()).into()
579
619
  }
580
620
 
621
+ pub fn xor_(&self, expr: &Self) -> Self {
622
+ self.inner.clone().xor(expr.inner.clone()).into()
623
+ }
624
+
581
625
  pub fn is_in(&self, expr: &Self, nulls_equal: bool) -> Self {
582
626
  self.inner
583
627
  .clone()
@@ -593,6 +637,14 @@ impl RbExpr {
593
637
  self.inner.clone().pow(exponent.inner.clone()).into()
594
638
  }
595
639
 
640
+ pub fn sqrt(&self) -> Self {
641
+ self.inner.clone().sqrt().into()
642
+ }
643
+
644
+ pub fn cbrt(&self) -> Self {
645
+ self.inner.clone().cbrt().into()
646
+ }
647
+
596
648
  pub fn cum_sum(&self, reverse: bool) -> Self {
597
649
  self.inner.clone().cum_sum(reverse).into()
598
650
  }
@@ -756,6 +808,15 @@ impl RbExpr {
756
808
  self.inner.clone().ewm_mean(options).into()
757
809
  }
758
810
 
811
+ pub fn ewm_mean_by(&self, times: &RbExpr, half_life: String) -> RbResult<Self> {
812
+ let half_life = Duration::try_parse(&half_life).map_err(RbPolarsErr::from)?;
813
+ Ok(self
814
+ .inner
815
+ .clone()
816
+ .ewm_mean_by(times.inner.clone(), half_life)
817
+ .into())
818
+ }
819
+
759
820
  pub fn ewm_std(
760
821
  &self,
761
822
  alpha: f64,
@@ -811,6 +872,10 @@ impl RbExpr {
811
872
  self.inner.clone().log(base).into()
812
873
  }
813
874
 
875
+ pub fn log1p(&self) -> Self {
876
+ self.inner.clone().log1p().into()
877
+ }
878
+
814
879
  pub fn exp(&self) -> Self {
815
880
  self.inner.clone().exp().into()
816
881
  }
@@ -857,6 +922,20 @@ impl RbExpr {
857
922
  .into()
858
923
  }
859
924
 
925
+ pub fn hist(
926
+ &self,
927
+ bins: Option<&RbExpr>,
928
+ bin_count: Option<usize>,
929
+ include_category: bool,
930
+ include_breakpoint: bool,
931
+ ) -> Self {
932
+ let bins = bins.map(|e| e.inner.clone());
933
+ self.inner
934
+ .clone()
935
+ .hist(bins, bin_count, include_category, include_breakpoint)
936
+ .into()
937
+ }
938
+
860
939
  #[allow(clippy::wrong_self_convention)]
861
940
  pub fn into_selector(&self) -> RbResult<RbSelector> {
862
941
  Ok(self
@@ -83,6 +83,18 @@ impl RbExpr {
83
83
  self.inner.clone().list().mean().into()
84
84
  }
85
85
 
86
+ pub fn list_median(&self) -> Self {
87
+ self.inner.clone().list().median().into()
88
+ }
89
+
90
+ pub fn list_std(&self, ddof: u8) -> Self {
91
+ self.inner.clone().list().std(ddof).into()
92
+ }
93
+
94
+ pub fn list_var(&self, ddof: u8) -> Self {
95
+ self.inner.clone().list().var(ddof).into()
96
+ }
97
+
86
98
  pub fn list_min(&self) -> Self {
87
99
  self.inner.clone().list().min().into()
88
100
  }
@@ -171,6 +183,14 @@ impl RbExpr {
171
183
  .into()
172
184
  }
173
185
 
186
+ pub fn list_gather_every(&self, n: &RbExpr, offset: &RbExpr) -> Self {
187
+ self.inner
188
+ .clone()
189
+ .list()
190
+ .gather_every(n.inner.clone(), offset.inner.clone())
191
+ .into()
192
+ }
193
+
174
194
  pub fn list_to_array(&self, width: usize) -> Self {
175
195
  self.inner.clone().list().to_array(width).into()
176
196
  }
@@ -205,6 +225,10 @@ impl RbExpr {
205
225
  .into())
206
226
  }
207
227
 
228
+ pub fn list_n_unique(&self) -> Self {
229
+ self.inner.clone().list().n_unique().into()
230
+ }
231
+
208
232
  pub fn list_unique(&self, maintain_order: bool) -> Self {
209
233
  let e = self.inner.clone();
210
234
 
@@ -57,6 +57,17 @@ impl RbExpr {
57
57
  self.inner.clone().meta().is_regex_projection()
58
58
  }
59
59
 
60
+ pub fn meta_is_column_selection(&self, allow_aliasing: bool) -> bool {
61
+ self.inner
62
+ .clone()
63
+ .meta()
64
+ .is_column_selection(allow_aliasing)
65
+ }
66
+
67
+ pub fn meta_is_literal(&self, allow_aliasing: bool) -> bool {
68
+ self.inner.clone().meta().is_literal(allow_aliasing)
69
+ }
70
+
60
71
  fn compute_tree_format(
61
72
  &self,
62
73
  display_as_dot: bool,
@@ -1,5 +1,6 @@
1
1
  mod array;
2
2
  mod binary;
3
+ mod bitwise;
3
4
  mod categorical;
4
5
  pub mod datatype;
5
6
  mod datetime;
@@ -42,4 +42,12 @@ impl RbExpr {
42
42
  pub fn name_to_uppercase(&self) -> Self {
43
43
  self.inner.clone().name().to_uppercase().into()
44
44
  }
45
+
46
+ pub fn name_prefix_fields(&self, prefix: String) -> Self {
47
+ self.inner.clone().name().prefix_fields(&prefix).into()
48
+ }
49
+
50
+ pub fn name_suffix_fields(&self, suffix: String) -> Self {
51
+ self.inner.clone().name().suffix_fields(&suffix).into()
52
+ }
45
53
  }
@@ -337,4 +337,24 @@ impl RbExpr {
337
337
 
338
338
  self.inner.clone().rolling_skew(options).into()
339
339
  }
340
+
341
+ pub fn rolling_kurtosis(
342
+ &self,
343
+ window_size: usize,
344
+ fisher: bool,
345
+ bias: bool,
346
+ min_periods: Option<usize>,
347
+ center: bool,
348
+ ) -> Self {
349
+ let min_periods = min_periods.unwrap_or(window_size);
350
+ let options = RollingOptionsFixedWindow {
351
+ window_size,
352
+ weights: None,
353
+ min_periods,
354
+ center,
355
+ fn_params: Some(RollingFnParams::Kurtosis { fisher, bias }),
356
+ };
357
+
358
+ self.inner.clone().rolling_kurtosis(options).into()
359
+ }
340
360
  }
@@ -122,6 +122,14 @@ impl RbExpr {
122
122
  .into()
123
123
  }
124
124
 
125
+ pub fn str_head(&self, n: &Self) -> Self {
126
+ self.inner.clone().str().head(n.inner.clone()).into()
127
+ }
128
+
129
+ pub fn str_tail(&self, n: &Self) -> Self {
130
+ self.inner.clone().str().tail(n.inner.clone()).into()
131
+ }
132
+
125
133
  pub fn str_to_uppercase(&self) -> Self {
126
134
  self.inner.clone().str().to_uppercase().into()
127
135
  }
@@ -159,6 +167,10 @@ impl RbExpr {
159
167
  .into()
160
168
  }
161
169
 
170
+ pub fn str_normalize(&self, form: Wrap<UnicodeForm>) -> Self {
171
+ self.inner.clone().str().normalize(form.0).into()
172
+ }
173
+
162
174
  pub fn str_reverse(&self) -> Self {
163
175
  self.inner.clone().str().reverse().into()
164
176
  }
@@ -200,6 +212,23 @@ impl RbExpr {
200
212
  }
201
213
  }
202
214
 
215
+ pub fn str_find(&self, pat: &Self, literal: Option<bool>, strict: bool) -> Self {
216
+ match literal {
217
+ Some(true) => self
218
+ .inner
219
+ .clone()
220
+ .str()
221
+ .find_literal(pat.inner.clone())
222
+ .into(),
223
+ _ => self
224
+ .inner
225
+ .clone()
226
+ .str()
227
+ .find(pat.inner.clone(), strict)
228
+ .into(),
229
+ }
230
+ }
231
+
203
232
  pub fn str_ends_with(&self, sub: &RbExpr) -> Self {
204
233
  self.inner.clone().str().ends_with(sub.inner.clone()).into()
205
234
  }
@@ -351,4 +380,34 @@ impl RbExpr {
351
380
  )
352
381
  .into()
353
382
  }
383
+
384
+ pub fn str_extract_many(
385
+ &self,
386
+ patterns: &RbExpr,
387
+ ascii_case_insensitive: bool,
388
+ overlapping: bool,
389
+ ) -> Self {
390
+ self.inner
391
+ .clone()
392
+ .str()
393
+ .extract_many(patterns.inner.clone(), ascii_case_insensitive, overlapping)
394
+ .into()
395
+ }
396
+
397
+ pub fn str_find_many(
398
+ &self,
399
+ patterns: &RbExpr,
400
+ ascii_case_insensitive: bool,
401
+ overlapping: bool,
402
+ ) -> Self {
403
+ self.inner
404
+ .clone()
405
+ .str()
406
+ .find_many(patterns.inner.clone(), ascii_case_insensitive, overlapping)
407
+ .into()
408
+ }
409
+
410
+ pub fn str_escape_regex(&self) -> Self {
411
+ self.inner.clone().str().escape_regex().into()
412
+ }
354
413
  }
@@ -1,4 +1,6 @@
1
- use crate::RbExpr;
1
+ use magnus::RArray;
2
+
3
+ use crate::{RbExpr, RbResult, rb_exprs_to_exprs};
2
4
 
3
5
  impl RbExpr {
4
6
  pub fn struct_field_by_index(&self, index: i64) -> Self {
@@ -16,4 +18,10 @@ impl RbExpr {
16
18
  pub fn struct_json_encode(&self) -> Self {
17
19
  self.inner.clone().struct_().json_encode().into()
18
20
  }
21
+
22
+ pub fn struct_with_fields(&self, fields: RArray) -> RbResult<Self> {
23
+ let fields = rb_exprs_to_exprs(fields)?;
24
+ let e = self.inner.clone().struct_().with_fields(fields);
25
+ Ok(e.into())
26
+ }
19
27
  }
@@ -21,6 +21,25 @@ pub fn read_ipc_schema(rb_f: Value) -> RbResult<RHash> {
21
21
  Ok(dict)
22
22
  }
23
23
 
24
+ pub fn read_parquet_metadata(rb_f: Value) -> RbResult<RHash> {
25
+ use polars_parquet::read::read_metadata;
26
+ use polars_parquet::read::schema::read_custom_key_value_metadata;
27
+
28
+ let metadata = match get_either_file(rb_f, false)? {
29
+ EitherRustRubyFile::Rust(r) => {
30
+ read_metadata(&mut BufReader::new(r)).map_err(RbPolarsErr::from)?
31
+ }
32
+ EitherRustRubyFile::Rb(mut r) => read_metadata(&mut r).map_err(RbPolarsErr::from)?,
33
+ };
34
+
35
+ let key_value_metadata = read_custom_key_value_metadata(metadata.key_value_metadata());
36
+ let dict = RHash::new();
37
+ for (key, value) in key_value_metadata.into_iter() {
38
+ dict.aset(key.as_str(), value.as_str())?;
39
+ }
40
+ Ok(dict)
41
+ }
42
+
24
43
  pub fn read_parquet_schema(rb_f: Value) -> RbResult<RHash> {
25
44
  use polars_parquet::read::{infer_schema, read_metadata};
26
45
 
@@ -85,6 +85,10 @@ pub fn as_struct(exprs: RArray) -> RbResult<RbExpr> {
85
85
  Ok(dsl::as_struct(exprs).into())
86
86
  }
87
87
 
88
+ pub fn field(names: Vec<String>) -> RbExpr {
89
+ dsl::Expr::Field(names.into_iter().map(|x| x.into()).collect()).into()
90
+ }
91
+
88
92
  pub fn coalesce(exprs: RArray) -> RbResult<RbExpr> {
89
93
  let exprs = rb_exprs_to_exprs(exprs)?;
90
94
  Ok(dsl::coalesce(&exprs).into())
@@ -325,6 +325,30 @@ impl RbLazyFrame {
325
325
  .into())
326
326
  }
327
327
 
328
+ pub fn top_k(&self, k: IdxSize, by: RArray, reverse: Vec<bool>) -> RbResult<Self> {
329
+ let ldf = self.ldf.borrow().clone();
330
+ let exprs = rb_exprs_to_exprs(by)?;
331
+ Ok(ldf
332
+ .top_k(
333
+ k,
334
+ exprs,
335
+ SortMultipleOptions::new().with_order_descending_multi(reverse),
336
+ )
337
+ .into())
338
+ }
339
+
340
+ pub fn bottom_k(&self, k: IdxSize, by: RArray, reverse: Vec<bool>) -> RbResult<Self> {
341
+ let ldf = self.ldf.borrow().clone();
342
+ let exprs = rb_exprs_to_exprs(by)?;
343
+ Ok(ldf
344
+ .bottom_k(
345
+ k,
346
+ exprs,
347
+ SortMultipleOptions::new().with_order_descending_multi(reverse),
348
+ )
349
+ .into())
350
+ }
351
+
328
352
  pub fn cache(&self) -> Self {
329
353
  let ldf = self.ldf.borrow().clone();
330
354
  ldf.cache().into()
@@ -509,6 +533,11 @@ impl RbLazyFrame {
509
533
  ldf.filter(predicate.inner.clone()).into()
510
534
  }
511
535
 
536
+ pub fn remove(&self, predicate: &RbExpr) -> Self {
537
+ let ldf = self.ldf.borrow().clone();
538
+ ldf.remove(predicate.inner.clone()).into()
539
+ }
540
+
512
541
  pub fn select(&self, exprs: RArray) -> RbResult<Self> {
513
542
  let ldf = self.ldf.borrow().clone();
514
543
  let exprs = rb_exprs_to_exprs(exprs)?;
@@ -701,6 +730,20 @@ impl RbLazyFrame {
701
730
  .into())
702
731
  }
703
732
 
733
+ pub fn join_where(&self, other: &Self, predicates: RArray, suffix: String) -> RbResult<Self> {
734
+ let ldf = self.ldf.borrow().clone();
735
+ let other = other.ldf.borrow().clone();
736
+
737
+ let predicates = rb_exprs_to_exprs(predicates)?;
738
+
739
+ Ok(ldf
740
+ .join_builder()
741
+ .with(other)
742
+ .suffix(suffix)
743
+ .join_where(predicates)
744
+ .into())
745
+ }
746
+
704
747
  pub fn with_column(&self, expr: &RbExpr) -> Self {
705
748
  let ldf = self.ldf.borrow().clone();
706
749
  ldf.with_column(expr.inner.clone()).into()
@@ -816,6 +859,14 @@ impl RbLazyFrame {
816
859
  .into())
817
860
  }
818
861
 
862
+ pub fn drop_nans(&self, subset: Option<&RbSelector>) -> Self {
863
+ self.ldf
864
+ .borrow()
865
+ .clone()
866
+ .drop_nans(subset.map(|e| e.inner.clone()))
867
+ .into()
868
+ }
869
+
819
870
  pub fn drop_nulls(&self, subset: Option<&RbSelector>) -> Self {
820
871
  self.ldf
821
872
  .borrow()