polars-df 0.19.0 → 0.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +15 -0
  3. data/Cargo.lock +211 -320
  4. data/LICENSE.txt +1 -1
  5. data/ext/polars/Cargo.toml +13 -9
  6. data/ext/polars/src/batched_csv.rs +2 -2
  7. data/ext/polars/src/catalog/mod.rs +1 -0
  8. data/ext/polars/src/catalog/unity.rs +450 -0
  9. data/ext/polars/src/conversion/any_value.rs +9 -19
  10. data/ext/polars/src/conversion/categorical.rs +30 -0
  11. data/ext/polars/src/conversion/chunked_array.rs +8 -8
  12. data/ext/polars/src/conversion/mod.rs +187 -109
  13. data/ext/polars/src/dataframe/construction.rs +2 -2
  14. data/ext/polars/src/dataframe/export.rs +2 -2
  15. data/ext/polars/src/dataframe/general.rs +4 -2
  16. data/ext/polars/src/dataframe/io.rs +2 -2
  17. data/ext/polars/src/exceptions.rs +1 -1
  18. data/ext/polars/src/expr/datatype.rs +14 -0
  19. data/ext/polars/src/expr/general.rs +36 -44
  20. data/ext/polars/src/expr/list.rs +27 -17
  21. data/ext/polars/src/expr/meta.rs +18 -41
  22. data/ext/polars/src/expr/mod.rs +3 -1
  23. data/ext/polars/src/expr/name.rs +2 -2
  24. data/ext/polars/src/expr/rolling.rs +1 -1
  25. data/ext/polars/src/expr/selector.rs +219 -0
  26. data/ext/polars/src/expr/string.rs +14 -7
  27. data/ext/polars/src/file.rs +12 -6
  28. data/ext/polars/src/functions/io.rs +2 -11
  29. data/ext/polars/src/functions/lazy.rs +22 -54
  30. data/ext/polars/src/functions/meta.rs +2 -2
  31. data/ext/polars/src/functions/misc.rs +1 -1
  32. data/ext/polars/src/functions/range.rs +14 -10
  33. data/ext/polars/src/functions/string_cache.rs +4 -5
  34. data/ext/polars/src/interop/numo/numo_rs.rs +1 -1
  35. data/ext/polars/src/interop/numo/to_numo_series.rs +1 -1
  36. data/ext/polars/src/io/mod.rs +102 -0
  37. data/ext/polars/src/lazyframe/general.rs +75 -113
  38. data/ext/polars/src/lazyframe/serde.rs +1 -1
  39. data/ext/polars/src/lazyframe/sink.rs +6 -6
  40. data/ext/polars/src/lib.rs +104 -26
  41. data/ext/polars/src/map/dataframe.rs +7 -7
  42. data/ext/polars/src/map/lazy.rs +1 -1
  43. data/ext/polars/src/map/mod.rs +31 -19
  44. data/ext/polars/src/map/series.rs +8 -8
  45. data/ext/polars/src/on_startup.rs +5 -2
  46. data/ext/polars/src/rb_modules.rs +1 -1
  47. data/ext/polars/src/series/construction.rs +11 -7
  48. data/ext/polars/src/series/export.rs +6 -4
  49. data/ext/polars/src/series/general.rs +12 -207
  50. data/ext/polars/src/series/import.rs +2 -2
  51. data/ext/polars/src/series/map.rs +227 -0
  52. data/ext/polars/src/series/mod.rs +2 -1
  53. data/ext/polars/src/series/scatter.rs +1 -1
  54. data/ext/polars/src/utils.rs +10 -2
  55. data/lib/polars/cat_name_space.rb +3 -43
  56. data/lib/polars/catalog/unity/catalog_info.rb +20 -0
  57. data/lib/polars/catalog/unity/column_info.rb +31 -0
  58. data/lib/polars/catalog/unity/namespace_info.rb +21 -0
  59. data/lib/polars/catalog/unity/table_info.rb +50 -0
  60. data/lib/polars/catalog.rb +448 -0
  61. data/lib/polars/convert.rb +10 -0
  62. data/lib/polars/data_frame.rb +151 -30
  63. data/lib/polars/data_types.rb +47 -3
  64. data/lib/polars/exceptions.rb +7 -2
  65. data/lib/polars/expr.rb +48 -39
  66. data/lib/polars/functions/col.rb +6 -5
  67. data/lib/polars/functions/eager.rb +1 -1
  68. data/lib/polars/functions/lazy.rb +114 -15
  69. data/lib/polars/functions/repeat.rb +4 -0
  70. data/lib/polars/io/csv.rb +18 -0
  71. data/lib/polars/io/json.rb +16 -0
  72. data/lib/polars/io/ndjson.rb +13 -0
  73. data/lib/polars/io/parquet.rb +45 -63
  74. data/lib/polars/io/scan_options.rb +47 -0
  75. data/lib/polars/lazy_frame.rb +163 -75
  76. data/lib/polars/list_expr.rb +213 -17
  77. data/lib/polars/list_name_space.rb +121 -8
  78. data/lib/polars/meta_expr.rb +14 -29
  79. data/lib/polars/scan_cast_options.rb +64 -0
  80. data/lib/polars/schema.rb +6 -1
  81. data/lib/polars/selector.rb +138 -0
  82. data/lib/polars/selectors.rb +931 -202
  83. data/lib/polars/series.rb +46 -19
  84. data/lib/polars/string_expr.rb +24 -3
  85. data/lib/polars/string_name_space.rb +12 -1
  86. data/lib/polars/utils/parse.rb +40 -0
  87. data/lib/polars/utils.rb +5 -1
  88. data/lib/polars/version.rb +1 -1
  89. data/lib/polars.rb +8 -0
  90. metadata +17 -2
@@ -1,7 +1,7 @@
1
1
  use std::hash::BuildHasher;
2
2
 
3
3
  use either::Either;
4
- use magnus::{prelude::*, typed_data::Obj, IntoValue, RArray, Value};
4
+ use magnus::{IntoValue, RArray, Value, prelude::*, typed_data::Obj};
5
5
  use polars::prelude::pivot::{pivot, pivot_stable};
6
6
  use polars::prelude::*;
7
7
 
@@ -416,17 +416,19 @@ impl RbDataFrame {
416
416
  columns: Option<Vec<String>>,
417
417
  separator: Option<String>,
418
418
  drop_first: bool,
419
+ drop_nulls: bool,
419
420
  ) -> RbResult<Self> {
420
421
  let df = match columns {
421
422
  Some(cols) => self.df.borrow().columns_to_dummies(
422
423
  cols.iter().map(|x| x as &str).collect(),
423
424
  separator.as_deref(),
424
425
  drop_first,
426
+ drop_nulls,
425
427
  ),
426
428
  None => self
427
429
  .df
428
430
  .borrow()
429
- .to_dummies(separator.as_deref(), drop_first),
431
+ .to_dummies(separator.as_deref(), drop_first, drop_nulls),
430
432
  }
431
433
  .map_err(RbPolarsErr::from)?;
432
434
  Ok(df.into())
@@ -1,6 +1,6 @@
1
- use magnus::{prelude::*, Value};
2
- use polars::io::avro::AvroCompression;
1
+ use magnus::{Value, prelude::*};
3
2
  use polars::io::RowIndex;
3
+ use polars::io::avro::AvroCompression;
4
4
  use polars::prelude::*;
5
5
  use std::io::BufWriter;
6
6
  use std::num::NonZeroUsize;
@@ -1,5 +1,5 @@
1
1
  use crate::rb_modules;
2
- use magnus::{exception, Error};
2
+ use magnus::{Error, exception};
3
3
  use std::borrow::Cow;
4
4
 
5
5
  macro_rules! create_exception {
@@ -0,0 +1,14 @@
1
+ use polars::prelude::DataTypeExpr;
2
+
3
+ #[magnus::wrap(class = "Polars::RbDataTypeExpr")]
4
+ #[repr(transparent)]
5
+ #[derive(Clone)]
6
+ pub struct RbDataTypeExpr {
7
+ pub inner: DataTypeExpr,
8
+ }
9
+
10
+ impl From<DataTypeExpr> for RbDataTypeExpr {
11
+ fn from(expr: DataTypeExpr) -> Self {
12
+ RbDataTypeExpr { inner: expr }
13
+ }
14
+ }
@@ -1,15 +1,16 @@
1
1
  use std::ops::Neg;
2
2
 
3
- use magnus::{prelude::*, value::Opaque, IntoValue, RArray, Ruby, Value};
3
+ use magnus::{RArray, Value};
4
4
  use polars::lazy::dsl;
5
5
  use polars::prelude::*;
6
6
  use polars::series::ops::NullBehavior;
7
7
  use polars_core::series::IsSorted;
8
8
 
9
- use crate::conversion::{parse_fill_null_strategy, Wrap};
9
+ use super::selector::RbSelector;
10
+ use crate::conversion::{Wrap, parse_fill_null_strategy};
10
11
  use crate::map::lazy::map_single;
11
12
  use crate::rb_exprs_to_exprs;
12
- use crate::{RbExpr, RbResult};
13
+ use crate::{RbExpr, RbPolarsErr, RbResult};
13
14
 
14
15
  impl RbExpr {
15
16
  pub fn add(&self, rhs: &Self) -> RbResult<Self> {
@@ -276,17 +277,8 @@ impl RbExpr {
276
277
  .into()
277
278
  }
278
279
 
279
- pub fn arg_sort(&self, reverse: bool, nulls_last: bool) -> Self {
280
- self.clone()
281
- .inner
282
- .arg_sort(SortOptions {
283
- descending: reverse,
284
- nulls_last,
285
- multithreaded: true,
286
- maintain_order: false,
287
- limit: None,
288
- })
289
- .into()
280
+ pub fn arg_sort(&self, descending: bool, nulls_last: bool) -> Self {
281
+ self.inner.clone().arg_sort(descending, nulls_last).into()
290
282
  }
291
283
 
292
284
  pub fn top_k(&self, k: &Self) -> Self {
@@ -331,10 +323,15 @@ impl RbExpr {
331
323
  self.inner.clone().arg_min().into()
332
324
  }
333
325
 
334
- pub fn search_sorted(&self, element: &Self, side: Wrap<SearchSortedSide>) -> Self {
326
+ pub fn search_sorted(
327
+ &self,
328
+ element: &Self,
329
+ side: Wrap<SearchSortedSide>,
330
+ descending: bool,
331
+ ) -> Self {
335
332
  self.inner
336
333
  .clone()
337
- .search_sorted(element.inner.clone(), side.0)
334
+ .search_sorted(element.inner.clone(), side.0, descending)
338
335
  .into()
339
336
  }
340
337
 
@@ -389,16 +386,8 @@ impl RbExpr {
389
386
  strategy: String,
390
387
  limit: FillNullLimit,
391
388
  ) -> RbResult<Self> {
392
- let strat = parse_fill_null_strategy(&strategy, limit)?;
393
- Ok(self
394
- .inner
395
- .clone()
396
- .apply(
397
- move |s| s.fill_null(strat).map(Some),
398
- GetOutput::same_type(),
399
- )
400
- .with_fmt("fill_null_with_strategy")
401
- .into())
389
+ let strategy = parse_fill_null_strategy(&strategy, limit)?;
390
+ Ok(self.inner.clone().fill_null_with_strategy(strategy).into())
402
391
  }
403
392
 
404
393
  pub fn fill_nan(&self, expr: &Self) -> Self {
@@ -658,10 +647,6 @@ impl RbExpr {
658
647
  self.inner.clone().mode().into()
659
648
  }
660
649
 
661
- pub fn exclude(&self, columns: Vec<String>) -> Self {
662
- self.inner.clone().exclude(columns).into()
663
- }
664
-
665
650
  pub fn interpolate(&self, method: Wrap<InterpolationMethod>) -> Self {
666
651
  self.inner.clone().interpolate(method.0).into()
667
652
  }
@@ -678,10 +663,10 @@ impl RbExpr {
678
663
  self.inner.clone().upper_bound().into()
679
664
  }
680
665
 
681
- pub fn cumulative_eval(&self, expr: &Self, min_periods: usize, parallel: bool) -> Self {
666
+ pub fn cumulative_eval(&self, expr: &Self, min_samples: usize) -> Self {
682
667
  self.inner
683
668
  .clone()
684
- .cumulative_eval(expr.inner.clone(), min_periods, parallel)
669
+ .cumulative_eval(expr.inner.clone(), min_samples)
685
670
  .into()
686
671
  }
687
672
 
@@ -807,20 +792,10 @@ impl RbExpr {
807
792
  self.inner.clone().ewm_var(options).into()
808
793
  }
809
794
 
810
- pub fn extend_constant(&self, value: Wrap<AnyValue>, n: usize) -> Self {
811
- let value = value.into_value();
812
- let value = Opaque::from(value);
795
+ pub fn extend_constant(&self, value: &Self, n: &Self) -> Self {
813
796
  self.inner
814
797
  .clone()
815
- .apply(
816
- move |s| {
817
- let value = Ruby::get().unwrap().get_inner(value);
818
- let value = Wrap::<AnyValue>::try_convert(value).unwrap().0;
819
- s.extend_constant(value, n).map(Some)
820
- },
821
- GetOutput::same_type(),
822
- )
823
- .with_fmt("extend")
798
+ .extend_constant(value.inner.clone(), n.inner.clone())
824
799
  .into()
825
800
  }
826
801
 
@@ -881,4 +856,21 @@ impl RbExpr {
881
856
  )
882
857
  .into()
883
858
  }
859
+
860
+ #[allow(clippy::wrong_self_convention)]
861
+ pub fn into_selector(&self) -> RbResult<RbSelector> {
862
+ Ok(self
863
+ .inner
864
+ .clone()
865
+ .into_selector()
866
+ .ok_or_else(
867
+ || polars_err!(InvalidOperation: "expr `{}` is not a selector", &self.inner),
868
+ )
869
+ .map_err(RbPolarsErr::from)?
870
+ .into())
871
+ }
872
+
873
+ pub fn new_selector(selector: &RbSelector) -> Self {
874
+ Expr::Selector(selector.inner.clone()).into()
875
+ }
884
876
  }
@@ -1,4 +1,4 @@
1
- use magnus::{prelude::*, value::Opaque, Ruby, Value};
1
+ use magnus::{Ruby, Value, prelude::*, value::Opaque};
2
2
  use polars::lazy::dsl::lit;
3
3
  use polars::prelude::*;
4
4
  use polars::series::ops::NullBehavior;
@@ -43,11 +43,15 @@ impl RbExpr {
43
43
  Ok(self.inner.clone().list().diff(n, null_behavior.0).into())
44
44
  }
45
45
 
46
- pub fn list_eval(&self, expr: &RbExpr, parallel: bool) -> Self {
46
+ pub fn list_eval(&self, expr: &RbExpr) -> Self {
47
+ self.inner.clone().list().eval(expr.inner.clone()).into()
48
+ }
49
+
50
+ pub fn list_filter(&self, predicate: &RbExpr) -> Self {
47
51
  self.inner
48
52
  .clone()
49
53
  .list()
50
- .eval(expr.inner.clone(), parallel)
54
+ .eval(Expr::Column(PlSmallStr::EMPTY).filter(predicate.inner.clone()))
51
55
  .into()
52
56
  }
53
57
 
@@ -76,12 +80,7 @@ impl RbExpr {
76
80
  }
77
81
 
78
82
  pub fn list_mean(&self) -> Self {
79
- self.inner
80
- .clone()
81
- .list()
82
- .mean()
83
- .with_fmt("list.mean")
84
- .into()
83
+ self.inner.clone().list().mean().into()
85
84
  }
86
85
 
87
86
  pub fn list_min(&self) -> Self {
@@ -116,20 +115,20 @@ impl RbExpr {
116
115
  self.inner.clone().list().tail(n.inner.clone()).into()
117
116
  }
118
117
 
119
- pub fn list_sort(&self, reverse: bool) -> Self {
118
+ pub fn list_sort(&self, descending: bool, nulls_last: bool) -> Self {
120
119
  self.inner
121
120
  .clone()
122
121
  .list()
123
- .sort(SortOptions {
124
- descending: reverse,
125
- ..Default::default()
126
- })
127
- .with_fmt("list.sort")
122
+ .sort(
123
+ SortOptions::default()
124
+ .with_order_descending(descending)
125
+ .with_nulls_last(nulls_last),
126
+ )
128
127
  .into()
129
128
  }
130
129
 
131
130
  pub fn list_sum(&self) -> Self {
132
- self.inner.clone().list().sum().with_fmt("list.sum").into()
131
+ self.inner.clone().list().sum().into()
133
132
  }
134
133
 
135
134
  pub fn list_drop_nulls(&self) -> Self {
@@ -198,7 +197,7 @@ impl RbExpr {
198
197
  .inner
199
198
  .clone()
200
199
  .list()
201
- .to_struct(ListToStructArgs::InferWidth {
200
+ .to_struct(ListToStruct::InferWidth {
202
201
  infer_field_strategy: width_strat.0,
203
202
  get_index_name: name_gen,
204
203
  max_fields: upper_bound,
@@ -215,4 +214,15 @@ impl RbExpr {
215
214
  e.list().unique().into()
216
215
  }
217
216
  }
217
+
218
+ pub fn list_set_operation(&self, other: &RbExpr, operation: Wrap<SetOperation>) -> Self {
219
+ let e = self.inner.clone().list();
220
+ match operation.0 {
221
+ SetOperation::Intersection => e.set_intersection(other.inner.clone()),
222
+ SetOperation::Difference => e.set_difference(other.inner.clone()),
223
+ SetOperation::Union => e.union(other.inner.clone()),
224
+ SetOperation::SymmetricDifference => e.set_symmetric_difference(other.inner.clone()),
225
+ }
226
+ .into()
227
+ }
218
228
  }
@@ -1,14 +1,21 @@
1
1
  use magnus::RArray;
2
+ use polars::prelude::Schema;
2
3
 
3
- use crate::{RbExpr, RbPolarsErr, RbResult};
4
+ use crate::{RbExpr, RbPolarsErr, RbResult, Wrap};
4
5
 
5
6
  impl RbExpr {
6
7
  pub fn meta_eq(&self, other: &RbExpr) -> bool {
7
8
  self.inner == other.inner
8
9
  }
9
10
 
10
- pub fn meta_pop(&self) -> RbResult<RArray> {
11
- let exprs = self.inner.clone().meta().pop().map_err(RbPolarsErr::from)?;
11
+ pub fn meta_pop(&self, schema: Option<Wrap<Schema>>) -> RbResult<RArray> {
12
+ let schema = schema.as_ref().map(|s| &s.0);
13
+ let exprs = self
14
+ .inner
15
+ .clone()
16
+ .meta()
17
+ .pop(schema)
18
+ .map_err(RbPolarsErr::from)?;
12
19
  Ok(RArray::from_iter(
13
20
  exprs.iter().map(|e| RbExpr::from(e.clone())),
14
21
  ))
@@ -50,51 +57,21 @@ impl RbExpr {
50
57
  self.inner.clone().meta().is_regex_projection()
51
58
  }
52
59
 
53
- pub fn _meta_selector_add(&self, other: &RbExpr) -> RbResult<RbExpr> {
54
- let out = self
55
- .inner
56
- .clone()
57
- .meta()
58
- ._selector_add(other.inner.clone())
59
- .map_err(RbPolarsErr::from)?;
60
- Ok(out.into())
61
- }
62
-
63
- pub fn _meta_selector_sub(&self, other: &RbExpr) -> RbResult<RbExpr> {
64
- let out = self
65
- .inner
66
- .clone()
67
- .meta()
68
- ._selector_sub(other.inner.clone())
69
- .map_err(RbPolarsErr::from)?;
70
- Ok(out.into())
71
- }
72
-
73
- pub fn _meta_selector_and(&self, other: &RbExpr) -> RbResult<RbExpr> {
74
- let out = self
75
- .inner
76
- .clone()
77
- .meta()
78
- ._selector_and(other.inner.clone())
79
- .map_err(RbPolarsErr::from)?;
80
- Ok(out.into())
81
- }
82
-
83
- pub fn _meta_as_selector(&self) -> RbExpr {
84
- self.inner.clone().meta()._into_selector().into()
85
- }
86
-
87
- fn compute_tree_format(&self, display_as_dot: bool) -> RbResult<String> {
60
+ fn compute_tree_format(
61
+ &self,
62
+ display_as_dot: bool,
63
+ schema: Option<Wrap<Schema>>,
64
+ ) -> RbResult<String> {
88
65
  let e = self
89
66
  .inner
90
67
  .clone()
91
68
  .meta()
92
- .into_tree_formatter(display_as_dot)
69
+ .into_tree_formatter(display_as_dot, schema.as_ref().map(|s| &s.0))
93
70
  .map_err(RbPolarsErr::from)?;
94
71
  Ok(format!("{e}"))
95
72
  }
96
73
 
97
- pub fn meta_tree_format(&self) -> RbResult<String> {
98
- self.compute_tree_format(false)
74
+ pub fn meta_tree_format(&self, schema: Option<Wrap<Schema>>) -> RbResult<String> {
75
+ self.compute_tree_format(false, schema)
99
76
  }
100
77
  }
@@ -1,16 +1,18 @@
1
1
  mod array;
2
2
  mod binary;
3
3
  mod categorical;
4
+ pub mod datatype;
4
5
  mod datetime;
5
6
  mod general;
6
7
  mod list;
7
8
  mod meta;
8
9
  mod name;
9
10
  mod rolling;
11
+ pub mod selector;
10
12
  mod string;
11
13
  mod r#struct;
12
14
 
13
- use magnus::{prelude::*, RArray};
15
+ use magnus::{RArray, prelude::*};
14
16
  use polars::lazy::dsl::Expr;
15
17
 
16
18
  use crate::RbResult;
@@ -1,4 +1,4 @@
1
- use magnus::{block::Proc, value::Opaque, Ruby};
1
+ use magnus::{Ruby, block::Proc, value::Opaque};
2
2
  use polars::prelude::*;
3
3
  use polars_utils::format_pl_smallstr;
4
4
 
@@ -20,7 +20,7 @@ impl RbExpr {
20
20
  match out {
21
21
  Ok(out) => Ok(format_pl_smallstr!("{}", out)),
22
22
  Err(e) => Err(PolarsError::ComputeError(
23
- format!("Ruby function in 'name.map' produced an error: {}.", e).into(),
23
+ format!("Ruby function in 'name.map' produced an error: {e}.").into(),
24
24
  )),
25
25
  }
26
26
  })
@@ -1,7 +1,7 @@
1
1
  use polars::prelude::*;
2
2
 
3
- use crate::conversion::Wrap;
4
3
  use crate::RbExpr;
4
+ use crate::conversion::Wrap;
5
5
 
6
6
  impl RbExpr {
7
7
  pub fn rolling_sum(
@@ -0,0 +1,219 @@
1
+ use std::hash::{Hash, Hasher};
2
+ use std::sync::Arc;
3
+
4
+ use polars::prelude::{
5
+ DataType, DataTypeSelector, Selector, TimeUnit, TimeUnitSet, TimeZone, TimeZoneSet,
6
+ };
7
+ use polars_plan::dsl;
8
+
9
+ use crate::prelude::Wrap;
10
+ use crate::{RbResult, RbTypeError};
11
+
12
+ #[magnus::wrap(class = "Polars::RbSelector")]
13
+ #[repr(transparent)]
14
+ #[derive(Clone)]
15
+ pub struct RbSelector {
16
+ pub inner: Selector,
17
+ }
18
+
19
+ impl From<Selector> for RbSelector {
20
+ fn from(inner: Selector) -> Self {
21
+ Self { inner }
22
+ }
23
+ }
24
+
25
+ fn parse_time_unit_set(time_units: Vec<Wrap<TimeUnit>>) -> TimeUnitSet {
26
+ let mut tu = TimeUnitSet::empty();
27
+ for v in time_units {
28
+ match v.0 {
29
+ TimeUnit::Nanoseconds => tu |= TimeUnitSet::NANO_SECONDS,
30
+ TimeUnit::Microseconds => tu |= TimeUnitSet::MICRO_SECONDS,
31
+ TimeUnit::Milliseconds => tu |= TimeUnitSet::MILLI_SECONDS,
32
+ }
33
+ }
34
+ tu
35
+ }
36
+
37
+ pub fn parse_datatype_selector(selector: &RbSelector) -> RbResult<DataTypeSelector> {
38
+ selector.inner.clone().to_dtype_selector().ok_or_else(|| {
39
+ RbTypeError::new_err(format!(
40
+ "expected datatype based expression got '{}'",
41
+ selector.inner
42
+ ))
43
+ })
44
+ }
45
+
46
+ impl RbSelector {
47
+ pub fn union(&self, other: &Self) -> Self {
48
+ Self {
49
+ inner: self.inner.clone() | other.inner.clone(),
50
+ }
51
+ }
52
+
53
+ pub fn difference(&self, other: &Self) -> Self {
54
+ Self {
55
+ inner: self.inner.clone() - other.inner.clone(),
56
+ }
57
+ }
58
+
59
+ pub fn exclusive_or(&self, other: &Self) -> Self {
60
+ Self {
61
+ inner: self.inner.clone() ^ other.inner.clone(),
62
+ }
63
+ }
64
+
65
+ pub fn intersect(&self, other: &Self) -> Self {
66
+ Self {
67
+ inner: self.inner.clone() & other.inner.clone(),
68
+ }
69
+ }
70
+
71
+ pub fn by_dtype(dtypes: Vec<Wrap<DataType>>) -> Self {
72
+ let dtypes = dtypes.into_iter().map(|x| x.0).collect::<Vec<_>>();
73
+ dsl::dtype_cols(dtypes).as_selector().into()
74
+ }
75
+
76
+ pub fn by_name(names: Vec<String>, strict: bool) -> Self {
77
+ dsl::by_name(names, strict).into()
78
+ }
79
+
80
+ pub fn by_index(indices: Vec<i64>, strict: bool) -> Self {
81
+ Selector::ByIndex {
82
+ indices: indices.into(),
83
+ strict,
84
+ }
85
+ .into()
86
+ }
87
+
88
+ pub fn first(strict: bool) -> Self {
89
+ Selector::ByIndex {
90
+ indices: [0].into(),
91
+ strict,
92
+ }
93
+ .into()
94
+ }
95
+
96
+ pub fn last(strict: bool) -> Self {
97
+ Selector::ByIndex {
98
+ indices: [-1].into(),
99
+ strict,
100
+ }
101
+ .into()
102
+ }
103
+
104
+ pub fn matches(pattern: String) -> Self {
105
+ Selector::Matches(pattern.into()).into()
106
+ }
107
+
108
+ pub fn enum_() -> Self {
109
+ DataTypeSelector::Enum.as_selector().into()
110
+ }
111
+
112
+ pub fn categorical() -> Self {
113
+ DataTypeSelector::Categorical.as_selector().into()
114
+ }
115
+
116
+ pub fn nested() -> Self {
117
+ DataTypeSelector::Nested.as_selector().into()
118
+ }
119
+
120
+ pub fn list(inner_dst: Option<&Self>) -> RbResult<Self> {
121
+ let inner_dst = match inner_dst {
122
+ None => None,
123
+ Some(inner_dst) => Some(Arc::new(parse_datatype_selector(inner_dst)?)),
124
+ };
125
+ Ok(DataTypeSelector::List(inner_dst).as_selector().into())
126
+ }
127
+
128
+ pub fn array(inner_dst: Option<&Self>, width: Option<usize>) -> RbResult<Self> {
129
+ let inner_dst = match inner_dst {
130
+ None => None,
131
+ Some(inner_dst) => Some(Arc::new(parse_datatype_selector(inner_dst)?)),
132
+ };
133
+ Ok(DataTypeSelector::Array(inner_dst, width)
134
+ .as_selector()
135
+ .into())
136
+ }
137
+
138
+ pub fn struct_() -> Self {
139
+ DataTypeSelector::Struct.as_selector().into()
140
+ }
141
+
142
+ pub fn integer() -> Self {
143
+ DataTypeSelector::Integer.as_selector().into()
144
+ }
145
+
146
+ pub fn signed_integer() -> Self {
147
+ DataTypeSelector::SignedInteger.as_selector().into()
148
+ }
149
+
150
+ pub fn unsigned_integer() -> Self {
151
+ DataTypeSelector::UnsignedInteger.as_selector().into()
152
+ }
153
+
154
+ pub fn float() -> Self {
155
+ DataTypeSelector::Float.as_selector().into()
156
+ }
157
+
158
+ pub fn decimal() -> Self {
159
+ DataTypeSelector::Decimal.as_selector().into()
160
+ }
161
+
162
+ pub fn numeric() -> Self {
163
+ DataTypeSelector::Numeric.as_selector().into()
164
+ }
165
+
166
+ pub fn temporal() -> Self {
167
+ DataTypeSelector::Temporal.as_selector().into()
168
+ }
169
+
170
+ pub fn datetime(tu: Vec<Wrap<TimeUnit>>, tz: Vec<Wrap<Option<TimeZone>>>) -> Self {
171
+ use TimeZoneSet as TZS;
172
+
173
+ let mut allow_unset = false;
174
+ let mut allow_set = false;
175
+ let mut any_of: Vec<TimeZone> = Vec::new();
176
+
177
+ let tu = parse_time_unit_set(tu);
178
+ for t in tz {
179
+ let t = t.0;
180
+ match t {
181
+ None => allow_unset = true,
182
+ Some(s) if s.as_str() == "*" => allow_set = true,
183
+ Some(t) => any_of.push(t),
184
+ }
185
+ }
186
+
187
+ let tzs = match (allow_unset, allow_set) {
188
+ (true, true) => TZS::Any,
189
+ (false, true) => TZS::AnySet,
190
+ (true, false) if any_of.is_empty() => TZS::Unset,
191
+ (true, false) => TZS::UnsetOrAnyOf(any_of.into()),
192
+ (false, false) => TZS::AnyOf(any_of.into()),
193
+ };
194
+ DataTypeSelector::Datetime(tu, tzs).as_selector().into()
195
+ }
196
+
197
+ pub fn duration(tu: Vec<Wrap<TimeUnit>>) -> Self {
198
+ let tu = parse_time_unit_set(tu);
199
+ DataTypeSelector::Duration(tu).as_selector().into()
200
+ }
201
+
202
+ pub fn object() -> Self {
203
+ DataTypeSelector::Object.as_selector().into()
204
+ }
205
+
206
+ pub fn empty() -> Self {
207
+ dsl::empty().into()
208
+ }
209
+
210
+ pub fn all() -> Self {
211
+ dsl::all().into()
212
+ }
213
+
214
+ pub fn hash(&self) -> u64 {
215
+ let mut hasher = std::hash::DefaultHasher::default();
216
+ self.inner.hash(&mut hasher);
217
+ hasher.finish()
218
+ }
219
+ }
@@ -163,12 +163,20 @@ impl RbExpr {
163
163
  self.inner.clone().str().reverse().into()
164
164
  }
165
165
 
166
- pub fn str_pad_start(&self, length: usize, fillchar: char) -> Self {
167
- self.clone().inner.str().pad_start(length, fillchar).into()
166
+ pub fn str_pad_start(&self, length: &RbExpr, fillchar: char) -> Self {
167
+ self.clone()
168
+ .inner
169
+ .str()
170
+ .pad_start(length.inner.clone(), fillchar)
171
+ .into()
168
172
  }
169
173
 
170
- pub fn str_pad_end(&self, length: usize, fillchar: char) -> Self {
171
- self.clone().inner.str().pad_end(length, fillchar).into()
174
+ pub fn str_pad_end(&self, length: &RbExpr, fillchar: char) -> Self {
175
+ self.clone()
176
+ .inner
177
+ .str()
178
+ .pad_end(length.inner.clone(), fillchar)
179
+ .into()
172
180
  }
173
181
 
174
182
  pub fn str_zfill(&self, length: &Self) -> Self {
@@ -220,12 +228,11 @@ impl RbExpr {
220
228
  self.inner.clone().str().base64_decode(strict).into()
221
229
  }
222
230
 
223
- pub fn str_to_integer(&self, base: &Self, strict: bool) -> Self {
231
+ pub fn str_to_integer(&self, base: &Self, dtype: Option<Wrap<DataType>>, strict: bool) -> Self {
224
232
  self.inner
225
233
  .clone()
226
234
  .str()
227
- .to_integer(base.inner.clone(), strict)
228
- .with_fmt("str.to_integer")
235
+ .to_integer(base.inner.clone(), dtype.map(|wrap| wrap.0), strict)
229
236
  .into()
230
237
  }
231
238