polars-df 0.20.0 → 0.21.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +27 -0
  3. data/Cargo.lock +192 -186
  4. data/LICENSE.txt +1 -1
  5. data/ext/polars/Cargo.toml +19 -9
  6. data/ext/polars/src/batched_csv.rs +2 -2
  7. data/ext/polars/src/catalog/mod.rs +1 -0
  8. data/ext/polars/src/catalog/unity.rs +450 -0
  9. data/ext/polars/src/conversion/any_value.rs +9 -19
  10. data/ext/polars/src/conversion/categorical.rs +30 -0
  11. data/ext/polars/src/conversion/chunked_array.rs +8 -8
  12. data/ext/polars/src/conversion/mod.rs +275 -109
  13. data/ext/polars/src/dataframe/construction.rs +2 -2
  14. data/ext/polars/src/dataframe/export.rs +2 -2
  15. data/ext/polars/src/dataframe/general.rs +4 -2
  16. data/ext/polars/src/dataframe/io.rs +2 -2
  17. data/ext/polars/src/exceptions.rs +2 -1
  18. data/ext/polars/src/expr/array.rs +73 -4
  19. data/ext/polars/src/expr/binary.rs +26 -1
  20. data/ext/polars/src/expr/bitwise.rs +39 -0
  21. data/ext/polars/src/expr/categorical.rs +20 -0
  22. data/ext/polars/src/expr/datatype.rs +37 -0
  23. data/ext/polars/src/expr/datetime.rs +58 -0
  24. data/ext/polars/src/expr/general.rs +106 -22
  25. data/ext/polars/src/expr/list.rs +45 -2
  26. data/ext/polars/src/expr/meta.rs +5 -28
  27. data/ext/polars/src/expr/mod.rs +4 -1
  28. data/ext/polars/src/expr/name.rs +10 -2
  29. data/ext/polars/src/expr/rolling.rs +21 -1
  30. data/ext/polars/src/expr/selector.rs +219 -0
  31. data/ext/polars/src/expr/string.rs +73 -6
  32. data/ext/polars/src/expr/struct.rs +9 -1
  33. data/ext/polars/src/file.rs +11 -5
  34. data/ext/polars/src/functions/io.rs +21 -11
  35. data/ext/polars/src/functions/lazy.rs +26 -54
  36. data/ext/polars/src/functions/meta.rs +2 -2
  37. data/ext/polars/src/functions/misc.rs +1 -1
  38. data/ext/polars/src/functions/string_cache.rs +4 -5
  39. data/ext/polars/src/interop/numo/numo_rs.rs +1 -1
  40. data/ext/polars/src/interop/numo/to_numo_series.rs +1 -1
  41. data/ext/polars/src/io/mod.rs +102 -0
  42. data/ext/polars/src/lazyframe/general.rs +124 -111
  43. data/ext/polars/src/lazyframe/serde.rs +1 -1
  44. data/ext/polars/src/lazyframe/sink.rs +6 -6
  45. data/ext/polars/src/lib.rs +216 -29
  46. data/ext/polars/src/map/dataframe.rs +9 -9
  47. data/ext/polars/src/map/lazy.rs +1 -1
  48. data/ext/polars/src/map/mod.rs +31 -19
  49. data/ext/polars/src/map/series.rs +9 -9
  50. data/ext/polars/src/on_startup.rs +5 -2
  51. data/ext/polars/src/rb_modules.rs +1 -1
  52. data/ext/polars/src/series/aggregation.rs +44 -0
  53. data/ext/polars/src/series/construction.rs +11 -7
  54. data/ext/polars/src/series/export.rs +6 -4
  55. data/ext/polars/src/series/general.rs +75 -210
  56. data/ext/polars/src/series/import.rs +2 -2
  57. data/ext/polars/src/series/map.rs +227 -0
  58. data/ext/polars/src/series/mod.rs +2 -1
  59. data/ext/polars/src/series/scatter.rs +1 -1
  60. data/ext/polars/src/utils.rs +10 -2
  61. data/lib/polars/array_expr.rb +382 -3
  62. data/lib/polars/array_name_space.rb +281 -0
  63. data/lib/polars/binary_expr.rb +67 -0
  64. data/lib/polars/binary_name_space.rb +43 -0
  65. data/lib/polars/cat_expr.rb +224 -0
  66. data/lib/polars/cat_name_space.rb +130 -32
  67. data/lib/polars/catalog/unity/catalog_info.rb +20 -0
  68. data/lib/polars/catalog/unity/column_info.rb +31 -0
  69. data/lib/polars/catalog/unity/namespace_info.rb +21 -0
  70. data/lib/polars/catalog/unity/table_info.rb +50 -0
  71. data/lib/polars/catalog.rb +448 -0
  72. data/lib/polars/config.rb +2 -2
  73. data/lib/polars/convert.rb +12 -2
  74. data/lib/polars/data_frame.rb +834 -48
  75. data/lib/polars/data_type_expr.rb +52 -0
  76. data/lib/polars/data_types.rb +61 -5
  77. data/lib/polars/date_time_expr.rb +251 -0
  78. data/lib/polars/date_time_name_space.rb +299 -0
  79. data/lib/polars/exceptions.rb +7 -2
  80. data/lib/polars/expr.rb +1247 -211
  81. data/lib/polars/functions/col.rb +6 -5
  82. data/lib/polars/functions/datatype.rb +21 -0
  83. data/lib/polars/functions/lazy.rb +127 -15
  84. data/lib/polars/functions/repeat.rb +4 -0
  85. data/lib/polars/io/csv.rb +19 -1
  86. data/lib/polars/io/json.rb +16 -0
  87. data/lib/polars/io/ndjson.rb +13 -0
  88. data/lib/polars/io/parquet.rb +70 -66
  89. data/lib/polars/io/scan_options.rb +47 -0
  90. data/lib/polars/lazy_frame.rb +1099 -95
  91. data/lib/polars/list_expr.rb +400 -11
  92. data/lib/polars/list_name_space.rb +321 -5
  93. data/lib/polars/meta_expr.rb +71 -22
  94. data/lib/polars/name_expr.rb +36 -0
  95. data/lib/polars/scan_cast_options.rb +64 -0
  96. data/lib/polars/schema.rb +84 -3
  97. data/lib/polars/selector.rb +210 -0
  98. data/lib/polars/selectors.rb +932 -203
  99. data/lib/polars/series.rb +1083 -63
  100. data/lib/polars/string_expr.rb +435 -9
  101. data/lib/polars/string_name_space.rb +729 -45
  102. data/lib/polars/struct_expr.rb +103 -0
  103. data/lib/polars/struct_name_space.rb +19 -1
  104. data/lib/polars/utils/parse.rb +40 -0
  105. data/lib/polars/utils/various.rb +18 -1
  106. data/lib/polars/utils.rb +9 -1
  107. data/lib/polars/version.rb +1 -1
  108. data/lib/polars.rb +10 -0
  109. metadata +20 -2
@@ -1,4 +1,4 @@
1
- use magnus::{prelude::*, value::Opaque, Ruby, Value};
1
+ use magnus::{Ruby, Value, prelude::*, value::Opaque};
2
2
  use polars::lazy::dsl::lit;
3
3
  use polars::prelude::*;
4
4
  use polars::series::ops::NullBehavior;
@@ -47,6 +47,14 @@ impl RbExpr {
47
47
  self.inner.clone().list().eval(expr.inner.clone()).into()
48
48
  }
49
49
 
50
+ pub fn list_filter(&self, predicate: &RbExpr) -> Self {
51
+ self.inner
52
+ .clone()
53
+ .list()
54
+ .eval(Expr::Column(PlSmallStr::EMPTY).filter(predicate.inner.clone()))
55
+ .into()
56
+ }
57
+
50
58
  pub fn list_get(&self, index: &RbExpr, null_on_oob: bool) -> Self {
51
59
  self.inner
52
60
  .clone()
@@ -75,6 +83,18 @@ impl RbExpr {
75
83
  self.inner.clone().list().mean().into()
76
84
  }
77
85
 
86
+ pub fn list_median(&self) -> Self {
87
+ self.inner.clone().list().median().into()
88
+ }
89
+
90
+ pub fn list_std(&self, ddof: u8) -> Self {
91
+ self.inner.clone().list().std(ddof).into()
92
+ }
93
+
94
+ pub fn list_var(&self, ddof: u8) -> Self {
95
+ self.inner.clone().list().var(ddof).into()
96
+ }
97
+
78
98
  pub fn list_min(&self) -> Self {
79
99
  self.inner.clone().list().min().into()
80
100
  }
@@ -163,6 +183,14 @@ impl RbExpr {
163
183
  .into()
164
184
  }
165
185
 
186
+ pub fn list_gather_every(&self, n: &RbExpr, offset: &RbExpr) -> Self {
187
+ self.inner
188
+ .clone()
189
+ .list()
190
+ .gather_every(n.inner.clone(), offset.inner.clone())
191
+ .into()
192
+ }
193
+
166
194
  pub fn list_to_array(&self, width: usize) -> Self {
167
195
  self.inner.clone().list().to_array(width).into()
168
196
  }
@@ -189,7 +217,7 @@ impl RbExpr {
189
217
  .inner
190
218
  .clone()
191
219
  .list()
192
- .to_struct(ListToStructArgs::InferWidth {
220
+ .to_struct(ListToStruct::InferWidth {
193
221
  infer_field_strategy: width_strat.0,
194
222
  get_index_name: name_gen,
195
223
  max_fields: upper_bound,
@@ -197,6 +225,10 @@ impl RbExpr {
197
225
  .into())
198
226
  }
199
227
 
228
+ pub fn list_n_unique(&self) -> Self {
229
+ self.inner.clone().list().n_unique().into()
230
+ }
231
+
200
232
  pub fn list_unique(&self, maintain_order: bool) -> Self {
201
233
  let e = self.inner.clone();
202
234
 
@@ -206,4 +238,15 @@ impl RbExpr {
206
238
  e.list().unique().into()
207
239
  }
208
240
  }
241
+
242
+ pub fn list_set_operation(&self, other: &RbExpr, operation: Wrap<SetOperation>) -> Self {
243
+ let e = self.inner.clone().list();
244
+ match operation.0 {
245
+ SetOperation::Intersection => e.set_intersection(other.inner.clone()),
246
+ SetOperation::Difference => e.set_difference(other.inner.clone()),
247
+ SetOperation::Union => e.union(other.inner.clone()),
248
+ SetOperation::SymmetricDifference => e.set_symmetric_difference(other.inner.clone()),
249
+ }
250
+ .into()
251
+ }
209
252
  }
@@ -57,38 +57,15 @@ impl RbExpr {
57
57
  self.inner.clone().meta().is_regex_projection()
58
58
  }
59
59
 
60
- pub fn _meta_selector_add(&self, other: &RbExpr) -> RbResult<RbExpr> {
61
- let out = self
62
- .inner
63
- .clone()
64
- .meta()
65
- ._selector_add(other.inner.clone())
66
- .map_err(RbPolarsErr::from)?;
67
- Ok(out.into())
68
- }
69
-
70
- pub fn _meta_selector_sub(&self, other: &RbExpr) -> RbResult<RbExpr> {
71
- let out = self
72
- .inner
73
- .clone()
74
- .meta()
75
- ._selector_sub(other.inner.clone())
76
- .map_err(RbPolarsErr::from)?;
77
- Ok(out.into())
78
- }
79
-
80
- pub fn _meta_selector_and(&self, other: &RbExpr) -> RbResult<RbExpr> {
81
- let out = self
82
- .inner
60
+ pub fn meta_is_column_selection(&self, allow_aliasing: bool) -> bool {
61
+ self.inner
83
62
  .clone()
84
63
  .meta()
85
- ._selector_and(other.inner.clone())
86
- .map_err(RbPolarsErr::from)?;
87
- Ok(out.into())
64
+ .is_column_selection(allow_aliasing)
88
65
  }
89
66
 
90
- pub fn _meta_as_selector(&self) -> RbExpr {
91
- self.inner.clone().meta()._into_selector().into()
67
+ pub fn meta_is_literal(&self, allow_aliasing: bool) -> bool {
68
+ self.inner.clone().meta().is_literal(allow_aliasing)
92
69
  }
93
70
 
94
71
  fn compute_tree_format(
@@ -1,16 +1,19 @@
1
1
  mod array;
2
2
  mod binary;
3
+ mod bitwise;
3
4
  mod categorical;
5
+ pub mod datatype;
4
6
  mod datetime;
5
7
  mod general;
6
8
  mod list;
7
9
  mod meta;
8
10
  mod name;
9
11
  mod rolling;
12
+ pub mod selector;
10
13
  mod string;
11
14
  mod r#struct;
12
15
 
13
- use magnus::{prelude::*, RArray};
16
+ use magnus::{RArray, prelude::*};
14
17
  use polars::lazy::dsl::Expr;
15
18
 
16
19
  use crate::RbResult;
@@ -1,4 +1,4 @@
1
- use magnus::{block::Proc, value::Opaque, Ruby};
1
+ use magnus::{Ruby, block::Proc, value::Opaque};
2
2
  use polars::prelude::*;
3
3
  use polars_utils::format_pl_smallstr;
4
4
 
@@ -20,7 +20,7 @@ impl RbExpr {
20
20
  match out {
21
21
  Ok(out) => Ok(format_pl_smallstr!("{}", out)),
22
22
  Err(e) => Err(PolarsError::ComputeError(
23
- format!("Ruby function in 'name.map' produced an error: {}.", e).into(),
23
+ format!("Ruby function in 'name.map' produced an error: {e}.").into(),
24
24
  )),
25
25
  }
26
26
  })
@@ -42,4 +42,12 @@ impl RbExpr {
42
42
  pub fn name_to_uppercase(&self) -> Self {
43
43
  self.inner.clone().name().to_uppercase().into()
44
44
  }
45
+
46
+ pub fn name_prefix_fields(&self, prefix: String) -> Self {
47
+ self.inner.clone().name().prefix_fields(&prefix).into()
48
+ }
49
+
50
+ pub fn name_suffix_fields(&self, suffix: String) -> Self {
51
+ self.inner.clone().name().suffix_fields(&suffix).into()
52
+ }
45
53
  }
@@ -1,7 +1,7 @@
1
1
  use polars::prelude::*;
2
2
 
3
- use crate::conversion::Wrap;
4
3
  use crate::RbExpr;
4
+ use crate::conversion::Wrap;
5
5
 
6
6
  impl RbExpr {
7
7
  pub fn rolling_sum(
@@ -337,4 +337,24 @@ impl RbExpr {
337
337
 
338
338
  self.inner.clone().rolling_skew(options).into()
339
339
  }
340
+
341
+ pub fn rolling_kurtosis(
342
+ &self,
343
+ window_size: usize,
344
+ fisher: bool,
345
+ bias: bool,
346
+ min_periods: Option<usize>,
347
+ center: bool,
348
+ ) -> Self {
349
+ let min_periods = min_periods.unwrap_or(window_size);
350
+ let options = RollingOptionsFixedWindow {
351
+ window_size,
352
+ weights: None,
353
+ min_periods,
354
+ center,
355
+ fn_params: Some(RollingFnParams::Kurtosis { fisher, bias }),
356
+ };
357
+
358
+ self.inner.clone().rolling_kurtosis(options).into()
359
+ }
340
360
  }
@@ -0,0 +1,219 @@
1
+ use std::hash::{Hash, Hasher};
2
+ use std::sync::Arc;
3
+
4
+ use polars::prelude::{
5
+ DataType, DataTypeSelector, Selector, TimeUnit, TimeUnitSet, TimeZone, TimeZoneSet,
6
+ };
7
+ use polars_plan::dsl;
8
+
9
+ use crate::prelude::Wrap;
10
+ use crate::{RbResult, RbTypeError};
11
+
12
+ #[magnus::wrap(class = "Polars::RbSelector")]
13
+ #[repr(transparent)]
14
+ #[derive(Clone)]
15
+ pub struct RbSelector {
16
+ pub inner: Selector,
17
+ }
18
+
19
+ impl From<Selector> for RbSelector {
20
+ fn from(inner: Selector) -> Self {
21
+ Self { inner }
22
+ }
23
+ }
24
+
25
+ fn parse_time_unit_set(time_units: Vec<Wrap<TimeUnit>>) -> TimeUnitSet {
26
+ let mut tu = TimeUnitSet::empty();
27
+ for v in time_units {
28
+ match v.0 {
29
+ TimeUnit::Nanoseconds => tu |= TimeUnitSet::NANO_SECONDS,
30
+ TimeUnit::Microseconds => tu |= TimeUnitSet::MICRO_SECONDS,
31
+ TimeUnit::Milliseconds => tu |= TimeUnitSet::MILLI_SECONDS,
32
+ }
33
+ }
34
+ tu
35
+ }
36
+
37
+ pub fn parse_datatype_selector(selector: &RbSelector) -> RbResult<DataTypeSelector> {
38
+ selector.inner.clone().to_dtype_selector().ok_or_else(|| {
39
+ RbTypeError::new_err(format!(
40
+ "expected datatype based expression got '{}'",
41
+ selector.inner
42
+ ))
43
+ })
44
+ }
45
+
46
+ impl RbSelector {
47
+ pub fn union(&self, other: &Self) -> Self {
48
+ Self {
49
+ inner: self.inner.clone() | other.inner.clone(),
50
+ }
51
+ }
52
+
53
+ pub fn difference(&self, other: &Self) -> Self {
54
+ Self {
55
+ inner: self.inner.clone() - other.inner.clone(),
56
+ }
57
+ }
58
+
59
+ pub fn exclusive_or(&self, other: &Self) -> Self {
60
+ Self {
61
+ inner: self.inner.clone() ^ other.inner.clone(),
62
+ }
63
+ }
64
+
65
+ pub fn intersect(&self, other: &Self) -> Self {
66
+ Self {
67
+ inner: self.inner.clone() & other.inner.clone(),
68
+ }
69
+ }
70
+
71
+ pub fn by_dtype(dtypes: Vec<Wrap<DataType>>) -> Self {
72
+ let dtypes = dtypes.into_iter().map(|x| x.0).collect::<Vec<_>>();
73
+ dsl::dtype_cols(dtypes).as_selector().into()
74
+ }
75
+
76
+ pub fn by_name(names: Vec<String>, strict: bool) -> Self {
77
+ dsl::by_name(names, strict).into()
78
+ }
79
+
80
+ pub fn by_index(indices: Vec<i64>, strict: bool) -> Self {
81
+ Selector::ByIndex {
82
+ indices: indices.into(),
83
+ strict,
84
+ }
85
+ .into()
86
+ }
87
+
88
+ pub fn first(strict: bool) -> Self {
89
+ Selector::ByIndex {
90
+ indices: [0].into(),
91
+ strict,
92
+ }
93
+ .into()
94
+ }
95
+
96
+ pub fn last(strict: bool) -> Self {
97
+ Selector::ByIndex {
98
+ indices: [-1].into(),
99
+ strict,
100
+ }
101
+ .into()
102
+ }
103
+
104
+ pub fn matches(pattern: String) -> Self {
105
+ Selector::Matches(pattern.into()).into()
106
+ }
107
+
108
+ pub fn enum_() -> Self {
109
+ DataTypeSelector::Enum.as_selector().into()
110
+ }
111
+
112
+ pub fn categorical() -> Self {
113
+ DataTypeSelector::Categorical.as_selector().into()
114
+ }
115
+
116
+ pub fn nested() -> Self {
117
+ DataTypeSelector::Nested.as_selector().into()
118
+ }
119
+
120
+ pub fn list(inner_dst: Option<&Self>) -> RbResult<Self> {
121
+ let inner_dst = match inner_dst {
122
+ None => None,
123
+ Some(inner_dst) => Some(Arc::new(parse_datatype_selector(inner_dst)?)),
124
+ };
125
+ Ok(DataTypeSelector::List(inner_dst).as_selector().into())
126
+ }
127
+
128
+ pub fn array(inner_dst: Option<&Self>, width: Option<usize>) -> RbResult<Self> {
129
+ let inner_dst = match inner_dst {
130
+ None => None,
131
+ Some(inner_dst) => Some(Arc::new(parse_datatype_selector(inner_dst)?)),
132
+ };
133
+ Ok(DataTypeSelector::Array(inner_dst, width)
134
+ .as_selector()
135
+ .into())
136
+ }
137
+
138
+ pub fn struct_() -> Self {
139
+ DataTypeSelector::Struct.as_selector().into()
140
+ }
141
+
142
+ pub fn integer() -> Self {
143
+ DataTypeSelector::Integer.as_selector().into()
144
+ }
145
+
146
+ pub fn signed_integer() -> Self {
147
+ DataTypeSelector::SignedInteger.as_selector().into()
148
+ }
149
+
150
+ pub fn unsigned_integer() -> Self {
151
+ DataTypeSelector::UnsignedInteger.as_selector().into()
152
+ }
153
+
154
+ pub fn float() -> Self {
155
+ DataTypeSelector::Float.as_selector().into()
156
+ }
157
+
158
+ pub fn decimal() -> Self {
159
+ DataTypeSelector::Decimal.as_selector().into()
160
+ }
161
+
162
+ pub fn numeric() -> Self {
163
+ DataTypeSelector::Numeric.as_selector().into()
164
+ }
165
+
166
+ pub fn temporal() -> Self {
167
+ DataTypeSelector::Temporal.as_selector().into()
168
+ }
169
+
170
+ pub fn datetime(tu: Vec<Wrap<TimeUnit>>, tz: Vec<Wrap<Option<TimeZone>>>) -> Self {
171
+ use TimeZoneSet as TZS;
172
+
173
+ let mut allow_unset = false;
174
+ let mut allow_set = false;
175
+ let mut any_of: Vec<TimeZone> = Vec::new();
176
+
177
+ let tu = parse_time_unit_set(tu);
178
+ for t in tz {
179
+ let t = t.0;
180
+ match t {
181
+ None => allow_unset = true,
182
+ Some(s) if s.as_str() == "*" => allow_set = true,
183
+ Some(t) => any_of.push(t),
184
+ }
185
+ }
186
+
187
+ let tzs = match (allow_unset, allow_set) {
188
+ (true, true) => TZS::Any,
189
+ (false, true) => TZS::AnySet,
190
+ (true, false) if any_of.is_empty() => TZS::Unset,
191
+ (true, false) => TZS::UnsetOrAnyOf(any_of.into()),
192
+ (false, false) => TZS::AnyOf(any_of.into()),
193
+ };
194
+ DataTypeSelector::Datetime(tu, tzs).as_selector().into()
195
+ }
196
+
197
+ pub fn duration(tu: Vec<Wrap<TimeUnit>>) -> Self {
198
+ let tu = parse_time_unit_set(tu);
199
+ DataTypeSelector::Duration(tu).as_selector().into()
200
+ }
201
+
202
+ pub fn object() -> Self {
203
+ DataTypeSelector::Object.as_selector().into()
204
+ }
205
+
206
+ pub fn empty() -> Self {
207
+ dsl::empty().into()
208
+ }
209
+
210
+ pub fn all() -> Self {
211
+ dsl::all().into()
212
+ }
213
+
214
+ pub fn hash(&self) -> u64 {
215
+ let mut hasher = std::hash::DefaultHasher::default();
216
+ self.inner.hash(&mut hasher);
217
+ hasher.finish()
218
+ }
219
+ }
@@ -122,6 +122,14 @@ impl RbExpr {
122
122
  .into()
123
123
  }
124
124
 
125
+ pub fn str_head(&self, n: &Self) -> Self {
126
+ self.inner.clone().str().head(n.inner.clone()).into()
127
+ }
128
+
129
+ pub fn str_tail(&self, n: &Self) -> Self {
130
+ self.inner.clone().str().tail(n.inner.clone()).into()
131
+ }
132
+
125
133
  pub fn str_to_uppercase(&self) -> Self {
126
134
  self.inner.clone().str().to_uppercase().into()
127
135
  }
@@ -159,16 +167,28 @@ impl RbExpr {
159
167
  .into()
160
168
  }
161
169
 
170
+ pub fn str_normalize(&self, form: Wrap<UnicodeForm>) -> Self {
171
+ self.inner.clone().str().normalize(form.0).into()
172
+ }
173
+
162
174
  pub fn str_reverse(&self) -> Self {
163
175
  self.inner.clone().str().reverse().into()
164
176
  }
165
177
 
166
- pub fn str_pad_start(&self, length: usize, fillchar: char) -> Self {
167
- self.clone().inner.str().pad_start(length, fillchar).into()
178
+ pub fn str_pad_start(&self, length: &RbExpr, fillchar: char) -> Self {
179
+ self.clone()
180
+ .inner
181
+ .str()
182
+ .pad_start(length.inner.clone(), fillchar)
183
+ .into()
168
184
  }
169
185
 
170
- pub fn str_pad_end(&self, length: usize, fillchar: char) -> Self {
171
- self.clone().inner.str().pad_end(length, fillchar).into()
186
+ pub fn str_pad_end(&self, length: &RbExpr, fillchar: char) -> Self {
187
+ self.clone()
188
+ .inner
189
+ .str()
190
+ .pad_end(length.inner.clone(), fillchar)
191
+ .into()
172
192
  }
173
193
 
174
194
  pub fn str_zfill(&self, length: &Self) -> Self {
@@ -192,6 +212,23 @@ impl RbExpr {
192
212
  }
193
213
  }
194
214
 
215
+ pub fn str_find(&self, pat: &Self, literal: Option<bool>, strict: bool) -> Self {
216
+ match literal {
217
+ Some(true) => self
218
+ .inner
219
+ .clone()
220
+ .str()
221
+ .find_literal(pat.inner.clone())
222
+ .into(),
223
+ _ => self
224
+ .inner
225
+ .clone()
226
+ .str()
227
+ .find(pat.inner.clone(), strict)
228
+ .into(),
229
+ }
230
+ }
231
+
195
232
  pub fn str_ends_with(&self, sub: &RbExpr) -> Self {
196
233
  self.inner.clone().str().ends_with(sub.inner.clone()).into()
197
234
  }
@@ -220,11 +257,11 @@ impl RbExpr {
220
257
  self.inner.clone().str().base64_decode(strict).into()
221
258
  }
222
259
 
223
- pub fn str_to_integer(&self, base: &Self, strict: bool) -> Self {
260
+ pub fn str_to_integer(&self, base: &Self, dtype: Option<Wrap<DataType>>, strict: bool) -> Self {
224
261
  self.inner
225
262
  .clone()
226
263
  .str()
227
- .to_integer(base.inner.clone(), strict)
264
+ .to_integer(base.inner.clone(), dtype.map(|wrap| wrap.0), strict)
228
265
  .into()
229
266
  }
230
267
 
@@ -343,4 +380,34 @@ impl RbExpr {
343
380
  )
344
381
  .into()
345
382
  }
383
+
384
+ pub fn str_extract_many(
385
+ &self,
386
+ patterns: &RbExpr,
387
+ ascii_case_insensitive: bool,
388
+ overlapping: bool,
389
+ ) -> Self {
390
+ self.inner
391
+ .clone()
392
+ .str()
393
+ .extract_many(patterns.inner.clone(), ascii_case_insensitive, overlapping)
394
+ .into()
395
+ }
396
+
397
+ pub fn str_find_many(
398
+ &self,
399
+ patterns: &RbExpr,
400
+ ascii_case_insensitive: bool,
401
+ overlapping: bool,
402
+ ) -> Self {
403
+ self.inner
404
+ .clone()
405
+ .str()
406
+ .find_many(patterns.inner.clone(), ascii_case_insensitive, overlapping)
407
+ .into()
408
+ }
409
+
410
+ pub fn str_escape_regex(&self) -> Self {
411
+ self.inner.clone().str().escape_regex().into()
412
+ }
346
413
  }
@@ -1,4 +1,6 @@
1
- use crate::RbExpr;
1
+ use magnus::RArray;
2
+
3
+ use crate::{RbExpr, RbResult, rb_exprs_to_exprs};
2
4
 
3
5
  impl RbExpr {
4
6
  pub fn struct_field_by_index(&self, index: i64) -> Self {
@@ -16,4 +18,10 @@ impl RbExpr {
16
18
  pub fn struct_json_encode(&self) -> Self {
17
19
  self.inner.clone().struct_().json_encode().into()
18
20
  }
21
+
22
+ pub fn struct_with_fields(&self, fields: RArray) -> RbResult<Self> {
23
+ let fields = rb_exprs_to_exprs(fields)?;
24
+ let e = self.inner.clone().struct_().with_fields(fields);
25
+ Ok(e.into())
26
+ }
19
27
  }
@@ -3,17 +3,18 @@ use std::io;
3
3
  use std::io::{Cursor, Read, Seek, SeekFrom, Write};
4
4
  use std::path::PathBuf;
5
5
 
6
- use magnus::{exception, prelude::*, value::Opaque, Error, RString, Ruby, Value};
6
+ use magnus::{Error, RString, Ruby, Value, exception, prelude::*, value::Opaque};
7
7
  use polars::io::cloud::CloudOptions;
8
8
  use polars::io::mmap::MmapBytesReader;
9
+ use polars::prelude::PlPath;
9
10
  use polars::prelude::file::DynWriteable;
10
11
  use polars::prelude::sync_on_close::SyncOnCloseType;
11
12
  use polars_utils::file::ClosableFile;
12
13
  use polars_utils::mmap::MemSlice;
13
14
 
15
+ use crate::RbResult;
14
16
  use crate::error::RbPolarsErr;
15
17
  use crate::prelude::resolve_homedir;
16
- use crate::RbResult;
17
18
 
18
19
  #[derive(Clone)]
19
20
  pub struct RbFileLikeObject {
@@ -188,7 +189,7 @@ impl EitherRustRubyFile {
188
189
 
189
190
  pub enum RubyScanSourceInput {
190
191
  Buffer(MemSlice),
191
- Path(PathBuf),
192
+ Path(PlPath),
192
193
  #[allow(dead_code)]
193
194
  File(File),
194
195
  }
@@ -202,8 +203,13 @@ pub(crate) fn try_get_rbfile(
202
203
  }
203
204
 
204
205
  pub fn get_ruby_scan_source_input(rb_f: Value, write: bool) -> RbResult<RubyScanSourceInput> {
205
- if let Ok(file_path) = PathBuf::try_convert(rb_f) {
206
- // TODO resolve_homedir
206
+ if let Ok(s) = String::try_convert(rb_f) {
207
+ let mut file_path = PlPath::new(&s);
208
+ if let Some(p) = file_path.as_ref().as_local_path() {
209
+ if p.starts_with("~/") {
210
+ file_path = PlPath::Local(resolve_homedir(&p).into());
211
+ }
212
+ }
207
213
  Ok(RubyScanSourceInput::Path(file_path))
208
214
  } else {
209
215
  let f = RbFileLikeObject::with_requirements(rb_f, !write, write, !write)?;
@@ -1,13 +1,10 @@
1
1
  use std::io::BufReader;
2
2
 
3
- use arrow::array::Utf8ViewArray;
4
3
  use magnus::{RHash, Value};
5
4
  use polars::prelude::ArrowSchema;
6
- use polars_core::datatypes::create_enum_dtype;
7
5
 
8
6
  use crate::conversion::Wrap;
9
- use crate::file::{get_either_file, EitherRustRubyFile};
10
- use crate::prelude::ArrowDataType;
7
+ use crate::file::{EitherRustRubyFile, get_either_file};
11
8
  use crate::{RbPolarsErr, RbResult};
12
9
 
13
10
  pub fn read_ipc_schema(rb_f: Value) -> RbResult<RHash> {
@@ -24,6 +21,25 @@ pub fn read_ipc_schema(rb_f: Value) -> RbResult<RHash> {
24
21
  Ok(dict)
25
22
  }
26
23
 
24
+ pub fn read_parquet_metadata(rb_f: Value) -> RbResult<RHash> {
25
+ use polars_parquet::read::read_metadata;
26
+ use polars_parquet::read::schema::read_custom_key_value_metadata;
27
+
28
+ let metadata = match get_either_file(rb_f, false)? {
29
+ EitherRustRubyFile::Rust(r) => {
30
+ read_metadata(&mut BufReader::new(r)).map_err(RbPolarsErr::from)?
31
+ }
32
+ EitherRustRubyFile::Rb(mut r) => read_metadata(&mut r).map_err(RbPolarsErr::from)?,
33
+ };
34
+
35
+ let key_value_metadata = read_custom_key_value_metadata(metadata.key_value_metadata());
36
+ let dict = RHash::new();
37
+ for (key, value) in key_value_metadata.into_iter() {
38
+ dict.aset(key.as_str(), value.as_str())?;
39
+ }
40
+ Ok(dict)
41
+ }
42
+
27
43
  pub fn read_parquet_schema(rb_f: Value) -> RbResult<RHash> {
28
44
  use polars_parquet::read::{infer_schema, read_metadata};
29
45
 
@@ -42,13 +58,7 @@ pub fn read_parquet_schema(rb_f: Value) -> RbResult<RHash> {
42
58
 
43
59
  fn fields_to_rbdict(schema: &ArrowSchema, dict: &RHash) -> RbResult<()> {
44
60
  for field in schema.iter_values() {
45
- let dt = if field.is_enum() {
46
- Wrap(create_enum_dtype(Utf8ViewArray::new_empty(
47
- ArrowDataType::Utf8View,
48
- )))
49
- } else {
50
- Wrap(polars::prelude::DataType::from_arrow_field(field))
51
- };
61
+ let dt = Wrap(polars::prelude::DataType::from_arrow_field(field));
52
62
  dict.aset(field.name.as_str(), dt)?;
53
63
  }
54
64
  Ok(())