polars-df 0.20.0 → 0.21.0

This diff shows the changes between two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (88)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +11 -0
  3. data/Cargo.lock +192 -186
  4. data/LICENSE.txt +1 -1
  5. data/ext/polars/Cargo.toml +13 -9
  6. data/ext/polars/src/batched_csv.rs +2 -2
  7. data/ext/polars/src/catalog/mod.rs +1 -0
  8. data/ext/polars/src/catalog/unity.rs +450 -0
  9. data/ext/polars/src/conversion/any_value.rs +9 -19
  10. data/ext/polars/src/conversion/categorical.rs +30 -0
  11. data/ext/polars/src/conversion/chunked_array.rs +8 -8
  12. data/ext/polars/src/conversion/mod.rs +187 -109
  13. data/ext/polars/src/dataframe/construction.rs +2 -2
  14. data/ext/polars/src/dataframe/export.rs +2 -2
  15. data/ext/polars/src/dataframe/general.rs +4 -2
  16. data/ext/polars/src/dataframe/io.rs +2 -2
  17. data/ext/polars/src/exceptions.rs +1 -1
  18. data/ext/polars/src/expr/datatype.rs +14 -0
  19. data/ext/polars/src/expr/general.rs +22 -17
  20. data/ext/polars/src/expr/list.rs +21 -2
  21. data/ext/polars/src/expr/meta.rs +0 -34
  22. data/ext/polars/src/expr/mod.rs +3 -1
  23. data/ext/polars/src/expr/name.rs +2 -2
  24. data/ext/polars/src/expr/rolling.rs +1 -1
  25. data/ext/polars/src/expr/selector.rs +219 -0
  26. data/ext/polars/src/expr/string.rs +14 -6
  27. data/ext/polars/src/file.rs +11 -5
  28. data/ext/polars/src/functions/io.rs +2 -11
  29. data/ext/polars/src/functions/lazy.rs +22 -54
  30. data/ext/polars/src/functions/meta.rs +2 -2
  31. data/ext/polars/src/functions/misc.rs +1 -1
  32. data/ext/polars/src/functions/string_cache.rs +4 -5
  33. data/ext/polars/src/interop/numo/numo_rs.rs +1 -1
  34. data/ext/polars/src/interop/numo/to_numo_series.rs +1 -1
  35. data/ext/polars/src/io/mod.rs +102 -0
  36. data/ext/polars/src/lazyframe/general.rs +74 -112
  37. data/ext/polars/src/lazyframe/serde.rs +1 -1
  38. data/ext/polars/src/lazyframe/sink.rs +6 -6
  39. data/ext/polars/src/lib.rs +98 -20
  40. data/ext/polars/src/map/dataframe.rs +7 -7
  41. data/ext/polars/src/map/lazy.rs +1 -1
  42. data/ext/polars/src/map/mod.rs +31 -19
  43. data/ext/polars/src/map/series.rs +8 -8
  44. data/ext/polars/src/on_startup.rs +5 -2
  45. data/ext/polars/src/rb_modules.rs +1 -1
  46. data/ext/polars/src/series/construction.rs +11 -7
  47. data/ext/polars/src/series/export.rs +6 -4
  48. data/ext/polars/src/series/general.rs +12 -207
  49. data/ext/polars/src/series/import.rs +2 -2
  50. data/ext/polars/src/series/map.rs +227 -0
  51. data/ext/polars/src/series/mod.rs +2 -1
  52. data/ext/polars/src/series/scatter.rs +1 -1
  53. data/ext/polars/src/utils.rs +10 -2
  54. data/lib/polars/cat_name_space.rb +3 -43
  55. data/lib/polars/catalog/unity/catalog_info.rb +20 -0
  56. data/lib/polars/catalog/unity/column_info.rb +31 -0
  57. data/lib/polars/catalog/unity/namespace_info.rb +21 -0
  58. data/lib/polars/catalog/unity/table_info.rb +50 -0
  59. data/lib/polars/catalog.rb +448 -0
  60. data/lib/polars/convert.rb +10 -0
  61. data/lib/polars/data_frame.rb +151 -30
  62. data/lib/polars/data_types.rb +47 -3
  63. data/lib/polars/exceptions.rb +7 -2
  64. data/lib/polars/expr.rb +34 -31
  65. data/lib/polars/functions/col.rb +6 -5
  66. data/lib/polars/functions/lazy.rb +114 -15
  67. data/lib/polars/functions/repeat.rb +4 -0
  68. data/lib/polars/io/csv.rb +18 -0
  69. data/lib/polars/io/json.rb +16 -0
  70. data/lib/polars/io/ndjson.rb +13 -0
  71. data/lib/polars/io/parquet.rb +45 -63
  72. data/lib/polars/io/scan_options.rb +47 -0
  73. data/lib/polars/lazy_frame.rb +163 -75
  74. data/lib/polars/list_expr.rb +204 -7
  75. data/lib/polars/list_name_space.rb +120 -1
  76. data/lib/polars/meta_expr.rb +7 -22
  77. data/lib/polars/scan_cast_options.rb +64 -0
  78. data/lib/polars/schema.rb +6 -1
  79. data/lib/polars/selector.rb +138 -0
  80. data/lib/polars/selectors.rb +931 -202
  81. data/lib/polars/series.rb +34 -11
  82. data/lib/polars/string_expr.rb +24 -3
  83. data/lib/polars/string_name_space.rb +11 -0
  84. data/lib/polars/utils/parse.rb +40 -0
  85. data/lib/polars/utils.rb +5 -1
  86. data/lib/polars/version.rb +1 -1
  87. data/lib/polars.rb +8 -0
  88. metadata +17 -2
@@ -1,7 +1,7 @@
1
1
  use std::hash::BuildHasher;
2
2
 
3
3
  use either::Either;
4
- use magnus::{prelude::*, typed_data::Obj, IntoValue, RArray, Value};
4
+ use magnus::{IntoValue, RArray, Value, prelude::*, typed_data::Obj};
5
5
  use polars::prelude::pivot::{pivot, pivot_stable};
6
6
  use polars::prelude::*;
7
7
 
@@ -416,17 +416,19 @@ impl RbDataFrame {
416
416
  columns: Option<Vec<String>>,
417
417
  separator: Option<String>,
418
418
  drop_first: bool,
419
+ drop_nulls: bool,
419
420
  ) -> RbResult<Self> {
420
421
  let df = match columns {
421
422
  Some(cols) => self.df.borrow().columns_to_dummies(
422
423
  cols.iter().map(|x| x as &str).collect(),
423
424
  separator.as_deref(),
424
425
  drop_first,
426
+ drop_nulls,
425
427
  ),
426
428
  None => self
427
429
  .df
428
430
  .borrow()
429
- .to_dummies(separator.as_deref(), drop_first),
431
+ .to_dummies(separator.as_deref(), drop_first, drop_nulls),
430
432
  }
431
433
  .map_err(RbPolarsErr::from)?;
432
434
  Ok(df.into())
@@ -1,6 +1,6 @@
1
- use magnus::{prelude::*, Value};
2
- use polars::io::avro::AvroCompression;
1
+ use magnus::{Value, prelude::*};
3
2
  use polars::io::RowIndex;
3
+ use polars::io::avro::AvroCompression;
4
4
  use polars::prelude::*;
5
5
  use std::io::BufWriter;
6
6
  use std::num::NonZeroUsize;
@@ -1,5 +1,5 @@
1
1
  use crate::rb_modules;
2
- use magnus::{exception, Error};
2
+ use magnus::{Error, exception};
3
3
  use std::borrow::Cow;
4
4
 
5
5
  macro_rules! create_exception {
@@ -0,0 +1,14 @@
1
+ use polars::prelude::DataTypeExpr;
2
+
3
+ #[magnus::wrap(class = "Polars::RbDataTypeExpr")]
4
+ #[repr(transparent)]
5
+ #[derive(Clone)]
6
+ pub struct RbDataTypeExpr {
7
+ pub inner: DataTypeExpr,
8
+ }
9
+
10
+ impl From<DataTypeExpr> for RbDataTypeExpr {
11
+ fn from(expr: DataTypeExpr) -> Self {
12
+ RbDataTypeExpr { inner: expr }
13
+ }
14
+ }
@@ -6,10 +6,11 @@ use polars::prelude::*;
6
6
  use polars::series::ops::NullBehavior;
7
7
  use polars_core::series::IsSorted;
8
8
 
9
- use crate::conversion::{parse_fill_null_strategy, Wrap};
9
+ use super::selector::RbSelector;
10
+ use crate::conversion::{Wrap, parse_fill_null_strategy};
10
11
  use crate::map::lazy::map_single;
11
12
  use crate::rb_exprs_to_exprs;
12
- use crate::{RbExpr, RbResult};
13
+ use crate::{RbExpr, RbPolarsErr, RbResult};
13
14
 
14
15
  impl RbExpr {
15
16
  pub fn add(&self, rhs: &Self) -> RbResult<Self> {
@@ -276,17 +277,8 @@ impl RbExpr {
276
277
  .into()
277
278
  }
278
279
 
279
- pub fn arg_sort(&self, reverse: bool, nulls_last: bool) -> Self {
280
- self.clone()
281
- .inner
282
- .arg_sort(SortOptions {
283
- descending: reverse,
284
- nulls_last,
285
- multithreaded: true,
286
- maintain_order: false,
287
- limit: None,
288
- })
289
- .into()
280
+ pub fn arg_sort(&self, descending: bool, nulls_last: bool) -> Self {
281
+ self.inner.clone().arg_sort(descending, nulls_last).into()
290
282
  }
291
283
 
292
284
  pub fn top_k(&self, k: &Self) -> Self {
@@ -655,10 +647,6 @@ impl RbExpr {
655
647
  self.inner.clone().mode().into()
656
648
  }
657
649
 
658
- pub fn exclude(&self, columns: Vec<String>) -> Self {
659
- self.inner.clone().exclude(columns).into()
660
- }
661
-
662
650
  pub fn interpolate(&self, method: Wrap<InterpolationMethod>) -> Self {
663
651
  self.inner.clone().interpolate(method.0).into()
664
652
  }
@@ -868,4 +856,21 @@ impl RbExpr {
868
856
  )
869
857
  .into()
870
858
  }
859
+
860
+ #[allow(clippy::wrong_self_convention)]
861
+ pub fn into_selector(&self) -> RbResult<RbSelector> {
862
+ Ok(self
863
+ .inner
864
+ .clone()
865
+ .into_selector()
866
+ .ok_or_else(
867
+ || polars_err!(InvalidOperation: "expr `{}` is not a selector", &self.inner),
868
+ )
869
+ .map_err(RbPolarsErr::from)?
870
+ .into())
871
+ }
872
+
873
+ pub fn new_selector(selector: &RbSelector) -> Self {
874
+ Expr::Selector(selector.inner.clone()).into()
875
+ }
871
876
  }
@@ -1,4 +1,4 @@
1
- use magnus::{prelude::*, value::Opaque, Ruby, Value};
1
+ use magnus::{Ruby, Value, prelude::*, value::Opaque};
2
2
  use polars::lazy::dsl::lit;
3
3
  use polars::prelude::*;
4
4
  use polars::series::ops::NullBehavior;
@@ -47,6 +47,14 @@ impl RbExpr {
47
47
  self.inner.clone().list().eval(expr.inner.clone()).into()
48
48
  }
49
49
 
50
+ pub fn list_filter(&self, predicate: &RbExpr) -> Self {
51
+ self.inner
52
+ .clone()
53
+ .list()
54
+ .eval(Expr::Column(PlSmallStr::EMPTY).filter(predicate.inner.clone()))
55
+ .into()
56
+ }
57
+
50
58
  pub fn list_get(&self, index: &RbExpr, null_on_oob: bool) -> Self {
51
59
  self.inner
52
60
  .clone()
@@ -189,7 +197,7 @@ impl RbExpr {
189
197
  .inner
190
198
  .clone()
191
199
  .list()
192
- .to_struct(ListToStructArgs::InferWidth {
200
+ .to_struct(ListToStruct::InferWidth {
193
201
  infer_field_strategy: width_strat.0,
194
202
  get_index_name: name_gen,
195
203
  max_fields: upper_bound,
@@ -206,4 +214,15 @@ impl RbExpr {
206
214
  e.list().unique().into()
207
215
  }
208
216
  }
217
+
218
+ pub fn list_set_operation(&self, other: &RbExpr, operation: Wrap<SetOperation>) -> Self {
219
+ let e = self.inner.clone().list();
220
+ match operation.0 {
221
+ SetOperation::Intersection => e.set_intersection(other.inner.clone()),
222
+ SetOperation::Difference => e.set_difference(other.inner.clone()),
223
+ SetOperation::Union => e.union(other.inner.clone()),
224
+ SetOperation::SymmetricDifference => e.set_symmetric_difference(other.inner.clone()),
225
+ }
226
+ .into()
227
+ }
209
228
  }
@@ -57,40 +57,6 @@ impl RbExpr {
57
57
  self.inner.clone().meta().is_regex_projection()
58
58
  }
59
59
 
60
- pub fn _meta_selector_add(&self, other: &RbExpr) -> RbResult<RbExpr> {
61
- let out = self
62
- .inner
63
- .clone()
64
- .meta()
65
- ._selector_add(other.inner.clone())
66
- .map_err(RbPolarsErr::from)?;
67
- Ok(out.into())
68
- }
69
-
70
- pub fn _meta_selector_sub(&self, other: &RbExpr) -> RbResult<RbExpr> {
71
- let out = self
72
- .inner
73
- .clone()
74
- .meta()
75
- ._selector_sub(other.inner.clone())
76
- .map_err(RbPolarsErr::from)?;
77
- Ok(out.into())
78
- }
79
-
80
- pub fn _meta_selector_and(&self, other: &RbExpr) -> RbResult<RbExpr> {
81
- let out = self
82
- .inner
83
- .clone()
84
- .meta()
85
- ._selector_and(other.inner.clone())
86
- .map_err(RbPolarsErr::from)?;
87
- Ok(out.into())
88
- }
89
-
90
- pub fn _meta_as_selector(&self) -> RbExpr {
91
- self.inner.clone().meta()._into_selector().into()
92
- }
93
-
94
60
  fn compute_tree_format(
95
61
  &self,
96
62
  display_as_dot: bool,
@@ -1,16 +1,18 @@
1
1
  mod array;
2
2
  mod binary;
3
3
  mod categorical;
4
+ pub mod datatype;
4
5
  mod datetime;
5
6
  mod general;
6
7
  mod list;
7
8
  mod meta;
8
9
  mod name;
9
10
  mod rolling;
11
+ pub mod selector;
10
12
  mod string;
11
13
  mod r#struct;
12
14
 
13
- use magnus::{prelude::*, RArray};
15
+ use magnus::{RArray, prelude::*};
14
16
  use polars::lazy::dsl::Expr;
15
17
 
16
18
  use crate::RbResult;
@@ -1,4 +1,4 @@
1
- use magnus::{block::Proc, value::Opaque, Ruby};
1
+ use magnus::{Ruby, block::Proc, value::Opaque};
2
2
  use polars::prelude::*;
3
3
  use polars_utils::format_pl_smallstr;
4
4
 
@@ -20,7 +20,7 @@ impl RbExpr {
20
20
  match out {
21
21
  Ok(out) => Ok(format_pl_smallstr!("{}", out)),
22
22
  Err(e) => Err(PolarsError::ComputeError(
23
- format!("Ruby function in 'name.map' produced an error: {}.", e).into(),
23
+ format!("Ruby function in 'name.map' produced an error: {e}.").into(),
24
24
  )),
25
25
  }
26
26
  })
@@ -1,7 +1,7 @@
1
1
  use polars::prelude::*;
2
2
 
3
- use crate::conversion::Wrap;
4
3
  use crate::RbExpr;
4
+ use crate::conversion::Wrap;
5
5
 
6
6
  impl RbExpr {
7
7
  pub fn rolling_sum(
@@ -0,0 +1,219 @@
1
+ use std::hash::{Hash, Hasher};
2
+ use std::sync::Arc;
3
+
4
+ use polars::prelude::{
5
+ DataType, DataTypeSelector, Selector, TimeUnit, TimeUnitSet, TimeZone, TimeZoneSet,
6
+ };
7
+ use polars_plan::dsl;
8
+
9
+ use crate::prelude::Wrap;
10
+ use crate::{RbResult, RbTypeError};
11
+
12
+ #[magnus::wrap(class = "Polars::RbSelector")]
13
+ #[repr(transparent)]
14
+ #[derive(Clone)]
15
+ pub struct RbSelector {
16
+ pub inner: Selector,
17
+ }
18
+
19
+ impl From<Selector> for RbSelector {
20
+ fn from(inner: Selector) -> Self {
21
+ Self { inner }
22
+ }
23
+ }
24
+
25
+ fn parse_time_unit_set(time_units: Vec<Wrap<TimeUnit>>) -> TimeUnitSet {
26
+ let mut tu = TimeUnitSet::empty();
27
+ for v in time_units {
28
+ match v.0 {
29
+ TimeUnit::Nanoseconds => tu |= TimeUnitSet::NANO_SECONDS,
30
+ TimeUnit::Microseconds => tu |= TimeUnitSet::MICRO_SECONDS,
31
+ TimeUnit::Milliseconds => tu |= TimeUnitSet::MILLI_SECONDS,
32
+ }
33
+ }
34
+ tu
35
+ }
36
+
37
+ pub fn parse_datatype_selector(selector: &RbSelector) -> RbResult<DataTypeSelector> {
38
+ selector.inner.clone().to_dtype_selector().ok_or_else(|| {
39
+ RbTypeError::new_err(format!(
40
+ "expected datatype based expression got '{}'",
41
+ selector.inner
42
+ ))
43
+ })
44
+ }
45
+
46
+ impl RbSelector {
47
+ pub fn union(&self, other: &Self) -> Self {
48
+ Self {
49
+ inner: self.inner.clone() | other.inner.clone(),
50
+ }
51
+ }
52
+
53
+ pub fn difference(&self, other: &Self) -> Self {
54
+ Self {
55
+ inner: self.inner.clone() - other.inner.clone(),
56
+ }
57
+ }
58
+
59
+ pub fn exclusive_or(&self, other: &Self) -> Self {
60
+ Self {
61
+ inner: self.inner.clone() ^ other.inner.clone(),
62
+ }
63
+ }
64
+
65
+ pub fn intersect(&self, other: &Self) -> Self {
66
+ Self {
67
+ inner: self.inner.clone() & other.inner.clone(),
68
+ }
69
+ }
70
+
71
+ pub fn by_dtype(dtypes: Vec<Wrap<DataType>>) -> Self {
72
+ let dtypes = dtypes.into_iter().map(|x| x.0).collect::<Vec<_>>();
73
+ dsl::dtype_cols(dtypes).as_selector().into()
74
+ }
75
+
76
+ pub fn by_name(names: Vec<String>, strict: bool) -> Self {
77
+ dsl::by_name(names, strict).into()
78
+ }
79
+
80
+ pub fn by_index(indices: Vec<i64>, strict: bool) -> Self {
81
+ Selector::ByIndex {
82
+ indices: indices.into(),
83
+ strict,
84
+ }
85
+ .into()
86
+ }
87
+
88
+ pub fn first(strict: bool) -> Self {
89
+ Selector::ByIndex {
90
+ indices: [0].into(),
91
+ strict,
92
+ }
93
+ .into()
94
+ }
95
+
96
+ pub fn last(strict: bool) -> Self {
97
+ Selector::ByIndex {
98
+ indices: [-1].into(),
99
+ strict,
100
+ }
101
+ .into()
102
+ }
103
+
104
+ pub fn matches(pattern: String) -> Self {
105
+ Selector::Matches(pattern.into()).into()
106
+ }
107
+
108
+ pub fn enum_() -> Self {
109
+ DataTypeSelector::Enum.as_selector().into()
110
+ }
111
+
112
+ pub fn categorical() -> Self {
113
+ DataTypeSelector::Categorical.as_selector().into()
114
+ }
115
+
116
+ pub fn nested() -> Self {
117
+ DataTypeSelector::Nested.as_selector().into()
118
+ }
119
+
120
+ pub fn list(inner_dst: Option<&Self>) -> RbResult<Self> {
121
+ let inner_dst = match inner_dst {
122
+ None => None,
123
+ Some(inner_dst) => Some(Arc::new(parse_datatype_selector(inner_dst)?)),
124
+ };
125
+ Ok(DataTypeSelector::List(inner_dst).as_selector().into())
126
+ }
127
+
128
+ pub fn array(inner_dst: Option<&Self>, width: Option<usize>) -> RbResult<Self> {
129
+ let inner_dst = match inner_dst {
130
+ None => None,
131
+ Some(inner_dst) => Some(Arc::new(parse_datatype_selector(inner_dst)?)),
132
+ };
133
+ Ok(DataTypeSelector::Array(inner_dst, width)
134
+ .as_selector()
135
+ .into())
136
+ }
137
+
138
+ pub fn struct_() -> Self {
139
+ DataTypeSelector::Struct.as_selector().into()
140
+ }
141
+
142
+ pub fn integer() -> Self {
143
+ DataTypeSelector::Integer.as_selector().into()
144
+ }
145
+
146
+ pub fn signed_integer() -> Self {
147
+ DataTypeSelector::SignedInteger.as_selector().into()
148
+ }
149
+
150
+ pub fn unsigned_integer() -> Self {
151
+ DataTypeSelector::UnsignedInteger.as_selector().into()
152
+ }
153
+
154
+ pub fn float() -> Self {
155
+ DataTypeSelector::Float.as_selector().into()
156
+ }
157
+
158
+ pub fn decimal() -> Self {
159
+ DataTypeSelector::Decimal.as_selector().into()
160
+ }
161
+
162
+ pub fn numeric() -> Self {
163
+ DataTypeSelector::Numeric.as_selector().into()
164
+ }
165
+
166
+ pub fn temporal() -> Self {
167
+ DataTypeSelector::Temporal.as_selector().into()
168
+ }
169
+
170
+ pub fn datetime(tu: Vec<Wrap<TimeUnit>>, tz: Vec<Wrap<Option<TimeZone>>>) -> Self {
171
+ use TimeZoneSet as TZS;
172
+
173
+ let mut allow_unset = false;
174
+ let mut allow_set = false;
175
+ let mut any_of: Vec<TimeZone> = Vec::new();
176
+
177
+ let tu = parse_time_unit_set(tu);
178
+ for t in tz {
179
+ let t = t.0;
180
+ match t {
181
+ None => allow_unset = true,
182
+ Some(s) if s.as_str() == "*" => allow_set = true,
183
+ Some(t) => any_of.push(t),
184
+ }
185
+ }
186
+
187
+ let tzs = match (allow_unset, allow_set) {
188
+ (true, true) => TZS::Any,
189
+ (false, true) => TZS::AnySet,
190
+ (true, false) if any_of.is_empty() => TZS::Unset,
191
+ (true, false) => TZS::UnsetOrAnyOf(any_of.into()),
192
+ (false, false) => TZS::AnyOf(any_of.into()),
193
+ };
194
+ DataTypeSelector::Datetime(tu, tzs).as_selector().into()
195
+ }
196
+
197
+ pub fn duration(tu: Vec<Wrap<TimeUnit>>) -> Self {
198
+ let tu = parse_time_unit_set(tu);
199
+ DataTypeSelector::Duration(tu).as_selector().into()
200
+ }
201
+
202
+ pub fn object() -> Self {
203
+ DataTypeSelector::Object.as_selector().into()
204
+ }
205
+
206
+ pub fn empty() -> Self {
207
+ dsl::empty().into()
208
+ }
209
+
210
+ pub fn all() -> Self {
211
+ dsl::all().into()
212
+ }
213
+
214
+ pub fn hash(&self) -> u64 {
215
+ let mut hasher = std::hash::DefaultHasher::default();
216
+ self.inner.hash(&mut hasher);
217
+ hasher.finish()
218
+ }
219
+ }
@@ -163,12 +163,20 @@ impl RbExpr {
163
163
  self.inner.clone().str().reverse().into()
164
164
  }
165
165
 
166
- pub fn str_pad_start(&self, length: usize, fillchar: char) -> Self {
167
- self.clone().inner.str().pad_start(length, fillchar).into()
166
+ pub fn str_pad_start(&self, length: &RbExpr, fillchar: char) -> Self {
167
+ self.clone()
168
+ .inner
169
+ .str()
170
+ .pad_start(length.inner.clone(), fillchar)
171
+ .into()
168
172
  }
169
173
 
170
- pub fn str_pad_end(&self, length: usize, fillchar: char) -> Self {
171
- self.clone().inner.str().pad_end(length, fillchar).into()
174
+ pub fn str_pad_end(&self, length: &RbExpr, fillchar: char) -> Self {
175
+ self.clone()
176
+ .inner
177
+ .str()
178
+ .pad_end(length.inner.clone(), fillchar)
179
+ .into()
172
180
  }
173
181
 
174
182
  pub fn str_zfill(&self, length: &Self) -> Self {
@@ -220,11 +228,11 @@ impl RbExpr {
220
228
  self.inner.clone().str().base64_decode(strict).into()
221
229
  }
222
230
 
223
- pub fn str_to_integer(&self, base: &Self, strict: bool) -> Self {
231
+ pub fn str_to_integer(&self, base: &Self, dtype: Option<Wrap<DataType>>, strict: bool) -> Self {
224
232
  self.inner
225
233
  .clone()
226
234
  .str()
227
- .to_integer(base.inner.clone(), strict)
235
+ .to_integer(base.inner.clone(), dtype.map(|wrap| wrap.0), strict)
228
236
  .into()
229
237
  }
230
238
 
@@ -3,17 +3,18 @@ use std::io;
3
3
  use std::io::{Cursor, Read, Seek, SeekFrom, Write};
4
4
  use std::path::PathBuf;
5
5
 
6
- use magnus::{exception, prelude::*, value::Opaque, Error, RString, Ruby, Value};
6
+ use magnus::{Error, RString, Ruby, Value, exception, prelude::*, value::Opaque};
7
7
  use polars::io::cloud::CloudOptions;
8
8
  use polars::io::mmap::MmapBytesReader;
9
+ use polars::prelude::PlPath;
9
10
  use polars::prelude::file::DynWriteable;
10
11
  use polars::prelude::sync_on_close::SyncOnCloseType;
11
12
  use polars_utils::file::ClosableFile;
12
13
  use polars_utils::mmap::MemSlice;
13
14
 
15
+ use crate::RbResult;
14
16
  use crate::error::RbPolarsErr;
15
17
  use crate::prelude::resolve_homedir;
16
- use crate::RbResult;
17
18
 
18
19
  #[derive(Clone)]
19
20
  pub struct RbFileLikeObject {
@@ -188,7 +189,7 @@ impl EitherRustRubyFile {
188
189
 
189
190
  pub enum RubyScanSourceInput {
190
191
  Buffer(MemSlice),
191
- Path(PathBuf),
192
+ Path(PlPath),
192
193
  #[allow(dead_code)]
193
194
  File(File),
194
195
  }
@@ -202,8 +203,13 @@ pub(crate) fn try_get_rbfile(
202
203
  }
203
204
 
204
205
  pub fn get_ruby_scan_source_input(rb_f: Value, write: bool) -> RbResult<RubyScanSourceInput> {
205
- if let Ok(file_path) = PathBuf::try_convert(rb_f) {
206
- // TODO resolve_homedir
206
+ if let Ok(s) = String::try_convert(rb_f) {
207
+ let mut file_path = PlPath::new(&s);
208
+ if let Some(p) = file_path.as_ref().as_local_path() {
209
+ if p.starts_with("~/") {
210
+ file_path = PlPath::Local(resolve_homedir(&p).into());
211
+ }
212
+ }
207
213
  Ok(RubyScanSourceInput::Path(file_path))
208
214
  } else {
209
215
  let f = RbFileLikeObject::with_requirements(rb_f, !write, write, !write)?;
@@ -1,13 +1,10 @@
1
1
  use std::io::BufReader;
2
2
 
3
- use arrow::array::Utf8ViewArray;
4
3
  use magnus::{RHash, Value};
5
4
  use polars::prelude::ArrowSchema;
6
- use polars_core::datatypes::create_enum_dtype;
7
5
 
8
6
  use crate::conversion::Wrap;
9
- use crate::file::{get_either_file, EitherRustRubyFile};
10
- use crate::prelude::ArrowDataType;
7
+ use crate::file::{EitherRustRubyFile, get_either_file};
11
8
  use crate::{RbPolarsErr, RbResult};
12
9
 
13
10
  pub fn read_ipc_schema(rb_f: Value) -> RbResult<RHash> {
@@ -42,13 +39,7 @@ pub fn read_parquet_schema(rb_f: Value) -> RbResult<RHash> {
42
39
 
43
40
  fn fields_to_rbdict(schema: &ArrowSchema, dict: &RHash) -> RbResult<()> {
44
41
  for field in schema.iter_values() {
45
- let dt = if field.is_enum() {
46
- Wrap(create_enum_dtype(Utf8ViewArray::new_empty(
47
- ArrowDataType::Utf8View,
48
- )))
49
- } else {
50
- Wrap(polars::prelude::DataType::from_arrow_field(field))
51
- };
42
+ let dt = Wrap(polars::prelude::DataType::from_arrow_field(field));
52
43
  dict.aset(field.name.as_str(), dt)?;
53
44
  }
54
45
  Ok(())