polars-df 0.21.0 → 0.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +27 -0
- data/Cargo.lock +55 -48
- data/Cargo.toml +3 -0
- data/README.md +12 -0
- data/ext/polars/Cargo.toml +22 -11
- data/ext/polars/src/batched_csv.rs +4 -4
- data/ext/polars/src/catalog/unity.rs +96 -94
- data/ext/polars/src/conversion/any_value.rs +26 -30
- data/ext/polars/src/conversion/chunked_array.rs +32 -28
- data/ext/polars/src/conversion/datetime.rs +11 -0
- data/ext/polars/src/conversion/mod.rs +307 -34
- data/ext/polars/src/dataframe/construction.rs +4 -3
- data/ext/polars/src/dataframe/export.rs +17 -15
- data/ext/polars/src/dataframe/general.rs +15 -12
- data/ext/polars/src/dataframe/io.rs +1 -2
- data/ext/polars/src/dataframe/mod.rs +25 -1
- data/ext/polars/src/dataframe/serde.rs +23 -8
- data/ext/polars/src/exceptions.rs +8 -4
- data/ext/polars/src/expr/array.rs +73 -4
- data/ext/polars/src/expr/binary.rs +26 -1
- data/ext/polars/src/expr/bitwise.rs +39 -0
- data/ext/polars/src/expr/categorical.rs +20 -0
- data/ext/polars/src/expr/datatype.rs +24 -1
- data/ext/polars/src/expr/datetime.rs +58 -14
- data/ext/polars/src/expr/general.rs +87 -15
- data/ext/polars/src/expr/list.rs +32 -24
- data/ext/polars/src/expr/meta.rs +15 -6
- data/ext/polars/src/expr/mod.rs +3 -0
- data/ext/polars/src/expr/name.rs +19 -14
- data/ext/polars/src/expr/rolling.rs +20 -0
- data/ext/polars/src/expr/serde.rs +28 -0
- data/ext/polars/src/expr/string.rs +64 -10
- data/ext/polars/src/expr/struct.rs +9 -1
- data/ext/polars/src/file.rs +15 -9
- data/ext/polars/src/functions/business.rs +0 -1
- data/ext/polars/src/functions/io.rs +25 -3
- data/ext/polars/src/functions/lazy.rs +11 -6
- data/ext/polars/src/functions/meta.rs +3 -3
- data/ext/polars/src/functions/string_cache.rs +3 -3
- data/ext/polars/src/interop/arrow/to_ruby.rs +3 -3
- data/ext/polars/src/interop/numo/numo_rs.rs +4 -3
- data/ext/polars/src/io/mod.rs +6 -0
- data/ext/polars/src/lazyframe/general.rs +59 -9
- data/ext/polars/src/lazyframe/mod.rs +16 -1
- data/ext/polars/src/lazyframe/optflags.rs +58 -0
- data/ext/polars/src/lazyframe/serde.rs +27 -3
- data/ext/polars/src/lib.rs +261 -19
- data/ext/polars/src/map/dataframe.rs +20 -17
- data/ext/polars/src/map/lazy.rs +6 -5
- data/ext/polars/src/map/series.rs +8 -7
- data/ext/polars/src/on_startup.rs +12 -5
- data/ext/polars/src/rb_modules.rs +2 -2
- data/ext/polars/src/series/aggregation.rs +85 -28
- data/ext/polars/src/series/construction.rs +1 -0
- data/ext/polars/src/series/export.rs +37 -33
- data/ext/polars/src/series/general.rs +120 -21
- data/ext/polars/src/series/mod.rs +29 -4
- data/lib/polars/array_expr.rb +382 -3
- data/lib/polars/array_name_space.rb +281 -0
- data/lib/polars/binary_expr.rb +67 -0
- data/lib/polars/binary_name_space.rb +43 -0
- data/lib/polars/cat_expr.rb +224 -0
- data/lib/polars/cat_name_space.rb +138 -0
- data/lib/polars/config.rb +2 -2
- data/lib/polars/convert.rb +6 -6
- data/lib/polars/data_frame.rb +794 -27
- data/lib/polars/data_type_expr.rb +52 -0
- data/lib/polars/data_types.rb +26 -5
- data/lib/polars/date_time_expr.rb +252 -1
- data/lib/polars/date_time_name_space.rb +299 -0
- data/lib/polars/expr.rb +1248 -206
- data/lib/polars/functions/business.rb +95 -0
- data/lib/polars/functions/datatype.rb +21 -0
- data/lib/polars/functions/lazy.rb +14 -1
- data/lib/polars/io/csv.rb +1 -1
- data/lib/polars/io/iceberg.rb +27 -0
- data/lib/polars/io/json.rb +4 -4
- data/lib/polars/io/ndjson.rb +4 -4
- data/lib/polars/io/parquet.rb +32 -7
- data/lib/polars/io/scan_options.rb +4 -1
- data/lib/polars/lazy_frame.rb +1028 -28
- data/lib/polars/list_expr.rb +217 -17
- data/lib/polars/list_name_space.rb +231 -22
- data/lib/polars/meta_expr.rb +89 -0
- data/lib/polars/name_expr.rb +36 -0
- data/lib/polars/query_opt_flags.rb +50 -0
- data/lib/polars/scan_cast_options.rb +20 -1
- data/lib/polars/schema.rb +79 -3
- data/lib/polars/selector.rb +72 -0
- data/lib/polars/selectors.rb +3 -3
- data/lib/polars/series.rb +1053 -54
- data/lib/polars/string_expr.rb +436 -32
- data/lib/polars/string_name_space.rb +736 -50
- data/lib/polars/struct_expr.rb +103 -0
- data/lib/polars/struct_name_space.rb +19 -1
- data/lib/polars/utils/serde.rb +17 -0
- data/lib/polars/utils/various.rb +22 -1
- data/lib/polars/utils.rb +5 -1
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +6 -0
- metadata +11 -1
data/ext/polars/src/expr/list.rs
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
use magnus::{
|
1
|
+
use magnus::{RArray, prelude::*};
|
2
2
|
use polars::lazy::dsl::lit;
|
3
3
|
use polars::prelude::*;
|
4
4
|
use polars::series::ops::NullBehavior;
|
@@ -83,6 +83,18 @@ impl RbExpr {
|
|
83
83
|
self.inner.clone().list().mean().into()
|
84
84
|
}
|
85
85
|
|
86
|
+
pub fn list_median(&self) -> Self {
|
87
|
+
self.inner.clone().list().median().into()
|
88
|
+
}
|
89
|
+
|
90
|
+
pub fn list_std(&self, ddof: u8) -> Self {
|
91
|
+
self.inner.clone().list().std(ddof).into()
|
92
|
+
}
|
93
|
+
|
94
|
+
pub fn list_var(&self, ddof: u8) -> Self {
|
95
|
+
self.inner.clone().list().var(ddof).into()
|
96
|
+
}
|
97
|
+
|
86
98
|
pub fn list_min(&self) -> Self {
|
87
99
|
self.inner.clone().list().min().into()
|
88
100
|
}
|
@@ -171,40 +183,36 @@ impl RbExpr {
|
|
171
183
|
.into()
|
172
184
|
}
|
173
185
|
|
186
|
+
pub fn list_gather_every(&self, n: &RbExpr, offset: &RbExpr) -> Self {
|
187
|
+
self.inner
|
188
|
+
.clone()
|
189
|
+
.list()
|
190
|
+
.gather_every(n.inner.clone(), offset.inner.clone())
|
191
|
+
.into()
|
192
|
+
}
|
193
|
+
|
174
194
|
pub fn list_to_array(&self, width: usize) -> Self {
|
175
195
|
self.inner.clone().list().to_array(width).into()
|
176
196
|
}
|
177
197
|
|
178
|
-
pub fn list_to_struct(
|
179
|
-
&self,
|
180
|
-
width_strat: Wrap<ListToStructWidthStrategy>,
|
181
|
-
name_gen: Option<Value>,
|
182
|
-
upper_bound: Option<usize>,
|
183
|
-
) -> RbResult<Self> {
|
184
|
-
let name_gen = name_gen.map(|lambda| {
|
185
|
-
let lambda = Opaque::from(lambda);
|
186
|
-
Arc::new(move |idx: usize| {
|
187
|
-
let lambda = Ruby::get().unwrap().get_inner(lambda);
|
188
|
-
let out: String = lambda.funcall("call", (idx,)).unwrap();
|
189
|
-
PlSmallStr::from_string(out)
|
190
|
-
});
|
191
|
-
|
192
|
-
// non-Ruby thread
|
193
|
-
todo!();
|
194
|
-
});
|
195
|
-
|
198
|
+
pub fn list_to_struct(&self, names: RArray) -> RbResult<Self> {
|
196
199
|
Ok(self
|
197
200
|
.inner
|
198
201
|
.clone()
|
199
202
|
.list()
|
200
|
-
.to_struct(
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
203
|
+
.to_struct(
|
204
|
+
names
|
205
|
+
.into_iter()
|
206
|
+
.map(|x| Ok(Wrap::<PlSmallStr>::try_convert(x)?.0))
|
207
|
+
.collect::<RbResult<Arc<[_]>>>()?,
|
208
|
+
)
|
205
209
|
.into())
|
206
210
|
}
|
207
211
|
|
212
|
+
pub fn list_n_unique(&self) -> Self {
|
213
|
+
self.inner.clone().list().n_unique().into()
|
214
|
+
}
|
215
|
+
|
208
216
|
pub fn list_unique(&self, maintain_order: bool) -> Self {
|
209
217
|
let e = self.inner.clone();
|
210
218
|
|
data/ext/polars/src/expr/meta.rs
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
use magnus::RArray;
|
1
|
+
use magnus::{RArray, Ruby};
|
2
2
|
use polars::prelude::Schema;
|
3
3
|
|
4
4
|
use crate::{RbExpr, RbPolarsErr, RbResult, Wrap};
|
@@ -8,17 +8,15 @@ impl RbExpr {
|
|
8
8
|
self.inner == other.inner
|
9
9
|
}
|
10
10
|
|
11
|
-
pub fn meta_pop(&
|
11
|
+
pub fn meta_pop(ruby: &Ruby, rb_self: &Self, schema: Option<Wrap<Schema>>) -> RbResult<RArray> {
|
12
12
|
let schema = schema.as_ref().map(|s| &s.0);
|
13
|
-
let exprs =
|
13
|
+
let exprs = rb_self
|
14
14
|
.inner
|
15
15
|
.clone()
|
16
16
|
.meta()
|
17
17
|
.pop(schema)
|
18
18
|
.map_err(RbPolarsErr::from)?;
|
19
|
-
Ok(
|
20
|
-
exprs.iter().map(|e| RbExpr::from(e.clone())),
|
21
|
-
))
|
19
|
+
Ok(ruby.ary_from_iter(exprs.iter().map(|e| RbExpr::from(e.clone()))))
|
22
20
|
}
|
23
21
|
|
24
22
|
pub fn meta_root_names(&self) -> Vec<String> {
|
@@ -57,6 +55,17 @@ impl RbExpr {
|
|
57
55
|
self.inner.clone().meta().is_regex_projection()
|
58
56
|
}
|
59
57
|
|
58
|
+
pub fn meta_is_column_selection(&self, allow_aliasing: bool) -> bool {
|
59
|
+
self.inner
|
60
|
+
.clone()
|
61
|
+
.meta()
|
62
|
+
.is_column_selection(allow_aliasing)
|
63
|
+
}
|
64
|
+
|
65
|
+
pub fn meta_is_literal(&self, allow_aliasing: bool) -> bool {
|
66
|
+
self.inner.clone().meta().is_literal(allow_aliasing)
|
67
|
+
}
|
68
|
+
|
60
69
|
fn compute_tree_format(
|
61
70
|
&self,
|
62
71
|
display_as_dot: bool,
|
data/ext/polars/src/expr/mod.rs
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
mod array;
|
2
2
|
mod binary;
|
3
|
+
mod bitwise;
|
3
4
|
mod categorical;
|
4
5
|
pub mod datatype;
|
5
6
|
mod datetime;
|
@@ -9,6 +10,8 @@ mod meta;
|
|
9
10
|
mod name;
|
10
11
|
mod rolling;
|
11
12
|
pub mod selector;
|
13
|
+
#[cfg(feature = "serialize_binary")]
|
14
|
+
mod serde;
|
12
15
|
mod string;
|
13
16
|
mod r#struct;
|
14
17
|
|
data/ext/polars/src/expr/name.rs
CHANGED
@@ -11,20 +11,17 @@ impl RbExpr {
|
|
11
11
|
|
12
12
|
pub fn name_map(&self, lambda: Proc) -> Self {
|
13
13
|
let lambda = Opaque::from(lambda);
|
14
|
-
|
15
|
-
.
|
16
|
-
.name()
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
}
|
26
|
-
})
|
27
|
-
.into()
|
14
|
+
let func = PlanCallback::new(move |name: PlSmallStr| {
|
15
|
+
let lambda = Ruby::get().unwrap().get_inner(lambda);
|
16
|
+
let out = lambda.call::<_, String>((name.as_str(),));
|
17
|
+
match out {
|
18
|
+
Ok(out) => Ok(format_pl_smallstr!("{}", out)),
|
19
|
+
Err(e) => Err(PolarsError::ComputeError(
|
20
|
+
format!("Ruby function in 'name.map' produced an error: {e}.").into(),
|
21
|
+
)),
|
22
|
+
}
|
23
|
+
});
|
24
|
+
self.inner.clone().name().map(func).into()
|
28
25
|
}
|
29
26
|
|
30
27
|
pub fn name_prefix(&self, prefix: String) -> Self {
|
@@ -42,4 +39,12 @@ impl RbExpr {
|
|
42
39
|
pub fn name_to_uppercase(&self) -> Self {
|
43
40
|
self.inner.clone().name().to_uppercase().into()
|
44
41
|
}
|
42
|
+
|
43
|
+
pub fn name_prefix_fields(&self, prefix: String) -> Self {
|
44
|
+
self.inner.clone().name().prefix_fields(&prefix).into()
|
45
|
+
}
|
46
|
+
|
47
|
+
pub fn name_suffix_fields(&self, suffix: String) -> Self {
|
48
|
+
self.inner.clone().name().suffix_fields(&suffix).into()
|
49
|
+
}
|
45
50
|
}
|
@@ -337,4 +337,24 @@ impl RbExpr {
|
|
337
337
|
|
338
338
|
self.inner.clone().rolling_skew(options).into()
|
339
339
|
}
|
340
|
+
|
341
|
+
pub fn rolling_kurtosis(
|
342
|
+
&self,
|
343
|
+
window_size: usize,
|
344
|
+
fisher: bool,
|
345
|
+
bias: bool,
|
346
|
+
min_periods: Option<usize>,
|
347
|
+
center: bool,
|
348
|
+
) -> Self {
|
349
|
+
let min_periods = min_periods.unwrap_or(window_size);
|
350
|
+
let options = RollingOptionsFixedWindow {
|
351
|
+
window_size,
|
352
|
+
weights: None,
|
353
|
+
min_periods,
|
354
|
+
center,
|
355
|
+
fn_params: Some(RollingFnParams::Kurtosis { fisher, bias }),
|
356
|
+
};
|
357
|
+
|
358
|
+
self.inner.clone().rolling_kurtosis(options).into()
|
359
|
+
}
|
340
360
|
}
|
@@ -0,0 +1,28 @@
|
|
1
|
+
use std::io::{BufReader, BufWriter};
|
2
|
+
|
3
|
+
use magnus::Value;
|
4
|
+
use polars::lazy::prelude::Expr;
|
5
|
+
use polars_utils::pl_serialize;
|
6
|
+
|
7
|
+
use crate::exceptions::ComputeError;
|
8
|
+
use crate::file::get_file_like;
|
9
|
+
use crate::{RbExpr, RbResult};
|
10
|
+
|
11
|
+
impl RbExpr {
|
12
|
+
pub fn serialize_binary(&self, rb_f: Value) -> RbResult<()> {
|
13
|
+
let file = get_file_like(rb_f, true)?;
|
14
|
+
let writer = BufWriter::new(file);
|
15
|
+
pl_serialize::SerializeOptions::default()
|
16
|
+
.serialize_into_writer::<_, _, true>(writer, &self.inner)
|
17
|
+
.map_err(|err| ComputeError::new_err(err.to_string()))
|
18
|
+
}
|
19
|
+
|
20
|
+
pub fn deserialize_binary(rb_f: Value) -> RbResult<RbExpr> {
|
21
|
+
let file = get_file_like(rb_f, false)?;
|
22
|
+
let reader = BufReader::new(file);
|
23
|
+
let expr: Expr = pl_serialize::SerializeOptions::default()
|
24
|
+
.deserialize_from_reader::<_, _, true>(reader)
|
25
|
+
.map_err(|err| ComputeError::new_err(err.to_string()))?;
|
26
|
+
Ok(expr.into())
|
27
|
+
}
|
28
|
+
}
|
@@ -1,7 +1,7 @@
|
|
1
1
|
use polars::prelude::*;
|
2
2
|
|
3
3
|
use crate::conversion::Wrap;
|
4
|
-
use crate::{RbExpr, RbPolarsErr, RbResult};
|
4
|
+
use crate::{RbDataTypeExpr, RbExpr, RbPolarsErr, RbResult};
|
5
5
|
|
6
6
|
impl RbExpr {
|
7
7
|
pub fn str_join(&self, delimiter: String, ignore_nulls: bool) -> Self {
|
@@ -122,6 +122,14 @@ impl RbExpr {
|
|
122
122
|
.into()
|
123
123
|
}
|
124
124
|
|
125
|
+
pub fn str_head(&self, n: &Self) -> Self {
|
126
|
+
self.inner.clone().str().head(n.inner.clone()).into()
|
127
|
+
}
|
128
|
+
|
129
|
+
pub fn str_tail(&self, n: &Self) -> Self {
|
130
|
+
self.inner.clone().str().tail(n.inner.clone()).into()
|
131
|
+
}
|
132
|
+
|
125
133
|
pub fn str_to_uppercase(&self) -> Self {
|
126
134
|
self.inner.clone().str().to_uppercase().into()
|
127
135
|
}
|
@@ -159,6 +167,10 @@ impl RbExpr {
|
|
159
167
|
.into()
|
160
168
|
}
|
161
169
|
|
170
|
+
pub fn str_normalize(&self, form: Wrap<UnicodeForm>) -> Self {
|
171
|
+
self.inner.clone().str().normalize(form.0).into()
|
172
|
+
}
|
173
|
+
|
162
174
|
pub fn str_reverse(&self) -> Self {
|
163
175
|
self.inner.clone().str().reverse().into()
|
164
176
|
}
|
@@ -200,6 +212,23 @@ impl RbExpr {
|
|
200
212
|
}
|
201
213
|
}
|
202
214
|
|
215
|
+
pub fn str_find(&self, pat: &Self, literal: Option<bool>, strict: bool) -> Self {
|
216
|
+
match literal {
|
217
|
+
Some(true) => self
|
218
|
+
.inner
|
219
|
+
.clone()
|
220
|
+
.str()
|
221
|
+
.find_literal(pat.inner.clone())
|
222
|
+
.into(),
|
223
|
+
_ => self
|
224
|
+
.inner
|
225
|
+
.clone()
|
226
|
+
.str()
|
227
|
+
.find(pat.inner.clone(), strict)
|
228
|
+
.into(),
|
229
|
+
}
|
230
|
+
}
|
231
|
+
|
203
232
|
pub fn str_ends_with(&self, sub: &RbExpr) -> Self {
|
204
233
|
self.inner.clone().str().ends_with(sub.inner.clone()).into()
|
205
234
|
}
|
@@ -236,16 +265,11 @@ impl RbExpr {
|
|
236
265
|
.into()
|
237
266
|
}
|
238
267
|
|
239
|
-
pub fn str_json_decode(
|
240
|
-
&self,
|
241
|
-
dtype: Option<Wrap<DataType>>,
|
242
|
-
infer_schema_len: Option<usize>,
|
243
|
-
) -> Self {
|
244
|
-
let dtype = dtype.map(|wrap| wrap.0);
|
268
|
+
pub fn str_json_decode(&self, dtype: &RbDataTypeExpr) -> Self {
|
245
269
|
self.inner
|
246
270
|
.clone()
|
247
271
|
.str()
|
248
|
-
.json_decode(dtype
|
272
|
+
.json_decode(dtype.inner.clone())
|
249
273
|
.into()
|
250
274
|
}
|
251
275
|
|
@@ -323,8 +347,8 @@ impl RbExpr {
|
|
323
347
|
self.inner.clone().str().splitn(by.inner.clone(), n).into()
|
324
348
|
}
|
325
349
|
|
326
|
-
pub fn str_to_decimal(&self,
|
327
|
-
self.inner.clone().str().to_decimal(
|
350
|
+
pub fn str_to_decimal(&self, scale: usize) -> Self {
|
351
|
+
self.inner.clone().str().to_decimal(scale).into()
|
328
352
|
}
|
329
353
|
|
330
354
|
pub fn str_contains_any(&self, patterns: &RbExpr, ascii_case_insensitive: bool) -> Self {
|
@@ -351,4 +375,34 @@ impl RbExpr {
|
|
351
375
|
)
|
352
376
|
.into()
|
353
377
|
}
|
378
|
+
|
379
|
+
pub fn str_extract_many(
|
380
|
+
&self,
|
381
|
+
patterns: &RbExpr,
|
382
|
+
ascii_case_insensitive: bool,
|
383
|
+
overlapping: bool,
|
384
|
+
) -> Self {
|
385
|
+
self.inner
|
386
|
+
.clone()
|
387
|
+
.str()
|
388
|
+
.extract_many(patterns.inner.clone(), ascii_case_insensitive, overlapping)
|
389
|
+
.into()
|
390
|
+
}
|
391
|
+
|
392
|
+
pub fn str_find_many(
|
393
|
+
&self,
|
394
|
+
patterns: &RbExpr,
|
395
|
+
ascii_case_insensitive: bool,
|
396
|
+
overlapping: bool,
|
397
|
+
) -> Self {
|
398
|
+
self.inner
|
399
|
+
.clone()
|
400
|
+
.str()
|
401
|
+
.find_many(patterns.inner.clone(), ascii_case_insensitive, overlapping)
|
402
|
+
.into()
|
403
|
+
}
|
404
|
+
|
405
|
+
pub fn str_escape_regex(&self) -> Self {
|
406
|
+
self.inner.clone().str().escape_regex().into()
|
407
|
+
}
|
354
408
|
}
|
@@ -1,4 +1,6 @@
|
|
1
|
-
use
|
1
|
+
use magnus::RArray;
|
2
|
+
|
3
|
+
use crate::{RbExpr, RbResult, rb_exprs_to_exprs};
|
2
4
|
|
3
5
|
impl RbExpr {
|
4
6
|
pub fn struct_field_by_index(&self, index: i64) -> Self {
|
@@ -16,4 +18,10 @@ impl RbExpr {
|
|
16
18
|
pub fn struct_json_encode(&self) -> Self {
|
17
19
|
self.inner.clone().struct_().json_encode().into()
|
18
20
|
}
|
21
|
+
|
22
|
+
pub fn struct_with_fields(&self, fields: RArray) -> RbResult<Self> {
|
23
|
+
let fields = rb_exprs_to_exprs(fields)?;
|
24
|
+
let e = self.inner.clone().struct_().with_fields(fields);
|
25
|
+
Ok(e.into())
|
26
|
+
}
|
19
27
|
}
|
data/ext/polars/src/file.rs
CHANGED
@@ -3,7 +3,7 @@ use std::io;
|
|
3
3
|
use std::io::{Cursor, Read, Seek, SeekFrom, Write};
|
4
4
|
use std::path::PathBuf;
|
5
5
|
|
6
|
-
use magnus::{Error, RString, Ruby, Value,
|
6
|
+
use magnus::{Error, RString, Ruby, Value, prelude::*, value::Opaque};
|
7
7
|
use polars::io::cloud::CloudOptions;
|
8
8
|
use polars::io::mmap::MmapBytesReader;
|
9
9
|
use polars::prelude::PlPath;
|
@@ -67,23 +67,25 @@ impl RbFileLikeObject {
|
|
67
67
|
/// ruby object has a `read`, `write`, and `seek` methods in respect to parameters.
|
68
68
|
/// Will return a `TypeError` if object does not have `read`, `seek`, and `write` methods.
|
69
69
|
pub fn with_requirements(object: Value, read: bool, write: bool, seek: bool) -> RbResult<Self> {
|
70
|
+
let ruby = Ruby::get_with(object);
|
71
|
+
|
70
72
|
if read && !object.respond_to("read", false)? {
|
71
73
|
return Err(Error::new(
|
72
|
-
|
74
|
+
ruby.exception_type_error(),
|
73
75
|
"Object does not have a .read() method.",
|
74
76
|
));
|
75
77
|
}
|
76
78
|
|
77
79
|
if seek && !object.respond_to("seek", false)? {
|
78
80
|
return Err(Error::new(
|
79
|
-
|
81
|
+
ruby.exception_type_error(),
|
80
82
|
"Object does not have a .seek() method.",
|
81
83
|
));
|
82
84
|
}
|
83
85
|
|
84
86
|
if write && !object.respond_to("write", false)? {
|
85
87
|
return Err(Error::new(
|
86
|
-
|
88
|
+
ruby.exception_type_error(),
|
87
89
|
"Object does not have a .write() method.",
|
88
90
|
));
|
89
91
|
}
|
@@ -113,10 +115,10 @@ impl Read for RbFileLikeObject {
|
|
113
115
|
|
114
116
|
impl Write for RbFileLikeObject {
|
115
117
|
fn write(&mut self, buf: &[u8]) -> Result<usize, io::Error> {
|
116
|
-
let
|
118
|
+
let ruby = Ruby::get().unwrap();
|
119
|
+
let rbbytes = ruby.str_from_slice(buf);
|
117
120
|
|
118
|
-
let number_bytes_written =
|
119
|
-
.unwrap()
|
121
|
+
let number_bytes_written = ruby
|
120
122
|
.get_inner(self.inner)
|
121
123
|
.funcall::<_, _, usize>("write", (rbbytes,))
|
122
124
|
.map_err(rberr_to_io_err)?;
|
@@ -265,8 +267,12 @@ pub fn get_mmap_bytes_reader_and_path<'a>(
|
|
265
267
|
RbReadBytes::Bytes(v) => Ok((Box::new(Cursor::new(unsafe { v.as_slice() })), None)),
|
266
268
|
RbReadBytes::Other(v) => {
|
267
269
|
let path = PathBuf::try_convert(*v)?;
|
268
|
-
let f = File::open(&path)
|
269
|
-
|
270
|
+
let f = File::open(&path).map_err(|e| {
|
271
|
+
Error::new(
|
272
|
+
Ruby::get().unwrap().exception_runtime_error(),
|
273
|
+
e.to_string(),
|
274
|
+
)
|
275
|
+
})?;
|
270
276
|
Ok((Box::new(f), Some(path)))
|
271
277
|
}
|
272
278
|
}
|
@@ -1,6 +1,6 @@
|
|
1
1
|
use std::io::BufReader;
|
2
2
|
|
3
|
-
use magnus::{RHash, Value};
|
3
|
+
use magnus::{RHash, Ruby, Value};
|
4
4
|
use polars::prelude::ArrowSchema;
|
5
5
|
|
6
6
|
use crate::conversion::Wrap;
|
@@ -16,11 +16,32 @@ pub fn read_ipc_schema(rb_f: Value) -> RbResult<RHash> {
|
|
16
16
|
EitherRustRubyFile::Rb(mut r) => read_file_metadata(&mut r).map_err(RbPolarsErr::from)?,
|
17
17
|
};
|
18
18
|
|
19
|
-
let
|
19
|
+
let ruby = Ruby::get_with(rb_f);
|
20
|
+
let dict = ruby.hash_new();
|
20
21
|
fields_to_rbdict(&metadata.schema, &dict)?;
|
21
22
|
Ok(dict)
|
22
23
|
}
|
23
24
|
|
25
|
+
pub fn read_parquet_metadata(rb_f: Value) -> RbResult<RHash> {
|
26
|
+
use polars_parquet::read::read_metadata;
|
27
|
+
use polars_parquet::read::schema::read_custom_key_value_metadata;
|
28
|
+
|
29
|
+
let metadata = match get_either_file(rb_f, false)? {
|
30
|
+
EitherRustRubyFile::Rust(r) => {
|
31
|
+
read_metadata(&mut BufReader::new(r)).map_err(RbPolarsErr::from)?
|
32
|
+
}
|
33
|
+
EitherRustRubyFile::Rb(mut r) => read_metadata(&mut r).map_err(RbPolarsErr::from)?,
|
34
|
+
};
|
35
|
+
|
36
|
+
let key_value_metadata = read_custom_key_value_metadata(metadata.key_value_metadata());
|
37
|
+
let ruby = Ruby::get_with(rb_f);
|
38
|
+
let dict = ruby.hash_new();
|
39
|
+
for (key, value) in key_value_metadata.into_iter() {
|
40
|
+
dict.aset(key.as_str(), value.as_str())?;
|
41
|
+
}
|
42
|
+
Ok(dict)
|
43
|
+
}
|
44
|
+
|
24
45
|
pub fn read_parquet_schema(rb_f: Value) -> RbResult<RHash> {
|
25
46
|
use polars_parquet::read::{infer_schema, read_metadata};
|
26
47
|
|
@@ -32,7 +53,8 @@ pub fn read_parquet_schema(rb_f: Value) -> RbResult<RHash> {
|
|
32
53
|
};
|
33
54
|
let arrow_schema = infer_schema(&metadata).map_err(RbPolarsErr::from)?;
|
34
55
|
|
35
|
-
let
|
56
|
+
let ruby = Ruby::get_with(rb_f);
|
57
|
+
let dict = ruby.hash_new();
|
36
58
|
fields_to_rbdict(&arrow_schema, &dict)?;
|
37
59
|
Ok(dict)
|
38
60
|
}
|
@@ -1,6 +1,6 @@
|
|
1
|
-
use magnus::encoding::
|
1
|
+
use magnus::encoding::EncodingCapable;
|
2
2
|
use magnus::{
|
3
|
-
Float, Integer, RArray, RString, Ruby, Value,
|
3
|
+
Float, Integer, RArray, RString, Ruby, Value, prelude::*, typed_data::Obj, value::Opaque,
|
4
4
|
};
|
5
5
|
use polars::lazy::dsl;
|
6
6
|
use polars::prelude::*;
|
@@ -85,6 +85,10 @@ pub fn as_struct(exprs: RArray) -> RbResult<RbExpr> {
|
|
85
85
|
Ok(dsl::as_struct(exprs).into())
|
86
86
|
}
|
87
87
|
|
88
|
+
pub fn field(names: Vec<String>) -> RbExpr {
|
89
|
+
dsl::Expr::Field(names.into_iter().map(|x| x.into()).collect()).into()
|
90
|
+
}
|
91
|
+
|
88
92
|
pub fn coalesce(exprs: RArray) -> RbResult<RbExpr> {
|
89
93
|
let exprs = rb_exprs_to_exprs(exprs)?;
|
90
94
|
Ok(dsl::coalesce(&exprs).into())
|
@@ -94,10 +98,10 @@ pub fn col(name: String) -> RbExpr {
|
|
94
98
|
dsl::col(&name).into()
|
95
99
|
}
|
96
100
|
|
97
|
-
pub fn collect_all(lfs: RArray) -> RbResult<RArray> {
|
101
|
+
pub fn collect_all(ruby: &Ruby, lfs: RArray) -> RbResult<RArray> {
|
98
102
|
let lfs = lfs.typecheck::<Obj<RbLazyFrame>>()?;
|
99
103
|
|
100
|
-
Ok(
|
104
|
+
Ok(ruby.ary_from_iter(lfs.iter().map(|lf| {
|
101
105
|
let df = lf.ldf.borrow().clone().collect().unwrap();
|
102
106
|
RbDataFrame::new(df)
|
103
107
|
})))
|
@@ -279,7 +283,8 @@ pub fn fold(
|
|
279
283
|
}
|
280
284
|
|
281
285
|
pub fn lit(value: Value, allow_object: bool, is_scalar: bool) -> RbResult<RbExpr> {
|
282
|
-
|
286
|
+
let ruby = Ruby::get_with(value);
|
287
|
+
if value.is_kind_of(ruby.class_true_class()) || value.is_kind_of(ruby.class_false_class()) {
|
283
288
|
Ok(dsl::lit(bool::try_convert(value)?).into())
|
284
289
|
} else if let Some(v) = Integer::from_value(value) {
|
285
290
|
match v.to_i64() {
|
@@ -298,7 +303,7 @@ pub fn lit(value: Value, allow_object: bool, is_scalar: bool) -> RbResult<RbExpr
|
|
298
303
|
} else if let Some(v) = Float::from_value(value) {
|
299
304
|
Ok(dsl::lit(v.to_f64()).into())
|
300
305
|
} else if let Some(v) = RString::from_value(value) {
|
301
|
-
if v.enc_get() ==
|
306
|
+
if v.enc_get() == ruby.utf8_encindex() {
|
302
307
|
Ok(dsl::lit(v.to_string()?).into())
|
303
308
|
} else {
|
304
309
|
Ok(dsl::lit(unsafe { v.as_slice() }).into())
|
@@ -1,4 +1,4 @@
|
|
1
|
-
use magnus::{IntoValue, Value};
|
1
|
+
use magnus::{IntoValue, Ruby, Value};
|
2
2
|
use polars_core;
|
3
3
|
use polars_core::POOL;
|
4
4
|
use polars_core::fmt::FloatFmt;
|
@@ -7,8 +7,8 @@ use polars_core::prelude::IDX_DTYPE;
|
|
7
7
|
use crate::conversion::Wrap;
|
8
8
|
use crate::{RbResult, RbValueError};
|
9
9
|
|
10
|
-
pub fn get_index_type() -> Value {
|
11
|
-
Wrap(IDX_DTYPE).
|
10
|
+
pub fn get_index_type(ruby: &Ruby) -> Value {
|
11
|
+
Wrap(IDX_DTYPE).into_value_with(ruby)
|
12
12
|
}
|
13
13
|
|
14
14
|
pub fn thread_pool_size() -> usize {
|
@@ -1,5 +1,5 @@
|
|
1
1
|
use crate::RbResult;
|
2
|
-
use magnus::{
|
2
|
+
use magnus::{Ruby, Value};
|
3
3
|
|
4
4
|
pub fn enable_string_cache() {
|
5
5
|
// The string cache no longer exists.
|
@@ -18,7 +18,7 @@ pub fn using_string_cache() -> bool {
|
|
18
18
|
pub struct RbStringCacheHolder {}
|
19
19
|
|
20
20
|
impl RbStringCacheHolder {
|
21
|
-
pub fn hold() -> RbResult<Value> {
|
22
|
-
|
21
|
+
pub fn hold(ruby: &Ruby) -> RbResult<Value> {
|
22
|
+
ruby.yield_splat(ruby.ary_new())
|
23
23
|
}
|
24
24
|
}
|
@@ -1,6 +1,6 @@
|
|
1
1
|
use arrow::datatypes::ArrowDataType;
|
2
2
|
use arrow::ffi;
|
3
|
-
use magnus::{IntoValue, Value};
|
3
|
+
use magnus::{IntoValue, Ruby, Value};
|
4
4
|
use polars::datatypes::CompatLevel;
|
5
5
|
use polars::frame::DataFrame;
|
6
6
|
use polars::prelude::{ArrayRef, ArrowField, PlSmallStr, PolarsResult, SchemaExt};
|
@@ -20,11 +20,11 @@ impl RbArrowArrayStream {
|
|
20
20
|
}
|
21
21
|
}
|
22
22
|
|
23
|
-
pub(crate) fn dataframe_to_stream(df: &DataFrame) -> RbResult<Value> {
|
23
|
+
pub(crate) fn dataframe_to_stream(df: &DataFrame, ruby: &Ruby) -> RbResult<Value> {
|
24
24
|
let iter = Box::new(DataFrameStreamIterator::new(df));
|
25
25
|
let field = iter.field();
|
26
26
|
let stream = ffi::export_iterator(iter, field);
|
27
|
-
Ok(RbArrowArrayStream { stream }.
|
27
|
+
Ok(RbArrowArrayStream { stream }.into_value_with(ruby))
|
28
28
|
}
|
29
29
|
|
30
30
|
pub struct DataFrameStreamIterator {
|
@@ -1,4 +1,4 @@
|
|
1
|
-
use magnus::{IntoValue, Module,
|
1
|
+
use magnus::{IntoValue, Module, RClass, RModule, Ruby, Value, prelude::*};
|
2
2
|
|
3
3
|
use crate::RbResult;
|
4
4
|
|
@@ -44,9 +44,10 @@ impl<T: Element> RbArray1<T> {
|
|
44
44
|
where
|
45
45
|
I: IntoIterator<Item = T>,
|
46
46
|
{
|
47
|
-
|
47
|
+
let ruby = Ruby::get().unwrap();
|
48
|
+
ruby.class_object()
|
48
49
|
.const_get::<_, RModule>("Numo")?
|
49
50
|
.const_get::<_, RClass>(T::class_name())?
|
50
|
-
.funcall("cast", (
|
51
|
+
.funcall("cast", (ruby.ary_from_iter(values),))
|
51
52
|
}
|
52
53
|
}
|
data/ext/polars/src/io/mod.rs
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
use std::sync::Arc;
|
2
2
|
|
3
3
|
use magnus::{TryConvert, Value, value::ReprValue};
|
4
|
+
use polars::prelude::default_values::DefaultFieldValues;
|
4
5
|
use polars::prelude::deletion::DeletionFilesList;
|
5
6
|
use polars::prelude::{
|
6
7
|
CastColumnsPolicy, ColumnMapping, ExtraColumnsPolicy, MissingColumnsPolicy, PlSmallStr, Schema,
|
@@ -48,6 +49,8 @@ impl RbScanOptions {
|
|
48
49
|
let deletion_files: Option<Wrap<DeletionFilesList>> =
|
49
50
|
self.0.funcall("deletion_files", ())?;
|
50
51
|
let column_mapping: Option<Wrap<ColumnMapping>> = self.0.funcall("column_mapping", ())?;
|
52
|
+
let default_values: Option<Wrap<DefaultFieldValues>> =
|
53
|
+
self.0.funcall("default_values", ())?;
|
51
54
|
|
52
55
|
let cloud_options = storage_options;
|
53
56
|
|
@@ -95,6 +98,9 @@ impl RbScanOptions {
|
|
95
98
|
include_file_paths: include_file_paths.map(|x| x.0),
|
96
99
|
deletion_files: DeletionFilesList::filter_empty(deletion_files.map(|x| x.0)),
|
97
100
|
column_mapping: column_mapping.map(|x| x.0),
|
101
|
+
default_values: default_values
|
102
|
+
.map(|x| x.0)
|
103
|
+
.filter(|DefaultFieldValues::Iceberg(v)| !v.is_empty()),
|
98
104
|
};
|
99
105
|
|
100
106
|
Ok(unified_scan_args)
|