polars-df 0.5.0 → 0.7.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +26 -0
- data/Cargo.lock +595 -709
- data/Cargo.toml +1 -0
- data/README.md +11 -9
- data/ext/polars/Cargo.toml +18 -10
- data/ext/polars/src/batched_csv.rs +26 -26
- data/ext/polars/src/conversion.rs +272 -136
- data/ext/polars/src/dataframe.rs +135 -94
- data/ext/polars/src/error.rs +8 -5
- data/ext/polars/src/expr/array.rs +15 -0
- data/ext/polars/src/expr/binary.rs +18 -6
- data/ext/polars/src/expr/datetime.rs +10 -12
- data/ext/polars/src/expr/general.rs +78 -264
- data/ext/polars/src/expr/list.rs +41 -28
- data/ext/polars/src/{expr.rs → expr/mod.rs} +5 -2
- data/ext/polars/src/expr/name.rs +44 -0
- data/ext/polars/src/expr/rolling.rs +196 -0
- data/ext/polars/src/expr/string.rs +94 -66
- data/ext/polars/src/file.rs +3 -3
- data/ext/polars/src/functions/aggregation.rs +35 -0
- data/ext/polars/src/functions/eager.rs +7 -31
- data/ext/polars/src/functions/io.rs +10 -10
- data/ext/polars/src/functions/lazy.rs +119 -54
- data/ext/polars/src/functions/meta.rs +30 -0
- data/ext/polars/src/functions/misc.rs +8 -0
- data/ext/polars/src/functions/mod.rs +5 -0
- data/ext/polars/src/functions/random.rs +6 -0
- data/ext/polars/src/functions/range.rs +46 -0
- data/ext/polars/src/functions/string_cache.rs +11 -0
- data/ext/polars/src/functions/whenthen.rs +7 -7
- data/ext/polars/src/lazyframe.rs +61 -44
- data/ext/polars/src/lib.rs +173 -84
- data/ext/polars/src/{apply → map}/dataframe.rs +28 -33
- data/ext/polars/src/{apply → map}/mod.rs +10 -6
- data/ext/polars/src/{apply → map}/series.rs +12 -16
- data/ext/polars/src/object.rs +2 -2
- data/ext/polars/src/rb_modules.rs +25 -6
- data/ext/polars/src/series/construction.rs +32 -6
- data/ext/polars/src/series/export.rs +2 -2
- data/ext/polars/src/series/set_at_idx.rs +33 -17
- data/ext/polars/src/series.rs +62 -42
- data/ext/polars/src/sql.rs +46 -0
- data/lib/polars/array_expr.rb +84 -0
- data/lib/polars/array_name_space.rb +77 -0
- data/lib/polars/batched_csv_reader.rb +1 -1
- data/lib/polars/config.rb +530 -0
- data/lib/polars/data_frame.rb +206 -131
- data/lib/polars/data_types.rb +163 -29
- data/lib/polars/date_time_expr.rb +13 -18
- data/lib/polars/date_time_name_space.rb +22 -28
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/expr.rb +241 -151
- data/lib/polars/functions.rb +29 -38
- data/lib/polars/group_by.rb +38 -76
- data/lib/polars/io.rb +37 -2
- data/lib/polars/lazy_frame.rb +174 -95
- data/lib/polars/lazy_functions.rb +87 -63
- data/lib/polars/lazy_group_by.rb +7 -8
- data/lib/polars/list_expr.rb +40 -36
- data/lib/polars/list_name_space.rb +15 -15
- data/lib/polars/name_expr.rb +198 -0
- data/lib/polars/rolling_group_by.rb +6 -4
- data/lib/polars/series.rb +95 -28
- data/lib/polars/sql_context.rb +194 -0
- data/lib/polars/string_expr.rb +249 -69
- data/lib/polars/string_name_space.rb +155 -25
- data/lib/polars/utils.rb +119 -57
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +6 -0
- metadata +21 -7
- /data/ext/polars/src/{apply → map}/lazy.rs +0 -0
data/ext/polars/src/lazyframe.rs
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
use magnus::{IntoValue, RArray, RHash, Value};
|
1
|
+
use magnus::{IntoValue, RArray, RHash, TryConvert, Value};
|
2
2
|
use polars::io::RowCount;
|
3
3
|
use polars::lazy::frame::LazyFrame;
|
4
4
|
use polars::prelude::*;
|
@@ -78,32 +78,32 @@ impl RbLazyFrame {
|
|
78
78
|
pub fn new_from_csv(arguments: &[Value]) -> RbResult<Self> {
|
79
79
|
// start arguments
|
80
80
|
// this pattern is needed for more than 16
|
81
|
-
let path
|
82
|
-
let
|
83
|
-
let has_header
|
84
|
-
let ignore_errors
|
85
|
-
let skip_rows
|
86
|
-
let n_rows
|
87
|
-
let cache
|
88
|
-
let overwrite_dtype
|
89
|
-
let low_memory
|
90
|
-
let comment_char
|
91
|
-
let quote_char
|
92
|
-
let null_values
|
93
|
-
let infer_schema_length
|
94
|
-
let with_schema_modify
|
95
|
-
let rechunk
|
96
|
-
let skip_rows_after_header
|
97
|
-
let encoding
|
98
|
-
let row_count
|
99
|
-
let try_parse_dates
|
100
|
-
let eol_char
|
81
|
+
let path = String::try_convert(arguments[0])?;
|
82
|
+
let separator = String::try_convert(arguments[1])?;
|
83
|
+
let has_header = bool::try_convert(arguments[2])?;
|
84
|
+
let ignore_errors = bool::try_convert(arguments[3])?;
|
85
|
+
let skip_rows = usize::try_convert(arguments[4])?;
|
86
|
+
let n_rows = Option::<usize>::try_convert(arguments[5])?;
|
87
|
+
let cache = bool::try_convert(arguments[6])?;
|
88
|
+
let overwrite_dtype = Option::<Vec<(String, Wrap<DataType>)>>::try_convert(arguments[7])?;
|
89
|
+
let low_memory = bool::try_convert(arguments[8])?;
|
90
|
+
let comment_char = Option::<String>::try_convert(arguments[9])?;
|
91
|
+
let quote_char = Option::<String>::try_convert(arguments[10])?;
|
92
|
+
let null_values = Option::<Wrap<NullValues>>::try_convert(arguments[11])?;
|
93
|
+
let infer_schema_length = Option::<usize>::try_convert(arguments[12])?;
|
94
|
+
let with_schema_modify = Option::<Value>::try_convert(arguments[13])?;
|
95
|
+
let rechunk = bool::try_convert(arguments[14])?;
|
96
|
+
let skip_rows_after_header = usize::try_convert(arguments[15])?;
|
97
|
+
let encoding = Wrap::<CsvEncoding>::try_convert(arguments[16])?;
|
98
|
+
let row_count = Option::<(String, IdxSize)>::try_convert(arguments[17])?;
|
99
|
+
let try_parse_dates = bool::try_convert(arguments[18])?;
|
100
|
+
let eol_char = String::try_convert(arguments[19])?;
|
101
101
|
// end arguments
|
102
102
|
|
103
103
|
let null_values = null_values.map(|w| w.0);
|
104
104
|
let comment_char = comment_char.map(|s| s.as_bytes()[0]);
|
105
105
|
let quote_char = quote_char.map(|s| s.as_bytes()[0]);
|
106
|
-
let
|
106
|
+
let separator = separator.as_bytes()[0];
|
107
107
|
let eol_char = eol_char.as_bytes()[0];
|
108
108
|
|
109
109
|
let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
|
@@ -116,7 +116,7 @@ impl RbLazyFrame {
|
|
116
116
|
});
|
117
117
|
let r = LazyCsvReader::new(path)
|
118
118
|
.with_infer_schema_length(infer_schema_length)
|
119
|
-
.
|
119
|
+
.with_separator(separator)
|
120
120
|
.has_header(has_header)
|
121
121
|
.with_ignore_errors(ignore_errors)
|
122
122
|
.with_skip_rows(skip_rows)
|
@@ -151,6 +151,7 @@ impl RbLazyFrame {
|
|
151
151
|
row_count: Option<(String, IdxSize)>,
|
152
152
|
low_memory: bool,
|
153
153
|
use_statistics: bool,
|
154
|
+
hive_partitioning: bool,
|
154
155
|
) -> RbResult<Self> {
|
155
156
|
let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
|
156
157
|
let args = ScanArgsParquet {
|
@@ -163,6 +164,7 @@ impl RbLazyFrame {
|
|
163
164
|
// TODO support cloud options
|
164
165
|
cloud_options: None,
|
165
166
|
use_statistics,
|
167
|
+
hive_partitioning,
|
166
168
|
};
|
167
169
|
let lf = LazyFrame::scan_parquet(path, args).map_err(RbPolarsErr::from)?;
|
168
170
|
Ok(lf.into())
|
@@ -217,6 +219,7 @@ impl RbLazyFrame {
|
|
217
219
|
slice_pushdown: bool,
|
218
220
|
cse: bool,
|
219
221
|
allow_streaming: bool,
|
222
|
+
_eager: bool,
|
220
223
|
) -> RbLazyFrame {
|
221
224
|
let ldf = self.ldf.clone();
|
222
225
|
let ldf = ldf
|
@@ -224,13 +227,20 @@ impl RbLazyFrame {
|
|
224
227
|
.with_predicate_pushdown(predicate_pushdown)
|
225
228
|
.with_simplify_expr(simplify_expr)
|
226
229
|
.with_slice_pushdown(slice_pushdown)
|
227
|
-
.
|
230
|
+
.with_comm_subplan_elim(cse)
|
228
231
|
.with_streaming(allow_streaming)
|
232
|
+
._with_eager(_eager)
|
229
233
|
.with_projection_pushdown(projection_pushdown);
|
230
234
|
ldf.into()
|
231
235
|
}
|
232
236
|
|
233
|
-
pub fn sort(
|
237
|
+
pub fn sort(
|
238
|
+
&self,
|
239
|
+
by_column: String,
|
240
|
+
reverse: bool,
|
241
|
+
nulls_last: bool,
|
242
|
+
maintain_order: bool,
|
243
|
+
) -> Self {
|
234
244
|
let ldf = self.ldf.clone();
|
235
245
|
ldf.sort(
|
236
246
|
&by_column,
|
@@ -238,6 +248,7 @@ impl RbLazyFrame {
|
|
238
248
|
descending: reverse,
|
239
249
|
nulls_last,
|
240
250
|
multithreaded: true,
|
251
|
+
maintain_order,
|
241
252
|
},
|
242
253
|
)
|
243
254
|
.into()
|
@@ -248,10 +259,13 @@ impl RbLazyFrame {
|
|
248
259
|
by_column: RArray,
|
249
260
|
reverse: Vec<bool>,
|
250
261
|
nulls_last: bool,
|
262
|
+
maintain_order: bool,
|
251
263
|
) -> RbResult<Self> {
|
252
264
|
let ldf = self.ldf.clone();
|
253
265
|
let exprs = rb_exprs_to_exprs(by_column)?;
|
254
|
-
Ok(ldf
|
266
|
+
Ok(ldf
|
267
|
+
.sort_by_exprs(exprs, reverse, nulls_last, maintain_order)
|
268
|
+
.into())
|
255
269
|
}
|
256
270
|
|
257
271
|
pub fn cache(&self) -> Self {
|
@@ -308,31 +322,32 @@ impl RbLazyFrame {
|
|
308
322
|
Ok(ldf.select(exprs).into())
|
309
323
|
}
|
310
324
|
|
311
|
-
pub fn
|
325
|
+
pub fn group_by(&self, by: RArray, maintain_order: bool) -> RbResult<RbLazyGroupBy> {
|
312
326
|
let ldf = self.ldf.clone();
|
313
327
|
let by = rb_exprs_to_exprs(by)?;
|
314
328
|
let lazy_gb = if maintain_order {
|
315
|
-
ldf.
|
329
|
+
ldf.group_by_stable(by)
|
316
330
|
} else {
|
317
|
-
ldf.
|
331
|
+
ldf.group_by(by)
|
318
332
|
};
|
319
333
|
Ok(RbLazyGroupBy {
|
320
334
|
lgb: RefCell::new(Some(lazy_gb)),
|
321
335
|
})
|
322
336
|
}
|
323
337
|
|
324
|
-
pub fn
|
338
|
+
pub fn group_by_rolling(
|
325
339
|
&self,
|
326
340
|
index_column: &RbExpr,
|
327
341
|
period: String,
|
328
342
|
offset: String,
|
329
343
|
closed: Wrap<ClosedWindow>,
|
330
344
|
by: RArray,
|
345
|
+
check_sorted: bool,
|
331
346
|
) -> RbResult<RbLazyGroupBy> {
|
332
347
|
let closed_window = closed.0;
|
333
348
|
let ldf = self.ldf.clone();
|
334
349
|
let by = rb_exprs_to_exprs(by)?;
|
335
|
-
let lazy_gb = ldf.
|
350
|
+
let lazy_gb = ldf.group_by_rolling(
|
336
351
|
index_column.inner.clone(),
|
337
352
|
by,
|
338
353
|
RollingGroupOptions {
|
@@ -340,6 +355,7 @@ impl RbLazyFrame {
|
|
340
355
|
period: Duration::parse(&period),
|
341
356
|
offset: Duration::parse(&offset),
|
342
357
|
closed_window,
|
358
|
+
check_sorted,
|
343
359
|
},
|
344
360
|
);
|
345
361
|
|
@@ -349,32 +365,34 @@ impl RbLazyFrame {
|
|
349
365
|
}
|
350
366
|
|
351
367
|
#[allow(clippy::too_many_arguments)]
|
352
|
-
pub fn
|
368
|
+
pub fn group_by_dynamic(
|
353
369
|
&self,
|
354
370
|
index_column: &RbExpr,
|
355
371
|
every: String,
|
356
372
|
period: String,
|
357
373
|
offset: String,
|
358
|
-
|
374
|
+
label: Wrap<Label>,
|
359
375
|
include_boundaries: bool,
|
360
376
|
closed: Wrap<ClosedWindow>,
|
361
377
|
by: RArray,
|
362
378
|
start_by: Wrap<StartBy>,
|
379
|
+
check_sorted: bool,
|
363
380
|
) -> RbResult<RbLazyGroupBy> {
|
364
381
|
let closed_window = closed.0;
|
365
382
|
let by = rb_exprs_to_exprs(by)?;
|
366
383
|
let ldf = self.ldf.clone();
|
367
|
-
let lazy_gb = ldf.
|
384
|
+
let lazy_gb = ldf.group_by_dynamic(
|
368
385
|
index_column.inner.clone(),
|
369
386
|
by,
|
370
387
|
DynamicGroupOptions {
|
371
388
|
every: Duration::parse(&every),
|
372
389
|
period: Duration::parse(&period),
|
373
390
|
offset: Duration::parse(&offset),
|
374
|
-
|
391
|
+
label: label.0,
|
375
392
|
include_boundaries,
|
376
393
|
closed_window,
|
377
394
|
start_by: start_by.0,
|
395
|
+
check_sorted,
|
378
396
|
..Default::default()
|
379
397
|
},
|
380
398
|
);
|
@@ -387,7 +405,7 @@ impl RbLazyFrame {
|
|
387
405
|
pub fn with_context(&self, contexts: RArray) -> RbResult<Self> {
|
388
406
|
let contexts = contexts
|
389
407
|
.each()
|
390
|
-
.map(|v| v.unwrap()
|
408
|
+
.map(|v| TryConvert::try_convert(v.unwrap()))
|
391
409
|
.collect::<RbResult<Vec<&RbLazyFrame>>>()?;
|
392
410
|
let contexts = contexts
|
393
411
|
.into_iter()
|
@@ -478,14 +496,13 @@ impl RbLazyFrame {
|
|
478
496
|
ldf.reverse().into()
|
479
497
|
}
|
480
498
|
|
481
|
-
pub fn shift(&self,
|
482
|
-
let
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
|
487
|
-
|
488
|
-
ldf.shift_and_fill(periods, fill_value.inner.clone()).into()
|
499
|
+
pub fn shift(&self, n: &RbExpr, fill_value: Option<&RbExpr>) -> Self {
|
500
|
+
let lf = self.ldf.clone();
|
501
|
+
let out = match fill_value {
|
502
|
+
Some(v) => lf.shift_and_fill(n.inner.clone(), v.inner.clone()),
|
503
|
+
None => lf.shift(n.inner.clone()),
|
504
|
+
};
|
505
|
+
out.into()
|
489
506
|
}
|
490
507
|
|
491
508
|
pub fn fill_nan(&self, fill_value: &RbExpr) -> Self {
|