polars-df 0.10.0 → 0.12.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +27 -0
- data/Cargo.lock +392 -351
- data/README.md +6 -6
- data/ext/polars/Cargo.toml +12 -7
- data/ext/polars/src/batched_csv.rs +53 -52
- data/ext/polars/src/conversion/any_value.rs +261 -0
- data/ext/polars/src/conversion/chunked_array.rs +4 -4
- data/ext/polars/src/conversion/mod.rs +60 -66
- data/ext/polars/src/dataframe/construction.rs +184 -0
- data/ext/polars/src/dataframe/export.rs +48 -0
- data/ext/polars/src/dataframe/general.rs +597 -0
- data/ext/polars/src/dataframe/io.rs +473 -0
- data/ext/polars/src/dataframe/mod.rs +26 -0
- data/ext/polars/src/error.rs +26 -4
- data/ext/polars/src/expr/categorical.rs +0 -10
- data/ext/polars/src/expr/datetime.rs +4 -8
- data/ext/polars/src/expr/general.rs +129 -94
- data/ext/polars/src/expr/mod.rs +2 -2
- data/ext/polars/src/expr/rolling.rs +201 -77
- data/ext/polars/src/expr/string.rs +11 -36
- data/ext/polars/src/functions/eager.rs +10 -10
- data/ext/polars/src/functions/lazy.rs +23 -21
- data/ext/polars/src/functions/range.rs +69 -1
- data/ext/polars/src/interop/mod.rs +1 -0
- data/ext/polars/src/interop/numo/mod.rs +2 -0
- data/ext/polars/src/interop/numo/to_numo_df.rs +23 -0
- data/ext/polars/src/interop/numo/to_numo_series.rs +61 -0
- data/ext/polars/src/lazyframe/mod.rs +135 -136
- data/ext/polars/src/lib.rs +94 -59
- data/ext/polars/src/map/dataframe.rs +2 -2
- data/ext/polars/src/map/lazy.rs +5 -25
- data/ext/polars/src/map/series.rs +7 -1
- data/ext/polars/src/rb_modules.rs +25 -1
- data/ext/polars/src/series/aggregation.rs +49 -30
- data/ext/polars/src/series/arithmetic.rs +21 -11
- data/ext/polars/src/series/construction.rs +56 -38
- data/ext/polars/src/series/export.rs +131 -49
- data/ext/polars/src/series/mod.rs +32 -141
- data/ext/polars/src/sql.rs +3 -1
- data/lib/polars/array_expr.rb +4 -4
- data/lib/polars/batched_csv_reader.rb +11 -5
- data/lib/polars/cat_expr.rb +0 -36
- data/lib/polars/cat_name_space.rb +0 -37
- data/lib/polars/convert.rb +6 -1
- data/lib/polars/data_frame.rb +176 -403
- data/lib/polars/data_types.rb +1 -1
- data/lib/polars/date_time_expr.rb +525 -572
- data/lib/polars/date_time_name_space.rb +263 -460
- data/lib/polars/dynamic_group_by.rb +5 -5
- data/lib/polars/exceptions.rb +7 -0
- data/lib/polars/expr.rb +1394 -243
- data/lib/polars/expr_dispatch.rb +1 -1
- data/lib/polars/functions/aggregation/horizontal.rb +8 -8
- data/lib/polars/functions/as_datatype.rb +63 -40
- data/lib/polars/functions/lazy.rb +63 -14
- data/lib/polars/functions/lit.rb +1 -1
- data/lib/polars/functions/range/date_range.rb +90 -57
- data/lib/polars/functions/range/datetime_range.rb +149 -0
- data/lib/polars/functions/range/int_range.rb +2 -2
- data/lib/polars/functions/range/time_range.rb +141 -0
- data/lib/polars/functions/repeat.rb +1 -1
- data/lib/polars/functions/whenthen.rb +1 -1
- data/lib/polars/group_by.rb +88 -23
- data/lib/polars/io/avro.rb +24 -0
- data/lib/polars/{io.rb → io/csv.rb} +299 -493
- data/lib/polars/io/database.rb +73 -0
- data/lib/polars/io/ipc.rb +247 -0
- data/lib/polars/io/json.rb +29 -0
- data/lib/polars/io/ndjson.rb +80 -0
- data/lib/polars/io/parquet.rb +227 -0
- data/lib/polars/lazy_frame.rb +143 -272
- data/lib/polars/lazy_group_by.rb +100 -3
- data/lib/polars/list_expr.rb +11 -11
- data/lib/polars/list_name_space.rb +5 -1
- data/lib/polars/rolling_group_by.rb +7 -9
- data/lib/polars/series.rb +103 -187
- data/lib/polars/string_expr.rb +78 -102
- data/lib/polars/string_name_space.rb +5 -4
- data/lib/polars/testing.rb +2 -2
- data/lib/polars/utils/constants.rb +9 -0
- data/lib/polars/utils/convert.rb +97 -0
- data/lib/polars/utils/parse.rb +89 -0
- data/lib/polars/utils/various.rb +76 -0
- data/lib/polars/utils/wrap.rb +19 -0
- data/lib/polars/utils.rb +8 -300
- data/lib/polars/version.rb +1 -1
- data/lib/polars/whenthen.rb +6 -6
- data/lib/polars.rb +20 -1
- metadata +28 -7
- data/ext/polars/src/conversion/anyvalue.rs +0 -186
- data/ext/polars/src/dataframe.rs +0 -1208
@@ -15,19 +15,14 @@ use crate::{RbDataFrame, RbExpr, RbLazyGroupBy, RbPolarsErr, RbResult, RbValueEr
|
|
15
15
|
#[magnus::wrap(class = "Polars::RbLazyFrame")]
|
16
16
|
#[derive(Clone)]
|
17
17
|
pub struct RbLazyFrame {
|
18
|
-
pub ldf: LazyFrame
|
19
|
-
}
|
20
|
-
|
21
|
-
impl RbLazyFrame {
|
22
|
-
fn get_schema(&self) -> RbResult<SchemaRef> {
|
23
|
-
let schema = self.ldf.schema().map_err(RbPolarsErr::from)?;
|
24
|
-
Ok(schema)
|
25
|
-
}
|
18
|
+
pub ldf: RefCell<LazyFrame>,
|
26
19
|
}
|
27
20
|
|
28
21
|
impl From<LazyFrame> for RbLazyFrame {
|
29
22
|
fn from(ldf: LazyFrame) -> Self {
|
30
|
-
RbLazyFrame {
|
23
|
+
RbLazyFrame {
|
24
|
+
ldf: RefCell::new(ldf),
|
25
|
+
}
|
31
26
|
}
|
32
27
|
}
|
33
28
|
|
@@ -48,7 +43,7 @@ impl RbLazyFrame {
|
|
48
43
|
// in this scope
|
49
44
|
let json = unsafe { std::mem::transmute::<&'_ str, &'static str>(json.as_str()) };
|
50
45
|
|
51
|
-
let lp = serde_json::from_str::<
|
46
|
+
let lp = serde_json::from_str::<DslPlan>(json)
|
52
47
|
.map_err(|err| RbValueError::new_err(format!("{:?}", err)))?;
|
53
48
|
Ok(LazyFrame::from(lp).into())
|
54
49
|
}
|
@@ -63,10 +58,13 @@ impl RbLazyFrame {
|
|
63
58
|
row_index: Option<(String, IdxSize)>,
|
64
59
|
) -> RbResult<Self> {
|
65
60
|
let batch_size = batch_size.map(|v| v.0);
|
66
|
-
let row_index = row_index.map(|(name, offset)| RowIndex {
|
61
|
+
let row_index = row_index.map(|(name, offset)| RowIndex {
|
62
|
+
name: Arc::from(name.as_str()),
|
63
|
+
offset,
|
64
|
+
});
|
67
65
|
|
68
66
|
let lf = LazyJsonLineReader::new(path)
|
69
|
-
.with_infer_schema_length(infer_schema_length)
|
67
|
+
.with_infer_schema_length(infer_schema_length.and_then(NonZeroUsize::new))
|
70
68
|
.with_batch_size(batch_size)
|
71
69
|
.with_n_rows(n_rows)
|
72
70
|
.low_memory(low_memory)
|
@@ -107,7 +105,10 @@ impl RbLazyFrame {
|
|
107
105
|
let quote_char = quote_char.map(|s| s.as_bytes()[0]);
|
108
106
|
let separator = separator.as_bytes()[0];
|
109
107
|
let eol_char = eol_char.as_bytes()[0];
|
110
|
-
let row_index = row_index.map(|(name, offset)| RowIndex {
|
108
|
+
let row_index = row_index.map(|(name, offset)| RowIndex {
|
109
|
+
name: Arc::from(name.as_str()),
|
110
|
+
offset,
|
111
|
+
});
|
111
112
|
|
112
113
|
let overwrite_dtype = overwrite_dtype.map(|overwrite_dtype| {
|
113
114
|
overwrite_dtype
|
@@ -119,17 +120,17 @@ impl RbLazyFrame {
|
|
119
120
|
let r = LazyCsvReader::new(path)
|
120
121
|
.with_infer_schema_length(infer_schema_length)
|
121
122
|
.with_separator(separator)
|
122
|
-
.
|
123
|
+
.with_has_header(has_header)
|
123
124
|
.with_ignore_errors(ignore_errors)
|
124
125
|
.with_skip_rows(skip_rows)
|
125
126
|
.with_n_rows(n_rows)
|
126
127
|
.with_cache(cache)
|
127
|
-
.with_dtype_overwrite(overwrite_dtype.
|
128
|
+
.with_dtype_overwrite(overwrite_dtype.map(Arc::new))
|
128
129
|
// TODO add with_schema
|
129
|
-
.
|
130
|
+
.with_low_memory(low_memory)
|
130
131
|
.with_comment_prefix(comment_prefix.as_deref())
|
131
132
|
.with_quote_char(quote_char)
|
132
|
-
.
|
133
|
+
.with_eol_char(eol_char)
|
133
134
|
.with_rechunk(rechunk)
|
134
135
|
.with_skip_rows_after_header(skip_rows_after_header)
|
135
136
|
.with_encoding(encoding.0)
|
@@ -137,7 +138,7 @@ impl RbLazyFrame {
|
|
137
138
|
.with_try_parse_dates(try_parse_dates)
|
138
139
|
.with_null_values(null_values)
|
139
140
|
// TODO add with_missing_is_null
|
140
|
-
.
|
141
|
+
.with_truncate_ragged_lines(truncate_ragged_lines);
|
141
142
|
|
142
143
|
if let Some(_lambda) = with_schema_modify {
|
143
144
|
todo!();
|
@@ -157,8 +158,10 @@ impl RbLazyFrame {
|
|
157
158
|
row_index: Option<(String, IdxSize)>,
|
158
159
|
low_memory: bool,
|
159
160
|
use_statistics: bool,
|
160
|
-
hive_partitioning: bool
|
161
|
+
hive_partitioning: Option<bool>,
|
161
162
|
hive_schema: Option<Wrap<Schema>>,
|
163
|
+
try_parse_hive_dates: bool,
|
164
|
+
glob: bool,
|
162
165
|
) -> RbResult<Self> {
|
163
166
|
let parallel = parallel.0;
|
164
167
|
let hive_schema = hive_schema.map(|s| Arc::new(s.0));
|
@@ -171,10 +174,15 @@ impl RbLazyFrame {
|
|
171
174
|
.ok_or_else(|| RbValueError::new_err("expected a path argument".to_string()))?
|
172
175
|
};
|
173
176
|
|
174
|
-
let row_index = row_index.map(|(name, offset)| RowIndex {
|
177
|
+
let row_index = row_index.map(|(name, offset)| RowIndex {
|
178
|
+
name: Arc::from(name.as_str()),
|
179
|
+
offset,
|
180
|
+
});
|
175
181
|
let hive_options = HiveOptions {
|
176
182
|
enabled: hive_partitioning,
|
183
|
+
hive_start_idx: 0,
|
177
184
|
schema: hive_schema,
|
185
|
+
try_parse_dates: try_parse_hive_dates,
|
178
186
|
};
|
179
187
|
|
180
188
|
let args = ScanArgsParquet {
|
@@ -187,6 +195,7 @@ impl RbLazyFrame {
|
|
187
195
|
cloud_options: None,
|
188
196
|
use_statistics,
|
189
197
|
hive_options,
|
198
|
+
glob,
|
190
199
|
};
|
191
200
|
|
192
201
|
let lf = if path.is_some() {
|
@@ -206,7 +215,11 @@ impl RbLazyFrame {
|
|
206
215
|
row_index: Option<(String, IdxSize)>,
|
207
216
|
memory_map: bool,
|
208
217
|
) -> RbResult<Self> {
|
209
|
-
let row_index = row_index.map(|(name, offset)| RowIndex {
|
218
|
+
let row_index = row_index.map(|(name, offset)| RowIndex {
|
219
|
+
name: Arc::from(name.as_str()),
|
220
|
+
offset,
|
221
|
+
});
|
222
|
+
|
210
223
|
let args = ScanArgsIpc {
|
211
224
|
n_rows,
|
212
225
|
cache,
|
@@ -221,18 +234,23 @@ impl RbLazyFrame {
|
|
221
234
|
|
222
235
|
pub fn write_json(&self, rb_f: Value) -> RbResult<()> {
|
223
236
|
let file = BufWriter::new(get_file_like(rb_f, true)?);
|
224
|
-
serde_json::to_writer(file, &self.ldf.logical_plan)
|
237
|
+
serde_json::to_writer(file, &self.ldf.borrow().logical_plan)
|
225
238
|
.map_err(|err| RbValueError::new_err(format!("{:?}", err)))?;
|
226
239
|
Ok(())
|
227
240
|
}
|
228
241
|
|
229
|
-
pub fn describe_plan(&self) -> String {
|
230
|
-
self.ldf
|
242
|
+
pub fn describe_plan(&self) -> RbResult<String> {
|
243
|
+
self.ldf
|
244
|
+
.borrow()
|
245
|
+
.describe_plan()
|
246
|
+
.map_err(RbPolarsErr::from)
|
247
|
+
.map_err(Into::into)
|
231
248
|
}
|
232
249
|
|
233
250
|
pub fn describe_optimized_plan(&self) -> RbResult<String> {
|
234
251
|
let result = self
|
235
252
|
.ldf
|
253
|
+
.borrow()
|
236
254
|
.describe_optimized_plan()
|
237
255
|
.map_err(RbPolarsErr::from)?;
|
238
256
|
Ok(result)
|
@@ -251,7 +269,7 @@ impl RbLazyFrame {
|
|
251
269
|
allow_streaming: bool,
|
252
270
|
_eager: bool,
|
253
271
|
) -> RbLazyFrame {
|
254
|
-
let ldf = self.ldf.clone();
|
272
|
+
let ldf = self.ldf.borrow().clone();
|
255
273
|
let mut ldf = ldf
|
256
274
|
.with_type_coercion(type_coercion)
|
257
275
|
.with_predicate_pushdown(predicate_pushdown)
|
@@ -275,12 +293,12 @@ impl RbLazyFrame {
|
|
275
293
|
maintain_order: bool,
|
276
294
|
multithreaded: bool,
|
277
295
|
) -> Self {
|
278
|
-
let ldf = self.ldf.clone();
|
296
|
+
let ldf = self.ldf.borrow().clone();
|
279
297
|
ldf.sort(
|
280
298
|
[&by_column],
|
281
299
|
SortMultipleOptions {
|
282
300
|
descending: vec![descending],
|
283
|
-
nulls_last,
|
301
|
+
nulls_last: vec![nulls_last],
|
284
302
|
multithreaded,
|
285
303
|
maintain_order,
|
286
304
|
},
|
@@ -292,11 +310,11 @@ impl RbLazyFrame {
|
|
292
310
|
&self,
|
293
311
|
by: RArray,
|
294
312
|
descending: Vec<bool>,
|
295
|
-
nulls_last: bool
|
313
|
+
nulls_last: Vec<bool>,
|
296
314
|
maintain_order: bool,
|
297
315
|
multithreaded: bool,
|
298
316
|
) -> RbResult<Self> {
|
299
|
-
let ldf = self.ldf.clone();
|
317
|
+
let ldf = self.ldf.borrow().clone();
|
300
318
|
let exprs = rb_exprs_to_exprs(by)?;
|
301
319
|
Ok(ldf
|
302
320
|
.sort_by_exprs(
|
@@ -312,12 +330,12 @@ impl RbLazyFrame {
|
|
312
330
|
}
|
313
331
|
|
314
332
|
pub fn cache(&self) -> Self {
|
315
|
-
let ldf = self.ldf.clone();
|
333
|
+
let ldf = self.ldf.borrow().clone();
|
316
334
|
ldf.cache().into()
|
317
335
|
}
|
318
336
|
|
319
337
|
pub fn collect(&self) -> RbResult<RbDataFrame> {
|
320
|
-
let ldf = self.ldf.clone();
|
338
|
+
let ldf = self.ldf.borrow().clone();
|
321
339
|
let df = ldf.collect().map_err(RbPolarsErr::from)?;
|
322
340
|
Ok(df.into())
|
323
341
|
}
|
@@ -328,7 +346,7 @@ impl RbLazyFrame {
|
|
328
346
|
path: PathBuf,
|
329
347
|
compression: String,
|
330
348
|
compression_level: Option<i32>,
|
331
|
-
statistics:
|
349
|
+
statistics: Wrap<StatisticsOptions>,
|
332
350
|
row_group_size: Option<usize>,
|
333
351
|
data_pagesize_limit: Option<usize>,
|
334
352
|
maintain_order: bool,
|
@@ -337,13 +355,13 @@ impl RbLazyFrame {
|
|
337
355
|
|
338
356
|
let options = ParquetWriteOptions {
|
339
357
|
compression,
|
340
|
-
statistics,
|
358
|
+
statistics: statistics.0,
|
341
359
|
row_group_size,
|
342
360
|
data_pagesize_limit,
|
343
361
|
maintain_order,
|
344
362
|
};
|
345
363
|
|
346
|
-
let ldf = self.ldf.clone();
|
364
|
+
let ldf = self.ldf.borrow().clone();
|
347
365
|
ldf.sink_parquet(path, options).map_err(RbPolarsErr::from)?;
|
348
366
|
Ok(())
|
349
367
|
}
|
@@ -359,7 +377,7 @@ impl RbLazyFrame {
|
|
359
377
|
maintain_order,
|
360
378
|
};
|
361
379
|
|
362
|
-
let ldf = self.ldf.clone();
|
380
|
+
let ldf = self.ldf.borrow().clone();
|
363
381
|
ldf.sink_ipc(path, options).map_err(RbPolarsErr::from)?;
|
364
382
|
Ok(())
|
365
383
|
}
|
@@ -377,6 +395,7 @@ impl RbLazyFrame {
|
|
377
395
|
datetime_format: Option<String>,
|
378
396
|
date_format: Option<String>,
|
379
397
|
time_format: Option<String>,
|
398
|
+
float_scientific: Option<bool>,
|
380
399
|
float_precision: Option<usize>,
|
381
400
|
null_value: Option<String>,
|
382
401
|
quote_style: Option<Wrap<QuoteStyle>>,
|
@@ -389,6 +408,7 @@ impl RbLazyFrame {
|
|
389
408
|
date_format,
|
390
409
|
time_format,
|
391
410
|
datetime_format,
|
411
|
+
float_scientific,
|
392
412
|
float_precision,
|
393
413
|
separator,
|
394
414
|
quote_char,
|
@@ -405,7 +425,7 @@ impl RbLazyFrame {
|
|
405
425
|
serialize_options,
|
406
426
|
};
|
407
427
|
|
408
|
-
let ldf = self.ldf.clone();
|
428
|
+
let ldf = self.ldf.borrow().clone();
|
409
429
|
ldf.sink_csv(path, options).map_err(RbPolarsErr::from)?;
|
410
430
|
Ok(())
|
411
431
|
}
|
@@ -413,36 +433,36 @@ impl RbLazyFrame {
|
|
413
433
|
pub fn sink_json(&self, path: PathBuf, maintain_order: bool) -> RbResult<()> {
|
414
434
|
let options = JsonWriterOptions { maintain_order };
|
415
435
|
|
416
|
-
let ldf = self.ldf.clone();
|
436
|
+
let ldf = self.ldf.borrow().clone();
|
417
437
|
ldf.sink_json(path, options).map_err(RbPolarsErr::from)?;
|
418
438
|
Ok(())
|
419
439
|
}
|
420
440
|
|
421
441
|
pub fn fetch(&self, n_rows: usize) -> RbResult<RbDataFrame> {
|
422
|
-
let ldf = self.ldf.clone();
|
442
|
+
let ldf = self.ldf.borrow().clone();
|
423
443
|
let df = ldf.fetch(n_rows).map_err(RbPolarsErr::from)?;
|
424
444
|
Ok(df.into())
|
425
445
|
}
|
426
446
|
|
427
447
|
pub fn filter(&self, predicate: &RbExpr) -> Self {
|
428
|
-
let ldf = self.ldf.clone();
|
448
|
+
let ldf = self.ldf.borrow().clone();
|
429
449
|
ldf.filter(predicate.inner.clone()).into()
|
430
450
|
}
|
431
451
|
|
432
452
|
pub fn select(&self, exprs: RArray) -> RbResult<Self> {
|
433
|
-
let ldf = self.ldf.clone();
|
453
|
+
let ldf = self.ldf.borrow().clone();
|
434
454
|
let exprs = rb_exprs_to_exprs(exprs)?;
|
435
455
|
Ok(ldf.select(exprs).into())
|
436
456
|
}
|
437
457
|
|
438
458
|
pub fn select_seq(&self, exprs: RArray) -> RbResult<Self> {
|
439
|
-
let ldf = self.ldf.clone();
|
459
|
+
let ldf = self.ldf.borrow().clone();
|
440
460
|
let exprs = rb_exprs_to_exprs(exprs)?;
|
441
461
|
Ok(ldf.select_seq(exprs).into())
|
442
462
|
}
|
443
463
|
|
444
464
|
pub fn group_by(&self, by: RArray, maintain_order: bool) -> RbResult<RbLazyGroupBy> {
|
445
|
-
let ldf = self.ldf.clone();
|
465
|
+
let ldf = self.ldf.borrow().clone();
|
446
466
|
let by = rb_exprs_to_exprs(by)?;
|
447
467
|
let lazy_gb = if maintain_order {
|
448
468
|
ldf.group_by_stable(by)
|
@@ -461,10 +481,9 @@ impl RbLazyFrame {
|
|
461
481
|
offset: String,
|
462
482
|
closed: Wrap<ClosedWindow>,
|
463
483
|
by: RArray,
|
464
|
-
check_sorted: bool,
|
465
484
|
) -> RbResult<RbLazyGroupBy> {
|
466
485
|
let closed_window = closed.0;
|
467
|
-
let ldf = self.ldf.clone();
|
486
|
+
let ldf = self.ldf.borrow().clone();
|
468
487
|
let by = rb_exprs_to_exprs(by)?;
|
469
488
|
let lazy_gb = ldf.rolling(
|
470
489
|
index_column.inner.clone(),
|
@@ -474,7 +493,6 @@ impl RbLazyFrame {
|
|
474
493
|
period: Duration::parse(&period),
|
475
494
|
offset: Duration::parse(&offset),
|
476
495
|
closed_window,
|
477
|
-
check_sorted,
|
478
496
|
},
|
479
497
|
);
|
480
498
|
|
@@ -495,11 +513,10 @@ impl RbLazyFrame {
|
|
495
513
|
closed: Wrap<ClosedWindow>,
|
496
514
|
by: RArray,
|
497
515
|
start_by: Wrap<StartBy>,
|
498
|
-
check_sorted: bool,
|
499
516
|
) -> RbResult<RbLazyGroupBy> {
|
500
517
|
let closed_window = closed.0;
|
501
518
|
let by = rb_exprs_to_exprs(by)?;
|
502
|
-
let ldf = self.ldf.clone();
|
519
|
+
let ldf = self.ldf.borrow().clone();
|
503
520
|
let lazy_gb = ldf.group_by_dynamic(
|
504
521
|
index_column.inner.clone(),
|
505
522
|
by,
|
@@ -511,7 +528,6 @@ impl RbLazyFrame {
|
|
511
528
|
include_boundaries,
|
512
529
|
closed_window,
|
513
530
|
start_by: start_by.0,
|
514
|
-
check_sorted,
|
515
531
|
..Default::default()
|
516
532
|
},
|
517
533
|
);
|
@@ -523,14 +539,14 @@ impl RbLazyFrame {
|
|
523
539
|
|
524
540
|
pub fn with_context(&self, contexts: RArray) -> RbResult<Self> {
|
525
541
|
let contexts = contexts
|
526
|
-
.
|
527
|
-
.map(
|
542
|
+
.into_iter()
|
543
|
+
.map(TryConvert::try_convert)
|
528
544
|
.collect::<RbResult<Vec<&RbLazyFrame>>>()?;
|
529
545
|
let contexts = contexts
|
530
546
|
.into_iter()
|
531
|
-
.map(|ldf| ldf.ldf.clone())
|
547
|
+
.map(|ldf| ldf.ldf.borrow().clone())
|
532
548
|
.collect::<Vec<_>>();
|
533
|
-
Ok(self.ldf.clone().with_context(contexts).into())
|
549
|
+
Ok(self.ldf.borrow().clone().with_context(contexts).into())
|
534
550
|
}
|
535
551
|
|
536
552
|
#[allow(clippy::too_many_arguments)]
|
@@ -548,8 +564,8 @@ impl RbLazyFrame {
|
|
548
564
|
tolerance: Option<Wrap<AnyValue<'_>>>,
|
549
565
|
tolerance_str: Option<String>,
|
550
566
|
) -> RbResult<Self> {
|
551
|
-
let ldf = self.ldf.clone();
|
552
|
-
let other = other.ldf.clone();
|
567
|
+
let ldf = self.ldf.borrow().clone();
|
568
|
+
let other = other.ldf.borrow().clone();
|
553
569
|
let left_on = left_on.inner.clone();
|
554
570
|
let right_on = right_on.inner.clone();
|
555
571
|
Ok(ldf
|
@@ -583,8 +599,8 @@ impl RbLazyFrame {
|
|
583
599
|
how: Wrap<JoinType>,
|
584
600
|
suffix: String,
|
585
601
|
) -> RbResult<Self> {
|
586
|
-
let ldf = self.ldf.clone();
|
587
|
-
let other = other.ldf.clone();
|
602
|
+
let ldf = self.ldf.borrow().clone();
|
603
|
+
let other = other.ldf.borrow().clone();
|
588
604
|
let left_on = rb_exprs_to_exprs(left_on)?;
|
589
605
|
let right_on = rb_exprs_to_exprs(right_on)?;
|
590
606
|
|
@@ -603,32 +619,32 @@ impl RbLazyFrame {
|
|
603
619
|
}
|
604
620
|
|
605
621
|
pub fn with_column(&self, expr: &RbExpr) -> Self {
|
606
|
-
let ldf = self.ldf.clone();
|
622
|
+
let ldf = self.ldf.borrow().clone();
|
607
623
|
ldf.with_column(expr.inner.clone()).into()
|
608
624
|
}
|
609
625
|
|
610
626
|
pub fn with_columns(&self, exprs: RArray) -> RbResult<Self> {
|
611
|
-
let ldf = self.ldf.clone();
|
627
|
+
let ldf = self.ldf.borrow().clone();
|
612
628
|
Ok(ldf.with_columns(rb_exprs_to_exprs(exprs)?).into())
|
613
629
|
}
|
614
630
|
|
615
631
|
pub fn with_columns_seq(&self, exprs: RArray) -> RbResult<Self> {
|
616
|
-
let ldf = self.ldf.clone();
|
632
|
+
let ldf = self.ldf.borrow().clone();
|
617
633
|
Ok(ldf.with_columns_seq(rb_exprs_to_exprs(exprs)?).into())
|
618
634
|
}
|
619
635
|
|
620
636
|
pub fn rename(&self, existing: Vec<String>, new: Vec<String>) -> Self {
|
621
|
-
let ldf = self.ldf.clone();
|
637
|
+
let ldf = self.ldf.borrow().clone();
|
622
638
|
ldf.rename(existing, new).into()
|
623
639
|
}
|
624
640
|
|
625
641
|
pub fn reverse(&self) -> Self {
|
626
|
-
let ldf = self.ldf.clone();
|
642
|
+
let ldf = self.ldf.borrow().clone();
|
627
643
|
ldf.reverse().into()
|
628
644
|
}
|
629
645
|
|
630
646
|
pub fn shift(&self, n: &RbExpr, fill_value: Option<&RbExpr>) -> Self {
|
631
|
-
let lf = self.ldf.clone();
|
647
|
+
let lf = self.ldf.borrow().clone();
|
632
648
|
let out = match fill_value {
|
633
649
|
Some(v) => lf.shift_and_fill(n.inner.clone(), v.inner.clone()),
|
634
650
|
None => lf.shift(n.inner.clone()),
|
@@ -637,72 +653,70 @@ impl RbLazyFrame {
|
|
637
653
|
}
|
638
654
|
|
639
655
|
pub fn fill_nan(&self, fill_value: &RbExpr) -> Self {
|
640
|
-
let ldf = self.ldf.clone();
|
656
|
+
let ldf = self.ldf.borrow().clone();
|
641
657
|
ldf.fill_nan(fill_value.inner.clone()).into()
|
642
658
|
}
|
643
659
|
|
644
|
-
pub fn min(&self) ->
|
645
|
-
let ldf = self.ldf.clone();
|
646
|
-
let out = ldf.min()
|
647
|
-
|
660
|
+
pub fn min(&self) -> Self {
|
661
|
+
let ldf = self.ldf.borrow().clone();
|
662
|
+
let out = ldf.min();
|
663
|
+
out.into()
|
648
664
|
}
|
649
665
|
|
650
|
-
pub fn max(&self) ->
|
651
|
-
let ldf = self.ldf.clone();
|
652
|
-
let out = ldf.max()
|
653
|
-
|
666
|
+
pub fn max(&self) -> Self {
|
667
|
+
let ldf = self.ldf.borrow().clone();
|
668
|
+
let out = ldf.max();
|
669
|
+
out.into()
|
654
670
|
}
|
655
671
|
|
656
|
-
pub fn sum(&self) ->
|
657
|
-
let ldf = self.ldf.clone();
|
658
|
-
let out = ldf.sum()
|
659
|
-
|
672
|
+
pub fn sum(&self) -> Self {
|
673
|
+
let ldf = self.ldf.borrow().clone();
|
674
|
+
let out = ldf.sum();
|
675
|
+
out.into()
|
660
676
|
}
|
661
677
|
|
662
|
-
pub fn mean(&self) ->
|
663
|
-
let ldf = self.ldf.clone();
|
664
|
-
let out = ldf.mean()
|
665
|
-
|
678
|
+
pub fn mean(&self) -> Self {
|
679
|
+
let ldf = self.ldf.borrow().clone();
|
680
|
+
let out = ldf.mean();
|
681
|
+
out.into()
|
666
682
|
}
|
667
683
|
|
668
|
-
pub fn std(&self, ddof: u8) ->
|
669
|
-
let ldf = self.ldf.clone();
|
670
|
-
let out = ldf.std(ddof)
|
671
|
-
|
684
|
+
pub fn std(&self, ddof: u8) -> Self {
|
685
|
+
let ldf = self.ldf.borrow().clone();
|
686
|
+
let out = ldf.std(ddof);
|
687
|
+
out.into()
|
672
688
|
}
|
673
689
|
|
674
|
-
pub fn var(&self, ddof: u8) ->
|
675
|
-
let ldf = self.ldf.clone();
|
676
|
-
let out = ldf.var(ddof)
|
677
|
-
|
690
|
+
pub fn var(&self, ddof: u8) -> Self {
|
691
|
+
let ldf = self.ldf.borrow().clone();
|
692
|
+
let out = ldf.var(ddof);
|
693
|
+
out.into()
|
678
694
|
}
|
679
695
|
|
680
|
-
pub fn median(&self) ->
|
681
|
-
let ldf = self.ldf.clone();
|
682
|
-
let out = ldf.median()
|
683
|
-
|
696
|
+
pub fn median(&self) -> Self {
|
697
|
+
let ldf = self.ldf.borrow().clone();
|
698
|
+
let out = ldf.median();
|
699
|
+
out.into()
|
684
700
|
}
|
685
701
|
|
686
702
|
pub fn quantile(
|
687
703
|
&self,
|
688
704
|
quantile: &RbExpr,
|
689
705
|
interpolation: Wrap<QuantileInterpolOptions>,
|
690
|
-
) ->
|
691
|
-
let ldf = self.ldf.clone();
|
692
|
-
let out = ldf
|
693
|
-
|
694
|
-
.map_err(RbPolarsErr::from)?;
|
695
|
-
Ok(out.into())
|
706
|
+
) -> Self {
|
707
|
+
let ldf = self.ldf.borrow().clone();
|
708
|
+
let out = ldf.quantile(quantile.inner.clone(), interpolation.0);
|
709
|
+
out.into()
|
696
710
|
}
|
697
711
|
|
698
712
|
pub fn explode(&self, column: RArray) -> RbResult<Self> {
|
699
|
-
let ldf = self.ldf.clone();
|
713
|
+
let ldf = self.ldf.borrow().clone();
|
700
714
|
let column = rb_exprs_to_exprs(column)?;
|
701
715
|
Ok(ldf.explode(column).into())
|
702
716
|
}
|
703
717
|
|
704
718
|
pub fn null_count(&self) -> Self {
|
705
|
-
let ldf = self.ldf.clone();
|
719
|
+
let ldf = self.ldf.borrow().clone();
|
706
720
|
ldf.null_count().into()
|
707
721
|
}
|
708
722
|
|
@@ -712,7 +726,7 @@ impl RbLazyFrame {
|
|
712
726
|
subset: Option<Vec<String>>,
|
713
727
|
keep: Wrap<UniqueKeepStrategy>,
|
714
728
|
) -> RbResult<Self> {
|
715
|
-
let ldf = self.ldf.clone();
|
729
|
+
let ldf = self.ldf.borrow().clone();
|
716
730
|
Ok(match maintain_order {
|
717
731
|
true => ldf.unique_stable(subset, keep.0),
|
718
732
|
false => ldf.unique(subset, keep.0),
|
@@ -721,75 +735,63 @@ impl RbLazyFrame {
|
|
721
735
|
}
|
722
736
|
|
723
737
|
pub fn drop_nulls(&self, subset: Option<Vec<String>>) -> Self {
|
724
|
-
let ldf = self.ldf.clone();
|
738
|
+
let ldf = self.ldf.borrow().clone();
|
725
739
|
ldf.drop_nulls(subset.map(|v| v.into_iter().map(|s| col(&s)).collect()))
|
726
740
|
.into()
|
727
741
|
}
|
728
742
|
|
729
743
|
pub fn slice(&self, offset: i64, len: Option<IdxSize>) -> Self {
|
730
|
-
let ldf = self.ldf.clone();
|
744
|
+
let ldf = self.ldf.borrow().clone();
|
731
745
|
ldf.slice(offset, len.unwrap_or(IdxSize::MAX)).into()
|
732
746
|
}
|
733
747
|
|
734
748
|
pub fn tail(&self, n: IdxSize) -> Self {
|
735
|
-
let ldf = self.ldf.clone();
|
749
|
+
let ldf = self.ldf.borrow().clone();
|
736
750
|
ldf.tail(n).into()
|
737
751
|
}
|
738
752
|
|
739
|
-
pub fn
|
753
|
+
pub fn unpivot(
|
740
754
|
&self,
|
741
|
-
|
742
|
-
|
755
|
+
on: Vec<String>,
|
756
|
+
index: Vec<String>,
|
743
757
|
value_name: Option<String>,
|
744
758
|
variable_name: Option<String>,
|
745
759
|
streamable: bool,
|
746
760
|
) -> Self {
|
747
|
-
let args =
|
748
|
-
|
749
|
-
|
761
|
+
let args = UnpivotArgs {
|
762
|
+
on: strings_to_smartstrings(on),
|
763
|
+
index: strings_to_smartstrings(index),
|
750
764
|
value_name: value_name.map(|s| s.into()),
|
751
765
|
variable_name: variable_name.map(|s| s.into()),
|
752
766
|
streamable,
|
753
767
|
};
|
754
768
|
|
755
|
-
let ldf = self.ldf.clone();
|
756
|
-
ldf.
|
769
|
+
let ldf = self.ldf.borrow().clone();
|
770
|
+
ldf.unpivot(args).into()
|
757
771
|
}
|
758
772
|
|
759
773
|
pub fn with_row_index(&self, name: String, offset: Option<IdxSize>) -> Self {
|
760
|
-
let ldf = self.ldf.clone();
|
774
|
+
let ldf = self.ldf.borrow().clone();
|
761
775
|
ldf.with_row_index(&name, offset).into()
|
762
776
|
}
|
763
777
|
|
764
778
|
pub fn drop(&self, cols: Vec<String>) -> Self {
|
765
|
-
let ldf = self.ldf.clone();
|
779
|
+
let ldf = self.ldf.borrow().clone();
|
766
780
|
ldf.drop(cols).into()
|
767
781
|
}
|
768
782
|
|
769
783
|
pub fn cast_all(&self, dtype: Wrap<DataType>, strict: bool) -> Self {
|
770
|
-
self.ldf.clone().cast_all(dtype.0, strict).into()
|
784
|
+
self.ldf.borrow().clone().cast_all(dtype.0, strict).into()
|
771
785
|
}
|
772
786
|
|
773
787
|
pub fn clone(&self) -> Self {
|
774
|
-
self.ldf.clone().into()
|
788
|
+
self.ldf.borrow().clone().into()
|
775
789
|
}
|
776
790
|
|
777
|
-
pub fn
|
778
|
-
let schema = self.
|
779
|
-
let iter = schema.iter_names().map(|s| s.as_str());
|
780
|
-
Ok(RArray::from_iter(iter))
|
781
|
-
}
|
791
|
+
pub fn collect_schema(&self) -> RbResult<RHash> {
|
792
|
+
let schema = self.ldf.borrow_mut().schema().map_err(RbPolarsErr::from)?;
|
782
793
|
|
783
|
-
pub fn dtypes(&self) -> RbResult<RArray> {
|
784
|
-
let schema = self.get_schema()?;
|
785
|
-
let iter = schema.iter_dtypes().map(|dt| Wrap(dt.clone()).into_value());
|
786
|
-
Ok(RArray::from_iter(iter))
|
787
|
-
}
|
788
|
-
|
789
|
-
pub fn schema(&self) -> RbResult<RHash> {
|
790
|
-
let schema = self.get_schema()?;
|
791
794
|
let schema_dict = RHash::new();
|
792
|
-
|
793
795
|
schema.iter_fields().for_each(|fld| {
|
794
796
|
// TODO remove unwrap
|
795
797
|
schema_dict
|
@@ -803,23 +805,20 @@ impl RbLazyFrame {
|
|
803
805
|
}
|
804
806
|
|
805
807
|
pub fn unnest(&self, cols: Vec<String>) -> Self {
|
806
|
-
self.ldf.clone().unnest(cols).into()
|
807
|
-
}
|
808
|
-
|
809
|
-
pub fn width(&self) -> RbResult<usize> {
|
810
|
-
Ok(self.get_schema()?.len())
|
808
|
+
self.ldf.borrow().clone().unnest(cols).into()
|
811
809
|
}
|
812
810
|
|
813
811
|
pub fn count(&self) -> Self {
|
814
|
-
let ldf = self.ldf.clone();
|
812
|
+
let ldf = self.ldf.borrow().clone();
|
815
813
|
ldf.count().into()
|
816
814
|
}
|
817
815
|
|
818
816
|
pub fn merge_sorted(&self, other: &Self, key: String) -> RbResult<Self> {
|
819
817
|
let out = self
|
820
818
|
.ldf
|
819
|
+
.borrow()
|
821
820
|
.clone()
|
822
|
-
.merge_sorted(other.ldf.clone(), &key)
|
821
|
+
.merge_sorted(other.ldf.borrow().clone(), &key)
|
823
822
|
.map_err(RbPolarsErr::from)?;
|
824
823
|
Ok(out.into())
|
825
824
|
}
|