polars-df 0.10.0 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +27 -0
- data/Cargo.lock +392 -351
- data/README.md +6 -6
- data/ext/polars/Cargo.toml +12 -7
- data/ext/polars/src/batched_csv.rs +53 -52
- data/ext/polars/src/conversion/any_value.rs +261 -0
- data/ext/polars/src/conversion/chunked_array.rs +4 -4
- data/ext/polars/src/conversion/mod.rs +60 -66
- data/ext/polars/src/dataframe/construction.rs +184 -0
- data/ext/polars/src/dataframe/export.rs +48 -0
- data/ext/polars/src/dataframe/general.rs +597 -0
- data/ext/polars/src/dataframe/io.rs +473 -0
- data/ext/polars/src/dataframe/mod.rs +26 -0
- data/ext/polars/src/error.rs +26 -4
- data/ext/polars/src/expr/categorical.rs +0 -10
- data/ext/polars/src/expr/datetime.rs +4 -8
- data/ext/polars/src/expr/general.rs +129 -94
- data/ext/polars/src/expr/mod.rs +2 -2
- data/ext/polars/src/expr/rolling.rs +201 -77
- data/ext/polars/src/expr/string.rs +11 -36
- data/ext/polars/src/functions/eager.rs +10 -10
- data/ext/polars/src/functions/lazy.rs +23 -21
- data/ext/polars/src/functions/range.rs +69 -1
- data/ext/polars/src/interop/mod.rs +1 -0
- data/ext/polars/src/interop/numo/mod.rs +2 -0
- data/ext/polars/src/interop/numo/to_numo_df.rs +23 -0
- data/ext/polars/src/interop/numo/to_numo_series.rs +61 -0
- data/ext/polars/src/lazyframe/mod.rs +135 -136
- data/ext/polars/src/lib.rs +94 -59
- data/ext/polars/src/map/dataframe.rs +2 -2
- data/ext/polars/src/map/lazy.rs +5 -25
- data/ext/polars/src/map/series.rs +7 -1
- data/ext/polars/src/rb_modules.rs +25 -1
- data/ext/polars/src/series/aggregation.rs +49 -30
- data/ext/polars/src/series/arithmetic.rs +21 -11
- data/ext/polars/src/series/construction.rs +56 -38
- data/ext/polars/src/series/export.rs +131 -49
- data/ext/polars/src/series/mod.rs +32 -141
- data/ext/polars/src/sql.rs +3 -1
- data/lib/polars/array_expr.rb +4 -4
- data/lib/polars/batched_csv_reader.rb +11 -5
- data/lib/polars/cat_expr.rb +0 -36
- data/lib/polars/cat_name_space.rb +0 -37
- data/lib/polars/convert.rb +6 -1
- data/lib/polars/data_frame.rb +176 -403
- data/lib/polars/data_types.rb +1 -1
- data/lib/polars/date_time_expr.rb +525 -572
- data/lib/polars/date_time_name_space.rb +263 -460
- data/lib/polars/dynamic_group_by.rb +5 -5
- data/lib/polars/exceptions.rb +7 -0
- data/lib/polars/expr.rb +1394 -243
- data/lib/polars/expr_dispatch.rb +1 -1
- data/lib/polars/functions/aggregation/horizontal.rb +8 -8
- data/lib/polars/functions/as_datatype.rb +63 -40
- data/lib/polars/functions/lazy.rb +63 -14
- data/lib/polars/functions/lit.rb +1 -1
- data/lib/polars/functions/range/date_range.rb +90 -57
- data/lib/polars/functions/range/datetime_range.rb +149 -0
- data/lib/polars/functions/range/int_range.rb +2 -2
- data/lib/polars/functions/range/time_range.rb +141 -0
- data/lib/polars/functions/repeat.rb +1 -1
- data/lib/polars/functions/whenthen.rb +1 -1
- data/lib/polars/group_by.rb +88 -23
- data/lib/polars/io/avro.rb +24 -0
- data/lib/polars/{io.rb → io/csv.rb} +299 -493
- data/lib/polars/io/database.rb +73 -0
- data/lib/polars/io/ipc.rb +247 -0
- data/lib/polars/io/json.rb +29 -0
- data/lib/polars/io/ndjson.rb +80 -0
- data/lib/polars/io/parquet.rb +227 -0
- data/lib/polars/lazy_frame.rb +143 -272
- data/lib/polars/lazy_group_by.rb +100 -3
- data/lib/polars/list_expr.rb +11 -11
- data/lib/polars/list_name_space.rb +5 -1
- data/lib/polars/rolling_group_by.rb +7 -9
- data/lib/polars/series.rb +103 -187
- data/lib/polars/string_expr.rb +78 -102
- data/lib/polars/string_name_space.rb +5 -4
- data/lib/polars/testing.rb +2 -2
- data/lib/polars/utils/constants.rb +9 -0
- data/lib/polars/utils/convert.rb +97 -0
- data/lib/polars/utils/parse.rb +89 -0
- data/lib/polars/utils/various.rb +76 -0
- data/lib/polars/utils/wrap.rb +19 -0
- data/lib/polars/utils.rb +8 -300
- data/lib/polars/version.rb +1 -1
- data/lib/polars/whenthen.rb +6 -6
- data/lib/polars.rb +20 -1
- metadata +28 -7
- data/ext/polars/src/conversion/anyvalue.rs +0 -186
- data/ext/polars/src/dataframe.rs +0 -1208
@@ -15,19 +15,14 @@ use crate::{RbDataFrame, RbExpr, RbLazyGroupBy, RbPolarsErr, RbResult, RbValueEr
|
|
15
15
|
#[magnus::wrap(class = "Polars::RbLazyFrame")]
|
16
16
|
#[derive(Clone)]
|
17
17
|
pub struct RbLazyFrame {
|
18
|
-
pub ldf: LazyFrame
|
19
|
-
}
|
20
|
-
|
21
|
-
impl RbLazyFrame {
|
22
|
-
fn get_schema(&self) -> RbResult<SchemaRef> {
|
23
|
-
let schema = self.ldf.schema().map_err(RbPolarsErr::from)?;
|
24
|
-
Ok(schema)
|
25
|
-
}
|
18
|
+
pub ldf: RefCell<LazyFrame>,
|
26
19
|
}
|
27
20
|
|
28
21
|
impl From<LazyFrame> for RbLazyFrame {
|
29
22
|
fn from(ldf: LazyFrame) -> Self {
|
30
|
-
RbLazyFrame {
|
23
|
+
RbLazyFrame {
|
24
|
+
ldf: RefCell::new(ldf),
|
25
|
+
}
|
31
26
|
}
|
32
27
|
}
|
33
28
|
|
@@ -48,7 +43,7 @@ impl RbLazyFrame {
|
|
48
43
|
// in this scope
|
49
44
|
let json = unsafe { std::mem::transmute::<&'_ str, &'static str>(json.as_str()) };
|
50
45
|
|
51
|
-
let lp = serde_json::from_str::<
|
46
|
+
let lp = serde_json::from_str::<DslPlan>(json)
|
52
47
|
.map_err(|err| RbValueError::new_err(format!("{:?}", err)))?;
|
53
48
|
Ok(LazyFrame::from(lp).into())
|
54
49
|
}
|
@@ -63,10 +58,13 @@ impl RbLazyFrame {
|
|
63
58
|
row_index: Option<(String, IdxSize)>,
|
64
59
|
) -> RbResult<Self> {
|
65
60
|
let batch_size = batch_size.map(|v| v.0);
|
66
|
-
let row_index = row_index.map(|(name, offset)| RowIndex {
|
61
|
+
let row_index = row_index.map(|(name, offset)| RowIndex {
|
62
|
+
name: Arc::from(name.as_str()),
|
63
|
+
offset,
|
64
|
+
});
|
67
65
|
|
68
66
|
let lf = LazyJsonLineReader::new(path)
|
69
|
-
.with_infer_schema_length(infer_schema_length)
|
67
|
+
.with_infer_schema_length(infer_schema_length.and_then(NonZeroUsize::new))
|
70
68
|
.with_batch_size(batch_size)
|
71
69
|
.with_n_rows(n_rows)
|
72
70
|
.low_memory(low_memory)
|
@@ -107,7 +105,10 @@ impl RbLazyFrame {
|
|
107
105
|
let quote_char = quote_char.map(|s| s.as_bytes()[0]);
|
108
106
|
let separator = separator.as_bytes()[0];
|
109
107
|
let eol_char = eol_char.as_bytes()[0];
|
110
|
-
let row_index = row_index.map(|(name, offset)| RowIndex {
|
108
|
+
let row_index = row_index.map(|(name, offset)| RowIndex {
|
109
|
+
name: Arc::from(name.as_str()),
|
110
|
+
offset,
|
111
|
+
});
|
111
112
|
|
112
113
|
let overwrite_dtype = overwrite_dtype.map(|overwrite_dtype| {
|
113
114
|
overwrite_dtype
|
@@ -119,17 +120,17 @@ impl RbLazyFrame {
|
|
119
120
|
let r = LazyCsvReader::new(path)
|
120
121
|
.with_infer_schema_length(infer_schema_length)
|
121
122
|
.with_separator(separator)
|
122
|
-
.
|
123
|
+
.with_has_header(has_header)
|
123
124
|
.with_ignore_errors(ignore_errors)
|
124
125
|
.with_skip_rows(skip_rows)
|
125
126
|
.with_n_rows(n_rows)
|
126
127
|
.with_cache(cache)
|
127
|
-
.with_dtype_overwrite(overwrite_dtype.
|
128
|
+
.with_dtype_overwrite(overwrite_dtype.map(Arc::new))
|
128
129
|
// TODO add with_schema
|
129
|
-
.
|
130
|
+
.with_low_memory(low_memory)
|
130
131
|
.with_comment_prefix(comment_prefix.as_deref())
|
131
132
|
.with_quote_char(quote_char)
|
132
|
-
.
|
133
|
+
.with_eol_char(eol_char)
|
133
134
|
.with_rechunk(rechunk)
|
134
135
|
.with_skip_rows_after_header(skip_rows_after_header)
|
135
136
|
.with_encoding(encoding.0)
|
@@ -137,7 +138,7 @@ impl RbLazyFrame {
|
|
137
138
|
.with_try_parse_dates(try_parse_dates)
|
138
139
|
.with_null_values(null_values)
|
139
140
|
// TODO add with_missing_is_null
|
140
|
-
.
|
141
|
+
.with_truncate_ragged_lines(truncate_ragged_lines);
|
141
142
|
|
142
143
|
if let Some(_lambda) = with_schema_modify {
|
143
144
|
todo!();
|
@@ -157,8 +158,10 @@ impl RbLazyFrame {
|
|
157
158
|
row_index: Option<(String, IdxSize)>,
|
158
159
|
low_memory: bool,
|
159
160
|
use_statistics: bool,
|
160
|
-
hive_partitioning: bool
|
161
|
+
hive_partitioning: Option<bool>,
|
161
162
|
hive_schema: Option<Wrap<Schema>>,
|
163
|
+
try_parse_hive_dates: bool,
|
164
|
+
glob: bool,
|
162
165
|
) -> RbResult<Self> {
|
163
166
|
let parallel = parallel.0;
|
164
167
|
let hive_schema = hive_schema.map(|s| Arc::new(s.0));
|
@@ -171,10 +174,15 @@ impl RbLazyFrame {
|
|
171
174
|
.ok_or_else(|| RbValueError::new_err("expected a path argument".to_string()))?
|
172
175
|
};
|
173
176
|
|
174
|
-
let row_index = row_index.map(|(name, offset)| RowIndex {
|
177
|
+
let row_index = row_index.map(|(name, offset)| RowIndex {
|
178
|
+
name: Arc::from(name.as_str()),
|
179
|
+
offset,
|
180
|
+
});
|
175
181
|
let hive_options = HiveOptions {
|
176
182
|
enabled: hive_partitioning,
|
183
|
+
hive_start_idx: 0,
|
177
184
|
schema: hive_schema,
|
185
|
+
try_parse_dates: try_parse_hive_dates,
|
178
186
|
};
|
179
187
|
|
180
188
|
let args = ScanArgsParquet {
|
@@ -187,6 +195,7 @@ impl RbLazyFrame {
|
|
187
195
|
cloud_options: None,
|
188
196
|
use_statistics,
|
189
197
|
hive_options,
|
198
|
+
glob,
|
190
199
|
};
|
191
200
|
|
192
201
|
let lf = if path.is_some() {
|
@@ -206,7 +215,11 @@ impl RbLazyFrame {
|
|
206
215
|
row_index: Option<(String, IdxSize)>,
|
207
216
|
memory_map: bool,
|
208
217
|
) -> RbResult<Self> {
|
209
|
-
let row_index = row_index.map(|(name, offset)| RowIndex {
|
218
|
+
let row_index = row_index.map(|(name, offset)| RowIndex {
|
219
|
+
name: Arc::from(name.as_str()),
|
220
|
+
offset,
|
221
|
+
});
|
222
|
+
|
210
223
|
let args = ScanArgsIpc {
|
211
224
|
n_rows,
|
212
225
|
cache,
|
@@ -221,18 +234,23 @@ impl RbLazyFrame {
|
|
221
234
|
|
222
235
|
pub fn write_json(&self, rb_f: Value) -> RbResult<()> {
|
223
236
|
let file = BufWriter::new(get_file_like(rb_f, true)?);
|
224
|
-
serde_json::to_writer(file, &self.ldf.logical_plan)
|
237
|
+
serde_json::to_writer(file, &self.ldf.borrow().logical_plan)
|
225
238
|
.map_err(|err| RbValueError::new_err(format!("{:?}", err)))?;
|
226
239
|
Ok(())
|
227
240
|
}
|
228
241
|
|
229
|
-
pub fn describe_plan(&self) -> String {
|
230
|
-
self.ldf
|
242
|
+
pub fn describe_plan(&self) -> RbResult<String> {
|
243
|
+
self.ldf
|
244
|
+
.borrow()
|
245
|
+
.describe_plan()
|
246
|
+
.map_err(RbPolarsErr::from)
|
247
|
+
.map_err(Into::into)
|
231
248
|
}
|
232
249
|
|
233
250
|
pub fn describe_optimized_plan(&self) -> RbResult<String> {
|
234
251
|
let result = self
|
235
252
|
.ldf
|
253
|
+
.borrow()
|
236
254
|
.describe_optimized_plan()
|
237
255
|
.map_err(RbPolarsErr::from)?;
|
238
256
|
Ok(result)
|
@@ -251,7 +269,7 @@ impl RbLazyFrame {
|
|
251
269
|
allow_streaming: bool,
|
252
270
|
_eager: bool,
|
253
271
|
) -> RbLazyFrame {
|
254
|
-
let ldf = self.ldf.clone();
|
272
|
+
let ldf = self.ldf.borrow().clone();
|
255
273
|
let mut ldf = ldf
|
256
274
|
.with_type_coercion(type_coercion)
|
257
275
|
.with_predicate_pushdown(predicate_pushdown)
|
@@ -275,12 +293,12 @@ impl RbLazyFrame {
|
|
275
293
|
maintain_order: bool,
|
276
294
|
multithreaded: bool,
|
277
295
|
) -> Self {
|
278
|
-
let ldf = self.ldf.clone();
|
296
|
+
let ldf = self.ldf.borrow().clone();
|
279
297
|
ldf.sort(
|
280
298
|
[&by_column],
|
281
299
|
SortMultipleOptions {
|
282
300
|
descending: vec![descending],
|
283
|
-
nulls_last,
|
301
|
+
nulls_last: vec![nulls_last],
|
284
302
|
multithreaded,
|
285
303
|
maintain_order,
|
286
304
|
},
|
@@ -292,11 +310,11 @@ impl RbLazyFrame {
|
|
292
310
|
&self,
|
293
311
|
by: RArray,
|
294
312
|
descending: Vec<bool>,
|
295
|
-
nulls_last: bool
|
313
|
+
nulls_last: Vec<bool>,
|
296
314
|
maintain_order: bool,
|
297
315
|
multithreaded: bool,
|
298
316
|
) -> RbResult<Self> {
|
299
|
-
let ldf = self.ldf.clone();
|
317
|
+
let ldf = self.ldf.borrow().clone();
|
300
318
|
let exprs = rb_exprs_to_exprs(by)?;
|
301
319
|
Ok(ldf
|
302
320
|
.sort_by_exprs(
|
@@ -312,12 +330,12 @@ impl RbLazyFrame {
|
|
312
330
|
}
|
313
331
|
|
314
332
|
pub fn cache(&self) -> Self {
|
315
|
-
let ldf = self.ldf.clone();
|
333
|
+
let ldf = self.ldf.borrow().clone();
|
316
334
|
ldf.cache().into()
|
317
335
|
}
|
318
336
|
|
319
337
|
pub fn collect(&self) -> RbResult<RbDataFrame> {
|
320
|
-
let ldf = self.ldf.clone();
|
338
|
+
let ldf = self.ldf.borrow().clone();
|
321
339
|
let df = ldf.collect().map_err(RbPolarsErr::from)?;
|
322
340
|
Ok(df.into())
|
323
341
|
}
|
@@ -328,7 +346,7 @@ impl RbLazyFrame {
|
|
328
346
|
path: PathBuf,
|
329
347
|
compression: String,
|
330
348
|
compression_level: Option<i32>,
|
331
|
-
statistics:
|
349
|
+
statistics: Wrap<StatisticsOptions>,
|
332
350
|
row_group_size: Option<usize>,
|
333
351
|
data_pagesize_limit: Option<usize>,
|
334
352
|
maintain_order: bool,
|
@@ -337,13 +355,13 @@ impl RbLazyFrame {
|
|
337
355
|
|
338
356
|
let options = ParquetWriteOptions {
|
339
357
|
compression,
|
340
|
-
statistics,
|
358
|
+
statistics: statistics.0,
|
341
359
|
row_group_size,
|
342
360
|
data_pagesize_limit,
|
343
361
|
maintain_order,
|
344
362
|
};
|
345
363
|
|
346
|
-
let ldf = self.ldf.clone();
|
364
|
+
let ldf = self.ldf.borrow().clone();
|
347
365
|
ldf.sink_parquet(path, options).map_err(RbPolarsErr::from)?;
|
348
366
|
Ok(())
|
349
367
|
}
|
@@ -359,7 +377,7 @@ impl RbLazyFrame {
|
|
359
377
|
maintain_order,
|
360
378
|
};
|
361
379
|
|
362
|
-
let ldf = self.ldf.clone();
|
380
|
+
let ldf = self.ldf.borrow().clone();
|
363
381
|
ldf.sink_ipc(path, options).map_err(RbPolarsErr::from)?;
|
364
382
|
Ok(())
|
365
383
|
}
|
@@ -377,6 +395,7 @@ impl RbLazyFrame {
|
|
377
395
|
datetime_format: Option<String>,
|
378
396
|
date_format: Option<String>,
|
379
397
|
time_format: Option<String>,
|
398
|
+
float_scientific: Option<bool>,
|
380
399
|
float_precision: Option<usize>,
|
381
400
|
null_value: Option<String>,
|
382
401
|
quote_style: Option<Wrap<QuoteStyle>>,
|
@@ -389,6 +408,7 @@ impl RbLazyFrame {
|
|
389
408
|
date_format,
|
390
409
|
time_format,
|
391
410
|
datetime_format,
|
411
|
+
float_scientific,
|
392
412
|
float_precision,
|
393
413
|
separator,
|
394
414
|
quote_char,
|
@@ -405,7 +425,7 @@ impl RbLazyFrame {
|
|
405
425
|
serialize_options,
|
406
426
|
};
|
407
427
|
|
408
|
-
let ldf = self.ldf.clone();
|
428
|
+
let ldf = self.ldf.borrow().clone();
|
409
429
|
ldf.sink_csv(path, options).map_err(RbPolarsErr::from)?;
|
410
430
|
Ok(())
|
411
431
|
}
|
@@ -413,36 +433,36 @@ impl RbLazyFrame {
|
|
413
433
|
pub fn sink_json(&self, path: PathBuf, maintain_order: bool) -> RbResult<()> {
|
414
434
|
let options = JsonWriterOptions { maintain_order };
|
415
435
|
|
416
|
-
let ldf = self.ldf.clone();
|
436
|
+
let ldf = self.ldf.borrow().clone();
|
417
437
|
ldf.sink_json(path, options).map_err(RbPolarsErr::from)?;
|
418
438
|
Ok(())
|
419
439
|
}
|
420
440
|
|
421
441
|
pub fn fetch(&self, n_rows: usize) -> RbResult<RbDataFrame> {
|
422
|
-
let ldf = self.ldf.clone();
|
442
|
+
let ldf = self.ldf.borrow().clone();
|
423
443
|
let df = ldf.fetch(n_rows).map_err(RbPolarsErr::from)?;
|
424
444
|
Ok(df.into())
|
425
445
|
}
|
426
446
|
|
427
447
|
pub fn filter(&self, predicate: &RbExpr) -> Self {
|
428
|
-
let ldf = self.ldf.clone();
|
448
|
+
let ldf = self.ldf.borrow().clone();
|
429
449
|
ldf.filter(predicate.inner.clone()).into()
|
430
450
|
}
|
431
451
|
|
432
452
|
pub fn select(&self, exprs: RArray) -> RbResult<Self> {
|
433
|
-
let ldf = self.ldf.clone();
|
453
|
+
let ldf = self.ldf.borrow().clone();
|
434
454
|
let exprs = rb_exprs_to_exprs(exprs)?;
|
435
455
|
Ok(ldf.select(exprs).into())
|
436
456
|
}
|
437
457
|
|
438
458
|
pub fn select_seq(&self, exprs: RArray) -> RbResult<Self> {
|
439
|
-
let ldf = self.ldf.clone();
|
459
|
+
let ldf = self.ldf.borrow().clone();
|
440
460
|
let exprs = rb_exprs_to_exprs(exprs)?;
|
441
461
|
Ok(ldf.select_seq(exprs).into())
|
442
462
|
}
|
443
463
|
|
444
464
|
pub fn group_by(&self, by: RArray, maintain_order: bool) -> RbResult<RbLazyGroupBy> {
|
445
|
-
let ldf = self.ldf.clone();
|
465
|
+
let ldf = self.ldf.borrow().clone();
|
446
466
|
let by = rb_exprs_to_exprs(by)?;
|
447
467
|
let lazy_gb = if maintain_order {
|
448
468
|
ldf.group_by_stable(by)
|
@@ -461,10 +481,9 @@ impl RbLazyFrame {
|
|
461
481
|
offset: String,
|
462
482
|
closed: Wrap<ClosedWindow>,
|
463
483
|
by: RArray,
|
464
|
-
check_sorted: bool,
|
465
484
|
) -> RbResult<RbLazyGroupBy> {
|
466
485
|
let closed_window = closed.0;
|
467
|
-
let ldf = self.ldf.clone();
|
486
|
+
let ldf = self.ldf.borrow().clone();
|
468
487
|
let by = rb_exprs_to_exprs(by)?;
|
469
488
|
let lazy_gb = ldf.rolling(
|
470
489
|
index_column.inner.clone(),
|
@@ -474,7 +493,6 @@ impl RbLazyFrame {
|
|
474
493
|
period: Duration::parse(&period),
|
475
494
|
offset: Duration::parse(&offset),
|
476
495
|
closed_window,
|
477
|
-
check_sorted,
|
478
496
|
},
|
479
497
|
);
|
480
498
|
|
@@ -495,11 +513,10 @@ impl RbLazyFrame {
|
|
495
513
|
closed: Wrap<ClosedWindow>,
|
496
514
|
by: RArray,
|
497
515
|
start_by: Wrap<StartBy>,
|
498
|
-
check_sorted: bool,
|
499
516
|
) -> RbResult<RbLazyGroupBy> {
|
500
517
|
let closed_window = closed.0;
|
501
518
|
let by = rb_exprs_to_exprs(by)?;
|
502
|
-
let ldf = self.ldf.clone();
|
519
|
+
let ldf = self.ldf.borrow().clone();
|
503
520
|
let lazy_gb = ldf.group_by_dynamic(
|
504
521
|
index_column.inner.clone(),
|
505
522
|
by,
|
@@ -511,7 +528,6 @@ impl RbLazyFrame {
|
|
511
528
|
include_boundaries,
|
512
529
|
closed_window,
|
513
530
|
start_by: start_by.0,
|
514
|
-
check_sorted,
|
515
531
|
..Default::default()
|
516
532
|
},
|
517
533
|
);
|
@@ -523,14 +539,14 @@ impl RbLazyFrame {
|
|
523
539
|
|
524
540
|
pub fn with_context(&self, contexts: RArray) -> RbResult<Self> {
|
525
541
|
let contexts = contexts
|
526
|
-
.
|
527
|
-
.map(
|
542
|
+
.into_iter()
|
543
|
+
.map(TryConvert::try_convert)
|
528
544
|
.collect::<RbResult<Vec<&RbLazyFrame>>>()?;
|
529
545
|
let contexts = contexts
|
530
546
|
.into_iter()
|
531
|
-
.map(|ldf| ldf.ldf.clone())
|
547
|
+
.map(|ldf| ldf.ldf.borrow().clone())
|
532
548
|
.collect::<Vec<_>>();
|
533
|
-
Ok(self.ldf.clone().with_context(contexts).into())
|
549
|
+
Ok(self.ldf.borrow().clone().with_context(contexts).into())
|
534
550
|
}
|
535
551
|
|
536
552
|
#[allow(clippy::too_many_arguments)]
|
@@ -548,8 +564,8 @@ impl RbLazyFrame {
|
|
548
564
|
tolerance: Option<Wrap<AnyValue<'_>>>,
|
549
565
|
tolerance_str: Option<String>,
|
550
566
|
) -> RbResult<Self> {
|
551
|
-
let ldf = self.ldf.clone();
|
552
|
-
let other = other.ldf.clone();
|
567
|
+
let ldf = self.ldf.borrow().clone();
|
568
|
+
let other = other.ldf.borrow().clone();
|
553
569
|
let left_on = left_on.inner.clone();
|
554
570
|
let right_on = right_on.inner.clone();
|
555
571
|
Ok(ldf
|
@@ -583,8 +599,8 @@ impl RbLazyFrame {
|
|
583
599
|
how: Wrap<JoinType>,
|
584
600
|
suffix: String,
|
585
601
|
) -> RbResult<Self> {
|
586
|
-
let ldf = self.ldf.clone();
|
587
|
-
let other = other.ldf.clone();
|
602
|
+
let ldf = self.ldf.borrow().clone();
|
603
|
+
let other = other.ldf.borrow().clone();
|
588
604
|
let left_on = rb_exprs_to_exprs(left_on)?;
|
589
605
|
let right_on = rb_exprs_to_exprs(right_on)?;
|
590
606
|
|
@@ -603,32 +619,32 @@ impl RbLazyFrame {
|
|
603
619
|
}
|
604
620
|
|
605
621
|
pub fn with_column(&self, expr: &RbExpr) -> Self {
|
606
|
-
let ldf = self.ldf.clone();
|
622
|
+
let ldf = self.ldf.borrow().clone();
|
607
623
|
ldf.with_column(expr.inner.clone()).into()
|
608
624
|
}
|
609
625
|
|
610
626
|
pub fn with_columns(&self, exprs: RArray) -> RbResult<Self> {
|
611
|
-
let ldf = self.ldf.clone();
|
627
|
+
let ldf = self.ldf.borrow().clone();
|
612
628
|
Ok(ldf.with_columns(rb_exprs_to_exprs(exprs)?).into())
|
613
629
|
}
|
614
630
|
|
615
631
|
pub fn with_columns_seq(&self, exprs: RArray) -> RbResult<Self> {
|
616
|
-
let ldf = self.ldf.clone();
|
632
|
+
let ldf = self.ldf.borrow().clone();
|
617
633
|
Ok(ldf.with_columns_seq(rb_exprs_to_exprs(exprs)?).into())
|
618
634
|
}
|
619
635
|
|
620
636
|
pub fn rename(&self, existing: Vec<String>, new: Vec<String>) -> Self {
|
621
|
-
let ldf = self.ldf.clone();
|
637
|
+
let ldf = self.ldf.borrow().clone();
|
622
638
|
ldf.rename(existing, new).into()
|
623
639
|
}
|
624
640
|
|
625
641
|
pub fn reverse(&self) -> Self {
|
626
|
-
let ldf = self.ldf.clone();
|
642
|
+
let ldf = self.ldf.borrow().clone();
|
627
643
|
ldf.reverse().into()
|
628
644
|
}
|
629
645
|
|
630
646
|
pub fn shift(&self, n: &RbExpr, fill_value: Option<&RbExpr>) -> Self {
|
631
|
-
let lf = self.ldf.clone();
|
647
|
+
let lf = self.ldf.borrow().clone();
|
632
648
|
let out = match fill_value {
|
633
649
|
Some(v) => lf.shift_and_fill(n.inner.clone(), v.inner.clone()),
|
634
650
|
None => lf.shift(n.inner.clone()),
|
@@ -637,72 +653,70 @@ impl RbLazyFrame {
|
|
637
653
|
}
|
638
654
|
|
639
655
|
pub fn fill_nan(&self, fill_value: &RbExpr) -> Self {
|
640
|
-
let ldf = self.ldf.clone();
|
656
|
+
let ldf = self.ldf.borrow().clone();
|
641
657
|
ldf.fill_nan(fill_value.inner.clone()).into()
|
642
658
|
}
|
643
659
|
|
644
|
-
pub fn min(&self) ->
|
645
|
-
let ldf = self.ldf.clone();
|
646
|
-
let out = ldf.min()
|
647
|
-
|
660
|
+
pub fn min(&self) -> Self {
|
661
|
+
let ldf = self.ldf.borrow().clone();
|
662
|
+
let out = ldf.min();
|
663
|
+
out.into()
|
648
664
|
}
|
649
665
|
|
650
|
-
pub fn max(&self) ->
|
651
|
-
let ldf = self.ldf.clone();
|
652
|
-
let out = ldf.max()
|
653
|
-
|
666
|
+
pub fn max(&self) -> Self {
|
667
|
+
let ldf = self.ldf.borrow().clone();
|
668
|
+
let out = ldf.max();
|
669
|
+
out.into()
|
654
670
|
}
|
655
671
|
|
656
|
-
pub fn sum(&self) ->
|
657
|
-
let ldf = self.ldf.clone();
|
658
|
-
let out = ldf.sum()
|
659
|
-
|
672
|
+
pub fn sum(&self) -> Self {
|
673
|
+
let ldf = self.ldf.borrow().clone();
|
674
|
+
let out = ldf.sum();
|
675
|
+
out.into()
|
660
676
|
}
|
661
677
|
|
662
|
-
pub fn mean(&self) ->
|
663
|
-
let ldf = self.ldf.clone();
|
664
|
-
let out = ldf.mean()
|
665
|
-
|
678
|
+
pub fn mean(&self) -> Self {
|
679
|
+
let ldf = self.ldf.borrow().clone();
|
680
|
+
let out = ldf.mean();
|
681
|
+
out.into()
|
666
682
|
}
|
667
683
|
|
668
|
-
pub fn std(&self, ddof: u8) ->
|
669
|
-
let ldf = self.ldf.clone();
|
670
|
-
let out = ldf.std(ddof)
|
671
|
-
|
684
|
+
pub fn std(&self, ddof: u8) -> Self {
|
685
|
+
let ldf = self.ldf.borrow().clone();
|
686
|
+
let out = ldf.std(ddof);
|
687
|
+
out.into()
|
672
688
|
}
|
673
689
|
|
674
|
-
pub fn var(&self, ddof: u8) ->
|
675
|
-
let ldf = self.ldf.clone();
|
676
|
-
let out = ldf.var(ddof)
|
677
|
-
|
690
|
+
pub fn var(&self, ddof: u8) -> Self {
|
691
|
+
let ldf = self.ldf.borrow().clone();
|
692
|
+
let out = ldf.var(ddof);
|
693
|
+
out.into()
|
678
694
|
}
|
679
695
|
|
680
|
-
pub fn median(&self) ->
|
681
|
-
let ldf = self.ldf.clone();
|
682
|
-
let out = ldf.median()
|
683
|
-
|
696
|
+
pub fn median(&self) -> Self {
|
697
|
+
let ldf = self.ldf.borrow().clone();
|
698
|
+
let out = ldf.median();
|
699
|
+
out.into()
|
684
700
|
}
|
685
701
|
|
686
702
|
pub fn quantile(
|
687
703
|
&self,
|
688
704
|
quantile: &RbExpr,
|
689
705
|
interpolation: Wrap<QuantileInterpolOptions>,
|
690
|
-
) ->
|
691
|
-
let ldf = self.ldf.clone();
|
692
|
-
let out = ldf
|
693
|
-
|
694
|
-
.map_err(RbPolarsErr::from)?;
|
695
|
-
Ok(out.into())
|
706
|
+
) -> Self {
|
707
|
+
let ldf = self.ldf.borrow().clone();
|
708
|
+
let out = ldf.quantile(quantile.inner.clone(), interpolation.0);
|
709
|
+
out.into()
|
696
710
|
}
|
697
711
|
|
698
712
|
pub fn explode(&self, column: RArray) -> RbResult<Self> {
|
699
|
-
let ldf = self.ldf.clone();
|
713
|
+
let ldf = self.ldf.borrow().clone();
|
700
714
|
let column = rb_exprs_to_exprs(column)?;
|
701
715
|
Ok(ldf.explode(column).into())
|
702
716
|
}
|
703
717
|
|
704
718
|
pub fn null_count(&self) -> Self {
|
705
|
-
let ldf = self.ldf.clone();
|
719
|
+
let ldf = self.ldf.borrow().clone();
|
706
720
|
ldf.null_count().into()
|
707
721
|
}
|
708
722
|
|
@@ -712,7 +726,7 @@ impl RbLazyFrame {
|
|
712
726
|
subset: Option<Vec<String>>,
|
713
727
|
keep: Wrap<UniqueKeepStrategy>,
|
714
728
|
) -> RbResult<Self> {
|
715
|
-
let ldf = self.ldf.clone();
|
729
|
+
let ldf = self.ldf.borrow().clone();
|
716
730
|
Ok(match maintain_order {
|
717
731
|
true => ldf.unique_stable(subset, keep.0),
|
718
732
|
false => ldf.unique(subset, keep.0),
|
@@ -721,75 +735,63 @@ impl RbLazyFrame {
|
|
721
735
|
}
|
722
736
|
|
723
737
|
pub fn drop_nulls(&self, subset: Option<Vec<String>>) -> Self {
|
724
|
-
let ldf = self.ldf.clone();
|
738
|
+
let ldf = self.ldf.borrow().clone();
|
725
739
|
ldf.drop_nulls(subset.map(|v| v.into_iter().map(|s| col(&s)).collect()))
|
726
740
|
.into()
|
727
741
|
}
|
728
742
|
|
729
743
|
pub fn slice(&self, offset: i64, len: Option<IdxSize>) -> Self {
|
730
|
-
let ldf = self.ldf.clone();
|
744
|
+
let ldf = self.ldf.borrow().clone();
|
731
745
|
ldf.slice(offset, len.unwrap_or(IdxSize::MAX)).into()
|
732
746
|
}
|
733
747
|
|
734
748
|
pub fn tail(&self, n: IdxSize) -> Self {
|
735
|
-
let ldf = self.ldf.clone();
|
749
|
+
let ldf = self.ldf.borrow().clone();
|
736
750
|
ldf.tail(n).into()
|
737
751
|
}
|
738
752
|
|
739
|
-
pub fn
|
753
|
+
pub fn unpivot(
|
740
754
|
&self,
|
741
|
-
|
742
|
-
|
755
|
+
on: Vec<String>,
|
756
|
+
index: Vec<String>,
|
743
757
|
value_name: Option<String>,
|
744
758
|
variable_name: Option<String>,
|
745
759
|
streamable: bool,
|
746
760
|
) -> Self {
|
747
|
-
let args =
|
748
|
-
|
749
|
-
|
761
|
+
let args = UnpivotArgs {
|
762
|
+
on: strings_to_smartstrings(on),
|
763
|
+
index: strings_to_smartstrings(index),
|
750
764
|
value_name: value_name.map(|s| s.into()),
|
751
765
|
variable_name: variable_name.map(|s| s.into()),
|
752
766
|
streamable,
|
753
767
|
};
|
754
768
|
|
755
|
-
let ldf = self.ldf.clone();
|
756
|
-
ldf.
|
769
|
+
let ldf = self.ldf.borrow().clone();
|
770
|
+
ldf.unpivot(args).into()
|
757
771
|
}
|
758
772
|
|
759
773
|
pub fn with_row_index(&self, name: String, offset: Option<IdxSize>) -> Self {
|
760
|
-
let ldf = self.ldf.clone();
|
774
|
+
let ldf = self.ldf.borrow().clone();
|
761
775
|
ldf.with_row_index(&name, offset).into()
|
762
776
|
}
|
763
777
|
|
764
778
|
pub fn drop(&self, cols: Vec<String>) -> Self {
|
765
|
-
let ldf = self.ldf.clone();
|
779
|
+
let ldf = self.ldf.borrow().clone();
|
766
780
|
ldf.drop(cols).into()
|
767
781
|
}
|
768
782
|
|
769
783
|
pub fn cast_all(&self, dtype: Wrap<DataType>, strict: bool) -> Self {
|
770
|
-
self.ldf.clone().cast_all(dtype.0, strict).into()
|
784
|
+
self.ldf.borrow().clone().cast_all(dtype.0, strict).into()
|
771
785
|
}
|
772
786
|
|
773
787
|
pub fn clone(&self) -> Self {
|
774
|
-
self.ldf.clone().into()
|
788
|
+
self.ldf.borrow().clone().into()
|
775
789
|
}
|
776
790
|
|
777
|
-
pub fn
|
778
|
-
let schema = self.
|
779
|
-
let iter = schema.iter_names().map(|s| s.as_str());
|
780
|
-
Ok(RArray::from_iter(iter))
|
781
|
-
}
|
791
|
+
pub fn collect_schema(&self) -> RbResult<RHash> {
|
792
|
+
let schema = self.ldf.borrow_mut().schema().map_err(RbPolarsErr::from)?;
|
782
793
|
|
783
|
-
pub fn dtypes(&self) -> RbResult<RArray> {
|
784
|
-
let schema = self.get_schema()?;
|
785
|
-
let iter = schema.iter_dtypes().map(|dt| Wrap(dt.clone()).into_value());
|
786
|
-
Ok(RArray::from_iter(iter))
|
787
|
-
}
|
788
|
-
|
789
|
-
pub fn schema(&self) -> RbResult<RHash> {
|
790
|
-
let schema = self.get_schema()?;
|
791
794
|
let schema_dict = RHash::new();
|
792
|
-
|
793
795
|
schema.iter_fields().for_each(|fld| {
|
794
796
|
// TODO remove unwrap
|
795
797
|
schema_dict
|
@@ -803,23 +805,20 @@ impl RbLazyFrame {
|
|
803
805
|
}
|
804
806
|
|
805
807
|
pub fn unnest(&self, cols: Vec<String>) -> Self {
|
806
|
-
self.ldf.clone().unnest(cols).into()
|
807
|
-
}
|
808
|
-
|
809
|
-
pub fn width(&self) -> RbResult<usize> {
|
810
|
-
Ok(self.get_schema()?.len())
|
808
|
+
self.ldf.borrow().clone().unnest(cols).into()
|
811
809
|
}
|
812
810
|
|
813
811
|
pub fn count(&self) -> Self {
|
814
|
-
let ldf = self.ldf.clone();
|
812
|
+
let ldf = self.ldf.borrow().clone();
|
815
813
|
ldf.count().into()
|
816
814
|
}
|
817
815
|
|
818
816
|
pub fn merge_sorted(&self, other: &Self, key: String) -> RbResult<Self> {
|
819
817
|
let out = self
|
820
818
|
.ldf
|
819
|
+
.borrow()
|
821
820
|
.clone()
|
822
|
-
.merge_sorted(other.ldf.clone(), &key)
|
821
|
+
.merge_sorted(other.ldf.borrow().clone(), &key)
|
823
822
|
.map_err(RbPolarsErr::from)?;
|
824
823
|
Ok(out.into())
|
825
824
|
}
|