polars-df 0.10.0 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92):
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +27 -0
  3. data/Cargo.lock +392 -351
  4. data/README.md +6 -6
  5. data/ext/polars/Cargo.toml +12 -7
  6. data/ext/polars/src/batched_csv.rs +53 -52
  7. data/ext/polars/src/conversion/any_value.rs +261 -0
  8. data/ext/polars/src/conversion/chunked_array.rs +4 -4
  9. data/ext/polars/src/conversion/mod.rs +60 -66
  10. data/ext/polars/src/dataframe/construction.rs +184 -0
  11. data/ext/polars/src/dataframe/export.rs +48 -0
  12. data/ext/polars/src/dataframe/general.rs +597 -0
  13. data/ext/polars/src/dataframe/io.rs +473 -0
  14. data/ext/polars/src/dataframe/mod.rs +26 -0
  15. data/ext/polars/src/error.rs +26 -4
  16. data/ext/polars/src/expr/categorical.rs +0 -10
  17. data/ext/polars/src/expr/datetime.rs +4 -8
  18. data/ext/polars/src/expr/general.rs +129 -94
  19. data/ext/polars/src/expr/mod.rs +2 -2
  20. data/ext/polars/src/expr/rolling.rs +201 -77
  21. data/ext/polars/src/expr/string.rs +11 -36
  22. data/ext/polars/src/functions/eager.rs +10 -10
  23. data/ext/polars/src/functions/lazy.rs +23 -21
  24. data/ext/polars/src/functions/range.rs +69 -1
  25. data/ext/polars/src/interop/mod.rs +1 -0
  26. data/ext/polars/src/interop/numo/mod.rs +2 -0
  27. data/ext/polars/src/interop/numo/to_numo_df.rs +23 -0
  28. data/ext/polars/src/interop/numo/to_numo_series.rs +61 -0
  29. data/ext/polars/src/lazyframe/mod.rs +135 -136
  30. data/ext/polars/src/lib.rs +94 -59
  31. data/ext/polars/src/map/dataframe.rs +2 -2
  32. data/ext/polars/src/map/lazy.rs +5 -25
  33. data/ext/polars/src/map/series.rs +7 -1
  34. data/ext/polars/src/rb_modules.rs +25 -1
  35. data/ext/polars/src/series/aggregation.rs +49 -30
  36. data/ext/polars/src/series/arithmetic.rs +21 -11
  37. data/ext/polars/src/series/construction.rs +56 -38
  38. data/ext/polars/src/series/export.rs +131 -49
  39. data/ext/polars/src/series/mod.rs +32 -141
  40. data/ext/polars/src/sql.rs +3 -1
  41. data/lib/polars/array_expr.rb +4 -4
  42. data/lib/polars/batched_csv_reader.rb +11 -5
  43. data/lib/polars/cat_expr.rb +0 -36
  44. data/lib/polars/cat_name_space.rb +0 -37
  45. data/lib/polars/convert.rb +6 -1
  46. data/lib/polars/data_frame.rb +176 -403
  47. data/lib/polars/data_types.rb +1 -1
  48. data/lib/polars/date_time_expr.rb +525 -572
  49. data/lib/polars/date_time_name_space.rb +263 -460
  50. data/lib/polars/dynamic_group_by.rb +5 -5
  51. data/lib/polars/exceptions.rb +7 -0
  52. data/lib/polars/expr.rb +1394 -243
  53. data/lib/polars/expr_dispatch.rb +1 -1
  54. data/lib/polars/functions/aggregation/horizontal.rb +8 -8
  55. data/lib/polars/functions/as_datatype.rb +63 -40
  56. data/lib/polars/functions/lazy.rb +63 -14
  57. data/lib/polars/functions/lit.rb +1 -1
  58. data/lib/polars/functions/range/date_range.rb +90 -57
  59. data/lib/polars/functions/range/datetime_range.rb +149 -0
  60. data/lib/polars/functions/range/int_range.rb +2 -2
  61. data/lib/polars/functions/range/time_range.rb +141 -0
  62. data/lib/polars/functions/repeat.rb +1 -1
  63. data/lib/polars/functions/whenthen.rb +1 -1
  64. data/lib/polars/group_by.rb +88 -23
  65. data/lib/polars/io/avro.rb +24 -0
  66. data/lib/polars/{io.rb → io/csv.rb} +299 -493
  67. data/lib/polars/io/database.rb +73 -0
  68. data/lib/polars/io/ipc.rb +247 -0
  69. data/lib/polars/io/json.rb +29 -0
  70. data/lib/polars/io/ndjson.rb +80 -0
  71. data/lib/polars/io/parquet.rb +227 -0
  72. data/lib/polars/lazy_frame.rb +143 -272
  73. data/lib/polars/lazy_group_by.rb +100 -3
  74. data/lib/polars/list_expr.rb +11 -11
  75. data/lib/polars/list_name_space.rb +5 -1
  76. data/lib/polars/rolling_group_by.rb +7 -9
  77. data/lib/polars/series.rb +103 -187
  78. data/lib/polars/string_expr.rb +78 -102
  79. data/lib/polars/string_name_space.rb +5 -4
  80. data/lib/polars/testing.rb +2 -2
  81. data/lib/polars/utils/constants.rb +9 -0
  82. data/lib/polars/utils/convert.rb +97 -0
  83. data/lib/polars/utils/parse.rb +89 -0
  84. data/lib/polars/utils/various.rb +76 -0
  85. data/lib/polars/utils/wrap.rb +19 -0
  86. data/lib/polars/utils.rb +8 -300
  87. data/lib/polars/version.rb +1 -1
  88. data/lib/polars/whenthen.rb +6 -6
  89. data/lib/polars.rb +20 -1
  90. metadata +28 -7
  91. data/ext/polars/src/conversion/anyvalue.rs +0 -186
  92. data/ext/polars/src/dataframe.rs +0 -1208
@@ -15,19 +15,14 @@ use crate::{RbDataFrame, RbExpr, RbLazyGroupBy, RbPolarsErr, RbResult, RbValueEr
15
15
  #[magnus::wrap(class = "Polars::RbLazyFrame")]
16
16
  #[derive(Clone)]
17
17
  pub struct RbLazyFrame {
18
- pub ldf: LazyFrame,
19
- }
20
-
21
- impl RbLazyFrame {
22
- fn get_schema(&self) -> RbResult<SchemaRef> {
23
- let schema = self.ldf.schema().map_err(RbPolarsErr::from)?;
24
- Ok(schema)
25
- }
18
+ pub ldf: RefCell<LazyFrame>,
26
19
  }
27
20
 
28
21
  impl From<LazyFrame> for RbLazyFrame {
29
22
  fn from(ldf: LazyFrame) -> Self {
30
- RbLazyFrame { ldf }
23
+ RbLazyFrame {
24
+ ldf: RefCell::new(ldf),
25
+ }
31
26
  }
32
27
  }
33
28
 
@@ -48,7 +43,7 @@ impl RbLazyFrame {
48
43
  // in this scope
49
44
  let json = unsafe { std::mem::transmute::<&'_ str, &'static str>(json.as_str()) };
50
45
 
51
- let lp = serde_json::from_str::<LogicalPlan>(json)
46
+ let lp = serde_json::from_str::<DslPlan>(json)
52
47
  .map_err(|err| RbValueError::new_err(format!("{:?}", err)))?;
53
48
  Ok(LazyFrame::from(lp).into())
54
49
  }
@@ -63,10 +58,13 @@ impl RbLazyFrame {
63
58
  row_index: Option<(String, IdxSize)>,
64
59
  ) -> RbResult<Self> {
65
60
  let batch_size = batch_size.map(|v| v.0);
66
- let row_index = row_index.map(|(name, offset)| RowIndex { name, offset });
61
+ let row_index = row_index.map(|(name, offset)| RowIndex {
62
+ name: Arc::from(name.as_str()),
63
+ offset,
64
+ });
67
65
 
68
66
  let lf = LazyJsonLineReader::new(path)
69
- .with_infer_schema_length(infer_schema_length)
67
+ .with_infer_schema_length(infer_schema_length.and_then(NonZeroUsize::new))
70
68
  .with_batch_size(batch_size)
71
69
  .with_n_rows(n_rows)
72
70
  .low_memory(low_memory)
@@ -107,7 +105,10 @@ impl RbLazyFrame {
107
105
  let quote_char = quote_char.map(|s| s.as_bytes()[0]);
108
106
  let separator = separator.as_bytes()[0];
109
107
  let eol_char = eol_char.as_bytes()[0];
110
- let row_index = row_index.map(|(name, offset)| RowIndex { name, offset });
108
+ let row_index = row_index.map(|(name, offset)| RowIndex {
109
+ name: Arc::from(name.as_str()),
110
+ offset,
111
+ });
111
112
 
112
113
  let overwrite_dtype = overwrite_dtype.map(|overwrite_dtype| {
113
114
  overwrite_dtype
@@ -119,17 +120,17 @@ impl RbLazyFrame {
119
120
  let r = LazyCsvReader::new(path)
120
121
  .with_infer_schema_length(infer_schema_length)
121
122
  .with_separator(separator)
122
- .has_header(has_header)
123
+ .with_has_header(has_header)
123
124
  .with_ignore_errors(ignore_errors)
124
125
  .with_skip_rows(skip_rows)
125
126
  .with_n_rows(n_rows)
126
127
  .with_cache(cache)
127
- .with_dtype_overwrite(overwrite_dtype.as_ref())
128
+ .with_dtype_overwrite(overwrite_dtype.map(Arc::new))
128
129
  // TODO add with_schema
129
- .low_memory(low_memory)
130
+ .with_low_memory(low_memory)
130
131
  .with_comment_prefix(comment_prefix.as_deref())
131
132
  .with_quote_char(quote_char)
132
- .with_end_of_line_char(eol_char)
133
+ .with_eol_char(eol_char)
133
134
  .with_rechunk(rechunk)
134
135
  .with_skip_rows_after_header(skip_rows_after_header)
135
136
  .with_encoding(encoding.0)
@@ -137,7 +138,7 @@ impl RbLazyFrame {
137
138
  .with_try_parse_dates(try_parse_dates)
138
139
  .with_null_values(null_values)
139
140
  // TODO add with_missing_is_null
140
- .truncate_ragged_lines(truncate_ragged_lines);
141
+ .with_truncate_ragged_lines(truncate_ragged_lines);
141
142
 
142
143
  if let Some(_lambda) = with_schema_modify {
143
144
  todo!();
@@ -157,8 +158,10 @@ impl RbLazyFrame {
157
158
  row_index: Option<(String, IdxSize)>,
158
159
  low_memory: bool,
159
160
  use_statistics: bool,
160
- hive_partitioning: bool,
161
+ hive_partitioning: Option<bool>,
161
162
  hive_schema: Option<Wrap<Schema>>,
163
+ try_parse_hive_dates: bool,
164
+ glob: bool,
162
165
  ) -> RbResult<Self> {
163
166
  let parallel = parallel.0;
164
167
  let hive_schema = hive_schema.map(|s| Arc::new(s.0));
@@ -171,10 +174,15 @@ impl RbLazyFrame {
171
174
  .ok_or_else(|| RbValueError::new_err("expected a path argument".to_string()))?
172
175
  };
173
176
 
174
- let row_index = row_index.map(|(name, offset)| RowIndex { name, offset });
177
+ let row_index = row_index.map(|(name, offset)| RowIndex {
178
+ name: Arc::from(name.as_str()),
179
+ offset,
180
+ });
175
181
  let hive_options = HiveOptions {
176
182
  enabled: hive_partitioning,
183
+ hive_start_idx: 0,
177
184
  schema: hive_schema,
185
+ try_parse_dates: try_parse_hive_dates,
178
186
  };
179
187
 
180
188
  let args = ScanArgsParquet {
@@ -187,6 +195,7 @@ impl RbLazyFrame {
187
195
  cloud_options: None,
188
196
  use_statistics,
189
197
  hive_options,
198
+ glob,
190
199
  };
191
200
 
192
201
  let lf = if path.is_some() {
@@ -206,7 +215,11 @@ impl RbLazyFrame {
206
215
  row_index: Option<(String, IdxSize)>,
207
216
  memory_map: bool,
208
217
  ) -> RbResult<Self> {
209
- let row_index = row_index.map(|(name, offset)| RowIndex { name, offset });
218
+ let row_index = row_index.map(|(name, offset)| RowIndex {
219
+ name: Arc::from(name.as_str()),
220
+ offset,
221
+ });
222
+
210
223
  let args = ScanArgsIpc {
211
224
  n_rows,
212
225
  cache,
@@ -221,18 +234,23 @@ impl RbLazyFrame {
221
234
 
222
235
  pub fn write_json(&self, rb_f: Value) -> RbResult<()> {
223
236
  let file = BufWriter::new(get_file_like(rb_f, true)?);
224
- serde_json::to_writer(file, &self.ldf.logical_plan)
237
+ serde_json::to_writer(file, &self.ldf.borrow().logical_plan)
225
238
  .map_err(|err| RbValueError::new_err(format!("{:?}", err)))?;
226
239
  Ok(())
227
240
  }
228
241
 
229
- pub fn describe_plan(&self) -> String {
230
- self.ldf.describe_plan()
242
+ pub fn describe_plan(&self) -> RbResult<String> {
243
+ self.ldf
244
+ .borrow()
245
+ .describe_plan()
246
+ .map_err(RbPolarsErr::from)
247
+ .map_err(Into::into)
231
248
  }
232
249
 
233
250
  pub fn describe_optimized_plan(&self) -> RbResult<String> {
234
251
  let result = self
235
252
  .ldf
253
+ .borrow()
236
254
  .describe_optimized_plan()
237
255
  .map_err(RbPolarsErr::from)?;
238
256
  Ok(result)
@@ -251,7 +269,7 @@ impl RbLazyFrame {
251
269
  allow_streaming: bool,
252
270
  _eager: bool,
253
271
  ) -> RbLazyFrame {
254
- let ldf = self.ldf.clone();
272
+ let ldf = self.ldf.borrow().clone();
255
273
  let mut ldf = ldf
256
274
  .with_type_coercion(type_coercion)
257
275
  .with_predicate_pushdown(predicate_pushdown)
@@ -275,12 +293,12 @@ impl RbLazyFrame {
275
293
  maintain_order: bool,
276
294
  multithreaded: bool,
277
295
  ) -> Self {
278
- let ldf = self.ldf.clone();
296
+ let ldf = self.ldf.borrow().clone();
279
297
  ldf.sort(
280
298
  [&by_column],
281
299
  SortMultipleOptions {
282
300
  descending: vec![descending],
283
- nulls_last,
301
+ nulls_last: vec![nulls_last],
284
302
  multithreaded,
285
303
  maintain_order,
286
304
  },
@@ -292,11 +310,11 @@ impl RbLazyFrame {
292
310
  &self,
293
311
  by: RArray,
294
312
  descending: Vec<bool>,
295
- nulls_last: bool,
313
+ nulls_last: Vec<bool>,
296
314
  maintain_order: bool,
297
315
  multithreaded: bool,
298
316
  ) -> RbResult<Self> {
299
- let ldf = self.ldf.clone();
317
+ let ldf = self.ldf.borrow().clone();
300
318
  let exprs = rb_exprs_to_exprs(by)?;
301
319
  Ok(ldf
302
320
  .sort_by_exprs(
@@ -312,12 +330,12 @@ impl RbLazyFrame {
312
330
  }
313
331
 
314
332
  pub fn cache(&self) -> Self {
315
- let ldf = self.ldf.clone();
333
+ let ldf = self.ldf.borrow().clone();
316
334
  ldf.cache().into()
317
335
  }
318
336
 
319
337
  pub fn collect(&self) -> RbResult<RbDataFrame> {
320
- let ldf = self.ldf.clone();
338
+ let ldf = self.ldf.borrow().clone();
321
339
  let df = ldf.collect().map_err(RbPolarsErr::from)?;
322
340
  Ok(df.into())
323
341
  }
@@ -328,7 +346,7 @@ impl RbLazyFrame {
328
346
  path: PathBuf,
329
347
  compression: String,
330
348
  compression_level: Option<i32>,
331
- statistics: bool,
349
+ statistics: Wrap<StatisticsOptions>,
332
350
  row_group_size: Option<usize>,
333
351
  data_pagesize_limit: Option<usize>,
334
352
  maintain_order: bool,
@@ -337,13 +355,13 @@ impl RbLazyFrame {
337
355
 
338
356
  let options = ParquetWriteOptions {
339
357
  compression,
340
- statistics,
358
+ statistics: statistics.0,
341
359
  row_group_size,
342
360
  data_pagesize_limit,
343
361
  maintain_order,
344
362
  };
345
363
 
346
- let ldf = self.ldf.clone();
364
+ let ldf = self.ldf.borrow().clone();
347
365
  ldf.sink_parquet(path, options).map_err(RbPolarsErr::from)?;
348
366
  Ok(())
349
367
  }
@@ -359,7 +377,7 @@ impl RbLazyFrame {
359
377
  maintain_order,
360
378
  };
361
379
 
362
- let ldf = self.ldf.clone();
380
+ let ldf = self.ldf.borrow().clone();
363
381
  ldf.sink_ipc(path, options).map_err(RbPolarsErr::from)?;
364
382
  Ok(())
365
383
  }
@@ -377,6 +395,7 @@ impl RbLazyFrame {
377
395
  datetime_format: Option<String>,
378
396
  date_format: Option<String>,
379
397
  time_format: Option<String>,
398
+ float_scientific: Option<bool>,
380
399
  float_precision: Option<usize>,
381
400
  null_value: Option<String>,
382
401
  quote_style: Option<Wrap<QuoteStyle>>,
@@ -389,6 +408,7 @@ impl RbLazyFrame {
389
408
  date_format,
390
409
  time_format,
391
410
  datetime_format,
411
+ float_scientific,
392
412
  float_precision,
393
413
  separator,
394
414
  quote_char,
@@ -405,7 +425,7 @@ impl RbLazyFrame {
405
425
  serialize_options,
406
426
  };
407
427
 
408
- let ldf = self.ldf.clone();
428
+ let ldf = self.ldf.borrow().clone();
409
429
  ldf.sink_csv(path, options).map_err(RbPolarsErr::from)?;
410
430
  Ok(())
411
431
  }
@@ -413,36 +433,36 @@ impl RbLazyFrame {
413
433
  pub fn sink_json(&self, path: PathBuf, maintain_order: bool) -> RbResult<()> {
414
434
  let options = JsonWriterOptions { maintain_order };
415
435
 
416
- let ldf = self.ldf.clone();
436
+ let ldf = self.ldf.borrow().clone();
417
437
  ldf.sink_json(path, options).map_err(RbPolarsErr::from)?;
418
438
  Ok(())
419
439
  }
420
440
 
421
441
  pub fn fetch(&self, n_rows: usize) -> RbResult<RbDataFrame> {
422
- let ldf = self.ldf.clone();
442
+ let ldf = self.ldf.borrow().clone();
423
443
  let df = ldf.fetch(n_rows).map_err(RbPolarsErr::from)?;
424
444
  Ok(df.into())
425
445
  }
426
446
 
427
447
  pub fn filter(&self, predicate: &RbExpr) -> Self {
428
- let ldf = self.ldf.clone();
448
+ let ldf = self.ldf.borrow().clone();
429
449
  ldf.filter(predicate.inner.clone()).into()
430
450
  }
431
451
 
432
452
  pub fn select(&self, exprs: RArray) -> RbResult<Self> {
433
- let ldf = self.ldf.clone();
453
+ let ldf = self.ldf.borrow().clone();
434
454
  let exprs = rb_exprs_to_exprs(exprs)?;
435
455
  Ok(ldf.select(exprs).into())
436
456
  }
437
457
 
438
458
  pub fn select_seq(&self, exprs: RArray) -> RbResult<Self> {
439
- let ldf = self.ldf.clone();
459
+ let ldf = self.ldf.borrow().clone();
440
460
  let exprs = rb_exprs_to_exprs(exprs)?;
441
461
  Ok(ldf.select_seq(exprs).into())
442
462
  }
443
463
 
444
464
  pub fn group_by(&self, by: RArray, maintain_order: bool) -> RbResult<RbLazyGroupBy> {
445
- let ldf = self.ldf.clone();
465
+ let ldf = self.ldf.borrow().clone();
446
466
  let by = rb_exprs_to_exprs(by)?;
447
467
  let lazy_gb = if maintain_order {
448
468
  ldf.group_by_stable(by)
@@ -461,10 +481,9 @@ impl RbLazyFrame {
461
481
  offset: String,
462
482
  closed: Wrap<ClosedWindow>,
463
483
  by: RArray,
464
- check_sorted: bool,
465
484
  ) -> RbResult<RbLazyGroupBy> {
466
485
  let closed_window = closed.0;
467
- let ldf = self.ldf.clone();
486
+ let ldf = self.ldf.borrow().clone();
468
487
  let by = rb_exprs_to_exprs(by)?;
469
488
  let lazy_gb = ldf.rolling(
470
489
  index_column.inner.clone(),
@@ -474,7 +493,6 @@ impl RbLazyFrame {
474
493
  period: Duration::parse(&period),
475
494
  offset: Duration::parse(&offset),
476
495
  closed_window,
477
- check_sorted,
478
496
  },
479
497
  );
480
498
 
@@ -495,11 +513,10 @@ impl RbLazyFrame {
495
513
  closed: Wrap<ClosedWindow>,
496
514
  by: RArray,
497
515
  start_by: Wrap<StartBy>,
498
- check_sorted: bool,
499
516
  ) -> RbResult<RbLazyGroupBy> {
500
517
  let closed_window = closed.0;
501
518
  let by = rb_exprs_to_exprs(by)?;
502
- let ldf = self.ldf.clone();
519
+ let ldf = self.ldf.borrow().clone();
503
520
  let lazy_gb = ldf.group_by_dynamic(
504
521
  index_column.inner.clone(),
505
522
  by,
@@ -511,7 +528,6 @@ impl RbLazyFrame {
511
528
  include_boundaries,
512
529
  closed_window,
513
530
  start_by: start_by.0,
514
- check_sorted,
515
531
  ..Default::default()
516
532
  },
517
533
  );
@@ -523,14 +539,14 @@ impl RbLazyFrame {
523
539
 
524
540
  pub fn with_context(&self, contexts: RArray) -> RbResult<Self> {
525
541
  let contexts = contexts
526
- .each()
527
- .map(|v| TryConvert::try_convert(v.unwrap()))
542
+ .into_iter()
543
+ .map(TryConvert::try_convert)
528
544
  .collect::<RbResult<Vec<&RbLazyFrame>>>()?;
529
545
  let contexts = contexts
530
546
  .into_iter()
531
- .map(|ldf| ldf.ldf.clone())
547
+ .map(|ldf| ldf.ldf.borrow().clone())
532
548
  .collect::<Vec<_>>();
533
- Ok(self.ldf.clone().with_context(contexts).into())
549
+ Ok(self.ldf.borrow().clone().with_context(contexts).into())
534
550
  }
535
551
 
536
552
  #[allow(clippy::too_many_arguments)]
@@ -548,8 +564,8 @@ impl RbLazyFrame {
548
564
  tolerance: Option<Wrap<AnyValue<'_>>>,
549
565
  tolerance_str: Option<String>,
550
566
  ) -> RbResult<Self> {
551
- let ldf = self.ldf.clone();
552
- let other = other.ldf.clone();
567
+ let ldf = self.ldf.borrow().clone();
568
+ let other = other.ldf.borrow().clone();
553
569
  let left_on = left_on.inner.clone();
554
570
  let right_on = right_on.inner.clone();
555
571
  Ok(ldf
@@ -583,8 +599,8 @@ impl RbLazyFrame {
583
599
  how: Wrap<JoinType>,
584
600
  suffix: String,
585
601
  ) -> RbResult<Self> {
586
- let ldf = self.ldf.clone();
587
- let other = other.ldf.clone();
602
+ let ldf = self.ldf.borrow().clone();
603
+ let other = other.ldf.borrow().clone();
588
604
  let left_on = rb_exprs_to_exprs(left_on)?;
589
605
  let right_on = rb_exprs_to_exprs(right_on)?;
590
606
 
@@ -603,32 +619,32 @@ impl RbLazyFrame {
603
619
  }
604
620
 
605
621
  pub fn with_column(&self, expr: &RbExpr) -> Self {
606
- let ldf = self.ldf.clone();
622
+ let ldf = self.ldf.borrow().clone();
607
623
  ldf.with_column(expr.inner.clone()).into()
608
624
  }
609
625
 
610
626
  pub fn with_columns(&self, exprs: RArray) -> RbResult<Self> {
611
- let ldf = self.ldf.clone();
627
+ let ldf = self.ldf.borrow().clone();
612
628
  Ok(ldf.with_columns(rb_exprs_to_exprs(exprs)?).into())
613
629
  }
614
630
 
615
631
  pub fn with_columns_seq(&self, exprs: RArray) -> RbResult<Self> {
616
- let ldf = self.ldf.clone();
632
+ let ldf = self.ldf.borrow().clone();
617
633
  Ok(ldf.with_columns_seq(rb_exprs_to_exprs(exprs)?).into())
618
634
  }
619
635
 
620
636
  pub fn rename(&self, existing: Vec<String>, new: Vec<String>) -> Self {
621
- let ldf = self.ldf.clone();
637
+ let ldf = self.ldf.borrow().clone();
622
638
  ldf.rename(existing, new).into()
623
639
  }
624
640
 
625
641
  pub fn reverse(&self) -> Self {
626
- let ldf = self.ldf.clone();
642
+ let ldf = self.ldf.borrow().clone();
627
643
  ldf.reverse().into()
628
644
  }
629
645
 
630
646
  pub fn shift(&self, n: &RbExpr, fill_value: Option<&RbExpr>) -> Self {
631
- let lf = self.ldf.clone();
647
+ let lf = self.ldf.borrow().clone();
632
648
  let out = match fill_value {
633
649
  Some(v) => lf.shift_and_fill(n.inner.clone(), v.inner.clone()),
634
650
  None => lf.shift(n.inner.clone()),
@@ -637,72 +653,70 @@ impl RbLazyFrame {
637
653
  }
638
654
 
639
655
  pub fn fill_nan(&self, fill_value: &RbExpr) -> Self {
640
- let ldf = self.ldf.clone();
656
+ let ldf = self.ldf.borrow().clone();
641
657
  ldf.fill_nan(fill_value.inner.clone()).into()
642
658
  }
643
659
 
644
- pub fn min(&self) -> RbResult<Self> {
645
- let ldf = self.ldf.clone();
646
- let out = ldf.min().map_err(RbPolarsErr::from)?;
647
- Ok(out.into())
660
+ pub fn min(&self) -> Self {
661
+ let ldf = self.ldf.borrow().clone();
662
+ let out = ldf.min();
663
+ out.into()
648
664
  }
649
665
 
650
- pub fn max(&self) -> RbResult<Self> {
651
- let ldf = self.ldf.clone();
652
- let out = ldf.max().map_err(RbPolarsErr::from)?;
653
- Ok(out.into())
666
+ pub fn max(&self) -> Self {
667
+ let ldf = self.ldf.borrow().clone();
668
+ let out = ldf.max();
669
+ out.into()
654
670
  }
655
671
 
656
- pub fn sum(&self) -> RbResult<Self> {
657
- let ldf = self.ldf.clone();
658
- let out = ldf.sum().map_err(RbPolarsErr::from)?;
659
- Ok(out.into())
672
+ pub fn sum(&self) -> Self {
673
+ let ldf = self.ldf.borrow().clone();
674
+ let out = ldf.sum();
675
+ out.into()
660
676
  }
661
677
 
662
- pub fn mean(&self) -> RbResult<Self> {
663
- let ldf = self.ldf.clone();
664
- let out = ldf.mean().map_err(RbPolarsErr::from)?;
665
- Ok(out.into())
678
+ pub fn mean(&self) -> Self {
679
+ let ldf = self.ldf.borrow().clone();
680
+ let out = ldf.mean();
681
+ out.into()
666
682
  }
667
683
 
668
- pub fn std(&self, ddof: u8) -> RbResult<Self> {
669
- let ldf = self.ldf.clone();
670
- let out = ldf.std(ddof).map_err(RbPolarsErr::from)?;
671
- Ok(out.into())
684
+ pub fn std(&self, ddof: u8) -> Self {
685
+ let ldf = self.ldf.borrow().clone();
686
+ let out = ldf.std(ddof);
687
+ out.into()
672
688
  }
673
689
 
674
- pub fn var(&self, ddof: u8) -> RbResult<Self> {
675
- let ldf = self.ldf.clone();
676
- let out = ldf.var(ddof).map_err(RbPolarsErr::from)?;
677
- Ok(out.into())
690
+ pub fn var(&self, ddof: u8) -> Self {
691
+ let ldf = self.ldf.borrow().clone();
692
+ let out = ldf.var(ddof);
693
+ out.into()
678
694
  }
679
695
 
680
- pub fn median(&self) -> RbResult<Self> {
681
- let ldf = self.ldf.clone();
682
- let out = ldf.median().map_err(RbPolarsErr::from)?;
683
- Ok(out.into())
696
+ pub fn median(&self) -> Self {
697
+ let ldf = self.ldf.borrow().clone();
698
+ let out = ldf.median();
699
+ out.into()
684
700
  }
685
701
 
686
702
  pub fn quantile(
687
703
  &self,
688
704
  quantile: &RbExpr,
689
705
  interpolation: Wrap<QuantileInterpolOptions>,
690
- ) -> RbResult<Self> {
691
- let ldf = self.ldf.clone();
692
- let out = ldf
693
- .quantile(quantile.inner.clone(), interpolation.0)
694
- .map_err(RbPolarsErr::from)?;
695
- Ok(out.into())
706
+ ) -> Self {
707
+ let ldf = self.ldf.borrow().clone();
708
+ let out = ldf.quantile(quantile.inner.clone(), interpolation.0);
709
+ out.into()
696
710
  }
697
711
 
698
712
  pub fn explode(&self, column: RArray) -> RbResult<Self> {
699
- let ldf = self.ldf.clone();
713
+ let ldf = self.ldf.borrow().clone();
700
714
  let column = rb_exprs_to_exprs(column)?;
701
715
  Ok(ldf.explode(column).into())
702
716
  }
703
717
 
704
718
  pub fn null_count(&self) -> Self {
705
- let ldf = self.ldf.clone();
719
+ let ldf = self.ldf.borrow().clone();
706
720
  ldf.null_count().into()
707
721
  }
708
722
 
@@ -712,7 +726,7 @@ impl RbLazyFrame {
712
726
  subset: Option<Vec<String>>,
713
727
  keep: Wrap<UniqueKeepStrategy>,
714
728
  ) -> RbResult<Self> {
715
- let ldf = self.ldf.clone();
729
+ let ldf = self.ldf.borrow().clone();
716
730
  Ok(match maintain_order {
717
731
  true => ldf.unique_stable(subset, keep.0),
718
732
  false => ldf.unique(subset, keep.0),
@@ -721,75 +735,63 @@ impl RbLazyFrame {
721
735
  }
722
736
 
723
737
  pub fn drop_nulls(&self, subset: Option<Vec<String>>) -> Self {
724
- let ldf = self.ldf.clone();
738
+ let ldf = self.ldf.borrow().clone();
725
739
  ldf.drop_nulls(subset.map(|v| v.into_iter().map(|s| col(&s)).collect()))
726
740
  .into()
727
741
  }
728
742
 
729
743
  pub fn slice(&self, offset: i64, len: Option<IdxSize>) -> Self {
730
- let ldf = self.ldf.clone();
744
+ let ldf = self.ldf.borrow().clone();
731
745
  ldf.slice(offset, len.unwrap_or(IdxSize::MAX)).into()
732
746
  }
733
747
 
734
748
  pub fn tail(&self, n: IdxSize) -> Self {
735
- let ldf = self.ldf.clone();
749
+ let ldf = self.ldf.borrow().clone();
736
750
  ldf.tail(n).into()
737
751
  }
738
752
 
739
- pub fn melt(
753
+ pub fn unpivot(
740
754
  &self,
741
- id_vars: Vec<String>,
742
- value_vars: Vec<String>,
755
+ on: Vec<String>,
756
+ index: Vec<String>,
743
757
  value_name: Option<String>,
744
758
  variable_name: Option<String>,
745
759
  streamable: bool,
746
760
  ) -> Self {
747
- let args = MeltArgs {
748
- id_vars: strings_to_smartstrings(id_vars),
749
- value_vars: strings_to_smartstrings(value_vars),
761
+ let args = UnpivotArgs {
762
+ on: strings_to_smartstrings(on),
763
+ index: strings_to_smartstrings(index),
750
764
  value_name: value_name.map(|s| s.into()),
751
765
  variable_name: variable_name.map(|s| s.into()),
752
766
  streamable,
753
767
  };
754
768
 
755
- let ldf = self.ldf.clone();
756
- ldf.melt(args).into()
769
+ let ldf = self.ldf.borrow().clone();
770
+ ldf.unpivot(args).into()
757
771
  }
758
772
 
759
773
  pub fn with_row_index(&self, name: String, offset: Option<IdxSize>) -> Self {
760
- let ldf = self.ldf.clone();
774
+ let ldf = self.ldf.borrow().clone();
761
775
  ldf.with_row_index(&name, offset).into()
762
776
  }
763
777
 
764
778
  pub fn drop(&self, cols: Vec<String>) -> Self {
765
- let ldf = self.ldf.clone();
779
+ let ldf = self.ldf.borrow().clone();
766
780
  ldf.drop(cols).into()
767
781
  }
768
782
 
769
783
  pub fn cast_all(&self, dtype: Wrap<DataType>, strict: bool) -> Self {
770
- self.ldf.clone().cast_all(dtype.0, strict).into()
784
+ self.ldf.borrow().clone().cast_all(dtype.0, strict).into()
771
785
  }
772
786
 
773
787
  pub fn clone(&self) -> Self {
774
- self.ldf.clone().into()
788
+ self.ldf.borrow().clone().into()
775
789
  }
776
790
 
777
- pub fn columns(&self) -> RbResult<RArray> {
778
- let schema = self.get_schema()?;
779
- let iter = schema.iter_names().map(|s| s.as_str());
780
- Ok(RArray::from_iter(iter))
781
- }
791
+ pub fn collect_schema(&self) -> RbResult<RHash> {
792
+ let schema = self.ldf.borrow_mut().schema().map_err(RbPolarsErr::from)?;
782
793
 
783
- pub fn dtypes(&self) -> RbResult<RArray> {
784
- let schema = self.get_schema()?;
785
- let iter = schema.iter_dtypes().map(|dt| Wrap(dt.clone()).into_value());
786
- Ok(RArray::from_iter(iter))
787
- }
788
-
789
- pub fn schema(&self) -> RbResult<RHash> {
790
- let schema = self.get_schema()?;
791
794
  let schema_dict = RHash::new();
792
-
793
795
  schema.iter_fields().for_each(|fld| {
794
796
  // TODO remove unwrap
795
797
  schema_dict
@@ -803,23 +805,20 @@ impl RbLazyFrame {
803
805
  }
804
806
 
805
807
  pub fn unnest(&self, cols: Vec<String>) -> Self {
806
- self.ldf.clone().unnest(cols).into()
807
- }
808
-
809
- pub fn width(&self) -> RbResult<usize> {
810
- Ok(self.get_schema()?.len())
808
+ self.ldf.borrow().clone().unnest(cols).into()
811
809
  }
812
810
 
813
811
  pub fn count(&self) -> Self {
814
- let ldf = self.ldf.clone();
812
+ let ldf = self.ldf.borrow().clone();
815
813
  ldf.count().into()
816
814
  }
817
815
 
818
816
  pub fn merge_sorted(&self, other: &Self, key: String) -> RbResult<Self> {
819
817
  let out = self
820
818
  .ldf
819
+ .borrow()
821
820
  .clone()
822
- .merge_sorted(other.ldf.clone(), &key)
821
+ .merge_sorted(other.ldf.borrow().clone(), &key)
823
822
  .map_err(RbPolarsErr::from)?;
824
823
  Ok(out.into())
825
824
  }