polars-df 0.10.0 → 0.12.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (92) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +27 -0
  3. data/Cargo.lock +392 -351
  4. data/README.md +6 -6
  5. data/ext/polars/Cargo.toml +12 -7
  6. data/ext/polars/src/batched_csv.rs +53 -52
  7. data/ext/polars/src/conversion/any_value.rs +261 -0
  8. data/ext/polars/src/conversion/chunked_array.rs +4 -4
  9. data/ext/polars/src/conversion/mod.rs +60 -66
  10. data/ext/polars/src/dataframe/construction.rs +184 -0
  11. data/ext/polars/src/dataframe/export.rs +48 -0
  12. data/ext/polars/src/dataframe/general.rs +597 -0
  13. data/ext/polars/src/dataframe/io.rs +473 -0
  14. data/ext/polars/src/dataframe/mod.rs +26 -0
  15. data/ext/polars/src/error.rs +26 -4
  16. data/ext/polars/src/expr/categorical.rs +0 -10
  17. data/ext/polars/src/expr/datetime.rs +4 -8
  18. data/ext/polars/src/expr/general.rs +129 -94
  19. data/ext/polars/src/expr/mod.rs +2 -2
  20. data/ext/polars/src/expr/rolling.rs +201 -77
  21. data/ext/polars/src/expr/string.rs +11 -36
  22. data/ext/polars/src/functions/eager.rs +10 -10
  23. data/ext/polars/src/functions/lazy.rs +23 -21
  24. data/ext/polars/src/functions/range.rs +69 -1
  25. data/ext/polars/src/interop/mod.rs +1 -0
  26. data/ext/polars/src/interop/numo/mod.rs +2 -0
  27. data/ext/polars/src/interop/numo/to_numo_df.rs +23 -0
  28. data/ext/polars/src/interop/numo/to_numo_series.rs +61 -0
  29. data/ext/polars/src/lazyframe/mod.rs +135 -136
  30. data/ext/polars/src/lib.rs +94 -59
  31. data/ext/polars/src/map/dataframe.rs +2 -2
  32. data/ext/polars/src/map/lazy.rs +5 -25
  33. data/ext/polars/src/map/series.rs +7 -1
  34. data/ext/polars/src/rb_modules.rs +25 -1
  35. data/ext/polars/src/series/aggregation.rs +49 -30
  36. data/ext/polars/src/series/arithmetic.rs +21 -11
  37. data/ext/polars/src/series/construction.rs +56 -38
  38. data/ext/polars/src/series/export.rs +131 -49
  39. data/ext/polars/src/series/mod.rs +32 -141
  40. data/ext/polars/src/sql.rs +3 -1
  41. data/lib/polars/array_expr.rb +4 -4
  42. data/lib/polars/batched_csv_reader.rb +11 -5
  43. data/lib/polars/cat_expr.rb +0 -36
  44. data/lib/polars/cat_name_space.rb +0 -37
  45. data/lib/polars/convert.rb +6 -1
  46. data/lib/polars/data_frame.rb +176 -403
  47. data/lib/polars/data_types.rb +1 -1
  48. data/lib/polars/date_time_expr.rb +525 -572
  49. data/lib/polars/date_time_name_space.rb +263 -460
  50. data/lib/polars/dynamic_group_by.rb +5 -5
  51. data/lib/polars/exceptions.rb +7 -0
  52. data/lib/polars/expr.rb +1394 -243
  53. data/lib/polars/expr_dispatch.rb +1 -1
  54. data/lib/polars/functions/aggregation/horizontal.rb +8 -8
  55. data/lib/polars/functions/as_datatype.rb +63 -40
  56. data/lib/polars/functions/lazy.rb +63 -14
  57. data/lib/polars/functions/lit.rb +1 -1
  58. data/lib/polars/functions/range/date_range.rb +90 -57
  59. data/lib/polars/functions/range/datetime_range.rb +149 -0
  60. data/lib/polars/functions/range/int_range.rb +2 -2
  61. data/lib/polars/functions/range/time_range.rb +141 -0
  62. data/lib/polars/functions/repeat.rb +1 -1
  63. data/lib/polars/functions/whenthen.rb +1 -1
  64. data/lib/polars/group_by.rb +88 -23
  65. data/lib/polars/io/avro.rb +24 -0
  66. data/lib/polars/{io.rb → io/csv.rb} +299 -493
  67. data/lib/polars/io/database.rb +73 -0
  68. data/lib/polars/io/ipc.rb +247 -0
  69. data/lib/polars/io/json.rb +29 -0
  70. data/lib/polars/io/ndjson.rb +80 -0
  71. data/lib/polars/io/parquet.rb +227 -0
  72. data/lib/polars/lazy_frame.rb +143 -272
  73. data/lib/polars/lazy_group_by.rb +100 -3
  74. data/lib/polars/list_expr.rb +11 -11
  75. data/lib/polars/list_name_space.rb +5 -1
  76. data/lib/polars/rolling_group_by.rb +7 -9
  77. data/lib/polars/series.rb +103 -187
  78. data/lib/polars/string_expr.rb +78 -102
  79. data/lib/polars/string_name_space.rb +5 -4
  80. data/lib/polars/testing.rb +2 -2
  81. data/lib/polars/utils/constants.rb +9 -0
  82. data/lib/polars/utils/convert.rb +97 -0
  83. data/lib/polars/utils/parse.rb +89 -0
  84. data/lib/polars/utils/various.rb +76 -0
  85. data/lib/polars/utils/wrap.rb +19 -0
  86. data/lib/polars/utils.rb +8 -300
  87. data/lib/polars/version.rb +1 -1
  88. data/lib/polars/whenthen.rb +6 -6
  89. data/lib/polars.rb +20 -1
  90. metadata +28 -7
  91. data/ext/polars/src/conversion/anyvalue.rs +0 -186
  92. data/ext/polars/src/dataframe.rs +0 -1208
@@ -15,19 +15,14 @@ use crate::{RbDataFrame, RbExpr, RbLazyGroupBy, RbPolarsErr, RbResult, RbValueEr
15
15
  #[magnus::wrap(class = "Polars::RbLazyFrame")]
16
16
  #[derive(Clone)]
17
17
  pub struct RbLazyFrame {
18
- pub ldf: LazyFrame,
19
- }
20
-
21
- impl RbLazyFrame {
22
- fn get_schema(&self) -> RbResult<SchemaRef> {
23
- let schema = self.ldf.schema().map_err(RbPolarsErr::from)?;
24
- Ok(schema)
25
- }
18
+ pub ldf: RefCell<LazyFrame>,
26
19
  }
27
20
 
28
21
  impl From<LazyFrame> for RbLazyFrame {
29
22
  fn from(ldf: LazyFrame) -> Self {
30
- RbLazyFrame { ldf }
23
+ RbLazyFrame {
24
+ ldf: RefCell::new(ldf),
25
+ }
31
26
  }
32
27
  }
33
28
 
@@ -48,7 +43,7 @@ impl RbLazyFrame {
48
43
  // in this scope
49
44
  let json = unsafe { std::mem::transmute::<&'_ str, &'static str>(json.as_str()) };
50
45
 
51
- let lp = serde_json::from_str::<LogicalPlan>(json)
46
+ let lp = serde_json::from_str::<DslPlan>(json)
52
47
  .map_err(|err| RbValueError::new_err(format!("{:?}", err)))?;
53
48
  Ok(LazyFrame::from(lp).into())
54
49
  }
@@ -63,10 +58,13 @@ impl RbLazyFrame {
63
58
  row_index: Option<(String, IdxSize)>,
64
59
  ) -> RbResult<Self> {
65
60
  let batch_size = batch_size.map(|v| v.0);
66
- let row_index = row_index.map(|(name, offset)| RowIndex { name, offset });
61
+ let row_index = row_index.map(|(name, offset)| RowIndex {
62
+ name: Arc::from(name.as_str()),
63
+ offset,
64
+ });
67
65
 
68
66
  let lf = LazyJsonLineReader::new(path)
69
- .with_infer_schema_length(infer_schema_length)
67
+ .with_infer_schema_length(infer_schema_length.and_then(NonZeroUsize::new))
70
68
  .with_batch_size(batch_size)
71
69
  .with_n_rows(n_rows)
72
70
  .low_memory(low_memory)
@@ -107,7 +105,10 @@ impl RbLazyFrame {
107
105
  let quote_char = quote_char.map(|s| s.as_bytes()[0]);
108
106
  let separator = separator.as_bytes()[0];
109
107
  let eol_char = eol_char.as_bytes()[0];
110
- let row_index = row_index.map(|(name, offset)| RowIndex { name, offset });
108
+ let row_index = row_index.map(|(name, offset)| RowIndex {
109
+ name: Arc::from(name.as_str()),
110
+ offset,
111
+ });
111
112
 
112
113
  let overwrite_dtype = overwrite_dtype.map(|overwrite_dtype| {
113
114
  overwrite_dtype
@@ -119,17 +120,17 @@ impl RbLazyFrame {
119
120
  let r = LazyCsvReader::new(path)
120
121
  .with_infer_schema_length(infer_schema_length)
121
122
  .with_separator(separator)
122
- .has_header(has_header)
123
+ .with_has_header(has_header)
123
124
  .with_ignore_errors(ignore_errors)
124
125
  .with_skip_rows(skip_rows)
125
126
  .with_n_rows(n_rows)
126
127
  .with_cache(cache)
127
- .with_dtype_overwrite(overwrite_dtype.as_ref())
128
+ .with_dtype_overwrite(overwrite_dtype.map(Arc::new))
128
129
  // TODO add with_schema
129
- .low_memory(low_memory)
130
+ .with_low_memory(low_memory)
130
131
  .with_comment_prefix(comment_prefix.as_deref())
131
132
  .with_quote_char(quote_char)
132
- .with_end_of_line_char(eol_char)
133
+ .with_eol_char(eol_char)
133
134
  .with_rechunk(rechunk)
134
135
  .with_skip_rows_after_header(skip_rows_after_header)
135
136
  .with_encoding(encoding.0)
@@ -137,7 +138,7 @@ impl RbLazyFrame {
137
138
  .with_try_parse_dates(try_parse_dates)
138
139
  .with_null_values(null_values)
139
140
  // TODO add with_missing_is_null
140
- .truncate_ragged_lines(truncate_ragged_lines);
141
+ .with_truncate_ragged_lines(truncate_ragged_lines);
141
142
 
142
143
  if let Some(_lambda) = with_schema_modify {
143
144
  todo!();
@@ -157,8 +158,10 @@ impl RbLazyFrame {
157
158
  row_index: Option<(String, IdxSize)>,
158
159
  low_memory: bool,
159
160
  use_statistics: bool,
160
- hive_partitioning: bool,
161
+ hive_partitioning: Option<bool>,
161
162
  hive_schema: Option<Wrap<Schema>>,
163
+ try_parse_hive_dates: bool,
164
+ glob: bool,
162
165
  ) -> RbResult<Self> {
163
166
  let parallel = parallel.0;
164
167
  let hive_schema = hive_schema.map(|s| Arc::new(s.0));
@@ -171,10 +174,15 @@ impl RbLazyFrame {
171
174
  .ok_or_else(|| RbValueError::new_err("expected a path argument".to_string()))?
172
175
  };
173
176
 
174
- let row_index = row_index.map(|(name, offset)| RowIndex { name, offset });
177
+ let row_index = row_index.map(|(name, offset)| RowIndex {
178
+ name: Arc::from(name.as_str()),
179
+ offset,
180
+ });
175
181
  let hive_options = HiveOptions {
176
182
  enabled: hive_partitioning,
183
+ hive_start_idx: 0,
177
184
  schema: hive_schema,
185
+ try_parse_dates: try_parse_hive_dates,
178
186
  };
179
187
 
180
188
  let args = ScanArgsParquet {
@@ -187,6 +195,7 @@ impl RbLazyFrame {
187
195
  cloud_options: None,
188
196
  use_statistics,
189
197
  hive_options,
198
+ glob,
190
199
  };
191
200
 
192
201
  let lf = if path.is_some() {
@@ -206,7 +215,11 @@ impl RbLazyFrame {
206
215
  row_index: Option<(String, IdxSize)>,
207
216
  memory_map: bool,
208
217
  ) -> RbResult<Self> {
209
- let row_index = row_index.map(|(name, offset)| RowIndex { name, offset });
218
+ let row_index = row_index.map(|(name, offset)| RowIndex {
219
+ name: Arc::from(name.as_str()),
220
+ offset,
221
+ });
222
+
210
223
  let args = ScanArgsIpc {
211
224
  n_rows,
212
225
  cache,
@@ -221,18 +234,23 @@ impl RbLazyFrame {
221
234
 
222
235
  pub fn write_json(&self, rb_f: Value) -> RbResult<()> {
223
236
  let file = BufWriter::new(get_file_like(rb_f, true)?);
224
- serde_json::to_writer(file, &self.ldf.logical_plan)
237
+ serde_json::to_writer(file, &self.ldf.borrow().logical_plan)
225
238
  .map_err(|err| RbValueError::new_err(format!("{:?}", err)))?;
226
239
  Ok(())
227
240
  }
228
241
 
229
- pub fn describe_plan(&self) -> String {
230
- self.ldf.describe_plan()
242
+ pub fn describe_plan(&self) -> RbResult<String> {
243
+ self.ldf
244
+ .borrow()
245
+ .describe_plan()
246
+ .map_err(RbPolarsErr::from)
247
+ .map_err(Into::into)
231
248
  }
232
249
 
233
250
  pub fn describe_optimized_plan(&self) -> RbResult<String> {
234
251
  let result = self
235
252
  .ldf
253
+ .borrow()
236
254
  .describe_optimized_plan()
237
255
  .map_err(RbPolarsErr::from)?;
238
256
  Ok(result)
@@ -251,7 +269,7 @@ impl RbLazyFrame {
251
269
  allow_streaming: bool,
252
270
  _eager: bool,
253
271
  ) -> RbLazyFrame {
254
- let ldf = self.ldf.clone();
272
+ let ldf = self.ldf.borrow().clone();
255
273
  let mut ldf = ldf
256
274
  .with_type_coercion(type_coercion)
257
275
  .with_predicate_pushdown(predicate_pushdown)
@@ -275,12 +293,12 @@ impl RbLazyFrame {
275
293
  maintain_order: bool,
276
294
  multithreaded: bool,
277
295
  ) -> Self {
278
- let ldf = self.ldf.clone();
296
+ let ldf = self.ldf.borrow().clone();
279
297
  ldf.sort(
280
298
  [&by_column],
281
299
  SortMultipleOptions {
282
300
  descending: vec![descending],
283
- nulls_last,
301
+ nulls_last: vec![nulls_last],
284
302
  multithreaded,
285
303
  maintain_order,
286
304
  },
@@ -292,11 +310,11 @@ impl RbLazyFrame {
292
310
  &self,
293
311
  by: RArray,
294
312
  descending: Vec<bool>,
295
- nulls_last: bool,
313
+ nulls_last: Vec<bool>,
296
314
  maintain_order: bool,
297
315
  multithreaded: bool,
298
316
  ) -> RbResult<Self> {
299
- let ldf = self.ldf.clone();
317
+ let ldf = self.ldf.borrow().clone();
300
318
  let exprs = rb_exprs_to_exprs(by)?;
301
319
  Ok(ldf
302
320
  .sort_by_exprs(
@@ -312,12 +330,12 @@ impl RbLazyFrame {
312
330
  }
313
331
 
314
332
  pub fn cache(&self) -> Self {
315
- let ldf = self.ldf.clone();
333
+ let ldf = self.ldf.borrow().clone();
316
334
  ldf.cache().into()
317
335
  }
318
336
 
319
337
  pub fn collect(&self) -> RbResult<RbDataFrame> {
320
- let ldf = self.ldf.clone();
338
+ let ldf = self.ldf.borrow().clone();
321
339
  let df = ldf.collect().map_err(RbPolarsErr::from)?;
322
340
  Ok(df.into())
323
341
  }
@@ -328,7 +346,7 @@ impl RbLazyFrame {
328
346
  path: PathBuf,
329
347
  compression: String,
330
348
  compression_level: Option<i32>,
331
- statistics: bool,
349
+ statistics: Wrap<StatisticsOptions>,
332
350
  row_group_size: Option<usize>,
333
351
  data_pagesize_limit: Option<usize>,
334
352
  maintain_order: bool,
@@ -337,13 +355,13 @@ impl RbLazyFrame {
337
355
 
338
356
  let options = ParquetWriteOptions {
339
357
  compression,
340
- statistics,
358
+ statistics: statistics.0,
341
359
  row_group_size,
342
360
  data_pagesize_limit,
343
361
  maintain_order,
344
362
  };
345
363
 
346
- let ldf = self.ldf.clone();
364
+ let ldf = self.ldf.borrow().clone();
347
365
  ldf.sink_parquet(path, options).map_err(RbPolarsErr::from)?;
348
366
  Ok(())
349
367
  }
@@ -359,7 +377,7 @@ impl RbLazyFrame {
359
377
  maintain_order,
360
378
  };
361
379
 
362
- let ldf = self.ldf.clone();
380
+ let ldf = self.ldf.borrow().clone();
363
381
  ldf.sink_ipc(path, options).map_err(RbPolarsErr::from)?;
364
382
  Ok(())
365
383
  }
@@ -377,6 +395,7 @@ impl RbLazyFrame {
377
395
  datetime_format: Option<String>,
378
396
  date_format: Option<String>,
379
397
  time_format: Option<String>,
398
+ float_scientific: Option<bool>,
380
399
  float_precision: Option<usize>,
381
400
  null_value: Option<String>,
382
401
  quote_style: Option<Wrap<QuoteStyle>>,
@@ -389,6 +408,7 @@ impl RbLazyFrame {
389
408
  date_format,
390
409
  time_format,
391
410
  datetime_format,
411
+ float_scientific,
392
412
  float_precision,
393
413
  separator,
394
414
  quote_char,
@@ -405,7 +425,7 @@ impl RbLazyFrame {
405
425
  serialize_options,
406
426
  };
407
427
 
408
- let ldf = self.ldf.clone();
428
+ let ldf = self.ldf.borrow().clone();
409
429
  ldf.sink_csv(path, options).map_err(RbPolarsErr::from)?;
410
430
  Ok(())
411
431
  }
@@ -413,36 +433,36 @@ impl RbLazyFrame {
413
433
  pub fn sink_json(&self, path: PathBuf, maintain_order: bool) -> RbResult<()> {
414
434
  let options = JsonWriterOptions { maintain_order };
415
435
 
416
- let ldf = self.ldf.clone();
436
+ let ldf = self.ldf.borrow().clone();
417
437
  ldf.sink_json(path, options).map_err(RbPolarsErr::from)?;
418
438
  Ok(())
419
439
  }
420
440
 
421
441
  pub fn fetch(&self, n_rows: usize) -> RbResult<RbDataFrame> {
422
- let ldf = self.ldf.clone();
442
+ let ldf = self.ldf.borrow().clone();
423
443
  let df = ldf.fetch(n_rows).map_err(RbPolarsErr::from)?;
424
444
  Ok(df.into())
425
445
  }
426
446
 
427
447
  pub fn filter(&self, predicate: &RbExpr) -> Self {
428
- let ldf = self.ldf.clone();
448
+ let ldf = self.ldf.borrow().clone();
429
449
  ldf.filter(predicate.inner.clone()).into()
430
450
  }
431
451
 
432
452
  pub fn select(&self, exprs: RArray) -> RbResult<Self> {
433
- let ldf = self.ldf.clone();
453
+ let ldf = self.ldf.borrow().clone();
434
454
  let exprs = rb_exprs_to_exprs(exprs)?;
435
455
  Ok(ldf.select(exprs).into())
436
456
  }
437
457
 
438
458
  pub fn select_seq(&self, exprs: RArray) -> RbResult<Self> {
439
- let ldf = self.ldf.clone();
459
+ let ldf = self.ldf.borrow().clone();
440
460
  let exprs = rb_exprs_to_exprs(exprs)?;
441
461
  Ok(ldf.select_seq(exprs).into())
442
462
  }
443
463
 
444
464
  pub fn group_by(&self, by: RArray, maintain_order: bool) -> RbResult<RbLazyGroupBy> {
445
- let ldf = self.ldf.clone();
465
+ let ldf = self.ldf.borrow().clone();
446
466
  let by = rb_exprs_to_exprs(by)?;
447
467
  let lazy_gb = if maintain_order {
448
468
  ldf.group_by_stable(by)
@@ -461,10 +481,9 @@ impl RbLazyFrame {
461
481
  offset: String,
462
482
  closed: Wrap<ClosedWindow>,
463
483
  by: RArray,
464
- check_sorted: bool,
465
484
  ) -> RbResult<RbLazyGroupBy> {
466
485
  let closed_window = closed.0;
467
- let ldf = self.ldf.clone();
486
+ let ldf = self.ldf.borrow().clone();
468
487
  let by = rb_exprs_to_exprs(by)?;
469
488
  let lazy_gb = ldf.rolling(
470
489
  index_column.inner.clone(),
@@ -474,7 +493,6 @@ impl RbLazyFrame {
474
493
  period: Duration::parse(&period),
475
494
  offset: Duration::parse(&offset),
476
495
  closed_window,
477
- check_sorted,
478
496
  },
479
497
  );
480
498
 
@@ -495,11 +513,10 @@ impl RbLazyFrame {
495
513
  closed: Wrap<ClosedWindow>,
496
514
  by: RArray,
497
515
  start_by: Wrap<StartBy>,
498
- check_sorted: bool,
499
516
  ) -> RbResult<RbLazyGroupBy> {
500
517
  let closed_window = closed.0;
501
518
  let by = rb_exprs_to_exprs(by)?;
502
- let ldf = self.ldf.clone();
519
+ let ldf = self.ldf.borrow().clone();
503
520
  let lazy_gb = ldf.group_by_dynamic(
504
521
  index_column.inner.clone(),
505
522
  by,
@@ -511,7 +528,6 @@ impl RbLazyFrame {
511
528
  include_boundaries,
512
529
  closed_window,
513
530
  start_by: start_by.0,
514
- check_sorted,
515
531
  ..Default::default()
516
532
  },
517
533
  );
@@ -523,14 +539,14 @@ impl RbLazyFrame {
523
539
 
524
540
  pub fn with_context(&self, contexts: RArray) -> RbResult<Self> {
525
541
  let contexts = contexts
526
- .each()
527
- .map(|v| TryConvert::try_convert(v.unwrap()))
542
+ .into_iter()
543
+ .map(TryConvert::try_convert)
528
544
  .collect::<RbResult<Vec<&RbLazyFrame>>>()?;
529
545
  let contexts = contexts
530
546
  .into_iter()
531
- .map(|ldf| ldf.ldf.clone())
547
+ .map(|ldf| ldf.ldf.borrow().clone())
532
548
  .collect::<Vec<_>>();
533
- Ok(self.ldf.clone().with_context(contexts).into())
549
+ Ok(self.ldf.borrow().clone().with_context(contexts).into())
534
550
  }
535
551
 
536
552
  #[allow(clippy::too_many_arguments)]
@@ -548,8 +564,8 @@ impl RbLazyFrame {
548
564
  tolerance: Option<Wrap<AnyValue<'_>>>,
549
565
  tolerance_str: Option<String>,
550
566
  ) -> RbResult<Self> {
551
- let ldf = self.ldf.clone();
552
- let other = other.ldf.clone();
567
+ let ldf = self.ldf.borrow().clone();
568
+ let other = other.ldf.borrow().clone();
553
569
  let left_on = left_on.inner.clone();
554
570
  let right_on = right_on.inner.clone();
555
571
  Ok(ldf
@@ -583,8 +599,8 @@ impl RbLazyFrame {
583
599
  how: Wrap<JoinType>,
584
600
  suffix: String,
585
601
  ) -> RbResult<Self> {
586
- let ldf = self.ldf.clone();
587
- let other = other.ldf.clone();
602
+ let ldf = self.ldf.borrow().clone();
603
+ let other = other.ldf.borrow().clone();
588
604
  let left_on = rb_exprs_to_exprs(left_on)?;
589
605
  let right_on = rb_exprs_to_exprs(right_on)?;
590
606
 
@@ -603,32 +619,32 @@ impl RbLazyFrame {
603
619
  }
604
620
 
605
621
  pub fn with_column(&self, expr: &RbExpr) -> Self {
606
- let ldf = self.ldf.clone();
622
+ let ldf = self.ldf.borrow().clone();
607
623
  ldf.with_column(expr.inner.clone()).into()
608
624
  }
609
625
 
610
626
  pub fn with_columns(&self, exprs: RArray) -> RbResult<Self> {
611
- let ldf = self.ldf.clone();
627
+ let ldf = self.ldf.borrow().clone();
612
628
  Ok(ldf.with_columns(rb_exprs_to_exprs(exprs)?).into())
613
629
  }
614
630
 
615
631
  pub fn with_columns_seq(&self, exprs: RArray) -> RbResult<Self> {
616
- let ldf = self.ldf.clone();
632
+ let ldf = self.ldf.borrow().clone();
617
633
  Ok(ldf.with_columns_seq(rb_exprs_to_exprs(exprs)?).into())
618
634
  }
619
635
 
620
636
  pub fn rename(&self, existing: Vec<String>, new: Vec<String>) -> Self {
621
- let ldf = self.ldf.clone();
637
+ let ldf = self.ldf.borrow().clone();
622
638
  ldf.rename(existing, new).into()
623
639
  }
624
640
 
625
641
  pub fn reverse(&self) -> Self {
626
- let ldf = self.ldf.clone();
642
+ let ldf = self.ldf.borrow().clone();
627
643
  ldf.reverse().into()
628
644
  }
629
645
 
630
646
  pub fn shift(&self, n: &RbExpr, fill_value: Option<&RbExpr>) -> Self {
631
- let lf = self.ldf.clone();
647
+ let lf = self.ldf.borrow().clone();
632
648
  let out = match fill_value {
633
649
  Some(v) => lf.shift_and_fill(n.inner.clone(), v.inner.clone()),
634
650
  None => lf.shift(n.inner.clone()),
@@ -637,72 +653,70 @@ impl RbLazyFrame {
637
653
  }
638
654
 
639
655
  pub fn fill_nan(&self, fill_value: &RbExpr) -> Self {
640
- let ldf = self.ldf.clone();
656
+ let ldf = self.ldf.borrow().clone();
641
657
  ldf.fill_nan(fill_value.inner.clone()).into()
642
658
  }
643
659
 
644
- pub fn min(&self) -> RbResult<Self> {
645
- let ldf = self.ldf.clone();
646
- let out = ldf.min().map_err(RbPolarsErr::from)?;
647
- Ok(out.into())
660
+ pub fn min(&self) -> Self {
661
+ let ldf = self.ldf.borrow().clone();
662
+ let out = ldf.min();
663
+ out.into()
648
664
  }
649
665
 
650
- pub fn max(&self) -> RbResult<Self> {
651
- let ldf = self.ldf.clone();
652
- let out = ldf.max().map_err(RbPolarsErr::from)?;
653
- Ok(out.into())
666
+ pub fn max(&self) -> Self {
667
+ let ldf = self.ldf.borrow().clone();
668
+ let out = ldf.max();
669
+ out.into()
654
670
  }
655
671
 
656
- pub fn sum(&self) -> RbResult<Self> {
657
- let ldf = self.ldf.clone();
658
- let out = ldf.sum().map_err(RbPolarsErr::from)?;
659
- Ok(out.into())
672
+ pub fn sum(&self) -> Self {
673
+ let ldf = self.ldf.borrow().clone();
674
+ let out = ldf.sum();
675
+ out.into()
660
676
  }
661
677
 
662
- pub fn mean(&self) -> RbResult<Self> {
663
- let ldf = self.ldf.clone();
664
- let out = ldf.mean().map_err(RbPolarsErr::from)?;
665
- Ok(out.into())
678
+ pub fn mean(&self) -> Self {
679
+ let ldf = self.ldf.borrow().clone();
680
+ let out = ldf.mean();
681
+ out.into()
666
682
  }
667
683
 
668
- pub fn std(&self, ddof: u8) -> RbResult<Self> {
669
- let ldf = self.ldf.clone();
670
- let out = ldf.std(ddof).map_err(RbPolarsErr::from)?;
671
- Ok(out.into())
684
+ pub fn std(&self, ddof: u8) -> Self {
685
+ let ldf = self.ldf.borrow().clone();
686
+ let out = ldf.std(ddof);
687
+ out.into()
672
688
  }
673
689
 
674
- pub fn var(&self, ddof: u8) -> RbResult<Self> {
675
- let ldf = self.ldf.clone();
676
- let out = ldf.var(ddof).map_err(RbPolarsErr::from)?;
677
- Ok(out.into())
690
+ pub fn var(&self, ddof: u8) -> Self {
691
+ let ldf = self.ldf.borrow().clone();
692
+ let out = ldf.var(ddof);
693
+ out.into()
678
694
  }
679
695
 
680
- pub fn median(&self) -> RbResult<Self> {
681
- let ldf = self.ldf.clone();
682
- let out = ldf.median().map_err(RbPolarsErr::from)?;
683
- Ok(out.into())
696
+ pub fn median(&self) -> Self {
697
+ let ldf = self.ldf.borrow().clone();
698
+ let out = ldf.median();
699
+ out.into()
684
700
  }
685
701
 
686
702
  pub fn quantile(
687
703
  &self,
688
704
  quantile: &RbExpr,
689
705
  interpolation: Wrap<QuantileInterpolOptions>,
690
- ) -> RbResult<Self> {
691
- let ldf = self.ldf.clone();
692
- let out = ldf
693
- .quantile(quantile.inner.clone(), interpolation.0)
694
- .map_err(RbPolarsErr::from)?;
695
- Ok(out.into())
706
+ ) -> Self {
707
+ let ldf = self.ldf.borrow().clone();
708
+ let out = ldf.quantile(quantile.inner.clone(), interpolation.0);
709
+ out.into()
696
710
  }
697
711
 
698
712
  pub fn explode(&self, column: RArray) -> RbResult<Self> {
699
- let ldf = self.ldf.clone();
713
+ let ldf = self.ldf.borrow().clone();
700
714
  let column = rb_exprs_to_exprs(column)?;
701
715
  Ok(ldf.explode(column).into())
702
716
  }
703
717
 
704
718
  pub fn null_count(&self) -> Self {
705
- let ldf = self.ldf.clone();
719
+ let ldf = self.ldf.borrow().clone();
706
720
  ldf.null_count().into()
707
721
  }
708
722
 
@@ -712,7 +726,7 @@ impl RbLazyFrame {
712
726
  subset: Option<Vec<String>>,
713
727
  keep: Wrap<UniqueKeepStrategy>,
714
728
  ) -> RbResult<Self> {
715
- let ldf = self.ldf.clone();
729
+ let ldf = self.ldf.borrow().clone();
716
730
  Ok(match maintain_order {
717
731
  true => ldf.unique_stable(subset, keep.0),
718
732
  false => ldf.unique(subset, keep.0),
@@ -721,75 +735,63 @@ impl RbLazyFrame {
721
735
  }
722
736
 
723
737
  pub fn drop_nulls(&self, subset: Option<Vec<String>>) -> Self {
724
- let ldf = self.ldf.clone();
738
+ let ldf = self.ldf.borrow().clone();
725
739
  ldf.drop_nulls(subset.map(|v| v.into_iter().map(|s| col(&s)).collect()))
726
740
  .into()
727
741
  }
728
742
 
729
743
  pub fn slice(&self, offset: i64, len: Option<IdxSize>) -> Self {
730
- let ldf = self.ldf.clone();
744
+ let ldf = self.ldf.borrow().clone();
731
745
  ldf.slice(offset, len.unwrap_or(IdxSize::MAX)).into()
732
746
  }
733
747
 
734
748
  pub fn tail(&self, n: IdxSize) -> Self {
735
- let ldf = self.ldf.clone();
749
+ let ldf = self.ldf.borrow().clone();
736
750
  ldf.tail(n).into()
737
751
  }
738
752
 
739
- pub fn melt(
753
+ pub fn unpivot(
740
754
  &self,
741
- id_vars: Vec<String>,
742
- value_vars: Vec<String>,
755
+ on: Vec<String>,
756
+ index: Vec<String>,
743
757
  value_name: Option<String>,
744
758
  variable_name: Option<String>,
745
759
  streamable: bool,
746
760
  ) -> Self {
747
- let args = MeltArgs {
748
- id_vars: strings_to_smartstrings(id_vars),
749
- value_vars: strings_to_smartstrings(value_vars),
761
+ let args = UnpivotArgs {
762
+ on: strings_to_smartstrings(on),
763
+ index: strings_to_smartstrings(index),
750
764
  value_name: value_name.map(|s| s.into()),
751
765
  variable_name: variable_name.map(|s| s.into()),
752
766
  streamable,
753
767
  };
754
768
 
755
- let ldf = self.ldf.clone();
756
- ldf.melt(args).into()
769
+ let ldf = self.ldf.borrow().clone();
770
+ ldf.unpivot(args).into()
757
771
  }
758
772
 
759
773
  pub fn with_row_index(&self, name: String, offset: Option<IdxSize>) -> Self {
760
- let ldf = self.ldf.clone();
774
+ let ldf = self.ldf.borrow().clone();
761
775
  ldf.with_row_index(&name, offset).into()
762
776
  }
763
777
 
764
778
  pub fn drop(&self, cols: Vec<String>) -> Self {
765
- let ldf = self.ldf.clone();
779
+ let ldf = self.ldf.borrow().clone();
766
780
  ldf.drop(cols).into()
767
781
  }
768
782
 
769
783
  pub fn cast_all(&self, dtype: Wrap<DataType>, strict: bool) -> Self {
770
- self.ldf.clone().cast_all(dtype.0, strict).into()
784
+ self.ldf.borrow().clone().cast_all(dtype.0, strict).into()
771
785
  }
772
786
 
773
787
  pub fn clone(&self) -> Self {
774
- self.ldf.clone().into()
788
+ self.ldf.borrow().clone().into()
775
789
  }
776
790
 
777
- pub fn columns(&self) -> RbResult<RArray> {
778
- let schema = self.get_schema()?;
779
- let iter = schema.iter_names().map(|s| s.as_str());
780
- Ok(RArray::from_iter(iter))
781
- }
791
+ pub fn collect_schema(&self) -> RbResult<RHash> {
792
+ let schema = self.ldf.borrow_mut().schema().map_err(RbPolarsErr::from)?;
782
793
 
783
- pub fn dtypes(&self) -> RbResult<RArray> {
784
- let schema = self.get_schema()?;
785
- let iter = schema.iter_dtypes().map(|dt| Wrap(dt.clone()).into_value());
786
- Ok(RArray::from_iter(iter))
787
- }
788
-
789
- pub fn schema(&self) -> RbResult<RHash> {
790
- let schema = self.get_schema()?;
791
794
  let schema_dict = RHash::new();
792
-
793
795
  schema.iter_fields().for_each(|fld| {
794
796
  // TODO remove unwrap
795
797
  schema_dict
@@ -803,23 +805,20 @@ impl RbLazyFrame {
803
805
  }
804
806
 
805
807
  pub fn unnest(&self, cols: Vec<String>) -> Self {
806
- self.ldf.clone().unnest(cols).into()
807
- }
808
-
809
- pub fn width(&self) -> RbResult<usize> {
810
- Ok(self.get_schema()?.len())
808
+ self.ldf.borrow().clone().unnest(cols).into()
811
809
  }
812
810
 
813
811
  pub fn count(&self) -> Self {
814
- let ldf = self.ldf.clone();
812
+ let ldf = self.ldf.borrow().clone();
815
813
  ldf.count().into()
816
814
  }
817
815
 
818
816
  pub fn merge_sorted(&self, other: &Self, key: String) -> RbResult<Self> {
819
817
  let out = self
820
818
  .ldf
819
+ .borrow()
821
820
  .clone()
822
- .merge_sorted(other.ldf.clone(), &key)
821
+ .merge_sorted(other.ldf.borrow().clone(), &key)
823
822
  .map_err(RbPolarsErr::from)?;
824
823
  Ok(out.into())
825
824
  }