polars-df 0.20.0 → 0.21.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +27 -0
  3. data/Cargo.lock +192 -186
  4. data/LICENSE.txt +1 -1
  5. data/ext/polars/Cargo.toml +19 -9
  6. data/ext/polars/src/batched_csv.rs +2 -2
  7. data/ext/polars/src/catalog/mod.rs +1 -0
  8. data/ext/polars/src/catalog/unity.rs +450 -0
  9. data/ext/polars/src/conversion/any_value.rs +9 -19
  10. data/ext/polars/src/conversion/categorical.rs +30 -0
  11. data/ext/polars/src/conversion/chunked_array.rs +8 -8
  12. data/ext/polars/src/conversion/mod.rs +275 -109
  13. data/ext/polars/src/dataframe/construction.rs +2 -2
  14. data/ext/polars/src/dataframe/export.rs +2 -2
  15. data/ext/polars/src/dataframe/general.rs +4 -2
  16. data/ext/polars/src/dataframe/io.rs +2 -2
  17. data/ext/polars/src/exceptions.rs +2 -1
  18. data/ext/polars/src/expr/array.rs +73 -4
  19. data/ext/polars/src/expr/binary.rs +26 -1
  20. data/ext/polars/src/expr/bitwise.rs +39 -0
  21. data/ext/polars/src/expr/categorical.rs +20 -0
  22. data/ext/polars/src/expr/datatype.rs +37 -0
  23. data/ext/polars/src/expr/datetime.rs +58 -0
  24. data/ext/polars/src/expr/general.rs +106 -22
  25. data/ext/polars/src/expr/list.rs +45 -2
  26. data/ext/polars/src/expr/meta.rs +5 -28
  27. data/ext/polars/src/expr/mod.rs +4 -1
  28. data/ext/polars/src/expr/name.rs +10 -2
  29. data/ext/polars/src/expr/rolling.rs +21 -1
  30. data/ext/polars/src/expr/selector.rs +219 -0
  31. data/ext/polars/src/expr/string.rs +73 -6
  32. data/ext/polars/src/expr/struct.rs +9 -1
  33. data/ext/polars/src/file.rs +11 -5
  34. data/ext/polars/src/functions/io.rs +21 -11
  35. data/ext/polars/src/functions/lazy.rs +26 -54
  36. data/ext/polars/src/functions/meta.rs +2 -2
  37. data/ext/polars/src/functions/misc.rs +1 -1
  38. data/ext/polars/src/functions/string_cache.rs +4 -5
  39. data/ext/polars/src/interop/numo/numo_rs.rs +1 -1
  40. data/ext/polars/src/interop/numo/to_numo_series.rs +1 -1
  41. data/ext/polars/src/io/mod.rs +102 -0
  42. data/ext/polars/src/lazyframe/general.rs +124 -111
  43. data/ext/polars/src/lazyframe/serde.rs +1 -1
  44. data/ext/polars/src/lazyframe/sink.rs +6 -6
  45. data/ext/polars/src/lib.rs +216 -29
  46. data/ext/polars/src/map/dataframe.rs +9 -9
  47. data/ext/polars/src/map/lazy.rs +1 -1
  48. data/ext/polars/src/map/mod.rs +31 -19
  49. data/ext/polars/src/map/series.rs +9 -9
  50. data/ext/polars/src/on_startup.rs +5 -2
  51. data/ext/polars/src/rb_modules.rs +1 -1
  52. data/ext/polars/src/series/aggregation.rs +44 -0
  53. data/ext/polars/src/series/construction.rs +11 -7
  54. data/ext/polars/src/series/export.rs +6 -4
  55. data/ext/polars/src/series/general.rs +75 -210
  56. data/ext/polars/src/series/import.rs +2 -2
  57. data/ext/polars/src/series/map.rs +227 -0
  58. data/ext/polars/src/series/mod.rs +2 -1
  59. data/ext/polars/src/series/scatter.rs +1 -1
  60. data/ext/polars/src/utils.rs +10 -2
  61. data/lib/polars/array_expr.rb +382 -3
  62. data/lib/polars/array_name_space.rb +281 -0
  63. data/lib/polars/binary_expr.rb +67 -0
  64. data/lib/polars/binary_name_space.rb +43 -0
  65. data/lib/polars/cat_expr.rb +224 -0
  66. data/lib/polars/cat_name_space.rb +130 -32
  67. data/lib/polars/catalog/unity/catalog_info.rb +20 -0
  68. data/lib/polars/catalog/unity/column_info.rb +31 -0
  69. data/lib/polars/catalog/unity/namespace_info.rb +21 -0
  70. data/lib/polars/catalog/unity/table_info.rb +50 -0
  71. data/lib/polars/catalog.rb +448 -0
  72. data/lib/polars/config.rb +2 -2
  73. data/lib/polars/convert.rb +12 -2
  74. data/lib/polars/data_frame.rb +834 -48
  75. data/lib/polars/data_type_expr.rb +52 -0
  76. data/lib/polars/data_types.rb +61 -5
  77. data/lib/polars/date_time_expr.rb +251 -0
  78. data/lib/polars/date_time_name_space.rb +299 -0
  79. data/lib/polars/exceptions.rb +7 -2
  80. data/lib/polars/expr.rb +1247 -211
  81. data/lib/polars/functions/col.rb +6 -5
  82. data/lib/polars/functions/datatype.rb +21 -0
  83. data/lib/polars/functions/lazy.rb +127 -15
  84. data/lib/polars/functions/repeat.rb +4 -0
  85. data/lib/polars/io/csv.rb +19 -1
  86. data/lib/polars/io/json.rb +16 -0
  87. data/lib/polars/io/ndjson.rb +13 -0
  88. data/lib/polars/io/parquet.rb +70 -66
  89. data/lib/polars/io/scan_options.rb +47 -0
  90. data/lib/polars/lazy_frame.rb +1099 -95
  91. data/lib/polars/list_expr.rb +400 -11
  92. data/lib/polars/list_name_space.rb +321 -5
  93. data/lib/polars/meta_expr.rb +71 -22
  94. data/lib/polars/name_expr.rb +36 -0
  95. data/lib/polars/scan_cast_options.rb +64 -0
  96. data/lib/polars/schema.rb +84 -3
  97. data/lib/polars/selector.rb +210 -0
  98. data/lib/polars/selectors.rb +932 -203
  99. data/lib/polars/series.rb +1083 -63
  100. data/lib/polars/string_expr.rb +435 -9
  101. data/lib/polars/string_name_space.rb +729 -45
  102. data/lib/polars/struct_expr.rb +103 -0
  103. data/lib/polars/struct_name_space.rb +19 -1
  104. data/lib/polars/utils/parse.rb +40 -0
  105. data/lib/polars/utils/various.rb +18 -1
  106. data/lib/polars/utils.rb +9 -1
  107. data/lib/polars/version.rb +1 -1
  108. data/lib/polars.rb +10 -0
  109. metadata +20 -2
@@ -1,4 +1,4 @@
1
- use magnus::{r_hash::ForEach, typed_data::Obj, IntoValue, RArray, RHash, TryConvert, Value};
1
+ use magnus::{IntoValue, RArray, RHash, TryConvert, Value, r_hash::ForEach, typed_data::Obj};
2
2
  use polars::io::{HiveOptions, RowIndex};
3
3
  use polars::lazy::frame::LazyFrame;
4
4
  use polars::prelude::*;
@@ -6,16 +6,17 @@ use polars_plan::dsl::ScanSources;
6
6
  use std::cell::RefCell;
7
7
  use std::io::BufWriter;
8
8
  use std::num::NonZeroUsize;
9
- use std::path::PathBuf;
10
9
 
11
10
  use super::SinkTarget;
12
11
  use crate::conversion::*;
13
12
  use crate::expr::rb_exprs_to_exprs;
13
+ use crate::expr::selector::RbSelector;
14
14
  use crate::file::get_file_like;
15
+ use crate::io::RbScanOptions;
15
16
  use crate::{RbDataFrame, RbExpr, RbLazyFrame, RbLazyGroupBy, RbPolarsErr, RbResult, RbValueError};
16
17
 
17
- fn rbobject_to_first_path_and_scan_sources(obj: Value) -> RbResult<(Option<PathBuf>, ScanSources)> {
18
- use crate::file::{get_ruby_scan_source_input, RubyScanSourceInput};
18
+ fn rbobject_to_first_path_and_scan_sources(obj: Value) -> RbResult<(Option<PlPath>, ScanSources)> {
19
+ use crate::file::{RubyScanSourceInput, get_ruby_scan_source_input};
19
20
  Ok(match get_ruby_scan_source_input(obj, false)? {
20
21
  RubyScanSourceInput::Path(path) => (Some(path.clone()), ScanSources::Paths([path].into())),
21
22
  RubyScanSourceInput::File(file) => (None, ScanSources::Files([file].into())),
@@ -43,7 +44,7 @@ impl RbLazyFrame {
43
44
 
44
45
  let sources = sources.0;
45
46
  let (_first_path, sources) = match source {
46
- None => (sources.first_path().map(|p| p.to_path_buf()), sources),
47
+ None => (sources.first_path().map(|p| p.into_owned()), sources),
47
48
  Some(source) => rbobject_to_first_path_and_scan_sources(source)?,
48
49
  };
49
50
 
@@ -111,7 +112,7 @@ impl RbLazyFrame {
111
112
 
112
113
  let sources = sources.0;
113
114
  let (_first_path, sources) = match source {
114
- None => (sources.first_path().map(|p| p.to_path_buf()), sources),
115
+ None => (sources.first_path().map(|p| p.into_owned()), sources),
115
116
  Some(source) => rbobject_to_first_path_and_scan_sources(source)?,
116
117
  };
117
118
 
@@ -147,72 +148,35 @@ impl RbLazyFrame {
147
148
  Ok(r.finish().map_err(RbPolarsErr::from)?.into())
148
149
  }
149
150
 
150
- pub fn new_from_parquet(arguments: &[Value]) -> RbResult<Self> {
151
- let source = Option::<Value>::try_convert(arguments[0])?;
152
- let sources = Wrap::<ScanSources>::try_convert(arguments[1])?;
153
- let n_rows = Option::<usize>::try_convert(arguments[2])?;
154
- let cache = bool::try_convert(arguments[3])?;
155
- let parallel = Wrap::<ParallelStrategy>::try_convert(arguments[4])?;
156
- let rechunk = bool::try_convert(arguments[5])?;
157
- let row_index = Option::<(String, IdxSize)>::try_convert(arguments[6])?;
158
- let low_memory = bool::try_convert(arguments[7])?;
159
- let cloud_options = Option::<Vec<(String, String)>>::try_convert(arguments[8])?;
160
- let _credential_provider = Option::<Value>::try_convert(arguments[9])?;
161
- let use_statistics = bool::try_convert(arguments[10])?;
162
- let hive_partitioning = Option::<bool>::try_convert(arguments[11])?;
163
- let schema = Option::<Wrap<Schema>>::try_convert(arguments[12])?;
164
- let hive_schema = Option::<Wrap<Schema>>::try_convert(arguments[13])?;
165
- let try_parse_hive_dates = bool::try_convert(arguments[14])?;
166
- let retries = usize::try_convert(arguments[15])?;
167
- let glob = bool::try_convert(arguments[16])?;
168
- let include_file_paths = Option::<String>::try_convert(arguments[17])?;
169
- let allow_missing_columns = bool::try_convert(arguments[18])?;
151
+ pub fn new_from_parquet(
152
+ sources: Wrap<ScanSources>,
153
+ schema: Option<Wrap<Schema>>,
154
+ scan_options: RbScanOptions,
155
+ parallel: Wrap<ParallelStrategy>,
156
+ low_memory: bool,
157
+ use_statistics: bool,
158
+ ) -> RbResult<Self> {
159
+ use crate::utils::to_rb_err;
170
160
 
171
161
  let parallel = parallel.0;
172
- let hive_schema = hive_schema.map(|s| Arc::new(s.0));
173
-
174
- let row_index = row_index.map(|(name, offset)| RowIndex {
175
- name: name.into(),
176
- offset,
177
- });
178
-
179
- let hive_options = HiveOptions {
180
- enabled: hive_partitioning,
181
- hive_start_idx: 0,
182
- schema: hive_schema,
183
- try_parse_dates: try_parse_hive_dates,
184
- };
185
162
 
186
- let mut args = ScanArgsParquet {
187
- n_rows,
188
- cache,
163
+ let options = ParquetOptions {
164
+ schema: schema.map(|x| Arc::new(x.0)),
189
165
  parallel,
190
- rechunk,
191
- row_index,
192
166
  low_memory,
193
- cloud_options: None,
194
167
  use_statistics,
195
- schema: schema.map(|x| Arc::new(x.0)),
196
- hive_options,
197
- glob,
198
- include_file_paths: include_file_paths.map(|x| x.into()),
199
- allow_missing_columns,
200
168
  };
201
169
 
202
170
  let sources = sources.0;
203
- let (first_path, sources) = match source {
204
- None => (sources.first_path().map(|p| p.to_path_buf()), sources),
205
- Some(source) => rbobject_to_first_path_and_scan_sources(source)?,
206
- };
171
+ let first_path = sources.first_path().map(|p| p.into_owned());
207
172
 
208
- if let Some(first_path) = first_path {
209
- let first_path_url = first_path.to_string_lossy();
210
- let cloud_options =
211
- parse_cloud_options(&first_path_url, cloud_options.unwrap_or_default())?;
212
- args.cloud_options = Some(cloud_options.with_max_retries(retries));
213
- }
173
+ let unified_scan_args =
174
+ scan_options.extract_unified_scan_args(first_path.as_ref().map(|p| p.as_ref()))?;
214
175
 
215
- let lf = LazyFrame::scan_parquet_sources(sources, args).map_err(RbPolarsErr::from)?;
176
+ let lf: LazyFrame = DslBuilder::scan_parquet(sources, options, unified_scan_args)
177
+ .map_err(to_rb_err)?
178
+ .build()
179
+ .into();
216
180
 
217
181
  Ok(lf.into())
218
182
  }
@@ -254,7 +218,7 @@ impl RbLazyFrame {
254
218
 
255
219
  let sources = sources.0;
256
220
  let (_first_path, sources) = match source {
257
- None => (sources.first_path().map(|p| p.to_path_buf()), sources),
221
+ None => (sources.first_path().map(|p| p.into_owned()), sources),
258
222
  Some(source) => rbobject_to_first_path_and_scan_sources(source)?,
259
223
  };
260
224
 
@@ -265,7 +229,7 @@ impl RbLazyFrame {
265
229
  pub fn write_json(&self, rb_f: Value) -> RbResult<()> {
266
230
  let file = BufWriter::new(get_file_like(rb_f, true)?);
267
231
  serde_json::to_writer(file, &self.ldf.borrow().logical_plan)
268
- .map_err(|err| RbValueError::new_err(format!("{:?}", err)))?;
232
+ .map_err(|err| RbValueError::new_err(format!("{err:?}")))?;
269
233
  Ok(())
270
234
  }
271
235
 
@@ -361,6 +325,30 @@ impl RbLazyFrame {
361
325
  .into())
362
326
  }
363
327
 
328
+ pub fn top_k(&self, k: IdxSize, by: RArray, reverse: Vec<bool>) -> RbResult<Self> {
329
+ let ldf = self.ldf.borrow().clone();
330
+ let exprs = rb_exprs_to_exprs(by)?;
331
+ Ok(ldf
332
+ .top_k(
333
+ k,
334
+ exprs,
335
+ SortMultipleOptions::new().with_order_descending_multi(reverse),
336
+ )
337
+ .into())
338
+ }
339
+
340
+ pub fn bottom_k(&self, k: IdxSize, by: RArray, reverse: Vec<bool>) -> RbResult<Self> {
341
+ let ldf = self.ldf.borrow().clone();
342
+ let exprs = rb_exprs_to_exprs(by)?;
343
+ Ok(ldf
344
+ .bottom_k(
345
+ k,
346
+ exprs,
347
+ SortMultipleOptions::new().with_order_descending_multi(reverse),
348
+ )
349
+ .into())
350
+ }
351
+
364
352
  pub fn cache(&self) -> Self {
365
353
  let ldf = self.ldf.borrow().clone();
366
354
  ldf.cache().into()
@@ -399,10 +387,8 @@ impl RbLazyFrame {
399
387
  let cloud_options = match target.base_path() {
400
388
  None => None,
401
389
  Some(base_path) => {
402
- let cloud_options = parse_cloud_options(
403
- base_path.to_str().unwrap(),
404
- cloud_options.unwrap_or_default(),
405
- )?;
390
+ let cloud_options =
391
+ parse_cloud_options(base_path.to_str(), cloud_options.unwrap_or_default())?;
406
392
  Some(cloud_options.with_max_retries(retries))
407
393
  }
408
394
  };
@@ -434,10 +420,8 @@ impl RbLazyFrame {
434
420
  let cloud_options = match target.base_path() {
435
421
  None => None,
436
422
  Some(base_path) => {
437
- let cloud_options = parse_cloud_options(
438
- base_path.to_str().unwrap(),
439
- cloud_options.unwrap_or_default(),
440
- )?;
423
+ let cloud_options =
424
+ parse_cloud_options(base_path.to_str(), cloud_options.unwrap_or_default())?;
441
425
  Some(cloud_options.with_max_retries(retries))
442
426
  }
443
427
  };
@@ -466,11 +450,12 @@ impl RbLazyFrame {
466
450
  let time_format = Option::<String>::try_convert(arguments[9])?;
467
451
  let float_scientific = Option::<bool>::try_convert(arguments[10])?;
468
452
  let float_precision = Option::<usize>::try_convert(arguments[11])?;
469
- let null_value = Option::<String>::try_convert(arguments[12])?;
470
- let quote_style = Option::<Wrap<QuoteStyle>>::try_convert(arguments[13])?;
471
- let cloud_options = Option::<Vec<(String, String)>>::try_convert(arguments[14])?;
472
- let retries = usize::try_convert(arguments[15])?;
473
- let sink_options = Wrap::<SinkOptions>::try_convert(arguments[16])?;
453
+ let decimal_comma = bool::try_convert(arguments[12])?;
454
+ let null_value = Option::<String>::try_convert(arguments[13])?;
455
+ let quote_style = Option::<Wrap<QuoteStyle>>::try_convert(arguments[14])?;
456
+ let cloud_options = Option::<Vec<(String, String)>>::try_convert(arguments[15])?;
457
+ let retries = usize::try_convert(arguments[16])?;
458
+ let sink_options = Wrap::<SinkOptions>::try_convert(arguments[17])?;
474
459
 
475
460
  let quote_style = quote_style.map_or(QuoteStyle::default(), |wrap| wrap.0);
476
461
  let null_value = null_value.unwrap_or(SerializeOptions::default().null);
@@ -481,6 +466,7 @@ impl RbLazyFrame {
481
466
  datetime_format,
482
467
  float_scientific,
483
468
  float_precision,
469
+ decimal_comma,
484
470
  separator,
485
471
  quote_char,
486
472
  null: null_value,
@@ -498,10 +484,8 @@ impl RbLazyFrame {
498
484
  let cloud_options = match target.base_path() {
499
485
  None => None,
500
486
  Some(base_path) => {
501
- let cloud_options = parse_cloud_options(
502
- base_path.to_str().unwrap(),
503
- cloud_options.unwrap_or_default(),
504
- )?;
487
+ let cloud_options =
488
+ parse_cloud_options(base_path.to_str(), cloud_options.unwrap_or_default())?;
505
489
  Some(cloud_options.with_max_retries(retries))
506
490
  }
507
491
  };
@@ -529,10 +513,8 @@ impl RbLazyFrame {
529
513
  let cloud_options = match target.base_path() {
530
514
  None => None,
531
515
  Some(base_path) => {
532
- let cloud_options = parse_cloud_options(
533
- base_path.to_str().unwrap(),
534
- cloud_options.unwrap_or_default(),
535
- )?;
516
+ let cloud_options =
517
+ parse_cloud_options(base_path.to_str(), cloud_options.unwrap_or_default())?;
536
518
  Some(cloud_options.with_max_retries(retries))
537
519
  }
538
520
  };
@@ -546,15 +528,14 @@ impl RbLazyFrame {
546
528
  .map_err(Into::into)
547
529
  }
548
530
 
549
- pub fn fetch(&self, n_rows: usize) -> RbResult<RbDataFrame> {
531
+ pub fn filter(&self, predicate: &RbExpr) -> Self {
550
532
  let ldf = self.ldf.borrow().clone();
551
- let df = ldf.fetch(n_rows).map_err(RbPolarsErr::from)?;
552
- Ok(df.into())
533
+ ldf.filter(predicate.inner.clone()).into()
553
534
  }
554
535
 
555
- pub fn filter(&self, predicate: &RbExpr) -> Self {
536
+ pub fn remove(&self, predicate: &RbExpr) -> Self {
556
537
  let ldf = self.ldf.borrow().clone();
557
- ldf.filter(predicate.inner.clone()).into()
538
+ ldf.remove(predicate.inner.clone()).into()
558
539
  }
559
540
 
560
541
  pub fn select(&self, exprs: RArray) -> RbResult<Self> {
@@ -689,15 +670,19 @@ impl RbLazyFrame {
689
670
  .allow_parallel(allow_parallel)
690
671
  .force_parallel(force_parallel)
691
672
  .coalesce(coalesce)
692
- .how(JoinType::AsOf(AsOfOptions {
673
+ .how(JoinType::AsOf(Box::new(AsOfOptions {
693
674
  strategy: strategy.0,
694
675
  left_by: left_by.map(strings_to_pl_smallstr),
695
676
  right_by: right_by.map(strings_to_pl_smallstr),
696
- tolerance: tolerance.map(|t| t.0.into_static()),
677
+ tolerance: tolerance.map(|t| {
678
+ let av = t.0.into_static();
679
+ let dtype = av.dtype();
680
+ Scalar::new(dtype, av)
681
+ }),
697
682
  tolerance_str: tolerance_str.map(|s| s.into()),
698
683
  allow_eq,
699
684
  check_sortedness,
700
- }))
685
+ })))
701
686
  .suffix(suffix)
702
687
  .finish()
703
688
  .into())
@@ -745,6 +730,20 @@ impl RbLazyFrame {
745
730
  .into())
746
731
  }
747
732
 
733
+ pub fn join_where(&self, other: &Self, predicates: RArray, suffix: String) -> RbResult<Self> {
734
+ let ldf = self.ldf.borrow().clone();
735
+ let other = other.ldf.borrow().clone();
736
+
737
+ let predicates = rb_exprs_to_exprs(predicates)?;
738
+
739
+ Ok(ldf
740
+ .join_builder()
741
+ .with(other)
742
+ .suffix(suffix)
743
+ .join_where(predicates)
744
+ .into())
745
+ }
746
+
748
747
  pub fn with_column(&self, expr: &RbExpr) -> Self {
749
748
  let ldf = self.ldf.borrow().clone();
750
749
  ldf.with_column(expr.inner.clone()).into()
@@ -832,10 +831,12 @@ impl RbLazyFrame {
832
831
  out.into()
833
832
  }
834
833
 
835
- pub fn explode(&self, column: RArray) -> RbResult<Self> {
836
- let ldf = self.ldf.borrow().clone();
837
- let column = rb_exprs_to_exprs(column)?;
838
- Ok(ldf.explode(column).into())
834
+ pub fn explode(&self, subset: &RbSelector) -> Self {
835
+ self.ldf
836
+ .borrow()
837
+ .clone()
838
+ .explode(subset.inner.clone())
839
+ .into()
839
840
  }
840
841
 
841
842
  pub fn null_count(&self) -> Self {
@@ -846,10 +847,11 @@ impl RbLazyFrame {
846
847
  pub fn unique(
847
848
  &self,
848
849
  maintain_order: bool,
849
- subset: Option<Vec<String>>,
850
+ subset: Option<&RbSelector>,
850
851
  keep: Wrap<UniqueKeepStrategy>,
851
852
  ) -> RbResult<Self> {
852
853
  let ldf = self.ldf.borrow().clone();
854
+ let subset = subset.map(|e| e.inner.clone());
853
855
  Ok(match maintain_order {
854
856
  true => ldf.unique_stable_generic(subset, keep.0),
855
857
  false => ldf.unique_generic(subset, keep.0),
@@ -857,9 +859,19 @@ impl RbLazyFrame {
857
859
  .into())
858
860
  }
859
861
 
860
- pub fn drop_nulls(&self, subset: Option<Vec<String>>) -> Self {
861
- let ldf = self.ldf.borrow().clone();
862
- ldf.drop_nulls(subset.map(|v| v.into_iter().map(|s| col(&s)).collect()))
862
+ pub fn drop_nans(&self, subset: Option<&RbSelector>) -> Self {
863
+ self.ldf
864
+ .borrow()
865
+ .clone()
866
+ .drop_nans(subset.map(|e| e.inner.clone()))
867
+ .into()
868
+ }
869
+
870
+ pub fn drop_nulls(&self, subset: Option<&RbSelector>) -> Self {
871
+ self.ldf
872
+ .borrow()
873
+ .clone()
874
+ .drop_nulls(subset.map(|e| e.inner.clone()))
863
875
  .into()
864
876
  }
865
877
 
@@ -875,16 +887,14 @@ impl RbLazyFrame {
875
887
 
876
888
  pub fn unpivot(
877
889
  &self,
878
- on: RArray,
879
- index: RArray,
890
+ on: &RbSelector,
891
+ index: &RbSelector,
880
892
  value_name: Option<String>,
881
893
  variable_name: Option<String>,
882
894
  ) -> RbResult<Self> {
883
- let on = rb_exprs_to_exprs(on)?;
884
- let index = rb_exprs_to_exprs(index)?;
885
895
  let args = UnpivotArgsDSL {
886
- on: on.into_iter().map(|e| e.into()).collect(),
887
- index: index.into_iter().map(|e| e.into()).collect(),
896
+ on: on.inner.clone(),
897
+ index: index.inner.clone(),
888
898
  value_name: value_name.map(|s| s.into()),
889
899
  variable_name: variable_name.map(|s| s.into()),
890
900
  };
@@ -898,9 +908,8 @@ impl RbLazyFrame {
898
908
  ldf.with_row_index(&name, offset).into()
899
909
  }
900
910
 
901
- pub fn drop(&self, cols: Vec<String>) -> Self {
902
- let ldf = self.ldf.borrow().clone();
903
- ldf.drop(cols).into()
911
+ pub fn drop(&self, columns: &RbSelector) -> Self {
912
+ self.ldf.borrow().clone().drop(columns.inner.clone()).into()
904
913
  }
905
914
 
906
915
  pub fn cast(&self, rb_dtypes: RHash, strict: bool) -> RbResult<Self> {
@@ -941,8 +950,12 @@ impl RbLazyFrame {
941
950
  Ok(schema_dict)
942
951
  }
943
952
 
944
- pub fn unnest(&self, cols: Vec<String>) -> Self {
945
- self.ldf.borrow().clone().unnest(cols).into()
953
+ pub fn unnest(&self, columns: &RbSelector) -> Self {
954
+ self.ldf
955
+ .borrow()
956
+ .clone()
957
+ .unnest(columns.inner.clone())
958
+ .into()
946
959
  }
947
960
 
948
961
  pub fn count(&self) -> Self {
@@ -25,7 +25,7 @@ impl RbLazyFrame {
25
25
  let json = unsafe { std::mem::transmute::<&'_ str, &'static str>(json.as_str()) };
26
26
 
27
27
  let lp = serde_json::from_str::<DslPlan>(json)
28
- .map_err(|err| RbValueError::new_err(format!("{:?}", err)))?;
28
+ .map_err(|err| RbValueError::new_err(format!("{err:?}")))?;
29
29
  Ok(LazyFrame::from(lp).into())
30
30
  }
31
31
  }
@@ -1,9 +1,9 @@
1
- use std::path::{Path, PathBuf};
2
1
  use std::sync::{Arc, Mutex};
3
2
 
4
3
  use magnus::{RHash, TryConvert, Value};
5
4
  use polars::prelude::sync_on_close::SyncOnCloseType;
6
- use polars::prelude::{SinkOptions, SpecialEq};
5
+ use polars::prelude::{PlPath, SinkOptions, SpecialEq};
6
+ use polars_utils::plpath::PlPathRef;
7
7
 
8
8
  use crate::prelude::Wrap;
9
9
  use crate::{RbResult, RbValueError};
@@ -15,8 +15,8 @@ pub enum SinkTarget {
15
15
 
16
16
  impl TryConvert for Wrap<polars_plan::dsl::SinkTarget> {
17
17
  fn try_convert(ob: Value) -> RbResult<Self> {
18
- if let Ok(v) = PathBuf::try_convert(ob) {
19
- Ok(Wrap(polars::prelude::SinkTarget::Path(Arc::new(v))))
18
+ if let Ok(v) = String::try_convert(ob) {
19
+ Ok(Wrap(polars::prelude::SinkTarget::Path(PlPath::new(&v))))
20
20
  } else {
21
21
  let writer = {
22
22
  let rb_f = ob;
@@ -39,10 +39,10 @@ impl TryConvert for SinkTarget {
39
39
  }
40
40
 
41
41
  impl SinkTarget {
42
- pub fn base_path(&self) -> Option<&Path> {
42
+ pub fn base_path(&self) -> Option<PlPathRef<'_>> {
43
43
  match self {
44
44
  Self::File(t) => match t {
45
- polars::prelude::SinkTarget::Path(p) => Some(p.as_path()),
45
+ polars::prelude::SinkTarget::Path(p) => Some(p.as_ref()),
46
46
  polars::prelude::SinkTarget::Dyn(_) => None,
47
47
  },
48
48
  }