polars-df 0.19.0 → 0.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +15 -0
  3. data/Cargo.lock +211 -320
  4. data/LICENSE.txt +1 -1
  5. data/ext/polars/Cargo.toml +13 -9
  6. data/ext/polars/src/batched_csv.rs +2 -2
  7. data/ext/polars/src/catalog/mod.rs +1 -0
  8. data/ext/polars/src/catalog/unity.rs +450 -0
  9. data/ext/polars/src/conversion/any_value.rs +9 -19
  10. data/ext/polars/src/conversion/categorical.rs +30 -0
  11. data/ext/polars/src/conversion/chunked_array.rs +8 -8
  12. data/ext/polars/src/conversion/mod.rs +187 -109
  13. data/ext/polars/src/dataframe/construction.rs +2 -2
  14. data/ext/polars/src/dataframe/export.rs +2 -2
  15. data/ext/polars/src/dataframe/general.rs +4 -2
  16. data/ext/polars/src/dataframe/io.rs +2 -2
  17. data/ext/polars/src/exceptions.rs +1 -1
  18. data/ext/polars/src/expr/datatype.rs +14 -0
  19. data/ext/polars/src/expr/general.rs +36 -44
  20. data/ext/polars/src/expr/list.rs +27 -17
  21. data/ext/polars/src/expr/meta.rs +18 -41
  22. data/ext/polars/src/expr/mod.rs +3 -1
  23. data/ext/polars/src/expr/name.rs +2 -2
  24. data/ext/polars/src/expr/rolling.rs +1 -1
  25. data/ext/polars/src/expr/selector.rs +219 -0
  26. data/ext/polars/src/expr/string.rs +14 -7
  27. data/ext/polars/src/file.rs +12 -6
  28. data/ext/polars/src/functions/io.rs +2 -11
  29. data/ext/polars/src/functions/lazy.rs +22 -54
  30. data/ext/polars/src/functions/meta.rs +2 -2
  31. data/ext/polars/src/functions/misc.rs +1 -1
  32. data/ext/polars/src/functions/range.rs +14 -10
  33. data/ext/polars/src/functions/string_cache.rs +4 -5
  34. data/ext/polars/src/interop/numo/numo_rs.rs +1 -1
  35. data/ext/polars/src/interop/numo/to_numo_series.rs +1 -1
  36. data/ext/polars/src/io/mod.rs +102 -0
  37. data/ext/polars/src/lazyframe/general.rs +75 -113
  38. data/ext/polars/src/lazyframe/serde.rs +1 -1
  39. data/ext/polars/src/lazyframe/sink.rs +6 -6
  40. data/ext/polars/src/lib.rs +104 -26
  41. data/ext/polars/src/map/dataframe.rs +7 -7
  42. data/ext/polars/src/map/lazy.rs +1 -1
  43. data/ext/polars/src/map/mod.rs +31 -19
  44. data/ext/polars/src/map/series.rs +8 -8
  45. data/ext/polars/src/on_startup.rs +5 -2
  46. data/ext/polars/src/rb_modules.rs +1 -1
  47. data/ext/polars/src/series/construction.rs +11 -7
  48. data/ext/polars/src/series/export.rs +6 -4
  49. data/ext/polars/src/series/general.rs +12 -207
  50. data/ext/polars/src/series/import.rs +2 -2
  51. data/ext/polars/src/series/map.rs +227 -0
  52. data/ext/polars/src/series/mod.rs +2 -1
  53. data/ext/polars/src/series/scatter.rs +1 -1
  54. data/ext/polars/src/utils.rs +10 -2
  55. data/lib/polars/cat_name_space.rb +3 -43
  56. data/lib/polars/catalog/unity/catalog_info.rb +20 -0
  57. data/lib/polars/catalog/unity/column_info.rb +31 -0
  58. data/lib/polars/catalog/unity/namespace_info.rb +21 -0
  59. data/lib/polars/catalog/unity/table_info.rb +50 -0
  60. data/lib/polars/catalog.rb +448 -0
  61. data/lib/polars/convert.rb +10 -0
  62. data/lib/polars/data_frame.rb +151 -30
  63. data/lib/polars/data_types.rb +47 -3
  64. data/lib/polars/exceptions.rb +7 -2
  65. data/lib/polars/expr.rb +48 -39
  66. data/lib/polars/functions/col.rb +6 -5
  67. data/lib/polars/functions/eager.rb +1 -1
  68. data/lib/polars/functions/lazy.rb +114 -15
  69. data/lib/polars/functions/repeat.rb +4 -0
  70. data/lib/polars/io/csv.rb +18 -0
  71. data/lib/polars/io/json.rb +16 -0
  72. data/lib/polars/io/ndjson.rb +13 -0
  73. data/lib/polars/io/parquet.rb +45 -63
  74. data/lib/polars/io/scan_options.rb +47 -0
  75. data/lib/polars/lazy_frame.rb +163 -75
  76. data/lib/polars/list_expr.rb +213 -17
  77. data/lib/polars/list_name_space.rb +121 -8
  78. data/lib/polars/meta_expr.rb +14 -29
  79. data/lib/polars/scan_cast_options.rb +64 -0
  80. data/lib/polars/schema.rb +6 -1
  81. data/lib/polars/selector.rb +138 -0
  82. data/lib/polars/selectors.rb +931 -202
  83. data/lib/polars/series.rb +46 -19
  84. data/lib/polars/string_expr.rb +24 -3
  85. data/lib/polars/string_name_space.rb +12 -1
  86. data/lib/polars/utils/parse.rb +40 -0
  87. data/lib/polars/utils.rb +5 -1
  88. data/lib/polars/version.rb +1 -1
  89. data/lib/polars.rb +8 -0
  90. metadata +17 -2
@@ -1,4 +1,4 @@
1
- use magnus::{r_hash::ForEach, typed_data::Obj, IntoValue, RArray, RHash, TryConvert, Value};
1
+ use magnus::{IntoValue, RArray, RHash, TryConvert, Value, r_hash::ForEach, typed_data::Obj};
2
2
  use polars::io::{HiveOptions, RowIndex};
3
3
  use polars::lazy::frame::LazyFrame;
4
4
  use polars::prelude::*;
@@ -6,16 +6,17 @@ use polars_plan::dsl::ScanSources;
6
6
  use std::cell::RefCell;
7
7
  use std::io::BufWriter;
8
8
  use std::num::NonZeroUsize;
9
- use std::path::PathBuf;
10
9
 
11
10
  use super::SinkTarget;
12
11
  use crate::conversion::*;
13
12
  use crate::expr::rb_exprs_to_exprs;
13
+ use crate::expr::selector::RbSelector;
14
14
  use crate::file::get_file_like;
15
+ use crate::io::RbScanOptions;
15
16
  use crate::{RbDataFrame, RbExpr, RbLazyFrame, RbLazyGroupBy, RbPolarsErr, RbResult, RbValueError};
16
17
 
17
- fn rbobject_to_first_path_and_scan_sources(obj: Value) -> RbResult<(Option<PathBuf>, ScanSources)> {
18
- use crate::file::{get_ruby_scan_source_input, RubyScanSourceInput};
18
+ fn rbobject_to_first_path_and_scan_sources(obj: Value) -> RbResult<(Option<PlPath>, ScanSources)> {
19
+ use crate::file::{RubyScanSourceInput, get_ruby_scan_source_input};
19
20
  Ok(match get_ruby_scan_source_input(obj, false)? {
20
21
  RubyScanSourceInput::Path(path) => (Some(path.clone()), ScanSources::Paths([path].into())),
21
22
  RubyScanSourceInput::File(file) => (None, ScanSources::Files([file].into())),
@@ -43,7 +44,7 @@ impl RbLazyFrame {
43
44
 
44
45
  let sources = sources.0;
45
46
  let (_first_path, sources) = match source {
46
- None => (sources.first_path().map(|p| p.to_path_buf()), sources),
47
+ None => (sources.first_path().map(|p| p.into_owned()), sources),
47
48
  Some(source) => rbobject_to_first_path_and_scan_sources(source)?,
48
49
  };
49
50
 
@@ -111,7 +112,7 @@ impl RbLazyFrame {
111
112
 
112
113
  let sources = sources.0;
113
114
  let (_first_path, sources) = match source {
114
- None => (sources.first_path().map(|p| p.to_path_buf()), sources),
115
+ None => (sources.first_path().map(|p| p.into_owned()), sources),
115
116
  Some(source) => rbobject_to_first_path_and_scan_sources(source)?,
116
117
  };
117
118
 
@@ -147,72 +148,35 @@ impl RbLazyFrame {
147
148
  Ok(r.finish().map_err(RbPolarsErr::from)?.into())
148
149
  }
149
150
 
150
- pub fn new_from_parquet(arguments: &[Value]) -> RbResult<Self> {
151
- let source = Option::<Value>::try_convert(arguments[0])?;
152
- let sources = Wrap::<ScanSources>::try_convert(arguments[1])?;
153
- let n_rows = Option::<usize>::try_convert(arguments[2])?;
154
- let cache = bool::try_convert(arguments[3])?;
155
- let parallel = Wrap::<ParallelStrategy>::try_convert(arguments[4])?;
156
- let rechunk = bool::try_convert(arguments[5])?;
157
- let row_index = Option::<(String, IdxSize)>::try_convert(arguments[6])?;
158
- let low_memory = bool::try_convert(arguments[7])?;
159
- let cloud_options = Option::<Vec<(String, String)>>::try_convert(arguments[8])?;
160
- let _credential_provider = Option::<Value>::try_convert(arguments[9])?;
161
- let use_statistics = bool::try_convert(arguments[10])?;
162
- let hive_partitioning = Option::<bool>::try_convert(arguments[11])?;
163
- let schema = Option::<Wrap<Schema>>::try_convert(arguments[12])?;
164
- let hive_schema = Option::<Wrap<Schema>>::try_convert(arguments[13])?;
165
- let try_parse_hive_dates = bool::try_convert(arguments[14])?;
166
- let retries = usize::try_convert(arguments[15])?;
167
- let glob = bool::try_convert(arguments[16])?;
168
- let include_file_paths = Option::<String>::try_convert(arguments[17])?;
169
- let allow_missing_columns = bool::try_convert(arguments[18])?;
151
+ pub fn new_from_parquet(
152
+ sources: Wrap<ScanSources>,
153
+ schema: Option<Wrap<Schema>>,
154
+ scan_options: RbScanOptions,
155
+ parallel: Wrap<ParallelStrategy>,
156
+ low_memory: bool,
157
+ use_statistics: bool,
158
+ ) -> RbResult<Self> {
159
+ use crate::utils::to_rb_err;
170
160
 
171
161
  let parallel = parallel.0;
172
- let hive_schema = hive_schema.map(|s| Arc::new(s.0));
173
-
174
- let row_index = row_index.map(|(name, offset)| RowIndex {
175
- name: name.into(),
176
- offset,
177
- });
178
162
 
179
- let hive_options = HiveOptions {
180
- enabled: hive_partitioning,
181
- hive_start_idx: 0,
182
- schema: hive_schema,
183
- try_parse_dates: try_parse_hive_dates,
184
- };
185
-
186
- let mut args = ScanArgsParquet {
187
- n_rows,
188
- cache,
163
+ let options = ParquetOptions {
164
+ schema: schema.map(|x| Arc::new(x.0)),
189
165
  parallel,
190
- rechunk,
191
- row_index,
192
166
  low_memory,
193
- cloud_options: None,
194
167
  use_statistics,
195
- schema: schema.map(|x| Arc::new(x.0)),
196
- hive_options,
197
- glob,
198
- include_file_paths: include_file_paths.map(|x| x.into()),
199
- allow_missing_columns,
200
168
  };
201
169
 
202
170
  let sources = sources.0;
203
- let (first_path, sources) = match source {
204
- None => (sources.first_path().map(|p| p.to_path_buf()), sources),
205
- Some(source) => rbobject_to_first_path_and_scan_sources(source)?,
206
- };
171
+ let first_path = sources.first_path().map(|p| p.into_owned());
207
172
 
208
- if let Some(first_path) = first_path {
209
- let first_path_url = first_path.to_string_lossy();
210
- let cloud_options =
211
- parse_cloud_options(&first_path_url, cloud_options.unwrap_or_default())?;
212
- args.cloud_options = Some(cloud_options.with_max_retries(retries));
213
- }
173
+ let unified_scan_args =
174
+ scan_options.extract_unified_scan_args(first_path.as_ref().map(|p| p.as_ref()))?;
214
175
 
215
- let lf = LazyFrame::scan_parquet_sources(sources, args).map_err(RbPolarsErr::from)?;
176
+ let lf: LazyFrame = DslBuilder::scan_parquet(sources, options, unified_scan_args)
177
+ .map_err(to_rb_err)?
178
+ .build()
179
+ .into();
216
180
 
217
181
  Ok(lf.into())
218
182
  }
@@ -254,7 +218,7 @@ impl RbLazyFrame {
254
218
 
255
219
  let sources = sources.0;
256
220
  let (_first_path, sources) = match source {
257
- None => (sources.first_path().map(|p| p.to_path_buf()), sources),
221
+ None => (sources.first_path().map(|p| p.into_owned()), sources),
258
222
  Some(source) => rbobject_to_first_path_and_scan_sources(source)?,
259
223
  };
260
224
 
@@ -265,7 +229,7 @@ impl RbLazyFrame {
265
229
  pub fn write_json(&self, rb_f: Value) -> RbResult<()> {
266
230
  let file = BufWriter::new(get_file_like(rb_f, true)?);
267
231
  serde_json::to_writer(file, &self.ldf.borrow().logical_plan)
268
- .map_err(|err| RbValueError::new_err(format!("{:?}", err)))?;
232
+ .map_err(|err| RbValueError::new_err(format!("{err:?}")))?;
269
233
  Ok(())
270
234
  }
271
235
 
@@ -305,7 +269,7 @@ impl RbLazyFrame {
305
269
  .with_predicate_pushdown(predicate_pushdown)
306
270
  .with_simplify_expr(simplify_expr)
307
271
  .with_slice_pushdown(slice_pushdown)
308
- .with_streaming(allow_streaming)
272
+ .with_new_streaming(allow_streaming)
309
273
  ._with_eager(_eager)
310
274
  .with_projection_pushdown(projection_pushdown);
311
275
 
@@ -399,10 +363,8 @@ impl RbLazyFrame {
399
363
  let cloud_options = match target.base_path() {
400
364
  None => None,
401
365
  Some(base_path) => {
402
- let cloud_options = parse_cloud_options(
403
- base_path.to_str().unwrap(),
404
- cloud_options.unwrap_or_default(),
405
- )?;
366
+ let cloud_options =
367
+ parse_cloud_options(base_path.to_str(), cloud_options.unwrap_or_default())?;
406
368
  Some(cloud_options.with_max_retries(retries))
407
369
  }
408
370
  };
@@ -434,10 +396,8 @@ impl RbLazyFrame {
434
396
  let cloud_options = match target.base_path() {
435
397
  None => None,
436
398
  Some(base_path) => {
437
- let cloud_options = parse_cloud_options(
438
- base_path.to_str().unwrap(),
439
- cloud_options.unwrap_or_default(),
440
- )?;
399
+ let cloud_options =
400
+ parse_cloud_options(base_path.to_str(), cloud_options.unwrap_or_default())?;
441
401
  Some(cloud_options.with_max_retries(retries))
442
402
  }
443
403
  };
@@ -466,11 +426,12 @@ impl RbLazyFrame {
466
426
  let time_format = Option::<String>::try_convert(arguments[9])?;
467
427
  let float_scientific = Option::<bool>::try_convert(arguments[10])?;
468
428
  let float_precision = Option::<usize>::try_convert(arguments[11])?;
469
- let null_value = Option::<String>::try_convert(arguments[12])?;
470
- let quote_style = Option::<Wrap<QuoteStyle>>::try_convert(arguments[13])?;
471
- let cloud_options = Option::<Vec<(String, String)>>::try_convert(arguments[14])?;
472
- let retries = usize::try_convert(arguments[15])?;
473
- let sink_options = Wrap::<SinkOptions>::try_convert(arguments[16])?;
429
+ let decimal_comma = bool::try_convert(arguments[12])?;
430
+ let null_value = Option::<String>::try_convert(arguments[13])?;
431
+ let quote_style = Option::<Wrap<QuoteStyle>>::try_convert(arguments[14])?;
432
+ let cloud_options = Option::<Vec<(String, String)>>::try_convert(arguments[15])?;
433
+ let retries = usize::try_convert(arguments[16])?;
434
+ let sink_options = Wrap::<SinkOptions>::try_convert(arguments[17])?;
474
435
 
475
436
  let quote_style = quote_style.map_or(QuoteStyle::default(), |wrap| wrap.0);
476
437
  let null_value = null_value.unwrap_or(SerializeOptions::default().null);
@@ -481,6 +442,7 @@ impl RbLazyFrame {
481
442
  datetime_format,
482
443
  float_scientific,
483
444
  float_precision,
445
+ decimal_comma,
484
446
  separator,
485
447
  quote_char,
486
448
  null: null_value,
@@ -498,10 +460,8 @@ impl RbLazyFrame {
498
460
  let cloud_options = match target.base_path() {
499
461
  None => None,
500
462
  Some(base_path) => {
501
- let cloud_options = parse_cloud_options(
502
- base_path.to_str().unwrap(),
503
- cloud_options.unwrap_or_default(),
504
- )?;
463
+ let cloud_options =
464
+ parse_cloud_options(base_path.to_str(), cloud_options.unwrap_or_default())?;
505
465
  Some(cloud_options.with_max_retries(retries))
506
466
  }
507
467
  };
@@ -529,10 +489,8 @@ impl RbLazyFrame {
529
489
  let cloud_options = match target.base_path() {
530
490
  None => None,
531
491
  Some(base_path) => {
532
- let cloud_options = parse_cloud_options(
533
- base_path.to_str().unwrap(),
534
- cloud_options.unwrap_or_default(),
535
- )?;
492
+ let cloud_options =
493
+ parse_cloud_options(base_path.to_str(), cloud_options.unwrap_or_default())?;
536
494
  Some(cloud_options.with_max_retries(retries))
537
495
  }
538
496
  };
@@ -546,12 +504,6 @@ impl RbLazyFrame {
546
504
  .map_err(Into::into)
547
505
  }
548
506
 
549
- pub fn fetch(&self, n_rows: usize) -> RbResult<RbDataFrame> {
550
- let ldf = self.ldf.borrow().clone();
551
- let df = ldf.fetch(n_rows).map_err(RbPolarsErr::from)?;
552
- Ok(df.into())
553
- }
554
-
555
507
  pub fn filter(&self, predicate: &RbExpr) -> Self {
556
508
  let ldf = self.ldf.borrow().clone();
557
509
  ldf.filter(predicate.inner.clone()).into()
@@ -689,15 +641,19 @@ impl RbLazyFrame {
689
641
  .allow_parallel(allow_parallel)
690
642
  .force_parallel(force_parallel)
691
643
  .coalesce(coalesce)
692
- .how(JoinType::AsOf(AsOfOptions {
644
+ .how(JoinType::AsOf(Box::new(AsOfOptions {
693
645
  strategy: strategy.0,
694
646
  left_by: left_by.map(strings_to_pl_smallstr),
695
647
  right_by: right_by.map(strings_to_pl_smallstr),
696
- tolerance: tolerance.map(|t| t.0.into_static()),
648
+ tolerance: tolerance.map(|t| {
649
+ let av = t.0.into_static();
650
+ let dtype = av.dtype();
651
+ Scalar::new(dtype, av)
652
+ }),
697
653
  tolerance_str: tolerance_str.map(|s| s.into()),
698
654
  allow_eq,
699
655
  check_sortedness,
700
- }))
656
+ })))
701
657
  .suffix(suffix)
702
658
  .finish()
703
659
  .into())
@@ -832,10 +788,12 @@ impl RbLazyFrame {
832
788
  out.into()
833
789
  }
834
790
 
835
- pub fn explode(&self, column: RArray) -> RbResult<Self> {
836
- let ldf = self.ldf.borrow().clone();
837
- let column = rb_exprs_to_exprs(column)?;
838
- Ok(ldf.explode(column).into())
791
+ pub fn explode(&self, subset: &RbSelector) -> Self {
792
+ self.ldf
793
+ .borrow()
794
+ .clone()
795
+ .explode(subset.inner.clone())
796
+ .into()
839
797
  }
840
798
 
841
799
  pub fn null_count(&self) -> Self {
@@ -846,10 +804,11 @@ impl RbLazyFrame {
846
804
  pub fn unique(
847
805
  &self,
848
806
  maintain_order: bool,
849
- subset: Option<Vec<String>>,
807
+ subset: Option<&RbSelector>,
850
808
  keep: Wrap<UniqueKeepStrategy>,
851
809
  ) -> RbResult<Self> {
852
810
  let ldf = self.ldf.borrow().clone();
811
+ let subset = subset.map(|e| e.inner.clone());
853
812
  Ok(match maintain_order {
854
813
  true => ldf.unique_stable_generic(subset, keep.0),
855
814
  false => ldf.unique_generic(subset, keep.0),
@@ -857,9 +816,11 @@ impl RbLazyFrame {
857
816
  .into())
858
817
  }
859
818
 
860
- pub fn drop_nulls(&self, subset: Option<Vec<String>>) -> Self {
861
- let ldf = self.ldf.borrow().clone();
862
- ldf.drop_nulls(subset.map(|v| v.into_iter().map(|s| col(&s)).collect()))
819
+ pub fn drop_nulls(&self, subset: Option<&RbSelector>) -> Self {
820
+ self.ldf
821
+ .borrow()
822
+ .clone()
823
+ .drop_nulls(subset.map(|e| e.inner.clone()))
863
824
  .into()
864
825
  }
865
826
 
@@ -875,16 +836,14 @@ impl RbLazyFrame {
875
836
 
876
837
  pub fn unpivot(
877
838
  &self,
878
- on: RArray,
879
- index: RArray,
839
+ on: &RbSelector,
840
+ index: &RbSelector,
880
841
  value_name: Option<String>,
881
842
  variable_name: Option<String>,
882
843
  ) -> RbResult<Self> {
883
- let on = rb_exprs_to_exprs(on)?;
884
- let index = rb_exprs_to_exprs(index)?;
885
844
  let args = UnpivotArgsDSL {
886
- on: on.into_iter().map(|e| e.into()).collect(),
887
- index: index.into_iter().map(|e| e.into()).collect(),
845
+ on: on.inner.clone(),
846
+ index: index.inner.clone(),
888
847
  value_name: value_name.map(|s| s.into()),
889
848
  variable_name: variable_name.map(|s| s.into()),
890
849
  };
@@ -898,9 +857,8 @@ impl RbLazyFrame {
898
857
  ldf.with_row_index(&name, offset).into()
899
858
  }
900
859
 
901
- pub fn drop(&self, cols: Vec<String>) -> Self {
902
- let ldf = self.ldf.borrow().clone();
903
- ldf.drop(cols).into()
860
+ pub fn drop(&self, columns: &RbSelector) -> Self {
861
+ self.ldf.borrow().clone().drop(columns.inner.clone()).into()
904
862
  }
905
863
 
906
864
  pub fn cast(&self, rb_dtypes: RHash, strict: bool) -> RbResult<Self> {
@@ -941,8 +899,12 @@ impl RbLazyFrame {
941
899
  Ok(schema_dict)
942
900
  }
943
901
 
944
- pub fn unnest(&self, cols: Vec<String>) -> Self {
945
- self.ldf.borrow().clone().unnest(cols).into()
902
+ pub fn unnest(&self, columns: &RbSelector) -> Self {
903
+ self.ldf
904
+ .borrow()
905
+ .clone()
906
+ .unnest(columns.inner.clone())
907
+ .into()
946
908
  }
947
909
 
948
910
  pub fn count(&self) -> Self {
@@ -25,7 +25,7 @@ impl RbLazyFrame {
25
25
  let json = unsafe { std::mem::transmute::<&'_ str, &'static str>(json.as_str()) };
26
26
 
27
27
  let lp = serde_json::from_str::<DslPlan>(json)
28
- .map_err(|err| RbValueError::new_err(format!("{:?}", err)))?;
28
+ .map_err(|err| RbValueError::new_err(format!("{err:?}")))?;
29
29
  Ok(LazyFrame::from(lp).into())
30
30
  }
31
31
  }
@@ -1,9 +1,9 @@
1
- use std::path::{Path, PathBuf};
2
1
  use std::sync::{Arc, Mutex};
3
2
 
4
3
  use magnus::{RHash, TryConvert, Value};
5
4
  use polars::prelude::sync_on_close::SyncOnCloseType;
6
- use polars::prelude::{SinkOptions, SpecialEq};
5
+ use polars::prelude::{PlPath, SinkOptions, SpecialEq};
6
+ use polars_utils::plpath::PlPathRef;
7
7
 
8
8
  use crate::prelude::Wrap;
9
9
  use crate::{RbResult, RbValueError};
@@ -15,8 +15,8 @@ pub enum SinkTarget {
15
15
 
16
16
  impl TryConvert for Wrap<polars_plan::dsl::SinkTarget> {
17
17
  fn try_convert(ob: Value) -> RbResult<Self> {
18
- if let Ok(v) = PathBuf::try_convert(ob) {
19
- Ok(Wrap(polars::prelude::SinkTarget::Path(Arc::new(v))))
18
+ if let Ok(v) = String::try_convert(ob) {
19
+ Ok(Wrap(polars::prelude::SinkTarget::Path(PlPath::new(&v))))
20
20
  } else {
21
21
  let writer = {
22
22
  let rb_f = ob;
@@ -39,10 +39,10 @@ impl TryConvert for SinkTarget {
39
39
  }
40
40
 
41
41
  impl SinkTarget {
42
- pub fn base_path(&self) -> Option<&Path> {
42
+ pub fn base_path(&self) -> Option<PlPathRef<'_>> {
43
43
  match self {
44
44
  Self::File(t) => match t {
45
- polars::prelude::SinkTarget::Path(p) => Some(p.as_path()),
45
+ polars::prelude::SinkTarget::Path(p) => Some(p.as_ref()),
46
46
  polars::prelude::SinkTarget::Dyn(_) => None,
47
47
  },
48
48
  }
@@ -1,5 +1,6 @@
1
1
  mod allocator;
2
2
  mod batched_csv;
3
+ mod catalog;
3
4
  mod conversion;
4
5
  mod dataframe;
5
6
  mod error;
@@ -8,6 +9,7 @@ mod expr;
8
9
  mod file;
9
10
  mod functions;
10
11
  mod interop;
12
+ mod io;
11
13
  mod lazyframe;
12
14
  mod lazygroupby;
13
15
  mod map;
@@ -20,23 +22,25 @@ mod sql;
20
22
  mod utils;
21
23
 
22
24
  use batched_csv::RbBatchedCsv;
25
+ use catalog::unity::RbCatalogClient;
23
26
  use conversion::*;
24
27
  use dataframe::RbDataFrame;
25
28
  use error::RbPolarsErr;
26
29
  use exceptions::{RbTypeError, RbValueError};
27
- use expr::rb_exprs_to_exprs;
28
30
  use expr::RbExpr;
31
+ use expr::rb_exprs_to_exprs;
32
+ use expr::selector::RbSelector;
29
33
  use functions::string_cache::RbStringCacheHolder;
30
34
  use functions::whenthen::{RbChainedThen, RbChainedWhen, RbThen, RbWhen};
31
35
  use interop::arrow::to_ruby::RbArrowArrayStream;
32
36
  use lazyframe::RbLazyFrame;
33
37
  use lazygroupby::RbLazyGroupBy;
34
- use magnus::{define_module, function, method, prelude::*, Ruby};
38
+ use magnus::{Ruby, define_module, function, method, prelude::*};
35
39
  use series::RbSeries;
36
40
  use sql::RbSQLContext;
37
41
 
38
- use magnus::error::Result as RbResult;
39
42
  use magnus::Error as RbErr;
43
+ use magnus::error::Result as RbResult;
40
44
 
41
45
  // TODO move
42
46
  fn re_escape(pattern: String) -> String {
@@ -143,7 +147,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
143
147
  class.define_method("pivot_expr", method!(RbDataFrame::pivot_expr, 7))?;
144
148
  class.define_method("partition_by", method!(RbDataFrame::partition_by, 3))?;
145
149
  class.define_method("lazy", method!(RbDataFrame::lazy, 0))?;
146
- class.define_method("to_dummies", method!(RbDataFrame::to_dummies, 3))?;
150
+ class.define_method("to_dummies", method!(RbDataFrame::to_dummies, 4))?;
147
151
  class.define_method("null_count", method!(RbDataFrame::null_count, 0))?;
148
152
  class.define_method("map_rows", method!(RbDataFrame::map_rows, 3))?;
149
153
  class.define_method("shrink_to_fit", method!(RbDataFrame::shrink_to_fit, 0))?;
@@ -217,7 +221,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
217
221
  class.define_method("peak_max", method!(RbExpr::peak_max, 0))?;
218
222
  class.define_method("arg_max", method!(RbExpr::arg_max, 0))?;
219
223
  class.define_method("arg_min", method!(RbExpr::arg_min, 0))?;
220
- class.define_method("search_sorted", method!(RbExpr::search_sorted, 2))?;
224
+ class.define_method("search_sorted", method!(RbExpr::search_sorted, 3))?;
221
225
  class.define_method("gather", method!(RbExpr::gather, 1))?;
222
226
  class.define_method("get", method!(RbExpr::get, 1))?;
223
227
  class.define_method("sort_by", method!(RbExpr::sort_by, 5))?;
@@ -329,7 +333,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
329
333
  class.define_method("str_hex_decode", method!(RbExpr::str_hex_decode, 1))?;
330
334
  class.define_method("str_base64_encode", method!(RbExpr::str_base64_encode, 0))?;
331
335
  class.define_method("str_base64_decode", method!(RbExpr::str_base64_decode, 1))?;
332
- class.define_method("str_to_integer", method!(RbExpr::str_to_integer, 2))?;
336
+ class.define_method("str_to_integer", method!(RbExpr::str_to_integer, 3))?;
333
337
  class.define_method("str_json_decode", method!(RbExpr::str_json_decode, 2))?;
334
338
  class.define_method("binary_hex_encode", method!(RbExpr::bin_hex_encode, 0))?;
335
339
  class.define_method("binary_hex_decode", method!(RbExpr::bin_hex_decode, 1))?;
@@ -426,7 +430,6 @@ fn init(ruby: &Ruby) -> RbResult<()> {
426
430
  class.define_method("dot", method!(RbExpr::dot, 1))?;
427
431
  class.define_method("reinterpret", method!(RbExpr::reinterpret, 1))?;
428
432
  class.define_method("mode", method!(RbExpr::mode, 0))?;
429
- class.define_method("exclude", method!(RbExpr::exclude, 1))?;
430
433
  class.define_method("interpolate", method!(RbExpr::interpolate, 1))?;
431
434
  class.define_method("interpolate_by", method!(RbExpr::interpolate_by, 1))?;
432
435
  class.define_method("rolling_sum", method!(RbExpr::rolling_sum, 4))?;
@@ -464,9 +467,10 @@ fn init(ruby: &Ruby) -> RbResult<()> {
464
467
  class.define_method("list_to_array", method!(RbExpr::list_to_array, 1))?;
465
468
  class.define_method("list_mean", method!(RbExpr::list_mean, 0))?;
466
469
  class.define_method("list_tail", method!(RbExpr::list_tail, 1))?;
467
- class.define_method("list_sort", method!(RbExpr::list_sort, 1))?;
470
+ class.define_method("list_sort", method!(RbExpr::list_sort, 2))?;
468
471
  class.define_method("list_reverse", method!(RbExpr::list_reverse, 0))?;
469
472
  class.define_method("list_unique", method!(RbExpr::list_unique, 1))?;
473
+ class.define_method("list_set_operation", method!(RbExpr::list_set_operation, 2))?;
470
474
  class.define_method("list_get", method!(RbExpr::list_get, 2))?;
471
475
  class.define_method("list_join", method!(RbExpr::list_join, 2))?;
472
476
  class.define_method("list_arg_min", method!(RbExpr::list_arg_min, 0))?;
@@ -476,8 +480,9 @@ fn init(ruby: &Ruby) -> RbResult<()> {
476
480
  class.define_method("list_diff", method!(RbExpr::list_diff, 2))?;
477
481
  class.define_method("list_shift", method!(RbExpr::list_shift, 1))?;
478
482
  class.define_method("list_slice", method!(RbExpr::list_slice, 2))?;
479
- class.define_method("list_eval", method!(RbExpr::list_eval, 2))?;
480
- class.define_method("cumulative_eval", method!(RbExpr::cumulative_eval, 3))?;
483
+ class.define_method("list_eval", method!(RbExpr::list_eval, 1))?;
484
+ class.define_method("list_filter", method!(RbExpr::list_filter, 1))?;
485
+ class.define_method("cumulative_eval", method!(RbExpr::cumulative_eval, 2))?;
481
486
  class.define_method("list_to_struct", method!(RbExpr::list_to_struct, 3))?;
482
487
  class.define_method("rank", method!(RbExpr::rank, 3))?;
483
488
  class.define_method("diff", method!(RbExpr::diff, 2))?;
@@ -518,9 +523,11 @@ fn init(ruby: &Ruby) -> RbResult<()> {
518
523
  class.define_method("set_sorted_flag", method!(RbExpr::set_sorted_flag, 1))?;
519
524
  class.define_method("replace", method!(RbExpr::replace, 2))?;
520
525
  class.define_method("replace_strict", method!(RbExpr::replace_strict, 4))?;
526
+ class.define_method("into_selector", method!(RbExpr::into_selector, 0))?;
527
+ class.define_singleton_method("new_selector", function!(RbExpr::new_selector, 1))?;
521
528
 
522
529
  // meta
523
- class.define_method("meta_pop", method!(RbExpr::meta_pop, 0))?;
530
+ class.define_method("meta_pop", method!(RbExpr::meta_pop, 1))?;
524
531
  class.define_method("meta_eq", method!(RbExpr::meta_eq, 1))?;
525
532
  class.define_method("meta_roots", method!(RbExpr::meta_root_names, 0))?;
526
533
  class.define_method("meta_output_name", method!(RbExpr::meta_output_name, 0))?;
@@ -534,11 +541,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
534
541
  "meta_is_regex_projection",
535
542
  method!(RbExpr::meta_is_regex_projection, 0),
536
543
  )?;
537
- class.define_method("_meta_selector_add", method!(RbExpr::_meta_selector_add, 1))?;
538
- class.define_method("_meta_selector_sub", method!(RbExpr::_meta_selector_sub, 1))?;
539
- class.define_method("_meta_selector_and", method!(RbExpr::_meta_selector_and, 1))?;
540
- class.define_method("_meta_as_selector", method!(RbExpr::_meta_as_selector, 0))?;
541
- class.define_method("meta_tree_format", method!(RbExpr::meta_tree_format, 0))?;
544
+ class.define_method("meta_tree_format", method!(RbExpr::meta_tree_format, 1))?;
542
545
 
543
546
  // name
544
547
  class.define_method("name_keep", method!(RbExpr::name_keep, 0))?;
@@ -550,15 +553,10 @@ fn init(ruby: &Ruby) -> RbResult<()> {
550
553
 
551
554
  // maybe add to different class
552
555
  let class = module.define_module("Plr")?;
553
- class.define_singleton_method("dtype_cols", function!(functions::lazy::dtype_cols, 1))?;
554
- class.define_singleton_method("index_cols", function!(functions::lazy::index_cols, 1))?;
555
556
  class.define_singleton_method("col", function!(functions::lazy::col, 1))?;
556
557
  class.define_singleton_method("len", function!(functions::lazy::len, 0))?;
557
- class.define_singleton_method("first", function!(functions::lazy::first, 0))?;
558
- class.define_singleton_method("last", function!(functions::lazy::last, 0))?;
559
- class.define_singleton_method("cols", function!(functions::lazy::cols, 1))?;
560
558
  class.define_singleton_method("fold", function!(functions::lazy::fold, 5))?;
561
- class.define_singleton_method("cum_fold", function!(functions::lazy::cum_fold, 4))?;
559
+ class.define_singleton_method("cum_fold", function!(functions::lazy::cum_fold, 6))?;
562
560
  class.define_singleton_method("lit", function!(functions::lazy::lit, 3))?;
563
561
  class.define_singleton_method("int_range", function!(functions::range::int_range, 4))?;
564
562
  class.define_singleton_method("int_ranges", function!(functions::range::int_ranges, 4))?;
@@ -729,7 +727,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
729
727
  class.define_singleton_method("new_from_csv", function!(RbLazyFrame::new_from_csv, -1))?;
730
728
  class.define_singleton_method(
731
729
  "new_from_parquet",
732
- function!(RbLazyFrame::new_from_parquet, -1),
730
+ function!(RbLazyFrame::new_from_parquet, 6),
733
731
  )?;
734
732
  class.define_singleton_method("new_from_ipc", function!(RbLazyFrame::new_from_ipc, 10))?;
735
733
  class.define_method("write_json", method!(RbLazyFrame::write_json, 1))?;
@@ -750,7 +748,6 @@ fn init(ruby: &Ruby) -> RbResult<()> {
750
748
  class.define_method("sink_ipc", method!(RbLazyFrame::sink_ipc, 5))?;
751
749
  class.define_method("sink_csv", method!(RbLazyFrame::sink_csv, -1))?;
752
750
  class.define_method("sink_json", method!(RbLazyFrame::sink_json, 4))?;
753
- class.define_method("fetch", method!(RbLazyFrame::fetch, 1))?;
754
751
  class.define_method("filter", method!(RbLazyFrame::filter, 1))?;
755
752
  class.define_method("select", method!(RbLazyFrame::select, 1))?;
756
753
  class.define_method("select_seq", method!(RbLazyFrame::select_seq, 1))?;
@@ -908,9 +905,9 @@ fn init(ruby: &Ruby) -> RbResult<()> {
908
905
  class.define_method("median", method!(RbSeries::median, 0))?;
909
906
  class.define_method("quantile", method!(RbSeries::quantile, 2))?;
910
907
  class.define_method("_clone", method!(RbSeries::clone, 0))?;
911
- class.define_method("apply_lambda", method!(RbSeries::apply_lambda, 3))?;
908
+ class.define_method("map_elements", method!(RbSeries::map_elements, 3))?;
912
909
  class.define_method("zip_with", method!(RbSeries::zip_with, 2))?;
913
- class.define_method("to_dummies", method!(RbSeries::to_dummies, 2))?;
910
+ class.define_method("to_dummies", method!(RbSeries::to_dummies, 3))?;
914
911
  class.define_method("n_unique", method!(RbSeries::n_unique, 0))?;
915
912
  class.define_method("floor", method!(RbSeries::floor, 0))?;
916
913
  class.define_method("shrink_to_fit", method!(RbSeries::shrink_to_fit, 0))?;
@@ -1111,5 +1108,86 @@ fn init(ruby: &Ruby) -> RbResult<()> {
1111
1108
  let class = module.define_class("ArrowArrayStream", ruby.class_object())?;
1112
1109
  class.define_method("to_i", method!(RbArrowArrayStream::to_i, 0))?;
1113
1110
 
1111
+ // catalog
1112
+ let class = module.define_class("RbCatalogClient", ruby.class_object())?;
1113
+ class.define_singleton_method("new", function!(RbCatalogClient::new, 2))?;
1114
+ class.define_singleton_method(
1115
+ "type_json_to_polars_type",
1116
+ function!(RbCatalogClient::type_json_to_polars_type, 1),
1117
+ )?;
1118
+ class.define_method("list_catalogs", method!(RbCatalogClient::list_catalogs, 0))?;
1119
+ class.define_method(
1120
+ "list_namespaces",
1121
+ method!(RbCatalogClient::list_namespaces, 1),
1122
+ )?;
1123
+ class.define_method("list_tables", method!(RbCatalogClient::list_tables, 2))?;
1124
+ class.define_method(
1125
+ "get_table_info",
1126
+ method!(RbCatalogClient::get_table_info, 3),
1127
+ )?;
1128
+ class.define_method(
1129
+ "create_catalog",
1130
+ method!(RbCatalogClient::create_catalog, 3),
1131
+ )?;
1132
+ class.define_method(
1133
+ "delete_catalog",
1134
+ method!(RbCatalogClient::delete_catalog, 2),
1135
+ )?;
1136
+ class.define_method(
1137
+ "create_namespace",
1138
+ method!(RbCatalogClient::create_namespace, 4),
1139
+ )?;
1140
+ class.define_method(
1141
+ "delete_namespace",
1142
+ method!(RbCatalogClient::delete_namespace, 3),
1143
+ )?;
1144
+ class.define_method("create_table", method!(RbCatalogClient::create_table, 9))?;
1145
+ class.define_method("delete_table", method!(RbCatalogClient::delete_table, 3))?;
1146
+
1147
+ // categories
1148
+ let class = module.define_class("RbCategories", ruby.class_object())?;
1149
+ class.define_singleton_method(
1150
+ "global_categories",
1151
+ function!(RbCategories::global_categories, 0),
1152
+ )?;
1153
+
1154
+ // data type expr
1155
+ let _class = module.define_class("RbDataTypeExpr", ruby.class_object())?;
1156
+
1157
+ // selector
1158
+ let class = module.define_class("RbSelector", ruby.class_object())?;
1159
+ class.define_method("union", method!(RbSelector::union, 1))?;
1160
+ class.define_method("difference", method!(RbSelector::difference, 1))?;
1161
+ class.define_method("exclusive_or", method!(RbSelector::exclusive_or, 1))?;
1162
+ class.define_method("intersect", method!(RbSelector::intersect, 1))?;
1163
+ class.define_singleton_method("by_dtype", function!(RbSelector::by_dtype, 1))?;
1164
+ class.define_singleton_method("by_name", function!(RbSelector::by_name, 2))?;
1165
+ class.define_singleton_method("by_index", function!(RbSelector::by_index, 2))?;
1166
+ class.define_singleton_method("first", function!(RbSelector::first, 1))?;
1167
+ class.define_singleton_method("last", function!(RbSelector::last, 1))?;
1168
+ class.define_singleton_method("matches", function!(RbSelector::matches, 1))?;
1169
+ class.define_singleton_method("enum_", function!(RbSelector::enum_, 0))?;
1170
+ class.define_singleton_method("categorical", function!(RbSelector::categorical, 0))?;
1171
+ class.define_singleton_method("nested", function!(RbSelector::nested, 0))?;
1172
+ class.define_singleton_method("list", function!(RbSelector::list, 1))?;
1173
+ class.define_singleton_method("array", function!(RbSelector::array, 2))?;
1174
+ class.define_singleton_method("struct_", function!(RbSelector::struct_, 0))?;
1175
+ class.define_singleton_method("integer", function!(RbSelector::integer, 0))?;
1176
+ class.define_singleton_method("signed_integer", function!(RbSelector::signed_integer, 0))?;
1177
+ class.define_singleton_method(
1178
+ "unsigned_integer",
1179
+ function!(RbSelector::unsigned_integer, 0),
1180
+ )?;
1181
+ class.define_singleton_method("float", function!(RbSelector::float, 0))?;
1182
+ class.define_singleton_method("decimal", function!(RbSelector::decimal, 0))?;
1183
+ class.define_singleton_method("numeric", function!(RbSelector::numeric, 0))?;
1184
+ class.define_singleton_method("temporal", function!(RbSelector::temporal, 0))?;
1185
+ class.define_singleton_method("datetime", function!(RbSelector::datetime, 2))?;
1186
+ class.define_singleton_method("duration", function!(RbSelector::duration, 1))?;
1187
+ class.define_singleton_method("object", function!(RbSelector::object, 0))?;
1188
+ class.define_singleton_method("empty", function!(RbSelector::empty, 0))?;
1189
+ class.define_singleton_method("all", function!(RbSelector::all, 0))?;
1190
+ class.define_method("_hash", method!(RbSelector::hash, 0))?;
1191
+
1114
1192
  Ok(())
1115
1193
  }