polars-df 0.19.0 → 0.21.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +15 -0
- data/Cargo.lock +211 -320
- data/LICENSE.txt +1 -1
- data/ext/polars/Cargo.toml +13 -9
- data/ext/polars/src/batched_csv.rs +2 -2
- data/ext/polars/src/catalog/mod.rs +1 -0
- data/ext/polars/src/catalog/unity.rs +450 -0
- data/ext/polars/src/conversion/any_value.rs +9 -19
- data/ext/polars/src/conversion/categorical.rs +30 -0
- data/ext/polars/src/conversion/chunked_array.rs +8 -8
- data/ext/polars/src/conversion/mod.rs +187 -109
- data/ext/polars/src/dataframe/construction.rs +2 -2
- data/ext/polars/src/dataframe/export.rs +2 -2
- data/ext/polars/src/dataframe/general.rs +4 -2
- data/ext/polars/src/dataframe/io.rs +2 -2
- data/ext/polars/src/exceptions.rs +1 -1
- data/ext/polars/src/expr/datatype.rs +14 -0
- data/ext/polars/src/expr/general.rs +36 -44
- data/ext/polars/src/expr/list.rs +27 -17
- data/ext/polars/src/expr/meta.rs +18 -41
- data/ext/polars/src/expr/mod.rs +3 -1
- data/ext/polars/src/expr/name.rs +2 -2
- data/ext/polars/src/expr/rolling.rs +1 -1
- data/ext/polars/src/expr/selector.rs +219 -0
- data/ext/polars/src/expr/string.rs +14 -7
- data/ext/polars/src/file.rs +12 -6
- data/ext/polars/src/functions/io.rs +2 -11
- data/ext/polars/src/functions/lazy.rs +22 -54
- data/ext/polars/src/functions/meta.rs +2 -2
- data/ext/polars/src/functions/misc.rs +1 -1
- data/ext/polars/src/functions/range.rs +14 -10
- data/ext/polars/src/functions/string_cache.rs +4 -5
- data/ext/polars/src/interop/numo/numo_rs.rs +1 -1
- data/ext/polars/src/interop/numo/to_numo_series.rs +1 -1
- data/ext/polars/src/io/mod.rs +102 -0
- data/ext/polars/src/lazyframe/general.rs +75 -113
- data/ext/polars/src/lazyframe/serde.rs +1 -1
- data/ext/polars/src/lazyframe/sink.rs +6 -6
- data/ext/polars/src/lib.rs +104 -26
- data/ext/polars/src/map/dataframe.rs +7 -7
- data/ext/polars/src/map/lazy.rs +1 -1
- data/ext/polars/src/map/mod.rs +31 -19
- data/ext/polars/src/map/series.rs +8 -8
- data/ext/polars/src/on_startup.rs +5 -2
- data/ext/polars/src/rb_modules.rs +1 -1
- data/ext/polars/src/series/construction.rs +11 -7
- data/ext/polars/src/series/export.rs +6 -4
- data/ext/polars/src/series/general.rs +12 -207
- data/ext/polars/src/series/import.rs +2 -2
- data/ext/polars/src/series/map.rs +227 -0
- data/ext/polars/src/series/mod.rs +2 -1
- data/ext/polars/src/series/scatter.rs +1 -1
- data/ext/polars/src/utils.rs +10 -2
- data/lib/polars/cat_name_space.rb +3 -43
- data/lib/polars/catalog/unity/catalog_info.rb +20 -0
- data/lib/polars/catalog/unity/column_info.rb +31 -0
- data/lib/polars/catalog/unity/namespace_info.rb +21 -0
- data/lib/polars/catalog/unity/table_info.rb +50 -0
- data/lib/polars/catalog.rb +448 -0
- data/lib/polars/convert.rb +10 -0
- data/lib/polars/data_frame.rb +151 -30
- data/lib/polars/data_types.rb +47 -3
- data/lib/polars/exceptions.rb +7 -2
- data/lib/polars/expr.rb +48 -39
- data/lib/polars/functions/col.rb +6 -5
- data/lib/polars/functions/eager.rb +1 -1
- data/lib/polars/functions/lazy.rb +114 -15
- data/lib/polars/functions/repeat.rb +4 -0
- data/lib/polars/io/csv.rb +18 -0
- data/lib/polars/io/json.rb +16 -0
- data/lib/polars/io/ndjson.rb +13 -0
- data/lib/polars/io/parquet.rb +45 -63
- data/lib/polars/io/scan_options.rb +47 -0
- data/lib/polars/lazy_frame.rb +163 -75
- data/lib/polars/list_expr.rb +213 -17
- data/lib/polars/list_name_space.rb +121 -8
- data/lib/polars/meta_expr.rb +14 -29
- data/lib/polars/scan_cast_options.rb +64 -0
- data/lib/polars/schema.rb +6 -1
- data/lib/polars/selector.rb +138 -0
- data/lib/polars/selectors.rb +931 -202
- data/lib/polars/series.rb +46 -19
- data/lib/polars/string_expr.rb +24 -3
- data/lib/polars/string_name_space.rb +12 -1
- data/lib/polars/utils/parse.rb +40 -0
- data/lib/polars/utils.rb +5 -1
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +8 -0
- metadata +17 -2
@@ -1,4 +1,4 @@
|
|
1
|
-
use magnus::{
|
1
|
+
use magnus::{IntoValue, RArray, RHash, TryConvert, Value, r_hash::ForEach, typed_data::Obj};
|
2
2
|
use polars::io::{HiveOptions, RowIndex};
|
3
3
|
use polars::lazy::frame::LazyFrame;
|
4
4
|
use polars::prelude::*;
|
@@ -6,16 +6,17 @@ use polars_plan::dsl::ScanSources;
|
|
6
6
|
use std::cell::RefCell;
|
7
7
|
use std::io::BufWriter;
|
8
8
|
use std::num::NonZeroUsize;
|
9
|
-
use std::path::PathBuf;
|
10
9
|
|
11
10
|
use super::SinkTarget;
|
12
11
|
use crate::conversion::*;
|
13
12
|
use crate::expr::rb_exprs_to_exprs;
|
13
|
+
use crate::expr::selector::RbSelector;
|
14
14
|
use crate::file::get_file_like;
|
15
|
+
use crate::io::RbScanOptions;
|
15
16
|
use crate::{RbDataFrame, RbExpr, RbLazyFrame, RbLazyGroupBy, RbPolarsErr, RbResult, RbValueError};
|
16
17
|
|
17
|
-
fn rbobject_to_first_path_and_scan_sources(obj: Value) -> RbResult<(Option<
|
18
|
-
use crate::file::{
|
18
|
+
fn rbobject_to_first_path_and_scan_sources(obj: Value) -> RbResult<(Option<PlPath>, ScanSources)> {
|
19
|
+
use crate::file::{RubyScanSourceInput, get_ruby_scan_source_input};
|
19
20
|
Ok(match get_ruby_scan_source_input(obj, false)? {
|
20
21
|
RubyScanSourceInput::Path(path) => (Some(path.clone()), ScanSources::Paths([path].into())),
|
21
22
|
RubyScanSourceInput::File(file) => (None, ScanSources::Files([file].into())),
|
@@ -43,7 +44,7 @@ impl RbLazyFrame {
|
|
43
44
|
|
44
45
|
let sources = sources.0;
|
45
46
|
let (_first_path, sources) = match source {
|
46
|
-
None => (sources.first_path().map(|p| p.
|
47
|
+
None => (sources.first_path().map(|p| p.into_owned()), sources),
|
47
48
|
Some(source) => rbobject_to_first_path_and_scan_sources(source)?,
|
48
49
|
};
|
49
50
|
|
@@ -111,7 +112,7 @@ impl RbLazyFrame {
|
|
111
112
|
|
112
113
|
let sources = sources.0;
|
113
114
|
let (_first_path, sources) = match source {
|
114
|
-
None => (sources.first_path().map(|p| p.
|
115
|
+
None => (sources.first_path().map(|p| p.into_owned()), sources),
|
115
116
|
Some(source) => rbobject_to_first_path_and_scan_sources(source)?,
|
116
117
|
};
|
117
118
|
|
@@ -147,72 +148,35 @@ impl RbLazyFrame {
|
|
147
148
|
Ok(r.finish().map_err(RbPolarsErr::from)?.into())
|
148
149
|
}
|
149
150
|
|
150
|
-
pub fn new_from_parquet(
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
let cloud_options = Option::<Vec<(String, String)>>::try_convert(arguments[8])?;
|
160
|
-
let _credential_provider = Option::<Value>::try_convert(arguments[9])?;
|
161
|
-
let use_statistics = bool::try_convert(arguments[10])?;
|
162
|
-
let hive_partitioning = Option::<bool>::try_convert(arguments[11])?;
|
163
|
-
let schema = Option::<Wrap<Schema>>::try_convert(arguments[12])?;
|
164
|
-
let hive_schema = Option::<Wrap<Schema>>::try_convert(arguments[13])?;
|
165
|
-
let try_parse_hive_dates = bool::try_convert(arguments[14])?;
|
166
|
-
let retries = usize::try_convert(arguments[15])?;
|
167
|
-
let glob = bool::try_convert(arguments[16])?;
|
168
|
-
let include_file_paths = Option::<String>::try_convert(arguments[17])?;
|
169
|
-
let allow_missing_columns = bool::try_convert(arguments[18])?;
|
151
|
+
pub fn new_from_parquet(
|
152
|
+
sources: Wrap<ScanSources>,
|
153
|
+
schema: Option<Wrap<Schema>>,
|
154
|
+
scan_options: RbScanOptions,
|
155
|
+
parallel: Wrap<ParallelStrategy>,
|
156
|
+
low_memory: bool,
|
157
|
+
use_statistics: bool,
|
158
|
+
) -> RbResult<Self> {
|
159
|
+
use crate::utils::to_rb_err;
|
170
160
|
|
171
161
|
let parallel = parallel.0;
|
172
|
-
let hive_schema = hive_schema.map(|s| Arc::new(s.0));
|
173
|
-
|
174
|
-
let row_index = row_index.map(|(name, offset)| RowIndex {
|
175
|
-
name: name.into(),
|
176
|
-
offset,
|
177
|
-
});
|
178
162
|
|
179
|
-
let
|
180
|
-
|
181
|
-
hive_start_idx: 0,
|
182
|
-
schema: hive_schema,
|
183
|
-
try_parse_dates: try_parse_hive_dates,
|
184
|
-
};
|
185
|
-
|
186
|
-
let mut args = ScanArgsParquet {
|
187
|
-
n_rows,
|
188
|
-
cache,
|
163
|
+
let options = ParquetOptions {
|
164
|
+
schema: schema.map(|x| Arc::new(x.0)),
|
189
165
|
parallel,
|
190
|
-
rechunk,
|
191
|
-
row_index,
|
192
166
|
low_memory,
|
193
|
-
cloud_options: None,
|
194
167
|
use_statistics,
|
195
|
-
schema: schema.map(|x| Arc::new(x.0)),
|
196
|
-
hive_options,
|
197
|
-
glob,
|
198
|
-
include_file_paths: include_file_paths.map(|x| x.into()),
|
199
|
-
allow_missing_columns,
|
200
168
|
};
|
201
169
|
|
202
170
|
let sources = sources.0;
|
203
|
-
let
|
204
|
-
None => (sources.first_path().map(|p| p.to_path_buf()), sources),
|
205
|
-
Some(source) => rbobject_to_first_path_and_scan_sources(source)?,
|
206
|
-
};
|
171
|
+
let first_path = sources.first_path().map(|p| p.into_owned());
|
207
172
|
|
208
|
-
|
209
|
-
|
210
|
-
let cloud_options =
|
211
|
-
parse_cloud_options(&first_path_url, cloud_options.unwrap_or_default())?;
|
212
|
-
args.cloud_options = Some(cloud_options.with_max_retries(retries));
|
213
|
-
}
|
173
|
+
let unified_scan_args =
|
174
|
+
scan_options.extract_unified_scan_args(first_path.as_ref().map(|p| p.as_ref()))?;
|
214
175
|
|
215
|
-
let lf =
|
176
|
+
let lf: LazyFrame = DslBuilder::scan_parquet(sources, options, unified_scan_args)
|
177
|
+
.map_err(to_rb_err)?
|
178
|
+
.build()
|
179
|
+
.into();
|
216
180
|
|
217
181
|
Ok(lf.into())
|
218
182
|
}
|
@@ -254,7 +218,7 @@ impl RbLazyFrame {
|
|
254
218
|
|
255
219
|
let sources = sources.0;
|
256
220
|
let (_first_path, sources) = match source {
|
257
|
-
None => (sources.first_path().map(|p| p.
|
221
|
+
None => (sources.first_path().map(|p| p.into_owned()), sources),
|
258
222
|
Some(source) => rbobject_to_first_path_and_scan_sources(source)?,
|
259
223
|
};
|
260
224
|
|
@@ -265,7 +229,7 @@ impl RbLazyFrame {
|
|
265
229
|
pub fn write_json(&self, rb_f: Value) -> RbResult<()> {
|
266
230
|
let file = BufWriter::new(get_file_like(rb_f, true)?);
|
267
231
|
serde_json::to_writer(file, &self.ldf.borrow().logical_plan)
|
268
|
-
.map_err(|err| RbValueError::new_err(format!("{:?}"
|
232
|
+
.map_err(|err| RbValueError::new_err(format!("{err:?}")))?;
|
269
233
|
Ok(())
|
270
234
|
}
|
271
235
|
|
@@ -305,7 +269,7 @@ impl RbLazyFrame {
|
|
305
269
|
.with_predicate_pushdown(predicate_pushdown)
|
306
270
|
.with_simplify_expr(simplify_expr)
|
307
271
|
.with_slice_pushdown(slice_pushdown)
|
308
|
-
.
|
272
|
+
.with_new_streaming(allow_streaming)
|
309
273
|
._with_eager(_eager)
|
310
274
|
.with_projection_pushdown(projection_pushdown);
|
311
275
|
|
@@ -399,10 +363,8 @@ impl RbLazyFrame {
|
|
399
363
|
let cloud_options = match target.base_path() {
|
400
364
|
None => None,
|
401
365
|
Some(base_path) => {
|
402
|
-
let cloud_options =
|
403
|
-
base_path.to_str().
|
404
|
-
cloud_options.unwrap_or_default(),
|
405
|
-
)?;
|
366
|
+
let cloud_options =
|
367
|
+
parse_cloud_options(base_path.to_str(), cloud_options.unwrap_or_default())?;
|
406
368
|
Some(cloud_options.with_max_retries(retries))
|
407
369
|
}
|
408
370
|
};
|
@@ -434,10 +396,8 @@ impl RbLazyFrame {
|
|
434
396
|
let cloud_options = match target.base_path() {
|
435
397
|
None => None,
|
436
398
|
Some(base_path) => {
|
437
|
-
let cloud_options =
|
438
|
-
base_path.to_str().
|
439
|
-
cloud_options.unwrap_or_default(),
|
440
|
-
)?;
|
399
|
+
let cloud_options =
|
400
|
+
parse_cloud_options(base_path.to_str(), cloud_options.unwrap_or_default())?;
|
441
401
|
Some(cloud_options.with_max_retries(retries))
|
442
402
|
}
|
443
403
|
};
|
@@ -466,11 +426,12 @@ impl RbLazyFrame {
|
|
466
426
|
let time_format = Option::<String>::try_convert(arguments[9])?;
|
467
427
|
let float_scientific = Option::<bool>::try_convert(arguments[10])?;
|
468
428
|
let float_precision = Option::<usize>::try_convert(arguments[11])?;
|
469
|
-
let
|
470
|
-
let
|
471
|
-
let
|
472
|
-
let
|
473
|
-
let
|
429
|
+
let decimal_comma = bool::try_convert(arguments[12])?;
|
430
|
+
let null_value = Option::<String>::try_convert(arguments[13])?;
|
431
|
+
let quote_style = Option::<Wrap<QuoteStyle>>::try_convert(arguments[14])?;
|
432
|
+
let cloud_options = Option::<Vec<(String, String)>>::try_convert(arguments[15])?;
|
433
|
+
let retries = usize::try_convert(arguments[16])?;
|
434
|
+
let sink_options = Wrap::<SinkOptions>::try_convert(arguments[17])?;
|
474
435
|
|
475
436
|
let quote_style = quote_style.map_or(QuoteStyle::default(), |wrap| wrap.0);
|
476
437
|
let null_value = null_value.unwrap_or(SerializeOptions::default().null);
|
@@ -481,6 +442,7 @@ impl RbLazyFrame {
|
|
481
442
|
datetime_format,
|
482
443
|
float_scientific,
|
483
444
|
float_precision,
|
445
|
+
decimal_comma,
|
484
446
|
separator,
|
485
447
|
quote_char,
|
486
448
|
null: null_value,
|
@@ -498,10 +460,8 @@ impl RbLazyFrame {
|
|
498
460
|
let cloud_options = match target.base_path() {
|
499
461
|
None => None,
|
500
462
|
Some(base_path) => {
|
501
|
-
let cloud_options =
|
502
|
-
base_path.to_str().
|
503
|
-
cloud_options.unwrap_or_default(),
|
504
|
-
)?;
|
463
|
+
let cloud_options =
|
464
|
+
parse_cloud_options(base_path.to_str(), cloud_options.unwrap_or_default())?;
|
505
465
|
Some(cloud_options.with_max_retries(retries))
|
506
466
|
}
|
507
467
|
};
|
@@ -529,10 +489,8 @@ impl RbLazyFrame {
|
|
529
489
|
let cloud_options = match target.base_path() {
|
530
490
|
None => None,
|
531
491
|
Some(base_path) => {
|
532
|
-
let cloud_options =
|
533
|
-
base_path.to_str().
|
534
|
-
cloud_options.unwrap_or_default(),
|
535
|
-
)?;
|
492
|
+
let cloud_options =
|
493
|
+
parse_cloud_options(base_path.to_str(), cloud_options.unwrap_or_default())?;
|
536
494
|
Some(cloud_options.with_max_retries(retries))
|
537
495
|
}
|
538
496
|
};
|
@@ -546,12 +504,6 @@ impl RbLazyFrame {
|
|
546
504
|
.map_err(Into::into)
|
547
505
|
}
|
548
506
|
|
549
|
-
pub fn fetch(&self, n_rows: usize) -> RbResult<RbDataFrame> {
|
550
|
-
let ldf = self.ldf.borrow().clone();
|
551
|
-
let df = ldf.fetch(n_rows).map_err(RbPolarsErr::from)?;
|
552
|
-
Ok(df.into())
|
553
|
-
}
|
554
|
-
|
555
507
|
pub fn filter(&self, predicate: &RbExpr) -> Self {
|
556
508
|
let ldf = self.ldf.borrow().clone();
|
557
509
|
ldf.filter(predicate.inner.clone()).into()
|
@@ -689,15 +641,19 @@ impl RbLazyFrame {
|
|
689
641
|
.allow_parallel(allow_parallel)
|
690
642
|
.force_parallel(force_parallel)
|
691
643
|
.coalesce(coalesce)
|
692
|
-
.how(JoinType::AsOf(AsOfOptions {
|
644
|
+
.how(JoinType::AsOf(Box::new(AsOfOptions {
|
693
645
|
strategy: strategy.0,
|
694
646
|
left_by: left_by.map(strings_to_pl_smallstr),
|
695
647
|
right_by: right_by.map(strings_to_pl_smallstr),
|
696
|
-
tolerance: tolerance.map(|t|
|
648
|
+
tolerance: tolerance.map(|t| {
|
649
|
+
let av = t.0.into_static();
|
650
|
+
let dtype = av.dtype();
|
651
|
+
Scalar::new(dtype, av)
|
652
|
+
}),
|
697
653
|
tolerance_str: tolerance_str.map(|s| s.into()),
|
698
654
|
allow_eq,
|
699
655
|
check_sortedness,
|
700
|
-
}))
|
656
|
+
})))
|
701
657
|
.suffix(suffix)
|
702
658
|
.finish()
|
703
659
|
.into())
|
@@ -832,10 +788,12 @@ impl RbLazyFrame {
|
|
832
788
|
out.into()
|
833
789
|
}
|
834
790
|
|
835
|
-
pub fn explode(&self,
|
836
|
-
|
837
|
-
|
838
|
-
|
791
|
+
pub fn explode(&self, subset: &RbSelector) -> Self {
|
792
|
+
self.ldf
|
793
|
+
.borrow()
|
794
|
+
.clone()
|
795
|
+
.explode(subset.inner.clone())
|
796
|
+
.into()
|
839
797
|
}
|
840
798
|
|
841
799
|
pub fn null_count(&self) -> Self {
|
@@ -846,10 +804,11 @@ impl RbLazyFrame {
|
|
846
804
|
pub fn unique(
|
847
805
|
&self,
|
848
806
|
maintain_order: bool,
|
849
|
-
subset: Option
|
807
|
+
subset: Option<&RbSelector>,
|
850
808
|
keep: Wrap<UniqueKeepStrategy>,
|
851
809
|
) -> RbResult<Self> {
|
852
810
|
let ldf = self.ldf.borrow().clone();
|
811
|
+
let subset = subset.map(|e| e.inner.clone());
|
853
812
|
Ok(match maintain_order {
|
854
813
|
true => ldf.unique_stable_generic(subset, keep.0),
|
855
814
|
false => ldf.unique_generic(subset, keep.0),
|
@@ -857,9 +816,11 @@ impl RbLazyFrame {
|
|
857
816
|
.into())
|
858
817
|
}
|
859
818
|
|
860
|
-
pub fn drop_nulls(&self, subset: Option
|
861
|
-
|
862
|
-
|
819
|
+
pub fn drop_nulls(&self, subset: Option<&RbSelector>) -> Self {
|
820
|
+
self.ldf
|
821
|
+
.borrow()
|
822
|
+
.clone()
|
823
|
+
.drop_nulls(subset.map(|e| e.inner.clone()))
|
863
824
|
.into()
|
864
825
|
}
|
865
826
|
|
@@ -875,16 +836,14 @@ impl RbLazyFrame {
|
|
875
836
|
|
876
837
|
pub fn unpivot(
|
877
838
|
&self,
|
878
|
-
on:
|
879
|
-
index:
|
839
|
+
on: &RbSelector,
|
840
|
+
index: &RbSelector,
|
880
841
|
value_name: Option<String>,
|
881
842
|
variable_name: Option<String>,
|
882
843
|
) -> RbResult<Self> {
|
883
|
-
let on = rb_exprs_to_exprs(on)?;
|
884
|
-
let index = rb_exprs_to_exprs(index)?;
|
885
844
|
let args = UnpivotArgsDSL {
|
886
|
-
on: on.
|
887
|
-
index: index.
|
845
|
+
on: on.inner.clone(),
|
846
|
+
index: index.inner.clone(),
|
888
847
|
value_name: value_name.map(|s| s.into()),
|
889
848
|
variable_name: variable_name.map(|s| s.into()),
|
890
849
|
};
|
@@ -898,9 +857,8 @@ impl RbLazyFrame {
|
|
898
857
|
ldf.with_row_index(&name, offset).into()
|
899
858
|
}
|
900
859
|
|
901
|
-
pub fn drop(&self,
|
902
|
-
|
903
|
-
ldf.drop(cols).into()
|
860
|
+
pub fn drop(&self, columns: &RbSelector) -> Self {
|
861
|
+
self.ldf.borrow().clone().drop(columns.inner.clone()).into()
|
904
862
|
}
|
905
863
|
|
906
864
|
pub fn cast(&self, rb_dtypes: RHash, strict: bool) -> RbResult<Self> {
|
@@ -941,8 +899,12 @@ impl RbLazyFrame {
|
|
941
899
|
Ok(schema_dict)
|
942
900
|
}
|
943
901
|
|
944
|
-
pub fn unnest(&self,
|
945
|
-
self.ldf
|
902
|
+
pub fn unnest(&self, columns: &RbSelector) -> Self {
|
903
|
+
self.ldf
|
904
|
+
.borrow()
|
905
|
+
.clone()
|
906
|
+
.unnest(columns.inner.clone())
|
907
|
+
.into()
|
946
908
|
}
|
947
909
|
|
948
910
|
pub fn count(&self) -> Self {
|
@@ -25,7 +25,7 @@ impl RbLazyFrame {
|
|
25
25
|
let json = unsafe { std::mem::transmute::<&'_ str, &'static str>(json.as_str()) };
|
26
26
|
|
27
27
|
let lp = serde_json::from_str::<DslPlan>(json)
|
28
|
-
.map_err(|err| RbValueError::new_err(format!("{:?}"
|
28
|
+
.map_err(|err| RbValueError::new_err(format!("{err:?}")))?;
|
29
29
|
Ok(LazyFrame::from(lp).into())
|
30
30
|
}
|
31
31
|
}
|
@@ -1,9 +1,9 @@
|
|
1
|
-
use std::path::{Path, PathBuf};
|
2
1
|
use std::sync::{Arc, Mutex};
|
3
2
|
|
4
3
|
use magnus::{RHash, TryConvert, Value};
|
5
4
|
use polars::prelude::sync_on_close::SyncOnCloseType;
|
6
|
-
use polars::prelude::{SinkOptions, SpecialEq};
|
5
|
+
use polars::prelude::{PlPath, SinkOptions, SpecialEq};
|
6
|
+
use polars_utils::plpath::PlPathRef;
|
7
7
|
|
8
8
|
use crate::prelude::Wrap;
|
9
9
|
use crate::{RbResult, RbValueError};
|
@@ -15,8 +15,8 @@ pub enum SinkTarget {
|
|
15
15
|
|
16
16
|
impl TryConvert for Wrap<polars_plan::dsl::SinkTarget> {
|
17
17
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
18
|
-
if let Ok(v) =
|
19
|
-
Ok(Wrap(polars::prelude::SinkTarget::Path(
|
18
|
+
if let Ok(v) = String::try_convert(ob) {
|
19
|
+
Ok(Wrap(polars::prelude::SinkTarget::Path(PlPath::new(&v))))
|
20
20
|
} else {
|
21
21
|
let writer = {
|
22
22
|
let rb_f = ob;
|
@@ -39,10 +39,10 @@ impl TryConvert for SinkTarget {
|
|
39
39
|
}
|
40
40
|
|
41
41
|
impl SinkTarget {
|
42
|
-
pub fn base_path(&self) -> Option
|
42
|
+
pub fn base_path(&self) -> Option<PlPathRef<'_>> {
|
43
43
|
match self {
|
44
44
|
Self::File(t) => match t {
|
45
|
-
polars::prelude::SinkTarget::Path(p) => Some(p.
|
45
|
+
polars::prelude::SinkTarget::Path(p) => Some(p.as_ref()),
|
46
46
|
polars::prelude::SinkTarget::Dyn(_) => None,
|
47
47
|
},
|
48
48
|
}
|
data/ext/polars/src/lib.rs
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
mod allocator;
|
2
2
|
mod batched_csv;
|
3
|
+
mod catalog;
|
3
4
|
mod conversion;
|
4
5
|
mod dataframe;
|
5
6
|
mod error;
|
@@ -8,6 +9,7 @@ mod expr;
|
|
8
9
|
mod file;
|
9
10
|
mod functions;
|
10
11
|
mod interop;
|
12
|
+
mod io;
|
11
13
|
mod lazyframe;
|
12
14
|
mod lazygroupby;
|
13
15
|
mod map;
|
@@ -20,23 +22,25 @@ mod sql;
|
|
20
22
|
mod utils;
|
21
23
|
|
22
24
|
use batched_csv::RbBatchedCsv;
|
25
|
+
use catalog::unity::RbCatalogClient;
|
23
26
|
use conversion::*;
|
24
27
|
use dataframe::RbDataFrame;
|
25
28
|
use error::RbPolarsErr;
|
26
29
|
use exceptions::{RbTypeError, RbValueError};
|
27
|
-
use expr::rb_exprs_to_exprs;
|
28
30
|
use expr::RbExpr;
|
31
|
+
use expr::rb_exprs_to_exprs;
|
32
|
+
use expr::selector::RbSelector;
|
29
33
|
use functions::string_cache::RbStringCacheHolder;
|
30
34
|
use functions::whenthen::{RbChainedThen, RbChainedWhen, RbThen, RbWhen};
|
31
35
|
use interop::arrow::to_ruby::RbArrowArrayStream;
|
32
36
|
use lazyframe::RbLazyFrame;
|
33
37
|
use lazygroupby::RbLazyGroupBy;
|
34
|
-
use magnus::{define_module, function, method, prelude
|
38
|
+
use magnus::{Ruby, define_module, function, method, prelude::*};
|
35
39
|
use series::RbSeries;
|
36
40
|
use sql::RbSQLContext;
|
37
41
|
|
38
|
-
use magnus::error::Result as RbResult;
|
39
42
|
use magnus::Error as RbErr;
|
43
|
+
use magnus::error::Result as RbResult;
|
40
44
|
|
41
45
|
// TODO move
|
42
46
|
fn re_escape(pattern: String) -> String {
|
@@ -143,7 +147,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
143
147
|
class.define_method("pivot_expr", method!(RbDataFrame::pivot_expr, 7))?;
|
144
148
|
class.define_method("partition_by", method!(RbDataFrame::partition_by, 3))?;
|
145
149
|
class.define_method("lazy", method!(RbDataFrame::lazy, 0))?;
|
146
|
-
class.define_method("to_dummies", method!(RbDataFrame::to_dummies,
|
150
|
+
class.define_method("to_dummies", method!(RbDataFrame::to_dummies, 4))?;
|
147
151
|
class.define_method("null_count", method!(RbDataFrame::null_count, 0))?;
|
148
152
|
class.define_method("map_rows", method!(RbDataFrame::map_rows, 3))?;
|
149
153
|
class.define_method("shrink_to_fit", method!(RbDataFrame::shrink_to_fit, 0))?;
|
@@ -217,7 +221,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
217
221
|
class.define_method("peak_max", method!(RbExpr::peak_max, 0))?;
|
218
222
|
class.define_method("arg_max", method!(RbExpr::arg_max, 0))?;
|
219
223
|
class.define_method("arg_min", method!(RbExpr::arg_min, 0))?;
|
220
|
-
class.define_method("search_sorted", method!(RbExpr::search_sorted,
|
224
|
+
class.define_method("search_sorted", method!(RbExpr::search_sorted, 3))?;
|
221
225
|
class.define_method("gather", method!(RbExpr::gather, 1))?;
|
222
226
|
class.define_method("get", method!(RbExpr::get, 1))?;
|
223
227
|
class.define_method("sort_by", method!(RbExpr::sort_by, 5))?;
|
@@ -329,7 +333,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
329
333
|
class.define_method("str_hex_decode", method!(RbExpr::str_hex_decode, 1))?;
|
330
334
|
class.define_method("str_base64_encode", method!(RbExpr::str_base64_encode, 0))?;
|
331
335
|
class.define_method("str_base64_decode", method!(RbExpr::str_base64_decode, 1))?;
|
332
|
-
class.define_method("str_to_integer", method!(RbExpr::str_to_integer,
|
336
|
+
class.define_method("str_to_integer", method!(RbExpr::str_to_integer, 3))?;
|
333
337
|
class.define_method("str_json_decode", method!(RbExpr::str_json_decode, 2))?;
|
334
338
|
class.define_method("binary_hex_encode", method!(RbExpr::bin_hex_encode, 0))?;
|
335
339
|
class.define_method("binary_hex_decode", method!(RbExpr::bin_hex_decode, 1))?;
|
@@ -426,7 +430,6 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
426
430
|
class.define_method("dot", method!(RbExpr::dot, 1))?;
|
427
431
|
class.define_method("reinterpret", method!(RbExpr::reinterpret, 1))?;
|
428
432
|
class.define_method("mode", method!(RbExpr::mode, 0))?;
|
429
|
-
class.define_method("exclude", method!(RbExpr::exclude, 1))?;
|
430
433
|
class.define_method("interpolate", method!(RbExpr::interpolate, 1))?;
|
431
434
|
class.define_method("interpolate_by", method!(RbExpr::interpolate_by, 1))?;
|
432
435
|
class.define_method("rolling_sum", method!(RbExpr::rolling_sum, 4))?;
|
@@ -464,9 +467,10 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
464
467
|
class.define_method("list_to_array", method!(RbExpr::list_to_array, 1))?;
|
465
468
|
class.define_method("list_mean", method!(RbExpr::list_mean, 0))?;
|
466
469
|
class.define_method("list_tail", method!(RbExpr::list_tail, 1))?;
|
467
|
-
class.define_method("list_sort", method!(RbExpr::list_sort,
|
470
|
+
class.define_method("list_sort", method!(RbExpr::list_sort, 2))?;
|
468
471
|
class.define_method("list_reverse", method!(RbExpr::list_reverse, 0))?;
|
469
472
|
class.define_method("list_unique", method!(RbExpr::list_unique, 1))?;
|
473
|
+
class.define_method("list_set_operation", method!(RbExpr::list_set_operation, 2))?;
|
470
474
|
class.define_method("list_get", method!(RbExpr::list_get, 2))?;
|
471
475
|
class.define_method("list_join", method!(RbExpr::list_join, 2))?;
|
472
476
|
class.define_method("list_arg_min", method!(RbExpr::list_arg_min, 0))?;
|
@@ -476,8 +480,9 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
476
480
|
class.define_method("list_diff", method!(RbExpr::list_diff, 2))?;
|
477
481
|
class.define_method("list_shift", method!(RbExpr::list_shift, 1))?;
|
478
482
|
class.define_method("list_slice", method!(RbExpr::list_slice, 2))?;
|
479
|
-
class.define_method("list_eval", method!(RbExpr::list_eval,
|
480
|
-
class.define_method("
|
483
|
+
class.define_method("list_eval", method!(RbExpr::list_eval, 1))?;
|
484
|
+
class.define_method("list_filter", method!(RbExpr::list_filter, 1))?;
|
485
|
+
class.define_method("cumulative_eval", method!(RbExpr::cumulative_eval, 2))?;
|
481
486
|
class.define_method("list_to_struct", method!(RbExpr::list_to_struct, 3))?;
|
482
487
|
class.define_method("rank", method!(RbExpr::rank, 3))?;
|
483
488
|
class.define_method("diff", method!(RbExpr::diff, 2))?;
|
@@ -518,9 +523,11 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
518
523
|
class.define_method("set_sorted_flag", method!(RbExpr::set_sorted_flag, 1))?;
|
519
524
|
class.define_method("replace", method!(RbExpr::replace, 2))?;
|
520
525
|
class.define_method("replace_strict", method!(RbExpr::replace_strict, 4))?;
|
526
|
+
class.define_method("into_selector", method!(RbExpr::into_selector, 0))?;
|
527
|
+
class.define_singleton_method("new_selector", function!(RbExpr::new_selector, 1))?;
|
521
528
|
|
522
529
|
// meta
|
523
|
-
class.define_method("meta_pop", method!(RbExpr::meta_pop,
|
530
|
+
class.define_method("meta_pop", method!(RbExpr::meta_pop, 1))?;
|
524
531
|
class.define_method("meta_eq", method!(RbExpr::meta_eq, 1))?;
|
525
532
|
class.define_method("meta_roots", method!(RbExpr::meta_root_names, 0))?;
|
526
533
|
class.define_method("meta_output_name", method!(RbExpr::meta_output_name, 0))?;
|
@@ -534,11 +541,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
534
541
|
"meta_is_regex_projection",
|
535
542
|
method!(RbExpr::meta_is_regex_projection, 0),
|
536
543
|
)?;
|
537
|
-
class.define_method("
|
538
|
-
class.define_method("_meta_selector_sub", method!(RbExpr::_meta_selector_sub, 1))?;
|
539
|
-
class.define_method("_meta_selector_and", method!(RbExpr::_meta_selector_and, 1))?;
|
540
|
-
class.define_method("_meta_as_selector", method!(RbExpr::_meta_as_selector, 0))?;
|
541
|
-
class.define_method("meta_tree_format", method!(RbExpr::meta_tree_format, 0))?;
|
544
|
+
class.define_method("meta_tree_format", method!(RbExpr::meta_tree_format, 1))?;
|
542
545
|
|
543
546
|
// name
|
544
547
|
class.define_method("name_keep", method!(RbExpr::name_keep, 0))?;
|
@@ -550,15 +553,10 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
550
553
|
|
551
554
|
// maybe add to different class
|
552
555
|
let class = module.define_module("Plr")?;
|
553
|
-
class.define_singleton_method("dtype_cols", function!(functions::lazy::dtype_cols, 1))?;
|
554
|
-
class.define_singleton_method("index_cols", function!(functions::lazy::index_cols, 1))?;
|
555
556
|
class.define_singleton_method("col", function!(functions::lazy::col, 1))?;
|
556
557
|
class.define_singleton_method("len", function!(functions::lazy::len, 0))?;
|
557
|
-
class.define_singleton_method("first", function!(functions::lazy::first, 0))?;
|
558
|
-
class.define_singleton_method("last", function!(functions::lazy::last, 0))?;
|
559
|
-
class.define_singleton_method("cols", function!(functions::lazy::cols, 1))?;
|
560
558
|
class.define_singleton_method("fold", function!(functions::lazy::fold, 5))?;
|
561
|
-
class.define_singleton_method("cum_fold", function!(functions::lazy::cum_fold,
|
559
|
+
class.define_singleton_method("cum_fold", function!(functions::lazy::cum_fold, 6))?;
|
562
560
|
class.define_singleton_method("lit", function!(functions::lazy::lit, 3))?;
|
563
561
|
class.define_singleton_method("int_range", function!(functions::range::int_range, 4))?;
|
564
562
|
class.define_singleton_method("int_ranges", function!(functions::range::int_ranges, 4))?;
|
@@ -729,7 +727,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
729
727
|
class.define_singleton_method("new_from_csv", function!(RbLazyFrame::new_from_csv, -1))?;
|
730
728
|
class.define_singleton_method(
|
731
729
|
"new_from_parquet",
|
732
|
-
function!(RbLazyFrame::new_from_parquet,
|
730
|
+
function!(RbLazyFrame::new_from_parquet, 6),
|
733
731
|
)?;
|
734
732
|
class.define_singleton_method("new_from_ipc", function!(RbLazyFrame::new_from_ipc, 10))?;
|
735
733
|
class.define_method("write_json", method!(RbLazyFrame::write_json, 1))?;
|
@@ -750,7 +748,6 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
750
748
|
class.define_method("sink_ipc", method!(RbLazyFrame::sink_ipc, 5))?;
|
751
749
|
class.define_method("sink_csv", method!(RbLazyFrame::sink_csv, -1))?;
|
752
750
|
class.define_method("sink_json", method!(RbLazyFrame::sink_json, 4))?;
|
753
|
-
class.define_method("fetch", method!(RbLazyFrame::fetch, 1))?;
|
754
751
|
class.define_method("filter", method!(RbLazyFrame::filter, 1))?;
|
755
752
|
class.define_method("select", method!(RbLazyFrame::select, 1))?;
|
756
753
|
class.define_method("select_seq", method!(RbLazyFrame::select_seq, 1))?;
|
@@ -908,9 +905,9 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
908
905
|
class.define_method("median", method!(RbSeries::median, 0))?;
|
909
906
|
class.define_method("quantile", method!(RbSeries::quantile, 2))?;
|
910
907
|
class.define_method("_clone", method!(RbSeries::clone, 0))?;
|
911
|
-
class.define_method("
|
908
|
+
class.define_method("map_elements", method!(RbSeries::map_elements, 3))?;
|
912
909
|
class.define_method("zip_with", method!(RbSeries::zip_with, 2))?;
|
913
|
-
class.define_method("to_dummies", method!(RbSeries::to_dummies,
|
910
|
+
class.define_method("to_dummies", method!(RbSeries::to_dummies, 3))?;
|
914
911
|
class.define_method("n_unique", method!(RbSeries::n_unique, 0))?;
|
915
912
|
class.define_method("floor", method!(RbSeries::floor, 0))?;
|
916
913
|
class.define_method("shrink_to_fit", method!(RbSeries::shrink_to_fit, 0))?;
|
@@ -1111,5 +1108,86 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
1111
1108
|
let class = module.define_class("ArrowArrayStream", ruby.class_object())?;
|
1112
1109
|
class.define_method("to_i", method!(RbArrowArrayStream::to_i, 0))?;
|
1113
1110
|
|
1111
|
+
// catalog
|
1112
|
+
let class = module.define_class("RbCatalogClient", ruby.class_object())?;
|
1113
|
+
class.define_singleton_method("new", function!(RbCatalogClient::new, 2))?;
|
1114
|
+
class.define_singleton_method(
|
1115
|
+
"type_json_to_polars_type",
|
1116
|
+
function!(RbCatalogClient::type_json_to_polars_type, 1),
|
1117
|
+
)?;
|
1118
|
+
class.define_method("list_catalogs", method!(RbCatalogClient::list_catalogs, 0))?;
|
1119
|
+
class.define_method(
|
1120
|
+
"list_namespaces",
|
1121
|
+
method!(RbCatalogClient::list_namespaces, 1),
|
1122
|
+
)?;
|
1123
|
+
class.define_method("list_tables", method!(RbCatalogClient::list_tables, 2))?;
|
1124
|
+
class.define_method(
|
1125
|
+
"get_table_info",
|
1126
|
+
method!(RbCatalogClient::get_table_info, 3),
|
1127
|
+
)?;
|
1128
|
+
class.define_method(
|
1129
|
+
"create_catalog",
|
1130
|
+
method!(RbCatalogClient::create_catalog, 3),
|
1131
|
+
)?;
|
1132
|
+
class.define_method(
|
1133
|
+
"delete_catalog",
|
1134
|
+
method!(RbCatalogClient::delete_catalog, 2),
|
1135
|
+
)?;
|
1136
|
+
class.define_method(
|
1137
|
+
"create_namespace",
|
1138
|
+
method!(RbCatalogClient::create_namespace, 4),
|
1139
|
+
)?;
|
1140
|
+
class.define_method(
|
1141
|
+
"delete_namespace",
|
1142
|
+
method!(RbCatalogClient::delete_namespace, 3),
|
1143
|
+
)?;
|
1144
|
+
class.define_method("create_table", method!(RbCatalogClient::create_table, 9))?;
|
1145
|
+
class.define_method("delete_table", method!(RbCatalogClient::delete_table, 3))?;
|
1146
|
+
|
1147
|
+
// categories
|
1148
|
+
let class = module.define_class("RbCategories", ruby.class_object())?;
|
1149
|
+
class.define_singleton_method(
|
1150
|
+
"global_categories",
|
1151
|
+
function!(RbCategories::global_categories, 0),
|
1152
|
+
)?;
|
1153
|
+
|
1154
|
+
// data type expr
|
1155
|
+
let _class = module.define_class("RbDataTypeExpr", ruby.class_object())?;
|
1156
|
+
|
1157
|
+
// selector
|
1158
|
+
let class = module.define_class("RbSelector", ruby.class_object())?;
|
1159
|
+
class.define_method("union", method!(RbSelector::union, 1))?;
|
1160
|
+
class.define_method("difference", method!(RbSelector::difference, 1))?;
|
1161
|
+
class.define_method("exclusive_or", method!(RbSelector::exclusive_or, 1))?;
|
1162
|
+
class.define_method("intersect", method!(RbSelector::intersect, 1))?;
|
1163
|
+
class.define_singleton_method("by_dtype", function!(RbSelector::by_dtype, 1))?;
|
1164
|
+
class.define_singleton_method("by_name", function!(RbSelector::by_name, 2))?;
|
1165
|
+
class.define_singleton_method("by_index", function!(RbSelector::by_index, 2))?;
|
1166
|
+
class.define_singleton_method("first", function!(RbSelector::first, 1))?;
|
1167
|
+
class.define_singleton_method("last", function!(RbSelector::last, 1))?;
|
1168
|
+
class.define_singleton_method("matches", function!(RbSelector::matches, 1))?;
|
1169
|
+
class.define_singleton_method("enum_", function!(RbSelector::enum_, 0))?;
|
1170
|
+
class.define_singleton_method("categorical", function!(RbSelector::categorical, 0))?;
|
1171
|
+
class.define_singleton_method("nested", function!(RbSelector::nested, 0))?;
|
1172
|
+
class.define_singleton_method("list", function!(RbSelector::list, 1))?;
|
1173
|
+
class.define_singleton_method("array", function!(RbSelector::array, 2))?;
|
1174
|
+
class.define_singleton_method("struct_", function!(RbSelector::struct_, 0))?;
|
1175
|
+
class.define_singleton_method("integer", function!(RbSelector::integer, 0))?;
|
1176
|
+
class.define_singleton_method("signed_integer", function!(RbSelector::signed_integer, 0))?;
|
1177
|
+
class.define_singleton_method(
|
1178
|
+
"unsigned_integer",
|
1179
|
+
function!(RbSelector::unsigned_integer, 0),
|
1180
|
+
)?;
|
1181
|
+
class.define_singleton_method("float", function!(RbSelector::float, 0))?;
|
1182
|
+
class.define_singleton_method("decimal", function!(RbSelector::decimal, 0))?;
|
1183
|
+
class.define_singleton_method("numeric", function!(RbSelector::numeric, 0))?;
|
1184
|
+
class.define_singleton_method("temporal", function!(RbSelector::temporal, 0))?;
|
1185
|
+
class.define_singleton_method("datetime", function!(RbSelector::datetime, 2))?;
|
1186
|
+
class.define_singleton_method("duration", function!(RbSelector::duration, 1))?;
|
1187
|
+
class.define_singleton_method("object", function!(RbSelector::object, 0))?;
|
1188
|
+
class.define_singleton_method("empty", function!(RbSelector::empty, 0))?;
|
1189
|
+
class.define_singleton_method("all", function!(RbSelector::all, 0))?;
|
1190
|
+
class.define_method("_hash", method!(RbSelector::hash, 0))?;
|
1191
|
+
|
1114
1192
|
Ok(())
|
1115
1193
|
}
|