polars-df 0.20.0 → 0.21.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +27 -0
- data/Cargo.lock +192 -186
- data/LICENSE.txt +1 -1
- data/ext/polars/Cargo.toml +19 -9
- data/ext/polars/src/batched_csv.rs +2 -2
- data/ext/polars/src/catalog/mod.rs +1 -0
- data/ext/polars/src/catalog/unity.rs +450 -0
- data/ext/polars/src/conversion/any_value.rs +9 -19
- data/ext/polars/src/conversion/categorical.rs +30 -0
- data/ext/polars/src/conversion/chunked_array.rs +8 -8
- data/ext/polars/src/conversion/mod.rs +275 -109
- data/ext/polars/src/dataframe/construction.rs +2 -2
- data/ext/polars/src/dataframe/export.rs +2 -2
- data/ext/polars/src/dataframe/general.rs +4 -2
- data/ext/polars/src/dataframe/io.rs +2 -2
- data/ext/polars/src/exceptions.rs +2 -1
- data/ext/polars/src/expr/array.rs +73 -4
- data/ext/polars/src/expr/binary.rs +26 -1
- data/ext/polars/src/expr/bitwise.rs +39 -0
- data/ext/polars/src/expr/categorical.rs +20 -0
- data/ext/polars/src/expr/datatype.rs +37 -0
- data/ext/polars/src/expr/datetime.rs +58 -0
- data/ext/polars/src/expr/general.rs +106 -22
- data/ext/polars/src/expr/list.rs +45 -2
- data/ext/polars/src/expr/meta.rs +5 -28
- data/ext/polars/src/expr/mod.rs +4 -1
- data/ext/polars/src/expr/name.rs +10 -2
- data/ext/polars/src/expr/rolling.rs +21 -1
- data/ext/polars/src/expr/selector.rs +219 -0
- data/ext/polars/src/expr/string.rs +73 -6
- data/ext/polars/src/expr/struct.rs +9 -1
- data/ext/polars/src/file.rs +11 -5
- data/ext/polars/src/functions/io.rs +21 -11
- data/ext/polars/src/functions/lazy.rs +26 -54
- data/ext/polars/src/functions/meta.rs +2 -2
- data/ext/polars/src/functions/misc.rs +1 -1
- data/ext/polars/src/functions/string_cache.rs +4 -5
- data/ext/polars/src/interop/numo/numo_rs.rs +1 -1
- data/ext/polars/src/interop/numo/to_numo_series.rs +1 -1
- data/ext/polars/src/io/mod.rs +102 -0
- data/ext/polars/src/lazyframe/general.rs +124 -111
- data/ext/polars/src/lazyframe/serde.rs +1 -1
- data/ext/polars/src/lazyframe/sink.rs +6 -6
- data/ext/polars/src/lib.rs +216 -29
- data/ext/polars/src/map/dataframe.rs +9 -9
- data/ext/polars/src/map/lazy.rs +1 -1
- data/ext/polars/src/map/mod.rs +31 -19
- data/ext/polars/src/map/series.rs +9 -9
- data/ext/polars/src/on_startup.rs +5 -2
- data/ext/polars/src/rb_modules.rs +1 -1
- data/ext/polars/src/series/aggregation.rs +44 -0
- data/ext/polars/src/series/construction.rs +11 -7
- data/ext/polars/src/series/export.rs +6 -4
- data/ext/polars/src/series/general.rs +75 -210
- data/ext/polars/src/series/import.rs +2 -2
- data/ext/polars/src/series/map.rs +227 -0
- data/ext/polars/src/series/mod.rs +2 -1
- data/ext/polars/src/series/scatter.rs +1 -1
- data/ext/polars/src/utils.rs +10 -2
- data/lib/polars/array_expr.rb +382 -3
- data/lib/polars/array_name_space.rb +281 -0
- data/lib/polars/binary_expr.rb +67 -0
- data/lib/polars/binary_name_space.rb +43 -0
- data/lib/polars/cat_expr.rb +224 -0
- data/lib/polars/cat_name_space.rb +130 -32
- data/lib/polars/catalog/unity/catalog_info.rb +20 -0
- data/lib/polars/catalog/unity/column_info.rb +31 -0
- data/lib/polars/catalog/unity/namespace_info.rb +21 -0
- data/lib/polars/catalog/unity/table_info.rb +50 -0
- data/lib/polars/catalog.rb +448 -0
- data/lib/polars/config.rb +2 -2
- data/lib/polars/convert.rb +12 -2
- data/lib/polars/data_frame.rb +834 -48
- data/lib/polars/data_type_expr.rb +52 -0
- data/lib/polars/data_types.rb +61 -5
- data/lib/polars/date_time_expr.rb +251 -0
- data/lib/polars/date_time_name_space.rb +299 -0
- data/lib/polars/exceptions.rb +7 -2
- data/lib/polars/expr.rb +1247 -211
- data/lib/polars/functions/col.rb +6 -5
- data/lib/polars/functions/datatype.rb +21 -0
- data/lib/polars/functions/lazy.rb +127 -15
- data/lib/polars/functions/repeat.rb +4 -0
- data/lib/polars/io/csv.rb +19 -1
- data/lib/polars/io/json.rb +16 -0
- data/lib/polars/io/ndjson.rb +13 -0
- data/lib/polars/io/parquet.rb +70 -66
- data/lib/polars/io/scan_options.rb +47 -0
- data/lib/polars/lazy_frame.rb +1099 -95
- data/lib/polars/list_expr.rb +400 -11
- data/lib/polars/list_name_space.rb +321 -5
- data/lib/polars/meta_expr.rb +71 -22
- data/lib/polars/name_expr.rb +36 -0
- data/lib/polars/scan_cast_options.rb +64 -0
- data/lib/polars/schema.rb +84 -3
- data/lib/polars/selector.rb +210 -0
- data/lib/polars/selectors.rb +932 -203
- data/lib/polars/series.rb +1083 -63
- data/lib/polars/string_expr.rb +435 -9
- data/lib/polars/string_name_space.rb +729 -45
- data/lib/polars/struct_expr.rb +103 -0
- data/lib/polars/struct_name_space.rb +19 -1
- data/lib/polars/utils/parse.rb +40 -0
- data/lib/polars/utils/various.rb +18 -1
- data/lib/polars/utils.rb +9 -1
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +10 -0
- metadata +20 -2
@@ -1,4 +1,4 @@
|
|
1
|
-
use magnus::{
|
1
|
+
use magnus::{IntoValue, RArray, RHash, TryConvert, Value, r_hash::ForEach, typed_data::Obj};
|
2
2
|
use polars::io::{HiveOptions, RowIndex};
|
3
3
|
use polars::lazy::frame::LazyFrame;
|
4
4
|
use polars::prelude::*;
|
@@ -6,16 +6,17 @@ use polars_plan::dsl::ScanSources;
|
|
6
6
|
use std::cell::RefCell;
|
7
7
|
use std::io::BufWriter;
|
8
8
|
use std::num::NonZeroUsize;
|
9
|
-
use std::path::PathBuf;
|
10
9
|
|
11
10
|
use super::SinkTarget;
|
12
11
|
use crate::conversion::*;
|
13
12
|
use crate::expr::rb_exprs_to_exprs;
|
13
|
+
use crate::expr::selector::RbSelector;
|
14
14
|
use crate::file::get_file_like;
|
15
|
+
use crate::io::RbScanOptions;
|
15
16
|
use crate::{RbDataFrame, RbExpr, RbLazyFrame, RbLazyGroupBy, RbPolarsErr, RbResult, RbValueError};
|
16
17
|
|
17
|
-
fn rbobject_to_first_path_and_scan_sources(obj: Value) -> RbResult<(Option<
|
18
|
-
use crate::file::{
|
18
|
+
fn rbobject_to_first_path_and_scan_sources(obj: Value) -> RbResult<(Option<PlPath>, ScanSources)> {
|
19
|
+
use crate::file::{RubyScanSourceInput, get_ruby_scan_source_input};
|
19
20
|
Ok(match get_ruby_scan_source_input(obj, false)? {
|
20
21
|
RubyScanSourceInput::Path(path) => (Some(path.clone()), ScanSources::Paths([path].into())),
|
21
22
|
RubyScanSourceInput::File(file) => (None, ScanSources::Files([file].into())),
|
@@ -43,7 +44,7 @@ impl RbLazyFrame {
|
|
43
44
|
|
44
45
|
let sources = sources.0;
|
45
46
|
let (_first_path, sources) = match source {
|
46
|
-
None => (sources.first_path().map(|p| p.
|
47
|
+
None => (sources.first_path().map(|p| p.into_owned()), sources),
|
47
48
|
Some(source) => rbobject_to_first_path_and_scan_sources(source)?,
|
48
49
|
};
|
49
50
|
|
@@ -111,7 +112,7 @@ impl RbLazyFrame {
|
|
111
112
|
|
112
113
|
let sources = sources.0;
|
113
114
|
let (_first_path, sources) = match source {
|
114
|
-
None => (sources.first_path().map(|p| p.
|
115
|
+
None => (sources.first_path().map(|p| p.into_owned()), sources),
|
115
116
|
Some(source) => rbobject_to_first_path_and_scan_sources(source)?,
|
116
117
|
};
|
117
118
|
|
@@ -147,72 +148,35 @@ impl RbLazyFrame {
|
|
147
148
|
Ok(r.finish().map_err(RbPolarsErr::from)?.into())
|
148
149
|
}
|
149
150
|
|
150
|
-
pub fn new_from_parquet(
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
let cloud_options = Option::<Vec<(String, String)>>::try_convert(arguments[8])?;
|
160
|
-
let _credential_provider = Option::<Value>::try_convert(arguments[9])?;
|
161
|
-
let use_statistics = bool::try_convert(arguments[10])?;
|
162
|
-
let hive_partitioning = Option::<bool>::try_convert(arguments[11])?;
|
163
|
-
let schema = Option::<Wrap<Schema>>::try_convert(arguments[12])?;
|
164
|
-
let hive_schema = Option::<Wrap<Schema>>::try_convert(arguments[13])?;
|
165
|
-
let try_parse_hive_dates = bool::try_convert(arguments[14])?;
|
166
|
-
let retries = usize::try_convert(arguments[15])?;
|
167
|
-
let glob = bool::try_convert(arguments[16])?;
|
168
|
-
let include_file_paths = Option::<String>::try_convert(arguments[17])?;
|
169
|
-
let allow_missing_columns = bool::try_convert(arguments[18])?;
|
151
|
+
pub fn new_from_parquet(
|
152
|
+
sources: Wrap<ScanSources>,
|
153
|
+
schema: Option<Wrap<Schema>>,
|
154
|
+
scan_options: RbScanOptions,
|
155
|
+
parallel: Wrap<ParallelStrategy>,
|
156
|
+
low_memory: bool,
|
157
|
+
use_statistics: bool,
|
158
|
+
) -> RbResult<Self> {
|
159
|
+
use crate::utils::to_rb_err;
|
170
160
|
|
171
161
|
let parallel = parallel.0;
|
172
|
-
let hive_schema = hive_schema.map(|s| Arc::new(s.0));
|
173
|
-
|
174
|
-
let row_index = row_index.map(|(name, offset)| RowIndex {
|
175
|
-
name: name.into(),
|
176
|
-
offset,
|
177
|
-
});
|
178
|
-
|
179
|
-
let hive_options = HiveOptions {
|
180
|
-
enabled: hive_partitioning,
|
181
|
-
hive_start_idx: 0,
|
182
|
-
schema: hive_schema,
|
183
|
-
try_parse_dates: try_parse_hive_dates,
|
184
|
-
};
|
185
162
|
|
186
|
-
let
|
187
|
-
|
188
|
-
cache,
|
163
|
+
let options = ParquetOptions {
|
164
|
+
schema: schema.map(|x| Arc::new(x.0)),
|
189
165
|
parallel,
|
190
|
-
rechunk,
|
191
|
-
row_index,
|
192
166
|
low_memory,
|
193
|
-
cloud_options: None,
|
194
167
|
use_statistics,
|
195
|
-
schema: schema.map(|x| Arc::new(x.0)),
|
196
|
-
hive_options,
|
197
|
-
glob,
|
198
|
-
include_file_paths: include_file_paths.map(|x| x.into()),
|
199
|
-
allow_missing_columns,
|
200
168
|
};
|
201
169
|
|
202
170
|
let sources = sources.0;
|
203
|
-
let
|
204
|
-
None => (sources.first_path().map(|p| p.to_path_buf()), sources),
|
205
|
-
Some(source) => rbobject_to_first_path_and_scan_sources(source)?,
|
206
|
-
};
|
171
|
+
let first_path = sources.first_path().map(|p| p.into_owned());
|
207
172
|
|
208
|
-
|
209
|
-
|
210
|
-
let cloud_options =
|
211
|
-
parse_cloud_options(&first_path_url, cloud_options.unwrap_or_default())?;
|
212
|
-
args.cloud_options = Some(cloud_options.with_max_retries(retries));
|
213
|
-
}
|
173
|
+
let unified_scan_args =
|
174
|
+
scan_options.extract_unified_scan_args(first_path.as_ref().map(|p| p.as_ref()))?;
|
214
175
|
|
215
|
-
let lf =
|
176
|
+
let lf: LazyFrame = DslBuilder::scan_parquet(sources, options, unified_scan_args)
|
177
|
+
.map_err(to_rb_err)?
|
178
|
+
.build()
|
179
|
+
.into();
|
216
180
|
|
217
181
|
Ok(lf.into())
|
218
182
|
}
|
@@ -254,7 +218,7 @@ impl RbLazyFrame {
|
|
254
218
|
|
255
219
|
let sources = sources.0;
|
256
220
|
let (_first_path, sources) = match source {
|
257
|
-
None => (sources.first_path().map(|p| p.
|
221
|
+
None => (sources.first_path().map(|p| p.into_owned()), sources),
|
258
222
|
Some(source) => rbobject_to_first_path_and_scan_sources(source)?,
|
259
223
|
};
|
260
224
|
|
@@ -265,7 +229,7 @@ impl RbLazyFrame {
|
|
265
229
|
pub fn write_json(&self, rb_f: Value) -> RbResult<()> {
|
266
230
|
let file = BufWriter::new(get_file_like(rb_f, true)?);
|
267
231
|
serde_json::to_writer(file, &self.ldf.borrow().logical_plan)
|
268
|
-
.map_err(|err| RbValueError::new_err(format!("{:?}"
|
232
|
+
.map_err(|err| RbValueError::new_err(format!("{err:?}")))?;
|
269
233
|
Ok(())
|
270
234
|
}
|
271
235
|
|
@@ -361,6 +325,30 @@ impl RbLazyFrame {
|
|
361
325
|
.into())
|
362
326
|
}
|
363
327
|
|
328
|
+
pub fn top_k(&self, k: IdxSize, by: RArray, reverse: Vec<bool>) -> RbResult<Self> {
|
329
|
+
let ldf = self.ldf.borrow().clone();
|
330
|
+
let exprs = rb_exprs_to_exprs(by)?;
|
331
|
+
Ok(ldf
|
332
|
+
.top_k(
|
333
|
+
k,
|
334
|
+
exprs,
|
335
|
+
SortMultipleOptions::new().with_order_descending_multi(reverse),
|
336
|
+
)
|
337
|
+
.into())
|
338
|
+
}
|
339
|
+
|
340
|
+
pub fn bottom_k(&self, k: IdxSize, by: RArray, reverse: Vec<bool>) -> RbResult<Self> {
|
341
|
+
let ldf = self.ldf.borrow().clone();
|
342
|
+
let exprs = rb_exprs_to_exprs(by)?;
|
343
|
+
Ok(ldf
|
344
|
+
.bottom_k(
|
345
|
+
k,
|
346
|
+
exprs,
|
347
|
+
SortMultipleOptions::new().with_order_descending_multi(reverse),
|
348
|
+
)
|
349
|
+
.into())
|
350
|
+
}
|
351
|
+
|
364
352
|
pub fn cache(&self) -> Self {
|
365
353
|
let ldf = self.ldf.borrow().clone();
|
366
354
|
ldf.cache().into()
|
@@ -399,10 +387,8 @@ impl RbLazyFrame {
|
|
399
387
|
let cloud_options = match target.base_path() {
|
400
388
|
None => None,
|
401
389
|
Some(base_path) => {
|
402
|
-
let cloud_options =
|
403
|
-
base_path.to_str().
|
404
|
-
cloud_options.unwrap_or_default(),
|
405
|
-
)?;
|
390
|
+
let cloud_options =
|
391
|
+
parse_cloud_options(base_path.to_str(), cloud_options.unwrap_or_default())?;
|
406
392
|
Some(cloud_options.with_max_retries(retries))
|
407
393
|
}
|
408
394
|
};
|
@@ -434,10 +420,8 @@ impl RbLazyFrame {
|
|
434
420
|
let cloud_options = match target.base_path() {
|
435
421
|
None => None,
|
436
422
|
Some(base_path) => {
|
437
|
-
let cloud_options =
|
438
|
-
base_path.to_str().
|
439
|
-
cloud_options.unwrap_or_default(),
|
440
|
-
)?;
|
423
|
+
let cloud_options =
|
424
|
+
parse_cloud_options(base_path.to_str(), cloud_options.unwrap_or_default())?;
|
441
425
|
Some(cloud_options.with_max_retries(retries))
|
442
426
|
}
|
443
427
|
};
|
@@ -466,11 +450,12 @@ impl RbLazyFrame {
|
|
466
450
|
let time_format = Option::<String>::try_convert(arguments[9])?;
|
467
451
|
let float_scientific = Option::<bool>::try_convert(arguments[10])?;
|
468
452
|
let float_precision = Option::<usize>::try_convert(arguments[11])?;
|
469
|
-
let
|
470
|
-
let
|
471
|
-
let
|
472
|
-
let
|
473
|
-
let
|
453
|
+
let decimal_comma = bool::try_convert(arguments[12])?;
|
454
|
+
let null_value = Option::<String>::try_convert(arguments[13])?;
|
455
|
+
let quote_style = Option::<Wrap<QuoteStyle>>::try_convert(arguments[14])?;
|
456
|
+
let cloud_options = Option::<Vec<(String, String)>>::try_convert(arguments[15])?;
|
457
|
+
let retries = usize::try_convert(arguments[16])?;
|
458
|
+
let sink_options = Wrap::<SinkOptions>::try_convert(arguments[17])?;
|
474
459
|
|
475
460
|
let quote_style = quote_style.map_or(QuoteStyle::default(), |wrap| wrap.0);
|
476
461
|
let null_value = null_value.unwrap_or(SerializeOptions::default().null);
|
@@ -481,6 +466,7 @@ impl RbLazyFrame {
|
|
481
466
|
datetime_format,
|
482
467
|
float_scientific,
|
483
468
|
float_precision,
|
469
|
+
decimal_comma,
|
484
470
|
separator,
|
485
471
|
quote_char,
|
486
472
|
null: null_value,
|
@@ -498,10 +484,8 @@ impl RbLazyFrame {
|
|
498
484
|
let cloud_options = match target.base_path() {
|
499
485
|
None => None,
|
500
486
|
Some(base_path) => {
|
501
|
-
let cloud_options =
|
502
|
-
base_path.to_str().
|
503
|
-
cloud_options.unwrap_or_default(),
|
504
|
-
)?;
|
487
|
+
let cloud_options =
|
488
|
+
parse_cloud_options(base_path.to_str(), cloud_options.unwrap_or_default())?;
|
505
489
|
Some(cloud_options.with_max_retries(retries))
|
506
490
|
}
|
507
491
|
};
|
@@ -529,10 +513,8 @@ impl RbLazyFrame {
|
|
529
513
|
let cloud_options = match target.base_path() {
|
530
514
|
None => None,
|
531
515
|
Some(base_path) => {
|
532
|
-
let cloud_options =
|
533
|
-
base_path.to_str().
|
534
|
-
cloud_options.unwrap_or_default(),
|
535
|
-
)?;
|
516
|
+
let cloud_options =
|
517
|
+
parse_cloud_options(base_path.to_str(), cloud_options.unwrap_or_default())?;
|
536
518
|
Some(cloud_options.with_max_retries(retries))
|
537
519
|
}
|
538
520
|
};
|
@@ -546,15 +528,14 @@ impl RbLazyFrame {
|
|
546
528
|
.map_err(Into::into)
|
547
529
|
}
|
548
530
|
|
549
|
-
pub fn
|
531
|
+
pub fn filter(&self, predicate: &RbExpr) -> Self {
|
550
532
|
let ldf = self.ldf.borrow().clone();
|
551
|
-
|
552
|
-
Ok(df.into())
|
533
|
+
ldf.filter(predicate.inner.clone()).into()
|
553
534
|
}
|
554
535
|
|
555
|
-
pub fn
|
536
|
+
pub fn remove(&self, predicate: &RbExpr) -> Self {
|
556
537
|
let ldf = self.ldf.borrow().clone();
|
557
|
-
ldf.
|
538
|
+
ldf.remove(predicate.inner.clone()).into()
|
558
539
|
}
|
559
540
|
|
560
541
|
pub fn select(&self, exprs: RArray) -> RbResult<Self> {
|
@@ -689,15 +670,19 @@ impl RbLazyFrame {
|
|
689
670
|
.allow_parallel(allow_parallel)
|
690
671
|
.force_parallel(force_parallel)
|
691
672
|
.coalesce(coalesce)
|
692
|
-
.how(JoinType::AsOf(AsOfOptions {
|
673
|
+
.how(JoinType::AsOf(Box::new(AsOfOptions {
|
693
674
|
strategy: strategy.0,
|
694
675
|
left_by: left_by.map(strings_to_pl_smallstr),
|
695
676
|
right_by: right_by.map(strings_to_pl_smallstr),
|
696
|
-
tolerance: tolerance.map(|t|
|
677
|
+
tolerance: tolerance.map(|t| {
|
678
|
+
let av = t.0.into_static();
|
679
|
+
let dtype = av.dtype();
|
680
|
+
Scalar::new(dtype, av)
|
681
|
+
}),
|
697
682
|
tolerance_str: tolerance_str.map(|s| s.into()),
|
698
683
|
allow_eq,
|
699
684
|
check_sortedness,
|
700
|
-
}))
|
685
|
+
})))
|
701
686
|
.suffix(suffix)
|
702
687
|
.finish()
|
703
688
|
.into())
|
@@ -745,6 +730,20 @@ impl RbLazyFrame {
|
|
745
730
|
.into())
|
746
731
|
}
|
747
732
|
|
733
|
+
pub fn join_where(&self, other: &Self, predicates: RArray, suffix: String) -> RbResult<Self> {
|
734
|
+
let ldf = self.ldf.borrow().clone();
|
735
|
+
let other = other.ldf.borrow().clone();
|
736
|
+
|
737
|
+
let predicates = rb_exprs_to_exprs(predicates)?;
|
738
|
+
|
739
|
+
Ok(ldf
|
740
|
+
.join_builder()
|
741
|
+
.with(other)
|
742
|
+
.suffix(suffix)
|
743
|
+
.join_where(predicates)
|
744
|
+
.into())
|
745
|
+
}
|
746
|
+
|
748
747
|
pub fn with_column(&self, expr: &RbExpr) -> Self {
|
749
748
|
let ldf = self.ldf.borrow().clone();
|
750
749
|
ldf.with_column(expr.inner.clone()).into()
|
@@ -832,10 +831,12 @@ impl RbLazyFrame {
|
|
832
831
|
out.into()
|
833
832
|
}
|
834
833
|
|
835
|
-
pub fn explode(&self,
|
836
|
-
|
837
|
-
|
838
|
-
|
834
|
+
pub fn explode(&self, subset: &RbSelector) -> Self {
|
835
|
+
self.ldf
|
836
|
+
.borrow()
|
837
|
+
.clone()
|
838
|
+
.explode(subset.inner.clone())
|
839
|
+
.into()
|
839
840
|
}
|
840
841
|
|
841
842
|
pub fn null_count(&self) -> Self {
|
@@ -846,10 +847,11 @@ impl RbLazyFrame {
|
|
846
847
|
pub fn unique(
|
847
848
|
&self,
|
848
849
|
maintain_order: bool,
|
849
|
-
subset: Option
|
850
|
+
subset: Option<&RbSelector>,
|
850
851
|
keep: Wrap<UniqueKeepStrategy>,
|
851
852
|
) -> RbResult<Self> {
|
852
853
|
let ldf = self.ldf.borrow().clone();
|
854
|
+
let subset = subset.map(|e| e.inner.clone());
|
853
855
|
Ok(match maintain_order {
|
854
856
|
true => ldf.unique_stable_generic(subset, keep.0),
|
855
857
|
false => ldf.unique_generic(subset, keep.0),
|
@@ -857,9 +859,19 @@ impl RbLazyFrame {
|
|
857
859
|
.into())
|
858
860
|
}
|
859
861
|
|
860
|
-
pub fn
|
861
|
-
|
862
|
-
|
862
|
+
pub fn drop_nans(&self, subset: Option<&RbSelector>) -> Self {
|
863
|
+
self.ldf
|
864
|
+
.borrow()
|
865
|
+
.clone()
|
866
|
+
.drop_nans(subset.map(|e| e.inner.clone()))
|
867
|
+
.into()
|
868
|
+
}
|
869
|
+
|
870
|
+
pub fn drop_nulls(&self, subset: Option<&RbSelector>) -> Self {
|
871
|
+
self.ldf
|
872
|
+
.borrow()
|
873
|
+
.clone()
|
874
|
+
.drop_nulls(subset.map(|e| e.inner.clone()))
|
863
875
|
.into()
|
864
876
|
}
|
865
877
|
|
@@ -875,16 +887,14 @@ impl RbLazyFrame {
|
|
875
887
|
|
876
888
|
pub fn unpivot(
|
877
889
|
&self,
|
878
|
-
on:
|
879
|
-
index:
|
890
|
+
on: &RbSelector,
|
891
|
+
index: &RbSelector,
|
880
892
|
value_name: Option<String>,
|
881
893
|
variable_name: Option<String>,
|
882
894
|
) -> RbResult<Self> {
|
883
|
-
let on = rb_exprs_to_exprs(on)?;
|
884
|
-
let index = rb_exprs_to_exprs(index)?;
|
885
895
|
let args = UnpivotArgsDSL {
|
886
|
-
on: on.
|
887
|
-
index: index.
|
896
|
+
on: on.inner.clone(),
|
897
|
+
index: index.inner.clone(),
|
888
898
|
value_name: value_name.map(|s| s.into()),
|
889
899
|
variable_name: variable_name.map(|s| s.into()),
|
890
900
|
};
|
@@ -898,9 +908,8 @@ impl RbLazyFrame {
|
|
898
908
|
ldf.with_row_index(&name, offset).into()
|
899
909
|
}
|
900
910
|
|
901
|
-
pub fn drop(&self,
|
902
|
-
|
903
|
-
ldf.drop(cols).into()
|
911
|
+
pub fn drop(&self, columns: &RbSelector) -> Self {
|
912
|
+
self.ldf.borrow().clone().drop(columns.inner.clone()).into()
|
904
913
|
}
|
905
914
|
|
906
915
|
pub fn cast(&self, rb_dtypes: RHash, strict: bool) -> RbResult<Self> {
|
@@ -941,8 +950,12 @@ impl RbLazyFrame {
|
|
941
950
|
Ok(schema_dict)
|
942
951
|
}
|
943
952
|
|
944
|
-
pub fn unnest(&self,
|
945
|
-
self.ldf
|
953
|
+
pub fn unnest(&self, columns: &RbSelector) -> Self {
|
954
|
+
self.ldf
|
955
|
+
.borrow()
|
956
|
+
.clone()
|
957
|
+
.unnest(columns.inner.clone())
|
958
|
+
.into()
|
946
959
|
}
|
947
960
|
|
948
961
|
pub fn count(&self) -> Self {
|
@@ -25,7 +25,7 @@ impl RbLazyFrame {
|
|
25
25
|
let json = unsafe { std::mem::transmute::<&'_ str, &'static str>(json.as_str()) };
|
26
26
|
|
27
27
|
let lp = serde_json::from_str::<DslPlan>(json)
|
28
|
-
.map_err(|err| RbValueError::new_err(format!("{:?}"
|
28
|
+
.map_err(|err| RbValueError::new_err(format!("{err:?}")))?;
|
29
29
|
Ok(LazyFrame::from(lp).into())
|
30
30
|
}
|
31
31
|
}
|
@@ -1,9 +1,9 @@
|
|
1
|
-
use std::path::{Path, PathBuf};
|
2
1
|
use std::sync::{Arc, Mutex};
|
3
2
|
|
4
3
|
use magnus::{RHash, TryConvert, Value};
|
5
4
|
use polars::prelude::sync_on_close::SyncOnCloseType;
|
6
|
-
use polars::prelude::{SinkOptions, SpecialEq};
|
5
|
+
use polars::prelude::{PlPath, SinkOptions, SpecialEq};
|
6
|
+
use polars_utils::plpath::PlPathRef;
|
7
7
|
|
8
8
|
use crate::prelude::Wrap;
|
9
9
|
use crate::{RbResult, RbValueError};
|
@@ -15,8 +15,8 @@ pub enum SinkTarget {
|
|
15
15
|
|
16
16
|
impl TryConvert for Wrap<polars_plan::dsl::SinkTarget> {
|
17
17
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
18
|
-
if let Ok(v) =
|
19
|
-
Ok(Wrap(polars::prelude::SinkTarget::Path(
|
18
|
+
if let Ok(v) = String::try_convert(ob) {
|
19
|
+
Ok(Wrap(polars::prelude::SinkTarget::Path(PlPath::new(&v))))
|
20
20
|
} else {
|
21
21
|
let writer = {
|
22
22
|
let rb_f = ob;
|
@@ -39,10 +39,10 @@ impl TryConvert for SinkTarget {
|
|
39
39
|
}
|
40
40
|
|
41
41
|
impl SinkTarget {
|
42
|
-
pub fn base_path(&self) -> Option
|
42
|
+
pub fn base_path(&self) -> Option<PlPathRef<'_>> {
|
43
43
|
match self {
|
44
44
|
Self::File(t) => match t {
|
45
|
-
polars::prelude::SinkTarget::Path(p) => Some(p.
|
45
|
+
polars::prelude::SinkTarget::Path(p) => Some(p.as_ref()),
|
46
46
|
polars::prelude::SinkTarget::Dyn(_) => None,
|
47
47
|
},
|
48
48
|
}
|