polars-df 0.14.0 → 0.16.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (87)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +35 -0
  3. data/Cargo.lock +1523 -378
  4. data/LICENSE.txt +1 -0
  5. data/README.md +38 -4
  6. data/ext/polars/Cargo.toml +15 -5
  7. data/ext/polars/src/batched_csv.rs +7 -10
  8. data/ext/polars/src/conversion/any_value.rs +31 -21
  9. data/ext/polars/src/conversion/mod.rs +155 -48
  10. data/ext/polars/src/dataframe/construction.rs +0 -3
  11. data/ext/polars/src/dataframe/export.rs +9 -2
  12. data/ext/polars/src/dataframe/general.rs +15 -57
  13. data/ext/polars/src/dataframe/io.rs +77 -169
  14. data/ext/polars/src/dataframe/mod.rs +1 -0
  15. data/ext/polars/src/dataframe/serde.rs +15 -0
  16. data/ext/polars/src/error.rs +31 -48
  17. data/ext/polars/src/exceptions.rs +24 -0
  18. data/ext/polars/src/expr/binary.rs +4 -42
  19. data/ext/polars/src/expr/datetime.rs +5 -4
  20. data/ext/polars/src/expr/general.rs +16 -22
  21. data/ext/polars/src/expr/list.rs +18 -11
  22. data/ext/polars/src/expr/meta.rs +6 -2
  23. data/ext/polars/src/expr/rolling.rs +6 -7
  24. data/ext/polars/src/expr/string.rs +9 -36
  25. data/ext/polars/src/file.rs +78 -23
  26. data/ext/polars/src/functions/aggregation.rs +4 -4
  27. data/ext/polars/src/functions/business.rs +15 -0
  28. data/ext/polars/src/functions/io.rs +34 -13
  29. data/ext/polars/src/functions/lazy.rs +22 -12
  30. data/ext/polars/src/functions/meta.rs +1 -1
  31. data/ext/polars/src/functions/mod.rs +1 -0
  32. data/ext/polars/src/interop/arrow/mod.rs +1 -0
  33. data/ext/polars/src/interop/arrow/to_ruby.rs +83 -0
  34. data/ext/polars/src/interop/mod.rs +1 -0
  35. data/ext/polars/src/lazyframe/general.rs +920 -0
  36. data/ext/polars/src/lazyframe/mod.rs +3 -827
  37. data/ext/polars/src/lazyframe/serde.rs +31 -0
  38. data/ext/polars/src/lib.rs +54 -27
  39. data/ext/polars/src/map/dataframe.rs +10 -6
  40. data/ext/polars/src/map/lazy.rs +65 -4
  41. data/ext/polars/src/map/mod.rs +9 -8
  42. data/ext/polars/src/on_startup.rs +1 -1
  43. data/ext/polars/src/series/aggregation.rs +1 -5
  44. data/ext/polars/src/series/arithmetic.rs +10 -10
  45. data/ext/polars/src/series/construction.rs +2 -2
  46. data/ext/polars/src/series/export.rs +1 -1
  47. data/ext/polars/src/series/general.rs +631 -0
  48. data/ext/polars/src/series/import.rs +55 -0
  49. data/ext/polars/src/series/mod.rs +11 -638
  50. data/ext/polars/src/series/scatter.rs +2 -2
  51. data/ext/polars/src/utils.rs +0 -20
  52. data/lib/polars/batched_csv_reader.rb +0 -2
  53. data/lib/polars/binary_expr.rb +133 -9
  54. data/lib/polars/binary_name_space.rb +101 -6
  55. data/lib/polars/config.rb +4 -0
  56. data/lib/polars/data_frame.rb +452 -101
  57. data/lib/polars/data_type_group.rb +28 -0
  58. data/lib/polars/data_types.rb +3 -1
  59. data/lib/polars/date_time_expr.rb +244 -0
  60. data/lib/polars/date_time_name_space.rb +87 -0
  61. data/lib/polars/expr.rb +103 -2
  62. data/lib/polars/functions/aggregation/horizontal.rb +10 -4
  63. data/lib/polars/functions/as_datatype.rb +51 -2
  64. data/lib/polars/functions/col.rb +1 -1
  65. data/lib/polars/functions/eager.rb +1 -3
  66. data/lib/polars/functions/lazy.rb +95 -13
  67. data/lib/polars/functions/range/time_range.rb +21 -21
  68. data/lib/polars/io/csv.rb +14 -16
  69. data/lib/polars/io/database.rb +2 -2
  70. data/lib/polars/io/delta.rb +126 -0
  71. data/lib/polars/io/ipc.rb +14 -4
  72. data/lib/polars/io/ndjson.rb +10 -0
  73. data/lib/polars/io/parquet.rb +168 -111
  74. data/lib/polars/lazy_frame.rb +684 -20
  75. data/lib/polars/list_name_space.rb +169 -0
  76. data/lib/polars/selectors.rb +1226 -0
  77. data/lib/polars/series.rb +465 -35
  78. data/lib/polars/string_cache.rb +27 -1
  79. data/lib/polars/string_expr.rb +0 -1
  80. data/lib/polars/string_name_space.rb +73 -3
  81. data/lib/polars/struct_name_space.rb +31 -7
  82. data/lib/polars/utils/various.rb +5 -1
  83. data/lib/polars/utils.rb +45 -10
  84. data/lib/polars/version.rb +1 -1
  85. data/lib/polars.rb +17 -1
  86. metadata +16 -9
  87. data/lib/polars/functions.rb +0 -57
data/ext/polars/src/lazyframe/mod.rs +3 -827
@@ -1,16 +1,8 @@
1
- use magnus::{IntoValue, RArray, RHash, TryConvert, Value};
2
- use polars::io::{HiveOptions, RowIndex};
1
+ mod general;
2
+ mod serde;
3
+
3
4
  use polars::lazy::frame::LazyFrame;
4
- use polars::prelude::*;
5
5
  use std::cell::RefCell;
6
- use std::io::{BufWriter, Read};
7
- use std::num::NonZeroUsize;
8
- use std::path::PathBuf;
9
-
10
- use crate::conversion::*;
11
- use crate::expr::rb_exprs_to_exprs;
12
- use crate::file::get_file_like;
13
- use crate::{RbDataFrame, RbExpr, RbLazyGroupBy, RbPolarsErr, RbResult, RbValueError};
14
6
 
15
7
  #[magnus::wrap(class = "Polars::RbLazyFrame")]
16
8
  #[derive(Clone)]
@@ -25,819 +17,3 @@ impl From<LazyFrame> for RbLazyFrame {
25
17
  }
26
18
  }
27
19
  }
28
-
29
- impl RbLazyFrame {
30
- pub fn read_json(rb_f: Value) -> RbResult<Self> {
31
- // it is faster to first read to memory and then parse: https://github.com/serde-rs/json/issues/160
32
- // so don't bother with files.
33
- let mut json = String::new();
34
- let _ = get_file_like(rb_f, false)?
35
- .read_to_string(&mut json)
36
- .unwrap();
37
-
38
- // Safety
39
- // we skipped the serializing/deserializing of the static in lifetime in `DataType`
40
- // so we actually don't have a lifetime at all when serializing.
41
-
42
- // &str still has a lifetime. Bit its ok, because we drop it immediately
43
- // in this scope
44
- let json = unsafe { std::mem::transmute::<&'_ str, &'static str>(json.as_str()) };
45
-
46
- let lp = serde_json::from_str::<DslPlan>(json)
47
- .map_err(|err| RbValueError::new_err(format!("{:?}", err)))?;
48
- Ok(LazyFrame::from(lp).into())
49
- }
50
-
51
- pub fn new_from_ndjson(
52
- path: String,
53
- infer_schema_length: Option<usize>,
54
- batch_size: Option<Wrap<NonZeroUsize>>,
55
- n_rows: Option<usize>,
56
- low_memory: bool,
57
- rechunk: bool,
58
- row_index: Option<(String, IdxSize)>,
59
- ) -> RbResult<Self> {
60
- let batch_size = batch_size.map(|v| v.0);
61
- let row_index = row_index.map(|(name, offset)| RowIndex {
62
- name: name.into(),
63
- offset,
64
- });
65
-
66
- let lf = LazyJsonLineReader::new(path)
67
- .with_infer_schema_length(infer_schema_length.and_then(NonZeroUsize::new))
68
- .with_batch_size(batch_size)
69
- .with_n_rows(n_rows)
70
- .low_memory(low_memory)
71
- .with_rechunk(rechunk)
72
- .with_row_index(row_index)
73
- .finish()
74
- .map_err(RbPolarsErr::from)?;
75
- Ok(lf.into())
76
- }
77
-
78
- pub fn new_from_csv(arguments: &[Value]) -> RbResult<Self> {
79
- // start arguments
80
- // this pattern is needed for more than 16
81
- let path = String::try_convert(arguments[0])?;
82
- let separator = String::try_convert(arguments[1])?;
83
- let has_header = bool::try_convert(arguments[2])?;
84
- let ignore_errors = bool::try_convert(arguments[3])?;
85
- let skip_rows = usize::try_convert(arguments[4])?;
86
- let n_rows = Option::<usize>::try_convert(arguments[5])?;
87
- let cache = bool::try_convert(arguments[6])?;
88
- let overwrite_dtype = Option::<Vec<(String, Wrap<DataType>)>>::try_convert(arguments[7])?;
89
- let low_memory = bool::try_convert(arguments[8])?;
90
- let comment_prefix = Option::<String>::try_convert(arguments[9])?;
91
- let quote_char = Option::<String>::try_convert(arguments[10])?;
92
- let null_values = Option::<Wrap<NullValues>>::try_convert(arguments[11])?;
93
- let infer_schema_length = Option::<usize>::try_convert(arguments[12])?;
94
- let with_schema_modify = Option::<Value>::try_convert(arguments[13])?;
95
- let rechunk = bool::try_convert(arguments[14])?;
96
- let skip_rows_after_header = usize::try_convert(arguments[15])?;
97
- let encoding = Wrap::<CsvEncoding>::try_convert(arguments[16])?;
98
- let row_index = Option::<(String, IdxSize)>::try_convert(arguments[17])?;
99
- let try_parse_dates = bool::try_convert(arguments[18])?;
100
- let eol_char = String::try_convert(arguments[19])?;
101
- let truncate_ragged_lines = bool::try_convert(arguments[20])?;
102
- // end arguments
103
-
104
- let null_values = null_values.map(|w| w.0);
105
- let quote_char = quote_char.map(|s| s.as_bytes()[0]);
106
- let separator = separator.as_bytes()[0];
107
- let eol_char = eol_char.as_bytes()[0];
108
- let row_index = row_index.map(|(name, offset)| RowIndex {
109
- name: name.into(),
110
- offset,
111
- });
112
-
113
- let overwrite_dtype = overwrite_dtype.map(|overwrite_dtype| {
114
- overwrite_dtype
115
- .into_iter()
116
- .map(|(name, dtype)| Field::new((&*name).into(), dtype.0))
117
- .collect::<Schema>()
118
- });
119
-
120
- let r = LazyCsvReader::new(path)
121
- .with_infer_schema_length(infer_schema_length)
122
- .with_separator(separator)
123
- .with_has_header(has_header)
124
- .with_ignore_errors(ignore_errors)
125
- .with_skip_rows(skip_rows)
126
- .with_n_rows(n_rows)
127
- .with_cache(cache)
128
- .with_dtype_overwrite(overwrite_dtype.map(Arc::new))
129
- // TODO add with_schema
130
- .with_low_memory(low_memory)
131
- .with_comment_prefix(comment_prefix.map(|x| x.into()))
132
- .with_quote_char(quote_char)
133
- .with_eol_char(eol_char)
134
- .with_rechunk(rechunk)
135
- .with_skip_rows_after_header(skip_rows_after_header)
136
- .with_encoding(encoding.0)
137
- .with_row_index(row_index)
138
- .with_try_parse_dates(try_parse_dates)
139
- .with_null_values(null_values)
140
- // TODO add with_missing_is_null
141
- .with_truncate_ragged_lines(truncate_ragged_lines);
142
-
143
- if let Some(_lambda) = with_schema_modify {
144
- todo!();
145
- }
146
-
147
- Ok(r.finish().map_err(RbPolarsErr::from)?.into())
148
- }
149
-
150
- #[allow(clippy::too_many_arguments)]
151
- pub fn new_from_parquet(
152
- path: Option<PathBuf>,
153
- paths: Vec<PathBuf>,
154
- n_rows: Option<usize>,
155
- cache: bool,
156
- parallel: Wrap<ParallelStrategy>,
157
- rechunk: bool,
158
- row_index: Option<(String, IdxSize)>,
159
- low_memory: bool,
160
- use_statistics: bool,
161
- hive_partitioning: Option<bool>,
162
- hive_schema: Option<Wrap<Schema>>,
163
- try_parse_hive_dates: bool,
164
- glob: bool,
165
- include_file_paths: Option<String>,
166
- ) -> RbResult<Self> {
167
- let parallel = parallel.0;
168
- let hive_schema = hive_schema.map(|s| Arc::new(s.0));
169
-
170
- let first_path = if let Some(path) = &path {
171
- path
172
- } else {
173
- paths
174
- .first()
175
- .ok_or_else(|| RbValueError::new_err("expected a path argument".to_string()))?
176
- };
177
-
178
- let row_index = row_index.map(|(name, offset)| RowIndex {
179
- name: name.into(),
180
- offset,
181
- });
182
- let hive_options = HiveOptions {
183
- enabled: hive_partitioning,
184
- hive_start_idx: 0,
185
- schema: hive_schema,
186
- try_parse_dates: try_parse_hive_dates,
187
- };
188
-
189
- let args = ScanArgsParquet {
190
- n_rows,
191
- cache,
192
- parallel,
193
- rechunk,
194
- row_index,
195
- low_memory,
196
- cloud_options: None,
197
- use_statistics,
198
- hive_options,
199
- glob,
200
- include_file_paths: include_file_paths.map(|x| x.into()),
201
- };
202
-
203
- let lf = if path.is_some() {
204
- LazyFrame::scan_parquet(first_path, args)
205
- } else {
206
- LazyFrame::scan_parquet_files(Arc::from(paths), args)
207
- }
208
- .map_err(RbPolarsErr::from)?;
209
- Ok(lf.into())
210
- }
211
-
212
- #[allow(clippy::too_many_arguments)]
213
- pub fn new_from_ipc(
214
- path: String,
215
- n_rows: Option<usize>,
216
- cache: bool,
217
- rechunk: bool,
218
- row_index: Option<(String, IdxSize)>,
219
- hive_partitioning: Option<bool>,
220
- hive_schema: Option<Wrap<Schema>>,
221
- try_parse_hive_dates: bool,
222
- include_file_paths: Option<String>,
223
- ) -> RbResult<Self> {
224
- let row_index = row_index.map(|(name, offset)| RowIndex {
225
- name: name.into(),
226
- offset,
227
- });
228
-
229
- let hive_options = HiveOptions {
230
- enabled: hive_partitioning,
231
- hive_start_idx: 0,
232
- schema: hive_schema.map(|x| Arc::new(x.0)),
233
- try_parse_dates: try_parse_hive_dates,
234
- };
235
-
236
- let args = ScanArgsIpc {
237
- n_rows,
238
- cache,
239
- rechunk,
240
- row_index,
241
- cloud_options: None,
242
- hive_options,
243
- include_file_paths: include_file_paths.map(|x| x.into()),
244
- };
245
- let lf = LazyFrame::scan_ipc(path, args).map_err(RbPolarsErr::from)?;
246
- Ok(lf.into())
247
- }
248
-
249
- pub fn write_json(&self, rb_f: Value) -> RbResult<()> {
250
- let file = BufWriter::new(get_file_like(rb_f, true)?);
251
- serde_json::to_writer(file, &self.ldf.borrow().logical_plan)
252
- .map_err(|err| RbValueError::new_err(format!("{:?}", err)))?;
253
- Ok(())
254
- }
255
-
256
- pub fn describe_plan(&self) -> RbResult<String> {
257
- self.ldf
258
- .borrow()
259
- .describe_plan()
260
- .map_err(RbPolarsErr::from)
261
- .map_err(Into::into)
262
- }
263
-
264
- pub fn describe_optimized_plan(&self) -> RbResult<String> {
265
- let result = self
266
- .ldf
267
- .borrow()
268
- .describe_optimized_plan()
269
- .map_err(RbPolarsErr::from)?;
270
- Ok(result)
271
- }
272
-
273
- #[allow(clippy::too_many_arguments)]
274
- pub fn optimization_toggle(
275
- &self,
276
- type_coercion: bool,
277
- predicate_pushdown: bool,
278
- projection_pushdown: bool,
279
- simplify_expr: bool,
280
- slice_pushdown: bool,
281
- comm_subplan_elim: bool,
282
- comm_subexpr_elim: bool,
283
- allow_streaming: bool,
284
- _eager: bool,
285
- ) -> RbLazyFrame {
286
- let ldf = self.ldf.borrow().clone();
287
- let mut ldf = ldf
288
- .with_type_coercion(type_coercion)
289
- .with_predicate_pushdown(predicate_pushdown)
290
- .with_simplify_expr(simplify_expr)
291
- .with_slice_pushdown(slice_pushdown)
292
- .with_streaming(allow_streaming)
293
- ._with_eager(_eager)
294
- .with_projection_pushdown(projection_pushdown);
295
-
296
- ldf = ldf.with_comm_subplan_elim(comm_subplan_elim);
297
- ldf = ldf.with_comm_subexpr_elim(comm_subexpr_elim);
298
-
299
- ldf.into()
300
- }
301
-
302
- pub fn sort(
303
- &self,
304
- by_column: String,
305
- descending: bool,
306
- nulls_last: bool,
307
- maintain_order: bool,
308
- multithreaded: bool,
309
- ) -> Self {
310
- let ldf = self.ldf.borrow().clone();
311
- ldf.sort(
312
- [&by_column],
313
- SortMultipleOptions {
314
- descending: vec![descending],
315
- nulls_last: vec![nulls_last],
316
- multithreaded,
317
- maintain_order,
318
- },
319
- )
320
- .into()
321
- }
322
-
323
- pub fn sort_by_exprs(
324
- &self,
325
- by: RArray,
326
- descending: Vec<bool>,
327
- nulls_last: Vec<bool>,
328
- maintain_order: bool,
329
- multithreaded: bool,
330
- ) -> RbResult<Self> {
331
- let ldf = self.ldf.borrow().clone();
332
- let exprs = rb_exprs_to_exprs(by)?;
333
- Ok(ldf
334
- .sort_by_exprs(
335
- exprs,
336
- SortMultipleOptions {
337
- descending,
338
- nulls_last,
339
- maintain_order,
340
- multithreaded,
341
- },
342
- )
343
- .into())
344
- }
345
-
346
- pub fn cache(&self) -> Self {
347
- let ldf = self.ldf.borrow().clone();
348
- ldf.cache().into()
349
- }
350
-
351
- pub fn collect(&self) -> RbResult<RbDataFrame> {
352
- let ldf = self.ldf.borrow().clone();
353
- let df = ldf.collect().map_err(RbPolarsErr::from)?;
354
- Ok(df.into())
355
- }
356
-
357
- #[allow(clippy::too_many_arguments)]
358
- pub fn sink_parquet(
359
- &self,
360
- path: PathBuf,
361
- compression: String,
362
- compression_level: Option<i32>,
363
- statistics: Wrap<StatisticsOptions>,
364
- row_group_size: Option<usize>,
365
- data_page_size: Option<usize>,
366
- maintain_order: bool,
367
- ) -> RbResult<()> {
368
- let compression = parse_parquet_compression(&compression, compression_level)?;
369
-
370
- let options = ParquetWriteOptions {
371
- compression,
372
- statistics: statistics.0,
373
- row_group_size,
374
- data_page_size,
375
- maintain_order,
376
- };
377
-
378
- let ldf = self.ldf.borrow().clone();
379
- ldf.sink_parquet(path, options).map_err(RbPolarsErr::from)?;
380
- Ok(())
381
- }
382
-
383
- pub fn sink_ipc(
384
- &self,
385
- path: PathBuf,
386
- compression: Option<Wrap<IpcCompression>>,
387
- maintain_order: bool,
388
- ) -> RbResult<()> {
389
- let options = IpcWriterOptions {
390
- compression: compression.map(|c| c.0),
391
- maintain_order,
392
- };
393
-
394
- let ldf = self.ldf.borrow().clone();
395
- ldf.sink_ipc(path, options).map_err(RbPolarsErr::from)?;
396
- Ok(())
397
- }
398
-
399
- #[allow(clippy::too_many_arguments)]
400
- pub fn sink_csv(
401
- &self,
402
- path: PathBuf,
403
- include_bom: bool,
404
- include_header: bool,
405
- separator: u8,
406
- line_terminator: String,
407
- quote_char: u8,
408
- batch_size: Wrap<NonZeroUsize>,
409
- datetime_format: Option<String>,
410
- date_format: Option<String>,
411
- time_format: Option<String>,
412
- float_scientific: Option<bool>,
413
- float_precision: Option<usize>,
414
- null_value: Option<String>,
415
- quote_style: Option<Wrap<QuoteStyle>>,
416
- maintain_order: bool,
417
- ) -> RbResult<()> {
418
- let quote_style = quote_style.map_or(QuoteStyle::default(), |wrap| wrap.0);
419
- let null_value = null_value.unwrap_or(SerializeOptions::default().null);
420
-
421
- let serialize_options = SerializeOptions {
422
- date_format,
423
- time_format,
424
- datetime_format,
425
- float_scientific,
426
- float_precision,
427
- separator,
428
- quote_char,
429
- null: null_value,
430
- line_terminator,
431
- quote_style,
432
- };
433
-
434
- let options = CsvWriterOptions {
435
- include_bom,
436
- include_header,
437
- maintain_order,
438
- batch_size: batch_size.0,
439
- serialize_options,
440
- };
441
-
442
- let ldf = self.ldf.borrow().clone();
443
- ldf.sink_csv(path, options).map_err(RbPolarsErr::from)?;
444
- Ok(())
445
- }
446
-
447
- pub fn sink_json(&self, path: PathBuf, maintain_order: bool) -> RbResult<()> {
448
- let options = JsonWriterOptions { maintain_order };
449
-
450
- let ldf = self.ldf.borrow().clone();
451
- ldf.sink_json(path, options).map_err(RbPolarsErr::from)?;
452
- Ok(())
453
- }
454
-
455
- pub fn fetch(&self, n_rows: usize) -> RbResult<RbDataFrame> {
456
- let ldf = self.ldf.borrow().clone();
457
- let df = ldf.fetch(n_rows).map_err(RbPolarsErr::from)?;
458
- Ok(df.into())
459
- }
460
-
461
- pub fn filter(&self, predicate: &RbExpr) -> Self {
462
- let ldf = self.ldf.borrow().clone();
463
- ldf.filter(predicate.inner.clone()).into()
464
- }
465
-
466
- pub fn select(&self, exprs: RArray) -> RbResult<Self> {
467
- let ldf = self.ldf.borrow().clone();
468
- let exprs = rb_exprs_to_exprs(exprs)?;
469
- Ok(ldf.select(exprs).into())
470
- }
471
-
472
- pub fn select_seq(&self, exprs: RArray) -> RbResult<Self> {
473
- let ldf = self.ldf.borrow().clone();
474
- let exprs = rb_exprs_to_exprs(exprs)?;
475
- Ok(ldf.select_seq(exprs).into())
476
- }
477
-
478
- pub fn group_by(&self, by: RArray, maintain_order: bool) -> RbResult<RbLazyGroupBy> {
479
- let ldf = self.ldf.borrow().clone();
480
- let by = rb_exprs_to_exprs(by)?;
481
- let lazy_gb = if maintain_order {
482
- ldf.group_by_stable(by)
483
- } else {
484
- ldf.group_by(by)
485
- };
486
- Ok(RbLazyGroupBy {
487
- lgb: RefCell::new(Some(lazy_gb)),
488
- })
489
- }
490
-
491
- pub fn rolling(
492
- &self,
493
- index_column: &RbExpr,
494
- period: String,
495
- offset: String,
496
- closed: Wrap<ClosedWindow>,
497
- by: RArray,
498
- ) -> RbResult<RbLazyGroupBy> {
499
- let closed_window = closed.0;
500
- let ldf = self.ldf.borrow().clone();
501
- let by = rb_exprs_to_exprs(by)?;
502
- let lazy_gb = ldf.rolling(
503
- index_column.inner.clone(),
504
- by,
505
- RollingGroupOptions {
506
- index_column: "".into(),
507
- period: Duration::parse(&period),
508
- offset: Duration::parse(&offset),
509
- closed_window,
510
- },
511
- );
512
-
513
- Ok(RbLazyGroupBy {
514
- lgb: RefCell::new(Some(lazy_gb)),
515
- })
516
- }
517
-
518
- #[allow(clippy::too_many_arguments)]
519
- pub fn group_by_dynamic(
520
- &self,
521
- index_column: &RbExpr,
522
- every: String,
523
- period: String,
524
- offset: String,
525
- label: Wrap<Label>,
526
- include_boundaries: bool,
527
- closed: Wrap<ClosedWindow>,
528
- by: RArray,
529
- start_by: Wrap<StartBy>,
530
- ) -> RbResult<RbLazyGroupBy> {
531
- let closed_window = closed.0;
532
- let by = rb_exprs_to_exprs(by)?;
533
- let ldf = self.ldf.borrow().clone();
534
- let lazy_gb = ldf.group_by_dynamic(
535
- index_column.inner.clone(),
536
- by,
537
- DynamicGroupOptions {
538
- every: Duration::parse(&every),
539
- period: Duration::parse(&period),
540
- offset: Duration::parse(&offset),
541
- label: label.0,
542
- include_boundaries,
543
- closed_window,
544
- start_by: start_by.0,
545
- ..Default::default()
546
- },
547
- );
548
-
549
- Ok(RbLazyGroupBy {
550
- lgb: RefCell::new(Some(lazy_gb)),
551
- })
552
- }
553
-
554
- pub fn with_context(&self, contexts: RArray) -> RbResult<Self> {
555
- let contexts = contexts
556
- .into_iter()
557
- .map(TryConvert::try_convert)
558
- .collect::<RbResult<Vec<&RbLazyFrame>>>()?;
559
- let contexts = contexts
560
- .into_iter()
561
- .map(|ldf| ldf.ldf.borrow().clone())
562
- .collect::<Vec<_>>();
563
- Ok(self.ldf.borrow().clone().with_context(contexts).into())
564
- }
565
-
566
- #[allow(clippy::too_many_arguments)]
567
- pub fn join_asof(
568
- &self,
569
- other: &RbLazyFrame,
570
- left_on: &RbExpr,
571
- right_on: &RbExpr,
572
- left_by: Option<Vec<String>>,
573
- right_by: Option<Vec<String>>,
574
- allow_parallel: bool,
575
- force_parallel: bool,
576
- suffix: String,
577
- strategy: Wrap<AsofStrategy>,
578
- tolerance: Option<Wrap<AnyValue<'_>>>,
579
- tolerance_str: Option<String>,
580
- ) -> RbResult<Self> {
581
- let ldf = self.ldf.borrow().clone();
582
- let other = other.ldf.borrow().clone();
583
- let left_on = left_on.inner.clone();
584
- let right_on = right_on.inner.clone();
585
- Ok(ldf
586
- .join_builder()
587
- .with(other)
588
- .left_on([left_on])
589
- .right_on([right_on])
590
- .allow_parallel(allow_parallel)
591
- .force_parallel(force_parallel)
592
- .how(JoinType::AsOf(AsOfOptions {
593
- strategy: strategy.0,
594
- left_by: left_by.map(strings_to_pl_smallstr),
595
- right_by: right_by.map(strings_to_pl_smallstr),
596
- tolerance: tolerance.map(|t| t.0.into_static().unwrap()),
597
- tolerance_str: tolerance_str.map(|s| s.into()),
598
- }))
599
- .suffix(suffix)
600
- .finish()
601
- .into())
602
- }
603
-
604
- #[allow(clippy::too_many_arguments)]
605
- pub fn join(
606
- &self,
607
- other: &RbLazyFrame,
608
- left_on: RArray,
609
- right_on: RArray,
610
- allow_parallel: bool,
611
- force_parallel: bool,
612
- join_nulls: bool,
613
- how: Wrap<JoinType>,
614
- suffix: String,
615
- ) -> RbResult<Self> {
616
- let ldf = self.ldf.borrow().clone();
617
- let other = other.ldf.borrow().clone();
618
- let left_on = rb_exprs_to_exprs(left_on)?;
619
- let right_on = rb_exprs_to_exprs(right_on)?;
620
-
621
- Ok(ldf
622
- .join_builder()
623
- .with(other)
624
- .left_on(left_on)
625
- .right_on(right_on)
626
- .allow_parallel(allow_parallel)
627
- .force_parallel(force_parallel)
628
- .join_nulls(join_nulls)
629
- .how(how.0)
630
- .suffix(suffix)
631
- .finish()
632
- .into())
633
- }
634
-
635
- pub fn with_column(&self, expr: &RbExpr) -> Self {
636
- let ldf = self.ldf.borrow().clone();
637
- ldf.with_column(expr.inner.clone()).into()
638
- }
639
-
640
- pub fn with_columns(&self, exprs: RArray) -> RbResult<Self> {
641
- let ldf = self.ldf.borrow().clone();
642
- Ok(ldf.with_columns(rb_exprs_to_exprs(exprs)?).into())
643
- }
644
-
645
- pub fn with_columns_seq(&self, exprs: RArray) -> RbResult<Self> {
646
- let ldf = self.ldf.borrow().clone();
647
- Ok(ldf.with_columns_seq(rb_exprs_to_exprs(exprs)?).into())
648
- }
649
-
650
- pub fn rename(&self, existing: Vec<String>, new: Vec<String>) -> Self {
651
- let ldf = self.ldf.borrow().clone();
652
- ldf.rename(existing, new).into()
653
- }
654
-
655
- pub fn reverse(&self) -> Self {
656
- let ldf = self.ldf.borrow().clone();
657
- ldf.reverse().into()
658
- }
659
-
660
- pub fn shift(&self, n: &RbExpr, fill_value: Option<&RbExpr>) -> Self {
661
- let lf = self.ldf.borrow().clone();
662
- let out = match fill_value {
663
- Some(v) => lf.shift_and_fill(n.inner.clone(), v.inner.clone()),
664
- None => lf.shift(n.inner.clone()),
665
- };
666
- out.into()
667
- }
668
-
669
- pub fn fill_nan(&self, fill_value: &RbExpr) -> Self {
670
- let ldf = self.ldf.borrow().clone();
671
- ldf.fill_nan(fill_value.inner.clone()).into()
672
- }
673
-
674
- pub fn min(&self) -> Self {
675
- let ldf = self.ldf.borrow().clone();
676
- let out = ldf.min();
677
- out.into()
678
- }
679
-
680
- pub fn max(&self) -> Self {
681
- let ldf = self.ldf.borrow().clone();
682
- let out = ldf.max();
683
- out.into()
684
- }
685
-
686
- pub fn sum(&self) -> Self {
687
- let ldf = self.ldf.borrow().clone();
688
- let out = ldf.sum();
689
- out.into()
690
- }
691
-
692
- pub fn mean(&self) -> Self {
693
- let ldf = self.ldf.borrow().clone();
694
- let out = ldf.mean();
695
- out.into()
696
- }
697
-
698
- pub fn std(&self, ddof: u8) -> Self {
699
- let ldf = self.ldf.borrow().clone();
700
- let out = ldf.std(ddof);
701
- out.into()
702
- }
703
-
704
- pub fn var(&self, ddof: u8) -> Self {
705
- let ldf = self.ldf.borrow().clone();
706
- let out = ldf.var(ddof);
707
- out.into()
708
- }
709
-
710
- pub fn median(&self) -> Self {
711
- let ldf = self.ldf.borrow().clone();
712
- let out = ldf.median();
713
- out.into()
714
- }
715
-
716
- pub fn quantile(
717
- &self,
718
- quantile: &RbExpr,
719
- interpolation: Wrap<QuantileInterpolOptions>,
720
- ) -> Self {
721
- let ldf = self.ldf.borrow().clone();
722
- let out = ldf.quantile(quantile.inner.clone(), interpolation.0);
723
- out.into()
724
- }
725
-
726
- pub fn explode(&self, column: RArray) -> RbResult<Self> {
727
- let ldf = self.ldf.borrow().clone();
728
- let column = rb_exprs_to_exprs(column)?;
729
- Ok(ldf.explode(column).into())
730
- }
731
-
732
- pub fn null_count(&self) -> Self {
733
- let ldf = self.ldf.borrow().clone();
734
- ldf.null_count().into()
735
- }
736
-
737
- pub fn unique(
738
- &self,
739
- maintain_order: bool,
740
- subset: Option<Vec<String>>,
741
- keep: Wrap<UniqueKeepStrategy>,
742
- ) -> RbResult<Self> {
743
- let ldf = self.ldf.borrow().clone();
744
- Ok(match maintain_order {
745
- true => ldf.unique_stable_generic(subset, keep.0),
746
- false => ldf.unique_generic(subset, keep.0),
747
- }
748
- .into())
749
- }
750
-
751
- pub fn drop_nulls(&self, subset: Option<Vec<String>>) -> Self {
752
- let ldf = self.ldf.borrow().clone();
753
- ldf.drop_nulls(subset.map(|v| v.into_iter().map(|s| col(&s)).collect()))
754
- .into()
755
- }
756
-
757
- pub fn slice(&self, offset: i64, len: Option<IdxSize>) -> Self {
758
- let ldf = self.ldf.borrow().clone();
759
- ldf.slice(offset, len.unwrap_or(IdxSize::MAX)).into()
760
- }
761
-
762
- pub fn tail(&self, n: IdxSize) -> Self {
763
- let ldf = self.ldf.borrow().clone();
764
- ldf.tail(n).into()
765
- }
766
-
767
- pub fn unpivot(
768
- &self,
769
- on: RArray,
770
- index: RArray,
771
- value_name: Option<String>,
772
- variable_name: Option<String>,
773
- ) -> RbResult<Self> {
774
- let on = rb_exprs_to_exprs(on)?;
775
- let index = rb_exprs_to_exprs(index)?;
776
- let args = UnpivotArgsDSL {
777
- on: on.into_iter().map(|e| e.into()).collect(),
778
- index: index.into_iter().map(|e| e.into()).collect(),
779
- value_name: value_name.map(|s| s.into()),
780
- variable_name: variable_name.map(|s| s.into()),
781
- };
782
-
783
- let ldf = self.ldf.borrow().clone();
784
- Ok(ldf.unpivot(args).into())
785
- }
786
-
787
- pub fn with_row_index(&self, name: String, offset: Option<IdxSize>) -> Self {
788
- let ldf = self.ldf.borrow().clone();
789
- ldf.with_row_index(&name, offset).into()
790
- }
791
-
792
- pub fn drop(&self, cols: Vec<String>) -> Self {
793
- let ldf = self.ldf.borrow().clone();
794
- ldf.drop(cols).into()
795
- }
796
-
797
- pub fn cast_all(&self, dtype: Wrap<DataType>, strict: bool) -> Self {
798
- self.ldf.borrow().clone().cast_all(dtype.0, strict).into()
799
- }
800
-
801
- pub fn clone(&self) -> Self {
802
- self.ldf.borrow().clone().into()
803
- }
804
-
805
- pub fn collect_schema(&self) -> RbResult<RHash> {
806
- let schema = self
807
- .ldf
808
- .borrow_mut()
809
- .collect_schema()
810
- .map_err(RbPolarsErr::from)?;
811
-
812
- let schema_dict = RHash::new();
813
- schema.iter_fields().for_each(|fld| {
814
- // TODO remove unwrap
815
- schema_dict
816
- .aset::<String, Value>(
817
- fld.name().to_string(),
818
- Wrap(fld.dtype().clone()).into_value(),
819
- )
820
- .unwrap();
821
- });
822
- Ok(schema_dict)
823
- }
824
-
825
- pub fn unnest(&self, cols: Vec<String>) -> Self {
826
- self.ldf.borrow().clone().unnest(cols).into()
827
- }
828
-
829
- pub fn count(&self) -> Self {
830
- let ldf = self.ldf.borrow().clone();
831
- ldf.count().into()
832
- }
833
-
834
- pub fn merge_sorted(&self, other: &Self, key: String) -> RbResult<Self> {
835
- let out = self
836
- .ldf
837
- .borrow()
838
- .clone()
839
- .merge_sorted(other.ldf.borrow().clone(), &key)
840
- .map_err(RbPolarsErr::from)?;
841
- Ok(out.into())
842
- }
843
- }