polars-df 0.13.0 → 0.15.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (86) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +30 -0
  3. data/Cargo.lock +1368 -319
  4. data/LICENSE.txt +1 -0
  5. data/README.md +1 -2
  6. data/ext/polars/Cargo.toml +15 -6
  7. data/ext/polars/src/batched_csv.rs +10 -13
  8. data/ext/polars/src/conversion/any_value.rs +37 -21
  9. data/ext/polars/src/conversion/chunked_array.rs +3 -3
  10. data/ext/polars/src/conversion/mod.rs +159 -46
  11. data/ext/polars/src/dataframe/construction.rs +4 -7
  12. data/ext/polars/src/dataframe/export.rs +9 -2
  13. data/ext/polars/src/dataframe/general.rs +22 -16
  14. data/ext/polars/src/dataframe/io.rs +78 -174
  15. data/ext/polars/src/dataframe/mod.rs +1 -0
  16. data/ext/polars/src/dataframe/serde.rs +15 -0
  17. data/ext/polars/src/error.rs +31 -48
  18. data/ext/polars/src/exceptions.rs +24 -0
  19. data/ext/polars/src/expr/binary.rs +4 -42
  20. data/ext/polars/src/expr/datetime.rs +16 -7
  21. data/ext/polars/src/expr/general.rs +14 -23
  22. data/ext/polars/src/expr/list.rs +18 -11
  23. data/ext/polars/src/expr/name.rs +3 -2
  24. data/ext/polars/src/expr/rolling.rs +6 -7
  25. data/ext/polars/src/expr/string.rs +17 -37
  26. data/ext/polars/src/file.rs +59 -22
  27. data/ext/polars/src/functions/business.rs +15 -0
  28. data/ext/polars/src/functions/io.rs +6 -6
  29. data/ext/polars/src/functions/lazy.rs +17 -8
  30. data/ext/polars/src/functions/mod.rs +1 -0
  31. data/ext/polars/src/functions/range.rs +4 -2
  32. data/ext/polars/src/interop/arrow/mod.rs +1 -0
  33. data/ext/polars/src/interop/arrow/to_ruby.rs +83 -0
  34. data/ext/polars/src/interop/mod.rs +1 -0
  35. data/ext/polars/src/lazyframe/general.rs +877 -0
  36. data/ext/polars/src/lazyframe/mod.rs +3 -825
  37. data/ext/polars/src/lazyframe/serde.rs +31 -0
  38. data/ext/polars/src/lib.rs +44 -13
  39. data/ext/polars/src/map/dataframe.rs +46 -14
  40. data/ext/polars/src/map/lazy.rs +65 -4
  41. data/ext/polars/src/map/mod.rs +17 -16
  42. data/ext/polars/src/map/series.rs +106 -64
  43. data/ext/polars/src/on_startup.rs +2 -2
  44. data/ext/polars/src/series/aggregation.rs +1 -5
  45. data/ext/polars/src/series/arithmetic.rs +10 -10
  46. data/ext/polars/src/series/construction.rs +52 -25
  47. data/ext/polars/src/series/export.rs +1 -1
  48. data/ext/polars/src/series/general.rs +643 -0
  49. data/ext/polars/src/series/import.rs +55 -0
  50. data/ext/polars/src/series/mod.rs +11 -638
  51. data/ext/polars/src/series/scatter.rs +2 -2
  52. data/ext/polars/src/utils.rs +0 -20
  53. data/lib/polars/batched_csv_reader.rb +0 -2
  54. data/lib/polars/binary_expr.rb +133 -9
  55. data/lib/polars/binary_name_space.rb +101 -6
  56. data/lib/polars/config.rb +4 -0
  57. data/lib/polars/data_frame.rb +285 -62
  58. data/lib/polars/data_type_group.rb +28 -0
  59. data/lib/polars/data_types.rb +2 -0
  60. data/lib/polars/date_time_expr.rb +244 -0
  61. data/lib/polars/date_time_name_space.rb +87 -0
  62. data/lib/polars/expr.rb +109 -8
  63. data/lib/polars/functions/as_datatype.rb +51 -2
  64. data/lib/polars/functions/col.rb +1 -1
  65. data/lib/polars/functions/eager.rb +1 -3
  66. data/lib/polars/functions/lazy.rb +88 -10
  67. data/lib/polars/functions/range/time_range.rb +21 -21
  68. data/lib/polars/io/csv.rb +14 -16
  69. data/lib/polars/io/database.rb +2 -2
  70. data/lib/polars/io/ipc.rb +14 -12
  71. data/lib/polars/io/ndjson.rb +10 -0
  72. data/lib/polars/io/parquet.rb +168 -111
  73. data/lib/polars/lazy_frame.rb +649 -15
  74. data/lib/polars/list_name_space.rb +169 -0
  75. data/lib/polars/selectors.rb +1144 -0
  76. data/lib/polars/series.rb +470 -40
  77. data/lib/polars/string_cache.rb +27 -1
  78. data/lib/polars/string_expr.rb +0 -1
  79. data/lib/polars/string_name_space.rb +73 -3
  80. data/lib/polars/struct_name_space.rb +31 -7
  81. data/lib/polars/utils/various.rb +5 -1
  82. data/lib/polars/utils.rb +45 -10
  83. data/lib/polars/version.rb +1 -1
  84. data/lib/polars.rb +2 -1
  85. metadata +14 -4
  86. data/lib/polars/functions.rb +0 -57
@@ -1,16 +1,8 @@
1
- use magnus::{IntoValue, RArray, RHash, TryConvert, Value};
2
- use polars::io::{HiveOptions, RowIndex};
1
+ mod general;
2
+ mod serde;
3
+
3
4
  use polars::lazy::frame::LazyFrame;
4
- use polars::prelude::*;
5
5
  use std::cell::RefCell;
6
- use std::io::{BufWriter, Read};
7
- use std::num::NonZeroUsize;
8
- use std::path::PathBuf;
9
-
10
- use crate::conversion::*;
11
- use crate::expr::rb_exprs_to_exprs;
12
- use crate::file::get_file_like;
13
- use crate::{RbDataFrame, RbExpr, RbLazyGroupBy, RbPolarsErr, RbResult, RbValueError};
14
6
 
15
7
  #[magnus::wrap(class = "Polars::RbLazyFrame")]
16
8
  #[derive(Clone)]
@@ -25,817 +17,3 @@ impl From<LazyFrame> for RbLazyFrame {
25
17
  }
26
18
  }
27
19
  }
28
-
29
- impl RbLazyFrame {
30
- pub fn read_json(rb_f: Value) -> RbResult<Self> {
31
- // it is faster to first read to memory and then parse: https://github.com/serde-rs/json/issues/160
32
- // so don't bother with files.
33
- let mut json = String::new();
34
- let _ = get_file_like(rb_f, false)?
35
- .read_to_string(&mut json)
36
- .unwrap();
37
-
38
- // Safety
39
- // we skipped the serializing/deserializing of the static lifetime in `DataType`
40
- // so we actually don't have a lifetime at all when serializing.
41
-
42
- &str still has a lifetime. But it's ok, because we drop it immediately
43
- // in this scope
44
- let json = unsafe { std::mem::transmute::<&'_ str, &'static str>(json.as_str()) };
45
-
46
- let lp = serde_json::from_str::<DslPlan>(json)
47
- .map_err(|err| RbValueError::new_err(format!("{:?}", err)))?;
48
- Ok(LazyFrame::from(lp).into())
49
- }
50
-
51
- pub fn new_from_ndjson(
52
- path: String,
53
- infer_schema_length: Option<usize>,
54
- batch_size: Option<Wrap<NonZeroUsize>>,
55
- n_rows: Option<usize>,
56
- low_memory: bool,
57
- rechunk: bool,
58
- row_index: Option<(String, IdxSize)>,
59
- ) -> RbResult<Self> {
60
- let batch_size = batch_size.map(|v| v.0);
61
- let row_index = row_index.map(|(name, offset)| RowIndex {
62
- name: Arc::from(name.as_str()),
63
- offset,
64
- });
65
-
66
- let lf = LazyJsonLineReader::new(path)
67
- .with_infer_schema_length(infer_schema_length.and_then(NonZeroUsize::new))
68
- .with_batch_size(batch_size)
69
- .with_n_rows(n_rows)
70
- .low_memory(low_memory)
71
- .with_rechunk(rechunk)
72
- .with_row_index(row_index)
73
- .finish()
74
- .map_err(RbPolarsErr::from)?;
75
- Ok(lf.into())
76
- }
77
-
78
- pub fn new_from_csv(arguments: &[Value]) -> RbResult<Self> {
79
- // start arguments
80
- // this pattern is needed for more than 16
81
- let path = String::try_convert(arguments[0])?;
82
- let separator = String::try_convert(arguments[1])?;
83
- let has_header = bool::try_convert(arguments[2])?;
84
- let ignore_errors = bool::try_convert(arguments[3])?;
85
- let skip_rows = usize::try_convert(arguments[4])?;
86
- let n_rows = Option::<usize>::try_convert(arguments[5])?;
87
- let cache = bool::try_convert(arguments[6])?;
88
- let overwrite_dtype = Option::<Vec<(String, Wrap<DataType>)>>::try_convert(arguments[7])?;
89
- let low_memory = bool::try_convert(arguments[8])?;
90
- let comment_prefix = Option::<String>::try_convert(arguments[9])?;
91
- let quote_char = Option::<String>::try_convert(arguments[10])?;
92
- let null_values = Option::<Wrap<NullValues>>::try_convert(arguments[11])?;
93
- let infer_schema_length = Option::<usize>::try_convert(arguments[12])?;
94
- let with_schema_modify = Option::<Value>::try_convert(arguments[13])?;
95
- let rechunk = bool::try_convert(arguments[14])?;
96
- let skip_rows_after_header = usize::try_convert(arguments[15])?;
97
- let encoding = Wrap::<CsvEncoding>::try_convert(arguments[16])?;
98
- let row_index = Option::<(String, IdxSize)>::try_convert(arguments[17])?;
99
- let try_parse_dates = bool::try_convert(arguments[18])?;
100
- let eol_char = String::try_convert(arguments[19])?;
101
- let truncate_ragged_lines = bool::try_convert(arguments[20])?;
102
- // end arguments
103
-
104
- let null_values = null_values.map(|w| w.0);
105
- let quote_char = quote_char.map(|s| s.as_bytes()[0]);
106
- let separator = separator.as_bytes()[0];
107
- let eol_char = eol_char.as_bytes()[0];
108
- let row_index = row_index.map(|(name, offset)| RowIndex {
109
- name: Arc::from(name.as_str()),
110
- offset,
111
- });
112
-
113
- let overwrite_dtype = overwrite_dtype.map(|overwrite_dtype| {
114
- overwrite_dtype
115
- .into_iter()
116
- .map(|(name, dtype)| Field::new(&name, dtype.0))
117
- .collect::<Schema>()
118
- });
119
-
120
- let r = LazyCsvReader::new(path)
121
- .with_infer_schema_length(infer_schema_length)
122
- .with_separator(separator)
123
- .with_has_header(has_header)
124
- .with_ignore_errors(ignore_errors)
125
- .with_skip_rows(skip_rows)
126
- .with_n_rows(n_rows)
127
- .with_cache(cache)
128
- .with_dtype_overwrite(overwrite_dtype.map(Arc::new))
129
- // TODO add with_schema
130
- .with_low_memory(low_memory)
131
- .with_comment_prefix(comment_prefix.as_deref())
132
- .with_quote_char(quote_char)
133
- .with_eol_char(eol_char)
134
- .with_rechunk(rechunk)
135
- .with_skip_rows_after_header(skip_rows_after_header)
136
- .with_encoding(encoding.0)
137
- .with_row_index(row_index)
138
- .with_try_parse_dates(try_parse_dates)
139
- .with_null_values(null_values)
140
- // TODO add with_missing_is_null
141
- .with_truncate_ragged_lines(truncate_ragged_lines);
142
-
143
- if let Some(_lambda) = with_schema_modify {
144
- todo!();
145
- }
146
-
147
- Ok(r.finish().map_err(RbPolarsErr::from)?.into())
148
- }
149
-
150
- #[allow(clippy::too_many_arguments)]
151
- pub fn new_from_parquet(
152
- path: Option<PathBuf>,
153
- paths: Vec<PathBuf>,
154
- n_rows: Option<usize>,
155
- cache: bool,
156
- parallel: Wrap<ParallelStrategy>,
157
- rechunk: bool,
158
- row_index: Option<(String, IdxSize)>,
159
- low_memory: bool,
160
- use_statistics: bool,
161
- hive_partitioning: Option<bool>,
162
- hive_schema: Option<Wrap<Schema>>,
163
- try_parse_hive_dates: bool,
164
- glob: bool,
165
- include_file_paths: Option<String>,
166
- ) -> RbResult<Self> {
167
- let parallel = parallel.0;
168
- let hive_schema = hive_schema.map(|s| Arc::new(s.0));
169
-
170
- let first_path = if let Some(path) = &path {
171
- path
172
- } else {
173
- paths
174
- .first()
175
- .ok_or_else(|| RbValueError::new_err("expected a path argument".to_string()))?
176
- };
177
-
178
- let row_index = row_index.map(|(name, offset)| RowIndex {
179
- name: Arc::from(name.as_str()),
180
- offset,
181
- });
182
- let hive_options = HiveOptions {
183
- enabled: hive_partitioning,
184
- hive_start_idx: 0,
185
- schema: hive_schema,
186
- try_parse_dates: try_parse_hive_dates,
187
- };
188
-
189
- let args = ScanArgsParquet {
190
- n_rows,
191
- cache,
192
- parallel,
193
- rechunk,
194
- row_index,
195
- low_memory,
196
- cloud_options: None,
197
- use_statistics,
198
- hive_options,
199
- glob,
200
- include_file_paths: include_file_paths.map(Arc::from),
201
- };
202
-
203
- let lf = if path.is_some() {
204
- LazyFrame::scan_parquet(first_path, args)
205
- } else {
206
- LazyFrame::scan_parquet_files(Arc::from(paths), args)
207
- }
208
- .map_err(RbPolarsErr::from)?;
209
- Ok(lf.into())
210
- }
211
-
212
- #[allow(clippy::too_many_arguments)]
213
- pub fn new_from_ipc(
214
- path: String,
215
- n_rows: Option<usize>,
216
- cache: bool,
217
- rechunk: bool,
218
- row_index: Option<(String, IdxSize)>,
219
- memory_map: bool,
220
- hive_partitioning: Option<bool>,
221
- hive_schema: Option<Wrap<Schema>>,
222
- try_parse_hive_dates: bool,
223
- include_file_paths: Option<String>,
224
- ) -> RbResult<Self> {
225
- let row_index = row_index.map(|(name, offset)| RowIndex {
226
- name: Arc::from(name.as_str()),
227
- offset,
228
- });
229
-
230
- let hive_options = HiveOptions {
231
- enabled: hive_partitioning,
232
- hive_start_idx: 0,
233
- schema: hive_schema.map(|x| Arc::new(x.0)),
234
- try_parse_dates: try_parse_hive_dates,
235
- };
236
-
237
- let args = ScanArgsIpc {
238
- n_rows,
239
- cache,
240
- rechunk,
241
- row_index,
242
- memory_map,
243
- cloud_options: None,
244
- hive_options,
245
- include_file_paths: include_file_paths.map(Arc::from),
246
- };
247
- let lf = LazyFrame::scan_ipc(path, args).map_err(RbPolarsErr::from)?;
248
- Ok(lf.into())
249
- }
250
-
251
- pub fn write_json(&self, rb_f: Value) -> RbResult<()> {
252
- let file = BufWriter::new(get_file_like(rb_f, true)?);
253
- serde_json::to_writer(file, &self.ldf.borrow().logical_plan)
254
- .map_err(|err| RbValueError::new_err(format!("{:?}", err)))?;
255
- Ok(())
256
- }
257
-
258
- pub fn describe_plan(&self) -> RbResult<String> {
259
- self.ldf
260
- .borrow()
261
- .describe_plan()
262
- .map_err(RbPolarsErr::from)
263
- .map_err(Into::into)
264
- }
265
-
266
- pub fn describe_optimized_plan(&self) -> RbResult<String> {
267
- let result = self
268
- .ldf
269
- .borrow()
270
- .describe_optimized_plan()
271
- .map_err(RbPolarsErr::from)?;
272
- Ok(result)
273
- }
274
-
275
- #[allow(clippy::too_many_arguments)]
276
- pub fn optimization_toggle(
277
- &self,
278
- type_coercion: bool,
279
- predicate_pushdown: bool,
280
- projection_pushdown: bool,
281
- simplify_expr: bool,
282
- slice_pushdown: bool,
283
- comm_subplan_elim: bool,
284
- comm_subexpr_elim: bool,
285
- allow_streaming: bool,
286
- _eager: bool,
287
- ) -> RbLazyFrame {
288
- let ldf = self.ldf.borrow().clone();
289
- let mut ldf = ldf
290
- .with_type_coercion(type_coercion)
291
- .with_predicate_pushdown(predicate_pushdown)
292
- .with_simplify_expr(simplify_expr)
293
- .with_slice_pushdown(slice_pushdown)
294
- .with_streaming(allow_streaming)
295
- ._with_eager(_eager)
296
- .with_projection_pushdown(projection_pushdown);
297
-
298
- ldf = ldf.with_comm_subplan_elim(comm_subplan_elim);
299
- ldf = ldf.with_comm_subexpr_elim(comm_subexpr_elim);
300
-
301
- ldf.into()
302
- }
303
-
304
- pub fn sort(
305
- &self,
306
- by_column: String,
307
- descending: bool,
308
- nulls_last: bool,
309
- maintain_order: bool,
310
- multithreaded: bool,
311
- ) -> Self {
312
- let ldf = self.ldf.borrow().clone();
313
- ldf.sort(
314
- [&by_column],
315
- SortMultipleOptions {
316
- descending: vec![descending],
317
- nulls_last: vec![nulls_last],
318
- multithreaded,
319
- maintain_order,
320
- },
321
- )
322
- .into()
323
- }
324
-
325
- pub fn sort_by_exprs(
326
- &self,
327
- by: RArray,
328
- descending: Vec<bool>,
329
- nulls_last: Vec<bool>,
330
- maintain_order: bool,
331
- multithreaded: bool,
332
- ) -> RbResult<Self> {
333
- let ldf = self.ldf.borrow().clone();
334
- let exprs = rb_exprs_to_exprs(by)?;
335
- Ok(ldf
336
- .sort_by_exprs(
337
- exprs,
338
- SortMultipleOptions {
339
- descending,
340
- nulls_last,
341
- maintain_order,
342
- multithreaded,
343
- },
344
- )
345
- .into())
346
- }
347
-
348
- pub fn cache(&self) -> Self {
349
- let ldf = self.ldf.borrow().clone();
350
- ldf.cache().into()
351
- }
352
-
353
- pub fn collect(&self) -> RbResult<RbDataFrame> {
354
- let ldf = self.ldf.borrow().clone();
355
- let df = ldf.collect().map_err(RbPolarsErr::from)?;
356
- Ok(df.into())
357
- }
358
-
359
- #[allow(clippy::too_many_arguments)]
360
- pub fn sink_parquet(
361
- &self,
362
- path: PathBuf,
363
- compression: String,
364
- compression_level: Option<i32>,
365
- statistics: Wrap<StatisticsOptions>,
366
- row_group_size: Option<usize>,
367
- data_page_size: Option<usize>,
368
- maintain_order: bool,
369
- ) -> RbResult<()> {
370
- let compression = parse_parquet_compression(&compression, compression_level)?;
371
-
372
- let options = ParquetWriteOptions {
373
- compression,
374
- statistics: statistics.0,
375
- row_group_size,
376
- data_page_size,
377
- maintain_order,
378
- };
379
-
380
- let ldf = self.ldf.borrow().clone();
381
- ldf.sink_parquet(path, options).map_err(RbPolarsErr::from)?;
382
- Ok(())
383
- }
384
-
385
- pub fn sink_ipc(
386
- &self,
387
- path: PathBuf,
388
- compression: Option<Wrap<IpcCompression>>,
389
- maintain_order: bool,
390
- ) -> RbResult<()> {
391
- let options = IpcWriterOptions {
392
- compression: compression.map(|c| c.0),
393
- maintain_order,
394
- };
395
-
396
- let ldf = self.ldf.borrow().clone();
397
- ldf.sink_ipc(path, options).map_err(RbPolarsErr::from)?;
398
- Ok(())
399
- }
400
-
401
- #[allow(clippy::too_many_arguments)]
402
- pub fn sink_csv(
403
- &self,
404
- path: PathBuf,
405
- include_bom: bool,
406
- include_header: bool,
407
- separator: u8,
408
- line_terminator: String,
409
- quote_char: u8,
410
- batch_size: Wrap<NonZeroUsize>,
411
- datetime_format: Option<String>,
412
- date_format: Option<String>,
413
- time_format: Option<String>,
414
- float_scientific: Option<bool>,
415
- float_precision: Option<usize>,
416
- null_value: Option<String>,
417
- quote_style: Option<Wrap<QuoteStyle>>,
418
- maintain_order: bool,
419
- ) -> RbResult<()> {
420
- let quote_style = quote_style.map_or(QuoteStyle::default(), |wrap| wrap.0);
421
- let null_value = null_value.unwrap_or(SerializeOptions::default().null);
422
-
423
- let serialize_options = SerializeOptions {
424
- date_format,
425
- time_format,
426
- datetime_format,
427
- float_scientific,
428
- float_precision,
429
- separator,
430
- quote_char,
431
- null: null_value,
432
- line_terminator,
433
- quote_style,
434
- };
435
-
436
- let options = CsvWriterOptions {
437
- include_bom,
438
- include_header,
439
- maintain_order,
440
- batch_size: batch_size.0,
441
- serialize_options,
442
- };
443
-
444
- let ldf = self.ldf.borrow().clone();
445
- ldf.sink_csv(path, options).map_err(RbPolarsErr::from)?;
446
- Ok(())
447
- }
448
-
449
- pub fn sink_json(&self, path: PathBuf, maintain_order: bool) -> RbResult<()> {
450
- let options = JsonWriterOptions { maintain_order };
451
-
452
- let ldf = self.ldf.borrow().clone();
453
- ldf.sink_json(path, options).map_err(RbPolarsErr::from)?;
454
- Ok(())
455
- }
456
-
457
- pub fn fetch(&self, n_rows: usize) -> RbResult<RbDataFrame> {
458
- let ldf = self.ldf.borrow().clone();
459
- let df = ldf.fetch(n_rows).map_err(RbPolarsErr::from)?;
460
- Ok(df.into())
461
- }
462
-
463
- pub fn filter(&self, predicate: &RbExpr) -> Self {
464
- let ldf = self.ldf.borrow().clone();
465
- ldf.filter(predicate.inner.clone()).into()
466
- }
467
-
468
- pub fn select(&self, exprs: RArray) -> RbResult<Self> {
469
- let ldf = self.ldf.borrow().clone();
470
- let exprs = rb_exprs_to_exprs(exprs)?;
471
- Ok(ldf.select(exprs).into())
472
- }
473
-
474
- pub fn select_seq(&self, exprs: RArray) -> RbResult<Self> {
475
- let ldf = self.ldf.borrow().clone();
476
- let exprs = rb_exprs_to_exprs(exprs)?;
477
- Ok(ldf.select_seq(exprs).into())
478
- }
479
-
480
- pub fn group_by(&self, by: RArray, maintain_order: bool) -> RbResult<RbLazyGroupBy> {
481
- let ldf = self.ldf.borrow().clone();
482
- let by = rb_exprs_to_exprs(by)?;
483
- let lazy_gb = if maintain_order {
484
- ldf.group_by_stable(by)
485
- } else {
486
- ldf.group_by(by)
487
- };
488
- Ok(RbLazyGroupBy {
489
- lgb: RefCell::new(Some(lazy_gb)),
490
- })
491
- }
492
-
493
- pub fn rolling(
494
- &self,
495
- index_column: &RbExpr,
496
- period: String,
497
- offset: String,
498
- closed: Wrap<ClosedWindow>,
499
- by: RArray,
500
- ) -> RbResult<RbLazyGroupBy> {
501
- let closed_window = closed.0;
502
- let ldf = self.ldf.borrow().clone();
503
- let by = rb_exprs_to_exprs(by)?;
504
- let lazy_gb = ldf.rolling(
505
- index_column.inner.clone(),
506
- by,
507
- RollingGroupOptions {
508
- index_column: "".into(),
509
- period: Duration::parse(&period),
510
- offset: Duration::parse(&offset),
511
- closed_window,
512
- },
513
- );
514
-
515
- Ok(RbLazyGroupBy {
516
- lgb: RefCell::new(Some(lazy_gb)),
517
- })
518
- }
519
-
520
- #[allow(clippy::too_many_arguments)]
521
- pub fn group_by_dynamic(
522
- &self,
523
- index_column: &RbExpr,
524
- every: String,
525
- period: String,
526
- offset: String,
527
- label: Wrap<Label>,
528
- include_boundaries: bool,
529
- closed: Wrap<ClosedWindow>,
530
- by: RArray,
531
- start_by: Wrap<StartBy>,
532
- ) -> RbResult<RbLazyGroupBy> {
533
- let closed_window = closed.0;
534
- let by = rb_exprs_to_exprs(by)?;
535
- let ldf = self.ldf.borrow().clone();
536
- let lazy_gb = ldf.group_by_dynamic(
537
- index_column.inner.clone(),
538
- by,
539
- DynamicGroupOptions {
540
- every: Duration::parse(&every),
541
- period: Duration::parse(&period),
542
- offset: Duration::parse(&offset),
543
- label: label.0,
544
- include_boundaries,
545
- closed_window,
546
- start_by: start_by.0,
547
- ..Default::default()
548
- },
549
- );
550
-
551
- Ok(RbLazyGroupBy {
552
- lgb: RefCell::new(Some(lazy_gb)),
553
- })
554
- }
555
-
556
- pub fn with_context(&self, contexts: RArray) -> RbResult<Self> {
557
- let contexts = contexts
558
- .into_iter()
559
- .map(TryConvert::try_convert)
560
- .collect::<RbResult<Vec<&RbLazyFrame>>>()?;
561
- let contexts = contexts
562
- .into_iter()
563
- .map(|ldf| ldf.ldf.borrow().clone())
564
- .collect::<Vec<_>>();
565
- Ok(self.ldf.borrow().clone().with_context(contexts).into())
566
- }
567
-
568
- #[allow(clippy::too_many_arguments)]
569
- pub fn join_asof(
570
- &self,
571
- other: &RbLazyFrame,
572
- left_on: &RbExpr,
573
- right_on: &RbExpr,
574
- left_by: Option<Vec<String>>,
575
- right_by: Option<Vec<String>>,
576
- allow_parallel: bool,
577
- force_parallel: bool,
578
- suffix: String,
579
- strategy: Wrap<AsofStrategy>,
580
- tolerance: Option<Wrap<AnyValue<'_>>>,
581
- tolerance_str: Option<String>,
582
- ) -> RbResult<Self> {
583
- let ldf = self.ldf.borrow().clone();
584
- let other = other.ldf.borrow().clone();
585
- let left_on = left_on.inner.clone();
586
- let right_on = right_on.inner.clone();
587
- Ok(ldf
588
- .join_builder()
589
- .with(other)
590
- .left_on([left_on])
591
- .right_on([right_on])
592
- .allow_parallel(allow_parallel)
593
- .force_parallel(force_parallel)
594
- .how(JoinType::AsOf(AsOfOptions {
595
- strategy: strategy.0,
596
- left_by: left_by.map(strings_to_smartstrings),
597
- right_by: right_by.map(strings_to_smartstrings),
598
- tolerance: tolerance.map(|t| t.0.into_static().unwrap()),
599
- tolerance_str: tolerance_str.map(|s| s.into()),
600
- }))
601
- .suffix(suffix)
602
- .finish()
603
- .into())
604
- }
605
-
606
- #[allow(clippy::too_many_arguments)]
607
- pub fn join(
608
- &self,
609
- other: &RbLazyFrame,
610
- left_on: RArray,
611
- right_on: RArray,
612
- allow_parallel: bool,
613
- force_parallel: bool,
614
- join_nulls: bool,
615
- how: Wrap<JoinType>,
616
- suffix: String,
617
- ) -> RbResult<Self> {
618
- let ldf = self.ldf.borrow().clone();
619
- let other = other.ldf.borrow().clone();
620
- let left_on = rb_exprs_to_exprs(left_on)?;
621
- let right_on = rb_exprs_to_exprs(right_on)?;
622
-
623
- Ok(ldf
624
- .join_builder()
625
- .with(other)
626
- .left_on(left_on)
627
- .right_on(right_on)
628
- .allow_parallel(allow_parallel)
629
- .force_parallel(force_parallel)
630
- .join_nulls(join_nulls)
631
- .how(how.0)
632
- .suffix(suffix)
633
- .finish()
634
- .into())
635
- }
636
-
637
- pub fn with_column(&self, expr: &RbExpr) -> Self {
638
- let ldf = self.ldf.borrow().clone();
639
- ldf.with_column(expr.inner.clone()).into()
640
- }
641
-
642
- pub fn with_columns(&self, exprs: RArray) -> RbResult<Self> {
643
- let ldf = self.ldf.borrow().clone();
644
- Ok(ldf.with_columns(rb_exprs_to_exprs(exprs)?).into())
645
- }
646
-
647
- pub fn with_columns_seq(&self, exprs: RArray) -> RbResult<Self> {
648
- let ldf = self.ldf.borrow().clone();
649
- Ok(ldf.with_columns_seq(rb_exprs_to_exprs(exprs)?).into())
650
- }
651
-
652
- pub fn rename(&self, existing: Vec<String>, new: Vec<String>) -> Self {
653
- let ldf = self.ldf.borrow().clone();
654
- ldf.rename(existing, new).into()
655
- }
656
-
657
- pub fn reverse(&self) -> Self {
658
- let ldf = self.ldf.borrow().clone();
659
- ldf.reverse().into()
660
- }
661
-
662
- pub fn shift(&self, n: &RbExpr, fill_value: Option<&RbExpr>) -> Self {
663
- let lf = self.ldf.borrow().clone();
664
- let out = match fill_value {
665
- Some(v) => lf.shift_and_fill(n.inner.clone(), v.inner.clone()),
666
- None => lf.shift(n.inner.clone()),
667
- };
668
- out.into()
669
- }
670
-
671
- pub fn fill_nan(&self, fill_value: &RbExpr) -> Self {
672
- let ldf = self.ldf.borrow().clone();
673
- ldf.fill_nan(fill_value.inner.clone()).into()
674
- }
675
-
676
- pub fn min(&self) -> Self {
677
- let ldf = self.ldf.borrow().clone();
678
- let out = ldf.min();
679
- out.into()
680
- }
681
-
682
- pub fn max(&self) -> Self {
683
- let ldf = self.ldf.borrow().clone();
684
- let out = ldf.max();
685
- out.into()
686
- }
687
-
688
- pub fn sum(&self) -> Self {
689
- let ldf = self.ldf.borrow().clone();
690
- let out = ldf.sum();
691
- out.into()
692
- }
693
-
694
- pub fn mean(&self) -> Self {
695
- let ldf = self.ldf.borrow().clone();
696
- let out = ldf.mean();
697
- out.into()
698
- }
699
-
700
- pub fn std(&self, ddof: u8) -> Self {
701
- let ldf = self.ldf.borrow().clone();
702
- let out = ldf.std(ddof);
703
- out.into()
704
- }
705
-
706
- pub fn var(&self, ddof: u8) -> Self {
707
- let ldf = self.ldf.borrow().clone();
708
- let out = ldf.var(ddof);
709
- out.into()
710
- }
711
-
712
- pub fn median(&self) -> Self {
713
- let ldf = self.ldf.borrow().clone();
714
- let out = ldf.median();
715
- out.into()
716
- }
717
-
718
- pub fn quantile(
719
- &self,
720
- quantile: &RbExpr,
721
- interpolation: Wrap<QuantileInterpolOptions>,
722
- ) -> Self {
723
- let ldf = self.ldf.borrow().clone();
724
- let out = ldf.quantile(quantile.inner.clone(), interpolation.0);
725
- out.into()
726
- }
727
-
728
- pub fn explode(&self, column: RArray) -> RbResult<Self> {
729
- let ldf = self.ldf.borrow().clone();
730
- let column = rb_exprs_to_exprs(column)?;
731
- Ok(ldf.explode(column).into())
732
- }
733
-
734
- pub fn null_count(&self) -> Self {
735
- let ldf = self.ldf.borrow().clone();
736
- ldf.null_count().into()
737
- }
738
-
739
- pub fn unique(
740
- &self,
741
- maintain_order: bool,
742
- subset: Option<Vec<String>>,
743
- keep: Wrap<UniqueKeepStrategy>,
744
- ) -> RbResult<Self> {
745
- let ldf = self.ldf.borrow().clone();
746
- Ok(match maintain_order {
747
- true => ldf.unique_stable(subset, keep.0),
748
- false => ldf.unique(subset, keep.0),
749
- }
750
- .into())
751
- }
752
-
753
- pub fn drop_nulls(&self, subset: Option<Vec<String>>) -> Self {
754
- let ldf = self.ldf.borrow().clone();
755
- ldf.drop_nulls(subset.map(|v| v.into_iter().map(|s| col(&s)).collect()))
756
- .into()
757
- }
758
-
759
- pub fn slice(&self, offset: i64, len: Option<IdxSize>) -> Self {
760
- let ldf = self.ldf.borrow().clone();
761
- ldf.slice(offset, len.unwrap_or(IdxSize::MAX)).into()
762
- }
763
-
764
- pub fn tail(&self, n: IdxSize) -> Self {
765
- let ldf = self.ldf.borrow().clone();
766
- ldf.tail(n).into()
767
- }
768
-
769
- pub fn unpivot(
770
- &self,
771
- on: RArray,
772
- index: RArray,
773
- value_name: Option<String>,
774
- variable_name: Option<String>,
775
- ) -> RbResult<Self> {
776
- let on = rb_exprs_to_exprs(on)?;
777
- let index = rb_exprs_to_exprs(index)?;
778
- let args = UnpivotArgsDSL {
779
- on: on.into_iter().map(|e| e.into()).collect(),
780
- index: index.into_iter().map(|e| e.into()).collect(),
781
- value_name: value_name.map(|s| s.into()),
782
- variable_name: variable_name.map(|s| s.into()),
783
- };
784
-
785
- let ldf = self.ldf.borrow().clone();
786
- Ok(ldf.unpivot(args).into())
787
- }
788
-
789
- pub fn with_row_index(&self, name: String, offset: Option<IdxSize>) -> Self {
790
- let ldf = self.ldf.borrow().clone();
791
- ldf.with_row_index(&name, offset).into()
792
- }
793
-
794
- pub fn drop(&self, cols: Vec<String>) -> Self {
795
- let ldf = self.ldf.borrow().clone();
796
- ldf.drop(cols).into()
797
- }
798
-
799
- pub fn cast_all(&self, dtype: Wrap<DataType>, strict: bool) -> Self {
800
- self.ldf.borrow().clone().cast_all(dtype.0, strict).into()
801
- }
802
-
803
- pub fn clone(&self) -> Self {
804
- self.ldf.borrow().clone().into()
805
- }
806
-
807
- pub fn collect_schema(&self) -> RbResult<RHash> {
808
- let schema = self.ldf.borrow_mut().schema().map_err(RbPolarsErr::from)?;
809
-
810
- let schema_dict = RHash::new();
811
- schema.iter_fields().for_each(|fld| {
812
- // TODO remove unwrap
813
- schema_dict
814
- .aset::<String, Value>(
815
- fld.name().to_string(),
816
- Wrap(fld.data_type().clone()).into_value(),
817
- )
818
- .unwrap();
819
- });
820
- Ok(schema_dict)
821
- }
822
-
823
- pub fn unnest(&self, cols: Vec<String>) -> Self {
824
- self.ldf.borrow().clone().unnest(cols).into()
825
- }
826
-
827
- pub fn count(&self) -> Self {
828
- let ldf = self.ldf.borrow().clone();
829
- ldf.count().into()
830
- }
831
-
832
- pub fn merge_sorted(&self, other: &Self, key: String) -> RbResult<Self> {
833
- let out = self
834
- .ldf
835
- .borrow()
836
- .clone()
837
- .merge_sorted(other.ldf.borrow().clone(), &key)
838
- .map_err(RbPolarsErr::from)?;
839
- Ok(out.into())
840
- }
841
- }