polars-df 0.5.0 → 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (72)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +26 -0
  3. data/Cargo.lock +595 -709
  4. data/Cargo.toml +1 -0
  5. data/README.md +11 -9
  6. data/ext/polars/Cargo.toml +18 -10
  7. data/ext/polars/src/batched_csv.rs +26 -26
  8. data/ext/polars/src/conversion.rs +272 -136
  9. data/ext/polars/src/dataframe.rs +135 -94
  10. data/ext/polars/src/error.rs +8 -5
  11. data/ext/polars/src/expr/array.rs +15 -0
  12. data/ext/polars/src/expr/binary.rs +18 -6
  13. data/ext/polars/src/expr/datetime.rs +10 -12
  14. data/ext/polars/src/expr/general.rs +78 -264
  15. data/ext/polars/src/expr/list.rs +41 -28
  16. data/ext/polars/src/{expr.rs → expr/mod.rs} +5 -2
  17. data/ext/polars/src/expr/name.rs +44 -0
  18. data/ext/polars/src/expr/rolling.rs +196 -0
  19. data/ext/polars/src/expr/string.rs +94 -66
  20. data/ext/polars/src/file.rs +3 -3
  21. data/ext/polars/src/functions/aggregation.rs +35 -0
  22. data/ext/polars/src/functions/eager.rs +7 -31
  23. data/ext/polars/src/functions/io.rs +10 -10
  24. data/ext/polars/src/functions/lazy.rs +119 -54
  25. data/ext/polars/src/functions/meta.rs +30 -0
  26. data/ext/polars/src/functions/misc.rs +8 -0
  27. data/ext/polars/src/functions/mod.rs +5 -0
  28. data/ext/polars/src/functions/random.rs +6 -0
  29. data/ext/polars/src/functions/range.rs +46 -0
  30. data/ext/polars/src/functions/string_cache.rs +11 -0
  31. data/ext/polars/src/functions/whenthen.rs +7 -7
  32. data/ext/polars/src/lazyframe.rs +61 -44
  33. data/ext/polars/src/lib.rs +173 -84
  34. data/ext/polars/src/{apply → map}/dataframe.rs +28 -33
  35. data/ext/polars/src/{apply → map}/mod.rs +10 -6
  36. data/ext/polars/src/{apply → map}/series.rs +12 -16
  37. data/ext/polars/src/object.rs +2 -2
  38. data/ext/polars/src/rb_modules.rs +25 -6
  39. data/ext/polars/src/series/construction.rs +32 -6
  40. data/ext/polars/src/series/export.rs +2 -2
  41. data/ext/polars/src/series/set_at_idx.rs +33 -17
  42. data/ext/polars/src/series.rs +62 -42
  43. data/ext/polars/src/sql.rs +46 -0
  44. data/lib/polars/array_expr.rb +84 -0
  45. data/lib/polars/array_name_space.rb +77 -0
  46. data/lib/polars/batched_csv_reader.rb +1 -1
  47. data/lib/polars/config.rb +530 -0
  48. data/lib/polars/data_frame.rb +206 -131
  49. data/lib/polars/data_types.rb +163 -29
  50. data/lib/polars/date_time_expr.rb +13 -18
  51. data/lib/polars/date_time_name_space.rb +22 -28
  52. data/lib/polars/dynamic_group_by.rb +2 -2
  53. data/lib/polars/expr.rb +241 -151
  54. data/lib/polars/functions.rb +29 -38
  55. data/lib/polars/group_by.rb +38 -76
  56. data/lib/polars/io.rb +37 -2
  57. data/lib/polars/lazy_frame.rb +174 -95
  58. data/lib/polars/lazy_functions.rb +87 -63
  59. data/lib/polars/lazy_group_by.rb +7 -8
  60. data/lib/polars/list_expr.rb +40 -36
  61. data/lib/polars/list_name_space.rb +15 -15
  62. data/lib/polars/name_expr.rb +198 -0
  63. data/lib/polars/rolling_group_by.rb +6 -4
  64. data/lib/polars/series.rb +95 -28
  65. data/lib/polars/sql_context.rb +194 -0
  66. data/lib/polars/string_expr.rb +249 -69
  67. data/lib/polars/string_name_space.rb +155 -25
  68. data/lib/polars/utils.rb +119 -57
  69. data/lib/polars/version.rb +1 -1
  70. data/lib/polars.rb +6 -0
  71. metadata +21 -7
  72. data/ext/polars/src/{apply → map}/lazy.rs +0 -0
@@ -1,4 +1,4 @@
1
- use magnus::{IntoValue, RArray, RHash, Value};
1
+ use magnus::{IntoValue, RArray, RHash, TryConvert, Value};
2
2
  use polars::io::RowCount;
3
3
  use polars::lazy::frame::LazyFrame;
4
4
  use polars::prelude::*;
@@ -78,32 +78,32 @@ impl RbLazyFrame {
78
78
  pub fn new_from_csv(arguments: &[Value]) -> RbResult<Self> {
79
79
  // start arguments
80
80
  // this pattern is needed for more than 16
81
- let path: String = arguments[0].try_convert()?;
82
- let sep: String = arguments[1].try_convert()?;
83
- let has_header: bool = arguments[2].try_convert()?;
84
- let ignore_errors: bool = arguments[3].try_convert()?;
85
- let skip_rows: usize = arguments[4].try_convert()?;
86
- let n_rows: Option<usize> = arguments[5].try_convert()?;
87
- let cache: bool = arguments[6].try_convert()?;
88
- let overwrite_dtype: Option<Vec<(String, Wrap<DataType>)>> = arguments[7].try_convert()?;
89
- let low_memory: bool = arguments[8].try_convert()?;
90
- let comment_char: Option<String> = arguments[9].try_convert()?;
91
- let quote_char: Option<String> = arguments[10].try_convert()?;
92
- let null_values: Option<Wrap<NullValues>> = arguments[11].try_convert()?;
93
- let infer_schema_length: Option<usize> = arguments[12].try_convert()?;
94
- let with_schema_modify: Option<Value> = arguments[13].try_convert()?;
95
- let rechunk: bool = arguments[14].try_convert()?;
96
- let skip_rows_after_header: usize = arguments[15].try_convert()?;
97
- let encoding: Wrap<CsvEncoding> = arguments[16].try_convert()?;
98
- let row_count: Option<(String, IdxSize)> = arguments[17].try_convert()?;
99
- let try_parse_dates: bool = arguments[18].try_convert()?;
100
- let eol_char: String = arguments[19].try_convert()?;
81
+ let path = String::try_convert(arguments[0])?;
82
+ let separator = String::try_convert(arguments[1])?;
83
+ let has_header = bool::try_convert(arguments[2])?;
84
+ let ignore_errors = bool::try_convert(arguments[3])?;
85
+ let skip_rows = usize::try_convert(arguments[4])?;
86
+ let n_rows = Option::<usize>::try_convert(arguments[5])?;
87
+ let cache = bool::try_convert(arguments[6])?;
88
+ let overwrite_dtype = Option::<Vec<(String, Wrap<DataType>)>>::try_convert(arguments[7])?;
89
+ let low_memory = bool::try_convert(arguments[8])?;
90
+ let comment_char = Option::<String>::try_convert(arguments[9])?;
91
+ let quote_char = Option::<String>::try_convert(arguments[10])?;
92
+ let null_values = Option::<Wrap<NullValues>>::try_convert(arguments[11])?;
93
+ let infer_schema_length = Option::<usize>::try_convert(arguments[12])?;
94
+ let with_schema_modify = Option::<Value>::try_convert(arguments[13])?;
95
+ let rechunk = bool::try_convert(arguments[14])?;
96
+ let skip_rows_after_header = usize::try_convert(arguments[15])?;
97
+ let encoding = Wrap::<CsvEncoding>::try_convert(arguments[16])?;
98
+ let row_count = Option::<(String, IdxSize)>::try_convert(arguments[17])?;
99
+ let try_parse_dates = bool::try_convert(arguments[18])?;
100
+ let eol_char = String::try_convert(arguments[19])?;
101
101
  // end arguments
102
102
 
103
103
  let null_values = null_values.map(|w| w.0);
104
104
  let comment_char = comment_char.map(|s| s.as_bytes()[0]);
105
105
  let quote_char = quote_char.map(|s| s.as_bytes()[0]);
106
- let delimiter = sep.as_bytes()[0];
106
+ let separator = separator.as_bytes()[0];
107
107
  let eol_char = eol_char.as_bytes()[0];
108
108
 
109
109
  let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
@@ -116,7 +116,7 @@ impl RbLazyFrame {
116
116
  });
117
117
  let r = LazyCsvReader::new(path)
118
118
  .with_infer_schema_length(infer_schema_length)
119
- .with_delimiter(delimiter)
119
+ .with_separator(separator)
120
120
  .has_header(has_header)
121
121
  .with_ignore_errors(ignore_errors)
122
122
  .with_skip_rows(skip_rows)
@@ -151,6 +151,7 @@ impl RbLazyFrame {
151
151
  row_count: Option<(String, IdxSize)>,
152
152
  low_memory: bool,
153
153
  use_statistics: bool,
154
+ hive_partitioning: bool,
154
155
  ) -> RbResult<Self> {
155
156
  let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
156
157
  let args = ScanArgsParquet {
@@ -163,6 +164,7 @@ impl RbLazyFrame {
163
164
  // TODO support cloud options
164
165
  cloud_options: None,
165
166
  use_statistics,
167
+ hive_partitioning,
166
168
  };
167
169
  let lf = LazyFrame::scan_parquet(path, args).map_err(RbPolarsErr::from)?;
168
170
  Ok(lf.into())
@@ -217,6 +219,7 @@ impl RbLazyFrame {
217
219
  slice_pushdown: bool,
218
220
  cse: bool,
219
221
  allow_streaming: bool,
222
+ _eager: bool,
220
223
  ) -> RbLazyFrame {
221
224
  let ldf = self.ldf.clone();
222
225
  let ldf = ldf
@@ -224,13 +227,20 @@ impl RbLazyFrame {
224
227
  .with_predicate_pushdown(predicate_pushdown)
225
228
  .with_simplify_expr(simplify_expr)
226
229
  .with_slice_pushdown(slice_pushdown)
227
- .with_common_subplan_elimination(cse)
230
+ .with_comm_subplan_elim(cse)
228
231
  .with_streaming(allow_streaming)
232
+ ._with_eager(_eager)
229
233
  .with_projection_pushdown(projection_pushdown);
230
234
  ldf.into()
231
235
  }
232
236
 
233
- pub fn sort(&self, by_column: String, reverse: bool, nulls_last: bool) -> Self {
237
+ pub fn sort(
238
+ &self,
239
+ by_column: String,
240
+ reverse: bool,
241
+ nulls_last: bool,
242
+ maintain_order: bool,
243
+ ) -> Self {
234
244
  let ldf = self.ldf.clone();
235
245
  ldf.sort(
236
246
  &by_column,
@@ -238,6 +248,7 @@ impl RbLazyFrame {
238
248
  descending: reverse,
239
249
  nulls_last,
240
250
  multithreaded: true,
251
+ maintain_order,
241
252
  },
242
253
  )
243
254
  .into()
@@ -248,10 +259,13 @@ impl RbLazyFrame {
248
259
  by_column: RArray,
249
260
  reverse: Vec<bool>,
250
261
  nulls_last: bool,
262
+ maintain_order: bool,
251
263
  ) -> RbResult<Self> {
252
264
  let ldf = self.ldf.clone();
253
265
  let exprs = rb_exprs_to_exprs(by_column)?;
254
- Ok(ldf.sort_by_exprs(exprs, reverse, nulls_last).into())
266
+ Ok(ldf
267
+ .sort_by_exprs(exprs, reverse, nulls_last, maintain_order)
268
+ .into())
255
269
  }
256
270
 
257
271
  pub fn cache(&self) -> Self {
@@ -308,31 +322,32 @@ impl RbLazyFrame {
308
322
  Ok(ldf.select(exprs).into())
309
323
  }
310
324
 
311
- pub fn groupby(&self, by: RArray, maintain_order: bool) -> RbResult<RbLazyGroupBy> {
325
+ pub fn group_by(&self, by: RArray, maintain_order: bool) -> RbResult<RbLazyGroupBy> {
312
326
  let ldf = self.ldf.clone();
313
327
  let by = rb_exprs_to_exprs(by)?;
314
328
  let lazy_gb = if maintain_order {
315
- ldf.groupby_stable(by)
329
+ ldf.group_by_stable(by)
316
330
  } else {
317
- ldf.groupby(by)
331
+ ldf.group_by(by)
318
332
  };
319
333
  Ok(RbLazyGroupBy {
320
334
  lgb: RefCell::new(Some(lazy_gb)),
321
335
  })
322
336
  }
323
337
 
324
- pub fn groupby_rolling(
338
+ pub fn group_by_rolling(
325
339
  &self,
326
340
  index_column: &RbExpr,
327
341
  period: String,
328
342
  offset: String,
329
343
  closed: Wrap<ClosedWindow>,
330
344
  by: RArray,
345
+ check_sorted: bool,
331
346
  ) -> RbResult<RbLazyGroupBy> {
332
347
  let closed_window = closed.0;
333
348
  let ldf = self.ldf.clone();
334
349
  let by = rb_exprs_to_exprs(by)?;
335
- let lazy_gb = ldf.groupby_rolling(
350
+ let lazy_gb = ldf.group_by_rolling(
336
351
  index_column.inner.clone(),
337
352
  by,
338
353
  RollingGroupOptions {
@@ -340,6 +355,7 @@ impl RbLazyFrame {
340
355
  period: Duration::parse(&period),
341
356
  offset: Duration::parse(&offset),
342
357
  closed_window,
358
+ check_sorted,
343
359
  },
344
360
  );
345
361
 
@@ -349,32 +365,34 @@ impl RbLazyFrame {
349
365
  }
350
366
 
351
367
  #[allow(clippy::too_many_arguments)]
352
- pub fn groupby_dynamic(
368
+ pub fn group_by_dynamic(
353
369
  &self,
354
370
  index_column: &RbExpr,
355
371
  every: String,
356
372
  period: String,
357
373
  offset: String,
358
- truncate: bool,
374
+ label: Wrap<Label>,
359
375
  include_boundaries: bool,
360
376
  closed: Wrap<ClosedWindow>,
361
377
  by: RArray,
362
378
  start_by: Wrap<StartBy>,
379
+ check_sorted: bool,
363
380
  ) -> RbResult<RbLazyGroupBy> {
364
381
  let closed_window = closed.0;
365
382
  let by = rb_exprs_to_exprs(by)?;
366
383
  let ldf = self.ldf.clone();
367
- let lazy_gb = ldf.groupby_dynamic(
384
+ let lazy_gb = ldf.group_by_dynamic(
368
385
  index_column.inner.clone(),
369
386
  by,
370
387
  DynamicGroupOptions {
371
388
  every: Duration::parse(&every),
372
389
  period: Duration::parse(&period),
373
390
  offset: Duration::parse(&offset),
374
- truncate,
391
+ label: label.0,
375
392
  include_boundaries,
376
393
  closed_window,
377
394
  start_by: start_by.0,
395
+ check_sorted,
378
396
  ..Default::default()
379
397
  },
380
398
  );
@@ -387,7 +405,7 @@ impl RbLazyFrame {
387
405
  pub fn with_context(&self, contexts: RArray) -> RbResult<Self> {
388
406
  let contexts = contexts
389
407
  .each()
390
- .map(|v| v.unwrap().try_convert())
408
+ .map(|v| TryConvert::try_convert(v.unwrap()))
391
409
  .collect::<RbResult<Vec<&RbLazyFrame>>>()?;
392
410
  let contexts = contexts
393
411
  .into_iter()
@@ -478,14 +496,13 @@ impl RbLazyFrame {
478
496
  ldf.reverse().into()
479
497
  }
480
498
 
481
- pub fn shift(&self, periods: i64) -> Self {
482
- let ldf = self.ldf.clone();
483
- ldf.shift(periods).into()
484
- }
485
-
486
- pub fn shift_and_fill(&self, periods: i64, fill_value: &RbExpr) -> Self {
487
- let ldf = self.ldf.clone();
488
- ldf.shift_and_fill(periods, fill_value.inner.clone()).into()
499
+ pub fn shift(&self, n: &RbExpr, fill_value: Option<&RbExpr>) -> Self {
500
+ let lf = self.ldf.clone();
501
+ let out = match fill_value {
502
+ Some(v) => lf.shift_and_fill(n.inner.clone(), v.inner.clone()),
503
+ None => lf.shift(n.inner.clone()),
504
+ };
505
+ out.into()
489
506
  }
490
507
 
491
508
  pub fn fill_nan(&self, fill_value: &RbExpr) -> Self {