polars-df 0.5.0 → 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (72)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +26 -0
  3. data/Cargo.lock +595 -709
  4. data/Cargo.toml +1 -0
  5. data/README.md +11 -9
  6. data/ext/polars/Cargo.toml +18 -10
  7. data/ext/polars/src/batched_csv.rs +26 -26
  8. data/ext/polars/src/conversion.rs +272 -136
  9. data/ext/polars/src/dataframe.rs +135 -94
  10. data/ext/polars/src/error.rs +8 -5
  11. data/ext/polars/src/expr/array.rs +15 -0
  12. data/ext/polars/src/expr/binary.rs +18 -6
  13. data/ext/polars/src/expr/datetime.rs +10 -12
  14. data/ext/polars/src/expr/general.rs +78 -264
  15. data/ext/polars/src/expr/list.rs +41 -28
  16. data/ext/polars/src/{expr.rs → expr/mod.rs} +5 -2
  17. data/ext/polars/src/expr/name.rs +44 -0
  18. data/ext/polars/src/expr/rolling.rs +196 -0
  19. data/ext/polars/src/expr/string.rs +94 -66
  20. data/ext/polars/src/file.rs +3 -3
  21. data/ext/polars/src/functions/aggregation.rs +35 -0
  22. data/ext/polars/src/functions/eager.rs +7 -31
  23. data/ext/polars/src/functions/io.rs +10 -10
  24. data/ext/polars/src/functions/lazy.rs +119 -54
  25. data/ext/polars/src/functions/meta.rs +30 -0
  26. data/ext/polars/src/functions/misc.rs +8 -0
  27. data/ext/polars/src/functions/mod.rs +5 -0
  28. data/ext/polars/src/functions/random.rs +6 -0
  29. data/ext/polars/src/functions/range.rs +46 -0
  30. data/ext/polars/src/functions/string_cache.rs +11 -0
  31. data/ext/polars/src/functions/whenthen.rs +7 -7
  32. data/ext/polars/src/lazyframe.rs +61 -44
  33. data/ext/polars/src/lib.rs +173 -84
  34. data/ext/polars/src/{apply → map}/dataframe.rs +28 -33
  35. data/ext/polars/src/{apply → map}/mod.rs +10 -6
  36. data/ext/polars/src/{apply → map}/series.rs +12 -16
  37. data/ext/polars/src/object.rs +2 -2
  38. data/ext/polars/src/rb_modules.rs +25 -6
  39. data/ext/polars/src/series/construction.rs +32 -6
  40. data/ext/polars/src/series/export.rs +2 -2
  41. data/ext/polars/src/series/set_at_idx.rs +33 -17
  42. data/ext/polars/src/series.rs +62 -42
  43. data/ext/polars/src/sql.rs +46 -0
  44. data/lib/polars/array_expr.rb +84 -0
  45. data/lib/polars/array_name_space.rb +77 -0
  46. data/lib/polars/batched_csv_reader.rb +1 -1
  47. data/lib/polars/config.rb +530 -0
  48. data/lib/polars/data_frame.rb +206 -131
  49. data/lib/polars/data_types.rb +163 -29
  50. data/lib/polars/date_time_expr.rb +13 -18
  51. data/lib/polars/date_time_name_space.rb +22 -28
  52. data/lib/polars/dynamic_group_by.rb +2 -2
  53. data/lib/polars/expr.rb +241 -151
  54. data/lib/polars/functions.rb +29 -38
  55. data/lib/polars/group_by.rb +38 -76
  56. data/lib/polars/io.rb +37 -2
  57. data/lib/polars/lazy_frame.rb +174 -95
  58. data/lib/polars/lazy_functions.rb +87 -63
  59. data/lib/polars/lazy_group_by.rb +7 -8
  60. data/lib/polars/list_expr.rb +40 -36
  61. data/lib/polars/list_name_space.rb +15 -15
  62. data/lib/polars/name_expr.rb +198 -0
  63. data/lib/polars/rolling_group_by.rb +6 -4
  64. data/lib/polars/series.rb +95 -28
  65. data/lib/polars/sql_context.rb +194 -0
  66. data/lib/polars/string_expr.rb +249 -69
  67. data/lib/polars/string_name_space.rb +155 -25
  68. data/lib/polars/utils.rb +119 -57
  69. data/lib/polars/version.rb +1 -1
  70. data/lib/polars.rb +6 -0
  71. metadata +21 -7
  72. data/ext/polars/src/{apply → map}/lazy.rs +0 -0
@@ -5,15 +5,14 @@ mod construction;
5
5
  mod export;
6
6
  mod set_at_idx;
7
7
 
8
- use magnus::{exception, Error, IntoValue, RArray, Value, QNIL};
8
+ use magnus::{exception, prelude::*, value::qnil, Error, IntoValue, RArray, Value};
9
9
  use polars::prelude::*;
10
10
  use polars::series::IsSorted;
11
11
  use std::cell::RefCell;
12
12
 
13
- use crate::apply::series::{call_lambda_and_extract, ApplyLambda};
14
13
  use crate::apply_method_all_arrow_series2;
15
14
  use crate::conversion::*;
16
- use crate::series::set_at_idx::set_at_idx;
15
+ use crate::map::series::{call_lambda_and_extract, ApplyLambda};
17
16
  use crate::{RbDataFrame, RbPolarsErr, RbResult};
18
17
 
19
18
  #[magnus::wrap(class = "Polars::RbSeries")]
@@ -38,7 +37,7 @@ impl RbSeries {
38
37
  pub fn to_series_collection(rs: RArray) -> RbResult<Vec<Series>> {
39
38
  let mut series = Vec::new();
40
39
  for item in rs.each() {
41
- series.push(item?.try_convert::<&RbSeries>()?.series.borrow().clone());
40
+ series.push(<&RbSeries>::try_convert(item?)?.series.borrow().clone());
42
41
  }
43
42
  Ok(series)
44
43
  }
@@ -303,7 +302,7 @@ impl RbSeries {
303
302
  pub fn to_a(&self) -> Value {
304
303
  let series = &self.series.borrow();
305
304
 
306
- fn to_list_recursive(series: &Series) -> Value {
305
+ fn to_a_recursive(series: &Series) -> Value {
307
306
  let rblist = match series.dtype() {
308
307
  DataType::Boolean => RArray::from_iter(series.bool().unwrap()).into_value(),
309
308
  DataType::UInt8 => RArray::from_iter(series.u8().unwrap()).into_value(),
@@ -325,7 +324,7 @@ impl RbSeries {
325
324
  let obj: Option<&ObjectValue> = series.get_object(i).map(|any| any.into());
326
325
  match obj {
327
326
  Some(val) => v.push(val.to_object()).unwrap(),
328
- None => v.push(QNIL).unwrap(),
327
+ None => v.push(qnil()).unwrap(),
329
328
  };
330
329
  }
331
330
  v.into_value()
@@ -333,13 +332,29 @@ impl RbSeries {
333
332
  DataType::List(_) => {
334
333
  let v = RArray::new();
335
334
  let ca = series.list().unwrap();
335
+ for opt_s in unsafe { ca.amortized_iter() } {
336
+ match opt_s {
337
+ None => {
338
+ v.push(qnil()).unwrap();
339
+ }
340
+ Some(s) => {
341
+ let rblst = to_a_recursive(s.as_ref());
342
+ v.push(rblst).unwrap();
343
+ }
344
+ }
345
+ }
346
+ v.into_value()
347
+ }
348
+ DataType::Array(_, _) => {
349
+ let v = RArray::new();
350
+ let ca = series.array().unwrap();
336
351
  for opt_s in ca.amortized_iter() {
337
352
  match opt_s {
338
353
  None => {
339
- v.push(QNIL).unwrap();
354
+ v.push(qnil()).unwrap();
340
355
  }
341
356
  Some(s) => {
342
- let rblst = to_list_recursive(s.as_ref());
357
+ let rblst = to_a_recursive(s.as_ref());
343
358
  v.push(rblst).unwrap();
344
359
  }
345
360
  }
@@ -347,18 +362,20 @@ impl RbSeries {
347
362
  v.into_value()
348
363
  }
349
364
  DataType::Date => {
350
- let a = RArray::with_capacity(series.len());
351
- for v in series.iter() {
352
- a.push::<Value>(Wrap(v).into_value()).unwrap();
353
- }
354
- return a.into_value();
365
+ let ca = series.date().unwrap();
366
+ return Wrap(ca).into_value();
367
+ }
368
+ DataType::Time => {
369
+ let ca = series.time().unwrap();
370
+ return Wrap(ca).into_value();
355
371
  }
356
372
  DataType::Datetime(_, _) => {
357
- let a = RArray::with_capacity(series.len());
358
- for v in series.iter() {
359
- a.push::<Value>(Wrap(v).into_value()).unwrap();
360
- }
361
- return a.into_value();
373
+ let ca = series.datetime().unwrap();
374
+ return Wrap(ca).into_value();
375
+ }
376
+ DataType::Decimal(_, _) => {
377
+ let ca = series.decimal().unwrap();
378
+ return Wrap(ca).into_value();
362
379
  }
363
380
  DataType::Utf8 => {
364
381
  let ca = series.utf8().unwrap();
@@ -376,15 +393,37 @@ impl RbSeries {
376
393
  let ca = series.binary().unwrap();
377
394
  return Wrap(ca).into_value();
378
395
  }
379
- DataType::Null | DataType::Unknown => {
396
+ DataType::Null => {
397
+ let null: Option<u8> = None;
398
+ let n = series.len();
399
+ let iter = std::iter::repeat(null).take(n);
400
+ use std::iter::{Repeat, Take};
401
+ struct NullIter {
402
+ iter: Take<Repeat<Option<u8>>>,
403
+ n: usize,
404
+ }
405
+ impl Iterator for NullIter {
406
+ type Item = Option<u8>;
407
+
408
+ fn next(&mut self) -> Option<Self::Item> {
409
+ self.iter.next()
410
+ }
411
+ fn size_hint(&self) -> (usize, Option<usize>) {
412
+ (self.n, Some(self.n))
413
+ }
414
+ }
415
+ impl ExactSizeIterator for NullIter {}
416
+
417
+ RArray::from_iter(NullIter { iter, n }).into_value()
418
+ }
419
+ DataType::Unknown => {
380
420
  panic!("to_a not implemented for null/unknown")
381
421
  }
382
- _ => todo!(),
383
422
  };
384
423
  rblist
385
424
  }
386
425
 
387
- to_list_recursive(series)
426
+ to_a_recursive(series)
388
427
  }
389
428
 
390
429
  pub fn clone(&self) -> Self {
@@ -594,23 +633,15 @@ impl RbSeries {
594
633
  Ok(RbSeries::new(s))
595
634
  }
596
635
 
597
- pub fn to_dummies(&self, sep: Option<String>) -> RbResult<RbDataFrame> {
636
+ pub fn to_dummies(&self, sep: Option<String>, drop_first: bool) -> RbResult<RbDataFrame> {
598
637
  let df = self
599
638
  .series
600
639
  .borrow()
601
- .to_dummies(sep.as_deref())
640
+ .to_dummies(sep.as_deref(), drop_first)
602
641
  .map_err(RbPolarsErr::from)?;
603
642
  Ok(df.into())
604
643
  }
605
644
 
606
- pub fn peak_max(&self) -> Self {
607
- self.series.borrow().peak_max().into_series().into()
608
- }
609
-
610
- pub fn peak_min(&self) -> Self {
611
- self.series.borrow().peak_min().into_series().into()
612
- }
613
-
614
645
  pub fn n_unique(&self) -> RbResult<usize> {
615
646
  let n = self.series.borrow().n_unique().map_err(RbPolarsErr::from)?;
616
647
  Ok(n)
@@ -668,17 +699,6 @@ impl RbSeries {
668
699
  None
669
700
  }
670
701
  }
671
-
672
- pub fn set_at_idx(&self, idx: &RbSeries, values: &RbSeries) -> RbResult<()> {
673
- let mut s = self.series.borrow_mut();
674
- match set_at_idx(s.clone(), &idx.series.borrow(), &values.series.borrow()) {
675
- Ok(out) => {
676
- *s = out;
677
- Ok(())
678
- }
679
- Err(e) => Err(RbPolarsErr::from(e)),
680
- }
681
- }
682
702
  }
683
703
 
684
704
  macro_rules! impl_set_with_mask {
@@ -0,0 +1,46 @@
1
+ use polars::sql::SQLContext;
2
+ use std::cell::RefCell;
3
+
4
+ use crate::{RbLazyFrame, RbPolarsErr, RbResult};
5
+
6
+ #[magnus::wrap(class = "Polars::RbSQLContext")]
7
+ #[repr(transparent)]
8
+ #[derive(Clone)]
9
+ pub struct RbSQLContext {
10
+ pub context: RefCell<SQLContext>,
11
+ }
12
+
13
+ #[allow(
14
+ clippy::wrong_self_convention,
15
+ clippy::should_implement_trait,
16
+ clippy::len_without_is_empty
17
+ )]
18
+ impl RbSQLContext {
19
+ #[allow(clippy::new_without_default)]
20
+ pub fn new() -> RbSQLContext {
21
+ RbSQLContext {
22
+ context: SQLContext::new().into(),
23
+ }
24
+ }
25
+
26
+ pub fn execute(&self, query: String) -> RbResult<RbLazyFrame> {
27
+ Ok(self
28
+ .context
29
+ .borrow_mut()
30
+ .execute(&query)
31
+ .map_err(RbPolarsErr::from)?
32
+ .into())
33
+ }
34
+
35
+ pub fn get_tables(&self) -> RbResult<Vec<String>> {
36
+ Ok(self.context.borrow().get_tables())
37
+ }
38
+
39
+ pub fn register(&self, name: String, lf: &RbLazyFrame) {
40
+ self.context.borrow_mut().register(&name, lf.ldf.clone())
41
+ }
42
+
43
+ pub fn unregister(&self, name: String) {
44
+ self.context.borrow_mut().unregister(&name)
45
+ }
46
+ }
@@ -0,0 +1,84 @@
1
+ module Polars
2
+ # Namespace for array related expressions.
3
+ class ArrayExpr
4
+ # @private
5
+ attr_accessor :_rbexpr
6
+
7
+ # @private
8
+ def initialize(expr)
9
+ self._rbexpr = expr._rbexpr
10
+ end
11
+
12
+ # Compute the min values of the sub-arrays.
13
+ #
14
+ # @return [Expr]
15
+ #
16
+ # @example
17
+ # df = Polars::DataFrame.new(
18
+ # {"a" => [[1, 2], [4, 3]]},
19
+ # schema: {"a" => Polars::Array.new(2, Polars::Int64)}
20
+ # )
21
+ # df.select(Polars.col("a").arr.min)
22
+ # # =>
23
+ # # shape: (2, 1)
24
+ # # ┌─────┐
25
+ # # │ a │
26
+ # # │ --- │
27
+ # # │ i64 │
28
+ # # ╞═════╡
29
+ # # │ 1 │
30
+ # # │ 3 │
31
+ # # └─────┘
32
+ def min
33
+ Utils.wrap_expr(_rbexpr.array_min)
34
+ end
35
+
36
+ # Compute the max values of the sub-arrays.
37
+ #
38
+ # @return [Expr]
39
+ #
40
+ # @example
41
+ # df = Polars::DataFrame.new(
42
+ # {"a" => [[1, 2], [4, 3]]},
43
+ # schema: {"a" => Polars::Array.new(2, Polars::Int64)}
44
+ # )
45
+ # df.select(Polars.col("a").arr.max)
46
+ # # =>
47
+ # # shape: (2, 1)
48
+ # # ┌─────┐
49
+ # # │ a │
50
+ # # │ --- │
51
+ # # │ i64 │
52
+ # # ╞═════╡
53
+ # # │ 2 │
54
+ # # │ 4 │
55
+ # # └─────┘
56
+ def max
57
+ Utils.wrap_expr(_rbexpr.array_max)
58
+ end
59
+
60
+ # Compute the sum values of the sub-arrays.
61
+ #
62
+ # @return [Expr]
63
+ #
64
+ # @example
65
+ # df = Polars::DataFrame.new(
66
+ # {"a" => [[1, 2], [4, 3]]},
67
+ # schema: {"a" => Polars::Array.new(2, Polars::Int64)}
68
+ # )
69
+ # df.select(Polars.col("a").arr.sum)
70
+ # # =>
71
+ # # shape: (2, 1)
72
+ # # ┌─────┐
73
+ # # │ a │
74
+ # # │ --- │
75
+ # # │ i64 │
76
+ # # ╞═════╡
77
+ # # │ 3 │
78
+ # # │ 7 │
79
+ # # └─────┘
80
+ def sum
81
+ Utils.wrap_expr(_rbexpr.array_sum)
82
+ end
83
+ end
84
+ end
@@ -0,0 +1,77 @@
1
+ module Polars
2
+ # Series.arr namespace.
3
+ class ArrayNameSpace
4
+ include ExprDispatch
5
+
6
+ self._accessor = "arr"
7
+
8
+ # @private
9
+ def initialize(series)
10
+ self._s = series._s
11
+ end
12
+
13
+ # Compute the min values of the sub-arrays.
14
+ #
15
+ # @return [Series]
16
+ #
17
+ # @example
18
+ # s = Polars::Series.new(
19
+ # "a", [[1, 2], [4, 3]], dtype: Polars::Array.new(2, Polars::Int64)
20
+ # )
21
+ # s.arr.min
22
+ # # =>
23
+ # # shape: (2,)
24
+ # # Series: 'a' [i64]
25
+ # # [
26
+ # # 1
27
+ # # 3
28
+ # # ]
29
+ def min
30
+ super
31
+ end
32
+
33
+ # Compute the max values of the sub-arrays.
34
+ #
35
+ # @return [Series]
36
+ #
37
+ # @example
38
+ # s = Polars::Series.new(
39
+ # "a", [[1, 2], [4, 3]], dtype: Polars::Array.new(2, Polars::Int64)
40
+ # )
41
+ # s.arr.max
42
+ # # =>
43
+ # # shape: (2,)
44
+ # # Series: 'a' [i64]
45
+ # # [
46
+ # # 2
47
+ # # 4
48
+ # # ]
49
+ def max
50
+ super
51
+ end
52
+
53
+ # Compute the sum values of the sub-arrays.
54
+ #
55
+ # @return [Series]
56
+ #
57
+ # @example
58
+ # df = Polars::DataFrame.new(
59
+ # {"a" => [[1, 2], [4, 3]]},
60
+ # schema: {"a" => Polars::Array.new(2, Polars::Int64)}
61
+ # )
62
+ # df.select(Polars.col("a").arr.sum)
63
+ # # =>
64
+ # # shape: (2, 1)
65
+ # # ┌─────┐
66
+ # # │ a │
67
+ # # │ --- │
68
+ # # │ i64 │
69
+ # # ╞═════╡
70
+ # # │ 3 │
71
+ # # │ 7 │
72
+ # # └─────┘
73
+ def sum
74
+ super
75
+ end
76
+ end
77
+ end
@@ -41,7 +41,7 @@ module Polars
41
41
  dtypes.each do|k, v|
42
42
  dtype_list << [k, Utils.rb_type_to_dtype(v)]
43
43
  end
44
- elsif dtypes.is_a?(Array)
44
+ elsif dtypes.is_a?(::Array)
45
45
  dtype_slice = dtypes
46
46
  else
47
47
  raise ArgumentError, "dtype arg should be list or dict"