polars-df 0.10.0 → 0.12.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (92)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +27 -0
  3. data/Cargo.lock +392 -351
  4. data/README.md +6 -6
  5. data/ext/polars/Cargo.toml +12 -7
  6. data/ext/polars/src/batched_csv.rs +53 -52
  7. data/ext/polars/src/conversion/any_value.rs +261 -0
  8. data/ext/polars/src/conversion/chunked_array.rs +4 -4
  9. data/ext/polars/src/conversion/mod.rs +60 -66
  10. data/ext/polars/src/dataframe/construction.rs +184 -0
  11. data/ext/polars/src/dataframe/export.rs +48 -0
  12. data/ext/polars/src/dataframe/general.rs +597 -0
  13. data/ext/polars/src/dataframe/io.rs +473 -0
  14. data/ext/polars/src/dataframe/mod.rs +26 -0
  15. data/ext/polars/src/error.rs +26 -4
  16. data/ext/polars/src/expr/categorical.rs +0 -10
  17. data/ext/polars/src/expr/datetime.rs +4 -8
  18. data/ext/polars/src/expr/general.rs +129 -94
  19. data/ext/polars/src/expr/mod.rs +2 -2
  20. data/ext/polars/src/expr/rolling.rs +201 -77
  21. data/ext/polars/src/expr/string.rs +11 -36
  22. data/ext/polars/src/functions/eager.rs +10 -10
  23. data/ext/polars/src/functions/lazy.rs +23 -21
  24. data/ext/polars/src/functions/range.rs +69 -1
  25. data/ext/polars/src/interop/mod.rs +1 -0
  26. data/ext/polars/src/interop/numo/mod.rs +2 -0
  27. data/ext/polars/src/interop/numo/to_numo_df.rs +23 -0
  28. data/ext/polars/src/interop/numo/to_numo_series.rs +61 -0
  29. data/ext/polars/src/lazyframe/mod.rs +135 -136
  30. data/ext/polars/src/lib.rs +94 -59
  31. data/ext/polars/src/map/dataframe.rs +2 -2
  32. data/ext/polars/src/map/lazy.rs +5 -25
  33. data/ext/polars/src/map/series.rs +7 -1
  34. data/ext/polars/src/rb_modules.rs +25 -1
  35. data/ext/polars/src/series/aggregation.rs +49 -30
  36. data/ext/polars/src/series/arithmetic.rs +21 -11
  37. data/ext/polars/src/series/construction.rs +56 -38
  38. data/ext/polars/src/series/export.rs +131 -49
  39. data/ext/polars/src/series/mod.rs +32 -141
  40. data/ext/polars/src/sql.rs +3 -1
  41. data/lib/polars/array_expr.rb +4 -4
  42. data/lib/polars/batched_csv_reader.rb +11 -5
  43. data/lib/polars/cat_expr.rb +0 -36
  44. data/lib/polars/cat_name_space.rb +0 -37
  45. data/lib/polars/convert.rb +6 -1
  46. data/lib/polars/data_frame.rb +176 -403
  47. data/lib/polars/data_types.rb +1 -1
  48. data/lib/polars/date_time_expr.rb +525 -572
  49. data/lib/polars/date_time_name_space.rb +263 -460
  50. data/lib/polars/dynamic_group_by.rb +5 -5
  51. data/lib/polars/exceptions.rb +7 -0
  52. data/lib/polars/expr.rb +1394 -243
  53. data/lib/polars/expr_dispatch.rb +1 -1
  54. data/lib/polars/functions/aggregation/horizontal.rb +8 -8
  55. data/lib/polars/functions/as_datatype.rb +63 -40
  56. data/lib/polars/functions/lazy.rb +63 -14
  57. data/lib/polars/functions/lit.rb +1 -1
  58. data/lib/polars/functions/range/date_range.rb +90 -57
  59. data/lib/polars/functions/range/datetime_range.rb +149 -0
  60. data/lib/polars/functions/range/int_range.rb +2 -2
  61. data/lib/polars/functions/range/time_range.rb +141 -0
  62. data/lib/polars/functions/repeat.rb +1 -1
  63. data/lib/polars/functions/whenthen.rb +1 -1
  64. data/lib/polars/group_by.rb +88 -23
  65. data/lib/polars/io/avro.rb +24 -0
  66. data/lib/polars/{io.rb → io/csv.rb} +299 -493
  67. data/lib/polars/io/database.rb +73 -0
  68. data/lib/polars/io/ipc.rb +247 -0
  69. data/lib/polars/io/json.rb +29 -0
  70. data/lib/polars/io/ndjson.rb +80 -0
  71. data/lib/polars/io/parquet.rb +227 -0
  72. data/lib/polars/lazy_frame.rb +143 -272
  73. data/lib/polars/lazy_group_by.rb +100 -3
  74. data/lib/polars/list_expr.rb +11 -11
  75. data/lib/polars/list_name_space.rb +5 -1
  76. data/lib/polars/rolling_group_by.rb +7 -9
  77. data/lib/polars/series.rb +103 -187
  78. data/lib/polars/string_expr.rb +78 -102
  79. data/lib/polars/string_name_space.rb +5 -4
  80. data/lib/polars/testing.rb +2 -2
  81. data/lib/polars/utils/constants.rb +9 -0
  82. data/lib/polars/utils/convert.rb +97 -0
  83. data/lib/polars/utils/parse.rb +89 -0
  84. data/lib/polars/utils/various.rb +76 -0
  85. data/lib/polars/utils/wrap.rb +19 -0
  86. data/lib/polars/utils.rb +8 -300
  87. data/lib/polars/version.rb +1 -1
  88. data/lib/polars/whenthen.rb +6 -6
  89. data/lib/polars.rb +20 -1
  90. metadata +28 -7
  91. data/ext/polars/src/conversion/anyvalue.rs +0 -186
  92. data/ext/polars/src/dataframe.rs +0 -1208
@@ -25,6 +25,48 @@ pub fn int_ranges(start: &RbExpr, end: &RbExpr, step: &RbExpr, dtype: Wrap<DataT
25
25
  }
26
26
 
27
27
  pub fn date_range(
28
+ start: &RbExpr,
29
+ end: &RbExpr,
30
+ interval: String,
31
+ closed: Wrap<ClosedWindow>,
32
+ ) -> RbExpr {
33
+ let start = start.inner.clone();
34
+ let end = end.inner.clone();
35
+ let interval = Duration::parse(&interval);
36
+ let closed = closed.0;
37
+ dsl::date_range(start, end, interval, closed).into()
38
+ }
39
+
40
+ pub fn date_ranges(
41
+ start: &RbExpr,
42
+ end: &RbExpr,
43
+ interval: String,
44
+ closed: Wrap<ClosedWindow>,
45
+ ) -> RbExpr {
46
+ let start = start.inner.clone();
47
+ let end = end.inner.clone();
48
+ let interval = Duration::parse(&interval);
49
+ let closed = closed.0;
50
+ dsl::date_ranges(start, end, interval, closed).into()
51
+ }
52
+
53
+ pub fn datetime_range(
54
+ start: &RbExpr,
55
+ end: &RbExpr,
56
+ every: String,
57
+ closed: Wrap<ClosedWindow>,
58
+ time_unit: Option<Wrap<TimeUnit>>,
59
+ time_zone: Option<TimeZone>,
60
+ ) -> RbExpr {
61
+ let start = start.inner.clone();
62
+ let end = end.inner.clone();
63
+ let every = Duration::parse(&every);
64
+ let closed = closed.0;
65
+ let time_unit = time_unit.map(|x| x.0);
66
+ dsl::datetime_range(start, end, every, closed, time_unit, time_zone).into()
67
+ }
68
+
69
+ pub fn datetime_ranges(
28
70
  start: &RbExpr,
29
71
  end: &RbExpr,
30
72
  every: String,
@@ -37,5 +79,31 @@ pub fn date_range(
37
79
  let every = Duration::parse(&every);
38
80
  let closed = closed.0;
39
81
  let time_unit = time_unit.map(|x| x.0);
40
- dsl::date_range(start, end, every, closed, time_unit, time_zone).into()
82
+ dsl::datetime_ranges(start, end, every, closed, time_unit, time_zone).into()
83
+ }
84
+
85
+ pub fn time_range(
86
+ start: &RbExpr,
87
+ end: &RbExpr,
88
+ every: String,
89
+ closed: Wrap<ClosedWindow>,
90
+ ) -> RbExpr {
91
+ let start = start.inner.clone();
92
+ let end = end.inner.clone();
93
+ let every = Duration::parse(&every);
94
+ let closed = closed.0;
95
+ dsl::time_range(start, end, every, closed).into()
96
+ }
97
+
98
+ pub fn time_ranges(
99
+ start: &RbExpr,
100
+ end: &RbExpr,
101
+ every: String,
102
+ closed: Wrap<ClosedWindow>,
103
+ ) -> RbExpr {
104
+ let start = start.inner.clone();
105
+ let end = end.inner.clone();
106
+ let every = Duration::parse(&every);
107
+ let closed = closed.0;
108
+ dsl::time_ranges(start, end, every, closed).into()
41
109
  }
@@ -0,0 +1 @@
1
+ pub mod numo;
@@ -0,0 +1,2 @@
1
+ pub mod to_numo_df;
2
+ pub mod to_numo_series;
@@ -0,0 +1,23 @@
1
+ use magnus::Value;
2
+ use polars_core::utils::try_get_supertype;
3
+
4
+ use crate::dataframe::RbDataFrame;
5
+
6
+ impl RbDataFrame {
7
+ pub fn to_numo(&self) -> Option<Value> {
8
+ let mut st = None;
9
+ for s in self.df.borrow().iter() {
10
+ let dt_i = s.dtype();
11
+ match st {
12
+ None => st = Some(dt_i.clone()),
13
+ Some(ref mut st) => {
14
+ *st = try_get_supertype(st, dt_i).ok()?;
15
+ }
16
+ }
17
+ }
18
+ let _st = st?;
19
+
20
+ // TODO
21
+ None
22
+ }
23
+ }
@@ -0,0 +1,61 @@
1
+ use magnus::{class, prelude::*, Module, RArray, RClass, RModule, Value};
2
+ use polars::series::BitRepr;
3
+ use polars_core::prelude::*;
4
+
5
+ use crate::error::RbPolarsErr;
6
+ use crate::raise_err;
7
+ use crate::series::RbSeries;
8
+ use crate::RbResult;
9
+
10
+ impl RbSeries {
11
+ /// For numeric types, this should only be called for Series with null types.
12
+ /// This will cast to floats so that `nil = NAN`
13
+ pub fn to_numo(&self) -> RbResult<Value> {
14
+ let s = &self.series.borrow();
15
+ match s.dtype() {
16
+ DataType::String => {
17
+ let ca = s.str().unwrap();
18
+
19
+ // TODO make more efficient
20
+ let np_arr = RArray::from_iter(ca);
21
+ class::object()
22
+ .const_get::<_, RModule>("Numo")?
23
+ .const_get::<_, RClass>("RObject")?
24
+ .funcall("cast", (np_arr,))
25
+ }
26
+ dt if dt.is_numeric() => {
27
+ if let Some(BitRepr::Large(_)) = s.bit_repr() {
28
+ let s = s.cast(&DataType::Float64).unwrap();
29
+ let ca = s.f64().unwrap();
30
+ // TODO make more efficient
31
+ let np_arr = RArray::from_iter(ca.into_iter().map(|opt_v| match opt_v {
32
+ Some(v) => v,
33
+ None => f64::NAN,
34
+ }));
35
+ class::object()
36
+ .const_get::<_, RModule>("Numo")?
37
+ .const_get::<_, RClass>("DFloat")?
38
+ .funcall("cast", (np_arr,))
39
+ } else {
40
+ let s = s.cast(&DataType::Float32).unwrap();
41
+ let ca = s.f32().unwrap();
42
+ // TODO make more efficient
43
+ let np_arr = RArray::from_iter(ca.into_iter().map(|opt_v| match opt_v {
44
+ Some(v) => v,
45
+ None => f32::NAN,
46
+ }));
47
+ class::object()
48
+ .const_get::<_, RModule>("Numo")?
49
+ .const_get::<_, RClass>("SFloat")?
50
+ .funcall("cast", (np_arr,))
51
+ }
52
+ }
53
+ dt => {
54
+ raise_err!(
55
+ format!("'to_numo' not supported for dtype: {dt:?}"),
56
+ ComputeError
57
+ );
58
+ }
59
+ }
60
+ }
61
+ }