polars-df 0.10.0 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +27 -0
  3. data/Cargo.lock +392 -351
  4. data/README.md +6 -6
  5. data/ext/polars/Cargo.toml +12 -7
  6. data/ext/polars/src/batched_csv.rs +53 -52
  7. data/ext/polars/src/conversion/any_value.rs +261 -0
  8. data/ext/polars/src/conversion/chunked_array.rs +4 -4
  9. data/ext/polars/src/conversion/mod.rs +60 -66
  10. data/ext/polars/src/dataframe/construction.rs +184 -0
  11. data/ext/polars/src/dataframe/export.rs +48 -0
  12. data/ext/polars/src/dataframe/general.rs +597 -0
  13. data/ext/polars/src/dataframe/io.rs +473 -0
  14. data/ext/polars/src/dataframe/mod.rs +26 -0
  15. data/ext/polars/src/error.rs +26 -4
  16. data/ext/polars/src/expr/categorical.rs +0 -10
  17. data/ext/polars/src/expr/datetime.rs +4 -8
  18. data/ext/polars/src/expr/general.rs +129 -94
  19. data/ext/polars/src/expr/mod.rs +2 -2
  20. data/ext/polars/src/expr/rolling.rs +201 -77
  21. data/ext/polars/src/expr/string.rs +11 -36
  22. data/ext/polars/src/functions/eager.rs +10 -10
  23. data/ext/polars/src/functions/lazy.rs +23 -21
  24. data/ext/polars/src/functions/range.rs +69 -1
  25. data/ext/polars/src/interop/mod.rs +1 -0
  26. data/ext/polars/src/interop/numo/mod.rs +2 -0
  27. data/ext/polars/src/interop/numo/to_numo_df.rs +23 -0
  28. data/ext/polars/src/interop/numo/to_numo_series.rs +61 -0
  29. data/ext/polars/src/lazyframe/mod.rs +135 -136
  30. data/ext/polars/src/lib.rs +94 -59
  31. data/ext/polars/src/map/dataframe.rs +2 -2
  32. data/ext/polars/src/map/lazy.rs +5 -25
  33. data/ext/polars/src/map/series.rs +7 -1
  34. data/ext/polars/src/rb_modules.rs +25 -1
  35. data/ext/polars/src/series/aggregation.rs +49 -30
  36. data/ext/polars/src/series/arithmetic.rs +21 -11
  37. data/ext/polars/src/series/construction.rs +56 -38
  38. data/ext/polars/src/series/export.rs +131 -49
  39. data/ext/polars/src/series/mod.rs +32 -141
  40. data/ext/polars/src/sql.rs +3 -1
  41. data/lib/polars/array_expr.rb +4 -4
  42. data/lib/polars/batched_csv_reader.rb +11 -5
  43. data/lib/polars/cat_expr.rb +0 -36
  44. data/lib/polars/cat_name_space.rb +0 -37
  45. data/lib/polars/convert.rb +6 -1
  46. data/lib/polars/data_frame.rb +176 -403
  47. data/lib/polars/data_types.rb +1 -1
  48. data/lib/polars/date_time_expr.rb +525 -572
  49. data/lib/polars/date_time_name_space.rb +263 -460
  50. data/lib/polars/dynamic_group_by.rb +5 -5
  51. data/lib/polars/exceptions.rb +7 -0
  52. data/lib/polars/expr.rb +1394 -243
  53. data/lib/polars/expr_dispatch.rb +1 -1
  54. data/lib/polars/functions/aggregation/horizontal.rb +8 -8
  55. data/lib/polars/functions/as_datatype.rb +63 -40
  56. data/lib/polars/functions/lazy.rb +63 -14
  57. data/lib/polars/functions/lit.rb +1 -1
  58. data/lib/polars/functions/range/date_range.rb +90 -57
  59. data/lib/polars/functions/range/datetime_range.rb +149 -0
  60. data/lib/polars/functions/range/int_range.rb +2 -2
  61. data/lib/polars/functions/range/time_range.rb +141 -0
  62. data/lib/polars/functions/repeat.rb +1 -1
  63. data/lib/polars/functions/whenthen.rb +1 -1
  64. data/lib/polars/group_by.rb +88 -23
  65. data/lib/polars/io/avro.rb +24 -0
  66. data/lib/polars/{io.rb → io/csv.rb} +299 -493
  67. data/lib/polars/io/database.rb +73 -0
  68. data/lib/polars/io/ipc.rb +247 -0
  69. data/lib/polars/io/json.rb +29 -0
  70. data/lib/polars/io/ndjson.rb +80 -0
  71. data/lib/polars/io/parquet.rb +227 -0
  72. data/lib/polars/lazy_frame.rb +143 -272
  73. data/lib/polars/lazy_group_by.rb +100 -3
  74. data/lib/polars/list_expr.rb +11 -11
  75. data/lib/polars/list_name_space.rb +5 -1
  76. data/lib/polars/rolling_group_by.rb +7 -9
  77. data/lib/polars/series.rb +103 -187
  78. data/lib/polars/string_expr.rb +78 -102
  79. data/lib/polars/string_name_space.rb +5 -4
  80. data/lib/polars/testing.rb +2 -2
  81. data/lib/polars/utils/constants.rb +9 -0
  82. data/lib/polars/utils/convert.rb +97 -0
  83. data/lib/polars/utils/parse.rb +89 -0
  84. data/lib/polars/utils/various.rb +76 -0
  85. data/lib/polars/utils/wrap.rb +19 -0
  86. data/lib/polars/utils.rb +8 -300
  87. data/lib/polars/version.rb +1 -1
  88. data/lib/polars/whenthen.rb +6 -6
  89. data/lib/polars.rb +20 -1
  90. metadata +28 -7
  91. data/ext/polars/src/conversion/anyvalue.rs +0 -186
  92. data/ext/polars/src/dataframe.rs +0 -1208
@@ -25,6 +25,48 @@ pub fn int_ranges(start: &RbExpr, end: &RbExpr, step: &RbExpr, dtype: Wrap<DataT
25
25
  }
26
26
 
27
27
  pub fn date_range(
28
+ start: &RbExpr,
29
+ end: &RbExpr,
30
+ interval: String,
31
+ closed: Wrap<ClosedWindow>,
32
+ ) -> RbExpr {
33
+ let start = start.inner.clone();
34
+ let end = end.inner.clone();
35
+ let interval = Duration::parse(&interval);
36
+ let closed = closed.0;
37
+ dsl::date_range(start, end, interval, closed).into()
38
+ }
39
+
40
+ pub fn date_ranges(
41
+ start: &RbExpr,
42
+ end: &RbExpr,
43
+ interval: String,
44
+ closed: Wrap<ClosedWindow>,
45
+ ) -> RbExpr {
46
+ let start = start.inner.clone();
47
+ let end = end.inner.clone();
48
+ let interval = Duration::parse(&interval);
49
+ let closed = closed.0;
50
+ dsl::date_ranges(start, end, interval, closed).into()
51
+ }
52
+
53
+ pub fn datetime_range(
54
+ start: &RbExpr,
55
+ end: &RbExpr,
56
+ every: String,
57
+ closed: Wrap<ClosedWindow>,
58
+ time_unit: Option<Wrap<TimeUnit>>,
59
+ time_zone: Option<TimeZone>,
60
+ ) -> RbExpr {
61
+ let start = start.inner.clone();
62
+ let end = end.inner.clone();
63
+ let every = Duration::parse(&every);
64
+ let closed = closed.0;
65
+ let time_unit = time_unit.map(|x| x.0);
66
+ dsl::datetime_range(start, end, every, closed, time_unit, time_zone).into()
67
+ }
68
+
69
+ pub fn datetime_ranges(
28
70
  start: &RbExpr,
29
71
  end: &RbExpr,
30
72
  every: String,
@@ -37,5 +79,31 @@ pub fn date_range(
37
79
  let every = Duration::parse(&every);
38
80
  let closed = closed.0;
39
81
  let time_unit = time_unit.map(|x| x.0);
40
- dsl::date_range(start, end, every, closed, time_unit, time_zone).into()
82
+ dsl::datetime_ranges(start, end, every, closed, time_unit, time_zone).into()
83
+ }
84
+
85
+ pub fn time_range(
86
+ start: &RbExpr,
87
+ end: &RbExpr,
88
+ every: String,
89
+ closed: Wrap<ClosedWindow>,
90
+ ) -> RbExpr {
91
+ let start = start.inner.clone();
92
+ let end = end.inner.clone();
93
+ let every = Duration::parse(&every);
94
+ let closed = closed.0;
95
+ dsl::time_range(start, end, every, closed).into()
96
+ }
97
+
98
+ pub fn time_ranges(
99
+ start: &RbExpr,
100
+ end: &RbExpr,
101
+ every: String,
102
+ closed: Wrap<ClosedWindow>,
103
+ ) -> RbExpr {
104
+ let start = start.inner.clone();
105
+ let end = end.inner.clone();
106
+ let every = Duration::parse(&every);
107
+ let closed = closed.0;
108
+ dsl::time_ranges(start, end, every, closed).into()
41
109
  }
@@ -0,0 +1 @@
1
+ pub mod numo;
@@ -0,0 +1,2 @@
1
+ pub mod to_numo_df;
2
+ pub mod to_numo_series;
@@ -0,0 +1,23 @@
1
+ use magnus::Value;
2
+ use polars_core::utils::try_get_supertype;
3
+
4
+ use crate::dataframe::RbDataFrame;
5
+
6
+ impl RbDataFrame {
7
+ pub fn to_numo(&self) -> Option<Value> {
8
+ let mut st = None;
9
+ for s in self.df.borrow().iter() {
10
+ let dt_i = s.dtype();
11
+ match st {
12
+ None => st = Some(dt_i.clone()),
13
+ Some(ref mut st) => {
14
+ *st = try_get_supertype(st, dt_i).ok()?;
15
+ }
16
+ }
17
+ }
18
+ let _st = st?;
19
+
20
+ // TODO
21
+ None
22
+ }
23
+ }
@@ -0,0 +1,61 @@
1
+ use magnus::{class, prelude::*, Module, RArray, RClass, RModule, Value};
2
+ use polars::series::BitRepr;
3
+ use polars_core::prelude::*;
4
+
5
+ use crate::error::RbPolarsErr;
6
+ use crate::raise_err;
7
+ use crate::series::RbSeries;
8
+ use crate::RbResult;
9
+
10
+ impl RbSeries {
11
+ /// For numeric types, this should only be called for Series with null types.
12
+ /// This will cast to floats so that `nil = NAN`
13
+ pub fn to_numo(&self) -> RbResult<Value> {
14
+ let s = &self.series.borrow();
15
+ match s.dtype() {
16
+ DataType::String => {
17
+ let ca = s.str().unwrap();
18
+
19
+ // TODO make more efficient
20
+ let np_arr = RArray::from_iter(ca);
21
+ class::object()
22
+ .const_get::<_, RModule>("Numo")?
23
+ .const_get::<_, RClass>("RObject")?
24
+ .funcall("cast", (np_arr,))
25
+ }
26
+ dt if dt.is_numeric() => {
27
+ if let Some(BitRepr::Large(_)) = s.bit_repr() {
28
+ let s = s.cast(&DataType::Float64).unwrap();
29
+ let ca = s.f64().unwrap();
30
+ // TODO make more efficient
31
+ let np_arr = RArray::from_iter(ca.into_iter().map(|opt_v| match opt_v {
32
+ Some(v) => v,
33
+ None => f64::NAN,
34
+ }));
35
+ class::object()
36
+ .const_get::<_, RModule>("Numo")?
37
+ .const_get::<_, RClass>("DFloat")?
38
+ .funcall("cast", (np_arr,))
39
+ } else {
40
+ let s = s.cast(&DataType::Float32).unwrap();
41
+ let ca = s.f32().unwrap();
42
+ // TODO make more efficient
43
+ let np_arr = RArray::from_iter(ca.into_iter().map(|opt_v| match opt_v {
44
+ Some(v) => v,
45
+ None => f32::NAN,
46
+ }));
47
+ class::object()
48
+ .const_get::<_, RModule>("Numo")?
49
+ .const_get::<_, RClass>("SFloat")?
50
+ .funcall("cast", (np_arr,))
51
+ }
52
+ }
53
+ dt => {
54
+ raise_err!(
55
+ format!("'to_numo' not supported for dtype: {dt:?}"),
56
+ ComputeError
57
+ );
58
+ }
59
+ }
60
+ }
61
+ }