polars-df 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +12 -0
  3. data/Cargo.lock +272 -191
  4. data/Cargo.toml +0 -1
  5. data/README.md +2 -2
  6. data/ext/polars/Cargo.toml +8 -4
  7. data/ext/polars/src/apply/dataframe.rs +2 -2
  8. data/ext/polars/src/{lazy/apply.rs → apply/lazy.rs} +1 -2
  9. data/ext/polars/src/apply/mod.rs +1 -0
  10. data/ext/polars/src/batched_csv.rs +7 -5
  11. data/ext/polars/src/conversion.rs +106 -4
  12. data/ext/polars/src/dataframe.rs +19 -17
  13. data/ext/polars/src/error.rs +0 -4
  14. data/ext/polars/src/expr/binary.rs +69 -0
  15. data/ext/polars/src/expr/categorical.rs +10 -0
  16. data/ext/polars/src/expr/datetime.rs +223 -0
  17. data/ext/polars/src/expr/general.rs +933 -0
  18. data/ext/polars/src/expr/list.rs +146 -0
  19. data/ext/polars/src/{lazy → expr}/meta.rs +16 -6
  20. data/ext/polars/src/expr/string.rs +313 -0
  21. data/ext/polars/src/expr/struct.rs +15 -0
  22. data/ext/polars/src/expr.rs +33 -0
  23. data/ext/polars/src/functions/eager.rs +93 -0
  24. data/ext/polars/src/functions/io.rs +34 -0
  25. data/ext/polars/src/functions/lazy.rs +209 -0
  26. data/ext/polars/src/functions/meta.rs +8 -0
  27. data/ext/polars/src/functions/mod.rs +5 -0
  28. data/ext/polars/src/functions/whenthen.rs +43 -0
  29. data/ext/polars/src/{lazy/dataframe.rs → lazyframe.rs} +12 -33
  30. data/ext/polars/src/lazygroupby.rs +29 -0
  31. data/ext/polars/src/lib.rs +205 -303
  32. data/ext/polars/src/rb_modules.rs +8 -0
  33. data/ext/polars/src/series/aggregation.rs +83 -0
  34. data/ext/polars/src/series/arithmetic.rs +88 -0
  35. data/ext/polars/src/series/comparison.rs +251 -0
  36. data/ext/polars/src/series/construction.rs +164 -0
  37. data/ext/polars/src/series.rs +99 -539
  38. data/lib/polars/convert.rb +2 -2
  39. data/lib/polars/data_frame.rb +201 -50
  40. data/lib/polars/data_types.rb +6 -4
  41. data/lib/polars/date_time_expr.rb +142 -2
  42. data/lib/polars/expr.rb +70 -10
  43. data/lib/polars/lazy_frame.rb +4 -3
  44. data/lib/polars/lazy_functions.rb +4 -1
  45. data/lib/polars/list_expr.rb +68 -19
  46. data/lib/polars/series.rb +181 -73
  47. data/lib/polars/string_expr.rb +149 -43
  48. data/lib/polars/string_name_space.rb +4 -4
  49. data/lib/polars/struct_name_space.rb +32 -0
  50. data/lib/polars/utils.rb +41 -7
  51. data/lib/polars/version.rb +1 -1
  52. data/lib/polars.rb +2 -2
  53. metadata +26 -11
  54. data/ext/polars/src/lazy/dsl.rs +0 -1775
  55. data/ext/polars/src/lazy/mod.rs +0 -5
  56. data/ext/polars/src/lazy/utils.rs +0 -13
  57. data/ext/polars/src/list_construction.rs +0 -100
  58. /data/ext/polars/src/{numo.rs → series/export.rs} +0 -0
  59. /data/ext/polars/src/{set.rs → series/set_at_idx.rs} +0 -0
@@ -0,0 +1,209 @@
1
+ use magnus::{class, RArray, RString, Value};
2
+ use polars::lazy::dsl;
3
+ use polars::prelude::*;
4
+
5
+ use crate::apply::lazy::binary_lambda;
6
+ use crate::conversion::{get_lf, get_rbseq, Wrap};
7
+ use crate::prelude::vec_extract_wrapped;
8
+ use crate::rb_exprs_to_exprs;
9
+ use crate::{RbDataFrame, RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
10
+
11
/// Rebinds each named `Option<&RbExpr>` variable to an owned expression.
///
/// Every identifier passed to the macro is shadowed by a new binding of the
/// same name: `Some(expr)` becomes a clone of `expr.inner`, while `None`
/// becomes the literal expression `lit(0)`.
macro_rules! set_unwrapped_or_0 {
    ($($var:ident),+ $(,)?) => {
        $(
            let $var = match $var {
                Some(expr) => expr.inner.clone(),
                None => polars::lazy::dsl::lit(0),
            };
        )+
    };
}
16
+
17
+ pub fn arange(low: &RbExpr, high: &RbExpr, step: i64) -> RbExpr {
18
+ dsl::arange(low.inner.clone(), high.inner.clone(), step).into()
19
+ }
20
+
21
+ pub fn arg_sort_by(by: RArray, descending: Vec<bool>) -> RbResult<RbExpr> {
22
+ let by = rb_exprs_to_exprs(by)?;
23
+ Ok(dsl::arg_sort_by(by, &descending).into())
24
+ }
25
+
26
+ pub fn arg_where(condition: &RbExpr) -> RbExpr {
27
+ dsl::arg_where(condition.inner.clone()).into()
28
+ }
29
+
30
+ pub fn as_struct(exprs: RArray) -> RbResult<RbExpr> {
31
+ let exprs = rb_exprs_to_exprs(exprs)?;
32
+ Ok(dsl::as_struct(&exprs).into())
33
+ }
34
+
35
+ pub fn coalesce(exprs: RArray) -> RbResult<RbExpr> {
36
+ let exprs = rb_exprs_to_exprs(exprs)?;
37
+ Ok(dsl::coalesce(&exprs).into())
38
+ }
39
+
40
+ pub fn col(name: String) -> RbExpr {
41
+ dsl::col(&name).into()
42
+ }
43
+
44
+ pub fn collect_all(lfs: RArray) -> RbResult<RArray> {
45
+ let lfs = lfs
46
+ .each()
47
+ .map(|v| v?.try_convert::<&RbLazyFrame>())
48
+ .collect::<RbResult<Vec<&RbLazyFrame>>>()?;
49
+
50
+ Ok(RArray::from_iter(lfs.iter().map(|lf| {
51
+ let df = lf.ldf.clone().collect().unwrap();
52
+ RbDataFrame::new(df)
53
+ })))
54
+ }
55
+
56
+ pub fn cols(names: Vec<String>) -> RbExpr {
57
+ dsl::cols(names).into()
58
+ }
59
+
60
+ pub fn concat_lf(lfs: Value, rechunk: bool, parallel: bool) -> RbResult<RbLazyFrame> {
61
+ let (seq, len) = get_rbseq(lfs)?;
62
+ let mut lfs = Vec::with_capacity(len);
63
+
64
+ for res in seq.each() {
65
+ let item = res?;
66
+ let lf = get_lf(item)?;
67
+ lfs.push(lf);
68
+ }
69
+
70
+ let lf = polars::lazy::dsl::concat(lfs, rechunk, parallel).map_err(RbPolarsErr::from)?;
71
+ Ok(lf.into())
72
+ }
73
+
74
+ #[allow(clippy::too_many_arguments)]
75
+ pub fn duration(
76
+ days: Option<&RbExpr>,
77
+ seconds: Option<&RbExpr>,
78
+ nanoseconds: Option<&RbExpr>,
79
+ microseconds: Option<&RbExpr>,
80
+ milliseconds: Option<&RbExpr>,
81
+ minutes: Option<&RbExpr>,
82
+ hours: Option<&RbExpr>,
83
+ weeks: Option<&RbExpr>,
84
+ ) -> RbExpr {
85
+ set_unwrapped_or_0!(
86
+ days,
87
+ seconds,
88
+ nanoseconds,
89
+ microseconds,
90
+ milliseconds,
91
+ minutes,
92
+ hours,
93
+ weeks,
94
+ );
95
+ let args = DurationArgs {
96
+ days,
97
+ seconds,
98
+ nanoseconds,
99
+ microseconds,
100
+ milliseconds,
101
+ minutes,
102
+ hours,
103
+ weeks,
104
+ };
105
+ dsl::duration(args).into()
106
+ }
107
+
108
+ pub fn count() -> RbExpr {
109
+ dsl::count().into()
110
+ }
111
+
112
+ pub fn first() -> RbExpr {
113
+ dsl::first().into()
114
+ }
115
+
116
+ pub fn last() -> RbExpr {
117
+ dsl::last().into()
118
+ }
119
+
120
+ pub fn dtype_cols(dtypes: Vec<DataType>) -> RbExpr {
121
+ dsl::dtype_cols(dtypes).into()
122
+ }
123
+
124
+ pub fn fold(acc: &RbExpr, lambda: Value, exprs: RArray) -> RbResult<RbExpr> {
125
+ let exprs = rb_exprs_to_exprs(exprs)?;
126
+
127
+ let func = move |a: Series, b: Series| binary_lambda(lambda, a, b);
128
+ Ok(polars::lazy::dsl::fold_exprs(acc.inner.clone(), func, exprs).into())
129
+ }
130
+
131
+ pub fn cumfold(acc: &RbExpr, lambda: Value, exprs: RArray, include_init: bool) -> RbResult<RbExpr> {
132
+ let exprs = rb_exprs_to_exprs(exprs)?;
133
+
134
+ let func = move |a: Series, b: Series| binary_lambda(lambda, a, b);
135
+ Ok(polars::lazy::dsl::cumfold_exprs(acc.inner.clone(), func, exprs, include_init).into())
136
+ }
137
+
138
+ // TODO improve
139
+ pub fn lit(value: Value) -> RbResult<RbExpr> {
140
+ if value.is_nil() {
141
+ Ok(dsl::lit(Null {}).into())
142
+ } else if let Ok(series) = value.try_convert::<&RbSeries>() {
143
+ Ok(dsl::lit(series.series.borrow().clone()).into())
144
+ } else if let Some(v) = RString::from_value(value) {
145
+ Ok(dsl::lit(v.try_convert::<String>()?).into())
146
+ } else if value.is_kind_of(class::integer()) {
147
+ match value.try_convert::<i64>() {
148
+ Ok(val) => {
149
+ if val > 0 && val < i32::MAX as i64 || val < 0 && val > i32::MIN as i64 {
150
+ Ok(dsl::lit(val as i32).into())
151
+ } else {
152
+ Ok(dsl::lit(val).into())
153
+ }
154
+ }
155
+ _ => {
156
+ let val = value.try_convert::<u64>()?;
157
+ Ok(dsl::lit(val).into())
158
+ }
159
+ }
160
+ } else {
161
+ Ok(dsl::lit(value.try_convert::<f64>()?).into())
162
+ }
163
+ }
164
+
165
+ pub fn repeat(value: Value, n_times: &RbExpr) -> RbResult<RbExpr> {
166
+ if value.is_nil() {
167
+ Ok(polars::lazy::dsl::repeat(Null {}, n_times.inner.clone()).into())
168
+ } else {
169
+ todo!();
170
+ }
171
+ }
172
+
173
+ pub fn pearson_corr(a: &RbExpr, b: &RbExpr, ddof: u8) -> RbExpr {
174
+ polars::lazy::dsl::pearson_corr(a.inner.clone(), b.inner.clone(), ddof).into()
175
+ }
176
+
177
+ pub fn spearman_rank_corr(a: &RbExpr, b: &RbExpr, ddof: u8, propagate_nans: bool) -> RbExpr {
178
+ polars::lazy::dsl::spearman_rank_corr(a.inner.clone(), b.inner.clone(), ddof, propagate_nans)
179
+ .into()
180
+ }
181
+
182
+ pub fn cov(a: &RbExpr, b: &RbExpr) -> RbExpr {
183
+ polars::lazy::dsl::cov(a.inner.clone(), b.inner.clone()).into()
184
+ }
185
+
186
+ pub fn concat_str(s: RArray, sep: String) -> RbResult<RbExpr> {
187
+ let s = rb_exprs_to_exprs(s)?;
188
+ Ok(dsl::concat_str(s, &sep).into())
189
+ }
190
+
191
+ pub fn concat_lst(s: RArray) -> RbResult<RbExpr> {
192
+ let s = rb_exprs_to_exprs(s)?;
193
+ let expr = dsl::concat_list(s).map_err(RbPolarsErr::from)?;
194
+ Ok(expr.into())
195
+ }
196
+
197
+ pub fn dtype_cols2(dtypes: RArray) -> RbResult<RbExpr> {
198
+ let dtypes = dtypes
199
+ .each()
200
+ .map(|v| v?.try_convert::<Wrap<DataType>>())
201
+ .collect::<RbResult<Vec<Wrap<DataType>>>>()?;
202
+ let dtypes = vec_extract_wrapped(dtypes);
203
+ Ok(crate::functions::lazy::dtype_cols(dtypes))
204
+ }
205
+
206
+ pub fn sum_exprs(exprs: RArray) -> RbResult<RbExpr> {
207
+ let exprs = rb_exprs_to_exprs(exprs)?;
208
+ Ok(polars::lazy::dsl::sum_exprs(exprs).into())
209
+ }
@@ -0,0 +1,8 @@
1
+ use magnus::{IntoValue, Value};
2
+ use polars_core::prelude::IDX_DTYPE;
3
+
4
+ use crate::conversion::Wrap;
5
+
6
+ pub fn get_idx_type() -> Value {
7
+ Wrap(IDX_DTYPE).into_value()
8
+ }
@@ -0,0 +1,5 @@
1
+ pub mod eager;
2
+ pub mod io;
3
+ pub mod lazy;
4
+ pub mod meta;
5
+ pub mod whenthen;
@@ -0,0 +1,43 @@
1
+ use polars::lazy::dsl;
2
+
3
+ use crate::RbExpr;
4
+
5
+ #[magnus::wrap(class = "Polars::RbWhen")]
6
+ #[derive(Clone)]
7
+ pub struct RbWhen {
8
+ pub inner: dsl::When,
9
+ }
10
+
11
+ impl From<dsl::When> for RbWhen {
12
+ fn from(inner: dsl::When) -> Self {
13
+ RbWhen { inner }
14
+ }
15
+ }
16
+
17
+ #[magnus::wrap(class = "Polars::RbWhenThen")]
18
+ #[derive(Clone)]
19
+ pub struct RbWhenThen {
20
+ pub inner: dsl::WhenThen,
21
+ }
22
+
23
+ impl From<dsl::WhenThen> for RbWhenThen {
24
+ fn from(inner: dsl::WhenThen) -> Self {
25
+ RbWhenThen { inner }
26
+ }
27
+ }
28
+
29
+ impl RbWhen {
30
+ pub fn then(&self, expr: &RbExpr) -> RbWhenThen {
31
+ self.inner.clone().then(expr.inner.clone()).into()
32
+ }
33
+ }
34
+
35
+ impl RbWhenThen {
36
+ pub fn overwise(&self, expr: &RbExpr) -> RbExpr {
37
+ self.inner.clone().otherwise(expr.inner.clone()).into()
38
+ }
39
+ }
40
+
41
+ pub fn when(predicate: &RbExpr) -> RbWhen {
42
+ dsl::when(predicate.inner.clone()).into()
43
+ }
@@ -1,38 +1,15 @@
1
1
  use magnus::{IntoValue, RArray, RHash, Value};
2
2
  use polars::io::RowCount;
3
- use polars::lazy::frame::{LazyFrame, LazyGroupBy};
3
+ use polars::lazy::frame::LazyFrame;
4
4
  use polars::prelude::*;
5
5
  use std::cell::RefCell;
6
6
  use std::io::{BufWriter, Read};
7
7
  use std::path::PathBuf;
8
8
 
9
9
  use crate::conversion::*;
10
+ use crate::expr::rb_exprs_to_exprs;
10
11
  use crate::file::get_file_like;
11
- use crate::lazy::utils::rb_exprs_to_exprs;
12
- use crate::{RbDataFrame, RbExpr, RbPolarsErr, RbResult, RbValueError};
13
-
14
- #[magnus::wrap(class = "Polars::RbLazyGroupBy")]
15
- pub struct RbLazyGroupBy {
16
- lgb: RefCell<Option<LazyGroupBy>>,
17
- }
18
-
19
- impl RbLazyGroupBy {
20
- pub fn agg(&self, aggs: RArray) -> RbResult<RbLazyFrame> {
21
- let lgb = self.lgb.borrow_mut().take().unwrap();
22
- let aggs = rb_exprs_to_exprs(aggs)?;
23
- Ok(lgb.agg(aggs).into())
24
- }
25
-
26
- pub fn head(&self, n: usize) -> RbLazyFrame {
27
- let lgb = self.lgb.take().unwrap();
28
- lgb.head(Some(n)).into()
29
- }
30
-
31
- pub fn tail(&self, n: usize) -> RbLazyFrame {
32
- let lgb = self.lgb.take().unwrap();
33
- lgb.tail(Some(n)).into()
34
- }
35
- }
12
+ use crate::{RbDataFrame, RbExpr, RbLazyGroupBy, RbPolarsErr, RbResult, RbValueError};
36
13
 
37
14
  #[magnus::wrap(class = "Polars::RbLazyFrame")]
38
15
  #[derive(Clone)]
@@ -132,10 +109,10 @@ impl RbLazyFrame {
132
109
  let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
133
110
 
134
111
  let overwrite_dtype = overwrite_dtype.map(|overwrite_dtype| {
135
- let fields = overwrite_dtype
112
+ overwrite_dtype
136
113
  .into_iter()
137
- .map(|(name, dtype)| Field::new(&name, dtype.0));
138
- Schema::from(fields)
114
+ .map(|(name, dtype)| Field::new(&name, dtype.0))
115
+ .collect::<Schema>()
139
116
  });
140
117
  let r = LazyCsvReader::new(path)
141
118
  .with_infer_schema_length(infer_schema_length)
@@ -346,7 +323,7 @@ impl RbLazyFrame {
346
323
 
347
324
  pub fn groupby_rolling(
348
325
  &self,
349
- index_column: String,
326
+ index_column: &RbExpr,
350
327
  period: String,
351
328
  offset: String,
352
329
  closed: Wrap<ClosedWindow>,
@@ -356,9 +333,10 @@ impl RbLazyFrame {
356
333
  let ldf = self.ldf.clone();
357
334
  let by = rb_exprs_to_exprs(by)?;
358
335
  let lazy_gb = ldf.groupby_rolling(
336
+ index_column.inner.clone(),
359
337
  by,
360
338
  RollingGroupOptions {
361
- index_column: index_column.into(),
339
+ index_column: "".into(),
362
340
  period: Duration::parse(&period),
363
341
  offset: Duration::parse(&offset),
364
342
  closed_window,
@@ -373,7 +351,7 @@ impl RbLazyFrame {
373
351
  #[allow(clippy::too_many_arguments)]
374
352
  pub fn groupby_dynamic(
375
353
  &self,
376
- index_column: String,
354
+ index_column: &RbExpr,
377
355
  every: String,
378
356
  period: String,
379
357
  offset: String,
@@ -387,9 +365,9 @@ impl RbLazyFrame {
387
365
  let by = rb_exprs_to_exprs(by)?;
388
366
  let ldf = self.ldf.clone();
389
367
  let lazy_gb = ldf.groupby_dynamic(
368
+ index_column.inner.clone(),
390
369
  by,
391
370
  DynamicGroupOptions {
392
- index_column: index_column.into(),
393
371
  every: Duration::parse(&every),
394
372
  period: Duration::parse(&period),
395
373
  offset: Duration::parse(&offset),
@@ -397,6 +375,7 @@ impl RbLazyFrame {
397
375
  include_boundaries,
398
376
  closed_window,
399
377
  start_by: start_by.0,
378
+ ..Default::default()
400
379
  },
401
380
  );
402
381
 
@@ -0,0 +1,29 @@
1
+ use magnus::RArray;
2
+ use polars::lazy::frame::LazyGroupBy;
3
+ use std::cell::RefCell;
4
+
5
+ use crate::expr::rb_exprs_to_exprs;
6
+ use crate::{RbLazyFrame, RbResult};
7
+
8
+ #[magnus::wrap(class = "Polars::RbLazyGroupBy")]
9
+ pub struct RbLazyGroupBy {
10
+ pub lgb: RefCell<Option<LazyGroupBy>>,
11
+ }
12
+
13
+ impl RbLazyGroupBy {
14
+ pub fn agg(&self, aggs: RArray) -> RbResult<RbLazyFrame> {
15
+ let lgb = self.lgb.borrow_mut().take().unwrap();
16
+ let aggs = rb_exprs_to_exprs(aggs)?;
17
+ Ok(lgb.agg(aggs).into())
18
+ }
19
+
20
+ pub fn head(&self, n: usize) -> RbLazyFrame {
21
+ let lgb = self.lgb.take().unwrap();
22
+ lgb.head(Some(n)).into()
23
+ }
24
+
25
+ pub fn tail(&self, n: usize) -> RbLazyFrame {
26
+ let lgb = self.lgb.take().unwrap();
27
+ lgb.tail(Some(n)).into()
28
+ }
29
+ }