polars-df 0.4.0 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +12 -0
  3. data/Cargo.lock +272 -191
  4. data/Cargo.toml +0 -1
  5. data/README.md +2 -2
  6. data/ext/polars/Cargo.toml +8 -4
  7. data/ext/polars/src/apply/dataframe.rs +2 -2
  8. data/ext/polars/src/{lazy/apply.rs → apply/lazy.rs} +1 -2
  9. data/ext/polars/src/apply/mod.rs +1 -0
  10. data/ext/polars/src/batched_csv.rs +7 -5
  11. data/ext/polars/src/conversion.rs +106 -4
  12. data/ext/polars/src/dataframe.rs +19 -17
  13. data/ext/polars/src/error.rs +0 -4
  14. data/ext/polars/src/expr/binary.rs +69 -0
  15. data/ext/polars/src/expr/categorical.rs +10 -0
  16. data/ext/polars/src/expr/datetime.rs +223 -0
  17. data/ext/polars/src/expr/general.rs +933 -0
  18. data/ext/polars/src/expr/list.rs +146 -0
  19. data/ext/polars/src/{lazy → expr}/meta.rs +16 -6
  20. data/ext/polars/src/expr/string.rs +313 -0
  21. data/ext/polars/src/expr/struct.rs +15 -0
  22. data/ext/polars/src/expr.rs +33 -0
  23. data/ext/polars/src/functions/eager.rs +93 -0
  24. data/ext/polars/src/functions/io.rs +34 -0
  25. data/ext/polars/src/functions/lazy.rs +209 -0
  26. data/ext/polars/src/functions/meta.rs +8 -0
  27. data/ext/polars/src/functions/mod.rs +5 -0
  28. data/ext/polars/src/functions/whenthen.rs +43 -0
  29. data/ext/polars/src/{lazy/dataframe.rs → lazyframe.rs} +12 -33
  30. data/ext/polars/src/lazygroupby.rs +29 -0
  31. data/ext/polars/src/lib.rs +205 -303
  32. data/ext/polars/src/rb_modules.rs +8 -0
  33. data/ext/polars/src/series/aggregation.rs +83 -0
  34. data/ext/polars/src/series/arithmetic.rs +88 -0
  35. data/ext/polars/src/series/comparison.rs +251 -0
  36. data/ext/polars/src/series/construction.rs +164 -0
  37. data/ext/polars/src/series.rs +99 -539
  38. data/lib/polars/convert.rb +2 -2
  39. data/lib/polars/data_frame.rb +201 -50
  40. data/lib/polars/data_types.rb +6 -4
  41. data/lib/polars/date_time_expr.rb +142 -2
  42. data/lib/polars/expr.rb +70 -10
  43. data/lib/polars/lazy_frame.rb +4 -3
  44. data/lib/polars/lazy_functions.rb +4 -1
  45. data/lib/polars/list_expr.rb +68 -19
  46. data/lib/polars/series.rb +181 -73
  47. data/lib/polars/string_expr.rb +149 -43
  48. data/lib/polars/string_name_space.rb +4 -4
  49. data/lib/polars/struct_name_space.rb +32 -0
  50. data/lib/polars/utils.rb +41 -7
  51. data/lib/polars/version.rb +1 -1
  52. data/lib/polars.rb +2 -2
  53. metadata +26 -11
  54. data/ext/polars/src/lazy/dsl.rs +0 -1775
  55. data/ext/polars/src/lazy/mod.rs +0 -5
  56. data/ext/polars/src/lazy/utils.rs +0 -13
  57. data/ext/polars/src/list_construction.rs +0 -100
  58. /data/ext/polars/src/{numo.rs → series/export.rs} +0 -0
  59. /data/ext/polars/src/{set.rs → series/set_at_idx.rs} +0 -0
@@ -0,0 +1,209 @@
1
+ use magnus::{class, RArray, RString, Value};
2
+ use polars::lazy::dsl;
3
+ use polars::prelude::*;
4
+
5
+ use crate::apply::lazy::binary_lambda;
6
+ use crate::conversion::{get_lf, get_rbseq, Wrap};
7
+ use crate::prelude::vec_extract_wrapped;
8
+ use crate::rb_exprs_to_exprs;
9
+ use crate::{RbDataFrame, RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
10
+
11
// Rebinds each named `Option<&RbExpr>` variable to an owned expression:
// the wrapped expression's `inner` clone when present, otherwise the literal `0`.
macro_rules! set_unwrapped_or_0 {
    ($($var:ident),+ $(,)?) => {
        $(
            let $var = match $var {
                Some(expr) => expr.inner.clone(),
                None => polars::lazy::dsl::lit(0),
            };
        )+
    };
}
16
+
17
+ pub fn arange(low: &RbExpr, high: &RbExpr, step: i64) -> RbExpr {
18
+ dsl::arange(low.inner.clone(), high.inner.clone(), step).into()
19
+ }
20
+
21
+ pub fn arg_sort_by(by: RArray, descending: Vec<bool>) -> RbResult<RbExpr> {
22
+ let by = rb_exprs_to_exprs(by)?;
23
+ Ok(dsl::arg_sort_by(by, &descending).into())
24
+ }
25
+
26
+ pub fn arg_where(condition: &RbExpr) -> RbExpr {
27
+ dsl::arg_where(condition.inner.clone()).into()
28
+ }
29
+
30
+ pub fn as_struct(exprs: RArray) -> RbResult<RbExpr> {
31
+ let exprs = rb_exprs_to_exprs(exprs)?;
32
+ Ok(dsl::as_struct(&exprs).into())
33
+ }
34
+
35
+ pub fn coalesce(exprs: RArray) -> RbResult<RbExpr> {
36
+ let exprs = rb_exprs_to_exprs(exprs)?;
37
+ Ok(dsl::coalesce(&exprs).into())
38
+ }
39
+
40
+ pub fn col(name: String) -> RbExpr {
41
+ dsl::col(&name).into()
42
+ }
43
+
44
+ pub fn collect_all(lfs: RArray) -> RbResult<RArray> {
45
+ let lfs = lfs
46
+ .each()
47
+ .map(|v| v?.try_convert::<&RbLazyFrame>())
48
+ .collect::<RbResult<Vec<&RbLazyFrame>>>()?;
49
+
50
+ Ok(RArray::from_iter(lfs.iter().map(|lf| {
51
+ let df = lf.ldf.clone().collect().unwrap();
52
+ RbDataFrame::new(df)
53
+ })))
54
+ }
55
+
56
+ pub fn cols(names: Vec<String>) -> RbExpr {
57
+ dsl::cols(names).into()
58
+ }
59
+
60
+ pub fn concat_lf(lfs: Value, rechunk: bool, parallel: bool) -> RbResult<RbLazyFrame> {
61
+ let (seq, len) = get_rbseq(lfs)?;
62
+ let mut lfs = Vec::with_capacity(len);
63
+
64
+ for res in seq.each() {
65
+ let item = res?;
66
+ let lf = get_lf(item)?;
67
+ lfs.push(lf);
68
+ }
69
+
70
+ let lf = polars::lazy::dsl::concat(lfs, rechunk, parallel).map_err(RbPolarsErr::from)?;
71
+ Ok(lf.into())
72
+ }
73
+
74
+ #[allow(clippy::too_many_arguments)]
75
+ pub fn duration(
76
+ days: Option<&RbExpr>,
77
+ seconds: Option<&RbExpr>,
78
+ nanoseconds: Option<&RbExpr>,
79
+ microseconds: Option<&RbExpr>,
80
+ milliseconds: Option<&RbExpr>,
81
+ minutes: Option<&RbExpr>,
82
+ hours: Option<&RbExpr>,
83
+ weeks: Option<&RbExpr>,
84
+ ) -> RbExpr {
85
+ set_unwrapped_or_0!(
86
+ days,
87
+ seconds,
88
+ nanoseconds,
89
+ microseconds,
90
+ milliseconds,
91
+ minutes,
92
+ hours,
93
+ weeks,
94
+ );
95
+ let args = DurationArgs {
96
+ days,
97
+ seconds,
98
+ nanoseconds,
99
+ microseconds,
100
+ milliseconds,
101
+ minutes,
102
+ hours,
103
+ weeks,
104
+ };
105
+ dsl::duration(args).into()
106
+ }
107
+
108
+ pub fn count() -> RbExpr {
109
+ dsl::count().into()
110
+ }
111
+
112
+ pub fn first() -> RbExpr {
113
+ dsl::first().into()
114
+ }
115
+
116
+ pub fn last() -> RbExpr {
117
+ dsl::last().into()
118
+ }
119
+
120
+ pub fn dtype_cols(dtypes: Vec<DataType>) -> RbExpr {
121
+ dsl::dtype_cols(dtypes).into()
122
+ }
123
+
124
+ pub fn fold(acc: &RbExpr, lambda: Value, exprs: RArray) -> RbResult<RbExpr> {
125
+ let exprs = rb_exprs_to_exprs(exprs)?;
126
+
127
+ let func = move |a: Series, b: Series| binary_lambda(lambda, a, b);
128
+ Ok(polars::lazy::dsl::fold_exprs(acc.inner.clone(), func, exprs).into())
129
+ }
130
+
131
+ pub fn cumfold(acc: &RbExpr, lambda: Value, exprs: RArray, include_init: bool) -> RbResult<RbExpr> {
132
+ let exprs = rb_exprs_to_exprs(exprs)?;
133
+
134
+ let func = move |a: Series, b: Series| binary_lambda(lambda, a, b);
135
+ Ok(polars::lazy::dsl::cumfold_exprs(acc.inner.clone(), func, exprs, include_init).into())
136
+ }
137
+
138
+ // TODO improve
139
+ pub fn lit(value: Value) -> RbResult<RbExpr> {
140
+ if value.is_nil() {
141
+ Ok(dsl::lit(Null {}).into())
142
+ } else if let Ok(series) = value.try_convert::<&RbSeries>() {
143
+ Ok(dsl::lit(series.series.borrow().clone()).into())
144
+ } else if let Some(v) = RString::from_value(value) {
145
+ Ok(dsl::lit(v.try_convert::<String>()?).into())
146
+ } else if value.is_kind_of(class::integer()) {
147
+ match value.try_convert::<i64>() {
148
+ Ok(val) => {
149
+ if val > 0 && val < i32::MAX as i64 || val < 0 && val > i32::MIN as i64 {
150
+ Ok(dsl::lit(val as i32).into())
151
+ } else {
152
+ Ok(dsl::lit(val).into())
153
+ }
154
+ }
155
+ _ => {
156
+ let val = value.try_convert::<u64>()?;
157
+ Ok(dsl::lit(val).into())
158
+ }
159
+ }
160
+ } else {
161
+ Ok(dsl::lit(value.try_convert::<f64>()?).into())
162
+ }
163
+ }
164
+
165
+ pub fn repeat(value: Value, n_times: &RbExpr) -> RbResult<RbExpr> {
166
+ if value.is_nil() {
167
+ Ok(polars::lazy::dsl::repeat(Null {}, n_times.inner.clone()).into())
168
+ } else {
169
+ todo!();
170
+ }
171
+ }
172
+
173
+ pub fn pearson_corr(a: &RbExpr, b: &RbExpr, ddof: u8) -> RbExpr {
174
+ polars::lazy::dsl::pearson_corr(a.inner.clone(), b.inner.clone(), ddof).into()
175
+ }
176
+
177
+ pub fn spearman_rank_corr(a: &RbExpr, b: &RbExpr, ddof: u8, propagate_nans: bool) -> RbExpr {
178
+ polars::lazy::dsl::spearman_rank_corr(a.inner.clone(), b.inner.clone(), ddof, propagate_nans)
179
+ .into()
180
+ }
181
+
182
+ pub fn cov(a: &RbExpr, b: &RbExpr) -> RbExpr {
183
+ polars::lazy::dsl::cov(a.inner.clone(), b.inner.clone()).into()
184
+ }
185
+
186
+ pub fn concat_str(s: RArray, sep: String) -> RbResult<RbExpr> {
187
+ let s = rb_exprs_to_exprs(s)?;
188
+ Ok(dsl::concat_str(s, &sep).into())
189
+ }
190
+
191
+ pub fn concat_lst(s: RArray) -> RbResult<RbExpr> {
192
+ let s = rb_exprs_to_exprs(s)?;
193
+ let expr = dsl::concat_list(s).map_err(RbPolarsErr::from)?;
194
+ Ok(expr.into())
195
+ }
196
+
197
+ pub fn dtype_cols2(dtypes: RArray) -> RbResult<RbExpr> {
198
+ let dtypes = dtypes
199
+ .each()
200
+ .map(|v| v?.try_convert::<Wrap<DataType>>())
201
+ .collect::<RbResult<Vec<Wrap<DataType>>>>()?;
202
+ let dtypes = vec_extract_wrapped(dtypes);
203
+ Ok(crate::functions::lazy::dtype_cols(dtypes))
204
+ }
205
+
206
+ pub fn sum_exprs(exprs: RArray) -> RbResult<RbExpr> {
207
+ let exprs = rb_exprs_to_exprs(exprs)?;
208
+ Ok(polars::lazy::dsl::sum_exprs(exprs).into())
209
+ }
@@ -0,0 +1,8 @@
1
+ use magnus::{IntoValue, Value};
2
+ use polars_core::prelude::IDX_DTYPE;
3
+
4
+ use crate::conversion::Wrap;
5
+
6
+ pub fn get_idx_type() -> Value {
7
+ Wrap(IDX_DTYPE).into_value()
8
+ }
@@ -0,0 +1,5 @@
1
+ pub mod eager;
2
+ pub mod io;
3
+ pub mod lazy;
4
+ pub mod meta;
5
+ pub mod whenthen;
@@ -0,0 +1,43 @@
1
+ use polars::lazy::dsl;
2
+
3
+ use crate::RbExpr;
4
+
5
+ #[magnus::wrap(class = "Polars::RbWhen")]
6
+ #[derive(Clone)]
7
+ pub struct RbWhen {
8
+ pub inner: dsl::When,
9
+ }
10
+
11
+ impl From<dsl::When> for RbWhen {
12
+ fn from(inner: dsl::When) -> Self {
13
+ RbWhen { inner }
14
+ }
15
+ }
16
+
17
+ #[magnus::wrap(class = "Polars::RbWhenThen")]
18
+ #[derive(Clone)]
19
+ pub struct RbWhenThen {
20
+ pub inner: dsl::WhenThen,
21
+ }
22
+
23
+ impl From<dsl::WhenThen> for RbWhenThen {
24
+ fn from(inner: dsl::WhenThen) -> Self {
25
+ RbWhenThen { inner }
26
+ }
27
+ }
28
+
29
+ impl RbWhen {
30
+ pub fn then(&self, expr: &RbExpr) -> RbWhenThen {
31
+ self.inner.clone().then(expr.inner.clone()).into()
32
+ }
33
+ }
34
+
35
+ impl RbWhenThen {
36
+ pub fn overwise(&self, expr: &RbExpr) -> RbExpr {
37
+ self.inner.clone().otherwise(expr.inner.clone()).into()
38
+ }
39
+ }
40
+
41
+ pub fn when(predicate: &RbExpr) -> RbWhen {
42
+ dsl::when(predicate.inner.clone()).into()
43
+ }
@@ -1,38 +1,15 @@
1
1
  use magnus::{IntoValue, RArray, RHash, Value};
2
2
  use polars::io::RowCount;
3
- use polars::lazy::frame::{LazyFrame, LazyGroupBy};
3
+ use polars::lazy::frame::LazyFrame;
4
4
  use polars::prelude::*;
5
5
  use std::cell::RefCell;
6
6
  use std::io::{BufWriter, Read};
7
7
  use std::path::PathBuf;
8
8
 
9
9
  use crate::conversion::*;
10
+ use crate::expr::rb_exprs_to_exprs;
10
11
  use crate::file::get_file_like;
11
- use crate::lazy::utils::rb_exprs_to_exprs;
12
- use crate::{RbDataFrame, RbExpr, RbPolarsErr, RbResult, RbValueError};
13
-
14
- #[magnus::wrap(class = "Polars::RbLazyGroupBy")]
15
- pub struct RbLazyGroupBy {
16
- lgb: RefCell<Option<LazyGroupBy>>,
17
- }
18
-
19
- impl RbLazyGroupBy {
20
- pub fn agg(&self, aggs: RArray) -> RbResult<RbLazyFrame> {
21
- let lgb = self.lgb.borrow_mut().take().unwrap();
22
- let aggs = rb_exprs_to_exprs(aggs)?;
23
- Ok(lgb.agg(aggs).into())
24
- }
25
-
26
- pub fn head(&self, n: usize) -> RbLazyFrame {
27
- let lgb = self.lgb.take().unwrap();
28
- lgb.head(Some(n)).into()
29
- }
30
-
31
- pub fn tail(&self, n: usize) -> RbLazyFrame {
32
- let lgb = self.lgb.take().unwrap();
33
- lgb.tail(Some(n)).into()
34
- }
35
- }
12
+ use crate::{RbDataFrame, RbExpr, RbLazyGroupBy, RbPolarsErr, RbResult, RbValueError};
36
13
 
37
14
  #[magnus::wrap(class = "Polars::RbLazyFrame")]
38
15
  #[derive(Clone)]
@@ -132,10 +109,10 @@ impl RbLazyFrame {
132
109
  let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
133
110
 
134
111
  let overwrite_dtype = overwrite_dtype.map(|overwrite_dtype| {
135
- let fields = overwrite_dtype
112
+ overwrite_dtype
136
113
  .into_iter()
137
- .map(|(name, dtype)| Field::new(&name, dtype.0));
138
- Schema::from(fields)
114
+ .map(|(name, dtype)| Field::new(&name, dtype.0))
115
+ .collect::<Schema>()
139
116
  });
140
117
  let r = LazyCsvReader::new(path)
141
118
  .with_infer_schema_length(infer_schema_length)
@@ -346,7 +323,7 @@ impl RbLazyFrame {
346
323
 
347
324
  pub fn groupby_rolling(
348
325
  &self,
349
- index_column: String,
326
+ index_column: &RbExpr,
350
327
  period: String,
351
328
  offset: String,
352
329
  closed: Wrap<ClosedWindow>,
@@ -356,9 +333,10 @@ impl RbLazyFrame {
356
333
  let ldf = self.ldf.clone();
357
334
  let by = rb_exprs_to_exprs(by)?;
358
335
  let lazy_gb = ldf.groupby_rolling(
336
+ index_column.inner.clone(),
359
337
  by,
360
338
  RollingGroupOptions {
361
- index_column: index_column.into(),
339
+ index_column: "".into(),
362
340
  period: Duration::parse(&period),
363
341
  offset: Duration::parse(&offset),
364
342
  closed_window,
@@ -373,7 +351,7 @@ impl RbLazyFrame {
373
351
  #[allow(clippy::too_many_arguments)]
374
352
  pub fn groupby_dynamic(
375
353
  &self,
376
- index_column: String,
354
+ index_column: &RbExpr,
377
355
  every: String,
378
356
  period: String,
379
357
  offset: String,
@@ -387,9 +365,9 @@ impl RbLazyFrame {
387
365
  let by = rb_exprs_to_exprs(by)?;
388
366
  let ldf = self.ldf.clone();
389
367
  let lazy_gb = ldf.groupby_dynamic(
368
+ index_column.inner.clone(),
390
369
  by,
391
370
  DynamicGroupOptions {
392
- index_column: index_column.into(),
393
371
  every: Duration::parse(&every),
394
372
  period: Duration::parse(&period),
395
373
  offset: Duration::parse(&offset),
@@ -397,6 +375,7 @@ impl RbLazyFrame {
397
375
  include_boundaries,
398
376
  closed_window,
399
377
  start_by: start_by.0,
378
+ ..Default::default()
400
379
  },
401
380
  );
402
381
 
@@ -0,0 +1,29 @@
1
+ use magnus::RArray;
2
+ use polars::lazy::frame::LazyGroupBy;
3
+ use std::cell::RefCell;
4
+
5
+ use crate::expr::rb_exprs_to_exprs;
6
+ use crate::{RbLazyFrame, RbResult};
7
+
8
+ #[magnus::wrap(class = "Polars::RbLazyGroupBy")]
9
+ pub struct RbLazyGroupBy {
10
+ pub lgb: RefCell<Option<LazyGroupBy>>,
11
+ }
12
+
13
+ impl RbLazyGroupBy {
14
+ pub fn agg(&self, aggs: RArray) -> RbResult<RbLazyFrame> {
15
+ let lgb = self.lgb.borrow_mut().take().unwrap();
16
+ let aggs = rb_exprs_to_exprs(aggs)?;
17
+ Ok(lgb.agg(aggs).into())
18
+ }
19
+
20
+ pub fn head(&self, n: usize) -> RbLazyFrame {
21
+ let lgb = self.lgb.take().unwrap();
22
+ lgb.head(Some(n)).into()
23
+ }
24
+
25
+ pub fn tail(&self, n: usize) -> RbLazyFrame {
26
+ let lgb = self.lgb.take().unwrap();
27
+ lgb.tail(Some(n)).into()
28
+ }
29
+ }