polars-df 0.3.1 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (63) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +24 -1
  3. data/Cargo.lock +486 -380
  4. data/Cargo.toml +0 -2
  5. data/README.md +31 -2
  6. data/ext/polars/Cargo.toml +10 -4
  7. data/ext/polars/src/apply/dataframe.rs +2 -2
  8. data/ext/polars/src/{lazy/apply.rs → apply/lazy.rs} +1 -2
  9. data/ext/polars/src/apply/mod.rs +1 -0
  10. data/ext/polars/src/batched_csv.rs +36 -19
  11. data/ext/polars/src/conversion.rs +159 -16
  12. data/ext/polars/src/dataframe.rs +51 -52
  13. data/ext/polars/src/error.rs +0 -4
  14. data/ext/polars/src/expr/binary.rs +69 -0
  15. data/ext/polars/src/expr/categorical.rs +10 -0
  16. data/ext/polars/src/expr/datetime.rs +223 -0
  17. data/ext/polars/src/{lazy/dsl.rs → expr/general.rs} +22 -799
  18. data/ext/polars/src/expr/list.rs +146 -0
  19. data/ext/polars/src/{lazy → expr}/meta.rs +16 -6
  20. data/ext/polars/src/expr/string.rs +313 -0
  21. data/ext/polars/src/expr/struct.rs +15 -0
  22. data/ext/polars/src/expr.rs +33 -0
  23. data/ext/polars/src/functions/eager.rs +93 -0
  24. data/ext/polars/src/functions/io.rs +34 -0
  25. data/ext/polars/src/functions/lazy.rs +209 -0
  26. data/ext/polars/src/functions/meta.rs +8 -0
  27. data/ext/polars/src/functions/mod.rs +5 -0
  28. data/ext/polars/src/functions/whenthen.rs +43 -0
  29. data/ext/polars/src/{lazy/dataframe.rs → lazyframe.rs} +58 -45
  30. data/ext/polars/src/lazygroupby.rs +29 -0
  31. data/ext/polars/src/lib.rs +216 -300
  32. data/ext/polars/src/rb_modules.rs +8 -0
  33. data/ext/polars/src/series/aggregation.rs +83 -0
  34. data/ext/polars/src/series/arithmetic.rs +88 -0
  35. data/ext/polars/src/series/comparison.rs +251 -0
  36. data/ext/polars/src/series/construction.rs +164 -0
  37. data/ext/polars/src/series.rs +103 -531
  38. data/lib/polars/batched_csv_reader.rb +1 -1
  39. data/lib/polars/binary_expr.rb +77 -0
  40. data/lib/polars/binary_name_space.rb +66 -0
  41. data/lib/polars/convert.rb +2 -2
  42. data/lib/polars/data_frame.rb +263 -87
  43. data/lib/polars/data_types.rb +6 -4
  44. data/lib/polars/date_time_expr.rb +148 -8
  45. data/lib/polars/expr.rb +78 -11
  46. data/lib/polars/io.rb +73 -62
  47. data/lib/polars/lazy_frame.rb +107 -10
  48. data/lib/polars/lazy_functions.rb +7 -3
  49. data/lib/polars/list_expr.rb +70 -21
  50. data/lib/polars/list_name_space.rb +2 -2
  51. data/lib/polars/series.rb +190 -74
  52. data/lib/polars/string_expr.rb +150 -44
  53. data/lib/polars/string_name_space.rb +4 -4
  54. data/lib/polars/struct_name_space.rb +32 -0
  55. data/lib/polars/utils.rb +51 -9
  56. data/lib/polars/version.rb +1 -1
  57. data/lib/polars.rb +4 -2
  58. metadata +29 -12
  59. data/ext/polars/src/lazy/mod.rs +0 -5
  60. data/ext/polars/src/lazy/utils.rs +0 -13
  61. data/ext/polars/src/list_construction.rs +0 -100
  62. /data/ext/polars/src/{numo.rs → series/export.rs} +0 -0
  63. /data/ext/polars/src/{set.rs → series/set_at_idx.rs} +0 -0
@@ -0,0 +1,209 @@
1
+ use magnus::{class, RArray, RString, Value};
2
+ use polars::lazy::dsl;
3
+ use polars::prelude::*;
4
+
5
+ use crate::apply::lazy::binary_lambda;
6
+ use crate::conversion::{get_lf, get_rbseq, Wrap};
7
+ use crate::prelude::vec_extract_wrapped;
8
+ use crate::rb_exprs_to_exprs;
9
+ use crate::{RbDataFrame, RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
10
+
11
/// Rebinds each named `Option<&RbExpr>` to an owned expression: the wrapped
/// expression when present, otherwise the literal `0`.
macro_rules! set_unwrapped_or_0 {
    ($($name:ident),+ $(,)?) => {
        $(
            let $name = match $name {
                Some(expr) => expr.inner.clone(),
                None => polars::lazy::dsl::lit(0),
            };
        )+
    };
}
16
+
17
+ pub fn arange(low: &RbExpr, high: &RbExpr, step: i64) -> RbExpr {
18
+ dsl::arange(low.inner.clone(), high.inner.clone(), step).into()
19
+ }
20
+
21
+ pub fn arg_sort_by(by: RArray, descending: Vec<bool>) -> RbResult<RbExpr> {
22
+ let by = rb_exprs_to_exprs(by)?;
23
+ Ok(dsl::arg_sort_by(by, &descending).into())
24
+ }
25
+
26
+ pub fn arg_where(condition: &RbExpr) -> RbExpr {
27
+ dsl::arg_where(condition.inner.clone()).into()
28
+ }
29
+
30
+ pub fn as_struct(exprs: RArray) -> RbResult<RbExpr> {
31
+ let exprs = rb_exprs_to_exprs(exprs)?;
32
+ Ok(dsl::as_struct(&exprs).into())
33
+ }
34
+
35
+ pub fn coalesce(exprs: RArray) -> RbResult<RbExpr> {
36
+ let exprs = rb_exprs_to_exprs(exprs)?;
37
+ Ok(dsl::coalesce(&exprs).into())
38
+ }
39
+
40
+ pub fn col(name: String) -> RbExpr {
41
+ dsl::col(&name).into()
42
+ }
43
+
44
+ pub fn collect_all(lfs: RArray) -> RbResult<RArray> {
45
+ let lfs = lfs
46
+ .each()
47
+ .map(|v| v?.try_convert::<&RbLazyFrame>())
48
+ .collect::<RbResult<Vec<&RbLazyFrame>>>()?;
49
+
50
+ Ok(RArray::from_iter(lfs.iter().map(|lf| {
51
+ let df = lf.ldf.clone().collect().unwrap();
52
+ RbDataFrame::new(df)
53
+ })))
54
+ }
55
+
56
+ pub fn cols(names: Vec<String>) -> RbExpr {
57
+ dsl::cols(names).into()
58
+ }
59
+
60
+ pub fn concat_lf(lfs: Value, rechunk: bool, parallel: bool) -> RbResult<RbLazyFrame> {
61
+ let (seq, len) = get_rbseq(lfs)?;
62
+ let mut lfs = Vec::with_capacity(len);
63
+
64
+ for res in seq.each() {
65
+ let item = res?;
66
+ let lf = get_lf(item)?;
67
+ lfs.push(lf);
68
+ }
69
+
70
+ let lf = polars::lazy::dsl::concat(lfs, rechunk, parallel).map_err(RbPolarsErr::from)?;
71
+ Ok(lf.into())
72
+ }
73
+
74
+ #[allow(clippy::too_many_arguments)]
75
+ pub fn duration(
76
+ days: Option<&RbExpr>,
77
+ seconds: Option<&RbExpr>,
78
+ nanoseconds: Option<&RbExpr>,
79
+ microseconds: Option<&RbExpr>,
80
+ milliseconds: Option<&RbExpr>,
81
+ minutes: Option<&RbExpr>,
82
+ hours: Option<&RbExpr>,
83
+ weeks: Option<&RbExpr>,
84
+ ) -> RbExpr {
85
+ set_unwrapped_or_0!(
86
+ days,
87
+ seconds,
88
+ nanoseconds,
89
+ microseconds,
90
+ milliseconds,
91
+ minutes,
92
+ hours,
93
+ weeks,
94
+ );
95
+ let args = DurationArgs {
96
+ days,
97
+ seconds,
98
+ nanoseconds,
99
+ microseconds,
100
+ milliseconds,
101
+ minutes,
102
+ hours,
103
+ weeks,
104
+ };
105
+ dsl::duration(args).into()
106
+ }
107
+
108
+ pub fn count() -> RbExpr {
109
+ dsl::count().into()
110
+ }
111
+
112
+ pub fn first() -> RbExpr {
113
+ dsl::first().into()
114
+ }
115
+
116
+ pub fn last() -> RbExpr {
117
+ dsl::last().into()
118
+ }
119
+
120
+ pub fn dtype_cols(dtypes: Vec<DataType>) -> RbExpr {
121
+ dsl::dtype_cols(dtypes).into()
122
+ }
123
+
124
+ pub fn fold(acc: &RbExpr, lambda: Value, exprs: RArray) -> RbResult<RbExpr> {
125
+ let exprs = rb_exprs_to_exprs(exprs)?;
126
+
127
+ let func = move |a: Series, b: Series| binary_lambda(lambda, a, b);
128
+ Ok(polars::lazy::dsl::fold_exprs(acc.inner.clone(), func, exprs).into())
129
+ }
130
+
131
+ pub fn cumfold(acc: &RbExpr, lambda: Value, exprs: RArray, include_init: bool) -> RbResult<RbExpr> {
132
+ let exprs = rb_exprs_to_exprs(exprs)?;
133
+
134
+ let func = move |a: Series, b: Series| binary_lambda(lambda, a, b);
135
+ Ok(polars::lazy::dsl::cumfold_exprs(acc.inner.clone(), func, exprs, include_init).into())
136
+ }
137
+
138
+ // TODO improve
139
+ pub fn lit(value: Value) -> RbResult<RbExpr> {
140
+ if value.is_nil() {
141
+ Ok(dsl::lit(Null {}).into())
142
+ } else if let Ok(series) = value.try_convert::<&RbSeries>() {
143
+ Ok(dsl::lit(series.series.borrow().clone()).into())
144
+ } else if let Some(v) = RString::from_value(value) {
145
+ Ok(dsl::lit(v.try_convert::<String>()?).into())
146
+ } else if value.is_kind_of(class::integer()) {
147
+ match value.try_convert::<i64>() {
148
+ Ok(val) => {
149
+ if val > 0 && val < i32::MAX as i64 || val < 0 && val > i32::MIN as i64 {
150
+ Ok(dsl::lit(val as i32).into())
151
+ } else {
152
+ Ok(dsl::lit(val).into())
153
+ }
154
+ }
155
+ _ => {
156
+ let val = value.try_convert::<u64>()?;
157
+ Ok(dsl::lit(val).into())
158
+ }
159
+ }
160
+ } else {
161
+ Ok(dsl::lit(value.try_convert::<f64>()?).into())
162
+ }
163
+ }
164
+
165
+ pub fn repeat(value: Value, n_times: &RbExpr) -> RbResult<RbExpr> {
166
+ if value.is_nil() {
167
+ Ok(polars::lazy::dsl::repeat(Null {}, n_times.inner.clone()).into())
168
+ } else {
169
+ todo!();
170
+ }
171
+ }
172
+
173
+ pub fn pearson_corr(a: &RbExpr, b: &RbExpr, ddof: u8) -> RbExpr {
174
+ polars::lazy::dsl::pearson_corr(a.inner.clone(), b.inner.clone(), ddof).into()
175
+ }
176
+
177
+ pub fn spearman_rank_corr(a: &RbExpr, b: &RbExpr, ddof: u8, propagate_nans: bool) -> RbExpr {
178
+ polars::lazy::dsl::spearman_rank_corr(a.inner.clone(), b.inner.clone(), ddof, propagate_nans)
179
+ .into()
180
+ }
181
+
182
+ pub fn cov(a: &RbExpr, b: &RbExpr) -> RbExpr {
183
+ polars::lazy::dsl::cov(a.inner.clone(), b.inner.clone()).into()
184
+ }
185
+
186
+ pub fn concat_str(s: RArray, sep: String) -> RbResult<RbExpr> {
187
+ let s = rb_exprs_to_exprs(s)?;
188
+ Ok(dsl::concat_str(s, &sep).into())
189
+ }
190
+
191
+ pub fn concat_lst(s: RArray) -> RbResult<RbExpr> {
192
+ let s = rb_exprs_to_exprs(s)?;
193
+ let expr = dsl::concat_list(s).map_err(RbPolarsErr::from)?;
194
+ Ok(expr.into())
195
+ }
196
+
197
+ pub fn dtype_cols2(dtypes: RArray) -> RbResult<RbExpr> {
198
+ let dtypes = dtypes
199
+ .each()
200
+ .map(|v| v?.try_convert::<Wrap<DataType>>())
201
+ .collect::<RbResult<Vec<Wrap<DataType>>>>()?;
202
+ let dtypes = vec_extract_wrapped(dtypes);
203
+ Ok(crate::functions::lazy::dtype_cols(dtypes))
204
+ }
205
+
206
+ pub fn sum_exprs(exprs: RArray) -> RbResult<RbExpr> {
207
+ let exprs = rb_exprs_to_exprs(exprs)?;
208
+ Ok(polars::lazy::dsl::sum_exprs(exprs).into())
209
+ }
@@ -0,0 +1,8 @@
1
+ use magnus::{IntoValue, Value};
2
+ use polars_core::prelude::IDX_DTYPE;
3
+
4
+ use crate::conversion::Wrap;
5
+
6
+ pub fn get_idx_type() -> Value {
7
+ Wrap(IDX_DTYPE).into_value()
8
+ }
@@ -0,0 +1,5 @@
1
+ pub mod eager;
2
+ pub mod io;
3
+ pub mod lazy;
4
+ pub mod meta;
5
+ pub mod whenthen;
@@ -0,0 +1,43 @@
1
+ use polars::lazy::dsl;
2
+
3
+ use crate::RbExpr;
4
+
5
+ #[magnus::wrap(class = "Polars::RbWhen")]
6
+ #[derive(Clone)]
7
+ pub struct RbWhen {
8
+ pub inner: dsl::When,
9
+ }
10
+
11
+ impl From<dsl::When> for RbWhen {
12
+ fn from(inner: dsl::When) -> Self {
13
+ RbWhen { inner }
14
+ }
15
+ }
16
+
17
+ #[magnus::wrap(class = "Polars::RbWhenThen")]
18
+ #[derive(Clone)]
19
+ pub struct RbWhenThen {
20
+ pub inner: dsl::WhenThen,
21
+ }
22
+
23
+ impl From<dsl::WhenThen> for RbWhenThen {
24
+ fn from(inner: dsl::WhenThen) -> Self {
25
+ RbWhenThen { inner }
26
+ }
27
+ }
28
+
29
+ impl RbWhen {
30
+ pub fn then(&self, expr: &RbExpr) -> RbWhenThen {
31
+ self.inner.clone().then(expr.inner.clone()).into()
32
+ }
33
+ }
34
+
35
+ impl RbWhenThen {
36
+ pub fn overwise(&self, expr: &RbExpr) -> RbExpr {
37
+ self.inner.clone().otherwise(expr.inner.clone()).into()
38
+ }
39
+ }
40
+
41
+ pub fn when(predicate: &RbExpr) -> RbWhen {
42
+ dsl::when(predicate.inner.clone()).into()
43
+ }
@@ -1,37 +1,15 @@
1
1
  use magnus::{IntoValue, RArray, RHash, Value};
2
2
  use polars::io::RowCount;
3
- use polars::lazy::frame::{LazyFrame, LazyGroupBy};
3
+ use polars::lazy::frame::LazyFrame;
4
4
  use polars::prelude::*;
5
5
  use std::cell::RefCell;
6
6
  use std::io::{BufWriter, Read};
7
+ use std::path::PathBuf;
7
8
 
8
9
  use crate::conversion::*;
10
+ use crate::expr::rb_exprs_to_exprs;
9
11
  use crate::file::get_file_like;
10
- use crate::lazy::utils::rb_exprs_to_exprs;
11
- use crate::{RbDataFrame, RbExpr, RbPolarsErr, RbResult, RbValueError};
12
-
13
- #[magnus::wrap(class = "Polars::RbLazyGroupBy")]
14
- pub struct RbLazyGroupBy {
15
- lgb: RefCell<Option<LazyGroupBy>>,
16
- }
17
-
18
- impl RbLazyGroupBy {
19
- pub fn agg(&self, aggs: RArray) -> RbResult<RbLazyFrame> {
20
- let lgb = self.lgb.borrow_mut().take().unwrap();
21
- let aggs = rb_exprs_to_exprs(aggs)?;
22
- Ok(lgb.agg(aggs).into())
23
- }
24
-
25
- pub fn head(&self, n: usize) -> RbLazyFrame {
26
- let lgb = self.lgb.take().unwrap();
27
- lgb.head(Some(n)).into()
28
- }
29
-
30
- pub fn tail(&self, n: usize) -> RbLazyFrame {
31
- let lgb = self.lgb.take().unwrap();
32
- lgb.tail(Some(n)).into()
33
- }
34
- }
12
+ use crate::{RbDataFrame, RbExpr, RbLazyGroupBy, RbPolarsErr, RbResult, RbValueError};
35
13
 
36
14
  #[magnus::wrap(class = "Polars::RbLazyFrame")]
37
15
  #[derive(Clone)]
@@ -118,7 +96,7 @@ impl RbLazyFrame {
118
96
  let skip_rows_after_header: usize = arguments[15].try_convert()?;
119
97
  let encoding: Wrap<CsvEncoding> = arguments[16].try_convert()?;
120
98
  let row_count: Option<(String, IdxSize)> = arguments[17].try_convert()?;
121
- let parse_dates: bool = arguments[18].try_convert()?;
99
+ let try_parse_dates: bool = arguments[18].try_convert()?;
122
100
  let eol_char: String = arguments[19].try_convert()?;
123
101
  // end arguments
124
102
 
@@ -131,10 +109,10 @@ impl RbLazyFrame {
131
109
  let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
132
110
 
133
111
  let overwrite_dtype = overwrite_dtype.map(|overwrite_dtype| {
134
- let fields = overwrite_dtype
112
+ overwrite_dtype
135
113
  .into_iter()
136
- .map(|(name, dtype)| Field::new(&name, dtype.0));
137
- Schema::from(fields)
114
+ .map(|(name, dtype)| Field::new(&name, dtype.0))
115
+ .collect::<Schema>()
138
116
  });
139
117
  let r = LazyCsvReader::new(path)
140
118
  .with_infer_schema_length(infer_schema_length)
@@ -153,7 +131,7 @@ impl RbLazyFrame {
153
131
  .with_skip_rows_after_header(skip_rows_after_header)
154
132
  .with_encoding(encoding.0)
155
133
  .with_row_count(row_count)
156
- .with_parse_dates(parse_dates)
134
+ .with_try_parse_dates(try_parse_dates)
157
135
  .with_null_values(null_values);
158
136
 
159
137
  if let Some(_lambda) = with_schema_modify {
@@ -163,6 +141,7 @@ impl RbLazyFrame {
163
141
  Ok(r.finish().map_err(RbPolarsErr::from)?.into())
164
142
  }
165
143
 
144
+ #[allow(clippy::too_many_arguments)]
166
145
  pub fn new_from_parquet(
167
146
  path: String,
168
147
  n_rows: Option<usize>,
@@ -171,6 +150,7 @@ impl RbLazyFrame {
171
150
  rechunk: bool,
172
151
  row_count: Option<(String, IdxSize)>,
173
152
  low_memory: bool,
153
+ use_statistics: bool,
174
154
  ) -> RbResult<Self> {
175
155
  let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
176
156
  let args = ScanArgsParquet {
@@ -182,6 +162,7 @@ impl RbLazyFrame {
182
162
  low_memory,
183
163
  // TODO support cloud options
184
164
  cloud_options: None,
165
+ use_statistics,
185
166
  };
186
167
  let lf = LazyFrame::scan_parquet(path, args).map_err(RbPolarsErr::from)?;
187
168
  Ok(lf.into())
@@ -284,6 +265,32 @@ impl RbLazyFrame {
284
265
  Ok(df.into())
285
266
  }
286
267
 
268
+ #[allow(clippy::too_many_arguments)]
269
+ pub fn sink_parquet(
270
+ &self,
271
+ path: PathBuf,
272
+ compression: String,
273
+ compression_level: Option<i32>,
274
+ statistics: bool,
275
+ row_group_size: Option<usize>,
276
+ data_pagesize_limit: Option<usize>,
277
+ maintain_order: bool,
278
+ ) -> RbResult<()> {
279
+ let compression = parse_parquet_compression(&compression, compression_level)?;
280
+
281
+ let options = ParquetWriteOptions {
282
+ compression,
283
+ statistics,
284
+ row_group_size,
285
+ data_pagesize_limit,
286
+ maintain_order,
287
+ };
288
+
289
+ let ldf = self.ldf.clone();
290
+ ldf.sink_parquet(path, options).map_err(RbPolarsErr::from)?;
291
+ Ok(())
292
+ }
293
+
287
294
  pub fn fetch(&self, n_rows: usize) -> RbResult<RbDataFrame> {
288
295
  let ldf = self.ldf.clone();
289
296
  let df = ldf.fetch(n_rows).map_err(RbPolarsErr::from)?;
@@ -316,7 +323,7 @@ impl RbLazyFrame {
316
323
 
317
324
  pub fn groupby_rolling(
318
325
  &self,
319
- index_column: String,
326
+ index_column: &RbExpr,
320
327
  period: String,
321
328
  offset: String,
322
329
  closed: Wrap<ClosedWindow>,
@@ -326,9 +333,10 @@ impl RbLazyFrame {
326
333
  let ldf = self.ldf.clone();
327
334
  let by = rb_exprs_to_exprs(by)?;
328
335
  let lazy_gb = ldf.groupby_rolling(
336
+ index_column.inner.clone(),
329
337
  by,
330
338
  RollingGroupOptions {
331
- index_column,
339
+ index_column: "".into(),
332
340
  period: Duration::parse(&period),
333
341
  offset: Duration::parse(&offset),
334
342
  closed_window,
@@ -343,7 +351,7 @@ impl RbLazyFrame {
343
351
  #[allow(clippy::too_many_arguments)]
344
352
  pub fn groupby_dynamic(
345
353
  &self,
346
- index_column: String,
354
+ index_column: &RbExpr,
347
355
  every: String,
348
356
  period: String,
349
357
  offset: String,
@@ -357,9 +365,9 @@ impl RbLazyFrame {
357
365
  let by = rb_exprs_to_exprs(by)?;
358
366
  let ldf = self.ldf.clone();
359
367
  let lazy_gb = ldf.groupby_dynamic(
368
+ index_column.inner.clone(),
360
369
  by,
361
370
  DynamicGroupOptions {
362
- index_column,
363
371
  every: Duration::parse(&every),
364
372
  period: Duration::parse(&period),
365
373
  offset: Duration::parse(&offset),
@@ -367,6 +375,7 @@ impl RbLazyFrame {
367
375
  include_boundaries,
368
376
  closed_window,
369
377
  start_by: start_by.0,
378
+ ..Default::default()
370
379
  },
371
380
  );
372
381
 
@@ -415,10 +424,10 @@ impl RbLazyFrame {
415
424
  .force_parallel(force_parallel)
416
425
  .how(JoinType::AsOf(AsOfOptions {
417
426
  strategy: strategy.0,
418
- left_by,
419
- right_by,
427
+ left_by: left_by.map(strings_to_smartstrings),
428
+ right_by: right_by.map(strings_to_smartstrings),
420
429
  tolerance: tolerance.map(|t| t.0.into_static().unwrap()),
421
- tolerance_str,
430
+ tolerance_str: tolerance_str.map(|s| s.into()),
422
431
  }))
423
432
  .suffix(suffix)
424
433
  .finish()
@@ -570,12 +579,14 @@ impl RbLazyFrame {
570
579
  value_vars: Vec<String>,
571
580
  value_name: Option<String>,
572
581
  variable_name: Option<String>,
582
+ streamable: bool,
573
583
  ) -> Self {
574
584
  let args = MeltArgs {
575
- id_vars,
576
- value_vars,
577
- value_name,
578
- variable_name,
585
+ id_vars: strings_to_smartstrings(id_vars),
586
+ value_vars: strings_to_smartstrings(value_vars),
587
+ value_name: value_name.map(|s| s.into()),
588
+ variable_name: variable_name.map(|s| s.into()),
589
+ streamable,
579
590
  };
580
591
 
581
592
  let ldf = self.ldf.clone();
@@ -596,8 +607,10 @@ impl RbLazyFrame {
596
607
  self.ldf.clone().into()
597
608
  }
598
609
 
599
- pub fn columns(&self) -> RbResult<Vec<String>> {
600
- Ok(self.get_schema()?.iter_names().cloned().collect())
610
+ pub fn columns(&self) -> RbResult<RArray> {
611
+ let schema = self.get_schema()?;
612
+ let iter = schema.iter_names().map(|s| s.as_str());
613
+ Ok(RArray::from_iter(iter))
601
614
  }
602
615
 
603
616
  pub fn dtypes(&self) -> RbResult<RArray> {
@@ -614,7 +627,7 @@ impl RbLazyFrame {
614
627
  // TODO remove unwrap
615
628
  schema_dict
616
629
  .aset::<String, Value>(
617
- fld.name().clone(),
630
+ fld.name().to_string(),
618
631
  Wrap(fld.data_type().clone()).into_value(),
619
632
  )
620
633
  .unwrap();
@@ -0,0 +1,29 @@
1
+ use magnus::RArray;
2
+ use polars::lazy::frame::LazyGroupBy;
3
+ use std::cell::RefCell;
4
+
5
+ use crate::expr::rb_exprs_to_exprs;
6
+ use crate::{RbLazyFrame, RbResult};
7
+
8
+ #[magnus::wrap(class = "Polars::RbLazyGroupBy")]
9
+ pub struct RbLazyGroupBy {
10
+ pub lgb: RefCell<Option<LazyGroupBy>>,
11
+ }
12
+
13
+ impl RbLazyGroupBy {
14
+ pub fn agg(&self, aggs: RArray) -> RbResult<RbLazyFrame> {
15
+ let lgb = self.lgb.borrow_mut().take().unwrap();
16
+ let aggs = rb_exprs_to_exprs(aggs)?;
17
+ Ok(lgb.agg(aggs).into())
18
+ }
19
+
20
+ pub fn head(&self, n: usize) -> RbLazyFrame {
21
+ let lgb = self.lgb.take().unwrap();
22
+ lgb.head(Some(n)).into()
23
+ }
24
+
25
+ pub fn tail(&self, n: usize) -> RbLazyFrame {
26
+ let lgb = self.lgb.take().unwrap();
27
+ lgb.tail(Some(n)).into()
28
+ }
29
+ }