polars-df 0.3.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +24 -1
  3. data/Cargo.lock +486 -380
  4. data/Cargo.toml +0 -2
  5. data/README.md +31 -2
  6. data/ext/polars/Cargo.toml +10 -4
  7. data/ext/polars/src/apply/dataframe.rs +2 -2
  8. data/ext/polars/src/{lazy/apply.rs → apply/lazy.rs} +1 -2
  9. data/ext/polars/src/apply/mod.rs +1 -0
  10. data/ext/polars/src/batched_csv.rs +36 -19
  11. data/ext/polars/src/conversion.rs +159 -16
  12. data/ext/polars/src/dataframe.rs +51 -52
  13. data/ext/polars/src/error.rs +0 -4
  14. data/ext/polars/src/expr/binary.rs +69 -0
  15. data/ext/polars/src/expr/categorical.rs +10 -0
  16. data/ext/polars/src/expr/datetime.rs +223 -0
  17. data/ext/polars/src/{lazy/dsl.rs → expr/general.rs} +22 -799
  18. data/ext/polars/src/expr/list.rs +146 -0
  19. data/ext/polars/src/{lazy → expr}/meta.rs +16 -6
  20. data/ext/polars/src/expr/string.rs +313 -0
  21. data/ext/polars/src/expr/struct.rs +15 -0
  22. data/ext/polars/src/expr.rs +33 -0
  23. data/ext/polars/src/functions/eager.rs +93 -0
  24. data/ext/polars/src/functions/io.rs +34 -0
  25. data/ext/polars/src/functions/lazy.rs +209 -0
  26. data/ext/polars/src/functions/meta.rs +8 -0
  27. data/ext/polars/src/functions/mod.rs +5 -0
  28. data/ext/polars/src/functions/whenthen.rs +43 -0
  29. data/ext/polars/src/{lazy/dataframe.rs → lazyframe.rs} +58 -45
  30. data/ext/polars/src/lazygroupby.rs +29 -0
  31. data/ext/polars/src/lib.rs +216 -300
  32. data/ext/polars/src/rb_modules.rs +8 -0
  33. data/ext/polars/src/series/aggregation.rs +83 -0
  34. data/ext/polars/src/series/arithmetic.rs +88 -0
  35. data/ext/polars/src/series/comparison.rs +251 -0
  36. data/ext/polars/src/series/construction.rs +164 -0
  37. data/ext/polars/src/series.rs +103 -531
  38. data/lib/polars/batched_csv_reader.rb +1 -1
  39. data/lib/polars/binary_expr.rb +77 -0
  40. data/lib/polars/binary_name_space.rb +66 -0
  41. data/lib/polars/convert.rb +2 -2
  42. data/lib/polars/data_frame.rb +263 -87
  43. data/lib/polars/data_types.rb +6 -4
  44. data/lib/polars/date_time_expr.rb +148 -8
  45. data/lib/polars/expr.rb +78 -11
  46. data/lib/polars/io.rb +73 -62
  47. data/lib/polars/lazy_frame.rb +107 -10
  48. data/lib/polars/lazy_functions.rb +7 -3
  49. data/lib/polars/list_expr.rb +70 -21
  50. data/lib/polars/list_name_space.rb +2 -2
  51. data/lib/polars/series.rb +190 -74
  52. data/lib/polars/string_expr.rb +150 -44
  53. data/lib/polars/string_name_space.rb +4 -4
  54. data/lib/polars/struct_name_space.rb +32 -0
  55. data/lib/polars/utils.rb +51 -9
  56. data/lib/polars/version.rb +1 -1
  57. data/lib/polars.rb +4 -2
  58. metadata +29 -12
  59. data/ext/polars/src/lazy/mod.rs +0 -5
  60. data/ext/polars/src/lazy/utils.rs +0 -13
  61. data/ext/polars/src/list_construction.rs +0 -100
  62. /data/ext/polars/src/{numo.rs → series/export.rs} +0 -0
  63. /data/ext/polars/src/{set.rs → series/set_at_idx.rs} +0 -0
@@ -0,0 +1,209 @@
1
+ use magnus::{class, RArray, RString, Value};
2
+ use polars::lazy::dsl;
3
+ use polars::prelude::*;
4
+
5
+ use crate::apply::lazy::binary_lambda;
6
+ use crate::conversion::{get_lf, get_rbseq, Wrap};
7
+ use crate::prelude::vec_extract_wrapped;
8
+ use crate::rb_exprs_to_exprs;
9
+ use crate::{RbDataFrame, RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
10
+
11
// Rebinds each listed identifier (expected to hold an `Option<&RbExpr>`) to an
// owned expression: a clone of the wrapped expression when present, otherwise
// the literal `0`.
macro_rules! set_unwrapped_or_0 {
    ($($var:ident),+ $(,)?) => {
        $(
            let $var = match $var {
                Some(expr) => expr.inner.clone(),
                None => polars::lazy::dsl::lit(0),
            };
        )+
    };
}
16
+
17
+ pub fn arange(low: &RbExpr, high: &RbExpr, step: i64) -> RbExpr {
18
+ dsl::arange(low.inner.clone(), high.inner.clone(), step).into()
19
+ }
20
+
21
+ pub fn arg_sort_by(by: RArray, descending: Vec<bool>) -> RbResult<RbExpr> {
22
+ let by = rb_exprs_to_exprs(by)?;
23
+ Ok(dsl::arg_sort_by(by, &descending).into())
24
+ }
25
+
26
+ pub fn arg_where(condition: &RbExpr) -> RbExpr {
27
+ dsl::arg_where(condition.inner.clone()).into()
28
+ }
29
+
30
+ pub fn as_struct(exprs: RArray) -> RbResult<RbExpr> {
31
+ let exprs = rb_exprs_to_exprs(exprs)?;
32
+ Ok(dsl::as_struct(&exprs).into())
33
+ }
34
+
35
+ pub fn coalesce(exprs: RArray) -> RbResult<RbExpr> {
36
+ let exprs = rb_exprs_to_exprs(exprs)?;
37
+ Ok(dsl::coalesce(&exprs).into())
38
+ }
39
+
40
+ pub fn col(name: String) -> RbExpr {
41
+ dsl::col(&name).into()
42
+ }
43
+
44
+ pub fn collect_all(lfs: RArray) -> RbResult<RArray> {
45
+ let lfs = lfs
46
+ .each()
47
+ .map(|v| v?.try_convert::<&RbLazyFrame>())
48
+ .collect::<RbResult<Vec<&RbLazyFrame>>>()?;
49
+
50
+ Ok(RArray::from_iter(lfs.iter().map(|lf| {
51
+ let df = lf.ldf.clone().collect().unwrap();
52
+ RbDataFrame::new(df)
53
+ })))
54
+ }
55
+
56
+ pub fn cols(names: Vec<String>) -> RbExpr {
57
+ dsl::cols(names).into()
58
+ }
59
+
60
+ pub fn concat_lf(lfs: Value, rechunk: bool, parallel: bool) -> RbResult<RbLazyFrame> {
61
+ let (seq, len) = get_rbseq(lfs)?;
62
+ let mut lfs = Vec::with_capacity(len);
63
+
64
+ for res in seq.each() {
65
+ let item = res?;
66
+ let lf = get_lf(item)?;
67
+ lfs.push(lf);
68
+ }
69
+
70
+ let lf = polars::lazy::dsl::concat(lfs, rechunk, parallel).map_err(RbPolarsErr::from)?;
71
+ Ok(lf.into())
72
+ }
73
+
74
+ #[allow(clippy::too_many_arguments)]
75
+ pub fn duration(
76
+ days: Option<&RbExpr>,
77
+ seconds: Option<&RbExpr>,
78
+ nanoseconds: Option<&RbExpr>,
79
+ microseconds: Option<&RbExpr>,
80
+ milliseconds: Option<&RbExpr>,
81
+ minutes: Option<&RbExpr>,
82
+ hours: Option<&RbExpr>,
83
+ weeks: Option<&RbExpr>,
84
+ ) -> RbExpr {
85
+ set_unwrapped_or_0!(
86
+ days,
87
+ seconds,
88
+ nanoseconds,
89
+ microseconds,
90
+ milliseconds,
91
+ minutes,
92
+ hours,
93
+ weeks,
94
+ );
95
+ let args = DurationArgs {
96
+ days,
97
+ seconds,
98
+ nanoseconds,
99
+ microseconds,
100
+ milliseconds,
101
+ minutes,
102
+ hours,
103
+ weeks,
104
+ };
105
+ dsl::duration(args).into()
106
+ }
107
+
108
+ pub fn count() -> RbExpr {
109
+ dsl::count().into()
110
+ }
111
+
112
+ pub fn first() -> RbExpr {
113
+ dsl::first().into()
114
+ }
115
+
116
+ pub fn last() -> RbExpr {
117
+ dsl::last().into()
118
+ }
119
+
120
+ pub fn dtype_cols(dtypes: Vec<DataType>) -> RbExpr {
121
+ dsl::dtype_cols(dtypes).into()
122
+ }
123
+
124
+ pub fn fold(acc: &RbExpr, lambda: Value, exprs: RArray) -> RbResult<RbExpr> {
125
+ let exprs = rb_exprs_to_exprs(exprs)?;
126
+
127
+ let func = move |a: Series, b: Series| binary_lambda(lambda, a, b);
128
+ Ok(polars::lazy::dsl::fold_exprs(acc.inner.clone(), func, exprs).into())
129
+ }
130
+
131
+ pub fn cumfold(acc: &RbExpr, lambda: Value, exprs: RArray, include_init: bool) -> RbResult<RbExpr> {
132
+ let exprs = rb_exprs_to_exprs(exprs)?;
133
+
134
+ let func = move |a: Series, b: Series| binary_lambda(lambda, a, b);
135
+ Ok(polars::lazy::dsl::cumfold_exprs(acc.inner.clone(), func, exprs, include_init).into())
136
+ }
137
+
138
+ // TODO improve
139
+ pub fn lit(value: Value) -> RbResult<RbExpr> {
140
+ if value.is_nil() {
141
+ Ok(dsl::lit(Null {}).into())
142
+ } else if let Ok(series) = value.try_convert::<&RbSeries>() {
143
+ Ok(dsl::lit(series.series.borrow().clone()).into())
144
+ } else if let Some(v) = RString::from_value(value) {
145
+ Ok(dsl::lit(v.try_convert::<String>()?).into())
146
+ } else if value.is_kind_of(class::integer()) {
147
+ match value.try_convert::<i64>() {
148
+ Ok(val) => {
149
+ if val > 0 && val < i32::MAX as i64 || val < 0 && val > i32::MIN as i64 {
150
+ Ok(dsl::lit(val as i32).into())
151
+ } else {
152
+ Ok(dsl::lit(val).into())
153
+ }
154
+ }
155
+ _ => {
156
+ let val = value.try_convert::<u64>()?;
157
+ Ok(dsl::lit(val).into())
158
+ }
159
+ }
160
+ } else {
161
+ Ok(dsl::lit(value.try_convert::<f64>()?).into())
162
+ }
163
+ }
164
+
165
+ pub fn repeat(value: Value, n_times: &RbExpr) -> RbResult<RbExpr> {
166
+ if value.is_nil() {
167
+ Ok(polars::lazy::dsl::repeat(Null {}, n_times.inner.clone()).into())
168
+ } else {
169
+ todo!();
170
+ }
171
+ }
172
+
173
+ pub fn pearson_corr(a: &RbExpr, b: &RbExpr, ddof: u8) -> RbExpr {
174
+ polars::lazy::dsl::pearson_corr(a.inner.clone(), b.inner.clone(), ddof).into()
175
+ }
176
+
177
+ pub fn spearman_rank_corr(a: &RbExpr, b: &RbExpr, ddof: u8, propagate_nans: bool) -> RbExpr {
178
+ polars::lazy::dsl::spearman_rank_corr(a.inner.clone(), b.inner.clone(), ddof, propagate_nans)
179
+ .into()
180
+ }
181
+
182
+ pub fn cov(a: &RbExpr, b: &RbExpr) -> RbExpr {
183
+ polars::lazy::dsl::cov(a.inner.clone(), b.inner.clone()).into()
184
+ }
185
+
186
+ pub fn concat_str(s: RArray, sep: String) -> RbResult<RbExpr> {
187
+ let s = rb_exprs_to_exprs(s)?;
188
+ Ok(dsl::concat_str(s, &sep).into())
189
+ }
190
+
191
+ pub fn concat_lst(s: RArray) -> RbResult<RbExpr> {
192
+ let s = rb_exprs_to_exprs(s)?;
193
+ let expr = dsl::concat_list(s).map_err(RbPolarsErr::from)?;
194
+ Ok(expr.into())
195
+ }
196
+
197
+ pub fn dtype_cols2(dtypes: RArray) -> RbResult<RbExpr> {
198
+ let dtypes = dtypes
199
+ .each()
200
+ .map(|v| v?.try_convert::<Wrap<DataType>>())
201
+ .collect::<RbResult<Vec<Wrap<DataType>>>>()?;
202
+ let dtypes = vec_extract_wrapped(dtypes);
203
+ Ok(crate::functions::lazy::dtype_cols(dtypes))
204
+ }
205
+
206
+ pub fn sum_exprs(exprs: RArray) -> RbResult<RbExpr> {
207
+ let exprs = rb_exprs_to_exprs(exprs)?;
208
+ Ok(polars::lazy::dsl::sum_exprs(exprs).into())
209
+ }
@@ -0,0 +1,8 @@
1
+ use magnus::{IntoValue, Value};
2
+ use polars_core::prelude::IDX_DTYPE;
3
+
4
+ use crate::conversion::Wrap;
5
+
6
+ pub fn get_idx_type() -> Value {
7
+ Wrap(IDX_DTYPE).into_value()
8
+ }
@@ -0,0 +1,5 @@
1
+ pub mod eager;
2
+ pub mod io;
3
+ pub mod lazy;
4
+ pub mod meta;
5
+ pub mod whenthen;
@@ -0,0 +1,43 @@
1
+ use polars::lazy::dsl;
2
+
3
+ use crate::RbExpr;
4
+
5
+ #[magnus::wrap(class = "Polars::RbWhen")]
6
+ #[derive(Clone)]
7
+ pub struct RbWhen {
8
+ pub inner: dsl::When,
9
+ }
10
+
11
+ impl From<dsl::When> for RbWhen {
12
+ fn from(inner: dsl::When) -> Self {
13
+ RbWhen { inner }
14
+ }
15
+ }
16
+
17
+ #[magnus::wrap(class = "Polars::RbWhenThen")]
18
+ #[derive(Clone)]
19
+ pub struct RbWhenThen {
20
+ pub inner: dsl::WhenThen,
21
+ }
22
+
23
+ impl From<dsl::WhenThen> for RbWhenThen {
24
+ fn from(inner: dsl::WhenThen) -> Self {
25
+ RbWhenThen { inner }
26
+ }
27
+ }
28
+
29
+ impl RbWhen {
30
+ pub fn then(&self, expr: &RbExpr) -> RbWhenThen {
31
+ self.inner.clone().then(expr.inner.clone()).into()
32
+ }
33
+ }
34
+
35
+ impl RbWhenThen {
36
+ pub fn overwise(&self, expr: &RbExpr) -> RbExpr {
37
+ self.inner.clone().otherwise(expr.inner.clone()).into()
38
+ }
39
+ }
40
+
41
+ pub fn when(predicate: &RbExpr) -> RbWhen {
42
+ dsl::when(predicate.inner.clone()).into()
43
+ }
@@ -1,37 +1,15 @@
1
1
  use magnus::{IntoValue, RArray, RHash, Value};
2
2
  use polars::io::RowCount;
3
- use polars::lazy::frame::{LazyFrame, LazyGroupBy};
3
+ use polars::lazy::frame::LazyFrame;
4
4
  use polars::prelude::*;
5
5
  use std::cell::RefCell;
6
6
  use std::io::{BufWriter, Read};
7
+ use std::path::PathBuf;
7
8
 
8
9
  use crate::conversion::*;
10
+ use crate::expr::rb_exprs_to_exprs;
9
11
  use crate::file::get_file_like;
10
- use crate::lazy::utils::rb_exprs_to_exprs;
11
- use crate::{RbDataFrame, RbExpr, RbPolarsErr, RbResult, RbValueError};
12
-
13
- #[magnus::wrap(class = "Polars::RbLazyGroupBy")]
14
- pub struct RbLazyGroupBy {
15
- lgb: RefCell<Option<LazyGroupBy>>,
16
- }
17
-
18
- impl RbLazyGroupBy {
19
- pub fn agg(&self, aggs: RArray) -> RbResult<RbLazyFrame> {
20
- let lgb = self.lgb.borrow_mut().take().unwrap();
21
- let aggs = rb_exprs_to_exprs(aggs)?;
22
- Ok(lgb.agg(aggs).into())
23
- }
24
-
25
- pub fn head(&self, n: usize) -> RbLazyFrame {
26
- let lgb = self.lgb.take().unwrap();
27
- lgb.head(Some(n)).into()
28
- }
29
-
30
- pub fn tail(&self, n: usize) -> RbLazyFrame {
31
- let lgb = self.lgb.take().unwrap();
32
- lgb.tail(Some(n)).into()
33
- }
34
- }
12
+ use crate::{RbDataFrame, RbExpr, RbLazyGroupBy, RbPolarsErr, RbResult, RbValueError};
35
13
 
36
14
  #[magnus::wrap(class = "Polars::RbLazyFrame")]
37
15
  #[derive(Clone)]
@@ -118,7 +96,7 @@ impl RbLazyFrame {
118
96
  let skip_rows_after_header: usize = arguments[15].try_convert()?;
119
97
  let encoding: Wrap<CsvEncoding> = arguments[16].try_convert()?;
120
98
  let row_count: Option<(String, IdxSize)> = arguments[17].try_convert()?;
121
- let parse_dates: bool = arguments[18].try_convert()?;
99
+ let try_parse_dates: bool = arguments[18].try_convert()?;
122
100
  let eol_char: String = arguments[19].try_convert()?;
123
101
  // end arguments
124
102
 
@@ -131,10 +109,10 @@ impl RbLazyFrame {
131
109
  let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
132
110
 
133
111
  let overwrite_dtype = overwrite_dtype.map(|overwrite_dtype| {
134
- let fields = overwrite_dtype
112
+ overwrite_dtype
135
113
  .into_iter()
136
- .map(|(name, dtype)| Field::new(&name, dtype.0));
137
- Schema::from(fields)
114
+ .map(|(name, dtype)| Field::new(&name, dtype.0))
115
+ .collect::<Schema>()
138
116
  });
139
117
  let r = LazyCsvReader::new(path)
140
118
  .with_infer_schema_length(infer_schema_length)
@@ -153,7 +131,7 @@ impl RbLazyFrame {
153
131
  .with_skip_rows_after_header(skip_rows_after_header)
154
132
  .with_encoding(encoding.0)
155
133
  .with_row_count(row_count)
156
- .with_parse_dates(parse_dates)
134
+ .with_try_parse_dates(try_parse_dates)
157
135
  .with_null_values(null_values);
158
136
 
159
137
  if let Some(_lambda) = with_schema_modify {
@@ -163,6 +141,7 @@ impl RbLazyFrame {
163
141
  Ok(r.finish().map_err(RbPolarsErr::from)?.into())
164
142
  }
165
143
 
144
+ #[allow(clippy::too_many_arguments)]
166
145
  pub fn new_from_parquet(
167
146
  path: String,
168
147
  n_rows: Option<usize>,
@@ -171,6 +150,7 @@ impl RbLazyFrame {
171
150
  rechunk: bool,
172
151
  row_count: Option<(String, IdxSize)>,
173
152
  low_memory: bool,
153
+ use_statistics: bool,
174
154
  ) -> RbResult<Self> {
175
155
  let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
176
156
  let args = ScanArgsParquet {
@@ -182,6 +162,7 @@ impl RbLazyFrame {
182
162
  low_memory,
183
163
  // TODO support cloud options
184
164
  cloud_options: None,
165
+ use_statistics,
185
166
  };
186
167
  let lf = LazyFrame::scan_parquet(path, args).map_err(RbPolarsErr::from)?;
187
168
  Ok(lf.into())
@@ -284,6 +265,32 @@ impl RbLazyFrame {
284
265
  Ok(df.into())
285
266
  }
286
267
 
268
+ #[allow(clippy::too_many_arguments)]
269
+ pub fn sink_parquet(
270
+ &self,
271
+ path: PathBuf,
272
+ compression: String,
273
+ compression_level: Option<i32>,
274
+ statistics: bool,
275
+ row_group_size: Option<usize>,
276
+ data_pagesize_limit: Option<usize>,
277
+ maintain_order: bool,
278
+ ) -> RbResult<()> {
279
+ let compression = parse_parquet_compression(&compression, compression_level)?;
280
+
281
+ let options = ParquetWriteOptions {
282
+ compression,
283
+ statistics,
284
+ row_group_size,
285
+ data_pagesize_limit,
286
+ maintain_order,
287
+ };
288
+
289
+ let ldf = self.ldf.clone();
290
+ ldf.sink_parquet(path, options).map_err(RbPolarsErr::from)?;
291
+ Ok(())
292
+ }
293
+
287
294
  pub fn fetch(&self, n_rows: usize) -> RbResult<RbDataFrame> {
288
295
  let ldf = self.ldf.clone();
289
296
  let df = ldf.fetch(n_rows).map_err(RbPolarsErr::from)?;
@@ -316,7 +323,7 @@ impl RbLazyFrame {
316
323
 
317
324
  pub fn groupby_rolling(
318
325
  &self,
319
- index_column: String,
326
+ index_column: &RbExpr,
320
327
  period: String,
321
328
  offset: String,
322
329
  closed: Wrap<ClosedWindow>,
@@ -326,9 +333,10 @@ impl RbLazyFrame {
326
333
  let ldf = self.ldf.clone();
327
334
  let by = rb_exprs_to_exprs(by)?;
328
335
  let lazy_gb = ldf.groupby_rolling(
336
+ index_column.inner.clone(),
329
337
  by,
330
338
  RollingGroupOptions {
331
- index_column,
339
+ index_column: "".into(),
332
340
  period: Duration::parse(&period),
333
341
  offset: Duration::parse(&offset),
334
342
  closed_window,
@@ -343,7 +351,7 @@ impl RbLazyFrame {
343
351
  #[allow(clippy::too_many_arguments)]
344
352
  pub fn groupby_dynamic(
345
353
  &self,
346
- index_column: String,
354
+ index_column: &RbExpr,
347
355
  every: String,
348
356
  period: String,
349
357
  offset: String,
@@ -357,9 +365,9 @@ impl RbLazyFrame {
357
365
  let by = rb_exprs_to_exprs(by)?;
358
366
  let ldf = self.ldf.clone();
359
367
  let lazy_gb = ldf.groupby_dynamic(
368
+ index_column.inner.clone(),
360
369
  by,
361
370
  DynamicGroupOptions {
362
- index_column,
363
371
  every: Duration::parse(&every),
364
372
  period: Duration::parse(&period),
365
373
  offset: Duration::parse(&offset),
@@ -367,6 +375,7 @@ impl RbLazyFrame {
367
375
  include_boundaries,
368
376
  closed_window,
369
377
  start_by: start_by.0,
378
+ ..Default::default()
370
379
  },
371
380
  );
372
381
 
@@ -415,10 +424,10 @@ impl RbLazyFrame {
415
424
  .force_parallel(force_parallel)
416
425
  .how(JoinType::AsOf(AsOfOptions {
417
426
  strategy: strategy.0,
418
- left_by,
419
- right_by,
427
+ left_by: left_by.map(strings_to_smartstrings),
428
+ right_by: right_by.map(strings_to_smartstrings),
420
429
  tolerance: tolerance.map(|t| t.0.into_static().unwrap()),
421
- tolerance_str,
430
+ tolerance_str: tolerance_str.map(|s| s.into()),
422
431
  }))
423
432
  .suffix(suffix)
424
433
  .finish()
@@ -570,12 +579,14 @@ impl RbLazyFrame {
570
579
  value_vars: Vec<String>,
571
580
  value_name: Option<String>,
572
581
  variable_name: Option<String>,
582
+ streamable: bool,
573
583
  ) -> Self {
574
584
  let args = MeltArgs {
575
- id_vars,
576
- value_vars,
577
- value_name,
578
- variable_name,
585
+ id_vars: strings_to_smartstrings(id_vars),
586
+ value_vars: strings_to_smartstrings(value_vars),
587
+ value_name: value_name.map(|s| s.into()),
588
+ variable_name: variable_name.map(|s| s.into()),
589
+ streamable,
579
590
  };
580
591
 
581
592
  let ldf = self.ldf.clone();
@@ -596,8 +607,10 @@ impl RbLazyFrame {
596
607
  self.ldf.clone().into()
597
608
  }
598
609
 
599
- pub fn columns(&self) -> RbResult<Vec<String>> {
600
- Ok(self.get_schema()?.iter_names().cloned().collect())
610
+ pub fn columns(&self) -> RbResult<RArray> {
611
+ let schema = self.get_schema()?;
612
+ let iter = schema.iter_names().map(|s| s.as_str());
613
+ Ok(RArray::from_iter(iter))
601
614
  }
602
615
 
603
616
  pub fn dtypes(&self) -> RbResult<RArray> {
@@ -614,7 +627,7 @@ impl RbLazyFrame {
614
627
  // TODO remove unwrap
615
628
  schema_dict
616
629
  .aset::<String, Value>(
617
- fld.name().clone(),
630
+ fld.name().to_string(),
618
631
  Wrap(fld.data_type().clone()).into_value(),
619
632
  )
620
633
  .unwrap();
@@ -0,0 +1,29 @@
1
+ use magnus::RArray;
2
+ use polars::lazy::frame::LazyGroupBy;
3
+ use std::cell::RefCell;
4
+
5
+ use crate::expr::rb_exprs_to_exprs;
6
+ use crate::{RbLazyFrame, RbResult};
7
+
8
+ #[magnus::wrap(class = "Polars::RbLazyGroupBy")]
9
+ pub struct RbLazyGroupBy {
10
+ pub lgb: RefCell<Option<LazyGroupBy>>,
11
+ }
12
+
13
+ impl RbLazyGroupBy {
14
+ pub fn agg(&self, aggs: RArray) -> RbResult<RbLazyFrame> {
15
+ let lgb = self.lgb.borrow_mut().take().unwrap();
16
+ let aggs = rb_exprs_to_exprs(aggs)?;
17
+ Ok(lgb.agg(aggs).into())
18
+ }
19
+
20
+ pub fn head(&self, n: usize) -> RbLazyFrame {
21
+ let lgb = self.lgb.take().unwrap();
22
+ lgb.head(Some(n)).into()
23
+ }
24
+
25
+ pub fn tail(&self, n: usize) -> RbLazyFrame {
26
+ let lgb = self.lgb.take().unwrap();
27
+ lgb.tail(Some(n)).into()
28
+ }
29
+ }