polars-df 0.8.0 → 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (68) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +30 -1
  3. data/Cargo.lock +107 -59
  4. data/Cargo.toml +0 -3
  5. data/LICENSE.txt +1 -1
  6. data/README.md +2 -2
  7. data/ext/polars/Cargo.toml +15 -7
  8. data/ext/polars/src/batched_csv.rs +4 -4
  9. data/ext/polars/src/conversion/anyvalue.rs +185 -0
  10. data/ext/polars/src/conversion/chunked_array.rs +140 -0
  11. data/ext/polars/src/{conversion.rs → conversion/mod.rs} +260 -340
  12. data/ext/polars/src/dataframe.rs +69 -53
  13. data/ext/polars/src/expr/array.rs +74 -0
  14. data/ext/polars/src/expr/datetime.rs +22 -56
  15. data/ext/polars/src/expr/general.rs +61 -33
  16. data/ext/polars/src/expr/list.rs +52 -4
  17. data/ext/polars/src/expr/meta.rs +48 -0
  18. data/ext/polars/src/expr/rolling.rs +1 -0
  19. data/ext/polars/src/expr/string.rs +59 -8
  20. data/ext/polars/src/expr/struct.rs +8 -4
  21. data/ext/polars/src/functions/aggregation.rs +6 -0
  22. data/ext/polars/src/functions/lazy.rs +103 -48
  23. data/ext/polars/src/functions/meta.rs +45 -1
  24. data/ext/polars/src/functions/string_cache.rs +14 -0
  25. data/ext/polars/src/{lazyframe.rs → lazyframe/mod.rs} +138 -22
  26. data/ext/polars/src/lib.rs +226 -168
  27. data/ext/polars/src/series/aggregation.rs +20 -0
  28. data/ext/polars/src/series/mod.rs +25 -4
  29. data/lib/polars/array_expr.rb +449 -0
  30. data/lib/polars/array_name_space.rb +346 -0
  31. data/lib/polars/cat_expr.rb +24 -0
  32. data/lib/polars/cat_name_space.rb +75 -0
  33. data/lib/polars/config.rb +2 -2
  34. data/lib/polars/data_frame.rb +179 -43
  35. data/lib/polars/data_types.rb +191 -28
  36. data/lib/polars/date_time_expr.rb +31 -14
  37. data/lib/polars/exceptions.rb +12 -1
  38. data/lib/polars/expr.rb +866 -186
  39. data/lib/polars/functions/aggregation/horizontal.rb +246 -0
  40. data/lib/polars/functions/aggregation/vertical.rb +282 -0
  41. data/lib/polars/functions/as_datatype.rb +248 -0
  42. data/lib/polars/functions/col.rb +47 -0
  43. data/lib/polars/functions/eager.rb +182 -0
  44. data/lib/polars/functions/lazy.rb +1280 -0
  45. data/lib/polars/functions/len.rb +49 -0
  46. data/lib/polars/functions/lit.rb +35 -0
  47. data/lib/polars/functions/random.rb +16 -0
  48. data/lib/polars/functions/range/date_range.rb +103 -0
  49. data/lib/polars/functions/range/int_range.rb +51 -0
  50. data/lib/polars/functions/repeat.rb +144 -0
  51. data/lib/polars/functions/whenthen.rb +27 -0
  52. data/lib/polars/functions.rb +29 -416
  53. data/lib/polars/group_by.rb +2 -2
  54. data/lib/polars/io.rb +18 -25
  55. data/lib/polars/lazy_frame.rb +367 -53
  56. data/lib/polars/list_expr.rb +152 -6
  57. data/lib/polars/list_name_space.rb +102 -0
  58. data/lib/polars/meta_expr.rb +175 -7
  59. data/lib/polars/series.rb +273 -34
  60. data/lib/polars/string_cache.rb +75 -0
  61. data/lib/polars/string_expr.rb +412 -96
  62. data/lib/polars/string_name_space.rb +4 -4
  63. data/lib/polars/testing.rb +507 -0
  64. data/lib/polars/utils.rb +52 -8
  65. data/lib/polars/version.rb +1 -1
  66. data/lib/polars.rb +15 -2
  67. metadata +35 -5
  68. data/lib/polars/lazy_functions.rb +0 -1181
@@ -17,6 +17,44 @@ macro_rules! set_unwrapped_or_0 {
17
17
  };
18
18
  }
19
19
 
20
+ pub fn rolling_corr(
21
+ x: &RbExpr,
22
+ y: &RbExpr,
23
+ window_size: IdxSize,
24
+ min_periods: IdxSize,
25
+ ddof: u8,
26
+ ) -> RbExpr {
27
+ dsl::rolling_corr(
28
+ x.inner.clone(),
29
+ y.inner.clone(),
30
+ RollingCovOptions {
31
+ min_periods,
32
+ window_size,
33
+ ddof,
34
+ },
35
+ )
36
+ .into()
37
+ }
38
+
39
+ pub fn rolling_cov(
40
+ x: &RbExpr,
41
+ y: &RbExpr,
42
+ window_size: IdxSize,
43
+ min_periods: IdxSize,
44
+ ddof: u8,
45
+ ) -> RbExpr {
46
+ dsl::rolling_cov(
47
+ x.inner.clone(),
48
+ y.inner.clone(),
49
+ RollingCovOptions {
50
+ min_periods,
51
+ window_size,
52
+ ddof,
53
+ },
54
+ )
55
+ .into()
56
+ }
57
+
20
58
  pub fn arg_sort_by(by: RArray, descending: Vec<bool>) -> RbResult<RbExpr> {
21
59
  let by = rb_exprs_to_exprs(by)?;
22
60
  Ok(dsl::arg_sort_by(by, &descending).into())
@@ -83,6 +121,47 @@ pub fn concat_lf(
83
121
  Ok(lf.into())
84
122
  }
85
123
 
124
+ pub fn concat_list(s: RArray) -> RbResult<RbExpr> {
125
+ let s = rb_exprs_to_exprs(s)?;
126
+ let expr = dsl::concat_list(s).map_err(RbPolarsErr::from)?;
127
+ Ok(expr.into())
128
+ }
129
+
130
+ pub fn concat_str(s: RArray, separator: String, ignore_nulls: bool) -> RbResult<RbExpr> {
131
+ let s = rb_exprs_to_exprs(s)?;
132
+ Ok(dsl::concat_str(s, &separator, ignore_nulls).into())
133
+ }
134
+
135
+ pub fn len() -> RbExpr {
136
+ dsl::len().into()
137
+ }
138
+
139
+ pub fn cov(a: &RbExpr, b: &RbExpr, ddof: u8) -> RbExpr {
140
+ polars::lazy::dsl::cov(a.inner.clone(), b.inner.clone(), ddof).into()
141
+ }
142
+
143
+ pub fn arctan2(y: &RbExpr, x: &RbExpr) -> RbExpr {
144
+ y.inner.clone().arctan2(x.inner.clone()).into()
145
+ }
146
+
147
+ pub fn arctan2d(y: &RbExpr, x: &RbExpr) -> RbExpr {
148
+ y.inner.clone().arctan2(x.inner.clone()).degrees().into()
149
+ }
150
+
151
+ pub fn cum_fold(
152
+ acc: &RbExpr,
153
+ lambda: Value,
154
+ exprs: RArray,
155
+ include_init: bool,
156
+ ) -> RbResult<RbExpr> {
157
+ let exprs = rb_exprs_to_exprs(exprs)?;
158
+ let lambda = Opaque::from(lambda);
159
+
160
+ let func =
161
+ move |a: Series, b: Series| binary_lambda(Ruby::get().unwrap().get_inner(lambda), a, b);
162
+ Ok(dsl::cum_fold_exprs(acc.inner.clone(), func, exprs, include_init).into())
163
+ }
164
+
86
165
  pub fn concat_lf_diagonal(
87
166
  lfs: RArray,
88
167
  rechunk: bool,
@@ -110,6 +189,19 @@ pub fn concat_lf_diagonal(
110
189
  Ok(lf.into())
111
190
  }
112
191
 
192
+ pub fn dtype_cols(dtypes: Vec<DataType>) -> RbExpr {
193
+ dsl::dtype_cols(dtypes).into()
194
+ }
195
+
196
+ pub fn dtype_cols2(dtypes: RArray) -> RbResult<RbExpr> {
197
+ let dtypes = dtypes
198
+ .each()
199
+ .map(|v| Wrap::<DataType>::try_convert(v?))
200
+ .collect::<RbResult<Vec<Wrap<DataType>>>>()?;
201
+ let dtypes = vec_extract_wrapped(dtypes);
202
+ Ok(crate::functions::lazy::dtype_cols(dtypes))
203
+ }
204
+
113
205
  #[allow(clippy::too_many_arguments)]
114
206
  pub fn duration(
115
207
  weeks: Option<&RbExpr>,
@@ -146,38 +238,21 @@ pub fn duration(
146
238
  dsl::duration(args).into()
147
239
  }
148
240
 
149
- pub fn count() -> RbExpr {
150
- dsl::count().into()
151
- }
152
-
153
241
  pub fn first() -> RbExpr {
154
242
  dsl::first().into()
155
243
  }
156
244
 
157
- pub fn last() -> RbExpr {
158
- dsl::last().into()
159
- }
160
-
161
- pub fn dtype_cols(dtypes: Vec<DataType>) -> RbExpr {
162
- dsl::dtype_cols(dtypes).into()
163
- }
164
-
165
245
  pub fn fold(acc: &RbExpr, lambda: Value, exprs: RArray) -> RbResult<RbExpr> {
166
246
  let exprs = rb_exprs_to_exprs(exprs)?;
167
247
  let lambda = Opaque::from(lambda);
168
248
 
169
249
  let func =
170
250
  move |a: Series, b: Series| binary_lambda(Ruby::get().unwrap().get_inner(lambda), a, b);
171
- Ok(polars::lazy::dsl::fold_exprs(acc.inner.clone(), func, exprs).into())
251
+ Ok(dsl::fold_exprs(acc.inner.clone(), func, exprs).into())
172
252
  }
173
253
 
174
- pub fn cumfold(acc: &RbExpr, lambda: Value, exprs: RArray, include_init: bool) -> RbResult<RbExpr> {
175
- let exprs = rb_exprs_to_exprs(exprs)?;
176
- let lambda = Opaque::from(lambda);
177
-
178
- let func =
179
- move |a: Series, b: Series| binary_lambda(Ruby::get().unwrap().get_inner(lambda), a, b);
180
- Ok(polars::lazy::dsl::cum_fold_exprs(acc.inner.clone(), func, exprs, include_init).into())
254
+ pub fn last() -> RbExpr {
255
+ dsl::last().into()
181
256
  }
182
257
 
183
258
  pub fn lit(value: Value, allow_object: bool) -> RbResult<RbExpr> {
@@ -219,6 +294,10 @@ pub fn lit(value: Value, allow_object: bool) -> RbResult<RbExpr> {
219
294
  }
220
295
  }
221
296
 
297
+ pub fn pearson_corr(a: &RbExpr, b: &RbExpr, ddof: u8) -> RbExpr {
298
+ dsl::pearson_corr(a.inner.clone(), b.inner.clone(), ddof).into()
299
+ }
300
+
222
301
  pub fn repeat(value: &RbExpr, n: &RbExpr, dtype: Option<Wrap<DataType>>) -> RbResult<RbExpr> {
223
302
  let mut value = value.inner.clone();
224
303
  let n = n.inner.clone();
@@ -228,7 +307,7 @@ pub fn repeat(value: &RbExpr, n: &RbExpr, dtype: Option<Wrap<DataType>>) -> RbRe
228
307
  }
229
308
 
230
309
  if let Expr::Literal(lv) = &value {
231
- let av = lv.to_anyvalue().unwrap();
310
+ let av = lv.to_any_value().unwrap();
232
311
  // Integer inputs that fit in Int32 are parsed as such
233
312
  if let DataType::Int64 = av.dtype() {
234
313
  let int_value = av.try_extract::<i64>().unwrap();
@@ -240,35 +319,11 @@ pub fn repeat(value: &RbExpr, n: &RbExpr, dtype: Option<Wrap<DataType>>) -> RbRe
240
319
  Ok(dsl::repeat(value, n).into())
241
320
  }
242
321
 
243
- pub fn pearson_corr(a: &RbExpr, b: &RbExpr, ddof: u8) -> RbExpr {
244
- polars::lazy::dsl::pearson_corr(a.inner.clone(), b.inner.clone(), ddof).into()
245
- }
246
-
247
322
  pub fn spearman_rank_corr(a: &RbExpr, b: &RbExpr, ddof: u8, propagate_nans: bool) -> RbExpr {
248
- polars::lazy::dsl::spearman_rank_corr(a.inner.clone(), b.inner.clone(), ddof, propagate_nans)
249
- .into()
250
- }
251
-
252
- pub fn cov(a: &RbExpr, b: &RbExpr, ddof: u8) -> RbExpr {
253
- polars::lazy::dsl::cov(a.inner.clone(), b.inner.clone(), ddof).into()
254
- }
255
-
256
- pub fn concat_str(s: RArray, sep: String) -> RbResult<RbExpr> {
257
- let s = rb_exprs_to_exprs(s)?;
258
- Ok(dsl::concat_str(s, &sep).into())
323
+ dsl::spearman_rank_corr(a.inner.clone(), b.inner.clone(), ddof, propagate_nans).into()
259
324
  }
260
325
 
261
- pub fn concat_lst(s: RArray) -> RbResult<RbExpr> {
262
- let s = rb_exprs_to_exprs(s)?;
263
- let expr = dsl::concat_list(s).map_err(RbPolarsErr::from)?;
326
+ pub fn sql_expr(sql: String) -> RbResult<RbExpr> {
327
+ let expr = polars::sql::sql_expr(&sql).map_err(RbPolarsErr::from)?;
264
328
  Ok(expr.into())
265
329
  }
266
-
267
- pub fn dtype_cols2(dtypes: RArray) -> RbResult<RbExpr> {
268
- let dtypes = dtypes
269
- .each()
270
- .map(|v| Wrap::<DataType>::try_convert(v?))
271
- .collect::<RbResult<Vec<Wrap<DataType>>>>()?;
272
- let dtypes = vec_extract_wrapped(dtypes);
273
- Ok(crate::functions::lazy::dtype_cols(dtypes))
274
- }
@@ -7,7 +7,7 @@ use polars_core::POOL;
7
7
  use crate::conversion::Wrap;
8
8
  use crate::{RbResult, RbValueError};
9
9
 
10
- pub fn get_idx_type() -> Value {
10
+ pub fn get_index_type() -> Value {
11
11
  Wrap(IDX_DTYPE).into_value()
12
12
  }
13
13
 
@@ -36,3 +36,47 @@ pub fn get_float_fmt() -> RbResult<String> {
36
36
  };
37
37
  Ok(strfmt.to_string())
38
38
  }
39
+
40
+ pub fn set_float_precision(precision: Option<usize>) -> RbResult<()> {
41
+ use polars_core::fmt::set_float_precision;
42
+ set_float_precision(precision);
43
+ Ok(())
44
+ }
45
+
46
+ pub fn get_float_precision() -> RbResult<Option<usize>> {
47
+ use polars_core::fmt::get_float_precision;
48
+ Ok(get_float_precision())
49
+ }
50
+
51
+ pub fn set_thousands_separator(sep: Option<char>) -> RbResult<()> {
52
+ use polars_core::fmt::set_thousands_separator;
53
+ set_thousands_separator(sep);
54
+ Ok(())
55
+ }
56
+
57
+ pub fn get_thousands_separator() -> RbResult<Option<String>> {
58
+ use polars_core::fmt::get_thousands_separator;
59
+ Ok(Some(get_thousands_separator()))
60
+ }
61
+
62
+ pub fn set_decimal_separator(sep: Option<char>) -> RbResult<()> {
63
+ use polars_core::fmt::set_decimal_separator;
64
+ set_decimal_separator(sep);
65
+ Ok(())
66
+ }
67
+
68
+ pub fn get_decimal_separator() -> RbResult<Option<char>> {
69
+ use polars_core::fmt::get_decimal_separator;
70
+ Ok(Some(get_decimal_separator()))
71
+ }
72
+
73
+ pub fn set_trim_decimal_zeros(trim: Option<bool>) -> RbResult<()> {
74
+ use polars_core::fmt::set_trim_decimal_zeros;
75
+ set_trim_decimal_zeros(trim);
76
+ Ok(())
77
+ }
78
+
79
+ pub fn get_trim_decimal_zeros() -> RbResult<Option<bool>> {
80
+ use polars_core::fmt::get_trim_decimal_zeros;
81
+ Ok(Some(get_trim_decimal_zeros()))
82
+ }
@@ -1,3 +1,7 @@
1
+ use crate::RbResult;
2
+ use magnus::{RArray, Ruby, Value};
3
+ use polars_core::StringCacheHolder;
4
+
1
5
  pub fn enable_string_cache() {
2
6
  polars_core::enable_string_cache()
3
7
  }
@@ -9,3 +13,13 @@ pub fn disable_string_cache() {
9
13
  pub fn using_string_cache() -> bool {
10
14
  polars_core::using_string_cache()
11
15
  }
16
+
17
+ #[magnus::wrap(class = "Polars::RbStringCacheHolder")]
18
+ pub struct RbStringCacheHolder {}
19
+
20
+ impl RbStringCacheHolder {
21
+ pub fn hold() -> RbResult<Value> {
22
+ let _hold = StringCacheHolder::hold();
23
+ Ruby::get().unwrap().yield_splat(RArray::new())
24
+ }
25
+ }
@@ -1,9 +1,10 @@
1
1
  use magnus::{IntoValue, RArray, RHash, TryConvert, Value};
2
- use polars::io::RowCount;
2
+ use polars::io::RowIndex;
3
3
  use polars::lazy::frame::LazyFrame;
4
4
  use polars::prelude::*;
5
5
  use std::cell::RefCell;
6
6
  use std::io::{BufWriter, Read};
7
+ use std::num::NonZeroUsize;
7
8
  use std::path::PathBuf;
8
9
 
9
10
  use crate::conversion::*;
@@ -55,13 +56,14 @@ impl RbLazyFrame {
55
56
  pub fn new_from_ndjson(
56
57
  path: String,
57
58
  infer_schema_length: Option<usize>,
58
- batch_size: Option<usize>,
59
+ batch_size: Option<Wrap<NonZeroUsize>>,
59
60
  n_rows: Option<usize>,
60
61
  low_memory: bool,
61
62
  rechunk: bool,
62
- row_count: Option<(String, IdxSize)>,
63
+ row_index: Option<(String, IdxSize)>,
63
64
  ) -> RbResult<Self> {
64
- let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
65
+ let batch_size = batch_size.map(|v| v.0);
66
+ let row_index = row_index.map(|(name, offset)| RowIndex { name, offset });
65
67
 
66
68
  let lf = LazyJsonLineReader::new(path)
67
69
  .with_infer_schema_length(infer_schema_length)
@@ -69,7 +71,7 @@ impl RbLazyFrame {
69
71
  .with_n_rows(n_rows)
70
72
  .low_memory(low_memory)
71
73
  .with_rechunk(rechunk)
72
- .with_row_count(row_count)
74
+ .with_row_index(row_index)
73
75
  .finish()
74
76
  .map_err(RbPolarsErr::from)?;
75
77
  Ok(lf.into())
@@ -95,7 +97,7 @@ impl RbLazyFrame {
95
97
  let rechunk = bool::try_convert(arguments[14])?;
96
98
  let skip_rows_after_header = usize::try_convert(arguments[15])?;
97
99
  let encoding = Wrap::<CsvEncoding>::try_convert(arguments[16])?;
98
- let row_count = Option::<(String, IdxSize)>::try_convert(arguments[17])?;
100
+ let row_index = Option::<(String, IdxSize)>::try_convert(arguments[17])?;
99
101
  let try_parse_dates = bool::try_convert(arguments[18])?;
100
102
  let eol_char = String::try_convert(arguments[19])?;
101
103
  // end arguments
@@ -105,7 +107,7 @@ impl RbLazyFrame {
105
107
  let separator = separator.as_bytes()[0];
106
108
  let eol_char = eol_char.as_bytes()[0];
107
109
 
108
- let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
110
+ let row_index = row_index.map(|(name, offset)| RowIndex { name, offset });
109
111
 
110
112
  let overwrite_dtype = overwrite_dtype.map(|overwrite_dtype| {
111
113
  overwrite_dtype
@@ -129,7 +131,7 @@ impl RbLazyFrame {
129
131
  .with_rechunk(rechunk)
130
132
  .with_skip_rows_after_header(skip_rows_after_header)
131
133
  .with_encoding(encoding.0)
132
- .with_row_count(row_count)
134
+ .with_row_index(row_index)
133
135
  .with_try_parse_dates(try_parse_dates)
134
136
  .with_null_values(null_values);
135
137
 
@@ -147,18 +149,18 @@ impl RbLazyFrame {
147
149
  cache: bool,
148
150
  parallel: Wrap<ParallelStrategy>,
149
151
  rechunk: bool,
150
- row_count: Option<(String, IdxSize)>,
152
+ row_index: Option<(String, IdxSize)>,
151
153
  low_memory: bool,
152
154
  use_statistics: bool,
153
155
  hive_partitioning: bool,
154
156
  ) -> RbResult<Self> {
155
- let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
157
+ let row_index = row_index.map(|(name, offset)| RowIndex { name, offset });
156
158
  let args = ScanArgsParquet {
157
159
  n_rows,
158
160
  cache,
159
161
  parallel: parallel.0,
160
162
  rechunk,
161
- row_count,
163
+ row_index,
162
164
  low_memory,
163
165
  // TODO support cloud options
164
166
  cloud_options: None,
@@ -174,15 +176,15 @@ impl RbLazyFrame {
174
176
  n_rows: Option<usize>,
175
177
  cache: bool,
176
178
  rechunk: bool,
177
- row_count: Option<(String, IdxSize)>,
179
+ row_index: Option<(String, IdxSize)>,
178
180
  memory_map: bool,
179
181
  ) -> RbResult<Self> {
180
- let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
182
+ let row_index = row_index.map(|(name, offset)| RowIndex { name, offset });
181
183
  let args = ScanArgsIpc {
182
184
  n_rows,
183
185
  cache,
184
186
  rechunk,
185
- row_count,
187
+ row_index,
186
188
  memmap: memory_map,
187
189
  };
188
190
  let lf = LazyFrame::scan_ipc(path, args).map_err(RbPolarsErr::from)?;
@@ -216,20 +218,24 @@ impl RbLazyFrame {
216
218
  projection_pushdown: bool,
217
219
  simplify_expr: bool,
218
220
  slice_pushdown: bool,
219
- cse: bool,
221
+ comm_subplan_elim: bool,
222
+ comm_subexpr_elim: bool,
220
223
  allow_streaming: bool,
221
224
  _eager: bool,
222
225
  ) -> RbLazyFrame {
223
226
  let ldf = self.ldf.clone();
224
- let ldf = ldf
227
+ let mut ldf = ldf
225
228
  .with_type_coercion(type_coercion)
226
229
  .with_predicate_pushdown(predicate_pushdown)
227
230
  .with_simplify_expr(simplify_expr)
228
231
  .with_slice_pushdown(slice_pushdown)
229
- .with_comm_subplan_elim(cse)
230
232
  .with_streaming(allow_streaming)
231
233
  ._with_eager(_eager)
232
234
  .with_projection_pushdown(projection_pushdown);
235
+
236
+ ldf = ldf.with_comm_subplan_elim(comm_subplan_elim);
237
+ ldf = ldf.with_comm_subexpr_elim(comm_subexpr_elim);
238
+
233
239
  ldf.into()
234
240
  }
235
241
 
@@ -304,6 +310,75 @@ impl RbLazyFrame {
304
310
  Ok(())
305
311
  }
306
312
 
313
+ pub fn sink_ipc(
314
+ &self,
315
+ path: PathBuf,
316
+ compression: Option<Wrap<IpcCompression>>,
317
+ maintain_order: bool,
318
+ ) -> RbResult<()> {
319
+ let options = IpcWriterOptions {
320
+ compression: compression.map(|c| c.0),
321
+ maintain_order,
322
+ };
323
+
324
+ let ldf = self.ldf.clone();
325
+ ldf.sink_ipc(path, options).map_err(RbPolarsErr::from)?;
326
+ Ok(())
327
+ }
328
+
329
+ pub fn sink_csv(
330
+ &self,
331
+ path: PathBuf,
332
+ include_bom: bool,
333
+ include_header: bool,
334
+ separator: u8,
335
+ line_terminator: String,
336
+ quote_char: u8,
337
+ batch_size: Wrap<NonZeroUsize>,
338
+ datetime_format: Option<String>,
339
+ date_format: Option<String>,
340
+ time_format: Option<String>,
341
+ float_precision: Option<usize>,
342
+ null_value: Option<String>,
343
+ quote_style: Option<Wrap<QuoteStyle>>,
344
+ maintain_order: bool,
345
+ ) -> RbResult<()> {
346
+ let quote_style = quote_style.map_or(QuoteStyle::default(), |wrap| wrap.0);
347
+ let null_value = null_value.unwrap_or(SerializeOptions::default().null);
348
+
349
+ let serialize_options = SerializeOptions {
350
+ date_format,
351
+ time_format,
352
+ datetime_format,
353
+ float_precision,
354
+ separator,
355
+ quote_char,
356
+ null: null_value,
357
+ line_terminator,
358
+ quote_style,
359
+ };
360
+
361
+ let options = CsvWriterOptions {
362
+ include_bom,
363
+ include_header,
364
+ maintain_order,
365
+ batch_size: batch_size.0,
366
+ serialize_options,
367
+ };
368
+
369
+ let ldf = self.ldf.clone();
370
+ ldf.sink_csv(path, options).map_err(RbPolarsErr::from)?;
371
+ Ok(())
372
+ }
373
+
374
+ pub fn sink_json(&self, path: PathBuf, maintain_order: bool) -> RbResult<()> {
375
+ let options = JsonWriterOptions { maintain_order };
376
+
377
+ let ldf = self.ldf.clone();
378
+ ldf.sink_json(path, options).map_err(RbPolarsErr::from)?;
379
+ Ok(())
380
+ }
381
+
307
382
  pub fn fetch(&self, n_rows: usize) -> RbResult<RbDataFrame> {
308
383
  let ldf = self.ldf.clone();
309
384
  let df = ldf.fetch(n_rows).map_err(RbPolarsErr::from)?;
@@ -321,6 +396,12 @@ impl RbLazyFrame {
321
396
  Ok(ldf.select(exprs).into())
322
397
  }
323
398
 
399
+ pub fn select_seq(&self, exprs: RArray) -> RbResult<Self> {
400
+ let ldf = self.ldf.clone();
401
+ let exprs = rb_exprs_to_exprs(exprs)?;
402
+ Ok(ldf.select_seq(exprs).into())
403
+ }
404
+
324
405
  pub fn group_by(&self, by: RArray, maintain_order: bool) -> RbResult<RbLazyGroupBy> {
325
406
  let ldf = self.ldf.clone();
326
407
  let by = rb_exprs_to_exprs(by)?;
@@ -334,7 +415,7 @@ impl RbLazyFrame {
334
415
  })
335
416
  }
336
417
 
337
- pub fn group_by_rolling(
418
+ pub fn rolling(
338
419
  &self,
339
420
  index_column: &RbExpr,
340
421
  period: String,
@@ -459,6 +540,7 @@ impl RbLazyFrame {
459
540
  right_on: RArray,
460
541
  allow_parallel: bool,
461
542
  force_parallel: bool,
543
+ join_nulls: bool,
462
544
  how: Wrap<JoinType>,
463
545
  suffix: String,
464
546
  ) -> RbResult<Self> {
@@ -474,17 +556,28 @@ impl RbLazyFrame {
474
556
  .right_on(right_on)
475
557
  .allow_parallel(allow_parallel)
476
558
  .force_parallel(force_parallel)
559
+ .join_nulls(join_nulls)
477
560
  .how(how.0)
478
561
  .suffix(suffix)
479
562
  .finish()
480
563
  .into())
481
564
  }
482
565
 
566
+ pub fn with_column(&self, expr: &RbExpr) -> Self {
567
+ let ldf = self.ldf.clone();
568
+ ldf.with_column(expr.inner.clone()).into()
569
+ }
570
+
483
571
  pub fn with_columns(&self, exprs: RArray) -> RbResult<Self> {
484
572
  let ldf = self.ldf.clone();
485
573
  Ok(ldf.with_columns(rb_exprs_to_exprs(exprs)?).into())
486
574
  }
487
575
 
576
+ pub fn with_columns_seq(&self, exprs: RArray) -> RbResult<Self> {
577
+ let ldf = self.ldf.clone();
578
+ Ok(ldf.with_columns_seq(rb_exprs_to_exprs(exprs)?).into())
579
+ }
580
+
488
581
  pub fn rename(&self, existing: Vec<String>, new: Vec<String>) -> Self {
489
582
  let ldf = self.ldf.clone();
490
583
  ldf.rename(existing, new).into()
@@ -569,6 +662,11 @@ impl RbLazyFrame {
569
662
  Ok(ldf.explode(column).into())
570
663
  }
571
664
 
665
+ pub fn null_count(&self) -> Self {
666
+ let ldf = self.ldf.clone();
667
+ ldf.null_count().into()
668
+ }
669
+
572
670
  pub fn unique(
573
671
  &self,
574
672
  maintain_order: bool,
@@ -619,14 +717,18 @@ impl RbLazyFrame {
619
717
  ldf.melt(args).into()
620
718
  }
621
719
 
622
- pub fn with_row_count(&self, name: String, offset: Option<IdxSize>) -> Self {
720
+ pub fn with_row_index(&self, name: String, offset: Option<IdxSize>) -> Self {
623
721
  let ldf = self.ldf.clone();
624
- ldf.with_row_count(&name, offset).into()
722
+ ldf.with_row_index(&name, offset).into()
625
723
  }
626
724
 
627
- pub fn drop_columns(&self, cols: Vec<String>) -> Self {
725
+ pub fn drop(&self, cols: Vec<String>) -> Self {
628
726
  let ldf = self.ldf.clone();
629
- ldf.drop_columns(cols).into()
727
+ ldf.drop(cols).into()
728
+ }
729
+
730
+ pub fn cast_all(&self, dtype: Wrap<DataType>, strict: bool) -> Self {
731
+ self.ldf.clone().cast_all(dtype.0, strict).into()
630
732
  }
631
733
 
632
734
  pub fn clone(&self) -> Self {
@@ -668,4 +770,18 @@ impl RbLazyFrame {
668
770
  pub fn width(&self) -> RbResult<usize> {
669
771
  Ok(self.get_schema()?.len())
670
772
  }
773
+
774
+ pub fn count(&self) -> Self {
775
+ let ldf = self.ldf.clone();
776
+ ldf.count().into()
777
+ }
778
+
779
+ pub fn merge_sorted(&self, other: &Self, key: String) -> RbResult<Self> {
780
+ let out = self
781
+ .ldf
782
+ .clone()
783
+ .merge_sorted(other.ldf.clone(), &key)
784
+ .map_err(RbPolarsErr::from)?;
785
+ Ok(out.into())
786
+ }
671
787
  }