polars-df 0.8.0 → 0.9.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects those changes as they appear in the respective public registries.
Files changed (68) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +30 -1
  3. data/Cargo.lock +107 -59
  4. data/Cargo.toml +0 -3
  5. data/LICENSE.txt +1 -1
  6. data/README.md +2 -2
  7. data/ext/polars/Cargo.toml +15 -7
  8. data/ext/polars/src/batched_csv.rs +4 -4
  9. data/ext/polars/src/conversion/anyvalue.rs +185 -0
  10. data/ext/polars/src/conversion/chunked_array.rs +140 -0
  11. data/ext/polars/src/{conversion.rs → conversion/mod.rs} +260 -340
  12. data/ext/polars/src/dataframe.rs +69 -53
  13. data/ext/polars/src/expr/array.rs +74 -0
  14. data/ext/polars/src/expr/datetime.rs +22 -56
  15. data/ext/polars/src/expr/general.rs +61 -33
  16. data/ext/polars/src/expr/list.rs +52 -4
  17. data/ext/polars/src/expr/meta.rs +48 -0
  18. data/ext/polars/src/expr/rolling.rs +1 -0
  19. data/ext/polars/src/expr/string.rs +59 -8
  20. data/ext/polars/src/expr/struct.rs +8 -4
  21. data/ext/polars/src/functions/aggregation.rs +6 -0
  22. data/ext/polars/src/functions/lazy.rs +103 -48
  23. data/ext/polars/src/functions/meta.rs +45 -1
  24. data/ext/polars/src/functions/string_cache.rs +14 -0
  25. data/ext/polars/src/{lazyframe.rs → lazyframe/mod.rs} +138 -22
  26. data/ext/polars/src/lib.rs +226 -168
  27. data/ext/polars/src/series/aggregation.rs +20 -0
  28. data/ext/polars/src/series/mod.rs +25 -4
  29. data/lib/polars/array_expr.rb +449 -0
  30. data/lib/polars/array_name_space.rb +346 -0
  31. data/lib/polars/cat_expr.rb +24 -0
  32. data/lib/polars/cat_name_space.rb +75 -0
  33. data/lib/polars/config.rb +2 -2
  34. data/lib/polars/data_frame.rb +179 -43
  35. data/lib/polars/data_types.rb +191 -28
  36. data/lib/polars/date_time_expr.rb +31 -14
  37. data/lib/polars/exceptions.rb +12 -1
  38. data/lib/polars/expr.rb +866 -186
  39. data/lib/polars/functions/aggregation/horizontal.rb +246 -0
  40. data/lib/polars/functions/aggregation/vertical.rb +282 -0
  41. data/lib/polars/functions/as_datatype.rb +248 -0
  42. data/lib/polars/functions/col.rb +47 -0
  43. data/lib/polars/functions/eager.rb +182 -0
  44. data/lib/polars/functions/lazy.rb +1280 -0
  45. data/lib/polars/functions/len.rb +49 -0
  46. data/lib/polars/functions/lit.rb +35 -0
  47. data/lib/polars/functions/random.rb +16 -0
  48. data/lib/polars/functions/range/date_range.rb +103 -0
  49. data/lib/polars/functions/range/int_range.rb +51 -0
  50. data/lib/polars/functions/repeat.rb +144 -0
  51. data/lib/polars/functions/whenthen.rb +27 -0
  52. data/lib/polars/functions.rb +29 -416
  53. data/lib/polars/group_by.rb +2 -2
  54. data/lib/polars/io.rb +18 -25
  55. data/lib/polars/lazy_frame.rb +367 -53
  56. data/lib/polars/list_expr.rb +152 -6
  57. data/lib/polars/list_name_space.rb +102 -0
  58. data/lib/polars/meta_expr.rb +175 -7
  59. data/lib/polars/series.rb +273 -34
  60. data/lib/polars/string_cache.rb +75 -0
  61. data/lib/polars/string_expr.rb +412 -96
  62. data/lib/polars/string_name_space.rb +4 -4
  63. data/lib/polars/testing.rb +507 -0
  64. data/lib/polars/utils.rb +52 -8
  65. data/lib/polars/version.rb +1 -1
  66. data/lib/polars.rb +15 -2
  67. metadata +35 -5
  68. data/lib/polars/lazy_functions.rb +0 -1181
@@ -17,6 +17,44 @@ macro_rules! set_unwrapped_or_0 {
17
17
  };
18
18
  }
19
19
 
20
+ pub fn rolling_corr(
21
+ x: &RbExpr,
22
+ y: &RbExpr,
23
+ window_size: IdxSize,
24
+ min_periods: IdxSize,
25
+ ddof: u8,
26
+ ) -> RbExpr {
27
+ dsl::rolling_corr(
28
+ x.inner.clone(),
29
+ y.inner.clone(),
30
+ RollingCovOptions {
31
+ min_periods,
32
+ window_size,
33
+ ddof,
34
+ },
35
+ )
36
+ .into()
37
+ }
38
+
39
+ pub fn rolling_cov(
40
+ x: &RbExpr,
41
+ y: &RbExpr,
42
+ window_size: IdxSize,
43
+ min_periods: IdxSize,
44
+ ddof: u8,
45
+ ) -> RbExpr {
46
+ dsl::rolling_cov(
47
+ x.inner.clone(),
48
+ y.inner.clone(),
49
+ RollingCovOptions {
50
+ min_periods,
51
+ window_size,
52
+ ddof,
53
+ },
54
+ )
55
+ .into()
56
+ }
57
+
20
58
  pub fn arg_sort_by(by: RArray, descending: Vec<bool>) -> RbResult<RbExpr> {
21
59
  let by = rb_exprs_to_exprs(by)?;
22
60
  Ok(dsl::arg_sort_by(by, &descending).into())
@@ -83,6 +121,47 @@ pub fn concat_lf(
83
121
  Ok(lf.into())
84
122
  }
85
123
 
124
+ pub fn concat_list(s: RArray) -> RbResult<RbExpr> {
125
+ let s = rb_exprs_to_exprs(s)?;
126
+ let expr = dsl::concat_list(s).map_err(RbPolarsErr::from)?;
127
+ Ok(expr.into())
128
+ }
129
+
130
+ pub fn concat_str(s: RArray, separator: String, ignore_nulls: bool) -> RbResult<RbExpr> {
131
+ let s = rb_exprs_to_exprs(s)?;
132
+ Ok(dsl::concat_str(s, &separator, ignore_nulls).into())
133
+ }
134
+
135
+ pub fn len() -> RbExpr {
136
+ dsl::len().into()
137
+ }
138
+
139
+ pub fn cov(a: &RbExpr, b: &RbExpr, ddof: u8) -> RbExpr {
140
+ polars::lazy::dsl::cov(a.inner.clone(), b.inner.clone(), ddof).into()
141
+ }
142
+
143
+ pub fn arctan2(y: &RbExpr, x: &RbExpr) -> RbExpr {
144
+ y.inner.clone().arctan2(x.inner.clone()).into()
145
+ }
146
+
147
+ pub fn arctan2d(y: &RbExpr, x: &RbExpr) -> RbExpr {
148
+ y.inner.clone().arctan2(x.inner.clone()).degrees().into()
149
+ }
150
+
151
+ pub fn cum_fold(
152
+ acc: &RbExpr,
153
+ lambda: Value,
154
+ exprs: RArray,
155
+ include_init: bool,
156
+ ) -> RbResult<RbExpr> {
157
+ let exprs = rb_exprs_to_exprs(exprs)?;
158
+ let lambda = Opaque::from(lambda);
159
+
160
+ let func =
161
+ move |a: Series, b: Series| binary_lambda(Ruby::get().unwrap().get_inner(lambda), a, b);
162
+ Ok(dsl::cum_fold_exprs(acc.inner.clone(), func, exprs, include_init).into())
163
+ }
164
+
86
165
  pub fn concat_lf_diagonal(
87
166
  lfs: RArray,
88
167
  rechunk: bool,
@@ -110,6 +189,19 @@ pub fn concat_lf_diagonal(
110
189
  Ok(lf.into())
111
190
  }
112
191
 
192
+ pub fn dtype_cols(dtypes: Vec<DataType>) -> RbExpr {
193
+ dsl::dtype_cols(dtypes).into()
194
+ }
195
+
196
+ pub fn dtype_cols2(dtypes: RArray) -> RbResult<RbExpr> {
197
+ let dtypes = dtypes
198
+ .each()
199
+ .map(|v| Wrap::<DataType>::try_convert(v?))
200
+ .collect::<RbResult<Vec<Wrap<DataType>>>>()?;
201
+ let dtypes = vec_extract_wrapped(dtypes);
202
+ Ok(crate::functions::lazy::dtype_cols(dtypes))
203
+ }
204
+
113
205
  #[allow(clippy::too_many_arguments)]
114
206
  pub fn duration(
115
207
  weeks: Option<&RbExpr>,
@@ -146,38 +238,21 @@ pub fn duration(
146
238
  dsl::duration(args).into()
147
239
  }
148
240
 
149
- pub fn count() -> RbExpr {
150
- dsl::count().into()
151
- }
152
-
153
241
  pub fn first() -> RbExpr {
154
242
  dsl::first().into()
155
243
  }
156
244
 
157
- pub fn last() -> RbExpr {
158
- dsl::last().into()
159
- }
160
-
161
- pub fn dtype_cols(dtypes: Vec<DataType>) -> RbExpr {
162
- dsl::dtype_cols(dtypes).into()
163
- }
164
-
165
245
  pub fn fold(acc: &RbExpr, lambda: Value, exprs: RArray) -> RbResult<RbExpr> {
166
246
  let exprs = rb_exprs_to_exprs(exprs)?;
167
247
  let lambda = Opaque::from(lambda);
168
248
 
169
249
  let func =
170
250
  move |a: Series, b: Series| binary_lambda(Ruby::get().unwrap().get_inner(lambda), a, b);
171
- Ok(polars::lazy::dsl::fold_exprs(acc.inner.clone(), func, exprs).into())
251
+ Ok(dsl::fold_exprs(acc.inner.clone(), func, exprs).into())
172
252
  }
173
253
 
174
- pub fn cumfold(acc: &RbExpr, lambda: Value, exprs: RArray, include_init: bool) -> RbResult<RbExpr> {
175
- let exprs = rb_exprs_to_exprs(exprs)?;
176
- let lambda = Opaque::from(lambda);
177
-
178
- let func =
179
- move |a: Series, b: Series| binary_lambda(Ruby::get().unwrap().get_inner(lambda), a, b);
180
- Ok(polars::lazy::dsl::cum_fold_exprs(acc.inner.clone(), func, exprs, include_init).into())
254
+ pub fn last() -> RbExpr {
255
+ dsl::last().into()
181
256
  }
182
257
 
183
258
  pub fn lit(value: Value, allow_object: bool) -> RbResult<RbExpr> {
@@ -219,6 +294,10 @@ pub fn lit(value: Value, allow_object: bool) -> RbResult<RbExpr> {
219
294
  }
220
295
  }
221
296
 
297
+ pub fn pearson_corr(a: &RbExpr, b: &RbExpr, ddof: u8) -> RbExpr {
298
+ dsl::pearson_corr(a.inner.clone(), b.inner.clone(), ddof).into()
299
+ }
300
+
222
301
  pub fn repeat(value: &RbExpr, n: &RbExpr, dtype: Option<Wrap<DataType>>) -> RbResult<RbExpr> {
223
302
  let mut value = value.inner.clone();
224
303
  let n = n.inner.clone();
@@ -228,7 +307,7 @@ pub fn repeat(value: &RbExpr, n: &RbExpr, dtype: Option<Wrap<DataType>>) -> RbRe
228
307
  }
229
308
 
230
309
  if let Expr::Literal(lv) = &value {
231
- let av = lv.to_anyvalue().unwrap();
310
+ let av = lv.to_any_value().unwrap();
232
311
  // Integer inputs that fit in Int32 are parsed as such
233
312
  if let DataType::Int64 = av.dtype() {
234
313
  let int_value = av.try_extract::<i64>().unwrap();
@@ -240,35 +319,11 @@ pub fn repeat(value: &RbExpr, n: &RbExpr, dtype: Option<Wrap<DataType>>) -> RbRe
240
319
  Ok(dsl::repeat(value, n).into())
241
320
  }
242
321
 
243
- pub fn pearson_corr(a: &RbExpr, b: &RbExpr, ddof: u8) -> RbExpr {
244
- polars::lazy::dsl::pearson_corr(a.inner.clone(), b.inner.clone(), ddof).into()
245
- }
246
-
247
322
  pub fn spearman_rank_corr(a: &RbExpr, b: &RbExpr, ddof: u8, propagate_nans: bool) -> RbExpr {
248
- polars::lazy::dsl::spearman_rank_corr(a.inner.clone(), b.inner.clone(), ddof, propagate_nans)
249
- .into()
250
- }
251
-
252
- pub fn cov(a: &RbExpr, b: &RbExpr, ddof: u8) -> RbExpr {
253
- polars::lazy::dsl::cov(a.inner.clone(), b.inner.clone(), ddof).into()
254
- }
255
-
256
- pub fn concat_str(s: RArray, sep: String) -> RbResult<RbExpr> {
257
- let s = rb_exprs_to_exprs(s)?;
258
- Ok(dsl::concat_str(s, &sep).into())
323
+ dsl::spearman_rank_corr(a.inner.clone(), b.inner.clone(), ddof, propagate_nans).into()
259
324
  }
260
325
 
261
- pub fn concat_lst(s: RArray) -> RbResult<RbExpr> {
262
- let s = rb_exprs_to_exprs(s)?;
263
- let expr = dsl::concat_list(s).map_err(RbPolarsErr::from)?;
326
+ pub fn sql_expr(sql: String) -> RbResult<RbExpr> {
327
+ let expr = polars::sql::sql_expr(&sql).map_err(RbPolarsErr::from)?;
264
328
  Ok(expr.into())
265
329
  }
266
-
267
- pub fn dtype_cols2(dtypes: RArray) -> RbResult<RbExpr> {
268
- let dtypes = dtypes
269
- .each()
270
- .map(|v| Wrap::<DataType>::try_convert(v?))
271
- .collect::<RbResult<Vec<Wrap<DataType>>>>()?;
272
- let dtypes = vec_extract_wrapped(dtypes);
273
- Ok(crate::functions::lazy::dtype_cols(dtypes))
274
- }
@@ -7,7 +7,7 @@ use polars_core::POOL;
7
7
  use crate::conversion::Wrap;
8
8
  use crate::{RbResult, RbValueError};
9
9
 
10
- pub fn get_idx_type() -> Value {
10
+ pub fn get_index_type() -> Value {
11
11
  Wrap(IDX_DTYPE).into_value()
12
12
  }
13
13
 
@@ -36,3 +36,47 @@ pub fn get_float_fmt() -> RbResult<String> {
36
36
  };
37
37
  Ok(strfmt.to_string())
38
38
  }
39
+
40
+ pub fn set_float_precision(precision: Option<usize>) -> RbResult<()> {
41
+ use polars_core::fmt::set_float_precision;
42
+ set_float_precision(precision);
43
+ Ok(())
44
+ }
45
+
46
+ pub fn get_float_precision() -> RbResult<Option<usize>> {
47
+ use polars_core::fmt::get_float_precision;
48
+ Ok(get_float_precision())
49
+ }
50
+
51
+ pub fn set_thousands_separator(sep: Option<char>) -> RbResult<()> {
52
+ use polars_core::fmt::set_thousands_separator;
53
+ set_thousands_separator(sep);
54
+ Ok(())
55
+ }
56
+
57
+ pub fn get_thousands_separator() -> RbResult<Option<String>> {
58
+ use polars_core::fmt::get_thousands_separator;
59
+ Ok(Some(get_thousands_separator()))
60
+ }
61
+
62
+ pub fn set_decimal_separator(sep: Option<char>) -> RbResult<()> {
63
+ use polars_core::fmt::set_decimal_separator;
64
+ set_decimal_separator(sep);
65
+ Ok(())
66
+ }
67
+
68
+ pub fn get_decimal_separator() -> RbResult<Option<char>> {
69
+ use polars_core::fmt::get_decimal_separator;
70
+ Ok(Some(get_decimal_separator()))
71
+ }
72
+
73
+ pub fn set_trim_decimal_zeros(trim: Option<bool>) -> RbResult<()> {
74
+ use polars_core::fmt::set_trim_decimal_zeros;
75
+ set_trim_decimal_zeros(trim);
76
+ Ok(())
77
+ }
78
+
79
+ pub fn get_trim_decimal_zeros() -> RbResult<Option<bool>> {
80
+ use polars_core::fmt::get_trim_decimal_zeros;
81
+ Ok(Some(get_trim_decimal_zeros()))
82
+ }
@@ -1,3 +1,7 @@
1
+ use crate::RbResult;
2
+ use magnus::{RArray, Ruby, Value};
3
+ use polars_core::StringCacheHolder;
4
+
1
5
  pub fn enable_string_cache() {
2
6
  polars_core::enable_string_cache()
3
7
  }
@@ -9,3 +13,13 @@ pub fn disable_string_cache() {
9
13
  pub fn using_string_cache() -> bool {
10
14
  polars_core::using_string_cache()
11
15
  }
16
+
17
+ #[magnus::wrap(class = "Polars::RbStringCacheHolder")]
18
+ pub struct RbStringCacheHolder {}
19
+
20
+ impl RbStringCacheHolder {
21
+ pub fn hold() -> RbResult<Value> {
22
+ let _hold = StringCacheHolder::hold();
23
+ Ruby::get().unwrap().yield_splat(RArray::new())
24
+ }
25
+ }
@@ -1,9 +1,10 @@
1
1
  use magnus::{IntoValue, RArray, RHash, TryConvert, Value};
2
- use polars::io::RowCount;
2
+ use polars::io::RowIndex;
3
3
  use polars::lazy::frame::LazyFrame;
4
4
  use polars::prelude::*;
5
5
  use std::cell::RefCell;
6
6
  use std::io::{BufWriter, Read};
7
+ use std::num::NonZeroUsize;
7
8
  use std::path::PathBuf;
8
9
 
9
10
  use crate::conversion::*;
@@ -55,13 +56,14 @@ impl RbLazyFrame {
55
56
  pub fn new_from_ndjson(
56
57
  path: String,
57
58
  infer_schema_length: Option<usize>,
58
- batch_size: Option<usize>,
59
+ batch_size: Option<Wrap<NonZeroUsize>>,
59
60
  n_rows: Option<usize>,
60
61
  low_memory: bool,
61
62
  rechunk: bool,
62
- row_count: Option<(String, IdxSize)>,
63
+ row_index: Option<(String, IdxSize)>,
63
64
  ) -> RbResult<Self> {
64
- let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
65
+ let batch_size = batch_size.map(|v| v.0);
66
+ let row_index = row_index.map(|(name, offset)| RowIndex { name, offset });
65
67
 
66
68
  let lf = LazyJsonLineReader::new(path)
67
69
  .with_infer_schema_length(infer_schema_length)
@@ -69,7 +71,7 @@ impl RbLazyFrame {
69
71
  .with_n_rows(n_rows)
70
72
  .low_memory(low_memory)
71
73
  .with_rechunk(rechunk)
72
- .with_row_count(row_count)
74
+ .with_row_index(row_index)
73
75
  .finish()
74
76
  .map_err(RbPolarsErr::from)?;
75
77
  Ok(lf.into())
@@ -95,7 +97,7 @@ impl RbLazyFrame {
95
97
  let rechunk = bool::try_convert(arguments[14])?;
96
98
  let skip_rows_after_header = usize::try_convert(arguments[15])?;
97
99
  let encoding = Wrap::<CsvEncoding>::try_convert(arguments[16])?;
98
- let row_count = Option::<(String, IdxSize)>::try_convert(arguments[17])?;
100
+ let row_index = Option::<(String, IdxSize)>::try_convert(arguments[17])?;
99
101
  let try_parse_dates = bool::try_convert(arguments[18])?;
100
102
  let eol_char = String::try_convert(arguments[19])?;
101
103
  // end arguments
@@ -105,7 +107,7 @@ impl RbLazyFrame {
105
107
  let separator = separator.as_bytes()[0];
106
108
  let eol_char = eol_char.as_bytes()[0];
107
109
 
108
- let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
110
+ let row_index = row_index.map(|(name, offset)| RowIndex { name, offset });
109
111
 
110
112
  let overwrite_dtype = overwrite_dtype.map(|overwrite_dtype| {
111
113
  overwrite_dtype
@@ -129,7 +131,7 @@ impl RbLazyFrame {
129
131
  .with_rechunk(rechunk)
130
132
  .with_skip_rows_after_header(skip_rows_after_header)
131
133
  .with_encoding(encoding.0)
132
- .with_row_count(row_count)
134
+ .with_row_index(row_index)
133
135
  .with_try_parse_dates(try_parse_dates)
134
136
  .with_null_values(null_values);
135
137
 
@@ -147,18 +149,18 @@ impl RbLazyFrame {
147
149
  cache: bool,
148
150
  parallel: Wrap<ParallelStrategy>,
149
151
  rechunk: bool,
150
- row_count: Option<(String, IdxSize)>,
152
+ row_index: Option<(String, IdxSize)>,
151
153
  low_memory: bool,
152
154
  use_statistics: bool,
153
155
  hive_partitioning: bool,
154
156
  ) -> RbResult<Self> {
155
- let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
157
+ let row_index = row_index.map(|(name, offset)| RowIndex { name, offset });
156
158
  let args = ScanArgsParquet {
157
159
  n_rows,
158
160
  cache,
159
161
  parallel: parallel.0,
160
162
  rechunk,
161
- row_count,
163
+ row_index,
162
164
  low_memory,
163
165
  // TODO support cloud options
164
166
  cloud_options: None,
@@ -174,15 +176,15 @@ impl RbLazyFrame {
174
176
  n_rows: Option<usize>,
175
177
  cache: bool,
176
178
  rechunk: bool,
177
- row_count: Option<(String, IdxSize)>,
179
+ row_index: Option<(String, IdxSize)>,
178
180
  memory_map: bool,
179
181
  ) -> RbResult<Self> {
180
- let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
182
+ let row_index = row_index.map(|(name, offset)| RowIndex { name, offset });
181
183
  let args = ScanArgsIpc {
182
184
  n_rows,
183
185
  cache,
184
186
  rechunk,
185
- row_count,
187
+ row_index,
186
188
  memmap: memory_map,
187
189
  };
188
190
  let lf = LazyFrame::scan_ipc(path, args).map_err(RbPolarsErr::from)?;
@@ -216,20 +218,24 @@ impl RbLazyFrame {
216
218
  projection_pushdown: bool,
217
219
  simplify_expr: bool,
218
220
  slice_pushdown: bool,
219
- cse: bool,
221
+ comm_subplan_elim: bool,
222
+ comm_subexpr_elim: bool,
220
223
  allow_streaming: bool,
221
224
  _eager: bool,
222
225
  ) -> RbLazyFrame {
223
226
  let ldf = self.ldf.clone();
224
- let ldf = ldf
227
+ let mut ldf = ldf
225
228
  .with_type_coercion(type_coercion)
226
229
  .with_predicate_pushdown(predicate_pushdown)
227
230
  .with_simplify_expr(simplify_expr)
228
231
  .with_slice_pushdown(slice_pushdown)
229
- .with_comm_subplan_elim(cse)
230
232
  .with_streaming(allow_streaming)
231
233
  ._with_eager(_eager)
232
234
  .with_projection_pushdown(projection_pushdown);
235
+
236
+ ldf = ldf.with_comm_subplan_elim(comm_subplan_elim);
237
+ ldf = ldf.with_comm_subexpr_elim(comm_subexpr_elim);
238
+
233
239
  ldf.into()
234
240
  }
235
241
 
@@ -304,6 +310,75 @@ impl RbLazyFrame {
304
310
  Ok(())
305
311
  }
306
312
 
313
+ pub fn sink_ipc(
314
+ &self,
315
+ path: PathBuf,
316
+ compression: Option<Wrap<IpcCompression>>,
317
+ maintain_order: bool,
318
+ ) -> RbResult<()> {
319
+ let options = IpcWriterOptions {
320
+ compression: compression.map(|c| c.0),
321
+ maintain_order,
322
+ };
323
+
324
+ let ldf = self.ldf.clone();
325
+ ldf.sink_ipc(path, options).map_err(RbPolarsErr::from)?;
326
+ Ok(())
327
+ }
328
+
329
+ pub fn sink_csv(
330
+ &self,
331
+ path: PathBuf,
332
+ include_bom: bool,
333
+ include_header: bool,
334
+ separator: u8,
335
+ line_terminator: String,
336
+ quote_char: u8,
337
+ batch_size: Wrap<NonZeroUsize>,
338
+ datetime_format: Option<String>,
339
+ date_format: Option<String>,
340
+ time_format: Option<String>,
341
+ float_precision: Option<usize>,
342
+ null_value: Option<String>,
343
+ quote_style: Option<Wrap<QuoteStyle>>,
344
+ maintain_order: bool,
345
+ ) -> RbResult<()> {
346
+ let quote_style = quote_style.map_or(QuoteStyle::default(), |wrap| wrap.0);
347
+ let null_value = null_value.unwrap_or(SerializeOptions::default().null);
348
+
349
+ let serialize_options = SerializeOptions {
350
+ date_format,
351
+ time_format,
352
+ datetime_format,
353
+ float_precision,
354
+ separator,
355
+ quote_char,
356
+ null: null_value,
357
+ line_terminator,
358
+ quote_style,
359
+ };
360
+
361
+ let options = CsvWriterOptions {
362
+ include_bom,
363
+ include_header,
364
+ maintain_order,
365
+ batch_size: batch_size.0,
366
+ serialize_options,
367
+ };
368
+
369
+ let ldf = self.ldf.clone();
370
+ ldf.sink_csv(path, options).map_err(RbPolarsErr::from)?;
371
+ Ok(())
372
+ }
373
+
374
+ pub fn sink_json(&self, path: PathBuf, maintain_order: bool) -> RbResult<()> {
375
+ let options = JsonWriterOptions { maintain_order };
376
+
377
+ let ldf = self.ldf.clone();
378
+ ldf.sink_json(path, options).map_err(RbPolarsErr::from)?;
379
+ Ok(())
380
+ }
381
+
307
382
  pub fn fetch(&self, n_rows: usize) -> RbResult<RbDataFrame> {
308
383
  let ldf = self.ldf.clone();
309
384
  let df = ldf.fetch(n_rows).map_err(RbPolarsErr::from)?;
@@ -321,6 +396,12 @@ impl RbLazyFrame {
321
396
  Ok(ldf.select(exprs).into())
322
397
  }
323
398
 
399
+ pub fn select_seq(&self, exprs: RArray) -> RbResult<Self> {
400
+ let ldf = self.ldf.clone();
401
+ let exprs = rb_exprs_to_exprs(exprs)?;
402
+ Ok(ldf.select_seq(exprs).into())
403
+ }
404
+
324
405
  pub fn group_by(&self, by: RArray, maintain_order: bool) -> RbResult<RbLazyGroupBy> {
325
406
  let ldf = self.ldf.clone();
326
407
  let by = rb_exprs_to_exprs(by)?;
@@ -334,7 +415,7 @@ impl RbLazyFrame {
334
415
  })
335
416
  }
336
417
 
337
- pub fn group_by_rolling(
418
+ pub fn rolling(
338
419
  &self,
339
420
  index_column: &RbExpr,
340
421
  period: String,
@@ -459,6 +540,7 @@ impl RbLazyFrame {
459
540
  right_on: RArray,
460
541
  allow_parallel: bool,
461
542
  force_parallel: bool,
543
+ join_nulls: bool,
462
544
  how: Wrap<JoinType>,
463
545
  suffix: String,
464
546
  ) -> RbResult<Self> {
@@ -474,17 +556,28 @@ impl RbLazyFrame {
474
556
  .right_on(right_on)
475
557
  .allow_parallel(allow_parallel)
476
558
  .force_parallel(force_parallel)
559
+ .join_nulls(join_nulls)
477
560
  .how(how.0)
478
561
  .suffix(suffix)
479
562
  .finish()
480
563
  .into())
481
564
  }
482
565
 
566
+ pub fn with_column(&self, expr: &RbExpr) -> Self {
567
+ let ldf = self.ldf.clone();
568
+ ldf.with_column(expr.inner.clone()).into()
569
+ }
570
+
483
571
  pub fn with_columns(&self, exprs: RArray) -> RbResult<Self> {
484
572
  let ldf = self.ldf.clone();
485
573
  Ok(ldf.with_columns(rb_exprs_to_exprs(exprs)?).into())
486
574
  }
487
575
 
576
+ pub fn with_columns_seq(&self, exprs: RArray) -> RbResult<Self> {
577
+ let ldf = self.ldf.clone();
578
+ Ok(ldf.with_columns_seq(rb_exprs_to_exprs(exprs)?).into())
579
+ }
580
+
488
581
  pub fn rename(&self, existing: Vec<String>, new: Vec<String>) -> Self {
489
582
  let ldf = self.ldf.clone();
490
583
  ldf.rename(existing, new).into()
@@ -569,6 +662,11 @@ impl RbLazyFrame {
569
662
  Ok(ldf.explode(column).into())
570
663
  }
571
664
 
665
+ pub fn null_count(&self) -> Self {
666
+ let ldf = self.ldf.clone();
667
+ ldf.null_count().into()
668
+ }
669
+
572
670
  pub fn unique(
573
671
  &self,
574
672
  maintain_order: bool,
@@ -619,14 +717,18 @@ impl RbLazyFrame {
619
717
  ldf.melt(args).into()
620
718
  }
621
719
 
622
- pub fn with_row_count(&self, name: String, offset: Option<IdxSize>) -> Self {
720
+ pub fn with_row_index(&self, name: String, offset: Option<IdxSize>) -> Self {
623
721
  let ldf = self.ldf.clone();
624
- ldf.with_row_count(&name, offset).into()
722
+ ldf.with_row_index(&name, offset).into()
625
723
  }
626
724
 
627
- pub fn drop_columns(&self, cols: Vec<String>) -> Self {
725
+ pub fn drop(&self, cols: Vec<String>) -> Self {
628
726
  let ldf = self.ldf.clone();
629
- ldf.drop_columns(cols).into()
727
+ ldf.drop(cols).into()
728
+ }
729
+
730
+ pub fn cast_all(&self, dtype: Wrap<DataType>, strict: bool) -> Self {
731
+ self.ldf.clone().cast_all(dtype.0, strict).into()
630
732
  }
631
733
 
632
734
  pub fn clone(&self) -> Self {
@@ -668,4 +770,18 @@ impl RbLazyFrame {
668
770
  pub fn width(&self) -> RbResult<usize> {
669
771
  Ok(self.get_schema()?.len())
670
772
  }
773
+
774
+ pub fn count(&self) -> Self {
775
+ let ldf = self.ldf.clone();
776
+ ldf.count().into()
777
+ }
778
+
779
+ pub fn merge_sorted(&self, other: &Self, key: String) -> RbResult<Self> {
780
+ let out = self
781
+ .ldf
782
+ .clone()
783
+ .merge_sorted(other.ldf.clone(), &key)
784
+ .map_err(RbPolarsErr::from)?;
785
+ Ok(out.into())
786
+ }
671
787
  }