polars-df 0.9.0 → 0.11.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (71) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +23 -0
  3. data/Cargo.lock +144 -57
  4. data/README.md +7 -6
  5. data/ext/polars/Cargo.toml +10 -6
  6. data/ext/polars/src/batched_csv.rs +53 -50
  7. data/ext/polars/src/conversion/anyvalue.rs +3 -2
  8. data/ext/polars/src/conversion/mod.rs +31 -67
  9. data/ext/polars/src/dataframe/construction.rs +186 -0
  10. data/ext/polars/src/dataframe/export.rs +48 -0
  11. data/ext/polars/src/dataframe/general.rs +607 -0
  12. data/ext/polars/src/dataframe/io.rs +463 -0
  13. data/ext/polars/src/dataframe/mod.rs +26 -0
  14. data/ext/polars/src/expr/array.rs +6 -2
  15. data/ext/polars/src/expr/datetime.rs +13 -4
  16. data/ext/polars/src/expr/general.rs +50 -9
  17. data/ext/polars/src/expr/list.rs +6 -2
  18. data/ext/polars/src/expr/rolling.rs +185 -69
  19. data/ext/polars/src/expr/string.rs +12 -33
  20. data/ext/polars/src/file.rs +158 -11
  21. data/ext/polars/src/functions/lazy.rs +20 -3
  22. data/ext/polars/src/functions/range.rs +74 -0
  23. data/ext/polars/src/functions/whenthen.rs +47 -17
  24. data/ext/polars/src/interop/mod.rs +1 -0
  25. data/ext/polars/src/interop/numo/mod.rs +2 -0
  26. data/ext/polars/src/interop/numo/to_numo_df.rs +23 -0
  27. data/ext/polars/src/interop/numo/to_numo_series.rs +60 -0
  28. data/ext/polars/src/lazyframe/mod.rs +111 -56
  29. data/ext/polars/src/lib.rs +68 -34
  30. data/ext/polars/src/map/dataframe.rs +17 -9
  31. data/ext/polars/src/map/lazy.rs +5 -25
  32. data/ext/polars/src/map/series.rs +7 -1
  33. data/ext/polars/src/series/aggregation.rs +47 -30
  34. data/ext/polars/src/series/export.rs +131 -49
  35. data/ext/polars/src/series/mod.rs +13 -133
  36. data/lib/polars/array_expr.rb +6 -2
  37. data/lib/polars/batched_csv_reader.rb +11 -3
  38. data/lib/polars/convert.rb +6 -1
  39. data/lib/polars/data_frame.rb +225 -370
  40. data/lib/polars/date_time_expr.rb +11 -4
  41. data/lib/polars/date_time_name_space.rb +14 -4
  42. data/lib/polars/dynamic_group_by.rb +2 -2
  43. data/lib/polars/exceptions.rb +4 -0
  44. data/lib/polars/expr.rb +1171 -54
  45. data/lib/polars/functions/lazy.rb +3 -3
  46. data/lib/polars/functions/range/date_range.rb +92 -0
  47. data/lib/polars/functions/range/datetime_range.rb +149 -0
  48. data/lib/polars/functions/range/time_range.rb +141 -0
  49. data/lib/polars/functions/whenthen.rb +74 -5
  50. data/lib/polars/group_by.rb +88 -23
  51. data/lib/polars/io/avro.rb +24 -0
  52. data/lib/polars/{io.rb → io/csv.rb} +307 -489
  53. data/lib/polars/io/database.rb +73 -0
  54. data/lib/polars/io/ipc.rb +247 -0
  55. data/lib/polars/io/json.rb +18 -0
  56. data/lib/polars/io/ndjson.rb +69 -0
  57. data/lib/polars/io/parquet.rb +226 -0
  58. data/lib/polars/lazy_frame.rb +55 -195
  59. data/lib/polars/lazy_group_by.rb +100 -3
  60. data/lib/polars/list_expr.rb +6 -2
  61. data/lib/polars/rolling_group_by.rb +2 -2
  62. data/lib/polars/series.rb +14 -12
  63. data/lib/polars/string_expr.rb +38 -36
  64. data/lib/polars/utils.rb +89 -1
  65. data/lib/polars/version.rb +1 -1
  66. data/lib/polars/whenthen.rb +83 -0
  67. data/lib/polars.rb +10 -3
  68. metadata +23 -8
  69. data/ext/polars/src/dataframe.rs +0 -1182
  70. data/lib/polars/when.rb +0 -16
  71. data/lib/polars/when_then.rb +0 -19
@@ -0,0 +1,463 @@
1
+ use magnus::{prelude::*, RString, Value};
2
+ use polars::io::avro::AvroCompression;
3
+ use polars::io::mmap::ReaderBytes;
4
+ use polars::io::RowIndex;
5
+ use polars::prelude::*;
6
+ use std::io::{BufWriter, Cursor};
7
+ use std::num::NonZeroUsize;
8
+ use std::ops::Deref;
9
+
10
+ use super::*;
11
+ use crate::conversion::*;
12
+ use crate::file::{get_either_file, get_file_like, get_mmap_bytes_reader, EitherRustRubyFile};
13
+ use crate::{RbPolarsErr, RbResult};
14
+
15
+ impl RbDataFrame {
16
+ pub fn read_csv(arguments: &[Value]) -> RbResult<Self> {
17
+ // start arguments
18
+ // this pattern is needed for more than 16
19
+ let rb_f = arguments[0];
20
+ let infer_schema_length = Option::<usize>::try_convert(arguments[1])?;
21
+ let chunk_size = usize::try_convert(arguments[2])?;
22
+ let has_header = bool::try_convert(arguments[3])?;
23
+ let ignore_errors = bool::try_convert(arguments[4])?;
24
+ let n_rows = Option::<usize>::try_convert(arguments[5])?;
25
+ let skip_rows = usize::try_convert(arguments[6])?;
26
+ let projection = Option::<Vec<usize>>::try_convert(arguments[7])?;
27
+ let separator = String::try_convert(arguments[8])?;
28
+ let rechunk = bool::try_convert(arguments[9])?;
29
+ let columns = Option::<Vec<String>>::try_convert(arguments[10])?;
30
+ let encoding = Wrap::<CsvEncoding>::try_convert(arguments[11])?;
31
+ let n_threads = Option::<usize>::try_convert(arguments[12])?;
32
+ let path = Option::<String>::try_convert(arguments[13])?;
33
+ let overwrite_dtype = Option::<Vec<(String, Wrap<DataType>)>>::try_convert(arguments[14])?;
34
+ // TODO fix
35
+ let overwrite_dtype_slice = Option::<Vec<Wrap<DataType>>>::None; // Option::<Vec<Wrap<DataType>>>::try_convert(arguments[15])?;
36
+ let low_memory = bool::try_convert(arguments[16])?;
37
+ let comment_prefix = Option::<String>::try_convert(arguments[17])?;
38
+ let quote_char = Option::<String>::try_convert(arguments[18])?;
39
+ let null_values = Option::<Wrap<NullValues>>::try_convert(arguments[19])?;
40
+ let missing_utf8_is_empty_string = bool::try_convert(arguments[20])?;
41
+ let try_parse_dates = bool::try_convert(arguments[21])?;
42
+ let skip_rows_after_header = usize::try_convert(arguments[22])?;
43
+ let row_index = Option::<(String, IdxSize)>::try_convert(arguments[23])?;
44
+ let sample_size = usize::try_convert(arguments[24])?;
45
+ let eol_char = String::try_convert(arguments[25])?;
46
+ let raise_if_empty = bool::try_convert(arguments[26])?;
47
+ let truncate_ragged_lines = bool::try_convert(arguments[27])?;
48
+ let decimal_comma = bool::try_convert(arguments[28])?;
49
+ let schema = Option::<Wrap<Schema>>::try_convert(arguments[29])?;
50
+ // end arguments
51
+
52
+ let null_values = null_values.map(|w| w.0);
53
+ let eol_char = eol_char.as_bytes()[0];
54
+ let row_index = row_index.map(|(name, offset)| RowIndex {
55
+ name: Arc::from(name.as_str()),
56
+ offset,
57
+ });
58
+ let quote_char = if let Some(s) = quote_char {
59
+ if s.is_empty() {
60
+ None
61
+ } else {
62
+ Some(s.as_bytes()[0])
63
+ }
64
+ } else {
65
+ None
66
+ };
67
+
68
+ let overwrite_dtype = overwrite_dtype.map(|overwrite_dtype| {
69
+ overwrite_dtype
70
+ .iter()
71
+ .map(|(name, dtype)| {
72
+ let dtype = dtype.0.clone();
73
+ Field::new(name, dtype)
74
+ })
75
+ .collect::<Schema>()
76
+ });
77
+
78
+ let overwrite_dtype_slice = overwrite_dtype_slice.map(|overwrite_dtype| {
79
+ overwrite_dtype
80
+ .iter()
81
+ .map(|dt| dt.0.clone())
82
+ .collect::<Vec<_>>()
83
+ });
84
+
85
+ let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
86
+ let df = CsvReadOptions::default()
87
+ .with_path(path)
88
+ .with_infer_schema_length(infer_schema_length)
89
+ .with_has_header(has_header)
90
+ .with_n_rows(n_rows)
91
+ .with_skip_rows(skip_rows)
92
+ .with_ignore_errors(ignore_errors)
93
+ .with_projection(projection.map(Arc::new))
94
+ .with_rechunk(rechunk)
95
+ .with_chunk_size(chunk_size)
96
+ .with_columns(columns.map(Arc::new))
97
+ .with_n_threads(n_threads)
98
+ .with_schema_overwrite(overwrite_dtype.map(Arc::new))
99
+ .with_dtype_overwrite(overwrite_dtype_slice.map(Arc::new))
100
+ .with_schema(schema.map(|schema| Arc::new(schema.0)))
101
+ .with_low_memory(low_memory)
102
+ .with_skip_rows_after_header(skip_rows_after_header)
103
+ .with_row_index(row_index)
104
+ .with_sample_size(sample_size)
105
+ .with_raise_if_empty(raise_if_empty)
106
+ .with_parse_options(
107
+ CsvParseOptions::default()
108
+ .with_separator(separator.as_bytes()[0])
109
+ .with_encoding(encoding.0)
110
+ .with_missing_is_null(!missing_utf8_is_empty_string)
111
+ .with_comment_prefix(comment_prefix.as_deref())
112
+ .with_null_values(null_values)
113
+ .with_try_parse_dates(try_parse_dates)
114
+ .with_quote_char(quote_char)
115
+ .with_eol_char(eol_char)
116
+ .with_truncate_ragged_lines(truncate_ragged_lines)
117
+ .with_decimal_comma(decimal_comma),
118
+ )
119
+ .into_reader_with_file_handle(mmap_bytes_r)
120
+ .finish()
121
+ .map_err(RbPolarsErr::from)?;
122
+ Ok(df.into())
123
+ }
124
+
125
+ #[allow(clippy::too_many_arguments)]
126
+ pub fn read_parquet(
127
+ rb_f: Value,
128
+ columns: Option<Vec<String>>,
129
+ projection: Option<Vec<usize>>,
130
+ n_rows: Option<usize>,
131
+ parallel: Wrap<ParallelStrategy>,
132
+ row_index: Option<(String, IdxSize)>,
133
+ low_memory: bool,
134
+ use_statistics: bool,
135
+ rechunk: bool,
136
+ ) -> RbResult<Self> {
137
+ use EitherRustRubyFile::*;
138
+
139
+ let row_index = row_index.map(|(name, offset)| RowIndex {
140
+ name: Arc::from(name.as_str()),
141
+ offset,
142
+ });
143
+ let result = match get_either_file(rb_f, false)? {
144
+ Rb(f) => {
145
+ let buf = f.as_buffer();
146
+ ParquetReader::new(buf)
147
+ .with_projection(projection)
148
+ .with_columns(columns)
149
+ .read_parallel(parallel.0)
150
+ .with_n_rows(n_rows)
151
+ .with_row_index(row_index)
152
+ .set_low_memory(low_memory)
153
+ .use_statistics(use_statistics)
154
+ .set_rechunk(rechunk)
155
+ .finish()
156
+ }
157
+ Rust(f) => ParquetReader::new(f.into_inner())
158
+ .with_projection(projection)
159
+ .with_columns(columns)
160
+ .read_parallel(parallel.0)
161
+ .with_n_rows(n_rows)
162
+ .with_row_index(row_index)
163
+ .use_statistics(use_statistics)
164
+ .set_rechunk(rechunk)
165
+ .finish(),
166
+ };
167
+ let df = result.map_err(RbPolarsErr::from)?;
168
+ Ok(RbDataFrame::new(df))
169
+ }
170
+
171
+ pub fn read_json(rb_f: Value) -> RbResult<Self> {
172
+ // memmap the file first
173
+ let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
174
+ let mmap_read: ReaderBytes = (&mmap_bytes_r).into();
175
+ let bytes = mmap_read.deref();
176
+
177
+ // Happy path is our column oriented json as that is most performant
178
+ // on failure we try
179
+ match serde_json::from_slice::<DataFrame>(bytes) {
180
+ Ok(df) => Ok(df.into()),
181
+ // try arrow json reader instead
182
+ // this is row oriented
183
+ Err(e) => {
184
+ let msg = format!("{e}");
185
+ if msg.contains("successful parse invalid data") {
186
+ let e = RbPolarsErr::from(PolarsError::ComputeError(msg.into()));
187
+ Err(e)
188
+ } else {
189
+ let out = JsonReader::new(mmap_bytes_r)
190
+ .with_json_format(JsonFormat::Json)
191
+ .finish()
192
+ .map_err(|e| RbPolarsErr::other(format!("{:?}", e)))?;
193
+ Ok(out.into())
194
+ }
195
+ }
196
+ }
197
+ }
198
+
199
+ pub fn read_ndjson(rb_f: Value) -> RbResult<Self> {
200
+ let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
201
+
202
+ let out = JsonReader::new(mmap_bytes_r)
203
+ .with_json_format(JsonFormat::JsonLines)
204
+ .finish()
205
+ .map_err(|e| RbPolarsErr::other(format!("{:?}", e)))?;
206
+ Ok(out.into())
207
+ }
208
+
209
+ pub fn read_ipc(
210
+ rb_f: Value,
211
+ columns: Option<Vec<String>>,
212
+ projection: Option<Vec<usize>>,
213
+ n_rows: Option<usize>,
214
+ row_index: Option<(String, IdxSize)>,
215
+ _memory_map: bool,
216
+ ) -> RbResult<Self> {
217
+ let row_index = row_index.map(|(name, offset)| RowIndex {
218
+ name: Arc::from(name.as_str()),
219
+ offset,
220
+ });
221
+ let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
222
+
223
+ // TODO fix
224
+ let mmap_path = None;
225
+ let df = IpcReader::new(mmap_bytes_r)
226
+ .with_projection(projection)
227
+ .with_columns(columns)
228
+ .with_n_rows(n_rows)
229
+ .with_row_index(row_index)
230
+ .memory_mapped(mmap_path)
231
+ .finish()
232
+ .map_err(RbPolarsErr::from)?;
233
+ Ok(RbDataFrame::new(df))
234
+ }
235
+
236
+ pub fn read_ipc_stream(
237
+ rb_f: Value,
238
+ columns: Option<Vec<String>>,
239
+ projection: Option<Vec<usize>>,
240
+ n_rows: Option<usize>,
241
+ row_index: Option<(String, IdxSize)>,
242
+ rechunk: bool,
243
+ ) -> RbResult<Self> {
244
+ let row_index = row_index.map(|(name, offset)| RowIndex {
245
+ name: Arc::from(name.as_str()),
246
+ offset,
247
+ });
248
+ // rb_f = read_if_bytesio(rb_f);
249
+ let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
250
+ let df = IpcStreamReader::new(mmap_bytes_r)
251
+ .with_projection(projection)
252
+ .with_columns(columns)
253
+ .with_n_rows(n_rows)
254
+ .with_row_index(row_index)
255
+ .set_rechunk(rechunk)
256
+ .finish()
257
+ .map_err(RbPolarsErr::from)?;
258
+ Ok(RbDataFrame::new(df))
259
+ }
260
+
261
+ pub fn read_avro(
262
+ rb_f: Value,
263
+ columns: Option<Vec<String>>,
264
+ projection: Option<Vec<usize>>,
265
+ n_rows: Option<usize>,
266
+ ) -> RbResult<Self> {
267
+ use polars::io::avro::AvroReader;
268
+
269
+ let file = get_file_like(rb_f, false)?;
270
+ let df = AvroReader::new(file)
271
+ .with_projection(projection)
272
+ .with_columns(columns)
273
+ .with_n_rows(n_rows)
274
+ .finish()
275
+ .map_err(RbPolarsErr::from)?;
276
+ Ok(RbDataFrame::new(df))
277
+ }
278
+
279
+ #[allow(clippy::too_many_arguments)]
280
+ pub fn write_csv(
281
+ &self,
282
+ rb_f: Value,
283
+ include_header: bool,
284
+ separator: u8,
285
+ quote_char: u8,
286
+ batch_size: Wrap<NonZeroUsize>,
287
+ datetime_format: Option<String>,
288
+ date_format: Option<String>,
289
+ time_format: Option<String>,
290
+ float_precision: Option<usize>,
291
+ null_value: Option<String>,
292
+ ) -> RbResult<()> {
293
+ let batch_size = batch_size.0;
294
+ let null = null_value.unwrap_or_default();
295
+
296
+ if let Ok(s) = String::try_convert(rb_f) {
297
+ let f = std::fs::File::create(s).unwrap();
298
+ // no need for a buffered writer, because the csv writer does internal buffering
299
+ CsvWriter::new(f)
300
+ .include_header(include_header)
301
+ .with_separator(separator)
302
+ .with_quote_char(quote_char)
303
+ .with_batch_size(batch_size)
304
+ .with_datetime_format(datetime_format)
305
+ .with_date_format(date_format)
306
+ .with_time_format(time_format)
307
+ .with_float_precision(float_precision)
308
+ .with_null_value(null)
309
+ .finish(&mut self.df.borrow_mut())
310
+ .map_err(RbPolarsErr::from)?;
311
+ } else {
312
+ let mut buf = Cursor::new(Vec::new());
313
+ CsvWriter::new(&mut buf)
314
+ .include_header(include_header)
315
+ .with_separator(separator)
316
+ .with_quote_char(quote_char)
317
+ .with_batch_size(batch_size)
318
+ .with_datetime_format(datetime_format)
319
+ .with_date_format(date_format)
320
+ .with_time_format(time_format)
321
+ .with_float_precision(float_precision)
322
+ .with_null_value(null)
323
+ .finish(&mut self.df.borrow_mut())
324
+ .map_err(RbPolarsErr::from)?;
325
+ // TODO less copying
326
+ let rb_str = RString::from_slice(&buf.into_inner());
327
+ rb_f.funcall::<_, _, Value>("write", (rb_str,))?;
328
+ }
329
+
330
+ Ok(())
331
+ }
332
+
333
+ pub fn write_parquet(
334
+ &self,
335
+ rb_f: Value,
336
+ compression: String,
337
+ compression_level: Option<i32>,
338
+ statistics: bool,
339
+ row_group_size: Option<usize>,
340
+ data_page_size: Option<usize>,
341
+ ) -> RbResult<()> {
342
+ let compression = parse_parquet_compression(&compression, compression_level)?;
343
+
344
+ if let Ok(s) = String::try_convert(rb_f) {
345
+ let f = std::fs::File::create(s).unwrap();
346
+ ParquetWriter::new(f)
347
+ .with_compression(compression)
348
+ .with_statistics(statistics)
349
+ .with_row_group_size(row_group_size)
350
+ .with_data_page_size(data_page_size)
351
+ .finish(&mut self.df.borrow_mut())
352
+ .map_err(RbPolarsErr::from)?;
353
+ } else {
354
+ let buf = get_file_like(rb_f, true)?;
355
+ ParquetWriter::new(buf)
356
+ .with_compression(compression)
357
+ .with_statistics(statistics)
358
+ .with_row_group_size(row_group_size)
359
+ .with_data_page_size(data_page_size)
360
+ .finish(&mut self.df.borrow_mut())
361
+ .map_err(RbPolarsErr::from)?;
362
+ }
363
+
364
+ Ok(())
365
+ }
366
+
367
+ pub fn write_json(&self, rb_f: Value, pretty: bool, row_oriented: bool) -> RbResult<()> {
368
+ let file = BufWriter::new(get_file_like(rb_f, true)?);
369
+
370
+ let r = match (pretty, row_oriented) {
371
+ (_, true) => JsonWriter::new(file)
372
+ .with_json_format(JsonFormat::Json)
373
+ .finish(&mut self.df.borrow_mut()),
374
+ (true, _) => serde_json::to_writer_pretty(file, &*self.df.borrow())
375
+ .map_err(|e| PolarsError::ComputeError(format!("{:?}", e).into())),
376
+ (false, _) => serde_json::to_writer(file, &*self.df.borrow())
377
+ .map_err(|e| PolarsError::ComputeError(format!("{:?}", e).into())),
378
+ };
379
+ r.map_err(|e| RbPolarsErr::other(format!("{:?}", e)))?;
380
+ Ok(())
381
+ }
382
+
383
+ pub fn write_ndjson(&self, rb_f: Value) -> RbResult<()> {
384
+ let file = BufWriter::new(get_file_like(rb_f, true)?);
385
+
386
+ let r = JsonWriter::new(file)
387
+ .with_json_format(JsonFormat::JsonLines)
388
+ .finish(&mut self.df.borrow_mut());
389
+
390
+ r.map_err(|e| RbPolarsErr::other(format!("{:?}", e)))?;
391
+ Ok(())
392
+ }
393
+
394
+ pub fn write_ipc(
395
+ &self,
396
+ rb_f: Value,
397
+ compression: Wrap<Option<IpcCompression>>,
398
+ ) -> RbResult<()> {
399
+ if let Ok(s) = String::try_convert(rb_f) {
400
+ let f = std::fs::File::create(s).unwrap();
401
+ IpcWriter::new(f)
402
+ .with_compression(compression.0)
403
+ .finish(&mut self.df.borrow_mut())
404
+ .map_err(RbPolarsErr::from)?;
405
+ } else {
406
+ let mut buf = Cursor::new(Vec::new());
407
+ IpcWriter::new(&mut buf)
408
+ .with_compression(compression.0)
409
+ .finish(&mut self.df.borrow_mut())
410
+ .map_err(RbPolarsErr::from)?;
411
+ // TODO less copying
412
+ let rb_str = RString::from_slice(&buf.into_inner());
413
+ rb_f.funcall::<_, _, Value>("write", (rb_str,))?;
414
+ }
415
+ Ok(())
416
+ }
417
+
418
+ pub fn write_ipc_stream(
419
+ &self,
420
+ rb_f: Value,
421
+ compression: Wrap<Option<IpcCompression>>,
422
+ ) -> RbResult<()> {
423
+ if let Ok(s) = String::try_convert(rb_f) {
424
+ let f = std::fs::File::create(s).unwrap();
425
+ IpcStreamWriter::new(f)
426
+ .with_compression(compression.0)
427
+ .finish(&mut self.df.borrow_mut())
428
+ .map_err(RbPolarsErr::from)?
429
+ } else {
430
+ let mut buf = get_file_like(rb_f, true)?;
431
+
432
+ IpcStreamWriter::new(&mut buf)
433
+ .with_compression(compression.0)
434
+ .finish(&mut self.df.borrow_mut())
435
+ .map_err(RbPolarsErr::from)?;
436
+ }
437
+ Ok(())
438
+ }
439
+
440
+ pub fn write_avro(
441
+ &self,
442
+ rb_f: Value,
443
+ compression: Wrap<Option<AvroCompression>>,
444
+ ) -> RbResult<()> {
445
+ use polars::io::avro::AvroWriter;
446
+
447
+ if let Ok(s) = String::try_convert(rb_f) {
448
+ let f = std::fs::File::create(s).unwrap();
449
+ AvroWriter::new(f)
450
+ .with_compression(compression.0)
451
+ .finish(&mut self.df.borrow_mut())
452
+ .map_err(RbPolarsErr::from)?;
453
+ } else {
454
+ let mut buf = get_file_like(rb_f, true)?;
455
+ AvroWriter::new(&mut buf)
456
+ .with_compression(compression.0)
457
+ .finish(&mut self.df.borrow_mut())
458
+ .map_err(RbPolarsErr::from)?;
459
+ }
460
+
461
+ Ok(())
462
+ }
463
+ }
@@ -0,0 +1,26 @@
1
+ mod construction;
2
+ mod export;
3
+ mod general;
4
+ mod io;
5
+
6
+ use polars::prelude::*;
7
+ use std::cell::RefCell;
8
+
9
+ #[magnus::wrap(class = "Polars::RbDataFrame")] // wrapped via magnus under the Ruby class name `Polars::RbDataFrame`
10
+ pub struct RbDataFrame { // wrapper type holding a single polars `DataFrame`
11
+ pub df: RefCell<DataFrame>, // the wrapped frame; `RefCell` lets `&self` methods borrow it mutably (see `new` below)
12
+ }
13
+
14
+ impl From<DataFrame> for RbDataFrame {
15
+ fn from(df: DataFrame) -> Self {
16
+ RbDataFrame::new(df)
17
+ }
18
+ }
19
+
20
+ impl RbDataFrame {
21
+ pub fn new(df: DataFrame) -> Self {
22
+ RbDataFrame {
23
+ df: RefCell::new(df),
24
+ }
25
+ }
26
+ }
@@ -59,8 +59,12 @@ impl RbExpr {
59
59
  self.inner.clone().arr().arg_max().into()
60
60
  }
61
61
 
62
- pub fn arr_get(&self, index: &RbExpr) -> Self {
63
- self.inner.clone().arr().get(index.inner.clone()).into()
62
+ pub fn arr_get(&self, index: &RbExpr, null_on_oob: bool) -> Self {
63
+ self.inner
64
+ .clone()
65
+ .arr()
66
+ .get(index.inner.clone(), null_on_oob)
67
+ .into()
64
68
  }
65
69
 
66
70
  pub fn arr_join(&self, separator: &RbExpr, ignore_nulls: bool) -> Self {
@@ -37,11 +37,16 @@ impl RbExpr {
37
37
  self.inner.clone().dt().cast_time_unit(tu.0).into()
38
38
  }
39
39
 
40
- pub fn dt_replace_time_zone(&self, time_zone: Option<String>, ambiguous: &Self) -> Self {
40
+ pub fn dt_replace_time_zone(
41
+ &self,
42
+ time_zone: Option<String>,
43
+ ambiguous: &Self,
44
+ non_existent: Wrap<NonExistent>,
45
+ ) -> Self {
41
46
  self.inner
42
47
  .clone()
43
48
  .dt()
44
- .replace_time_zone(time_zone, ambiguous.inner.clone())
49
+ .replace_time_zone(time_zone, ambiguous.inner.clone(), non_existent.0)
45
50
  .into()
46
51
  }
47
52
 
@@ -69,8 +74,12 @@ impl RbExpr {
69
74
  self.inner.clone().dt().dst_offset().into()
70
75
  }
71
76
 
72
- pub fn dt_round(&self, every: String, offset: String) -> Self {
73
- self.inner.clone().dt().round(&every, &offset).into()
77
+ pub fn dt_round(&self, every: &Self, offset: String) -> Self {
78
+ self.inner
79
+ .clone()
80
+ .dt()
81
+ .round(every.inner.clone(), &offset)
82
+ .into()
74
83
  }
75
84
 
76
85
  pub fn dt_combine(&self, time: &Self, time_unit: Wrap<TimeUnit>) -> Self {
@@ -267,7 +267,7 @@ impl RbExpr {
267
267
  pub fn sort_with(&self, descending: bool, nulls_last: bool) -> Self {
268
268
  self.clone()
269
269
  .inner
270
- .sort_with(SortOptions {
270
+ .sort(SortOptions {
271
271
  descending,
272
272
  nulls_last,
273
273
  multithreaded: true,
@@ -288,12 +288,28 @@ impl RbExpr {
288
288
  .into()
289
289
  }
290
290
 
291
- pub fn top_k(&self, k: &Self) -> Self {
292
- self.inner.clone().top_k(k.inner.clone()).into()
291
+ pub fn top_k(&self, k: &Self, nulls_last: bool, multithreaded: bool) -> Self {
292
+ self.inner
293
+ .clone()
294
+ .top_k(
295
+ k.inner.clone(),
296
+ SortOptions::default()
297
+ .with_nulls_last(nulls_last)
298
+ .with_maintain_order(multithreaded),
299
+ )
300
+ .into()
293
301
  }
294
302
 
295
- pub fn bottom_k(&self, k: &Self) -> Self {
296
- self.inner.clone().bottom_k(k.inner.clone()).into()
303
+ pub fn bottom_k(&self, k: &Self, nulls_last: bool, multithreaded: bool) -> Self {
304
+ self.inner
305
+ .clone()
306
+ .bottom_k(
307
+ k.inner.clone(),
308
+ SortOptions::default()
309
+ .with_nulls_last(nulls_last)
310
+ .with_maintain_order(multithreaded),
311
+ )
312
+ .into()
297
313
  }
298
314
 
299
315
  pub fn peak_min(&self) -> Self {
@@ -323,9 +339,28 @@ impl RbExpr {
323
339
  self.clone().inner.gather(idx.inner.clone()).into()
324
340
  }
325
341
 
326
- pub fn sort_by(&self, by: RArray, reverse: Vec<bool>) -> RbResult<Self> {
342
+ pub fn sort_by(
343
+ &self,
344
+ by: RArray,
345
+ descending: Vec<bool>,
346
+ nulls_last: bool,
347
+ multithreaded: bool,
348
+ maintain_order: bool,
349
+ ) -> RbResult<Self> {
327
350
  let by = rb_exprs_to_exprs(by)?;
328
- Ok(self.clone().inner.sort_by(by, reverse).into())
351
+ Ok(self
352
+ .clone()
353
+ .inner
354
+ .sort_by(
355
+ by,
356
+ SortMultipleOptions {
357
+ descending,
358
+ nulls_last,
359
+ multithreaded,
360
+ maintain_order,
361
+ },
362
+ )
363
+ .into())
329
364
  }
330
365
 
331
366
  pub fn backward_fill(&self, limit: FillNullLimit) -> Self {
@@ -590,8 +625,14 @@ impl RbExpr {
590
625
  self.inner.clone().shrink_dtype().into()
591
626
  }
592
627
 
593
- pub fn map(&self, lambda: Value, output_type: Option<Wrap<DataType>>, agg_list: bool) -> Self {
594
- map_single(self, lambda, output_type, agg_list)
628
+ pub fn map_batches( // thin wrapper: forwards every argument unchanged to `map_single`
629
+ &self,
630
+ lambda: Value, // Ruby value handed to `map_single`; presumably a callable — confirm against `map_single`
631
+ output_type: Option<Wrap<DataType>>,
632
+ agg_list: bool,
633
+ is_elementwise: bool, // new in this version; passed straight through as the last argument
634
+ ) -> Self {
635
+ map_single(self, lambda, output_type, agg_list, is_elementwise)
595
636
  }
596
637
 
597
638
  pub fn dot(&self, other: &Self) -> Self {
@@ -51,8 +51,12 @@ impl RbExpr {
51
51
  .into()
52
52
  }
53
53
 
54
- pub fn list_get(&self, index: &RbExpr) -> Self {
55
- self.inner.clone().list().get(index.inner.clone()).into()
54
+ pub fn list_get(&self, index: &RbExpr, null_on_oob: bool) -> Self {
55
+ self.inner
56
+ .clone()
57
+ .list()
58
+ .get(index.inner.clone(), null_on_oob)
59
+ .into()
56
60
  }
57
61
 
58
62
  pub fn list_join(&self, separator: &RbExpr, ignore_nulls: bool) -> Self {