polars-df 0.10.0 → 0.12.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (92)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +27 -0
  3. data/Cargo.lock +392 -351
  4. data/README.md +6 -6
  5. data/ext/polars/Cargo.toml +12 -7
  6. data/ext/polars/src/batched_csv.rs +53 -52
  7. data/ext/polars/src/conversion/any_value.rs +261 -0
  8. data/ext/polars/src/conversion/chunked_array.rs +4 -4
  9. data/ext/polars/src/conversion/mod.rs +60 -66
  10. data/ext/polars/src/dataframe/construction.rs +184 -0
  11. data/ext/polars/src/dataframe/export.rs +48 -0
  12. data/ext/polars/src/dataframe/general.rs +597 -0
  13. data/ext/polars/src/dataframe/io.rs +473 -0
  14. data/ext/polars/src/dataframe/mod.rs +26 -0
  15. data/ext/polars/src/error.rs +26 -4
  16. data/ext/polars/src/expr/categorical.rs +0 -10
  17. data/ext/polars/src/expr/datetime.rs +4 -8
  18. data/ext/polars/src/expr/general.rs +129 -94
  19. data/ext/polars/src/expr/mod.rs +2 -2
  20. data/ext/polars/src/expr/rolling.rs +201 -77
  21. data/ext/polars/src/expr/string.rs +11 -36
  22. data/ext/polars/src/functions/eager.rs +10 -10
  23. data/ext/polars/src/functions/lazy.rs +23 -21
  24. data/ext/polars/src/functions/range.rs +69 -1
  25. data/ext/polars/src/interop/mod.rs +1 -0
  26. data/ext/polars/src/interop/numo/mod.rs +2 -0
  27. data/ext/polars/src/interop/numo/to_numo_df.rs +23 -0
  28. data/ext/polars/src/interop/numo/to_numo_series.rs +61 -0
  29. data/ext/polars/src/lazyframe/mod.rs +135 -136
  30. data/ext/polars/src/lib.rs +94 -59
  31. data/ext/polars/src/map/dataframe.rs +2 -2
  32. data/ext/polars/src/map/lazy.rs +5 -25
  33. data/ext/polars/src/map/series.rs +7 -1
  34. data/ext/polars/src/rb_modules.rs +25 -1
  35. data/ext/polars/src/series/aggregation.rs +49 -30
  36. data/ext/polars/src/series/arithmetic.rs +21 -11
  37. data/ext/polars/src/series/construction.rs +56 -38
  38. data/ext/polars/src/series/export.rs +131 -49
  39. data/ext/polars/src/series/mod.rs +32 -141
  40. data/ext/polars/src/sql.rs +3 -1
  41. data/lib/polars/array_expr.rb +4 -4
  42. data/lib/polars/batched_csv_reader.rb +11 -5
  43. data/lib/polars/cat_expr.rb +0 -36
  44. data/lib/polars/cat_name_space.rb +0 -37
  45. data/lib/polars/convert.rb +6 -1
  46. data/lib/polars/data_frame.rb +176 -403
  47. data/lib/polars/data_types.rb +1 -1
  48. data/lib/polars/date_time_expr.rb +525 -572
  49. data/lib/polars/date_time_name_space.rb +263 -460
  50. data/lib/polars/dynamic_group_by.rb +5 -5
  51. data/lib/polars/exceptions.rb +7 -0
  52. data/lib/polars/expr.rb +1394 -243
  53. data/lib/polars/expr_dispatch.rb +1 -1
  54. data/lib/polars/functions/aggregation/horizontal.rb +8 -8
  55. data/lib/polars/functions/as_datatype.rb +63 -40
  56. data/lib/polars/functions/lazy.rb +63 -14
  57. data/lib/polars/functions/lit.rb +1 -1
  58. data/lib/polars/functions/range/date_range.rb +90 -57
  59. data/lib/polars/functions/range/datetime_range.rb +149 -0
  60. data/lib/polars/functions/range/int_range.rb +2 -2
  61. data/lib/polars/functions/range/time_range.rb +141 -0
  62. data/lib/polars/functions/repeat.rb +1 -1
  63. data/lib/polars/functions/whenthen.rb +1 -1
  64. data/lib/polars/group_by.rb +88 -23
  65. data/lib/polars/io/avro.rb +24 -0
  66. data/lib/polars/{io.rb → io/csv.rb} +299 -493
  67. data/lib/polars/io/database.rb +73 -0
  68. data/lib/polars/io/ipc.rb +247 -0
  69. data/lib/polars/io/json.rb +29 -0
  70. data/lib/polars/io/ndjson.rb +80 -0
  71. data/lib/polars/io/parquet.rb +227 -0
  72. data/lib/polars/lazy_frame.rb +143 -272
  73. data/lib/polars/lazy_group_by.rb +100 -3
  74. data/lib/polars/list_expr.rb +11 -11
  75. data/lib/polars/list_name_space.rb +5 -1
  76. data/lib/polars/rolling_group_by.rb +7 -9
  77. data/lib/polars/series.rb +103 -187
  78. data/lib/polars/string_expr.rb +78 -102
  79. data/lib/polars/string_name_space.rb +5 -4
  80. data/lib/polars/testing.rb +2 -2
  81. data/lib/polars/utils/constants.rb +9 -0
  82. data/lib/polars/utils/convert.rb +97 -0
  83. data/lib/polars/utils/parse.rb +89 -0
  84. data/lib/polars/utils/various.rb +76 -0
  85. data/lib/polars/utils/wrap.rb +19 -0
  86. data/lib/polars/utils.rb +8 -300
  87. data/lib/polars/version.rb +1 -1
  88. data/lib/polars/whenthen.rb +6 -6
  89. data/lib/polars.rb +20 -1
  90. metadata +28 -7
  91. data/ext/polars/src/conversion/anyvalue.rs +0 -186
  92. data/ext/polars/src/dataframe.rs +0 -1208
@@ -0,0 +1,473 @@
1
+ use magnus::{prelude::*, RString, Value};
2
+ use polars::io::avro::AvroCompression;
3
+ use polars::io::RowIndex;
4
+ use polars::prelude::*;
5
+ use std::io::{BufWriter, Cursor};
6
+ use std::num::NonZeroUsize;
7
+
8
+ use super::*;
9
+ use crate::conversion::*;
10
+ use crate::file::{get_either_file, get_file_like, get_mmap_bytes_reader, EitherRustRubyFile};
11
+ use crate::{RbPolarsErr, RbResult};
12
+
13
+ impl RbDataFrame {
14
+ pub fn read_csv(arguments: &[Value]) -> RbResult<Self> {
15
+ // start arguments
16
+ // this pattern is needed for more than 16
17
+ let rb_f = arguments[0];
18
+ let infer_schema_length = Option::<usize>::try_convert(arguments[1])?;
19
+ let chunk_size = usize::try_convert(arguments[2])?;
20
+ let has_header = bool::try_convert(arguments[3])?;
21
+ let ignore_errors = bool::try_convert(arguments[4])?;
22
+ let n_rows = Option::<usize>::try_convert(arguments[5])?;
23
+ let skip_rows = usize::try_convert(arguments[6])?;
24
+ let projection = Option::<Vec<usize>>::try_convert(arguments[7])?;
25
+ let separator = String::try_convert(arguments[8])?;
26
+ let rechunk = bool::try_convert(arguments[9])?;
27
+ let columns = Option::<Vec<String>>::try_convert(arguments[10])?;
28
+ let encoding = Wrap::<CsvEncoding>::try_convert(arguments[11])?;
29
+ let n_threads = Option::<usize>::try_convert(arguments[12])?;
30
+ let path = Option::<String>::try_convert(arguments[13])?;
31
+ let overwrite_dtype = Option::<Vec<(String, Wrap<DataType>)>>::try_convert(arguments[14])?;
32
+ // TODO fix
33
+ let overwrite_dtype_slice = Option::<Vec<Wrap<DataType>>>::None; // Option::<Vec<Wrap<DataType>>>::try_convert(arguments[15])?;
34
+ let low_memory = bool::try_convert(arguments[16])?;
35
+ let comment_prefix = Option::<String>::try_convert(arguments[17])?;
36
+ let quote_char = Option::<String>::try_convert(arguments[18])?;
37
+ let null_values = Option::<Wrap<NullValues>>::try_convert(arguments[19])?;
38
+ let missing_utf8_is_empty_string = bool::try_convert(arguments[20])?;
39
+ let try_parse_dates = bool::try_convert(arguments[21])?;
40
+ let skip_rows_after_header = usize::try_convert(arguments[22])?;
41
+ let row_index = Option::<(String, IdxSize)>::try_convert(arguments[23])?;
42
+ let sample_size = usize::try_convert(arguments[24])?;
43
+ let eol_char = String::try_convert(arguments[25])?;
44
+ let raise_if_empty = bool::try_convert(arguments[26])?;
45
+ let truncate_ragged_lines = bool::try_convert(arguments[27])?;
46
+ let decimal_comma = bool::try_convert(arguments[28])?;
47
+ let schema = Option::<Wrap<Schema>>::try_convert(arguments[29])?;
48
+ // end arguments
49
+
50
+ let null_values = null_values.map(|w| w.0);
51
+ let eol_char = eol_char.as_bytes()[0];
52
+ let row_index = row_index.map(|(name, offset)| RowIndex {
53
+ name: Arc::from(name.as_str()),
54
+ offset,
55
+ });
56
+ let quote_char = if let Some(s) = quote_char {
57
+ if s.is_empty() {
58
+ None
59
+ } else {
60
+ Some(s.as_bytes()[0])
61
+ }
62
+ } else {
63
+ None
64
+ };
65
+
66
+ let overwrite_dtype = overwrite_dtype.map(|overwrite_dtype| {
67
+ overwrite_dtype
68
+ .iter()
69
+ .map(|(name, dtype)| {
70
+ let dtype = dtype.0.clone();
71
+ Field::new(name, dtype)
72
+ })
73
+ .collect::<Schema>()
74
+ });
75
+
76
+ let overwrite_dtype_slice = overwrite_dtype_slice.map(|overwrite_dtype| {
77
+ overwrite_dtype
78
+ .iter()
79
+ .map(|dt| dt.0.clone())
80
+ .collect::<Vec<_>>()
81
+ });
82
+
83
+ let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
84
+ let df = CsvReadOptions::default()
85
+ .with_path(path)
86
+ .with_infer_schema_length(infer_schema_length)
87
+ .with_has_header(has_header)
88
+ .with_n_rows(n_rows)
89
+ .with_skip_rows(skip_rows)
90
+ .with_ignore_errors(ignore_errors)
91
+ .with_projection(projection.map(Arc::new))
92
+ .with_rechunk(rechunk)
93
+ .with_chunk_size(chunk_size)
94
+ .with_columns(columns.map(Arc::from))
95
+ .with_n_threads(n_threads)
96
+ .with_schema_overwrite(overwrite_dtype.map(Arc::new))
97
+ .with_dtype_overwrite(overwrite_dtype_slice.map(Arc::new))
98
+ .with_schema(schema.map(|schema| Arc::new(schema.0)))
99
+ .with_low_memory(low_memory)
100
+ .with_skip_rows_after_header(skip_rows_after_header)
101
+ .with_row_index(row_index)
102
+ .with_sample_size(sample_size)
103
+ .with_raise_if_empty(raise_if_empty)
104
+ .with_parse_options(
105
+ CsvParseOptions::default()
106
+ .with_separator(separator.as_bytes()[0])
107
+ .with_encoding(encoding.0)
108
+ .with_missing_is_null(!missing_utf8_is_empty_string)
109
+ .with_comment_prefix(comment_prefix.as_deref())
110
+ .with_null_values(null_values)
111
+ .with_try_parse_dates(try_parse_dates)
112
+ .with_quote_char(quote_char)
113
+ .with_eol_char(eol_char)
114
+ .with_truncate_ragged_lines(truncate_ragged_lines)
115
+ .with_decimal_comma(decimal_comma),
116
+ )
117
+ .into_reader_with_file_handle(mmap_bytes_r)
118
+ .finish()
119
+ .map_err(RbPolarsErr::from)?;
120
+ Ok(df.into())
121
+ }
122
+
123
+ #[allow(clippy::too_many_arguments)]
124
+ pub fn read_parquet(
125
+ rb_f: Value,
126
+ columns: Option<Vec<String>>,
127
+ projection: Option<Vec<usize>>,
128
+ n_rows: Option<usize>,
129
+ parallel: Wrap<ParallelStrategy>,
130
+ row_index: Option<(String, IdxSize)>,
131
+ low_memory: bool,
132
+ use_statistics: bool,
133
+ rechunk: bool,
134
+ ) -> RbResult<Self> {
135
+ use EitherRustRubyFile::*;
136
+
137
+ let row_index = row_index.map(|(name, offset)| RowIndex {
138
+ name: Arc::from(name.as_str()),
139
+ offset,
140
+ });
141
+ let result = match get_either_file(rb_f, false)? {
142
+ Rb(f) => {
143
+ let buf = f.as_buffer();
144
+ ParquetReader::new(buf)
145
+ .with_projection(projection)
146
+ .with_columns(columns)
147
+ .read_parallel(parallel.0)
148
+ .with_n_rows(n_rows)
149
+ .with_row_index(row_index)
150
+ .set_low_memory(low_memory)
151
+ .use_statistics(use_statistics)
152
+ .set_rechunk(rechunk)
153
+ .finish()
154
+ }
155
+ Rust(f) => ParquetReader::new(f.into_inner())
156
+ .with_projection(projection)
157
+ .with_columns(columns)
158
+ .read_parallel(parallel.0)
159
+ .with_n_rows(n_rows)
160
+ .with_row_index(row_index)
161
+ .use_statistics(use_statistics)
162
+ .set_rechunk(rechunk)
163
+ .finish(),
164
+ };
165
+ let df = result.map_err(RbPolarsErr::from)?;
166
+ Ok(RbDataFrame::new(df))
167
+ }
168
+
169
+ pub fn read_json(
170
+ rb_f: Value,
171
+ infer_schema_length: Option<usize>,
172
+ schema: Option<Wrap<Schema>>,
173
+ schema_overrides: Option<Wrap<Schema>>,
174
+ ) -> RbResult<Self> {
175
+ let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
176
+
177
+ let mut builder = JsonReader::new(mmap_bytes_r)
178
+ .with_json_format(JsonFormat::Json)
179
+ .infer_schema_len(infer_schema_length.and_then(NonZeroUsize::new));
180
+
181
+ if let Some(schema) = schema {
182
+ builder = builder.with_schema(Arc::new(schema.0));
183
+ }
184
+
185
+ if let Some(schema) = schema_overrides.as_ref() {
186
+ builder = builder.with_schema_overwrite(&schema.0);
187
+ }
188
+
189
+ let out = builder.finish().map_err(RbPolarsErr::from)?;
190
+ Ok(out.into())
191
+ }
192
+
193
+ pub fn read_ndjson(
194
+ rb_f: Value,
195
+ ignore_errors: bool,
196
+ schema: Option<Wrap<Schema>>,
197
+ schema_overrides: Option<Wrap<Schema>>,
198
+ ) -> RbResult<Self> {
199
+ let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
200
+
201
+ let mut builder = JsonReader::new(mmap_bytes_r)
202
+ .with_json_format(JsonFormat::JsonLines)
203
+ .with_ignore_errors(ignore_errors);
204
+
205
+ if let Some(schema) = schema {
206
+ builder = builder.with_schema(Arc::new(schema.0));
207
+ }
208
+
209
+ if let Some(schema) = schema_overrides.as_ref() {
210
+ builder = builder.with_schema_overwrite(&schema.0);
211
+ }
212
+
213
+ let out = builder
214
+ .finish()
215
+ .map_err(|e| RbPolarsErr::other(format!("{e}")))?;
216
+ Ok(out.into())
217
+ }
218
+
219
+ pub fn read_ipc(
220
+ rb_f: Value,
221
+ columns: Option<Vec<String>>,
222
+ projection: Option<Vec<usize>>,
223
+ n_rows: Option<usize>,
224
+ row_index: Option<(String, IdxSize)>,
225
+ _memory_map: bool,
226
+ ) -> RbResult<Self> {
227
+ let row_index = row_index.map(|(name, offset)| RowIndex {
228
+ name: Arc::from(name.as_str()),
229
+ offset,
230
+ });
231
+ let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
232
+
233
+ // TODO fix
234
+ let mmap_path = None;
235
+ let df = IpcReader::new(mmap_bytes_r)
236
+ .with_projection(projection)
237
+ .with_columns(columns)
238
+ .with_n_rows(n_rows)
239
+ .with_row_index(row_index)
240
+ .memory_mapped(mmap_path)
241
+ .finish()
242
+ .map_err(RbPolarsErr::from)?;
243
+ Ok(RbDataFrame::new(df))
244
+ }
245
+
246
+ pub fn read_ipc_stream(
247
+ rb_f: Value,
248
+ columns: Option<Vec<String>>,
249
+ projection: Option<Vec<usize>>,
250
+ n_rows: Option<usize>,
251
+ row_index: Option<(String, IdxSize)>,
252
+ rechunk: bool,
253
+ ) -> RbResult<Self> {
254
+ let row_index = row_index.map(|(name, offset)| RowIndex {
255
+ name: Arc::from(name.as_str()),
256
+ offset,
257
+ });
258
+ // rb_f = read_if_bytesio(rb_f);
259
+ let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
260
+ let df = IpcStreamReader::new(mmap_bytes_r)
261
+ .with_projection(projection)
262
+ .with_columns(columns)
263
+ .with_n_rows(n_rows)
264
+ .with_row_index(row_index)
265
+ .set_rechunk(rechunk)
266
+ .finish()
267
+ .map_err(RbPolarsErr::from)?;
268
+ Ok(RbDataFrame::new(df))
269
+ }
270
+
271
+ pub fn read_avro(
272
+ rb_f: Value,
273
+ columns: Option<Vec<String>>,
274
+ projection: Option<Vec<usize>>,
275
+ n_rows: Option<usize>,
276
+ ) -> RbResult<Self> {
277
+ use polars::io::avro::AvroReader;
278
+
279
+ let file = get_file_like(rb_f, false)?;
280
+ let df = AvroReader::new(file)
281
+ .with_projection(projection)
282
+ .with_columns(columns)
283
+ .with_n_rows(n_rows)
284
+ .finish()
285
+ .map_err(RbPolarsErr::from)?;
286
+ Ok(RbDataFrame::new(df))
287
+ }
288
+
289
+ #[allow(clippy::too_many_arguments)]
290
+ pub fn write_csv(
291
+ &self,
292
+ rb_f: Value,
293
+ include_header: bool,
294
+ separator: u8,
295
+ quote_char: u8,
296
+ batch_size: Wrap<NonZeroUsize>,
297
+ datetime_format: Option<String>,
298
+ date_format: Option<String>,
299
+ time_format: Option<String>,
300
+ float_precision: Option<usize>,
301
+ null_value: Option<String>,
302
+ ) -> RbResult<()> {
303
+ let batch_size = batch_size.0;
304
+ let null = null_value.unwrap_or_default();
305
+
306
+ if let Ok(s) = String::try_convert(rb_f) {
307
+ let f = std::fs::File::create(s).unwrap();
308
+ // no need for a buffered writer, because the csv writer does internal buffering
309
+ CsvWriter::new(f)
310
+ .include_header(include_header)
311
+ .with_separator(separator)
312
+ .with_quote_char(quote_char)
313
+ .with_batch_size(batch_size)
314
+ .with_datetime_format(datetime_format)
315
+ .with_date_format(date_format)
316
+ .with_time_format(time_format)
317
+ .with_float_precision(float_precision)
318
+ .with_null_value(null)
319
+ .finish(&mut self.df.borrow_mut())
320
+ .map_err(RbPolarsErr::from)?;
321
+ } else {
322
+ let mut buf = Cursor::new(Vec::new());
323
+ CsvWriter::new(&mut buf)
324
+ .include_header(include_header)
325
+ .with_separator(separator)
326
+ .with_quote_char(quote_char)
327
+ .with_batch_size(batch_size)
328
+ .with_datetime_format(datetime_format)
329
+ .with_date_format(date_format)
330
+ .with_time_format(time_format)
331
+ .with_float_precision(float_precision)
332
+ .with_null_value(null)
333
+ .finish(&mut self.df.borrow_mut())
334
+ .map_err(RbPolarsErr::from)?;
335
+ // TODO less copying
336
+ let rb_str = RString::from_slice(&buf.into_inner());
337
+ rb_f.funcall::<_, _, Value>("write", (rb_str,))?;
338
+ }
339
+
340
+ Ok(())
341
+ }
342
+
343
+ pub fn write_parquet(
344
+ &self,
345
+ rb_f: Value,
346
+ compression: String,
347
+ compression_level: Option<i32>,
348
+ statistics: Wrap<StatisticsOptions>,
349
+ row_group_size: Option<usize>,
350
+ data_page_size: Option<usize>,
351
+ ) -> RbResult<()> {
352
+ let compression = parse_parquet_compression(&compression, compression_level)?;
353
+
354
+ if let Ok(s) = String::try_convert(rb_f) {
355
+ let f = std::fs::File::create(s).unwrap();
356
+ ParquetWriter::new(f)
357
+ .with_compression(compression)
358
+ .with_statistics(statistics.0)
359
+ .with_row_group_size(row_group_size)
360
+ .with_data_page_size(data_page_size)
361
+ .finish(&mut self.df.borrow_mut())
362
+ .map_err(RbPolarsErr::from)?;
363
+ } else {
364
+ let buf = get_file_like(rb_f, true)?;
365
+ ParquetWriter::new(buf)
366
+ .with_compression(compression)
367
+ .with_statistics(statistics.0)
368
+ .with_row_group_size(row_group_size)
369
+ .with_data_page_size(data_page_size)
370
+ .finish(&mut self.df.borrow_mut())
371
+ .map_err(RbPolarsErr::from)?;
372
+ }
373
+
374
+ Ok(())
375
+ }
376
+
377
+ pub fn write_json(&self, rb_f: Value, pretty: bool, row_oriented: bool) -> RbResult<()> {
378
+ let file = BufWriter::new(get_file_like(rb_f, true)?);
379
+
380
+ let r = match (pretty, row_oriented) {
381
+ (_, true) => JsonWriter::new(file)
382
+ .with_json_format(JsonFormat::Json)
383
+ .finish(&mut self.df.borrow_mut()),
384
+ (true, _) => serde_json::to_writer_pretty(file, &*self.df.borrow())
385
+ .map_err(|e| PolarsError::ComputeError(format!("{:?}", e).into())),
386
+ (false, _) => serde_json::to_writer(file, &*self.df.borrow())
387
+ .map_err(|e| PolarsError::ComputeError(format!("{:?}", e).into())),
388
+ };
389
+ r.map_err(|e| RbPolarsErr::other(format!("{:?}", e)))?;
390
+ Ok(())
391
+ }
392
+
393
+ pub fn write_ndjson(&self, rb_f: Value) -> RbResult<()> {
394
+ let file = BufWriter::new(get_file_like(rb_f, true)?);
395
+
396
+ let r = JsonWriter::new(file)
397
+ .with_json_format(JsonFormat::JsonLines)
398
+ .finish(&mut self.df.borrow_mut());
399
+
400
+ r.map_err(|e| RbPolarsErr::other(format!("{:?}", e)))?;
401
+ Ok(())
402
+ }
403
+
404
+ pub fn write_ipc(
405
+ &self,
406
+ rb_f: Value,
407
+ compression: Wrap<Option<IpcCompression>>,
408
+ ) -> RbResult<()> {
409
+ if let Ok(s) = String::try_convert(rb_f) {
410
+ let f = std::fs::File::create(s).unwrap();
411
+ IpcWriter::new(f)
412
+ .with_compression(compression.0)
413
+ .finish(&mut self.df.borrow_mut())
414
+ .map_err(RbPolarsErr::from)?;
415
+ } else {
416
+ let mut buf = Cursor::new(Vec::new());
417
+ IpcWriter::new(&mut buf)
418
+ .with_compression(compression.0)
419
+ .finish(&mut self.df.borrow_mut())
420
+ .map_err(RbPolarsErr::from)?;
421
+ // TODO less copying
422
+ let rb_str = RString::from_slice(&buf.into_inner());
423
+ rb_f.funcall::<_, _, Value>("write", (rb_str,))?;
424
+ }
425
+ Ok(())
426
+ }
427
+
428
+ pub fn write_ipc_stream(
429
+ &self,
430
+ rb_f: Value,
431
+ compression: Wrap<Option<IpcCompression>>,
432
+ ) -> RbResult<()> {
433
+ if let Ok(s) = String::try_convert(rb_f) {
434
+ let f = std::fs::File::create(s).unwrap();
435
+ IpcStreamWriter::new(f)
436
+ .with_compression(compression.0)
437
+ .finish(&mut self.df.borrow_mut())
438
+ .map_err(RbPolarsErr::from)?
439
+ } else {
440
+ let mut buf = get_file_like(rb_f, true)?;
441
+
442
+ IpcStreamWriter::new(&mut buf)
443
+ .with_compression(compression.0)
444
+ .finish(&mut self.df.borrow_mut())
445
+ .map_err(RbPolarsErr::from)?;
446
+ }
447
+ Ok(())
448
+ }
449
+
450
+ pub fn write_avro(
451
+ &self,
452
+ rb_f: Value,
453
+ compression: Wrap<Option<AvroCompression>>,
454
+ ) -> RbResult<()> {
455
+ use polars::io::avro::AvroWriter;
456
+
457
+ if let Ok(s) = String::try_convert(rb_f) {
458
+ let f = std::fs::File::create(s).unwrap();
459
+ AvroWriter::new(f)
460
+ .with_compression(compression.0)
461
+ .finish(&mut self.df.borrow_mut())
462
+ .map_err(RbPolarsErr::from)?;
463
+ } else {
464
+ let mut buf = get_file_like(rb_f, true)?;
465
+ AvroWriter::new(&mut buf)
466
+ .with_compression(compression.0)
467
+ .finish(&mut self.df.borrow_mut())
468
+ .map_err(RbPolarsErr::from)?;
469
+ }
470
+
471
+ Ok(())
472
+ }
473
+ }
@@ -0,0 +1,26 @@
1
+ mod construction;
2
+ mod export;
3
+ mod general;
4
+ mod io;
5
+
6
+ use polars::prelude::*;
7
+ use std::cell::RefCell;
8
+
9
+ #[magnus::wrap(class = "Polars::RbDataFrame")]
10
+ pub struct RbDataFrame {
11
+ pub df: RefCell<DataFrame>,
12
+ }
13
+
14
+ impl From<DataFrame> for RbDataFrame {
15
+ fn from(df: DataFrame) -> Self {
16
+ RbDataFrame::new(df)
17
+ }
18
+ }
19
+
20
+ impl RbDataFrame {
21
+ pub fn new(df: DataFrame) -> Self {
22
+ RbDataFrame {
23
+ df: RefCell::new(df),
24
+ }
25
+ }
26
+ }
@@ -2,20 +2,26 @@ use magnus::exception;
2
2
  use magnus::Error;
3
3
  use polars::prelude::PolarsError;
4
4
 
5
+ use crate::rb_modules;
6
+
5
7
  pub struct RbPolarsErr {}
6
8
 
7
9
  impl RbPolarsErr {
8
10
  // convert to Error instead of Self
9
11
  pub fn from(e: PolarsError) -> Error {
10
- Error::new(exception::runtime_error(), e.to_string())
12
+ match e {
13
+ PolarsError::ComputeError(err) => ComputeError::new_err(err.to_string()),
14
+ PolarsError::InvalidOperation(err) => InvalidOperationError::new_err(err.to_string()),
15
+ _ => Error::new(rb_modules::error(), e.to_string()),
16
+ }
11
17
  }
12
18
 
13
19
  pub fn io(e: std::io::Error) -> Error {
14
- Error::new(exception::runtime_error(), e.to_string())
20
+ Error::new(rb_modules::error(), e.to_string())
15
21
  }
16
22
 
17
23
  pub fn other(message: String) -> Error {
18
- Error::new(exception::runtime_error(), message)
24
+ Error::new(rb_modules::error(), message)
19
25
  }
20
26
  }
21
27
 
@@ -35,11 +41,27 @@ impl RbValueError {
35
41
  }
36
42
  }
37
43
 
44
+ pub struct RbOverflowError {}
45
+
46
+ impl RbOverflowError {
47
+ pub fn new_err(message: String) -> Error {
48
+ Error::new(exception::range_error(), message)
49
+ }
50
+ }
51
+
38
52
  pub struct ComputeError {}
39
53
 
40
54
  impl ComputeError {
41
55
  pub fn new_err(message: String) -> Error {
42
- Error::new(exception::runtime_error(), message)
56
+ Error::new(rb_modules::compute_error(), message)
57
+ }
58
+ }
59
+
60
+ pub struct InvalidOperationError {}
61
+
62
+ impl InvalidOperationError {
63
+ pub fn new_err(message: String) -> Error {
64
+ Error::new(rb_modules::invalid_operation_error(), message)
43
65
  }
44
66
  }
45
67
 
@@ -1,16 +1,6 @@
1
- use polars::prelude::*;
2
-
3
- use crate::conversion::Wrap;
4
1
  use crate::RbExpr;
5
2
 
6
3
  impl RbExpr {
7
- pub fn cat_set_ordering(&self, ordering: Wrap<CategoricalOrdering>) -> Self {
8
- self.inner
9
- .clone()
10
- .cast(DataType::Categorical(None, ordering.0))
11
- .into()
12
- }
13
-
14
4
  pub fn cat_get_categories(&self) -> Self {
15
5
  self.inner.clone().cat().get_categories().into()
16
6
  }
@@ -50,12 +50,8 @@ impl RbExpr {
50
50
  .into()
51
51
  }
52
52
 
53
- pub fn dt_truncate(&self, every: &Self, offset: String) -> Self {
54
- self.inner
55
- .clone()
56
- .dt()
57
- .truncate(every.inner.clone(), offset)
58
- .into()
53
+ pub fn dt_truncate(&self, every: &Self) -> Self {
54
+ self.inner.clone().dt().truncate(every.inner.clone()).into()
59
55
  }
60
56
 
61
57
  pub fn dt_month_start(&self) -> Self {
@@ -74,8 +70,8 @@ impl RbExpr {
74
70
  self.inner.clone().dt().dst_offset().into()
75
71
  }
76
72
 
77
- pub fn dt_round(&self, every: String, offset: String) -> Self {
78
- self.inner.clone().dt().round(&every, &offset).into()
73
+ pub fn dt_round(&self, every: &Self) -> Self {
74
+ self.inner.clone().dt().round(every.inner.clone()).into()
79
75
  }
80
76
 
81
77
  pub fn dt_combine(&self, time: &Self, time_unit: Wrap<TimeUnit>) -> Self {