polars-df 0.14.0 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +35 -0
- data/Cargo.lock +1523 -378
- data/LICENSE.txt +1 -0
- data/README.md +38 -4
- data/ext/polars/Cargo.toml +15 -5
- data/ext/polars/src/batched_csv.rs +7 -10
- data/ext/polars/src/conversion/any_value.rs +31 -21
- data/ext/polars/src/conversion/mod.rs +155 -48
- data/ext/polars/src/dataframe/construction.rs +0 -3
- data/ext/polars/src/dataframe/export.rs +9 -2
- data/ext/polars/src/dataframe/general.rs +15 -57
- data/ext/polars/src/dataframe/io.rs +77 -169
- data/ext/polars/src/dataframe/mod.rs +1 -0
- data/ext/polars/src/dataframe/serde.rs +15 -0
- data/ext/polars/src/error.rs +31 -48
- data/ext/polars/src/exceptions.rs +24 -0
- data/ext/polars/src/expr/binary.rs +4 -42
- data/ext/polars/src/expr/datetime.rs +5 -4
- data/ext/polars/src/expr/general.rs +16 -22
- data/ext/polars/src/expr/list.rs +18 -11
- data/ext/polars/src/expr/meta.rs +6 -2
- data/ext/polars/src/expr/rolling.rs +6 -7
- data/ext/polars/src/expr/string.rs +9 -36
- data/ext/polars/src/file.rs +78 -23
- data/ext/polars/src/functions/aggregation.rs +4 -4
- data/ext/polars/src/functions/business.rs +15 -0
- data/ext/polars/src/functions/io.rs +34 -13
- data/ext/polars/src/functions/lazy.rs +22 -12
- data/ext/polars/src/functions/meta.rs +1 -1
- data/ext/polars/src/functions/mod.rs +1 -0
- data/ext/polars/src/interop/arrow/mod.rs +1 -0
- data/ext/polars/src/interop/arrow/to_ruby.rs +83 -0
- data/ext/polars/src/interop/mod.rs +1 -0
- data/ext/polars/src/lazyframe/general.rs +920 -0
- data/ext/polars/src/lazyframe/mod.rs +3 -827
- data/ext/polars/src/lazyframe/serde.rs +31 -0
- data/ext/polars/src/lib.rs +54 -27
- data/ext/polars/src/map/dataframe.rs +10 -6
- data/ext/polars/src/map/lazy.rs +65 -4
- data/ext/polars/src/map/mod.rs +9 -8
- data/ext/polars/src/on_startup.rs +1 -1
- data/ext/polars/src/series/aggregation.rs +1 -5
- data/ext/polars/src/series/arithmetic.rs +10 -10
- data/ext/polars/src/series/construction.rs +2 -2
- data/ext/polars/src/series/export.rs +1 -1
- data/ext/polars/src/series/general.rs +631 -0
- data/ext/polars/src/series/import.rs +55 -0
- data/ext/polars/src/series/mod.rs +11 -638
- data/ext/polars/src/series/scatter.rs +2 -2
- data/ext/polars/src/utils.rs +0 -20
- data/lib/polars/batched_csv_reader.rb +0 -2
- data/lib/polars/binary_expr.rb +133 -9
- data/lib/polars/binary_name_space.rb +101 -6
- data/lib/polars/config.rb +4 -0
- data/lib/polars/data_frame.rb +452 -101
- data/lib/polars/data_type_group.rb +28 -0
- data/lib/polars/data_types.rb +3 -1
- data/lib/polars/date_time_expr.rb +244 -0
- data/lib/polars/date_time_name_space.rb +87 -0
- data/lib/polars/expr.rb +103 -2
- data/lib/polars/functions/aggregation/horizontal.rb +10 -4
- data/lib/polars/functions/as_datatype.rb +51 -2
- data/lib/polars/functions/col.rb +1 -1
- data/lib/polars/functions/eager.rb +1 -3
- data/lib/polars/functions/lazy.rb +95 -13
- data/lib/polars/functions/range/time_range.rb +21 -21
- data/lib/polars/io/csv.rb +14 -16
- data/lib/polars/io/database.rb +2 -2
- data/lib/polars/io/delta.rb +126 -0
- data/lib/polars/io/ipc.rb +14 -4
- data/lib/polars/io/ndjson.rb +10 -0
- data/lib/polars/io/parquet.rb +168 -111
- data/lib/polars/lazy_frame.rb +684 -20
- data/lib/polars/list_name_space.rb +169 -0
- data/lib/polars/selectors.rb +1226 -0
- data/lib/polars/series.rb +465 -35
- data/lib/polars/string_cache.rb +27 -1
- data/lib/polars/string_expr.rb +0 -1
- data/lib/polars/string_name_space.rb +73 -3
- data/lib/polars/struct_name_space.rb +31 -7
- data/lib/polars/utils/various.rb +5 -1
- data/lib/polars/utils.rb +45 -10
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +17 -1
- metadata +16 -9
- data/lib/polars/functions.rb +0 -57
@@ -1,13 +1,15 @@
|
|
1
|
-
use magnus::{prelude::*,
|
1
|
+
use magnus::{prelude::*, Value};
|
2
2
|
use polars::io::avro::AvroCompression;
|
3
3
|
use polars::io::RowIndex;
|
4
4
|
use polars::prelude::*;
|
5
|
-
use std::io::
|
5
|
+
use std::io::BufWriter;
|
6
6
|
use std::num::NonZeroUsize;
|
7
7
|
|
8
8
|
use super::*;
|
9
9
|
use crate::conversion::*;
|
10
|
-
use crate::file::{
|
10
|
+
use crate::file::{
|
11
|
+
get_file_like, get_mmap_bytes_reader, get_mmap_bytes_reader_and_path, read_if_bytesio,
|
12
|
+
};
|
11
13
|
use crate::{RbPolarsErr, RbResult};
|
12
14
|
|
13
15
|
impl RbDataFrame {
|
@@ -29,8 +31,7 @@ impl RbDataFrame {
|
|
29
31
|
let n_threads = Option::<usize>::try_convert(arguments[12])?;
|
30
32
|
let path = Option::<String>::try_convert(arguments[13])?;
|
31
33
|
let overwrite_dtype = Option::<Vec<(String, Wrap<DataType>)>>::try_convert(arguments[14])?;
|
32
|
-
|
33
|
-
let overwrite_dtype_slice = Option::<Vec<Wrap<DataType>>>::None; // Option::<Vec<Wrap<DataType>>>::try_convert(arguments[15])?;
|
34
|
+
let overwrite_dtype_slice = Option::<Vec<Wrap<DataType>>>::try_convert(arguments[15])?;
|
34
35
|
let low_memory = bool::try_convert(arguments[16])?;
|
35
36
|
let comment_prefix = Option::<String>::try_convert(arguments[17])?;
|
36
37
|
let quote_char = Option::<String>::try_convert(arguments[18])?;
|
@@ -39,12 +40,11 @@ impl RbDataFrame {
|
|
39
40
|
let try_parse_dates = bool::try_convert(arguments[21])?;
|
40
41
|
let skip_rows_after_header = usize::try_convert(arguments[22])?;
|
41
42
|
let row_index = Option::<(String, IdxSize)>::try_convert(arguments[23])?;
|
42
|
-
let
|
43
|
-
let
|
44
|
-
let
|
45
|
-
let
|
46
|
-
let
|
47
|
-
let schema = Option::<Wrap<Schema>>::try_convert(arguments[29])?;
|
43
|
+
let eol_char = String::try_convert(arguments[24])?;
|
44
|
+
let raise_if_empty = bool::try_convert(arguments[25])?;
|
45
|
+
let truncate_ragged_lines = bool::try_convert(arguments[26])?;
|
46
|
+
let decimal_comma = bool::try_convert(arguments[27])?;
|
47
|
+
let schema = Option::<Wrap<Schema>>::try_convert(arguments[28])?;
|
48
48
|
// end arguments
|
49
49
|
|
50
50
|
let null_values = null_values.map(|w| w.0);
|
@@ -80,7 +80,8 @@ impl RbDataFrame {
|
|
80
80
|
.collect::<Vec<_>>()
|
81
81
|
});
|
82
82
|
|
83
|
-
let
|
83
|
+
let rb_f = read_if_bytesio(rb_f);
|
84
|
+
let mmap_bytes_r = get_mmap_bytes_reader(&rb_f)?;
|
84
85
|
let df = CsvReadOptions::default()
|
85
86
|
.with_path(path)
|
86
87
|
.with_infer_schema_length(infer_schema_length)
|
@@ -99,7 +100,6 @@ impl RbDataFrame {
|
|
99
100
|
.with_low_memory(low_memory)
|
100
101
|
.with_skip_rows_after_header(skip_rows_after_header)
|
101
102
|
.with_row_index(row_index)
|
102
|
-
.with_sample_size(sample_size)
|
103
103
|
.with_raise_if_empty(raise_if_empty)
|
104
104
|
.with_parse_options(
|
105
105
|
CsvParseOptions::default()
|
@@ -120,59 +120,14 @@ impl RbDataFrame {
|
|
120
120
|
Ok(df.into())
|
121
121
|
}
|
122
122
|
|
123
|
-
#[allow(clippy::too_many_arguments)]
|
124
|
-
pub fn read_parquet(
|
125
|
-
rb_f: Value,
|
126
|
-
columns: Option<Vec<String>>,
|
127
|
-
projection: Option<Vec<usize>>,
|
128
|
-
n_rows: Option<usize>,
|
129
|
-
parallel: Wrap<ParallelStrategy>,
|
130
|
-
row_index: Option<(String, IdxSize)>,
|
131
|
-
low_memory: bool,
|
132
|
-
use_statistics: bool,
|
133
|
-
rechunk: bool,
|
134
|
-
) -> RbResult<Self> {
|
135
|
-
use EitherRustRubyFile::*;
|
136
|
-
|
137
|
-
let row_index = row_index.map(|(name, offset)| RowIndex {
|
138
|
-
name: name.into(),
|
139
|
-
offset,
|
140
|
-
});
|
141
|
-
let result = match get_either_file(rb_f, false)? {
|
142
|
-
Rb(f) => {
|
143
|
-
let buf = f.as_buffer();
|
144
|
-
ParquetReader::new(buf)
|
145
|
-
.with_projection(projection)
|
146
|
-
.with_columns(columns)
|
147
|
-
.read_parallel(parallel.0)
|
148
|
-
.with_slice(n_rows.map(|x| (0, x)))
|
149
|
-
.with_row_index(row_index)
|
150
|
-
.set_low_memory(low_memory)
|
151
|
-
.use_statistics(use_statistics)
|
152
|
-
.set_rechunk(rechunk)
|
153
|
-
.finish()
|
154
|
-
}
|
155
|
-
Rust(f) => ParquetReader::new(f.into_inner())
|
156
|
-
.with_projection(projection)
|
157
|
-
.with_columns(columns)
|
158
|
-
.read_parallel(parallel.0)
|
159
|
-
.with_slice(n_rows.map(|x| (0, x)))
|
160
|
-
.with_row_index(row_index)
|
161
|
-
.use_statistics(use_statistics)
|
162
|
-
.set_rechunk(rechunk)
|
163
|
-
.finish(),
|
164
|
-
};
|
165
|
-
let df = result.map_err(RbPolarsErr::from)?;
|
166
|
-
Ok(RbDataFrame::new(df))
|
167
|
-
}
|
168
|
-
|
169
123
|
pub fn read_json(
|
170
124
|
rb_f: Value,
|
171
125
|
infer_schema_length: Option<usize>,
|
172
126
|
schema: Option<Wrap<Schema>>,
|
173
127
|
schema_overrides: Option<Wrap<Schema>>,
|
174
128
|
) -> RbResult<Self> {
|
175
|
-
let
|
129
|
+
let rb_f = read_if_bytesio(rb_f);
|
130
|
+
let mmap_bytes_r = get_mmap_bytes_reader(&rb_f)?;
|
176
131
|
|
177
132
|
let mut builder = JsonReader::new(mmap_bytes_r)
|
178
133
|
.with_json_format(JsonFormat::Json)
|
@@ -196,7 +151,8 @@ impl RbDataFrame {
|
|
196
151
|
schema: Option<Wrap<Schema>>,
|
197
152
|
schema_overrides: Option<Wrap<Schema>>,
|
198
153
|
) -> RbResult<Self> {
|
199
|
-
let
|
154
|
+
let rb_f = read_if_bytesio(rb_f);
|
155
|
+
let mmap_bytes_r = get_mmap_bytes_reader(&rb_f)?;
|
200
156
|
|
201
157
|
let mut builder = JsonReader::new(mmap_bytes_r)
|
202
158
|
.with_json_format(JsonFormat::JsonLines)
|
@@ -212,7 +168,7 @@ impl RbDataFrame {
|
|
212
168
|
|
213
169
|
let out = builder
|
214
170
|
.finish()
|
215
|
-
.map_err(|e| RbPolarsErr::
|
171
|
+
.map_err(|e| RbPolarsErr::Other(format!("{e}")))?;
|
216
172
|
Ok(out.into())
|
217
173
|
}
|
218
174
|
|
@@ -222,16 +178,16 @@ impl RbDataFrame {
|
|
222
178
|
projection: Option<Vec<usize>>,
|
223
179
|
n_rows: Option<usize>,
|
224
180
|
row_index: Option<(String, IdxSize)>,
|
225
|
-
|
181
|
+
memory_map: bool,
|
226
182
|
) -> RbResult<Self> {
|
227
183
|
let row_index = row_index.map(|(name, offset)| RowIndex {
|
228
184
|
name: name.into(),
|
229
185
|
offset,
|
230
186
|
});
|
231
|
-
let
|
187
|
+
let rb_f = read_if_bytesio(rb_f);
|
188
|
+
let (mmap_bytes_r, mmap_path) = get_mmap_bytes_reader_and_path(&rb_f)?;
|
232
189
|
|
233
|
-
|
234
|
-
let mmap_path = None;
|
190
|
+
let mmap_path = if memory_map { mmap_path } else { None };
|
235
191
|
let df = IpcReader::new(mmap_bytes_r)
|
236
192
|
.with_projection(projection)
|
237
193
|
.with_columns(columns)
|
@@ -255,8 +211,8 @@ impl RbDataFrame {
|
|
255
211
|
name: name.into(),
|
256
212
|
offset,
|
257
213
|
});
|
258
|
-
|
259
|
-
let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
|
214
|
+
let rb_f = read_if_bytesio(rb_f);
|
215
|
+
let mmap_bytes_r = get_mmap_bytes_reader(&rb_f)?;
|
260
216
|
let df = IpcStreamReader::new(mmap_bytes_r)
|
261
217
|
.with_projection(projection)
|
262
218
|
.with_columns(columns)
|
@@ -302,41 +258,19 @@ impl RbDataFrame {
|
|
302
258
|
) -> RbResult<()> {
|
303
259
|
let batch_size = batch_size.0;
|
304
260
|
let null = null_value.unwrap_or_default();
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
.with_null_value(null)
|
319
|
-
.finish(&mut self.df.borrow_mut())
|
320
|
-
.map_err(RbPolarsErr::from)?;
|
321
|
-
} else {
|
322
|
-
let mut buf = Cursor::new(Vec::new());
|
323
|
-
CsvWriter::new(&mut buf)
|
324
|
-
.include_header(include_header)
|
325
|
-
.with_separator(separator)
|
326
|
-
.with_quote_char(quote_char)
|
327
|
-
.with_batch_size(batch_size)
|
328
|
-
.with_datetime_format(datetime_format)
|
329
|
-
.with_date_format(date_format)
|
330
|
-
.with_time_format(time_format)
|
331
|
-
.with_float_precision(float_precision)
|
332
|
-
.with_null_value(null)
|
333
|
-
.finish(&mut self.df.borrow_mut())
|
334
|
-
.map_err(RbPolarsErr::from)?;
|
335
|
-
// TODO less copying
|
336
|
-
let rb_str = RString::from_slice(&buf.into_inner());
|
337
|
-
rb_f.funcall::<_, _, Value>("write", (rb_str,))?;
|
338
|
-
}
|
339
|
-
|
261
|
+
let mut buf = get_file_like(rb_f, true)?;
|
262
|
+
CsvWriter::new(&mut buf)
|
263
|
+
.include_header(include_header)
|
264
|
+
.with_separator(separator)
|
265
|
+
.with_quote_char(quote_char)
|
266
|
+
.with_batch_size(batch_size)
|
267
|
+
.with_datetime_format(datetime_format)
|
268
|
+
.with_date_format(date_format)
|
269
|
+
.with_time_format(time_format)
|
270
|
+
.with_float_precision(float_precision)
|
271
|
+
.with_null_value(null)
|
272
|
+
.finish(&mut self.df.borrow_mut())
|
273
|
+
.map_err(RbPolarsErr::from)?;
|
340
274
|
Ok(())
|
341
275
|
}
|
342
276
|
|
@@ -351,26 +285,14 @@ impl RbDataFrame {
|
|
351
285
|
) -> RbResult<()> {
|
352
286
|
let compression = parse_parquet_compression(&compression, compression_level)?;
|
353
287
|
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
.map_err(RbPolarsErr::from)?;
|
363
|
-
} else {
|
364
|
-
let buf = get_file_like(rb_f, true)?;
|
365
|
-
ParquetWriter::new(buf)
|
366
|
-
.with_compression(compression)
|
367
|
-
.with_statistics(statistics.0)
|
368
|
-
.with_row_group_size(row_group_size)
|
369
|
-
.with_data_page_size(data_page_size)
|
370
|
-
.finish(&mut self.df.borrow_mut())
|
371
|
-
.map_err(RbPolarsErr::from)?;
|
372
|
-
}
|
373
|
-
|
288
|
+
let buf = get_file_like(rb_f, true)?;
|
289
|
+
ParquetWriter::new(buf)
|
290
|
+
.with_compression(compression)
|
291
|
+
.with_statistics(statistics.0)
|
292
|
+
.with_row_group_size(row_group_size)
|
293
|
+
.with_data_page_size(data_page_size)
|
294
|
+
.finish(&mut self.df.borrow_mut())
|
295
|
+
.map_err(RbPolarsErr::from)?;
|
374
296
|
Ok(())
|
375
297
|
}
|
376
298
|
|
@@ -386,7 +308,7 @@ impl RbDataFrame {
|
|
386
308
|
(false, _) => serde_json::to_writer(file, &*self.df.borrow())
|
387
309
|
.map_err(|e| PolarsError::ComputeError(format!("{:?}", e).into())),
|
388
310
|
};
|
389
|
-
r.map_err(|e| RbPolarsErr::
|
311
|
+
r.map_err(|e| RbPolarsErr::Other(format!("{:?}", e)))?;
|
390
312
|
Ok(())
|
391
313
|
}
|
392
314
|
|
@@ -397,7 +319,7 @@ impl RbDataFrame {
|
|
397
319
|
.with_json_format(JsonFormat::JsonLines)
|
398
320
|
.finish(&mut self.df.borrow_mut());
|
399
321
|
|
400
|
-
r.map_err(|e| RbPolarsErr::
|
322
|
+
r.map_err(|e| RbPolarsErr::Other(format!("{:?}", e)))?;
|
401
323
|
Ok(())
|
402
324
|
}
|
403
325
|
|
@@ -405,23 +327,24 @@ impl RbDataFrame {
|
|
405
327
|
&self,
|
406
328
|
rb_f: Value,
|
407
329
|
compression: Wrap<Option<IpcCompression>>,
|
330
|
+
compat_level: RbCompatLevel,
|
331
|
+
cloud_options: Option<Vec<(String, String)>>,
|
332
|
+
retries: usize,
|
408
333
|
) -> RbResult<()> {
|
409
|
-
if let Ok(
|
410
|
-
let
|
411
|
-
|
412
|
-
.with_compression(compression.0)
|
413
|
-
.finish(&mut self.df.borrow_mut())
|
414
|
-
.map_err(RbPolarsErr::from)?;
|
334
|
+
let cloud_options = if let Ok(path) = String::try_convert(rb_f) {
|
335
|
+
let cloud_options = parse_cloud_options(&path, cloud_options.unwrap_or_default())?;
|
336
|
+
Some(cloud_options.with_max_retries(retries))
|
415
337
|
} else {
|
416
|
-
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
|
338
|
+
None
|
339
|
+
};
|
340
|
+
|
341
|
+
let f = crate::file::try_get_writeable(rb_f, cloud_options.as_ref())?;
|
342
|
+
|
343
|
+
IpcWriter::new(f)
|
344
|
+
.with_compression(compression.0)
|
345
|
+
.with_compat_level(compat_level.0)
|
346
|
+
.finish(&mut self.df.borrow_mut())
|
347
|
+
.map_err(RbPolarsErr::from)?;
|
425
348
|
Ok(())
|
426
349
|
}
|
427
350
|
|
@@ -429,21 +352,14 @@ impl RbDataFrame {
|
|
429
352
|
&self,
|
430
353
|
rb_f: Value,
|
431
354
|
compression: Wrap<Option<IpcCompression>>,
|
355
|
+
compat_level: RbCompatLevel,
|
432
356
|
) -> RbResult<()> {
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
439
|
-
} else {
|
440
|
-
let mut buf = get_file_like(rb_f, true)?;
|
441
|
-
|
442
|
-
IpcStreamWriter::new(&mut buf)
|
443
|
-
.with_compression(compression.0)
|
444
|
-
.finish(&mut self.df.borrow_mut())
|
445
|
-
.map_err(RbPolarsErr::from)?;
|
446
|
-
}
|
357
|
+
let mut buf = get_file_like(rb_f, true)?;
|
358
|
+
IpcStreamWriter::new(&mut buf)
|
359
|
+
.with_compression(compression.0)
|
360
|
+
.with_compat_level(compat_level.0)
|
361
|
+
.finish(&mut self.df.borrow_mut())
|
362
|
+
.map_err(RbPolarsErr::from)?;
|
447
363
|
Ok(())
|
448
364
|
}
|
449
365
|
|
@@ -451,23 +367,15 @@ impl RbDataFrame {
|
|
451
367
|
&self,
|
452
368
|
rb_f: Value,
|
453
369
|
compression: Wrap<Option<AvroCompression>>,
|
370
|
+
name: String,
|
454
371
|
) -> RbResult<()> {
|
455
372
|
use polars::io::avro::AvroWriter;
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
.map_err(RbPolarsErr::from)?;
|
463
|
-
} else {
|
464
|
-
let mut buf = get_file_like(rb_f, true)?;
|
465
|
-
AvroWriter::new(&mut buf)
|
466
|
-
.with_compression(compression.0)
|
467
|
-
.finish(&mut self.df.borrow_mut())
|
468
|
-
.map_err(RbPolarsErr::from)?;
|
469
|
-
}
|
470
|
-
|
373
|
+
let mut buf = get_file_like(rb_f, true)?;
|
374
|
+
AvroWriter::new(&mut buf)
|
375
|
+
.with_compression(compression.0)
|
376
|
+
.with_name(name)
|
377
|
+
.finish(&mut self.df.borrow_mut())
|
378
|
+
.map_err(RbPolarsErr::from)?;
|
471
379
|
Ok(())
|
472
380
|
}
|
473
381
|
}
|
@@ -0,0 +1,15 @@
|
|
1
|
+
use crate::exceptions::ComputeError;
|
2
|
+
use crate::file::get_file_like;
|
3
|
+
use crate::{RbDataFrame, RbResult};
|
4
|
+
use magnus::Value;
|
5
|
+
use std::io::BufWriter;
|
6
|
+
|
7
|
+
impl RbDataFrame {
|
8
|
+
// TODO add to Ruby
|
9
|
+
pub fn serialize_json(&self, rb_f: Value) -> RbResult<()> {
|
10
|
+
let file = get_file_like(rb_f, true)?;
|
11
|
+
let writer = BufWriter::new(file);
|
12
|
+
serde_json::to_writer(writer, &self.df)
|
13
|
+
.map_err(|err| ComputeError::new_err(err.to_string()))
|
14
|
+
}
|
15
|
+
}
|
data/ext/polars/src/error.rs
CHANGED
@@ -1,67 +1,50 @@
|
|
1
|
-
use
|
1
|
+
use std::fmt::{Debug, Formatter};
|
2
|
+
|
2
3
|
use magnus::Error;
|
3
4
|
use polars::prelude::PolarsError;
|
4
5
|
|
6
|
+
use crate::exceptions::{ComputeError, InvalidOperationError};
|
5
7
|
use crate::rb_modules;
|
6
8
|
|
7
|
-
pub
|
8
|
-
|
9
|
-
|
10
|
-
// convert to Error instead of Self
|
11
|
-
pub fn from(e: PolarsError) -> Error {
|
12
|
-
match e {
|
13
|
-
PolarsError::ComputeError(err) => ComputeError::new_err(err.to_string()),
|
14
|
-
PolarsError::InvalidOperation(err) => InvalidOperationError::new_err(err.to_string()),
|
15
|
-
_ => Error::new(rb_modules::error(), e.to_string()),
|
16
|
-
}
|
17
|
-
}
|
18
|
-
|
19
|
-
pub fn io(e: std::io::Error) -> Error {
|
20
|
-
Error::new(rb_modules::error(), e.to_string())
|
21
|
-
}
|
22
|
-
|
23
|
-
pub fn other(message: String) -> Error {
|
24
|
-
Error::new(rb_modules::error(), message)
|
25
|
-
}
|
26
|
-
}
|
27
|
-
|
28
|
-
pub struct RbTypeError {}
|
29
|
-
|
30
|
-
impl RbTypeError {
|
31
|
-
pub fn new_err(message: String) -> Error {
|
32
|
-
Error::new(exception::type_error(), message)
|
33
|
-
}
|
9
|
+
pub enum RbPolarsErr {
|
10
|
+
Polars(PolarsError),
|
11
|
+
Other(String),
|
34
12
|
}
|
35
13
|
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
pub fn new_err(message: String) -> Error {
|
40
|
-
Error::new(exception::arg_error(), message)
|
14
|
+
impl From<PolarsError> for RbPolarsErr {
|
15
|
+
fn from(err: PolarsError) -> Self {
|
16
|
+
RbPolarsErr::Polars(err)
|
41
17
|
}
|
42
18
|
}
|
43
19
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
pub fn new_err(message: String) -> Error {
|
48
|
-
Error::new(exception::range_error(), message)
|
20
|
+
impl From<std::io::Error> for RbPolarsErr {
|
21
|
+
fn from(value: std::io::Error) -> Self {
|
22
|
+
RbPolarsErr::Other(format!("{value:?}"))
|
49
23
|
}
|
50
24
|
}
|
51
25
|
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
26
|
+
impl From<RbPolarsErr> for Error {
|
27
|
+
fn from(err: RbPolarsErr) -> Self {
|
28
|
+
match err {
|
29
|
+
RbPolarsErr::Polars(err) => match err {
|
30
|
+
PolarsError::ComputeError(err) => ComputeError::new_err(err.to_string()),
|
31
|
+
PolarsError::InvalidOperation(err) => {
|
32
|
+
InvalidOperationError::new_err(err.to_string())
|
33
|
+
}
|
34
|
+
_ => Error::new(rb_modules::error(), err.to_string()),
|
35
|
+
},
|
36
|
+
RbPolarsErr::Other(err) => Error::new(rb_modules::error(), err.to_string()),
|
37
|
+
}
|
57
38
|
}
|
58
39
|
}
|
59
40
|
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
41
|
+
impl Debug for RbPolarsErr {
|
42
|
+
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
43
|
+
use RbPolarsErr::*;
|
44
|
+
match self {
|
45
|
+
Polars(err) => write!(f, "{err:?}"),
|
46
|
+
Other(err) => write!(f, "BindingsError: {err:?}"),
|
47
|
+
}
|
65
48
|
}
|
66
49
|
}
|
67
50
|
|
@@ -0,0 +1,24 @@
|
|
1
|
+
use crate::rb_modules;
|
2
|
+
use magnus::{exception, Error};
|
3
|
+
use std::borrow::Cow;
|
4
|
+
|
5
|
+
macro_rules! create_exception {
|
6
|
+
($type:ident, $cls:expr) => {
|
7
|
+
pub struct $type {}
|
8
|
+
|
9
|
+
impl $type {
|
10
|
+
pub fn new_err<T>(message: T) -> Error
|
11
|
+
where
|
12
|
+
T: Into<Cow<'static, str>>,
|
13
|
+
{
|
14
|
+
Error::new($cls, message)
|
15
|
+
}
|
16
|
+
}
|
17
|
+
};
|
18
|
+
}
|
19
|
+
|
20
|
+
create_exception!(RbTypeError, exception::type_error());
|
21
|
+
create_exception!(RbValueError, exception::arg_error());
|
22
|
+
create_exception!(RbOverflowError, exception::range_error());
|
23
|
+
create_exception!(ComputeError, rb_modules::compute_error());
|
24
|
+
create_exception!(InvalidOperationError, rb_modules::invalid_operation_error());
|
@@ -1,5 +1,3 @@
|
|
1
|
-
use polars::prelude::*;
|
2
|
-
|
3
1
|
use crate::RbExpr;
|
4
2
|
|
5
3
|
impl RbExpr {
|
@@ -28,54 +26,18 @@ impl RbExpr {
|
|
28
26
|
}
|
29
27
|
|
30
28
|
pub fn bin_hex_decode(&self, strict: bool) -> Self {
|
31
|
-
self.clone()
|
32
|
-
.inner
|
33
|
-
.map(
|
34
|
-
move |s| {
|
35
|
-
s.binary()?
|
36
|
-
.hex_decode(strict)
|
37
|
-
.map(|s| Some(s.into_series()))
|
38
|
-
},
|
39
|
-
GetOutput::same_type(),
|
40
|
-
)
|
41
|
-
.with_fmt("bin.hex_decode")
|
42
|
-
.into()
|
29
|
+
self.inner.clone().binary().hex_decode(strict).into()
|
43
30
|
}
|
44
31
|
|
45
32
|
pub fn bin_base64_decode(&self, strict: bool) -> Self {
|
46
|
-
self.clone()
|
47
|
-
.inner
|
48
|
-
.map(
|
49
|
-
move |s| {
|
50
|
-
s.binary()?
|
51
|
-
.base64_decode(strict)
|
52
|
-
.map(|s| Some(s.into_series()))
|
53
|
-
},
|
54
|
-
GetOutput::same_type(),
|
55
|
-
)
|
56
|
-
.with_fmt("bin.base64_decode")
|
57
|
-
.into()
|
33
|
+
self.inner.clone().binary().base64_decode(strict).into()
|
58
34
|
}
|
59
35
|
|
60
36
|
pub fn bin_hex_encode(&self) -> Self {
|
61
|
-
self.clone()
|
62
|
-
.inner
|
63
|
-
.map(
|
64
|
-
move |s| s.binary().map(|s| Some(s.hex_encode().into_series())),
|
65
|
-
GetOutput::same_type(),
|
66
|
-
)
|
67
|
-
.with_fmt("bin.hex_encode")
|
68
|
-
.into()
|
37
|
+
self.inner.clone().binary().hex_encode().into()
|
69
38
|
}
|
70
39
|
|
71
40
|
pub fn bin_base64_encode(&self) -> Self {
|
72
|
-
self.clone()
|
73
|
-
.inner
|
74
|
-
.map(
|
75
|
-
move |s| s.binary().map(|s| Some(s.base64_encode().into_series())),
|
76
|
-
GetOutput::same_type(),
|
77
|
-
)
|
78
|
-
.with_fmt("bin.base64_encode")
|
79
|
-
.into()
|
41
|
+
self.inner.clone().binary().base64_encode().into()
|
80
42
|
}
|
81
43
|
}
|
@@ -13,12 +13,13 @@ impl RbExpr {
|
|
13
13
|
}
|
14
14
|
|
15
15
|
pub fn dt_epoch_seconds(&self) -> Self {
|
16
|
-
self.
|
17
|
-
.
|
16
|
+
self.inner
|
17
|
+
.clone()
|
18
18
|
.map(
|
19
19
|
|s| {
|
20
|
-
s.
|
21
|
-
.
|
20
|
+
s.take_materialized_series()
|
21
|
+
.timestamp(TimeUnit::Milliseconds)
|
22
|
+
.map(|ca| Some((ca / 1000).into_column()))
|
22
23
|
},
|
23
24
|
GetOutput::from_type(DataType::Int64),
|
24
25
|
)
|