polars-df 0.23.0 → 0.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +127 -1
- data/Cargo.lock +72 -58
- data/README.md +31 -27
- data/ext/polars/Cargo.toml +15 -6
- data/ext/polars/src/batched_csv.rs +35 -39
- data/ext/polars/src/c_api/allocator.rs +7 -0
- data/ext/polars/src/c_api/mod.rs +1 -0
- data/ext/polars/src/catalog/unity.rs +123 -101
- data/ext/polars/src/conversion/any_value.rs +13 -17
- data/ext/polars/src/conversion/chunked_array.rs +5 -5
- data/ext/polars/src/conversion/datetime.rs +3 -2
- data/ext/polars/src/conversion/mod.rs +50 -45
- data/ext/polars/src/dataframe/export.rs +13 -13
- data/ext/polars/src/dataframe/general.rs +223 -223
- data/ext/polars/src/dataframe/io.rs +27 -141
- data/ext/polars/src/dataframe/mod.rs +13 -5
- data/ext/polars/src/dataframe/serde.rs +1 -1
- data/ext/polars/src/error.rs +44 -7
- data/ext/polars/src/exceptions.rs +45 -12
- data/ext/polars/src/expr/array.rs +12 -0
- data/ext/polars/src/expr/datatype.rs +2 -2
- data/ext/polars/src/expr/datetime.rs +4 -5
- data/ext/polars/src/expr/general.rs +49 -13
- data/ext/polars/src/expr/list.rs +4 -0
- data/ext/polars/src/expr/meta.rs +8 -3
- data/ext/polars/src/expr/mod.rs +22 -6
- data/ext/polars/src/expr/name.rs +19 -8
- data/ext/polars/src/expr/rolling.rs +50 -1
- data/ext/polars/src/expr/string.rs +0 -1
- data/ext/polars/src/expr/struct.rs +7 -2
- data/ext/polars/src/file.rs +136 -103
- data/ext/polars/src/functions/aggregation.rs +9 -8
- data/ext/polars/src/functions/io.rs +81 -10
- data/ext/polars/src/functions/lazy.rs +95 -21
- data/ext/polars/src/functions/mod.rs +2 -0
- data/ext/polars/src/functions/range.rs +19 -3
- data/ext/polars/src/functions/strings.rs +6 -0
- data/ext/polars/src/functions/utils.rs +6 -0
- data/ext/polars/src/interop/arrow/mod.rs +50 -1
- data/ext/polars/src/interop/arrow/{to_ruby.rs → to_rb.rs} +30 -0
- data/ext/polars/src/interop/arrow/to_rust.rs +43 -0
- data/ext/polars/src/interop/numo/to_numo_df.rs +1 -1
- data/ext/polars/src/interop/numo/to_numo_series.rs +1 -1
- data/ext/polars/src/lazyframe/exitable.rs +39 -0
- data/ext/polars/src/lazyframe/general.rs +340 -236
- data/ext/polars/src/lazyframe/mod.rs +46 -10
- data/ext/polars/src/lazyframe/optflags.rs +5 -4
- data/ext/polars/src/lazyframe/serde.rs +11 -3
- data/ext/polars/src/lazyframe/sink.rs +10 -5
- data/ext/polars/src/lazygroupby.rs +6 -7
- data/ext/polars/src/lib.rs +141 -76
- data/ext/polars/src/map/dataframe.rs +12 -12
- data/ext/polars/src/map/lazy.rs +7 -5
- data/ext/polars/src/map/mod.rs +15 -8
- data/ext/polars/src/map/series.rs +3 -3
- data/ext/polars/src/on_startup.rs +16 -8
- data/ext/polars/src/prelude.rs +1 -0
- data/ext/polars/src/rb_modules.rs +19 -49
- data/ext/polars/src/series/aggregation.rs +79 -140
- data/ext/polars/src/series/arithmetic.rs +16 -22
- data/ext/polars/src/series/comparison.rs +101 -222
- data/ext/polars/src/series/construction.rs +17 -18
- data/ext/polars/src/series/export.rs +1 -1
- data/ext/polars/src/series/general.rs +254 -289
- data/ext/polars/src/series/import.rs +17 -0
- data/ext/polars/src/series/map.rs +178 -160
- data/ext/polars/src/series/mod.rs +28 -12
- data/ext/polars/src/series/scatter.rs +12 -9
- data/ext/polars/src/sql.rs +16 -9
- data/ext/polars/src/testing/frame.rs +31 -0
- data/ext/polars/src/testing/mod.rs +5 -0
- data/ext/polars/src/testing/series.rs +31 -0
- data/ext/polars/src/timeout.rs +105 -0
- data/ext/polars/src/utils.rs +159 -1
- data/lib/polars/array_expr.rb +81 -12
- data/lib/polars/array_name_space.rb +74 -7
- data/lib/polars/batched_csv_reader.rb +21 -21
- data/lib/polars/binary_name_space.rb +1 -1
- data/lib/polars/cat_expr.rb +7 -7
- data/lib/polars/config.rb +1 -1
- data/lib/polars/convert.rb +189 -34
- data/lib/polars/data_frame.rb +1066 -831
- data/lib/polars/data_frame_plot.rb +173 -0
- data/lib/polars/data_type_group.rb +1 -0
- data/lib/polars/data_types.rb +31 -12
- data/lib/polars/date_time_expr.rb +51 -69
- data/lib/polars/date_time_name_space.rb +80 -112
- data/lib/polars/dynamic_group_by.rb +7 -7
- data/lib/polars/exceptions.rb +50 -10
- data/lib/polars/expr.rb +470 -517
- data/lib/polars/functions/aggregation/horizontal.rb +0 -1
- data/lib/polars/functions/aggregation/vertical.rb +2 -3
- data/lib/polars/functions/as_datatype.rb +290 -8
- data/lib/polars/functions/eager.rb +204 -10
- data/lib/polars/functions/escape_regex.rb +21 -0
- data/lib/polars/functions/lazy.rb +409 -169
- data/lib/polars/functions/lit.rb +17 -1
- data/lib/polars/functions/range/int_range.rb +74 -2
- data/lib/polars/functions/range/linear_space.rb +77 -0
- data/lib/polars/functions/range/time_range.rb +1 -1
- data/lib/polars/functions/repeat.rb +3 -12
- data/lib/polars/functions/whenthen.rb +2 -2
- data/lib/polars/group_by.rb +72 -20
- data/lib/polars/iceberg_dataset.rb +1 -6
- data/lib/polars/in_process_query.rb +37 -0
- data/lib/polars/io/cloud.rb +18 -0
- data/lib/polars/io/csv.rb +265 -126
- data/lib/polars/io/database.rb +0 -1
- data/lib/polars/io/delta.rb +15 -7
- data/lib/polars/io/ipc.rb +24 -17
- data/lib/polars/io/ndjson.rb +161 -24
- data/lib/polars/io/parquet.rb +101 -38
- data/lib/polars/lazy_frame.rb +849 -558
- data/lib/polars/lazy_group_by.rb +327 -2
- data/lib/polars/list_expr.rb +94 -16
- data/lib/polars/list_name_space.rb +88 -24
- data/lib/polars/meta_expr.rb +42 -1
- data/lib/polars/name_expr.rb +41 -4
- data/lib/polars/query_opt_flags.rb +198 -2
- data/lib/polars/rolling_group_by.rb +3 -3
- data/lib/polars/schema.rb +21 -3
- data/lib/polars/selector.rb +37 -2
- data/lib/polars/selectors.rb +45 -9
- data/lib/polars/series.rb +1156 -728
- data/lib/polars/series_plot.rb +72 -0
- data/lib/polars/slice.rb +1 -1
- data/lib/polars/sql_context.rb +11 -4
- data/lib/polars/string_expr.rb +59 -68
- data/lib/polars/string_name_space.rb +51 -87
- data/lib/polars/struct_expr.rb +36 -18
- data/lib/polars/testing.rb +24 -273
- data/lib/polars/utils/constants.rb +2 -0
- data/lib/polars/utils/construction/data_frame.rb +410 -0
- data/lib/polars/utils/construction/series.rb +364 -0
- data/lib/polars/utils/construction/utils.rb +9 -0
- data/lib/polars/utils/deprecation.rb +11 -0
- data/lib/polars/utils/serde.rb +8 -3
- data/lib/polars/utils/unstable.rb +19 -0
- data/lib/polars/utils/various.rb +59 -0
- data/lib/polars/utils.rb +46 -47
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +47 -1
- metadata +25 -6
- data/ext/polars/src/allocator.rs +0 -13
- data/lib/polars/plot.rb +0 -109
|
@@ -23,28 +23,29 @@ impl RbDataFrame {
|
|
|
23
23
|
let ignore_errors = bool::try_convert(arguments[4])?;
|
|
24
24
|
let n_rows = Option::<usize>::try_convert(arguments[5])?;
|
|
25
25
|
let skip_rows = usize::try_convert(arguments[6])?;
|
|
26
|
-
let
|
|
27
|
-
let
|
|
28
|
-
let
|
|
29
|
-
let
|
|
30
|
-
let
|
|
31
|
-
let
|
|
32
|
-
let
|
|
33
|
-
let
|
|
34
|
-
let
|
|
35
|
-
let
|
|
36
|
-
let
|
|
37
|
-
let
|
|
38
|
-
let
|
|
39
|
-
let
|
|
40
|
-
let
|
|
41
|
-
let
|
|
42
|
-
let
|
|
43
|
-
let
|
|
44
|
-
let
|
|
45
|
-
let
|
|
46
|
-
let
|
|
47
|
-
let
|
|
26
|
+
let skip_lines = usize::try_convert(arguments[7])?;
|
|
27
|
+
let projection = Option::<Vec<usize>>::try_convert(arguments[8])?;
|
|
28
|
+
let separator = String::try_convert(arguments[9])?;
|
|
29
|
+
let rechunk = bool::try_convert(arguments[10])?;
|
|
30
|
+
let columns = Option::<Vec<String>>::try_convert(arguments[11])?;
|
|
31
|
+
let encoding = Wrap::<CsvEncoding>::try_convert(arguments[12])?;
|
|
32
|
+
let n_threads = Option::<usize>::try_convert(arguments[13])?;
|
|
33
|
+
let path = Option::<String>::try_convert(arguments[14])?;
|
|
34
|
+
let overwrite_dtype = Option::<Vec<(String, Wrap<DataType>)>>::try_convert(arguments[15])?;
|
|
35
|
+
let overwrite_dtype_slice = Option::<Vec<Wrap<DataType>>>::try_convert(arguments[16])?;
|
|
36
|
+
let low_memory = bool::try_convert(arguments[17])?;
|
|
37
|
+
let comment_prefix = Option::<String>::try_convert(arguments[18])?;
|
|
38
|
+
let quote_char = Option::<String>::try_convert(arguments[19])?;
|
|
39
|
+
let null_values = Option::<Wrap<NullValues>>::try_convert(arguments[20])?;
|
|
40
|
+
let missing_utf8_is_empty_string = bool::try_convert(arguments[21])?;
|
|
41
|
+
let try_parse_dates = bool::try_convert(arguments[22])?;
|
|
42
|
+
let skip_rows_after_header = usize::try_convert(arguments[23])?;
|
|
43
|
+
let row_index = Option::<(String, IdxSize)>::try_convert(arguments[24])?;
|
|
44
|
+
let eol_char = String::try_convert(arguments[25])?;
|
|
45
|
+
let raise_if_empty = bool::try_convert(arguments[26])?;
|
|
46
|
+
let truncate_ragged_lines = bool::try_convert(arguments[27])?;
|
|
47
|
+
let decimal_comma = bool::try_convert(arguments[28])?;
|
|
48
|
+
let schema = Option::<Wrap<Schema>>::try_convert(arguments[29])?;
|
|
48
49
|
// end arguments
|
|
49
50
|
|
|
50
51
|
let null_values = null_values.map(|w| w.0);
|
|
@@ -88,6 +89,7 @@ impl RbDataFrame {
|
|
|
88
89
|
.with_has_header(has_header)
|
|
89
90
|
.with_n_rows(n_rows)
|
|
90
91
|
.with_skip_rows(skip_rows)
|
|
92
|
+
.with_skip_lines(skip_lines)
|
|
91
93
|
.with_ignore_errors(ignore_errors)
|
|
92
94
|
.with_projection(projection.map(Arc::new))
|
|
93
95
|
.with_rechunk(rechunk)
|
|
@@ -145,33 +147,6 @@ impl RbDataFrame {
|
|
|
145
147
|
Ok(out.into())
|
|
146
148
|
}
|
|
147
149
|
|
|
148
|
-
pub fn read_ndjson(
|
|
149
|
-
rb_f: Value,
|
|
150
|
-
ignore_errors: bool,
|
|
151
|
-
schema: Option<Wrap<Schema>>,
|
|
152
|
-
schema_overrides: Option<Wrap<Schema>>,
|
|
153
|
-
) -> RbResult<Self> {
|
|
154
|
-
let rb_f = read_if_bytesio(rb_f);
|
|
155
|
-
let mmap_bytes_r = get_mmap_bytes_reader(&rb_f)?;
|
|
156
|
-
|
|
157
|
-
let mut builder = JsonReader::new(mmap_bytes_r)
|
|
158
|
-
.with_json_format(JsonFormat::JsonLines)
|
|
159
|
-
.with_ignore_errors(ignore_errors);
|
|
160
|
-
|
|
161
|
-
if let Some(schema) = schema {
|
|
162
|
-
builder = builder.with_schema(Arc::new(schema.0));
|
|
163
|
-
}
|
|
164
|
-
|
|
165
|
-
if let Some(schema) = schema_overrides.as_ref() {
|
|
166
|
-
builder = builder.with_schema_overwrite(&schema.0);
|
|
167
|
-
}
|
|
168
|
-
|
|
169
|
-
let out = builder
|
|
170
|
-
.finish()
|
|
171
|
-
.map_err(|e| RbPolarsErr::Other(format!("{e}")))?;
|
|
172
|
-
Ok(out.into())
|
|
173
|
-
}
|
|
174
|
-
|
|
175
150
|
pub fn read_ipc(
|
|
176
151
|
rb_f: Value,
|
|
177
152
|
columns: Option<Vec<String>>,
|
|
@@ -242,101 +217,12 @@ impl RbDataFrame {
|
|
|
242
217
|
Ok(RbDataFrame::new(df))
|
|
243
218
|
}
|
|
244
219
|
|
|
245
|
-
#[allow(clippy::too_many_arguments)]
|
|
246
|
-
pub fn write_csv(
|
|
247
|
-
&self,
|
|
248
|
-
rb_f: Value,
|
|
249
|
-
include_header: bool,
|
|
250
|
-
separator: u8,
|
|
251
|
-
quote_char: u8,
|
|
252
|
-
batch_size: NonZeroUsize,
|
|
253
|
-
datetime_format: Option<String>,
|
|
254
|
-
date_format: Option<String>,
|
|
255
|
-
time_format: Option<String>,
|
|
256
|
-
float_precision: Option<usize>,
|
|
257
|
-
null_value: Option<String>,
|
|
258
|
-
) -> RbResult<()> {
|
|
259
|
-
let null = null_value.unwrap_or_default();
|
|
260
|
-
let mut buf = get_file_like(rb_f, true)?;
|
|
261
|
-
CsvWriter::new(&mut buf)
|
|
262
|
-
.include_header(include_header)
|
|
263
|
-
.with_separator(separator)
|
|
264
|
-
.with_quote_char(quote_char)
|
|
265
|
-
.with_batch_size(batch_size)
|
|
266
|
-
.with_datetime_format(datetime_format)
|
|
267
|
-
.with_date_format(date_format)
|
|
268
|
-
.with_time_format(time_format)
|
|
269
|
-
.with_float_precision(float_precision)
|
|
270
|
-
.with_null_value(null)
|
|
271
|
-
.finish(&mut self.df.borrow_mut())
|
|
272
|
-
.map_err(RbPolarsErr::from)?;
|
|
273
|
-
Ok(())
|
|
274
|
-
}
|
|
275
|
-
|
|
276
|
-
pub fn write_parquet(
|
|
277
|
-
&self,
|
|
278
|
-
rb_f: Value,
|
|
279
|
-
compression: String,
|
|
280
|
-
compression_level: Option<i32>,
|
|
281
|
-
statistics: Wrap<StatisticsOptions>,
|
|
282
|
-
row_group_size: Option<usize>,
|
|
283
|
-
data_page_size: Option<usize>,
|
|
284
|
-
) -> RbResult<()> {
|
|
285
|
-
let compression = parse_parquet_compression(&compression, compression_level)?;
|
|
286
|
-
|
|
287
|
-
let buf = get_file_like(rb_f, true)?;
|
|
288
|
-
ParquetWriter::new(buf)
|
|
289
|
-
.with_compression(compression)
|
|
290
|
-
.with_statistics(statistics.0)
|
|
291
|
-
.with_row_group_size(row_group_size)
|
|
292
|
-
.with_data_page_size(data_page_size)
|
|
293
|
-
.finish(&mut self.df.borrow_mut())
|
|
294
|
-
.map_err(RbPolarsErr::from)?;
|
|
295
|
-
Ok(())
|
|
296
|
-
}
|
|
297
|
-
|
|
298
220
|
pub fn write_json(&self, rb_f: Value) -> RbResult<()> {
|
|
299
221
|
let file = BufWriter::new(get_file_like(rb_f, true)?);
|
|
300
222
|
|
|
301
223
|
JsonWriter::new(file)
|
|
302
224
|
.with_json_format(JsonFormat::Json)
|
|
303
|
-
.finish(&mut self.df.
|
|
304
|
-
.map_err(RbPolarsErr::from)?;
|
|
305
|
-
Ok(())
|
|
306
|
-
}
|
|
307
|
-
|
|
308
|
-
pub fn write_ndjson(&self, rb_f: Value) -> RbResult<()> {
|
|
309
|
-
let file = BufWriter::new(get_file_like(rb_f, true)?);
|
|
310
|
-
|
|
311
|
-
JsonWriter::new(file)
|
|
312
|
-
.with_json_format(JsonFormat::JsonLines)
|
|
313
|
-
.finish(&mut self.df.borrow_mut())
|
|
314
|
-
.map_err(RbPolarsErr::from)?;
|
|
315
|
-
|
|
316
|
-
Ok(())
|
|
317
|
-
}
|
|
318
|
-
|
|
319
|
-
pub fn write_ipc(
|
|
320
|
-
&self,
|
|
321
|
-
rb_f: Value,
|
|
322
|
-
compression: Wrap<Option<IpcCompression>>,
|
|
323
|
-
compat_level: RbCompatLevel,
|
|
324
|
-
cloud_options: Option<Vec<(String, String)>>,
|
|
325
|
-
retries: usize,
|
|
326
|
-
) -> RbResult<()> {
|
|
327
|
-
let cloud_options = if let Ok(path) = String::try_convert(rb_f) {
|
|
328
|
-
let cloud_options = parse_cloud_options(&path, cloud_options.unwrap_or_default())?;
|
|
329
|
-
Some(cloud_options.with_max_retries(retries))
|
|
330
|
-
} else {
|
|
331
|
-
None
|
|
332
|
-
};
|
|
333
|
-
|
|
334
|
-
let f = crate::file::try_get_writeable(rb_f, cloud_options.as_ref())?;
|
|
335
|
-
|
|
336
|
-
IpcWriter::new(f)
|
|
337
|
-
.with_compression(compression.0)
|
|
338
|
-
.with_compat_level(compat_level.0)
|
|
339
|
-
.finish(&mut self.df.borrow_mut())
|
|
225
|
+
.finish(&mut self.df.write())
|
|
340
226
|
.map_err(RbPolarsErr::from)?;
|
|
341
227
|
Ok(())
|
|
342
228
|
}
|
|
@@ -351,7 +237,7 @@ impl RbDataFrame {
|
|
|
351
237
|
IpcStreamWriter::new(&mut buf)
|
|
352
238
|
.with_compression(compression.0)
|
|
353
239
|
.with_compat_level(compat_level.0)
|
|
354
|
-
.finish(&mut self.df.
|
|
240
|
+
.finish(&mut self.df.write())
|
|
355
241
|
.map_err(RbPolarsErr::from)?;
|
|
356
242
|
Ok(())
|
|
357
243
|
}
|
|
@@ -367,7 +253,7 @@ impl RbDataFrame {
|
|
|
367
253
|
AvroWriter::new(&mut buf)
|
|
368
254
|
.with_compression(compression.0)
|
|
369
255
|
.with_name(name)
|
|
370
|
-
.finish(&mut self.df.
|
|
256
|
+
.finish(&mut self.df.write())
|
|
371
257
|
.map_err(RbPolarsErr::from)?;
|
|
372
258
|
Ok(())
|
|
373
259
|
}
|
|
@@ -5,27 +5,35 @@ mod io;
|
|
|
5
5
|
mod serde;
|
|
6
6
|
|
|
7
7
|
use magnus::{DataTypeFunctions, TypedData, gc};
|
|
8
|
+
use parking_lot::RwLock;
|
|
8
9
|
use polars::prelude::*;
|
|
9
|
-
use std::cell::RefCell;
|
|
10
10
|
|
|
11
11
|
use crate::series::mark_series;
|
|
12
12
|
|
|
13
13
|
#[derive(TypedData)]
|
|
14
14
|
#[magnus(class = "Polars::RbDataFrame", mark)]
|
|
15
15
|
pub struct RbDataFrame {
|
|
16
|
-
pub df:
|
|
16
|
+
pub df: RwLock<DataFrame>,
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
impl Clone for RbDataFrame {
|
|
20
|
+
fn clone(&self) -> Self {
|
|
21
|
+
RbDataFrame {
|
|
22
|
+
df: RwLock::new(self.df.read().clone()),
|
|
23
|
+
}
|
|
24
|
+
}
|
|
17
25
|
}
|
|
18
26
|
|
|
19
27
|
impl From<DataFrame> for RbDataFrame {
|
|
20
28
|
fn from(df: DataFrame) -> Self {
|
|
21
|
-
|
|
29
|
+
Self::new(df)
|
|
22
30
|
}
|
|
23
31
|
}
|
|
24
32
|
|
|
25
33
|
impl RbDataFrame {
|
|
26
34
|
pub fn new(df: DataFrame) -> Self {
|
|
27
35
|
RbDataFrame {
|
|
28
|
-
df:
|
|
36
|
+
df: RwLock::new(df),
|
|
29
37
|
}
|
|
30
38
|
}
|
|
31
39
|
}
|
|
@@ -36,7 +44,7 @@ impl DataTypeFunctions for RbDataFrame {
|
|
|
36
44
|
// currently, this should only happen for write_* methods,
|
|
37
45
|
// which should refuse to write Object datatype, and therefore be safe,
|
|
38
46
|
// since GC will not have a chance to run
|
|
39
|
-
if let
|
|
47
|
+
if let Some(df) = self.df.try_read() {
|
|
40
48
|
for column in df.get_columns() {
|
|
41
49
|
if let DataType::Object(_) = column.dtype() {
|
|
42
50
|
match column {
|
data/ext/polars/src/error.rs
CHANGED
|
@@ -3,11 +3,17 @@ use std::fmt::{Debug, Formatter};
|
|
|
3
3
|
use magnus::Error;
|
|
4
4
|
use polars::prelude::PolarsError;
|
|
5
5
|
|
|
6
|
-
use crate::
|
|
7
|
-
use crate::
|
|
6
|
+
use crate::RbErr;
|
|
7
|
+
use crate::exceptions::{
|
|
8
|
+
AssertionError, ColumnNotFoundError, ComputeError, DuplicateError, InvalidOperationError,
|
|
9
|
+
NoDataError, OutOfBoundsError, RbIOError, RbRuntimeError, SQLInterfaceError, SQLSyntaxError,
|
|
10
|
+
SchemaError, SchemaFieldNotFoundError, ShapeError, StringCacheMismatchError,
|
|
11
|
+
StructFieldNotFoundError,
|
|
12
|
+
};
|
|
8
13
|
|
|
9
14
|
pub enum RbPolarsErr {
|
|
10
15
|
Polars(PolarsError),
|
|
16
|
+
Ruby(RbErr),
|
|
11
17
|
Other(String),
|
|
12
18
|
}
|
|
13
19
|
|
|
@@ -17,9 +23,9 @@ impl From<PolarsError> for RbPolarsErr {
|
|
|
17
23
|
}
|
|
18
24
|
}
|
|
19
25
|
|
|
20
|
-
impl From<
|
|
21
|
-
fn from(
|
|
22
|
-
RbPolarsErr::
|
|
26
|
+
impl From<RbErr> for RbPolarsErr {
|
|
27
|
+
fn from(err: RbErr) -> Self {
|
|
28
|
+
RbPolarsErr::Ruby(err)
|
|
23
29
|
}
|
|
24
30
|
}
|
|
25
31
|
|
|
@@ -27,13 +33,43 @@ impl From<RbPolarsErr> for Error {
|
|
|
27
33
|
fn from(err: RbPolarsErr) -> Self {
|
|
28
34
|
match err {
|
|
29
35
|
RbPolarsErr::Polars(err) => match err {
|
|
36
|
+
PolarsError::AssertionError(err) => AssertionError::new_err(err.to_string()),
|
|
37
|
+
PolarsError::ColumnNotFound(name) => ColumnNotFoundError::new_err(name.to_string()),
|
|
30
38
|
PolarsError::ComputeError(err) => ComputeError::new_err(err.to_string()),
|
|
39
|
+
PolarsError::Duplicate(err) => DuplicateError::new_err(err.to_string()),
|
|
31
40
|
PolarsError::InvalidOperation(err) => {
|
|
32
41
|
InvalidOperationError::new_err(err.to_string())
|
|
33
42
|
}
|
|
34
|
-
|
|
43
|
+
PolarsError::IO { error, msg } => {
|
|
44
|
+
let msg = if let Some(msg) = msg {
|
|
45
|
+
msg.to_string()
|
|
46
|
+
} else {
|
|
47
|
+
error.to_string()
|
|
48
|
+
};
|
|
49
|
+
RbIOError::new_err(msg)
|
|
50
|
+
}
|
|
51
|
+
PolarsError::NoData(err) => NoDataError::new_err(err.to_string()),
|
|
52
|
+
PolarsError::OutOfBounds(err) => OutOfBoundsError::new_err(err.to_string()),
|
|
53
|
+
PolarsError::SQLInterface(name) => SQLInterfaceError::new_err(name.to_string()),
|
|
54
|
+
PolarsError::SQLSyntax(name) => SQLSyntaxError::new_err(name.to_string()),
|
|
55
|
+
PolarsError::SchemaFieldNotFound(name) => {
|
|
56
|
+
SchemaFieldNotFoundError::new_err(name.to_string())
|
|
57
|
+
}
|
|
58
|
+
PolarsError::SchemaMismatch(err) => SchemaError::new_err(err.to_string()),
|
|
59
|
+
PolarsError::ShapeMismatch(err) => ShapeError::new_err(err.to_string()),
|
|
60
|
+
PolarsError::StringCacheMismatch(err) => {
|
|
61
|
+
StringCacheMismatchError::new_err(err.to_string())
|
|
62
|
+
}
|
|
63
|
+
PolarsError::StructFieldNotFound(name) => {
|
|
64
|
+
StructFieldNotFoundError::new_err(name.to_string())
|
|
65
|
+
}
|
|
66
|
+
PolarsError::Context { .. } => {
|
|
67
|
+
let tmp = RbPolarsErr::Polars(err.context_trace());
|
|
68
|
+
RbErr::from(tmp)
|
|
69
|
+
}
|
|
35
70
|
},
|
|
36
|
-
RbPolarsErr::
|
|
71
|
+
RbPolarsErr::Ruby(err) => err,
|
|
72
|
+
err => RbRuntimeError::new_err(format!("{:?}", &err)),
|
|
37
73
|
}
|
|
38
74
|
}
|
|
39
75
|
}
|
|
@@ -43,6 +79,7 @@ impl Debug for RbPolarsErr {
|
|
|
43
79
|
use RbPolarsErr::*;
|
|
44
80
|
match self {
|
|
45
81
|
Polars(err) => write!(f, "{err:?}"),
|
|
82
|
+
Ruby(err) => write!(f, "{err:?}"),
|
|
46
83
|
Other(err) => write!(f, "BindingsError: {err:?}"),
|
|
47
84
|
}
|
|
48
85
|
}
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
use crate::rb_modules;
|
|
2
|
-
use magnus::{Error, Ruby};
|
|
2
|
+
use magnus::{Error, Module, Ruby};
|
|
3
3
|
use std::borrow::Cow;
|
|
4
4
|
|
|
5
5
|
macro_rules! create_exception {
|
|
6
|
-
($type:ident
|
|
6
|
+
($type:ident) => {
|
|
7
7
|
pub struct $type {}
|
|
8
8
|
|
|
9
9
|
impl $type {
|
|
@@ -11,18 +11,51 @@ macro_rules! create_exception {
|
|
|
11
11
|
where
|
|
12
12
|
T: Into<Cow<'static, str>>,
|
|
13
13
|
{
|
|
14
|
-
|
|
14
|
+
let ruby = Ruby::get().unwrap();
|
|
15
|
+
let cls = rb_modules::polars(&ruby)
|
|
16
|
+
.const_get(stringify!($type))
|
|
17
|
+
.unwrap();
|
|
18
|
+
Error::new(cls, message)
|
|
15
19
|
}
|
|
16
20
|
}
|
|
17
21
|
};
|
|
18
22
|
}
|
|
19
23
|
|
|
20
|
-
create_exception!(
|
|
21
|
-
create_exception!(
|
|
22
|
-
create_exception!(
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
);
|
|
26
|
-
create_exception!(
|
|
27
|
-
create_exception!(
|
|
28
|
-
create_exception!(
|
|
24
|
+
create_exception!(AssertionError);
|
|
25
|
+
create_exception!(ColumnNotFoundError);
|
|
26
|
+
create_exception!(ComputeError);
|
|
27
|
+
create_exception!(DuplicateError);
|
|
28
|
+
create_exception!(InvalidOperationError);
|
|
29
|
+
create_exception!(NoDataError);
|
|
30
|
+
create_exception!(OutOfBoundsError);
|
|
31
|
+
create_exception!(SQLInterfaceError);
|
|
32
|
+
create_exception!(SQLSyntaxError);
|
|
33
|
+
create_exception!(SchemaError);
|
|
34
|
+
create_exception!(SchemaFieldNotFoundError);
|
|
35
|
+
create_exception!(ShapeError);
|
|
36
|
+
create_exception!(StringCacheMismatchError);
|
|
37
|
+
create_exception!(StructFieldNotFoundError);
|
|
38
|
+
|
|
39
|
+
macro_rules! create_ruby_exception {
|
|
40
|
+
($type:ident, $cls:ident) => {
|
|
41
|
+
pub struct $type {}
|
|
42
|
+
|
|
43
|
+
impl $type {
|
|
44
|
+
pub fn new_err<T>(message: T) -> Error
|
|
45
|
+
where
|
|
46
|
+
T: Into<Cow<'static, str>>,
|
|
47
|
+
{
|
|
48
|
+
let cls = Ruby::get().unwrap().$cls();
|
|
49
|
+
Error::new(cls, message)
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
};
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
create_ruby_exception!(RbKeyboardInterrupt, exception_interrupt);
|
|
56
|
+
create_ruby_exception!(RbIndexError, exception_index_error);
|
|
57
|
+
create_ruby_exception!(RbIOError, exception_io_error);
|
|
58
|
+
create_ruby_exception!(RbOverflowError, exception_range_error);
|
|
59
|
+
create_ruby_exception!(RbRuntimeError, exception_runtime_error);
|
|
60
|
+
create_ruby_exception!(RbTypeError, exception_type_error);
|
|
61
|
+
create_ruby_exception!(RbValueError, exception_arg_error);
|
|
@@ -159,4 +159,16 @@ impl RbExpr {
|
|
|
159
159
|
pub fn arr_explode(&self) -> Self {
|
|
160
160
|
self.inner.clone().arr().explode().into()
|
|
161
161
|
}
|
|
162
|
+
|
|
163
|
+
pub fn arr_eval(&self, expr: &RbExpr, as_list: bool) -> Self {
|
|
164
|
+
self.inner
|
|
165
|
+
.clone()
|
|
166
|
+
.arr()
|
|
167
|
+
.eval(expr.inner.clone(), as_list)
|
|
168
|
+
.into()
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
pub fn arr_agg(&self, expr: &RbExpr) -> Self {
|
|
172
|
+
self.inner.clone().arr().agg(expr.inner.clone()).into()
|
|
173
|
+
}
|
|
162
174
|
}
|
|
@@ -26,8 +26,8 @@ impl RbDataTypeExpr {
|
|
|
26
26
|
DataTypeExpr::OfExpr(Box::new(expr.inner.clone())).into()
|
|
27
27
|
}
|
|
28
28
|
|
|
29
|
-
pub fn collect_dtype(ruby: &Ruby,
|
|
30
|
-
let dtype =
|
|
29
|
+
pub fn collect_dtype(ruby: &Ruby, self_: &Self, schema: Wrap<Schema>) -> RbResult<Value> {
|
|
30
|
+
let dtype = self_
|
|
31
31
|
.clone()
|
|
32
32
|
.inner
|
|
33
33
|
.into_datatype(&schema.0)
|
|
@@ -26,10 +26,6 @@ impl RbExpr {
|
|
|
26
26
|
self.inner.clone().dt().offset_by(by.inner.clone()).into()
|
|
27
27
|
}
|
|
28
28
|
|
|
29
|
-
pub fn dt_with_time_unit(&self, tu: Wrap<TimeUnit>) -> Self {
|
|
30
|
-
self.inner.clone().dt().with_time_unit(tu.0).into()
|
|
31
|
-
}
|
|
32
|
-
|
|
33
29
|
pub fn dt_convert_time_zone(&self, time_zone: String) -> RbResult<Self> {
|
|
34
30
|
Ok(self
|
|
35
31
|
.inner
|
|
@@ -90,7 +86,6 @@ impl RbExpr {
|
|
|
90
86
|
self.inner.clone().dt().round(every.inner.clone()).into()
|
|
91
87
|
}
|
|
92
88
|
|
|
93
|
-
#[allow(clippy::too_many_arguments)]
|
|
94
89
|
pub fn dt_replace(
|
|
95
90
|
&self,
|
|
96
91
|
year: &Self,
|
|
@@ -162,6 +157,10 @@ impl RbExpr {
|
|
|
162
157
|
self.clone().inner.dt().month().into()
|
|
163
158
|
}
|
|
164
159
|
|
|
160
|
+
pub fn dt_days_in_month(&self) -> Self {
|
|
161
|
+
self.inner.clone().dt().days_in_month().into()
|
|
162
|
+
}
|
|
163
|
+
|
|
165
164
|
pub fn dt_week(&self) -> Self {
|
|
166
165
|
self.clone().inner.dt().week().into()
|
|
167
166
|
}
|
|
@@ -4,12 +4,14 @@ use magnus::{RArray, Value};
|
|
|
4
4
|
use polars::lazy::dsl;
|
|
5
5
|
use polars::prelude::*;
|
|
6
6
|
use polars::series::ops::NullBehavior;
|
|
7
|
+
use polars_core::chunked_array::cast::CastOptions;
|
|
7
8
|
use polars_core::series::IsSorted;
|
|
8
9
|
|
|
10
|
+
use super::datatype::RbDataTypeExpr;
|
|
9
11
|
use super::selector::RbSelector;
|
|
10
12
|
use crate::conversion::{Wrap, parse_fill_null_strategy};
|
|
13
|
+
use crate::expr::ToExprs;
|
|
11
14
|
use crate::map::lazy::map_single;
|
|
12
|
-
use crate::rb_exprs_to_exprs;
|
|
13
15
|
use crate::{RbExpr, RbPolarsErr, RbResult};
|
|
14
16
|
|
|
15
17
|
impl RbExpr {
|
|
@@ -161,6 +163,10 @@ impl RbExpr {
|
|
|
161
163
|
self.inner.clone().last().into()
|
|
162
164
|
}
|
|
163
165
|
|
|
166
|
+
pub fn item(&self, allow_empty: bool) -> Self {
|
|
167
|
+
self.inner.clone().item(allow_empty).into()
|
|
168
|
+
}
|
|
169
|
+
|
|
164
170
|
pub fn implode(&self) -> Self {
|
|
165
171
|
self.inner.clone().implode().into()
|
|
166
172
|
}
|
|
@@ -254,14 +260,20 @@ impl RbExpr {
|
|
|
254
260
|
self.inner.clone().null_count().into()
|
|
255
261
|
}
|
|
256
262
|
|
|
257
|
-
pub fn cast(&self,
|
|
258
|
-
let
|
|
259
|
-
|
|
260
|
-
|
|
263
|
+
pub fn cast(&self, dtype: &RbDataTypeExpr, strict: bool, wrap_numerical: bool) -> Self {
|
|
264
|
+
let options = if wrap_numerical {
|
|
265
|
+
CastOptions::Overflowing
|
|
266
|
+
} else if strict {
|
|
267
|
+
CastOptions::Strict
|
|
261
268
|
} else {
|
|
262
|
-
|
|
269
|
+
CastOptions::NonStrict
|
|
263
270
|
};
|
|
264
|
-
|
|
271
|
+
|
|
272
|
+
let expr = self
|
|
273
|
+
.inner
|
|
274
|
+
.clone()
|
|
275
|
+
.cast_with_options(dtype.inner.clone(), options);
|
|
276
|
+
expr.into()
|
|
265
277
|
}
|
|
266
278
|
|
|
267
279
|
pub fn sort_with(&self, descending: bool, nulls_last: bool) -> Self {
|
|
@@ -286,7 +298,7 @@ impl RbExpr {
|
|
|
286
298
|
}
|
|
287
299
|
|
|
288
300
|
pub fn top_k_by(&self, by: RArray, k: &Self, reverse: Vec<bool>) -> RbResult<Self> {
|
|
289
|
-
let by =
|
|
301
|
+
let by = by.to_exprs()?;
|
|
290
302
|
Ok(self
|
|
291
303
|
.inner
|
|
292
304
|
.clone()
|
|
@@ -299,7 +311,7 @@ impl RbExpr {
|
|
|
299
311
|
}
|
|
300
312
|
|
|
301
313
|
pub fn bottom_k_by(&self, by: RArray, k: &Self, reverse: Vec<bool>) -> RbResult<Self> {
|
|
302
|
-
let by =
|
|
314
|
+
let by = by.to_exprs()?;
|
|
303
315
|
Ok(self
|
|
304
316
|
.inner
|
|
305
317
|
.clone()
|
|
@@ -355,7 +367,7 @@ impl RbExpr {
|
|
|
355
367
|
multithreaded: bool,
|
|
356
368
|
maintain_order: bool,
|
|
357
369
|
) -> RbResult<Self> {
|
|
358
|
-
let by =
|
|
370
|
+
let by = by.to_exprs()?;
|
|
359
371
|
Ok(self
|
|
360
372
|
.inner
|
|
361
373
|
.clone()
|
|
@@ -585,9 +597,33 @@ impl RbExpr {
|
|
|
585
597
|
self.inner.clone().is_duplicated().into()
|
|
586
598
|
}
|
|
587
599
|
|
|
588
|
-
pub fn over(
|
|
589
|
-
|
|
590
|
-
|
|
600
|
+
pub fn over(
|
|
601
|
+
&self,
|
|
602
|
+
partition_by: Option<RArray>,
|
|
603
|
+
order_by: Option<RArray>,
|
|
604
|
+
order_by_descending: bool,
|
|
605
|
+
order_by_nulls_last: bool,
|
|
606
|
+
mapping_strategy: Wrap<WindowMapping>,
|
|
607
|
+
) -> RbResult<Self> {
|
|
608
|
+
let partition_by = partition_by.map(|v| v.to_exprs()).transpose()?;
|
|
609
|
+
|
|
610
|
+
let order_by = order_by.map(|v| v.to_exprs()).transpose()?.map(|order_by| {
|
|
611
|
+
(
|
|
612
|
+
order_by,
|
|
613
|
+
SortOptions {
|
|
614
|
+
descending: order_by_descending,
|
|
615
|
+
nulls_last: order_by_nulls_last,
|
|
616
|
+
..Default::default()
|
|
617
|
+
},
|
|
618
|
+
)
|
|
619
|
+
});
|
|
620
|
+
|
|
621
|
+
Ok(self
|
|
622
|
+
.inner
|
|
623
|
+
.clone()
|
|
624
|
+
.over_with_options(partition_by, order_by, mapping_strategy.0)
|
|
625
|
+
.map_err(RbPolarsErr::from)?
|
|
626
|
+
.into())
|
|
591
627
|
}
|
|
592
628
|
|
|
593
629
|
pub fn rolling(
|
data/ext/polars/src/expr/list.rs
CHANGED
|
@@ -47,6 +47,10 @@ impl RbExpr {
|
|
|
47
47
|
self.inner.clone().list().eval(expr.inner.clone()).into()
|
|
48
48
|
}
|
|
49
49
|
|
|
50
|
+
pub fn list_agg(&self, expr: &RbExpr) -> Self {
|
|
51
|
+
self.inner.clone().list().agg(expr.inner.clone()).into()
|
|
52
|
+
}
|
|
53
|
+
|
|
50
54
|
pub fn list_filter(&self, predicate: &RbExpr) -> Self {
|
|
51
55
|
self.inner
|
|
52
56
|
.clone()
|
data/ext/polars/src/expr/meta.rs
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
use magnus::{RArray, Ruby};
|
|
2
2
|
use polars::prelude::Schema;
|
|
3
3
|
|
|
4
|
+
use crate::expr::ToRbExprs;
|
|
4
5
|
use crate::{RbExpr, RbPolarsErr, RbResult, Wrap};
|
|
5
6
|
|
|
6
7
|
impl RbExpr {
|
|
@@ -8,15 +9,15 @@ impl RbExpr {
|
|
|
8
9
|
self.inner == other.inner
|
|
9
10
|
}
|
|
10
11
|
|
|
11
|
-
pub fn meta_pop(ruby: &Ruby,
|
|
12
|
+
pub fn meta_pop(ruby: &Ruby, self_: &Self, schema: Option<Wrap<Schema>>) -> RbResult<RArray> {
|
|
12
13
|
let schema = schema.as_ref().map(|s| &s.0);
|
|
13
|
-
let exprs =
|
|
14
|
+
let exprs = self_
|
|
14
15
|
.inner
|
|
15
16
|
.clone()
|
|
16
17
|
.meta()
|
|
17
18
|
.pop(schema)
|
|
18
19
|
.map_err(RbPolarsErr::from)?;
|
|
19
|
-
Ok(
|
|
20
|
+
Ok(exprs.to_rbexprs(ruby))
|
|
20
21
|
}
|
|
21
22
|
|
|
22
23
|
pub fn meta_root_names(&self) -> Vec<String> {
|
|
@@ -83,4 +84,8 @@ impl RbExpr {
|
|
|
83
84
|
pub fn meta_tree_format(&self, schema: Option<Wrap<Schema>>) -> RbResult<String> {
|
|
84
85
|
self.compute_tree_format(false, schema)
|
|
85
86
|
}
|
|
87
|
+
|
|
88
|
+
pub fn meta_show_graph(&self, schema: Option<Wrap<Schema>>) -> RbResult<String> {
|
|
89
|
+
self.compute_tree_format(true, schema)
|
|
90
|
+
}
|
|
86
91
|
}
|