polars-df 0.11.0 → 0.12.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/Cargo.lock +360 -361
- data/ext/polars/Cargo.toml +10 -7
- data/ext/polars/src/batched_csv.rs +1 -1
- data/ext/polars/src/conversion/any_value.rs +261 -0
- data/ext/polars/src/conversion/chunked_array.rs +4 -4
- data/ext/polars/src/conversion/mod.rs +51 -10
- data/ext/polars/src/dataframe/construction.rs +6 -8
- data/ext/polars/src/dataframe/general.rs +19 -29
- data/ext/polars/src/dataframe/io.rs +43 -33
- data/ext/polars/src/error.rs +26 -4
- data/ext/polars/src/expr/categorical.rs +0 -10
- data/ext/polars/src/expr/datetime.rs +4 -12
- data/ext/polars/src/expr/general.rs +123 -110
- data/ext/polars/src/expr/mod.rs +2 -2
- data/ext/polars/src/expr/rolling.rs +17 -9
- data/ext/polars/src/expr/string.rs +2 -6
- data/ext/polars/src/functions/eager.rs +10 -10
- data/ext/polars/src/functions/lazy.rs +21 -21
- data/ext/polars/src/functions/range.rs +6 -12
- data/ext/polars/src/interop/numo/to_numo_series.rs +2 -1
- data/ext/polars/src/lazyframe/mod.rs +81 -98
- data/ext/polars/src/lib.rs +55 -45
- data/ext/polars/src/map/dataframe.rs +2 -2
- data/ext/polars/src/rb_modules.rs +25 -1
- data/ext/polars/src/series/aggregation.rs +4 -2
- data/ext/polars/src/series/arithmetic.rs +21 -11
- data/ext/polars/src/series/construction.rs +56 -38
- data/ext/polars/src/series/export.rs +1 -1
- data/ext/polars/src/series/mod.rs +31 -10
- data/ext/polars/src/sql.rs +3 -1
- data/lib/polars/array_expr.rb +4 -4
- data/lib/polars/batched_csv_reader.rb +2 -2
- data/lib/polars/cat_expr.rb +0 -36
- data/lib/polars/cat_name_space.rb +0 -37
- data/lib/polars/data_frame.rb +93 -101
- data/lib/polars/data_types.rb +1 -1
- data/lib/polars/date_time_expr.rb +525 -573
- data/lib/polars/date_time_name_space.rb +263 -464
- data/lib/polars/dynamic_group_by.rb +3 -3
- data/lib/polars/exceptions.rb +3 -0
- data/lib/polars/expr.rb +367 -330
- data/lib/polars/expr_dispatch.rb +1 -1
- data/lib/polars/functions/aggregation/horizontal.rb +8 -8
- data/lib/polars/functions/as_datatype.rb +63 -40
- data/lib/polars/functions/lazy.rb +63 -14
- data/lib/polars/functions/lit.rb +1 -1
- data/lib/polars/functions/range/date_range.rb +18 -77
- data/lib/polars/functions/range/datetime_range.rb +4 -4
- data/lib/polars/functions/range/int_range.rb +2 -2
- data/lib/polars/functions/range/time_range.rb +4 -4
- data/lib/polars/functions/repeat.rb +1 -1
- data/lib/polars/functions/whenthen.rb +1 -1
- data/lib/polars/io/csv.rb +8 -8
- data/lib/polars/io/ipc.rb +3 -3
- data/lib/polars/io/json.rb +13 -2
- data/lib/polars/io/ndjson.rb +15 -4
- data/lib/polars/io/parquet.rb +5 -4
- data/lib/polars/lazy_frame.rb +120 -106
- data/lib/polars/lazy_group_by.rb +1 -1
- data/lib/polars/list_expr.rb +11 -11
- data/lib/polars/list_name_space.rb +5 -1
- data/lib/polars/rolling_group_by.rb +5 -7
- data/lib/polars/series.rb +105 -189
- data/lib/polars/string_expr.rb +42 -67
- data/lib/polars/string_name_space.rb +5 -4
- data/lib/polars/testing.rb +2 -2
- data/lib/polars/utils/constants.rb +9 -0
- data/lib/polars/utils/convert.rb +97 -0
- data/lib/polars/utils/parse.rb +89 -0
- data/lib/polars/utils/various.rb +76 -0
- data/lib/polars/utils/wrap.rb +19 -0
- data/lib/polars/utils.rb +4 -330
- data/lib/polars/version.rb +1 -1
- data/lib/polars/whenthen.rb +6 -6
- data/lib/polars.rb +11 -0
- metadata +9 -4
- data/ext/polars/src/conversion/anyvalue.rs +0 -186
@@ -1,11 +1,9 @@
|
|
1
1
|
use magnus::{prelude::*, RString, Value};
|
2
2
|
use polars::io::avro::AvroCompression;
|
3
|
-
use polars::io::mmap::ReaderBytes;
|
4
3
|
use polars::io::RowIndex;
|
5
4
|
use polars::prelude::*;
|
6
5
|
use std::io::{BufWriter, Cursor};
|
7
6
|
use std::num::NonZeroUsize;
|
8
|
-
use std::ops::Deref;
|
9
7
|
|
10
8
|
use super::*;
|
11
9
|
use crate::conversion::*;
|
@@ -93,7 +91,7 @@ impl RbDataFrame {
|
|
93
91
|
.with_projection(projection.map(Arc::new))
|
94
92
|
.with_rechunk(rechunk)
|
95
93
|
.with_chunk_size(chunk_size)
|
96
|
-
.with_columns(columns.map(Arc::
|
94
|
+
.with_columns(columns.map(Arc::from))
|
97
95
|
.with_n_threads(n_threads)
|
98
96
|
.with_schema_overwrite(overwrite_dtype.map(Arc::new))
|
99
97
|
.with_dtype_overwrite(overwrite_dtype_slice.map(Arc::new))
|
@@ -168,41 +166,53 @@ impl RbDataFrame {
|
|
168
166
|
Ok(RbDataFrame::new(df))
|
169
167
|
}
|
170
168
|
|
171
|
-
pub fn read_json(
|
172
|
-
|
169
|
+
pub fn read_json(
|
170
|
+
rb_f: Value,
|
171
|
+
infer_schema_length: Option<usize>,
|
172
|
+
schema: Option<Wrap<Schema>>,
|
173
|
+
schema_overrides: Option<Wrap<Schema>>,
|
174
|
+
) -> RbResult<Self> {
|
173
175
|
let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
|
174
|
-
|
175
|
-
let
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
if msg.contains("successful parse invalid data") {
|
186
|
-
let e = RbPolarsErr::from(PolarsError::ComputeError(msg.into()));
|
187
|
-
Err(e)
|
188
|
-
} else {
|
189
|
-
let out = JsonReader::new(mmap_bytes_r)
|
190
|
-
.with_json_format(JsonFormat::Json)
|
191
|
-
.finish()
|
192
|
-
.map_err(|e| RbPolarsErr::other(format!("{:?}", e)))?;
|
193
|
-
Ok(out.into())
|
194
|
-
}
|
195
|
-
}
|
176
|
+
|
177
|
+
let mut builder = JsonReader::new(mmap_bytes_r)
|
178
|
+
.with_json_format(JsonFormat::Json)
|
179
|
+
.infer_schema_len(infer_schema_length.and_then(NonZeroUsize::new));
|
180
|
+
|
181
|
+
if let Some(schema) = schema {
|
182
|
+
builder = builder.with_schema(Arc::new(schema.0));
|
183
|
+
}
|
184
|
+
|
185
|
+
if let Some(schema) = schema_overrides.as_ref() {
|
186
|
+
builder = builder.with_schema_overwrite(&schema.0);
|
196
187
|
}
|
188
|
+
|
189
|
+
let out = builder.finish().map_err(RbPolarsErr::from)?;
|
190
|
+
Ok(out.into())
|
197
191
|
}
|
198
192
|
|
199
|
-
pub fn read_ndjson(
|
193
|
+
pub fn read_ndjson(
|
194
|
+
rb_f: Value,
|
195
|
+
ignore_errors: bool,
|
196
|
+
schema: Option<Wrap<Schema>>,
|
197
|
+
schema_overrides: Option<Wrap<Schema>>,
|
198
|
+
) -> RbResult<Self> {
|
200
199
|
let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
|
201
200
|
|
202
|
-
let
|
201
|
+
let mut builder = JsonReader::new(mmap_bytes_r)
|
203
202
|
.with_json_format(JsonFormat::JsonLines)
|
203
|
+
.with_ignore_errors(ignore_errors);
|
204
|
+
|
205
|
+
if let Some(schema) = schema {
|
206
|
+
builder = builder.with_schema(Arc::new(schema.0));
|
207
|
+
}
|
208
|
+
|
209
|
+
if let Some(schema) = schema_overrides.as_ref() {
|
210
|
+
builder = builder.with_schema_overwrite(&schema.0);
|
211
|
+
}
|
212
|
+
|
213
|
+
let out = builder
|
204
214
|
.finish()
|
205
|
-
.map_err(|e| RbPolarsErr::other(format!("{
|
215
|
+
.map_err(|e| RbPolarsErr::other(format!("{e}")))?;
|
206
216
|
Ok(out.into())
|
207
217
|
}
|
208
218
|
|
@@ -335,7 +345,7 @@ impl RbDataFrame {
|
|
335
345
|
rb_f: Value,
|
336
346
|
compression: String,
|
337
347
|
compression_level: Option<i32>,
|
338
|
-
statistics:
|
348
|
+
statistics: Wrap<StatisticsOptions>,
|
339
349
|
row_group_size: Option<usize>,
|
340
350
|
data_page_size: Option<usize>,
|
341
351
|
) -> RbResult<()> {
|
@@ -345,7 +355,7 @@ impl RbDataFrame {
|
|
345
355
|
let f = std::fs::File::create(s).unwrap();
|
346
356
|
ParquetWriter::new(f)
|
347
357
|
.with_compression(compression)
|
348
|
-
.with_statistics(statistics)
|
358
|
+
.with_statistics(statistics.0)
|
349
359
|
.with_row_group_size(row_group_size)
|
350
360
|
.with_data_page_size(data_page_size)
|
351
361
|
.finish(&mut self.df.borrow_mut())
|
@@ -354,7 +364,7 @@ impl RbDataFrame {
|
|
354
364
|
let buf = get_file_like(rb_f, true)?;
|
355
365
|
ParquetWriter::new(buf)
|
356
366
|
.with_compression(compression)
|
357
|
-
.with_statistics(statistics)
|
367
|
+
.with_statistics(statistics.0)
|
358
368
|
.with_row_group_size(row_group_size)
|
359
369
|
.with_data_page_size(data_page_size)
|
360
370
|
.finish(&mut self.df.borrow_mut())
|
data/ext/polars/src/error.rs
CHANGED
@@ -2,20 +2,26 @@ use magnus::exception;
|
|
2
2
|
use magnus::Error;
|
3
3
|
use polars::prelude::PolarsError;
|
4
4
|
|
5
|
+
use crate::rb_modules;
|
6
|
+
|
5
7
|
pub struct RbPolarsErr {}
|
6
8
|
|
7
9
|
impl RbPolarsErr {
|
8
10
|
// convert to Error instead of Self
|
9
11
|
pub fn from(e: PolarsError) -> Error {
|
10
|
-
|
12
|
+
match e {
|
13
|
+
PolarsError::ComputeError(err) => ComputeError::new_err(err.to_string()),
|
14
|
+
PolarsError::InvalidOperation(err) => InvalidOperationError::new_err(err.to_string()),
|
15
|
+
_ => Error::new(rb_modules::error(), e.to_string()),
|
16
|
+
}
|
11
17
|
}
|
12
18
|
|
13
19
|
pub fn io(e: std::io::Error) -> Error {
|
14
|
-
Error::new(
|
20
|
+
Error::new(rb_modules::error(), e.to_string())
|
15
21
|
}
|
16
22
|
|
17
23
|
pub fn other(message: String) -> Error {
|
18
|
-
Error::new(
|
24
|
+
Error::new(rb_modules::error(), message)
|
19
25
|
}
|
20
26
|
}
|
21
27
|
|
@@ -35,11 +41,27 @@ impl RbValueError {
|
|
35
41
|
}
|
36
42
|
}
|
37
43
|
|
44
|
+
pub struct RbOverflowError {}
|
45
|
+
|
46
|
+
impl RbOverflowError {
|
47
|
+
pub fn new_err(message: String) -> Error {
|
48
|
+
Error::new(exception::range_error(), message)
|
49
|
+
}
|
50
|
+
}
|
51
|
+
|
38
52
|
pub struct ComputeError {}
|
39
53
|
|
40
54
|
impl ComputeError {
|
41
55
|
pub fn new_err(message: String) -> Error {
|
42
|
-
Error::new(
|
56
|
+
Error::new(rb_modules::compute_error(), message)
|
57
|
+
}
|
58
|
+
}
|
59
|
+
|
60
|
+
pub struct InvalidOperationError {}
|
61
|
+
|
62
|
+
impl InvalidOperationError {
|
63
|
+
pub fn new_err(message: String) -> Error {
|
64
|
+
Error::new(rb_modules::invalid_operation_error(), message)
|
43
65
|
}
|
44
66
|
}
|
45
67
|
|
@@ -1,16 +1,6 @@
|
|
1
|
-
use polars::prelude::*;
|
2
|
-
|
3
|
-
use crate::conversion::Wrap;
|
4
1
|
use crate::RbExpr;
|
5
2
|
|
6
3
|
impl RbExpr {
|
7
|
-
pub fn cat_set_ordering(&self, ordering: Wrap<CategoricalOrdering>) -> Self {
|
8
|
-
self.inner
|
9
|
-
.clone()
|
10
|
-
.cast(DataType::Categorical(None, ordering.0))
|
11
|
-
.into()
|
12
|
-
}
|
13
|
-
|
14
4
|
pub fn cat_get_categories(&self) -> Self {
|
15
5
|
self.inner.clone().cat().get_categories().into()
|
16
6
|
}
|
@@ -50,12 +50,8 @@ impl RbExpr {
|
|
50
50
|
.into()
|
51
51
|
}
|
52
52
|
|
53
|
-
pub fn dt_truncate(&self, every: &Self
|
54
|
-
self.inner
|
55
|
-
.clone()
|
56
|
-
.dt()
|
57
|
-
.truncate(every.inner.clone(), offset)
|
58
|
-
.into()
|
53
|
+
pub fn dt_truncate(&self, every: &Self) -> Self {
|
54
|
+
self.inner.clone().dt().truncate(every.inner.clone()).into()
|
59
55
|
}
|
60
56
|
|
61
57
|
pub fn dt_month_start(&self) -> Self {
|
@@ -74,12 +70,8 @@ impl RbExpr {
|
|
74
70
|
self.inner.clone().dt().dst_offset().into()
|
75
71
|
}
|
76
72
|
|
77
|
-
pub fn dt_round(&self, every: &Self
|
78
|
-
self.inner
|
79
|
-
.clone()
|
80
|
-
.dt()
|
81
|
-
.round(every.inner.clone(), &offset)
|
82
|
-
.into()
|
73
|
+
pub fn dt_round(&self, every: &Self) -> Self {
|
74
|
+
self.inner.clone().dt().round(every.inner.clone()).into()
|
83
75
|
}
|
84
76
|
|
85
77
|
pub fn dt_combine(&self, time: &Self, time_unit: Wrap<TimeUnit>) -> Self {
|