polars-df 0.11.0 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/Cargo.lock +360 -361
- data/ext/polars/Cargo.toml +10 -7
- data/ext/polars/src/batched_csv.rs +1 -1
- data/ext/polars/src/conversion/any_value.rs +261 -0
- data/ext/polars/src/conversion/chunked_array.rs +4 -4
- data/ext/polars/src/conversion/mod.rs +51 -10
- data/ext/polars/src/dataframe/construction.rs +6 -8
- data/ext/polars/src/dataframe/general.rs +19 -29
- data/ext/polars/src/dataframe/io.rs +43 -33
- data/ext/polars/src/error.rs +26 -4
- data/ext/polars/src/expr/categorical.rs +0 -10
- data/ext/polars/src/expr/datetime.rs +4 -12
- data/ext/polars/src/expr/general.rs +123 -110
- data/ext/polars/src/expr/mod.rs +2 -2
- data/ext/polars/src/expr/rolling.rs +17 -9
- data/ext/polars/src/expr/string.rs +2 -6
- data/ext/polars/src/functions/eager.rs +10 -10
- data/ext/polars/src/functions/lazy.rs +21 -21
- data/ext/polars/src/functions/range.rs +6 -12
- data/ext/polars/src/interop/numo/to_numo_series.rs +2 -1
- data/ext/polars/src/lazyframe/mod.rs +81 -98
- data/ext/polars/src/lib.rs +55 -45
- data/ext/polars/src/map/dataframe.rs +2 -2
- data/ext/polars/src/rb_modules.rs +25 -1
- data/ext/polars/src/series/aggregation.rs +4 -2
- data/ext/polars/src/series/arithmetic.rs +21 -11
- data/ext/polars/src/series/construction.rs +56 -38
- data/ext/polars/src/series/export.rs +1 -1
- data/ext/polars/src/series/mod.rs +31 -10
- data/ext/polars/src/sql.rs +3 -1
- data/lib/polars/array_expr.rb +4 -4
- data/lib/polars/batched_csv_reader.rb +2 -2
- data/lib/polars/cat_expr.rb +0 -36
- data/lib/polars/cat_name_space.rb +0 -37
- data/lib/polars/data_frame.rb +93 -101
- data/lib/polars/data_types.rb +1 -1
- data/lib/polars/date_time_expr.rb +525 -573
- data/lib/polars/date_time_name_space.rb +263 -464
- data/lib/polars/dynamic_group_by.rb +3 -3
- data/lib/polars/exceptions.rb +3 -0
- data/lib/polars/expr.rb +367 -330
- data/lib/polars/expr_dispatch.rb +1 -1
- data/lib/polars/functions/aggregation/horizontal.rb +8 -8
- data/lib/polars/functions/as_datatype.rb +63 -40
- data/lib/polars/functions/lazy.rb +63 -14
- data/lib/polars/functions/lit.rb +1 -1
- data/lib/polars/functions/range/date_range.rb +18 -77
- data/lib/polars/functions/range/datetime_range.rb +4 -4
- data/lib/polars/functions/range/int_range.rb +2 -2
- data/lib/polars/functions/range/time_range.rb +4 -4
- data/lib/polars/functions/repeat.rb +1 -1
- data/lib/polars/functions/whenthen.rb +1 -1
- data/lib/polars/io/csv.rb +8 -8
- data/lib/polars/io/ipc.rb +3 -3
- data/lib/polars/io/json.rb +13 -2
- data/lib/polars/io/ndjson.rb +15 -4
- data/lib/polars/io/parquet.rb +5 -4
- data/lib/polars/lazy_frame.rb +120 -106
- data/lib/polars/lazy_group_by.rb +1 -1
- data/lib/polars/list_expr.rb +11 -11
- data/lib/polars/list_name_space.rb +5 -1
- data/lib/polars/rolling_group_by.rb +5 -7
- data/lib/polars/series.rb +105 -189
- data/lib/polars/string_expr.rb +42 -67
- data/lib/polars/string_name_space.rb +5 -4
- data/lib/polars/testing.rb +2 -2
- data/lib/polars/utils/constants.rb +9 -0
- data/lib/polars/utils/convert.rb +97 -0
- data/lib/polars/utils/parse.rb +89 -0
- data/lib/polars/utils/various.rb +76 -0
- data/lib/polars/utils/wrap.rb +19 -0
- data/lib/polars/utils.rb +4 -330
- data/lib/polars/version.rb +1 -1
- data/lib/polars/whenthen.rb +6 -6
- data/lib/polars.rb +11 -0
- metadata +9 -4
- data/ext/polars/src/conversion/anyvalue.rs +0 -186
@@ -1,11 +1,9 @@
|
|
1
1
|
use magnus::{prelude::*, RString, Value};
|
2
2
|
use polars::io::avro::AvroCompression;
|
3
|
-
use polars::io::mmap::ReaderBytes;
|
4
3
|
use polars::io::RowIndex;
|
5
4
|
use polars::prelude::*;
|
6
5
|
use std::io::{BufWriter, Cursor};
|
7
6
|
use std::num::NonZeroUsize;
|
8
|
-
use std::ops::Deref;
|
9
7
|
|
10
8
|
use super::*;
|
11
9
|
use crate::conversion::*;
|
@@ -93,7 +91,7 @@ impl RbDataFrame {
|
|
93
91
|
.with_projection(projection.map(Arc::new))
|
94
92
|
.with_rechunk(rechunk)
|
95
93
|
.with_chunk_size(chunk_size)
|
96
|
-
.with_columns(columns.map(Arc::
|
94
|
+
.with_columns(columns.map(Arc::from))
|
97
95
|
.with_n_threads(n_threads)
|
98
96
|
.with_schema_overwrite(overwrite_dtype.map(Arc::new))
|
99
97
|
.with_dtype_overwrite(overwrite_dtype_slice.map(Arc::new))
|
@@ -168,41 +166,53 @@ impl RbDataFrame {
|
|
168
166
|
Ok(RbDataFrame::new(df))
|
169
167
|
}
|
170
168
|
|
171
|
-
pub fn read_json(
|
172
|
-
|
169
|
+
pub fn read_json(
|
170
|
+
rb_f: Value,
|
171
|
+
infer_schema_length: Option<usize>,
|
172
|
+
schema: Option<Wrap<Schema>>,
|
173
|
+
schema_overrides: Option<Wrap<Schema>>,
|
174
|
+
) -> RbResult<Self> {
|
173
175
|
let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
|
174
|
-
|
175
|
-
let
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
if msg.contains("successful parse invalid data") {
|
186
|
-
let e = RbPolarsErr::from(PolarsError::ComputeError(msg.into()));
|
187
|
-
Err(e)
|
188
|
-
} else {
|
189
|
-
let out = JsonReader::new(mmap_bytes_r)
|
190
|
-
.with_json_format(JsonFormat::Json)
|
191
|
-
.finish()
|
192
|
-
.map_err(|e| RbPolarsErr::other(format!("{:?}", e)))?;
|
193
|
-
Ok(out.into())
|
194
|
-
}
|
195
|
-
}
|
176
|
+
|
177
|
+
let mut builder = JsonReader::new(mmap_bytes_r)
|
178
|
+
.with_json_format(JsonFormat::Json)
|
179
|
+
.infer_schema_len(infer_schema_length.and_then(NonZeroUsize::new));
|
180
|
+
|
181
|
+
if let Some(schema) = schema {
|
182
|
+
builder = builder.with_schema(Arc::new(schema.0));
|
183
|
+
}
|
184
|
+
|
185
|
+
if let Some(schema) = schema_overrides.as_ref() {
|
186
|
+
builder = builder.with_schema_overwrite(&schema.0);
|
196
187
|
}
|
188
|
+
|
189
|
+
let out = builder.finish().map_err(RbPolarsErr::from)?;
|
190
|
+
Ok(out.into())
|
197
191
|
}
|
198
192
|
|
199
|
-
pub fn read_ndjson(
|
193
|
+
pub fn read_ndjson(
|
194
|
+
rb_f: Value,
|
195
|
+
ignore_errors: bool,
|
196
|
+
schema: Option<Wrap<Schema>>,
|
197
|
+
schema_overrides: Option<Wrap<Schema>>,
|
198
|
+
) -> RbResult<Self> {
|
200
199
|
let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
|
201
200
|
|
202
|
-
let
|
201
|
+
let mut builder = JsonReader::new(mmap_bytes_r)
|
203
202
|
.with_json_format(JsonFormat::JsonLines)
|
203
|
+
.with_ignore_errors(ignore_errors);
|
204
|
+
|
205
|
+
if let Some(schema) = schema {
|
206
|
+
builder = builder.with_schema(Arc::new(schema.0));
|
207
|
+
}
|
208
|
+
|
209
|
+
if let Some(schema) = schema_overrides.as_ref() {
|
210
|
+
builder = builder.with_schema_overwrite(&schema.0);
|
211
|
+
}
|
212
|
+
|
213
|
+
let out = builder
|
204
214
|
.finish()
|
205
|
-
.map_err(|e| RbPolarsErr::other(format!("{
|
215
|
+
.map_err(|e| RbPolarsErr::other(format!("{e}")))?;
|
206
216
|
Ok(out.into())
|
207
217
|
}
|
208
218
|
|
@@ -335,7 +345,7 @@ impl RbDataFrame {
|
|
335
345
|
rb_f: Value,
|
336
346
|
compression: String,
|
337
347
|
compression_level: Option<i32>,
|
338
|
-
statistics:
|
348
|
+
statistics: Wrap<StatisticsOptions>,
|
339
349
|
row_group_size: Option<usize>,
|
340
350
|
data_page_size: Option<usize>,
|
341
351
|
) -> RbResult<()> {
|
@@ -345,7 +355,7 @@ impl RbDataFrame {
|
|
345
355
|
let f = std::fs::File::create(s).unwrap();
|
346
356
|
ParquetWriter::new(f)
|
347
357
|
.with_compression(compression)
|
348
|
-
.with_statistics(statistics)
|
358
|
+
.with_statistics(statistics.0)
|
349
359
|
.with_row_group_size(row_group_size)
|
350
360
|
.with_data_page_size(data_page_size)
|
351
361
|
.finish(&mut self.df.borrow_mut())
|
@@ -354,7 +364,7 @@ impl RbDataFrame {
|
|
354
364
|
let buf = get_file_like(rb_f, true)?;
|
355
365
|
ParquetWriter::new(buf)
|
356
366
|
.with_compression(compression)
|
357
|
-
.with_statistics(statistics)
|
367
|
+
.with_statistics(statistics.0)
|
358
368
|
.with_row_group_size(row_group_size)
|
359
369
|
.with_data_page_size(data_page_size)
|
360
370
|
.finish(&mut self.df.borrow_mut())
|
data/ext/polars/src/error.rs
CHANGED
@@ -2,20 +2,26 @@ use magnus::exception;
|
|
2
2
|
use magnus::Error;
|
3
3
|
use polars::prelude::PolarsError;
|
4
4
|
|
5
|
+
use crate::rb_modules;
|
6
|
+
|
5
7
|
pub struct RbPolarsErr {}
|
6
8
|
|
7
9
|
impl RbPolarsErr {
|
8
10
|
// convert to Error instead of Self
|
9
11
|
pub fn from(e: PolarsError) -> Error {
|
10
|
-
|
12
|
+
match e {
|
13
|
+
PolarsError::ComputeError(err) => ComputeError::new_err(err.to_string()),
|
14
|
+
PolarsError::InvalidOperation(err) => InvalidOperationError::new_err(err.to_string()),
|
15
|
+
_ => Error::new(rb_modules::error(), e.to_string()),
|
16
|
+
}
|
11
17
|
}
|
12
18
|
|
13
19
|
pub fn io(e: std::io::Error) -> Error {
|
14
|
-
Error::new(
|
20
|
+
Error::new(rb_modules::error(), e.to_string())
|
15
21
|
}
|
16
22
|
|
17
23
|
pub fn other(message: String) -> Error {
|
18
|
-
Error::new(
|
24
|
+
Error::new(rb_modules::error(), message)
|
19
25
|
}
|
20
26
|
}
|
21
27
|
|
@@ -35,11 +41,27 @@ impl RbValueError {
|
|
35
41
|
}
|
36
42
|
}
|
37
43
|
|
44
|
+
pub struct RbOverflowError {}
|
45
|
+
|
46
|
+
impl RbOverflowError {
|
47
|
+
pub fn new_err(message: String) -> Error {
|
48
|
+
Error::new(exception::range_error(), message)
|
49
|
+
}
|
50
|
+
}
|
51
|
+
|
38
52
|
pub struct ComputeError {}
|
39
53
|
|
40
54
|
impl ComputeError {
|
41
55
|
pub fn new_err(message: String) -> Error {
|
42
|
-
Error::new(
|
56
|
+
Error::new(rb_modules::compute_error(), message)
|
57
|
+
}
|
58
|
+
}
|
59
|
+
|
60
|
+
pub struct InvalidOperationError {}
|
61
|
+
|
62
|
+
impl InvalidOperationError {
|
63
|
+
pub fn new_err(message: String) -> Error {
|
64
|
+
Error::new(rb_modules::invalid_operation_error(), message)
|
43
65
|
}
|
44
66
|
}
|
45
67
|
|
@@ -1,16 +1,6 @@
|
|
1
|
-
use polars::prelude::*;
|
2
|
-
|
3
|
-
use crate::conversion::Wrap;
|
4
1
|
use crate::RbExpr;
|
5
2
|
|
6
3
|
impl RbExpr {
|
7
|
-
pub fn cat_set_ordering(&self, ordering: Wrap<CategoricalOrdering>) -> Self {
|
8
|
-
self.inner
|
9
|
-
.clone()
|
10
|
-
.cast(DataType::Categorical(None, ordering.0))
|
11
|
-
.into()
|
12
|
-
}
|
13
|
-
|
14
4
|
pub fn cat_get_categories(&self) -> Self {
|
15
5
|
self.inner.clone().cat().get_categories().into()
|
16
6
|
}
|
@@ -50,12 +50,8 @@ impl RbExpr {
|
|
50
50
|
.into()
|
51
51
|
}
|
52
52
|
|
53
|
-
pub fn dt_truncate(&self, every: &Self
|
54
|
-
self.inner
|
55
|
-
.clone()
|
56
|
-
.dt()
|
57
|
-
.truncate(every.inner.clone(), offset)
|
58
|
-
.into()
|
53
|
+
pub fn dt_truncate(&self, every: &Self) -> Self {
|
54
|
+
self.inner.clone().dt().truncate(every.inner.clone()).into()
|
59
55
|
}
|
60
56
|
|
61
57
|
pub fn dt_month_start(&self) -> Self {
|
@@ -74,12 +70,8 @@ impl RbExpr {
|
|
74
70
|
self.inner.clone().dt().dst_offset().into()
|
75
71
|
}
|
76
72
|
|
77
|
-
pub fn dt_round(&self, every: &Self
|
78
|
-
self.inner
|
79
|
-
.clone()
|
80
|
-
.dt()
|
81
|
-
.round(every.inner.clone(), &offset)
|
82
|
-
.into()
|
73
|
+
pub fn dt_round(&self, every: &Self) -> Self {
|
74
|
+
self.inner.clone().dt().round(every.inner.clone()).into()
|
83
75
|
}
|
84
76
|
|
85
77
|
pub fn dt_combine(&self, time: &Self, time_unit: Wrap<TimeUnit>) -> Self {
|