polars-df 0.2.5 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +14 -0
- data/Cargo.lock +290 -137
- data/Cargo.toml +1 -1
- data/README.md +40 -2
- data/ext/polars/Cargo.toml +5 -4
- data/ext/polars/src/apply/dataframe.rs +6 -6
- data/ext/polars/src/apply/series.rs +10 -10
- data/ext/polars/src/batched_csv.rs +6 -4
- data/ext/polars/src/conversion.rs +56 -17
- data/ext/polars/src/dataframe.rs +65 -43
- data/ext/polars/src/error.rs +16 -8
- data/ext/polars/src/file.rs +5 -4
- data/ext/polars/src/lazy/apply.rs +1 -1
- data/ext/polars/src/lazy/dataframe.rs +12 -6
- data/ext/polars/src/lazy/dsl.rs +99 -45
- data/ext/polars/src/lazy/meta.rs +10 -9
- data/ext/polars/src/lib.rs +33 -29
- data/ext/polars/src/numo.rs +57 -0
- data/ext/polars/src/object.rs +2 -1
- data/ext/polars/src/series.rs +67 -53
- data/lib/polars/cat_expr.rb +0 -4
- data/lib/polars/cat_name_space.rb +0 -4
- data/lib/polars/convert.rb +0 -7
- data/lib/polars/data_frame.rb +165 -209
- data/lib/polars/data_types.rb +4 -0
- data/lib/polars/date_time_expr.rb +19 -151
- data/lib/polars/date_time_name_space.rb +17 -17
- data/lib/polars/expr.rb +68 -315
- data/lib/polars/group_by.rb +79 -51
- data/lib/polars/io.rb +1 -1
- data/lib/polars/lazy_frame.rb +1 -103
- data/lib/polars/lazy_functions.rb +0 -26
- data/lib/polars/lazy_group_by.rb +0 -8
- data/lib/polars/list_expr.rb +5 -27
- data/lib/polars/list_name_space.rb +5 -8
- data/lib/polars/plot.rb +109 -0
- data/lib/polars/series.rb +61 -19
- data/lib/polars/string_expr.rb +20 -76
- data/lib/polars/string_name_space.rb +5 -15
- data/lib/polars/struct_expr.rb +0 -2
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +1 -0
- metadata +5 -3
data/Cargo.toml
CHANGED
@@ -4,7 +4,7 @@ members = ["ext/polars"]
|
|
4
4
|
[patch.crates-io]
|
5
5
|
jsonpath_lib = { git = "https://github.com/ritchie46/jsonpath", rev = "24eaf0b4416edff38a4d1b6b17bc4b9f3f047b4b" }
|
6
6
|
halfbrown = { git = "https://github.com/Licenser/halfbrown", rev = "952023c5dd6461b009bb5ba66b9aa979bd75949f" }
|
7
|
-
arrow2 = { git = "https://github.com/ankane/arrow2", rev = "
|
7
|
+
arrow2 = { git = "https://github.com/ankane/arrow2", rev = "ef0270922a217070ba9942567c0ff3263ae8c531" }
|
8
8
|
|
9
9
|
[profile.release]
|
10
10
|
strip = true
|
data/README.md
CHANGED
@@ -282,10 +282,10 @@ df.to_dummies
|
|
282
282
|
|
283
283
|
## Conversion
|
284
284
|
|
285
|
-
Array of
|
285
|
+
Array of hashes
|
286
286
|
|
287
287
|
```ruby
|
288
|
-
df.rows
|
288
|
+
df.rows(named: true)
|
289
289
|
```
|
290
290
|
|
291
291
|
Hash of series
|
@@ -308,6 +308,12 @@ Parquet
|
|
308
308
|
df.write_parquet("file.parquet")
|
309
309
|
```
|
310
310
|
|
311
|
+
Numo array
|
312
|
+
|
313
|
+
```ruby
|
314
|
+
df.to_numo
|
315
|
+
```
|
316
|
+
|
311
317
|
## Types
|
312
318
|
|
313
319
|
You can specify column types when creating a data frame
|
@@ -343,6 +349,38 @@ Cast a column
|
|
343
349
|
df["a"].cast(Polars::Int32)
|
344
350
|
```
|
345
351
|
|
352
|
+
## Visualization
|
353
|
+
|
354
|
+
Add [Vega](https://github.com/ankane/vega-ruby) to your application’s Gemfile:
|
355
|
+
|
356
|
+
```ruby
|
357
|
+
gem "vega"
|
358
|
+
```
|
359
|
+
|
360
|
+
And use:
|
361
|
+
|
362
|
+
```ruby
|
363
|
+
df.plot("a", "b")
|
364
|
+
```
|
365
|
+
|
366
|
+
Specify the chart type (`line`, `pie`, `column`, `bar`, `area`, or `scatter`)
|
367
|
+
|
368
|
+
```ruby
|
369
|
+
df.plot("a", "b", type: "pie")
|
370
|
+
```
|
371
|
+
|
372
|
+
Group data
|
373
|
+
|
374
|
+
```ruby
|
375
|
+
df.groupby("c").plot("a", "b")
|
376
|
+
```
|
377
|
+
|
378
|
+
Stacked columns or bars
|
379
|
+
|
380
|
+
```ruby
|
381
|
+
df.groupby("c").plot("a", "b", stacked: true)
|
382
|
+
```
|
383
|
+
|
346
384
|
## History
|
347
385
|
|
348
386
|
View the [changelog](CHANGELOG.md)
|
data/ext/polars/Cargo.toml
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
[package]
|
2
2
|
name = "polars"
|
3
|
-
version = "0.
|
3
|
+
version = "0.3.1"
|
4
4
|
license = "MIT"
|
5
5
|
authors = ["Andrew Kane <andrew@ankane.org>"]
|
6
6
|
edition = "2021"
|
@@ -11,18 +11,19 @@ crate-type = ["cdylib"]
|
|
11
11
|
|
12
12
|
[dependencies]
|
13
13
|
ahash = "0.8"
|
14
|
-
magnus = "0.
|
15
|
-
polars-core = "0.
|
14
|
+
magnus = "0.5"
|
15
|
+
polars-core = "0.27.0"
|
16
16
|
serde_json = "1"
|
17
17
|
|
18
18
|
[dependencies.polars]
|
19
|
-
version = "0.
|
19
|
+
version = "0.27.0"
|
20
20
|
features = [
|
21
21
|
"abs",
|
22
22
|
"arange",
|
23
23
|
"arg_where",
|
24
24
|
"asof_join",
|
25
25
|
"avro",
|
26
|
+
"binary_encoding",
|
26
27
|
"concat_str",
|
27
28
|
"cse",
|
28
29
|
"csv-file",
|
@@ -1,4 +1,4 @@
|
|
1
|
-
use magnus::{class, RArray, TryConvert, Value};
|
1
|
+
use magnus::{class, IntoValue, RArray, TryConvert, Value};
|
2
2
|
use polars::prelude::*;
|
3
3
|
use polars_core::frame::row::{rows_to_schema_first_non_null, Row};
|
4
4
|
use polars_core::series::SeriesIter;
|
@@ -27,7 +27,7 @@ pub fn apply_lambda_unknown<'a>(
|
|
27
27
|
|
28
28
|
for _ in 0..df.height() {
|
29
29
|
let iter = iters.iter_mut().map(|it| Wrap(it.next().unwrap()));
|
30
|
-
let arg = (iter
|
30
|
+
let arg = (RArray::from_iter(iter),);
|
31
31
|
let out: Value = lambda.funcall("call", arg)?;
|
32
32
|
|
33
33
|
if out.is_nil() {
|
@@ -141,7 +141,7 @@ where
|
|
141
141
|
let mut iters = get_iters_skip(df, init_null_count + skip);
|
142
142
|
((init_null_count + skip)..df.height()).map(move |_| {
|
143
143
|
let iter = iters.iter_mut().map(|it| Wrap(it.next().unwrap()));
|
144
|
-
let tpl = (iter
|
144
|
+
let tpl = (RArray::from_iter(iter),);
|
145
145
|
match lambda.funcall::<_, _, Value>("call", tpl) {
|
146
146
|
Ok(val) => val.try_convert::<T>().ok(),
|
147
147
|
Err(e) => panic!("ruby function failed {}", e),
|
@@ -158,7 +158,7 @@ pub fn apply_lambda_with_primitive_out_type<D>(
|
|
158
158
|
) -> ChunkedArray<D>
|
159
159
|
where
|
160
160
|
D: RbArrowPrimitiveType,
|
161
|
-
D::Native:
|
161
|
+
D::Native: IntoValue + TryConvert,
|
162
162
|
{
|
163
163
|
let skip = usize::from(first_value.is_some());
|
164
164
|
if init_null_count == df.height() {
|
@@ -216,7 +216,7 @@ pub fn apply_lambda_with_list_out_type<'a>(
|
|
216
216
|
let mut iters = get_iters_skip(df, init_null_count + skip);
|
217
217
|
let iter = ((init_null_count + skip)..df.height()).map(|_| {
|
218
218
|
let iter = iters.iter_mut().map(|it| Wrap(it.next().unwrap()));
|
219
|
-
let tpl = (iter
|
219
|
+
let tpl = (RArray::from_iter(iter),);
|
220
220
|
match lambda.funcall::<_, _, Value>("call", tpl) {
|
221
221
|
Ok(val) => match val.funcall::<_, _, Value>("_s", ()) {
|
222
222
|
Ok(val) => val
|
@@ -254,7 +254,7 @@ pub fn apply_lambda_with_rows_output<'a>(
|
|
254
254
|
let mut iters = get_iters_skip(df, init_null_count + skip);
|
255
255
|
let mut row_iter = ((init_null_count + skip)..df.height()).map(|_| {
|
256
256
|
let iter = iters.iter_mut().map(|it| Wrap(it.next().unwrap()));
|
257
|
-
let tpl = (iter
|
257
|
+
let tpl = (RArray::from_iter(iter),);
|
258
258
|
match lambda.funcall::<_, _, Value>("call", tpl) {
|
259
259
|
Ok(val) => {
|
260
260
|
match val.try_convert::<RArray>().ok() {
|
@@ -1,4 +1,4 @@
|
|
1
|
-
use magnus::{class, RHash, TryConvert, Value};
|
1
|
+
use magnus::{class, IntoValue, RHash, TryConvert, Value};
|
2
2
|
use polars::prelude::*;
|
3
3
|
|
4
4
|
use super::*;
|
@@ -85,7 +85,7 @@ pub trait ApplyLambda<'a> {
|
|
85
85
|
) -> RbResult<ChunkedArray<D>>
|
86
86
|
where
|
87
87
|
D: RbArrowPrimitiveType,
|
88
|
-
D::Native:
|
88
|
+
D::Native: IntoValue + TryConvert;
|
89
89
|
|
90
90
|
/// Apply a lambda with a boolean output type
|
91
91
|
fn apply_lambda_with_bool_out_type(
|
@@ -130,14 +130,14 @@ pub trait ApplyLambda<'a> {
|
|
130
130
|
|
131
131
|
pub fn call_lambda<T>(lambda: Value, in_val: T) -> RbResult<Value>
|
132
132
|
where
|
133
|
-
T:
|
133
|
+
T: IntoValue,
|
134
134
|
{
|
135
135
|
lambda.funcall("call", (in_val,))
|
136
136
|
}
|
137
137
|
|
138
138
|
pub(crate) fn call_lambda_and_extract<T, S>(lambda: Value, in_val: T) -> RbResult<S>
|
139
139
|
where
|
140
|
-
T:
|
140
|
+
T: IntoValue,
|
141
141
|
S: TryConvert,
|
142
142
|
{
|
143
143
|
match call_lambda(lambda, in_val) {
|
@@ -148,7 +148,7 @@ where
|
|
148
148
|
|
149
149
|
fn call_lambda_series_out<T>(lambda: Value, in_val: T) -> RbResult<Series>
|
150
150
|
where
|
151
|
-
T:
|
151
|
+
T: IntoValue,
|
152
152
|
{
|
153
153
|
let out: Value = lambda.funcall("call", (in_val,))?;
|
154
154
|
let py_series: Value = out.funcall("_s", ())?;
|
@@ -216,7 +216,7 @@ impl<'a> ApplyLambda<'a> for BooleanChunked {
|
|
216
216
|
) -> RbResult<ChunkedArray<D>>
|
217
217
|
where
|
218
218
|
D: RbArrowPrimitiveType,
|
219
|
-
D::Native:
|
219
|
+
D::Native: IntoValue + TryConvert,
|
220
220
|
{
|
221
221
|
let skip = usize::from(first_value.is_some());
|
222
222
|
if init_null_count == self.len() {
|
@@ -435,7 +435,7 @@ impl<'a> ApplyLambda<'a> for BooleanChunked {
|
|
435
435
|
impl<'a, T> ApplyLambda<'a> for ChunkedArray<T>
|
436
436
|
where
|
437
437
|
T: RbArrowPrimitiveType + PolarsNumericType,
|
438
|
-
T::Native:
|
438
|
+
T::Native: IntoValue + TryConvert,
|
439
439
|
ChunkedArray<T>: IntoSeries,
|
440
440
|
{
|
441
441
|
fn apply_lambda_unknown(&'a self, lambda: Value) -> RbResult<RbSeries> {
|
@@ -493,7 +493,7 @@ where
|
|
493
493
|
) -> RbResult<ChunkedArray<D>>
|
494
494
|
where
|
495
495
|
D: RbArrowPrimitiveType,
|
496
|
-
D::Native:
|
496
|
+
D::Native: IntoValue + TryConvert,
|
497
497
|
{
|
498
498
|
let skip = usize::from(first_value.is_some());
|
499
499
|
if init_null_count == self.len() {
|
@@ -765,7 +765,7 @@ impl<'a> ApplyLambda<'a> for Utf8Chunked {
|
|
765
765
|
) -> RbResult<ChunkedArray<D>>
|
766
766
|
where
|
767
767
|
D: RbArrowPrimitiveType,
|
768
|
-
D::Native:
|
768
|
+
D::Native: IntoValue + TryConvert,
|
769
769
|
{
|
770
770
|
let skip = usize::from(first_value.is_some());
|
771
771
|
if init_null_count == self.len() {
|
@@ -1036,7 +1036,7 @@ impl<'a> ApplyLambda<'a> for StructChunked {
|
|
1036
1036
|
) -> RbResult<ChunkedArray<D>>
|
1037
1037
|
where
|
1038
1038
|
D: RbArrowPrimitiveType,
|
1039
|
-
D::Native:
|
1039
|
+
D::Native: IntoValue + TryConvert,
|
1040
1040
|
{
|
1041
1041
|
let names = self.fields().iter().map(|s| s.name()).collect::<Vec<_>>();
|
1042
1042
|
|
@@ -1,4 +1,4 @@
|
|
1
|
-
use magnus::Value;
|
1
|
+
use magnus::{RArray, Value};
|
2
2
|
use polars::io::mmap::MmapBytesReader;
|
3
3
|
use polars::io::RowCount;
|
4
4
|
use polars::prelude::read_impl::OwnedBatchedCsvReader;
|
@@ -84,7 +84,7 @@ impl RbBatchedCsv {
|
|
84
84
|
.with_n_rows(n_rows)
|
85
85
|
.with_delimiter(sep.as_bytes()[0])
|
86
86
|
.with_skip_rows(skip_rows)
|
87
|
-
.
|
87
|
+
.with_ignore_errors(ignore_errors)
|
88
88
|
.with_projection(projection)
|
89
89
|
.with_rechunk(rechunk)
|
90
90
|
.with_chunk_size(chunk_size)
|
@@ -109,12 +109,14 @@ impl RbBatchedCsv {
|
|
109
109
|
})
|
110
110
|
}
|
111
111
|
|
112
|
-
pub fn next_batches(&self, n: usize) -> RbResult<Option<
|
112
|
+
pub fn next_batches(&self, n: usize) -> RbResult<Option<RArray>> {
|
113
113
|
let batches = self
|
114
114
|
.reader
|
115
115
|
.borrow_mut()
|
116
116
|
.next_batches(n)
|
117
117
|
.map_err(RbPolarsErr::from)?;
|
118
|
-
Ok(batches.map(|batches|
|
118
|
+
Ok(batches.map(|batches| {
|
119
|
+
RArray::from_iter(batches.into_iter().map(|out| RbDataFrame::from(out.1)))
|
120
|
+
}))
|
119
121
|
}
|
120
122
|
}
|
@@ -1,6 +1,6 @@
|
|
1
1
|
use magnus::{
|
2
|
-
class, r_hash::ForEach,
|
3
|
-
Value, QNIL,
|
2
|
+
class, exception, r_hash::ForEach, ruby_handle::RubyHandle, Integer, IntoValue, Module, RArray,
|
3
|
+
RFloat, RHash, RString, Symbol, TryConvert, Value, QNIL,
|
4
4
|
};
|
5
5
|
use polars::chunked_array::object::PolarsObjectSafe;
|
6
6
|
use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
|
@@ -98,9 +98,9 @@ impl TryConvert for Wrap<NullValues> {
|
|
98
98
|
}
|
99
99
|
}
|
100
100
|
|
101
|
-
impl
|
102
|
-
fn
|
103
|
-
match
|
101
|
+
impl IntoValue for Wrap<AnyValue<'_>> {
|
102
|
+
fn into_value_with(self, _: &RubyHandle) -> Value {
|
103
|
+
match self.0 {
|
104
104
|
AnyValue::UInt8(v) => Value::from(v),
|
105
105
|
AnyValue::UInt16(v) => Value::from(v),
|
106
106
|
AnyValue::UInt32(v) => Value::from(v),
|
@@ -114,6 +114,8 @@ impl From<Wrap<AnyValue<'_>>> for Value {
|
|
114
114
|
AnyValue::Null => *QNIL,
|
115
115
|
AnyValue::Boolean(v) => Value::from(v),
|
116
116
|
AnyValue::Utf8(v) => Value::from(v),
|
117
|
+
AnyValue::Utf8Owned(_v) => todo!(),
|
118
|
+
AnyValue::Categorical(_idx, _rev, _arr) => todo!(),
|
117
119
|
AnyValue::Date(v) => class::time()
|
118
120
|
.funcall::<_, _, Value>("at", (v * 86400,))
|
119
121
|
.unwrap()
|
@@ -123,7 +125,13 @@ impl From<Wrap<AnyValue<'_>>> for Value {
|
|
123
125
|
.unwrap(),
|
124
126
|
AnyValue::Datetime(v, tu, tz) => {
|
125
127
|
let t = match tu {
|
126
|
-
TimeUnit::Nanoseconds =>
|
128
|
+
TimeUnit::Nanoseconds => {
|
129
|
+
let sec = v / 1000000000;
|
130
|
+
let subsec = v % 1000000000;
|
131
|
+
class::time()
|
132
|
+
.funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("nsec")))
|
133
|
+
.unwrap()
|
134
|
+
}
|
127
135
|
TimeUnit::Microseconds => {
|
128
136
|
let sec = v / 1000000;
|
129
137
|
let subsec = v % 1000000;
|
@@ -131,7 +139,13 @@ impl From<Wrap<AnyValue<'_>>> for Value {
|
|
131
139
|
.funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("usec")))
|
132
140
|
.unwrap()
|
133
141
|
}
|
134
|
-
TimeUnit::Milliseconds =>
|
142
|
+
TimeUnit::Milliseconds => {
|
143
|
+
let sec = v / 1000;
|
144
|
+
let subsec = v % 1000;
|
145
|
+
class::time()
|
146
|
+
.funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("millisecond")))
|
147
|
+
.unwrap()
|
148
|
+
}
|
135
149
|
};
|
136
150
|
|
137
151
|
if tz.is_some() {
|
@@ -140,16 +154,24 @@ impl From<Wrap<AnyValue<'_>>> for Value {
|
|
140
154
|
t.funcall::<_, _, Value>("utc", ()).unwrap()
|
141
155
|
}
|
142
156
|
}
|
143
|
-
|
157
|
+
AnyValue::Duration(_v, _tu) => todo!(),
|
158
|
+
AnyValue::Time(_v) => todo!(),
|
159
|
+
AnyValue::List(v) => RbSeries::new(v).to_a().into_value(),
|
160
|
+
ref _av @ AnyValue::Struct(_, _, _flds) => todo!(),
|
161
|
+
AnyValue::StructOwned(_payload) => todo!(),
|
162
|
+
AnyValue::Object(_v) => todo!(),
|
163
|
+
AnyValue::ObjectOwned(_v) => todo!(),
|
164
|
+
AnyValue::Binary(_v) => todo!(),
|
165
|
+
AnyValue::BinaryOwned(_v) => todo!(),
|
144
166
|
}
|
145
167
|
}
|
146
168
|
}
|
147
169
|
|
148
|
-
impl
|
149
|
-
fn
|
170
|
+
impl IntoValue for Wrap<DataType> {
|
171
|
+
fn into_value_with(self, _: &RubyHandle) -> Value {
|
150
172
|
let pl = crate::rb_modules::polars();
|
151
173
|
|
152
|
-
match
|
174
|
+
match self.0 {
|
153
175
|
DataType::Int8 => pl.const_get::<_, Value>("Int8").unwrap(),
|
154
176
|
DataType::Int16 => pl.const_get::<_, Value>("Int16").unwrap(),
|
155
177
|
DataType::Int32 => pl.const_get::<_, Value>("Int32").unwrap(),
|
@@ -160,11 +182,12 @@ impl From<Wrap<DataType>> for Value {
|
|
160
182
|
DataType::UInt64 => pl.const_get::<_, Value>("UInt64").unwrap(),
|
161
183
|
DataType::Float32 => pl.const_get::<_, Value>("Float32").unwrap(),
|
162
184
|
DataType::Float64 => pl.const_get::<_, Value>("Float64").unwrap(),
|
185
|
+
DataType::Decimal128(_) => todo!(),
|
163
186
|
DataType::Boolean => pl.const_get::<_, Value>("Boolean").unwrap(),
|
164
187
|
DataType::Utf8 => pl.const_get::<_, Value>("Utf8").unwrap(),
|
165
188
|
DataType::Binary => pl.const_get::<_, Value>("Binary").unwrap(),
|
166
189
|
DataType::List(inner) => {
|
167
|
-
let inner = Wrap(*inner
|
190
|
+
let inner = Wrap(*inner);
|
168
191
|
let list_class = pl.const_get::<_, Value>("List").unwrap();
|
169
192
|
list_class.funcall::<_, _, Value>("new", (inner,)).unwrap()
|
170
193
|
}
|
@@ -172,7 +195,7 @@ impl From<Wrap<DataType>> for Value {
|
|
172
195
|
DataType::Datetime(tu, tz) => {
|
173
196
|
let datetime_class = pl.const_get::<_, Value>("Datetime").unwrap();
|
174
197
|
datetime_class
|
175
|
-
.funcall::<_, _, Value>("new", (tu.to_ascii(), tz
|
198
|
+
.funcall::<_, _, Value>("new", (tu.to_ascii(), tz))
|
176
199
|
.unwrap()
|
177
200
|
}
|
178
201
|
DataType::Duration(tu) => {
|
@@ -423,9 +446,9 @@ impl ObjectValue {
|
|
423
446
|
}
|
424
447
|
}
|
425
448
|
|
426
|
-
impl
|
427
|
-
fn
|
428
|
-
|
449
|
+
impl IntoValue for ObjectValue {
|
450
|
+
fn into_value_with(self, _: &RubyHandle) -> Value {
|
451
|
+
self.inner
|
429
452
|
}
|
430
453
|
}
|
431
454
|
|
@@ -767,6 +790,22 @@ impl TryConvert for Wrap<UniqueKeepStrategy> {
|
|
767
790
|
}
|
768
791
|
}
|
769
792
|
|
793
|
+
impl TryConvert for Wrap<SearchSortedSide> {
|
794
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
795
|
+
let parsed = match ob.try_convert::<String>()?.as_str() {
|
796
|
+
"any" => SearchSortedSide::Any,
|
797
|
+
"left" => SearchSortedSide::Left,
|
798
|
+
"right" => SearchSortedSide::Right,
|
799
|
+
v => {
|
800
|
+
return Err(RbValueError::new_err(format!(
|
801
|
+
"side must be one of {{'any', 'left', 'right'}}, got {v}",
|
802
|
+
)))
|
803
|
+
}
|
804
|
+
};
|
805
|
+
Ok(Wrap(parsed))
|
806
|
+
}
|
807
|
+
}
|
808
|
+
|
770
809
|
pub fn parse_fill_null_strategy(
|
771
810
|
strategy: &str,
|
772
811
|
limit: FillNullLimit,
|
@@ -780,7 +819,7 @@ pub fn parse_fill_null_strategy(
|
|
780
819
|
"zero" => FillNullStrategy::Zero,
|
781
820
|
"one" => FillNullStrategy::One,
|
782
821
|
e => {
|
783
|
-
return Err(magnus::Error::runtime_error(format!(
|
822
|
+
return Err(magnus::Error::new(exception::runtime_error(), format!(
|
784
823
|
"strategy must be one of {{'forward', 'backward', 'min', 'max', 'mean', 'zero', 'one'}}, got {}",
|
785
824
|
e,
|
786
825
|
)))
|
data/ext/polars/src/dataframe.rs
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
use magnus::{r_hash::ForEach, RArray, RHash, RString, Value};
|
1
|
+
use magnus::{r_hash::ForEach, IntoValue, RArray, RHash, RString, Value};
|
2
2
|
use polars::frame::row::{rows_to_schema_supertypes, Row};
|
3
3
|
use polars::frame::NullStrategy;
|
4
4
|
use polars::io::avro::AvroCompression;
|
@@ -6,6 +6,7 @@ use polars::io::mmap::ReaderBytes;
|
|
6
6
|
use polars::io::RowCount;
|
7
7
|
use polars::prelude::pivot::{pivot, pivot_stable};
|
8
8
|
use polars::prelude::*;
|
9
|
+
use polars_core::utils::try_get_supertype;
|
9
10
|
use std::cell::RefCell;
|
10
11
|
use std::io::{BufWriter, Cursor};
|
11
12
|
use std::ops::Deref;
|
@@ -68,7 +69,7 @@ impl RbDataFrame {
|
|
68
69
|
*dtype_ = dtype;
|
69
70
|
}
|
70
71
|
} else {
|
71
|
-
schema.with_column(name, dtype)
|
72
|
+
schema.with_column(name, dtype);
|
72
73
|
}
|
73
74
|
}
|
74
75
|
}
|
@@ -159,7 +160,7 @@ impl RbDataFrame {
|
|
159
160
|
.with_n_rows(n_rows)
|
160
161
|
.with_delimiter(sep.as_bytes()[0])
|
161
162
|
.with_skip_rows(skip_rows)
|
162
|
-
.
|
163
|
+
.with_ignore_errors(ignore_errors)
|
163
164
|
.with_projection(projection)
|
164
165
|
.with_rechunk(rechunk)
|
165
166
|
.with_chunk_size(chunk_size)
|
@@ -457,7 +458,7 @@ impl RbDataFrame {
|
|
457
458
|
} else {
|
458
459
|
idx as usize
|
459
460
|
};
|
460
|
-
RArray::
|
461
|
+
RArray::from_iter(
|
461
462
|
self.df
|
462
463
|
.borrow()
|
463
464
|
.get_columns()
|
@@ -467,39 +468,51 @@ impl RbDataFrame {
|
|
467
468
|
let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
|
468
469
|
obj.unwrap().to_object()
|
469
470
|
}
|
470
|
-
_ => Wrap(s.get(idx).unwrap()).
|
471
|
-
})
|
472
|
-
.collect(),
|
471
|
+
_ => Wrap(s.get(idx).unwrap()).into_value(),
|
472
|
+
}),
|
473
473
|
)
|
474
474
|
.into()
|
475
475
|
}
|
476
476
|
|
477
477
|
pub fn row_tuples(&self) -> Value {
|
478
478
|
let df = &self.df;
|
479
|
-
RArray::
|
480
|
-
(
|
481
|
-
.
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
|
487
|
-
.map(|
|
488
|
-
|
489
|
-
|
490
|
-
|
491
|
-
|
492
|
-
|
493
|
-
|
494
|
-
})
|
495
|
-
.collect(),
|
496
|
-
)
|
497
|
-
})
|
498
|
-
.collect(),
|
499
|
-
)
|
479
|
+
RArray::from_iter((0..df.borrow().height()).map(|idx| {
|
480
|
+
RArray::from_iter(
|
481
|
+
self.df
|
482
|
+
.borrow()
|
483
|
+
.get_columns()
|
484
|
+
.iter()
|
485
|
+
.map(|s| match s.dtype() {
|
486
|
+
DataType::Object(_) => {
|
487
|
+
let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
|
488
|
+
obj.unwrap().to_object()
|
489
|
+
}
|
490
|
+
_ => Wrap(s.get(idx).unwrap()).into_value(),
|
491
|
+
}),
|
492
|
+
)
|
493
|
+
}))
|
500
494
|
.into()
|
501
495
|
}
|
502
496
|
|
497
|
+
pub fn to_numo(&self) -> Option<Value> {
|
498
|
+
let mut st = None;
|
499
|
+
for s in self.df.borrow().iter() {
|
500
|
+
let dt_i = s.dtype();
|
501
|
+
match st {
|
502
|
+
None => st = Some(dt_i.clone()),
|
503
|
+
Some(ref mut st) => {
|
504
|
+
*st = try_get_supertype(st, dt_i).ok()?;
|
505
|
+
}
|
506
|
+
}
|
507
|
+
}
|
508
|
+
let st = st?;
|
509
|
+
|
510
|
+
match st {
|
511
|
+
// TODO
|
512
|
+
_ => None,
|
513
|
+
}
|
514
|
+
}
|
515
|
+
|
503
516
|
pub fn write_parquet(
|
504
517
|
&self,
|
505
518
|
rb_f: Value,
|
@@ -613,7 +626,7 @@ impl RbDataFrame {
|
|
613
626
|
format!("{}", self.df.borrow())
|
614
627
|
}
|
615
628
|
|
616
|
-
pub fn get_columns(&self) ->
|
629
|
+
pub fn get_columns(&self) -> RArray {
|
617
630
|
let cols = self.df.borrow().get_columns().clone();
|
618
631
|
to_rbseries_collection(cols)
|
619
632
|
}
|
@@ -635,12 +648,13 @@ impl RbDataFrame {
|
|
635
648
|
Ok(())
|
636
649
|
}
|
637
650
|
|
638
|
-
pub fn dtypes(&self) ->
|
639
|
-
|
640
|
-
.
|
641
|
-
|
642
|
-
|
643
|
-
|
651
|
+
pub fn dtypes(&self) -> RArray {
|
652
|
+
RArray::from_iter(
|
653
|
+
self.df
|
654
|
+
.borrow()
|
655
|
+
.iter()
|
656
|
+
.map(|s| Wrap(s.dtype().clone()).into_value()),
|
657
|
+
)
|
644
658
|
}
|
645
659
|
|
646
660
|
pub fn n_chunks(&self) -> usize {
|
@@ -777,6 +791,7 @@ impl RbDataFrame {
|
|
777
791
|
SortOptions {
|
778
792
|
descending: reverse,
|
779
793
|
nulls_last,
|
794
|
+
multithreaded: true,
|
780
795
|
},
|
781
796
|
)
|
782
797
|
.map_err(RbPolarsErr::from)?;
|
@@ -876,6 +891,7 @@ impl RbDataFrame {
|
|
876
891
|
Ok(RbDataFrame::new(df))
|
877
892
|
}
|
878
893
|
|
894
|
+
#[allow(clippy::too_many_arguments)]
|
879
895
|
pub fn pivot_expr(
|
880
896
|
&self,
|
881
897
|
values: Vec<String>,
|
@@ -884,6 +900,7 @@ impl RbDataFrame {
|
|
884
900
|
aggregate_expr: &RbExpr,
|
885
901
|
maintain_order: bool,
|
886
902
|
sort_columns: bool,
|
903
|
+
separator: Option<String>,
|
887
904
|
) -> RbResult<Self> {
|
888
905
|
let fun = match maintain_order {
|
889
906
|
true => pivot_stable,
|
@@ -896,19 +913,20 @@ impl RbDataFrame {
|
|
896
913
|
columns,
|
897
914
|
aggregate_expr.inner.clone(),
|
898
915
|
sort_columns,
|
916
|
+
separator.as_deref(),
|
899
917
|
)
|
900
918
|
.map_err(RbPolarsErr::from)?;
|
901
919
|
Ok(RbDataFrame::new(df))
|
902
920
|
}
|
903
921
|
|
904
|
-
pub fn partition_by(&self, groups: Vec<String>, stable: bool) -> RbResult<
|
922
|
+
pub fn partition_by(&self, groups: Vec<String>, stable: bool) -> RbResult<RArray> {
|
905
923
|
let out = if stable {
|
906
924
|
self.df.borrow().partition_by_stable(groups)
|
907
925
|
} else {
|
908
926
|
self.df.borrow().partition_by(groups)
|
909
927
|
}
|
910
928
|
.map_err(RbPolarsErr::from)?;
|
911
|
-
Ok(out.into_iter().map(RbDataFrame::new)
|
929
|
+
Ok(RArray::from_iter(out.into_iter().map(RbDataFrame::new)))
|
912
930
|
}
|
913
931
|
|
914
932
|
pub fn shift(&self, periods: i64) -> Self {
|
@@ -1003,13 +1021,17 @@ impl RbDataFrame {
|
|
1003
1021
|
Ok(df.into())
|
1004
1022
|
}
|
1005
1023
|
|
1006
|
-
pub fn to_dummies(
|
1024
|
+
pub fn to_dummies(
|
1025
|
+
&self,
|
1026
|
+
columns: Option<Vec<String>>,
|
1027
|
+
separator: Option<String>,
|
1028
|
+
) -> RbResult<Self> {
|
1007
1029
|
let df = match columns {
|
1008
|
-
Some(cols) => self
|
1009
|
-
.
|
1010
|
-
.
|
1011
|
-
|
1012
|
-
None => self.df.borrow().to_dummies(),
|
1030
|
+
Some(cols) => self.df.borrow().columns_to_dummies(
|
1031
|
+
cols.iter().map(|x| x as &str).collect(),
|
1032
|
+
separator.as_deref(),
|
1033
|
+
),
|
1034
|
+
None => self.df.borrow().to_dummies(separator.as_deref()),
|
1013
1035
|
}
|
1014
1036
|
.map_err(RbPolarsErr::from)?;
|
1015
1037
|
Ok(df.into())
|