polars-df 0.2.5 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +14 -0
- data/Cargo.lock +290 -137
- data/Cargo.toml +1 -1
- data/README.md +40 -2
- data/ext/polars/Cargo.toml +5 -4
- data/ext/polars/src/apply/dataframe.rs +6 -6
- data/ext/polars/src/apply/series.rs +10 -10
- data/ext/polars/src/batched_csv.rs +6 -4
- data/ext/polars/src/conversion.rs +56 -17
- data/ext/polars/src/dataframe.rs +65 -43
- data/ext/polars/src/error.rs +16 -8
- data/ext/polars/src/file.rs +5 -4
- data/ext/polars/src/lazy/apply.rs +1 -1
- data/ext/polars/src/lazy/dataframe.rs +12 -6
- data/ext/polars/src/lazy/dsl.rs +99 -45
- data/ext/polars/src/lazy/meta.rs +10 -9
- data/ext/polars/src/lib.rs +33 -29
- data/ext/polars/src/numo.rs +57 -0
- data/ext/polars/src/object.rs +2 -1
- data/ext/polars/src/series.rs +67 -53
- data/lib/polars/cat_expr.rb +0 -4
- data/lib/polars/cat_name_space.rb +0 -4
- data/lib/polars/convert.rb +0 -7
- data/lib/polars/data_frame.rb +165 -209
- data/lib/polars/data_types.rb +4 -0
- data/lib/polars/date_time_expr.rb +19 -151
- data/lib/polars/date_time_name_space.rb +17 -17
- data/lib/polars/expr.rb +68 -315
- data/lib/polars/group_by.rb +79 -51
- data/lib/polars/io.rb +1 -1
- data/lib/polars/lazy_frame.rb +1 -103
- data/lib/polars/lazy_functions.rb +0 -26
- data/lib/polars/lazy_group_by.rb +0 -8
- data/lib/polars/list_expr.rb +5 -27
- data/lib/polars/list_name_space.rb +5 -8
- data/lib/polars/plot.rb +109 -0
- data/lib/polars/series.rb +61 -19
- data/lib/polars/string_expr.rb +20 -76
- data/lib/polars/string_name_space.rb +5 -15
- data/lib/polars/struct_expr.rb +0 -2
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +1 -0
- metadata +5 -3
data/Cargo.toml
CHANGED
@@ -4,7 +4,7 @@ members = ["ext/polars"]
|
|
4
4
|
[patch.crates-io]
|
5
5
|
jsonpath_lib = { git = "https://github.com/ritchie46/jsonpath", rev = "24eaf0b4416edff38a4d1b6b17bc4b9f3f047b4b" }
|
6
6
|
halfbrown = { git = "https://github.com/Licenser/halfbrown", rev = "952023c5dd6461b009bb5ba66b9aa979bd75949f" }
|
7
|
-
arrow2 = { git = "https://github.com/ankane/arrow2", rev = "
|
7
|
+
arrow2 = { git = "https://github.com/ankane/arrow2", rev = "ef0270922a217070ba9942567c0ff3263ae8c531" }
|
8
8
|
|
9
9
|
[profile.release]
|
10
10
|
strip = true
|
data/README.md
CHANGED
@@ -282,10 +282,10 @@ df.to_dummies
|
|
282
282
|
|
283
283
|
## Conversion
|
284
284
|
|
285
|
-
Array of rows
|
285
|
+
Array of hashes
|
286
286
|
|
287
287
|
```ruby
|
288
|
-
df.rows
|
288
|
+
df.rows(named: true)
|
289
289
|
```
|
290
290
|
|
291
291
|
Hash of series
|
@@ -308,6 +308,12 @@ Parquet
|
|
308
308
|
df.write_parquet("file.parquet")
|
309
309
|
```
|
310
310
|
|
311
|
+
Numo array
|
312
|
+
|
313
|
+
```ruby
|
314
|
+
df.to_numo
|
315
|
+
```
|
316
|
+
|
311
317
|
## Types
|
312
318
|
|
313
319
|
You can specify column types when creating a data frame
|
@@ -343,6 +349,38 @@ Cast a column
|
|
343
349
|
df["a"].cast(Polars::Int32)
|
344
350
|
```
|
345
351
|
|
352
|
+
## Visualization
|
353
|
+
|
354
|
+
Add [Vega](https://github.com/ankane/vega-ruby) to your application’s Gemfile:
|
355
|
+
|
356
|
+
```ruby
|
357
|
+
gem "vega"
|
358
|
+
```
|
359
|
+
|
360
|
+
And use:
|
361
|
+
|
362
|
+
```ruby
|
363
|
+
df.plot("a", "b")
|
364
|
+
```
|
365
|
+
|
366
|
+
Specify the chart type (`line`, `pie`, `column`, `bar`, `area`, or `scatter`)
|
367
|
+
|
368
|
+
```ruby
|
369
|
+
df.plot("a", "b", type: "pie")
|
370
|
+
```
|
371
|
+
|
372
|
+
Group data
|
373
|
+
|
374
|
+
```ruby
|
375
|
+
df.groupby("c").plot("a", "b")
|
376
|
+
```
|
377
|
+
|
378
|
+
Stacked columns or bars
|
379
|
+
|
380
|
+
```ruby
|
381
|
+
df.groupby("c").plot("a", "b", stacked: true)
|
382
|
+
```
|
383
|
+
|
346
384
|
## History
|
347
385
|
|
348
386
|
View the [changelog](CHANGELOG.md)
|
data/ext/polars/Cargo.toml
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
[package]
|
2
2
|
name = "polars"
|
3
|
-
version = "0.2.5"
|
3
|
+
version = "0.3.1"
|
4
4
|
license = "MIT"
|
5
5
|
authors = ["Andrew Kane <andrew@ankane.org>"]
|
6
6
|
edition = "2021"
|
@@ -11,18 +11,19 @@ crate-type = ["cdylib"]
|
|
11
11
|
|
12
12
|
[dependencies]
|
13
13
|
ahash = "0.8"
|
14
|
-
magnus = "0.
|
15
|
-
polars-core = "0.
|
14
|
+
magnus = "0.5"
|
15
|
+
polars-core = "0.27.0"
|
16
16
|
serde_json = "1"
|
17
17
|
|
18
18
|
[dependencies.polars]
|
19
|
-
version = "0.
|
19
|
+
version = "0.27.0"
|
20
20
|
features = [
|
21
21
|
"abs",
|
22
22
|
"arange",
|
23
23
|
"arg_where",
|
24
24
|
"asof_join",
|
25
25
|
"avro",
|
26
|
+
"binary_encoding",
|
26
27
|
"concat_str",
|
27
28
|
"cse",
|
28
29
|
"csv-file",
|
data/ext/polars/src/apply/dataframe.rs
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
use magnus::{class, RArray, TryConvert, Value};
|
1
|
+
use magnus::{class, IntoValue, RArray, TryConvert, Value};
|
2
2
|
use polars::prelude::*;
|
3
3
|
use polars_core::frame::row::{rows_to_schema_first_non_null, Row};
|
4
4
|
use polars_core::series::SeriesIter;
|
@@ -27,7 +27,7 @@ pub fn apply_lambda_unknown<'a>(
|
|
27
27
|
|
28
28
|
for _ in 0..df.height() {
|
29
29
|
let iter = iters.iter_mut().map(|it| Wrap(it.next().unwrap()));
|
30
|
-
let arg = (iter
|
30
|
+
let arg = (RArray::from_iter(iter),);
|
31
31
|
let out: Value = lambda.funcall("call", arg)?;
|
32
32
|
|
33
33
|
if out.is_nil() {
|
@@ -141,7 +141,7 @@ where
|
|
141
141
|
let mut iters = get_iters_skip(df, init_null_count + skip);
|
142
142
|
((init_null_count + skip)..df.height()).map(move |_| {
|
143
143
|
let iter = iters.iter_mut().map(|it| Wrap(it.next().unwrap()));
|
144
|
-
let tpl = (iter
|
144
|
+
let tpl = (RArray::from_iter(iter),);
|
145
145
|
match lambda.funcall::<_, _, Value>("call", tpl) {
|
146
146
|
Ok(val) => val.try_convert::<T>().ok(),
|
147
147
|
Err(e) => panic!("ruby function failed {}", e),
|
@@ -158,7 +158,7 @@ pub fn apply_lambda_with_primitive_out_type<D>(
|
|
158
158
|
) -> ChunkedArray<D>
|
159
159
|
where
|
160
160
|
D: RbArrowPrimitiveType,
|
161
|
-
D::Native:
|
161
|
+
D::Native: IntoValue + TryConvert,
|
162
162
|
{
|
163
163
|
let skip = usize::from(first_value.is_some());
|
164
164
|
if init_null_count == df.height() {
|
@@ -216,7 +216,7 @@ pub fn apply_lambda_with_list_out_type<'a>(
|
|
216
216
|
let mut iters = get_iters_skip(df, init_null_count + skip);
|
217
217
|
let iter = ((init_null_count + skip)..df.height()).map(|_| {
|
218
218
|
let iter = iters.iter_mut().map(|it| Wrap(it.next().unwrap()));
|
219
|
-
let tpl = (iter
|
219
|
+
let tpl = (RArray::from_iter(iter),);
|
220
220
|
match lambda.funcall::<_, _, Value>("call", tpl) {
|
221
221
|
Ok(val) => match val.funcall::<_, _, Value>("_s", ()) {
|
222
222
|
Ok(val) => val
|
@@ -254,7 +254,7 @@ pub fn apply_lambda_with_rows_output<'a>(
|
|
254
254
|
let mut iters = get_iters_skip(df, init_null_count + skip);
|
255
255
|
let mut row_iter = ((init_null_count + skip)..df.height()).map(|_| {
|
256
256
|
let iter = iters.iter_mut().map(|it| Wrap(it.next().unwrap()));
|
257
|
-
let tpl = (iter
|
257
|
+
let tpl = (RArray::from_iter(iter),);
|
258
258
|
match lambda.funcall::<_, _, Value>("call", tpl) {
|
259
259
|
Ok(val) => {
|
260
260
|
match val.try_convert::<RArray>().ok() {
|
data/ext/polars/src/apply/series.rs
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
use magnus::{class, RHash, TryConvert, Value};
|
1
|
+
use magnus::{class, IntoValue, RHash, TryConvert, Value};
|
2
2
|
use polars::prelude::*;
|
3
3
|
|
4
4
|
use super::*;
|
@@ -85,7 +85,7 @@ pub trait ApplyLambda<'a> {
|
|
85
85
|
) -> RbResult<ChunkedArray<D>>
|
86
86
|
where
|
87
87
|
D: RbArrowPrimitiveType,
|
88
|
-
D::Native:
|
88
|
+
D::Native: IntoValue + TryConvert;
|
89
89
|
|
90
90
|
/// Apply a lambda with a boolean output type
|
91
91
|
fn apply_lambda_with_bool_out_type(
|
@@ -130,14 +130,14 @@ pub trait ApplyLambda<'a> {
|
|
130
130
|
|
131
131
|
pub fn call_lambda<T>(lambda: Value, in_val: T) -> RbResult<Value>
|
132
132
|
where
|
133
|
-
T:
|
133
|
+
T: IntoValue,
|
134
134
|
{
|
135
135
|
lambda.funcall("call", (in_val,))
|
136
136
|
}
|
137
137
|
|
138
138
|
pub(crate) fn call_lambda_and_extract<T, S>(lambda: Value, in_val: T) -> RbResult<S>
|
139
139
|
where
|
140
|
-
T:
|
140
|
+
T: IntoValue,
|
141
141
|
S: TryConvert,
|
142
142
|
{
|
143
143
|
match call_lambda(lambda, in_val) {
|
@@ -148,7 +148,7 @@ where
|
|
148
148
|
|
149
149
|
fn call_lambda_series_out<T>(lambda: Value, in_val: T) -> RbResult<Series>
|
150
150
|
where
|
151
|
-
T:
|
151
|
+
T: IntoValue,
|
152
152
|
{
|
153
153
|
let out: Value = lambda.funcall("call", (in_val,))?;
|
154
154
|
let py_series: Value = out.funcall("_s", ())?;
|
@@ -216,7 +216,7 @@ impl<'a> ApplyLambda<'a> for BooleanChunked {
|
|
216
216
|
) -> RbResult<ChunkedArray<D>>
|
217
217
|
where
|
218
218
|
D: RbArrowPrimitiveType,
|
219
|
-
D::Native:
|
219
|
+
D::Native: IntoValue + TryConvert,
|
220
220
|
{
|
221
221
|
let skip = usize::from(first_value.is_some());
|
222
222
|
if init_null_count == self.len() {
|
@@ -435,7 +435,7 @@ impl<'a> ApplyLambda<'a> for BooleanChunked {
|
|
435
435
|
impl<'a, T> ApplyLambda<'a> for ChunkedArray<T>
|
436
436
|
where
|
437
437
|
T: RbArrowPrimitiveType + PolarsNumericType,
|
438
|
-
T::Native:
|
438
|
+
T::Native: IntoValue + TryConvert,
|
439
439
|
ChunkedArray<T>: IntoSeries,
|
440
440
|
{
|
441
441
|
fn apply_lambda_unknown(&'a self, lambda: Value) -> RbResult<RbSeries> {
|
@@ -493,7 +493,7 @@ where
|
|
493
493
|
) -> RbResult<ChunkedArray<D>>
|
494
494
|
where
|
495
495
|
D: RbArrowPrimitiveType,
|
496
|
-
D::Native:
|
496
|
+
D::Native: IntoValue + TryConvert,
|
497
497
|
{
|
498
498
|
let skip = usize::from(first_value.is_some());
|
499
499
|
if init_null_count == self.len() {
|
@@ -765,7 +765,7 @@ impl<'a> ApplyLambda<'a> for Utf8Chunked {
|
|
765
765
|
) -> RbResult<ChunkedArray<D>>
|
766
766
|
where
|
767
767
|
D: RbArrowPrimitiveType,
|
768
|
-
D::Native:
|
768
|
+
D::Native: IntoValue + TryConvert,
|
769
769
|
{
|
770
770
|
let skip = usize::from(first_value.is_some());
|
771
771
|
if init_null_count == self.len() {
|
@@ -1036,7 +1036,7 @@ impl<'a> ApplyLambda<'a> for StructChunked {
|
|
1036
1036
|
) -> RbResult<ChunkedArray<D>>
|
1037
1037
|
where
|
1038
1038
|
D: RbArrowPrimitiveType,
|
1039
|
-
D::Native:
|
1039
|
+
D::Native: IntoValue + TryConvert,
|
1040
1040
|
{
|
1041
1041
|
let names = self.fields().iter().map(|s| s.name()).collect::<Vec<_>>();
|
1042
1042
|
|
data/ext/polars/src/batched_csv.rs
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
use magnus::Value;
|
1
|
+
use magnus::{RArray, Value};
|
2
2
|
use polars::io::mmap::MmapBytesReader;
|
3
3
|
use polars::io::RowCount;
|
4
4
|
use polars::prelude::read_impl::OwnedBatchedCsvReader;
|
@@ -84,7 +84,7 @@ impl RbBatchedCsv {
|
|
84
84
|
.with_n_rows(n_rows)
|
85
85
|
.with_delimiter(sep.as_bytes()[0])
|
86
86
|
.with_skip_rows(skip_rows)
|
87
|
-
.
|
87
|
+
.with_ignore_errors(ignore_errors)
|
88
88
|
.with_projection(projection)
|
89
89
|
.with_rechunk(rechunk)
|
90
90
|
.with_chunk_size(chunk_size)
|
@@ -109,12 +109,14 @@ impl RbBatchedCsv {
|
|
109
109
|
})
|
110
110
|
}
|
111
111
|
|
112
|
-
pub fn next_batches(&self, n: usize) -> RbResult<Option<
|
112
|
+
pub fn next_batches(&self, n: usize) -> RbResult<Option<RArray>> {
|
113
113
|
let batches = self
|
114
114
|
.reader
|
115
115
|
.borrow_mut()
|
116
116
|
.next_batches(n)
|
117
117
|
.map_err(RbPolarsErr::from)?;
|
118
|
-
Ok(batches.map(|batches|
|
118
|
+
Ok(batches.map(|batches| {
|
119
|
+
RArray::from_iter(batches.into_iter().map(|out| RbDataFrame::from(out.1)))
|
120
|
+
}))
|
119
121
|
}
|
120
122
|
}
|
data/ext/polars/src/conversion.rs
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
use magnus::{
|
2
|
-
class, r_hash::ForEach,
|
3
|
-
Value, QNIL,
|
2
|
+
class, exception, r_hash::ForEach, ruby_handle::RubyHandle, Integer, IntoValue, Module, RArray,
|
3
|
+
RFloat, RHash, RString, Symbol, TryConvert, Value, QNIL,
|
4
4
|
};
|
5
5
|
use polars::chunked_array::object::PolarsObjectSafe;
|
6
6
|
use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
|
@@ -98,9 +98,9 @@ impl TryConvert for Wrap<NullValues> {
|
|
98
98
|
}
|
99
99
|
}
|
100
100
|
|
101
|
-
impl
|
102
|
-
fn
|
103
|
-
match
|
101
|
+
impl IntoValue for Wrap<AnyValue<'_>> {
|
102
|
+
fn into_value_with(self, _: &RubyHandle) -> Value {
|
103
|
+
match self.0 {
|
104
104
|
AnyValue::UInt8(v) => Value::from(v),
|
105
105
|
AnyValue::UInt16(v) => Value::from(v),
|
106
106
|
AnyValue::UInt32(v) => Value::from(v),
|
@@ -114,6 +114,8 @@ impl From<Wrap<AnyValue<'_>>> for Value {
|
|
114
114
|
AnyValue::Null => *QNIL,
|
115
115
|
AnyValue::Boolean(v) => Value::from(v),
|
116
116
|
AnyValue::Utf8(v) => Value::from(v),
|
117
|
+
AnyValue::Utf8Owned(_v) => todo!(),
|
118
|
+
AnyValue::Categorical(_idx, _rev, _arr) => todo!(),
|
117
119
|
AnyValue::Date(v) => class::time()
|
118
120
|
.funcall::<_, _, Value>("at", (v * 86400,))
|
119
121
|
.unwrap()
|
@@ -123,7 +125,13 @@ impl From<Wrap<AnyValue<'_>>> for Value {
|
|
123
125
|
.unwrap(),
|
124
126
|
AnyValue::Datetime(v, tu, tz) => {
|
125
127
|
let t = match tu {
|
126
|
-
TimeUnit::Nanoseconds =>
|
128
|
+
TimeUnit::Nanoseconds => {
|
129
|
+
let sec = v / 1000000000;
|
130
|
+
let subsec = v % 1000000000;
|
131
|
+
class::time()
|
132
|
+
.funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("nsec")))
|
133
|
+
.unwrap()
|
134
|
+
}
|
127
135
|
TimeUnit::Microseconds => {
|
128
136
|
let sec = v / 1000000;
|
129
137
|
let subsec = v % 1000000;
|
@@ -131,7 +139,13 @@ impl From<Wrap<AnyValue<'_>>> for Value {
|
|
131
139
|
.funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("usec")))
|
132
140
|
.unwrap()
|
133
141
|
}
|
134
|
-
TimeUnit::Milliseconds =>
|
142
|
+
TimeUnit::Milliseconds => {
|
143
|
+
let sec = v / 1000;
|
144
|
+
let subsec = v % 1000;
|
145
|
+
class::time()
|
146
|
+
.funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("millisecond")))
|
147
|
+
.unwrap()
|
148
|
+
}
|
135
149
|
};
|
136
150
|
|
137
151
|
if tz.is_some() {
|
@@ -140,16 +154,24 @@ impl From<Wrap<AnyValue<'_>>> for Value {
|
|
140
154
|
t.funcall::<_, _, Value>("utc", ()).unwrap()
|
141
155
|
}
|
142
156
|
}
|
143
|
-
|
157
|
+
AnyValue::Duration(_v, _tu) => todo!(),
|
158
|
+
AnyValue::Time(_v) => todo!(),
|
159
|
+
AnyValue::List(v) => RbSeries::new(v).to_a().into_value(),
|
160
|
+
ref _av @ AnyValue::Struct(_, _, _flds) => todo!(),
|
161
|
+
AnyValue::StructOwned(_payload) => todo!(),
|
162
|
+
AnyValue::Object(_v) => todo!(),
|
163
|
+
AnyValue::ObjectOwned(_v) => todo!(),
|
164
|
+
AnyValue::Binary(_v) => todo!(),
|
165
|
+
AnyValue::BinaryOwned(_v) => todo!(),
|
144
166
|
}
|
145
167
|
}
|
146
168
|
}
|
147
169
|
|
148
|
-
impl
|
149
|
-
fn
|
170
|
+
impl IntoValue for Wrap<DataType> {
|
171
|
+
fn into_value_with(self, _: &RubyHandle) -> Value {
|
150
172
|
let pl = crate::rb_modules::polars();
|
151
173
|
|
152
|
-
match
|
174
|
+
match self.0 {
|
153
175
|
DataType::Int8 => pl.const_get::<_, Value>("Int8").unwrap(),
|
154
176
|
DataType::Int16 => pl.const_get::<_, Value>("Int16").unwrap(),
|
155
177
|
DataType::Int32 => pl.const_get::<_, Value>("Int32").unwrap(),
|
@@ -160,11 +182,12 @@ impl From<Wrap<DataType>> for Value {
|
|
160
182
|
DataType::UInt64 => pl.const_get::<_, Value>("UInt64").unwrap(),
|
161
183
|
DataType::Float32 => pl.const_get::<_, Value>("Float32").unwrap(),
|
162
184
|
DataType::Float64 => pl.const_get::<_, Value>("Float64").unwrap(),
|
185
|
+
DataType::Decimal128(_) => todo!(),
|
163
186
|
DataType::Boolean => pl.const_get::<_, Value>("Boolean").unwrap(),
|
164
187
|
DataType::Utf8 => pl.const_get::<_, Value>("Utf8").unwrap(),
|
165
188
|
DataType::Binary => pl.const_get::<_, Value>("Binary").unwrap(),
|
166
189
|
DataType::List(inner) => {
|
167
|
-
let inner = Wrap(*inner
|
190
|
+
let inner = Wrap(*inner);
|
168
191
|
let list_class = pl.const_get::<_, Value>("List").unwrap();
|
169
192
|
list_class.funcall::<_, _, Value>("new", (inner,)).unwrap()
|
170
193
|
}
|
@@ -172,7 +195,7 @@ impl From<Wrap<DataType>> for Value {
|
|
172
195
|
DataType::Datetime(tu, tz) => {
|
173
196
|
let datetime_class = pl.const_get::<_, Value>("Datetime").unwrap();
|
174
197
|
datetime_class
|
175
|
-
.funcall::<_, _, Value>("new", (tu.to_ascii(), tz
|
198
|
+
.funcall::<_, _, Value>("new", (tu.to_ascii(), tz))
|
176
199
|
.unwrap()
|
177
200
|
}
|
178
201
|
DataType::Duration(tu) => {
|
@@ -423,9 +446,9 @@ impl ObjectValue {
|
|
423
446
|
}
|
424
447
|
}
|
425
448
|
|
426
|
-
impl
|
427
|
-
fn
|
428
|
-
|
449
|
+
impl IntoValue for ObjectValue {
|
450
|
+
fn into_value_with(self, _: &RubyHandle) -> Value {
|
451
|
+
self.inner
|
429
452
|
}
|
430
453
|
}
|
431
454
|
|
@@ -767,6 +790,22 @@ impl TryConvert for Wrap<UniqueKeepStrategy> {
|
|
767
790
|
}
|
768
791
|
}
|
769
792
|
|
793
|
+
impl TryConvert for Wrap<SearchSortedSide> {
|
794
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
795
|
+
let parsed = match ob.try_convert::<String>()?.as_str() {
|
796
|
+
"any" => SearchSortedSide::Any,
|
797
|
+
"left" => SearchSortedSide::Left,
|
798
|
+
"right" => SearchSortedSide::Right,
|
799
|
+
v => {
|
800
|
+
return Err(RbValueError::new_err(format!(
|
801
|
+
"side must be one of {{'any', 'left', 'right'}}, got {v}",
|
802
|
+
)))
|
803
|
+
}
|
804
|
+
};
|
805
|
+
Ok(Wrap(parsed))
|
806
|
+
}
|
807
|
+
}
|
808
|
+
|
770
809
|
pub fn parse_fill_null_strategy(
|
771
810
|
strategy: &str,
|
772
811
|
limit: FillNullLimit,
|
@@ -780,7 +819,7 @@ pub fn parse_fill_null_strategy(
|
|
780
819
|
"zero" => FillNullStrategy::Zero,
|
781
820
|
"one" => FillNullStrategy::One,
|
782
821
|
e => {
|
783
|
-
return Err(magnus::Error::runtime_error(format!(
|
822
|
+
return Err(magnus::Error::new(exception::runtime_error(), format!(
|
784
823
|
"strategy must be one of {{'forward', 'backward', 'min', 'max', 'mean', 'zero', 'one'}}, got {}",
|
785
824
|
e,
|
786
825
|
)))
|
data/ext/polars/src/dataframe.rs
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
use magnus::{r_hash::ForEach, RArray, RHash, RString, Value};
|
1
|
+
use magnus::{r_hash::ForEach, IntoValue, RArray, RHash, RString, Value};
|
2
2
|
use polars::frame::row::{rows_to_schema_supertypes, Row};
|
3
3
|
use polars::frame::NullStrategy;
|
4
4
|
use polars::io::avro::AvroCompression;
|
@@ -6,6 +6,7 @@ use polars::io::mmap::ReaderBytes;
|
|
6
6
|
use polars::io::RowCount;
|
7
7
|
use polars::prelude::pivot::{pivot, pivot_stable};
|
8
8
|
use polars::prelude::*;
|
9
|
+
use polars_core::utils::try_get_supertype;
|
9
10
|
use std::cell::RefCell;
|
10
11
|
use std::io::{BufWriter, Cursor};
|
11
12
|
use std::ops::Deref;
|
@@ -68,7 +69,7 @@ impl RbDataFrame {
|
|
68
69
|
*dtype_ = dtype;
|
69
70
|
}
|
70
71
|
} else {
|
71
|
-
schema.with_column(name, dtype)
|
72
|
+
schema.with_column(name, dtype);
|
72
73
|
}
|
73
74
|
}
|
74
75
|
}
|
@@ -159,7 +160,7 @@ impl RbDataFrame {
|
|
159
160
|
.with_n_rows(n_rows)
|
160
161
|
.with_delimiter(sep.as_bytes()[0])
|
161
162
|
.with_skip_rows(skip_rows)
|
162
|
-
.
|
163
|
+
.with_ignore_errors(ignore_errors)
|
163
164
|
.with_projection(projection)
|
164
165
|
.with_rechunk(rechunk)
|
165
166
|
.with_chunk_size(chunk_size)
|
@@ -457,7 +458,7 @@ impl RbDataFrame {
|
|
457
458
|
} else {
|
458
459
|
idx as usize
|
459
460
|
};
|
460
|
-
RArray::
|
461
|
+
RArray::from_iter(
|
461
462
|
self.df
|
462
463
|
.borrow()
|
463
464
|
.get_columns()
|
@@ -467,39 +468,51 @@ impl RbDataFrame {
|
|
467
468
|
let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
|
468
469
|
obj.unwrap().to_object()
|
469
470
|
}
|
470
|
-
_ => Wrap(s.get(idx).unwrap()).
|
471
|
-
})
|
472
|
-
.collect(),
|
471
|
+
_ => Wrap(s.get(idx).unwrap()).into_value(),
|
472
|
+
}),
|
473
473
|
)
|
474
474
|
.into()
|
475
475
|
}
|
476
476
|
|
477
477
|
pub fn row_tuples(&self) -> Value {
|
478
478
|
let df = &self.df;
|
479
|
-
RArray::
|
480
|
-
(
|
481
|
-
.
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
|
487
|
-
.map(|
|
488
|
-
|
489
|
-
|
490
|
-
|
491
|
-
|
492
|
-
|
493
|
-
|
494
|
-
})
|
495
|
-
.collect(),
|
496
|
-
)
|
497
|
-
})
|
498
|
-
.collect(),
|
499
|
-
)
|
479
|
+
RArray::from_iter((0..df.borrow().height()).map(|idx| {
|
480
|
+
RArray::from_iter(
|
481
|
+
self.df
|
482
|
+
.borrow()
|
483
|
+
.get_columns()
|
484
|
+
.iter()
|
485
|
+
.map(|s| match s.dtype() {
|
486
|
+
DataType::Object(_) => {
|
487
|
+
let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
|
488
|
+
obj.unwrap().to_object()
|
489
|
+
}
|
490
|
+
_ => Wrap(s.get(idx).unwrap()).into_value(),
|
491
|
+
}),
|
492
|
+
)
|
493
|
+
}))
|
500
494
|
.into()
|
501
495
|
}
|
502
496
|
|
497
|
+
pub fn to_numo(&self) -> Option<Value> {
|
498
|
+
let mut st = None;
|
499
|
+
for s in self.df.borrow().iter() {
|
500
|
+
let dt_i = s.dtype();
|
501
|
+
match st {
|
502
|
+
None => st = Some(dt_i.clone()),
|
503
|
+
Some(ref mut st) => {
|
504
|
+
*st = try_get_supertype(st, dt_i).ok()?;
|
505
|
+
}
|
506
|
+
}
|
507
|
+
}
|
508
|
+
let st = st?;
|
509
|
+
|
510
|
+
match st {
|
511
|
+
// TODO
|
512
|
+
_ => None,
|
513
|
+
}
|
514
|
+
}
|
515
|
+
|
503
516
|
pub fn write_parquet(
|
504
517
|
&self,
|
505
518
|
rb_f: Value,
|
@@ -613,7 +626,7 @@ impl RbDataFrame {
|
|
613
626
|
format!("{}", self.df.borrow())
|
614
627
|
}
|
615
628
|
|
616
|
-
pub fn get_columns(&self) ->
|
629
|
+
pub fn get_columns(&self) -> RArray {
|
617
630
|
let cols = self.df.borrow().get_columns().clone();
|
618
631
|
to_rbseries_collection(cols)
|
619
632
|
}
|
@@ -635,12 +648,13 @@ impl RbDataFrame {
|
|
635
648
|
Ok(())
|
636
649
|
}
|
637
650
|
|
638
|
-
pub fn dtypes(&self) ->
|
639
|
-
|
640
|
-
.
|
641
|
-
|
642
|
-
|
643
|
-
|
651
|
+
pub fn dtypes(&self) -> RArray {
|
652
|
+
RArray::from_iter(
|
653
|
+
self.df
|
654
|
+
.borrow()
|
655
|
+
.iter()
|
656
|
+
.map(|s| Wrap(s.dtype().clone()).into_value()),
|
657
|
+
)
|
644
658
|
}
|
645
659
|
|
646
660
|
pub fn n_chunks(&self) -> usize {
|
@@ -777,6 +791,7 @@ impl RbDataFrame {
|
|
777
791
|
SortOptions {
|
778
792
|
descending: reverse,
|
779
793
|
nulls_last,
|
794
|
+
multithreaded: true,
|
780
795
|
},
|
781
796
|
)
|
782
797
|
.map_err(RbPolarsErr::from)?;
|
@@ -876,6 +891,7 @@ impl RbDataFrame {
|
|
876
891
|
Ok(RbDataFrame::new(df))
|
877
892
|
}
|
878
893
|
|
894
|
+
#[allow(clippy::too_many_arguments)]
|
879
895
|
pub fn pivot_expr(
|
880
896
|
&self,
|
881
897
|
values: Vec<String>,
|
@@ -884,6 +900,7 @@ impl RbDataFrame {
|
|
884
900
|
aggregate_expr: &RbExpr,
|
885
901
|
maintain_order: bool,
|
886
902
|
sort_columns: bool,
|
903
|
+
separator: Option<String>,
|
887
904
|
) -> RbResult<Self> {
|
888
905
|
let fun = match maintain_order {
|
889
906
|
true => pivot_stable,
|
@@ -896,19 +913,20 @@ impl RbDataFrame {
|
|
896
913
|
columns,
|
897
914
|
aggregate_expr.inner.clone(),
|
898
915
|
sort_columns,
|
916
|
+
separator.as_deref(),
|
899
917
|
)
|
900
918
|
.map_err(RbPolarsErr::from)?;
|
901
919
|
Ok(RbDataFrame::new(df))
|
902
920
|
}
|
903
921
|
|
904
|
-
pub fn partition_by(&self, groups: Vec<String>, stable: bool) -> RbResult<
|
922
|
+
pub fn partition_by(&self, groups: Vec<String>, stable: bool) -> RbResult<RArray> {
|
905
923
|
let out = if stable {
|
906
924
|
self.df.borrow().partition_by_stable(groups)
|
907
925
|
} else {
|
908
926
|
self.df.borrow().partition_by(groups)
|
909
927
|
}
|
910
928
|
.map_err(RbPolarsErr::from)?;
|
911
|
-
Ok(out.into_iter().map(RbDataFrame::new)
|
929
|
+
Ok(RArray::from_iter(out.into_iter().map(RbDataFrame::new)))
|
912
930
|
}
|
913
931
|
|
914
932
|
pub fn shift(&self, periods: i64) -> Self {
|
@@ -1003,13 +1021,17 @@ impl RbDataFrame {
|
|
1003
1021
|
Ok(df.into())
|
1004
1022
|
}
|
1005
1023
|
|
1006
|
-
pub fn to_dummies(
|
1024
|
+
pub fn to_dummies(
|
1025
|
+
&self,
|
1026
|
+
columns: Option<Vec<String>>,
|
1027
|
+
separator: Option<String>,
|
1028
|
+
) -> RbResult<Self> {
|
1007
1029
|
let df = match columns {
|
1008
|
-
Some(cols) => self
|
1009
|
-
.
|
1010
|
-
.
|
1011
|
-
|
1012
|
-
None => self.df.borrow().to_dummies(),
|
1030
|
+
Some(cols) => self.df.borrow().columns_to_dummies(
|
1031
|
+
cols.iter().map(|x| x as &str).collect(),
|
1032
|
+
separator.as_deref(),
|
1033
|
+
),
|
1034
|
+
None => self.df.borrow().to_dummies(separator.as_deref()),
|
1013
1035
|
}
|
1014
1036
|
.map_err(RbPolarsErr::from)?;
|
1015
1037
|
Ok(df.into())
|