polars-df 0.2.5 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/Cargo.lock +290 -137
- data/Cargo.toml +1 -1
- data/ext/polars/Cargo.toml +5 -4
- data/ext/polars/src/apply/dataframe.rs +6 -6
- data/ext/polars/src/apply/series.rs +10 -10
- data/ext/polars/src/batched_csv.rs +6 -4
- data/ext/polars/src/conversion.rs +40 -13
- data/ext/polars/src/dataframe.rs +45 -43
- data/ext/polars/src/error.rs +8 -8
- data/ext/polars/src/file.rs +5 -4
- data/ext/polars/src/lazy/apply.rs +1 -1
- data/ext/polars/src/lazy/dataframe.rs +12 -6
- data/ext/polars/src/lazy/dsl.rs +99 -45
- data/ext/polars/src/lazy/meta.rs +10 -9
- data/ext/polars/src/lib.rs +28 -29
- data/ext/polars/src/object.rs +2 -1
- data/ext/polars/src/series.rs +23 -21
- data/lib/polars/cat_expr.rb +0 -4
- data/lib/polars/cat_name_space.rb +0 -4
- data/lib/polars/convert.rb +0 -7
- data/lib/polars/data_frame.rb +139 -204
- data/lib/polars/date_time_expr.rb +19 -151
- data/lib/polars/date_time_name_space.rb +17 -17
- data/lib/polars/expr.rb +68 -315
- data/lib/polars/group_by.rb +68 -51
- data/lib/polars/io.rb +1 -1
- data/lib/polars/lazy_frame.rb +1 -103
- data/lib/polars/lazy_functions.rb +0 -26
- data/lib/polars/lazy_group_by.rb +0 -8
- data/lib/polars/list_expr.rb +5 -27
- data/lib/polars/list_name_space.rb +5 -8
- data/lib/polars/series.rb +20 -16
- data/lib/polars/string_expr.rb +20 -76
- data/lib/polars/string_name_space.rb +5 -15
- data/lib/polars/struct_expr.rb +0 -2
- data/lib/polars/version.rb +1 -1
- metadata +3 -3
data/Cargo.toml
CHANGED
@@ -4,7 +4,7 @@ members = ["ext/polars"]
|
|
4
4
|
[patch.crates-io]
|
5
5
|
jsonpath_lib = { git = "https://github.com/ritchie46/jsonpath", rev = "24eaf0b4416edff38a4d1b6b17bc4b9f3f047b4b" }
|
6
6
|
halfbrown = { git = "https://github.com/Licenser/halfbrown", rev = "952023c5dd6461b009bb5ba66b9aa979bd75949f" }
|
7
|
-
arrow2 = { git = "https://github.com/ankane/arrow2", rev = "
|
7
|
+
arrow2 = { git = "https://github.com/ankane/arrow2", rev = "ef0270922a217070ba9942567c0ff3263ae8c531" }
|
8
8
|
|
9
9
|
[profile.release]
|
10
10
|
strip = true
|
data/ext/polars/Cargo.toml
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
[package]
|
2
2
|
name = "polars"
|
3
|
-
version = "0.2.5"
|
3
|
+
version = "0.3.0"
|
4
4
|
license = "MIT"
|
5
5
|
authors = ["Andrew Kane <andrew@ankane.org>"]
|
6
6
|
edition = "2021"
|
@@ -11,18 +11,19 @@ crate-type = ["cdylib"]
|
|
11
11
|
|
12
12
|
[dependencies]
|
13
13
|
ahash = "0.8"
|
14
|
-
magnus = "0.
|
15
|
-
polars-core = "0.
|
14
|
+
magnus = "0.5"
|
15
|
+
polars-core = "0.27.0"
|
16
16
|
serde_json = "1"
|
17
17
|
|
18
18
|
[dependencies.polars]
|
19
|
-
version = "0.
|
19
|
+
version = "0.27.0"
|
20
20
|
features = [
|
21
21
|
"abs",
|
22
22
|
"arange",
|
23
23
|
"arg_where",
|
24
24
|
"asof_join",
|
25
25
|
"avro",
|
26
|
+
"binary_encoding",
|
26
27
|
"concat_str",
|
27
28
|
"cse",
|
28
29
|
"csv-file",
|
@@ -1,4 +1,4 @@
|
|
1
|
-
use magnus::{class, RArray, TryConvert, Value};
|
1
|
+
use magnus::{class, IntoValue, RArray, TryConvert, Value};
|
2
2
|
use polars::prelude::*;
|
3
3
|
use polars_core::frame::row::{rows_to_schema_first_non_null, Row};
|
4
4
|
use polars_core::series::SeriesIter;
|
@@ -27,7 +27,7 @@ pub fn apply_lambda_unknown<'a>(
|
|
27
27
|
|
28
28
|
for _ in 0..df.height() {
|
29
29
|
let iter = iters.iter_mut().map(|it| Wrap(it.next().unwrap()));
|
30
|
-
let arg = (iter
|
30
|
+
let arg = (RArray::from_iter(iter),);
|
31
31
|
let out: Value = lambda.funcall("call", arg)?;
|
32
32
|
|
33
33
|
if out.is_nil() {
|
@@ -141,7 +141,7 @@ where
|
|
141
141
|
let mut iters = get_iters_skip(df, init_null_count + skip);
|
142
142
|
((init_null_count + skip)..df.height()).map(move |_| {
|
143
143
|
let iter = iters.iter_mut().map(|it| Wrap(it.next().unwrap()));
|
144
|
-
let tpl = (iter
|
144
|
+
let tpl = (RArray::from_iter(iter),);
|
145
145
|
match lambda.funcall::<_, _, Value>("call", tpl) {
|
146
146
|
Ok(val) => val.try_convert::<T>().ok(),
|
147
147
|
Err(e) => panic!("ruby function failed {}", e),
|
@@ -158,7 +158,7 @@ pub fn apply_lambda_with_primitive_out_type<D>(
|
|
158
158
|
) -> ChunkedArray<D>
|
159
159
|
where
|
160
160
|
D: RbArrowPrimitiveType,
|
161
|
-
D::Native:
|
161
|
+
D::Native: IntoValue + TryConvert,
|
162
162
|
{
|
163
163
|
let skip = usize::from(first_value.is_some());
|
164
164
|
if init_null_count == df.height() {
|
@@ -216,7 +216,7 @@ pub fn apply_lambda_with_list_out_type<'a>(
|
|
216
216
|
let mut iters = get_iters_skip(df, init_null_count + skip);
|
217
217
|
let iter = ((init_null_count + skip)..df.height()).map(|_| {
|
218
218
|
let iter = iters.iter_mut().map(|it| Wrap(it.next().unwrap()));
|
219
|
-
let tpl = (iter
|
219
|
+
let tpl = (RArray::from_iter(iter),);
|
220
220
|
match lambda.funcall::<_, _, Value>("call", tpl) {
|
221
221
|
Ok(val) => match val.funcall::<_, _, Value>("_s", ()) {
|
222
222
|
Ok(val) => val
|
@@ -254,7 +254,7 @@ pub fn apply_lambda_with_rows_output<'a>(
|
|
254
254
|
let mut iters = get_iters_skip(df, init_null_count + skip);
|
255
255
|
let mut row_iter = ((init_null_count + skip)..df.height()).map(|_| {
|
256
256
|
let iter = iters.iter_mut().map(|it| Wrap(it.next().unwrap()));
|
257
|
-
let tpl = (iter
|
257
|
+
let tpl = (RArray::from_iter(iter),);
|
258
258
|
match lambda.funcall::<_, _, Value>("call", tpl) {
|
259
259
|
Ok(val) => {
|
260
260
|
match val.try_convert::<RArray>().ok() {
|
@@ -1,4 +1,4 @@
|
|
1
|
-
use magnus::{class, RHash, TryConvert, Value};
|
1
|
+
use magnus::{class, IntoValue, RHash, TryConvert, Value};
|
2
2
|
use polars::prelude::*;
|
3
3
|
|
4
4
|
use super::*;
|
@@ -85,7 +85,7 @@ pub trait ApplyLambda<'a> {
|
|
85
85
|
) -> RbResult<ChunkedArray<D>>
|
86
86
|
where
|
87
87
|
D: RbArrowPrimitiveType,
|
88
|
-
D::Native:
|
88
|
+
D::Native: IntoValue + TryConvert;
|
89
89
|
|
90
90
|
/// Apply a lambda with a boolean output type
|
91
91
|
fn apply_lambda_with_bool_out_type(
|
@@ -130,14 +130,14 @@ pub trait ApplyLambda<'a> {
|
|
130
130
|
|
131
131
|
pub fn call_lambda<T>(lambda: Value, in_val: T) -> RbResult<Value>
|
132
132
|
where
|
133
|
-
T:
|
133
|
+
T: IntoValue,
|
134
134
|
{
|
135
135
|
lambda.funcall("call", (in_val,))
|
136
136
|
}
|
137
137
|
|
138
138
|
pub(crate) fn call_lambda_and_extract<T, S>(lambda: Value, in_val: T) -> RbResult<S>
|
139
139
|
where
|
140
|
-
T:
|
140
|
+
T: IntoValue,
|
141
141
|
S: TryConvert,
|
142
142
|
{
|
143
143
|
match call_lambda(lambda, in_val) {
|
@@ -148,7 +148,7 @@ where
|
|
148
148
|
|
149
149
|
fn call_lambda_series_out<T>(lambda: Value, in_val: T) -> RbResult<Series>
|
150
150
|
where
|
151
|
-
T:
|
151
|
+
T: IntoValue,
|
152
152
|
{
|
153
153
|
let out: Value = lambda.funcall("call", (in_val,))?;
|
154
154
|
let py_series: Value = out.funcall("_s", ())?;
|
@@ -216,7 +216,7 @@ impl<'a> ApplyLambda<'a> for BooleanChunked {
|
|
216
216
|
) -> RbResult<ChunkedArray<D>>
|
217
217
|
where
|
218
218
|
D: RbArrowPrimitiveType,
|
219
|
-
D::Native:
|
219
|
+
D::Native: IntoValue + TryConvert,
|
220
220
|
{
|
221
221
|
let skip = usize::from(first_value.is_some());
|
222
222
|
if init_null_count == self.len() {
|
@@ -435,7 +435,7 @@ impl<'a> ApplyLambda<'a> for BooleanChunked {
|
|
435
435
|
impl<'a, T> ApplyLambda<'a> for ChunkedArray<T>
|
436
436
|
where
|
437
437
|
T: RbArrowPrimitiveType + PolarsNumericType,
|
438
|
-
T::Native:
|
438
|
+
T::Native: IntoValue + TryConvert,
|
439
439
|
ChunkedArray<T>: IntoSeries,
|
440
440
|
{
|
441
441
|
fn apply_lambda_unknown(&'a self, lambda: Value) -> RbResult<RbSeries> {
|
@@ -493,7 +493,7 @@ where
|
|
493
493
|
) -> RbResult<ChunkedArray<D>>
|
494
494
|
where
|
495
495
|
D: RbArrowPrimitiveType,
|
496
|
-
D::Native:
|
496
|
+
D::Native: IntoValue + TryConvert,
|
497
497
|
{
|
498
498
|
let skip = usize::from(first_value.is_some());
|
499
499
|
if init_null_count == self.len() {
|
@@ -765,7 +765,7 @@ impl<'a> ApplyLambda<'a> for Utf8Chunked {
|
|
765
765
|
) -> RbResult<ChunkedArray<D>>
|
766
766
|
where
|
767
767
|
D: RbArrowPrimitiveType,
|
768
|
-
D::Native:
|
768
|
+
D::Native: IntoValue + TryConvert,
|
769
769
|
{
|
770
770
|
let skip = usize::from(first_value.is_some());
|
771
771
|
if init_null_count == self.len() {
|
@@ -1036,7 +1036,7 @@ impl<'a> ApplyLambda<'a> for StructChunked {
|
|
1036
1036
|
) -> RbResult<ChunkedArray<D>>
|
1037
1037
|
where
|
1038
1038
|
D: RbArrowPrimitiveType,
|
1039
|
-
D::Native:
|
1039
|
+
D::Native: IntoValue + TryConvert,
|
1040
1040
|
{
|
1041
1041
|
let names = self.fields().iter().map(|s| s.name()).collect::<Vec<_>>();
|
1042
1042
|
|
@@ -1,4 +1,4 @@
|
|
1
|
-
use magnus::Value;
|
1
|
+
use magnus::{RArray, Value};
|
2
2
|
use polars::io::mmap::MmapBytesReader;
|
3
3
|
use polars::io::RowCount;
|
4
4
|
use polars::prelude::read_impl::OwnedBatchedCsvReader;
|
@@ -84,7 +84,7 @@ impl RbBatchedCsv {
|
|
84
84
|
.with_n_rows(n_rows)
|
85
85
|
.with_delimiter(sep.as_bytes()[0])
|
86
86
|
.with_skip_rows(skip_rows)
|
87
|
-
.
|
87
|
+
.with_ignore_errors(ignore_errors)
|
88
88
|
.with_projection(projection)
|
89
89
|
.with_rechunk(rechunk)
|
90
90
|
.with_chunk_size(chunk_size)
|
@@ -109,12 +109,14 @@ impl RbBatchedCsv {
|
|
109
109
|
})
|
110
110
|
}
|
111
111
|
|
112
|
-
pub fn next_batches(&self, n: usize) -> RbResult<Option<
|
112
|
+
pub fn next_batches(&self, n: usize) -> RbResult<Option<RArray>> {
|
113
113
|
let batches = self
|
114
114
|
.reader
|
115
115
|
.borrow_mut()
|
116
116
|
.next_batches(n)
|
117
117
|
.map_err(RbPolarsErr::from)?;
|
118
|
-
Ok(batches.map(|batches|
|
118
|
+
Ok(batches.map(|batches| {
|
119
|
+
RArray::from_iter(batches.into_iter().map(|out| RbDataFrame::from(out.1)))
|
120
|
+
}))
|
119
121
|
}
|
120
122
|
}
|
@@ -1,6 +1,6 @@
|
|
1
1
|
use magnus::{
|
2
|
-
class, r_hash::ForEach,
|
3
|
-
Value, QNIL,
|
2
|
+
class, exception, r_hash::ForEach, ruby_handle::RubyHandle, Integer, IntoValue, Module, RArray,
|
3
|
+
RFloat, RHash, RString, Symbol, TryConvert, Value, QNIL,
|
4
4
|
};
|
5
5
|
use polars::chunked_array::object::PolarsObjectSafe;
|
6
6
|
use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
|
@@ -98,9 +98,9 @@ impl TryConvert for Wrap<NullValues> {
|
|
98
98
|
}
|
99
99
|
}
|
100
100
|
|
101
|
-
impl From<Wrap<AnyValue<'_>>> for Value {
|
102
|
-
fn
|
103
|
-
match
|
101
|
+
impl IntoValue for Wrap<AnyValue<'_>> {
|
102
|
+
fn into_value_with(self, _: &RubyHandle) -> Value {
|
103
|
+
match self.0 {
|
104
104
|
AnyValue::UInt8(v) => Value::from(v),
|
105
105
|
AnyValue::UInt16(v) => Value::from(v),
|
106
106
|
AnyValue::UInt32(v) => Value::from(v),
|
@@ -114,6 +114,8 @@ impl From<Wrap<AnyValue<'_>>> for Value {
|
|
114
114
|
AnyValue::Null => *QNIL,
|
115
115
|
AnyValue::Boolean(v) => Value::from(v),
|
116
116
|
AnyValue::Utf8(v) => Value::from(v),
|
117
|
+
AnyValue::Utf8Owned(_v) => todo!(),
|
118
|
+
AnyValue::Categorical(_idx, _rev, _arr) => todo!(),
|
117
119
|
AnyValue::Date(v) => class::time()
|
118
120
|
.funcall::<_, _, Value>("at", (v * 86400,))
|
119
121
|
.unwrap()
|
@@ -140,16 +142,24 @@ impl From<Wrap<AnyValue<'_>>> for Value {
|
|
140
142
|
t.funcall::<_, _, Value>("utc", ()).unwrap()
|
141
143
|
}
|
142
144
|
}
|
143
|
-
|
145
|
+
AnyValue::Duration(_v, _tu) => todo!(),
|
146
|
+
AnyValue::Time(_v) => todo!(),
|
147
|
+
AnyValue::List(v) => RbSeries::new(v).to_a().into_value(),
|
148
|
+
ref _av @ AnyValue::Struct(_, _, _flds) => todo!(),
|
149
|
+
AnyValue::StructOwned(_payload) => todo!(),
|
150
|
+
AnyValue::Object(_v) => todo!(),
|
151
|
+
AnyValue::ObjectOwned(_v) => todo!(),
|
152
|
+
AnyValue::Binary(_v) => todo!(),
|
153
|
+
AnyValue::BinaryOwned(_v) => todo!(),
|
144
154
|
}
|
145
155
|
}
|
146
156
|
}
|
147
157
|
|
148
|
-
impl From<Wrap<DataType>> for Value {
|
149
|
-
fn
|
158
|
+
impl IntoValue for Wrap<DataType> {
|
159
|
+
fn into_value_with(self, _: &RubyHandle) -> Value {
|
150
160
|
let pl = crate::rb_modules::polars();
|
151
161
|
|
152
|
-
match
|
162
|
+
match self.0 {
|
153
163
|
DataType::Int8 => pl.const_get::<_, Value>("Int8").unwrap(),
|
154
164
|
DataType::Int16 => pl.const_get::<_, Value>("Int16").unwrap(),
|
155
165
|
DataType::Int32 => pl.const_get::<_, Value>("Int32").unwrap(),
|
@@ -160,6 +170,7 @@ impl From<Wrap<DataType>> for Value {
|
|
160
170
|
DataType::UInt64 => pl.const_get::<_, Value>("UInt64").unwrap(),
|
161
171
|
DataType::Float32 => pl.const_get::<_, Value>("Float32").unwrap(),
|
162
172
|
DataType::Float64 => pl.const_get::<_, Value>("Float64").unwrap(),
|
173
|
+
DataType::Decimal128(_) => todo!(),
|
163
174
|
DataType::Boolean => pl.const_get::<_, Value>("Boolean").unwrap(),
|
164
175
|
DataType::Utf8 => pl.const_get::<_, Value>("Utf8").unwrap(),
|
165
176
|
DataType::Binary => pl.const_get::<_, Value>("Binary").unwrap(),
|
@@ -423,9 +434,9 @@ impl ObjectValue {
|
|
423
434
|
}
|
424
435
|
}
|
425
436
|
|
426
|
-
impl
|
427
|
-
fn
|
428
|
-
|
437
|
+
impl IntoValue for ObjectValue {
|
438
|
+
fn into_value_with(self, _: &RubyHandle) -> Value {
|
439
|
+
self.inner
|
429
440
|
}
|
430
441
|
}
|
431
442
|
|
@@ -767,6 +778,22 @@ impl TryConvert for Wrap<UniqueKeepStrategy> {
|
|
767
778
|
}
|
768
779
|
}
|
769
780
|
|
781
|
+
impl TryConvert for Wrap<SearchSortedSide> {
|
782
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
783
|
+
let parsed = match ob.try_convert::<String>()?.as_str() {
|
784
|
+
"any" => SearchSortedSide::Any,
|
785
|
+
"left" => SearchSortedSide::Left,
|
786
|
+
"right" => SearchSortedSide::Right,
|
787
|
+
v => {
|
788
|
+
return Err(RbValueError::new_err(format!(
|
789
|
+
"side must be one of {{'any', 'left', 'right'}}, got {v}",
|
790
|
+
)))
|
791
|
+
}
|
792
|
+
};
|
793
|
+
Ok(Wrap(parsed))
|
794
|
+
}
|
795
|
+
}
|
796
|
+
|
770
797
|
pub fn parse_fill_null_strategy(
|
771
798
|
strategy: &str,
|
772
799
|
limit: FillNullLimit,
|
@@ -780,7 +807,7 @@ pub fn parse_fill_null_strategy(
|
|
780
807
|
"zero" => FillNullStrategy::Zero,
|
781
808
|
"one" => FillNullStrategy::One,
|
782
809
|
e => {
|
783
|
-
return Err(magnus::Error::runtime_error(format!(
|
810
|
+
return Err(magnus::Error::new(exception::runtime_error(), format!(
|
784
811
|
"strategy must be one of {{'forward', 'backward', 'min', 'max', 'mean', 'zero', 'one'}}, got {}",
|
785
812
|
e,
|
786
813
|
)))
|
data/ext/polars/src/dataframe.rs
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
use magnus::{r_hash::ForEach, RArray, RHash, RString, Value};
|
1
|
+
use magnus::{r_hash::ForEach, IntoValue, RArray, RHash, RString, Value};
|
2
2
|
use polars::frame::row::{rows_to_schema_supertypes, Row};
|
3
3
|
use polars::frame::NullStrategy;
|
4
4
|
use polars::io::avro::AvroCompression;
|
@@ -68,7 +68,7 @@ impl RbDataFrame {
|
|
68
68
|
*dtype_ = dtype;
|
69
69
|
}
|
70
70
|
} else {
|
71
|
-
schema.with_column(name, dtype)
|
71
|
+
schema.with_column(name, dtype);
|
72
72
|
}
|
73
73
|
}
|
74
74
|
}
|
@@ -159,7 +159,7 @@ impl RbDataFrame {
|
|
159
159
|
.with_n_rows(n_rows)
|
160
160
|
.with_delimiter(sep.as_bytes()[0])
|
161
161
|
.with_skip_rows(skip_rows)
|
162
|
-
.
|
162
|
+
.with_ignore_errors(ignore_errors)
|
163
163
|
.with_projection(projection)
|
164
164
|
.with_rechunk(rechunk)
|
165
165
|
.with_chunk_size(chunk_size)
|
@@ -457,7 +457,7 @@ impl RbDataFrame {
|
|
457
457
|
} else {
|
458
458
|
idx as usize
|
459
459
|
};
|
460
|
-
RArray::
|
460
|
+
RArray::from_iter(
|
461
461
|
self.df
|
462
462
|
.borrow()
|
463
463
|
.get_columns()
|
@@ -467,36 +467,29 @@ impl RbDataFrame {
|
|
467
467
|
let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
|
468
468
|
obj.unwrap().to_object()
|
469
469
|
}
|
470
|
-
_ => Wrap(s.get(idx).unwrap()).
|
471
|
-
})
|
472
|
-
.collect(),
|
470
|
+
_ => Wrap(s.get(idx).unwrap()).into_value(),
|
471
|
+
}),
|
473
472
|
)
|
474
473
|
.into()
|
475
474
|
}
|
476
475
|
|
477
476
|
pub fn row_tuples(&self) -> Value {
|
478
477
|
let df = &self.df;
|
479
|
-
RArray::
|
480
|
-
(
|
481
|
-
.
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
|
487
|
-
.map(|
|
488
|
-
|
489
|
-
|
490
|
-
|
491
|
-
|
492
|
-
|
493
|
-
|
494
|
-
})
|
495
|
-
.collect(),
|
496
|
-
)
|
497
|
-
})
|
498
|
-
.collect(),
|
499
|
-
)
|
478
|
+
RArray::from_iter((0..df.borrow().height()).map(|idx| {
|
479
|
+
RArray::from_iter(
|
480
|
+
self.df
|
481
|
+
.borrow()
|
482
|
+
.get_columns()
|
483
|
+
.iter()
|
484
|
+
.map(|s| match s.dtype() {
|
485
|
+
DataType::Object(_) => {
|
486
|
+
let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
|
487
|
+
obj.unwrap().to_object()
|
488
|
+
}
|
489
|
+
_ => Wrap(s.get(idx).unwrap()).into_value(),
|
490
|
+
}),
|
491
|
+
)
|
492
|
+
}))
|
500
493
|
.into()
|
501
494
|
}
|
502
495
|
|
@@ -613,7 +606,7 @@ impl RbDataFrame {
|
|
613
606
|
format!("{}", self.df.borrow())
|
614
607
|
}
|
615
608
|
|
616
|
-
pub fn get_columns(&self) ->
|
609
|
+
pub fn get_columns(&self) -> RArray {
|
617
610
|
let cols = self.df.borrow().get_columns().clone();
|
618
611
|
to_rbseries_collection(cols)
|
619
612
|
}
|
@@ -635,12 +628,13 @@ impl RbDataFrame {
|
|
635
628
|
Ok(())
|
636
629
|
}
|
637
630
|
|
638
|
-
pub fn dtypes(&self) ->
|
639
|
-
|
640
|
-
.
|
641
|
-
|
642
|
-
|
643
|
-
|
631
|
+
pub fn dtypes(&self) -> RArray {
|
632
|
+
RArray::from_iter(
|
633
|
+
self.df
|
634
|
+
.borrow()
|
635
|
+
.iter()
|
636
|
+
.map(|s| Wrap(s.dtype().clone()).into_value()),
|
637
|
+
)
|
644
638
|
}
|
645
639
|
|
646
640
|
pub fn n_chunks(&self) -> usize {
|
@@ -777,6 +771,7 @@ impl RbDataFrame {
|
|
777
771
|
SortOptions {
|
778
772
|
descending: reverse,
|
779
773
|
nulls_last,
|
774
|
+
multithreaded: true,
|
780
775
|
},
|
781
776
|
)
|
782
777
|
.map_err(RbPolarsErr::from)?;
|
@@ -876,6 +871,7 @@ impl RbDataFrame {
|
|
876
871
|
Ok(RbDataFrame::new(df))
|
877
872
|
}
|
878
873
|
|
874
|
+
#[allow(clippy::too_many_arguments)]
|
879
875
|
pub fn pivot_expr(
|
880
876
|
&self,
|
881
877
|
values: Vec<String>,
|
@@ -884,6 +880,7 @@ impl RbDataFrame {
|
|
884
880
|
aggregate_expr: &RbExpr,
|
885
881
|
maintain_order: bool,
|
886
882
|
sort_columns: bool,
|
883
|
+
separator: Option<String>,
|
887
884
|
) -> RbResult<Self> {
|
888
885
|
let fun = match maintain_order {
|
889
886
|
true => pivot_stable,
|
@@ -896,19 +893,20 @@ impl RbDataFrame {
|
|
896
893
|
columns,
|
897
894
|
aggregate_expr.inner.clone(),
|
898
895
|
sort_columns,
|
896
|
+
separator.as_deref(),
|
899
897
|
)
|
900
898
|
.map_err(RbPolarsErr::from)?;
|
901
899
|
Ok(RbDataFrame::new(df))
|
902
900
|
}
|
903
901
|
|
904
|
-
pub fn partition_by(&self, groups: Vec<String>, stable: bool) -> RbResult<
|
902
|
+
pub fn partition_by(&self, groups: Vec<String>, stable: bool) -> RbResult<RArray> {
|
905
903
|
let out = if stable {
|
906
904
|
self.df.borrow().partition_by_stable(groups)
|
907
905
|
} else {
|
908
906
|
self.df.borrow().partition_by(groups)
|
909
907
|
}
|
910
908
|
.map_err(RbPolarsErr::from)?;
|
911
|
-
Ok(out.into_iter().map(RbDataFrame::new)
|
909
|
+
Ok(RArray::from_iter(out.into_iter().map(RbDataFrame::new)))
|
912
910
|
}
|
913
911
|
|
914
912
|
pub fn shift(&self, periods: i64) -> Self {
|
@@ -1003,13 +1001,17 @@ impl RbDataFrame {
|
|
1003
1001
|
Ok(df.into())
|
1004
1002
|
}
|
1005
1003
|
|
1006
|
-
pub fn to_dummies(
|
1004
|
+
pub fn to_dummies(
|
1005
|
+
&self,
|
1006
|
+
columns: Option<Vec<String>>,
|
1007
|
+
separator: Option<String>,
|
1008
|
+
) -> RbResult<Self> {
|
1007
1009
|
let df = match columns {
|
1008
|
-
Some(cols) => self
|
1009
|
-
.
|
1010
|
-
.
|
1011
|
-
|
1012
|
-
None => self.df.borrow().to_dummies(),
|
1010
|
+
Some(cols) => self.df.borrow().columns_to_dummies(
|
1011
|
+
cols.iter().map(|x| x as &str).collect(),
|
1012
|
+
separator.as_deref(),
|
1013
|
+
),
|
1014
|
+
None => self.df.borrow().to_dummies(separator.as_deref()),
|
1013
1015
|
}
|
1014
1016
|
.map_err(RbPolarsErr::from)?;
|
1015
1017
|
Ok(df.into())
|
data/ext/polars/src/error.rs
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
use magnus::exception::arg_error;
|
1
|
+
use magnus::exception;
|
2
2
|
use magnus::Error;
|
3
3
|
use polars::error::ArrowError;
|
4
4
|
use polars::prelude::PolarsError;
|
@@ -8,23 +8,23 @@ pub struct RbPolarsErr {}
|
|
8
8
|
impl RbPolarsErr {
|
9
9
|
// convert to Error instead of Self
|
10
10
|
pub fn from(e: PolarsError) -> Error {
|
11
|
-
Error::runtime_error(e.to_string())
|
11
|
+
Error::new(exception::runtime_error(), e.to_string())
|
12
12
|
}
|
13
13
|
|
14
14
|
pub fn arrow(e: ArrowError) -> Error {
|
15
|
-
Error::runtime_error(e.to_string())
|
15
|
+
Error::new(exception::runtime_error(), e.to_string())
|
16
16
|
}
|
17
17
|
|
18
18
|
pub fn io(e: std::io::Error) -> Error {
|
19
|
-
Error::runtime_error(e.to_string())
|
19
|
+
Error::new(exception::runtime_error(), e.to_string())
|
20
20
|
}
|
21
21
|
|
22
22
|
pub fn other(message: String) -> Error {
|
23
|
-
Error::runtime_error(message)
|
23
|
+
Error::new(exception::runtime_error(), message)
|
24
24
|
}
|
25
25
|
|
26
26
|
pub fn todo() -> Error {
|
27
|
-
Error::runtime_error("not implemented yet")
|
27
|
+
Error::new(exception::runtime_error(), "not implemented yet")
|
28
28
|
}
|
29
29
|
}
|
30
30
|
|
@@ -32,7 +32,7 @@ pub struct RbValueError {}
|
|
32
32
|
|
33
33
|
impl RbValueError {
|
34
34
|
pub fn new_err(message: String) -> Error {
|
35
|
-
Error::new(arg_error(), message)
|
35
|
+
Error::new(exception::arg_error(), message)
|
36
36
|
}
|
37
37
|
}
|
38
38
|
|
@@ -40,6 +40,6 @@ pub struct ComputeError {}
|
|
40
40
|
|
41
41
|
impl ComputeError {
|
42
42
|
pub fn new_err(message: String) -> Error {
|
43
|
-
Error::runtime_error(message)
|
43
|
+
Error::new(exception::runtime_error(), message)
|
44
44
|
}
|
45
45
|
}
|
data/ext/polars/src/file.rs
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
use magnus::{Error, RString, Value};
|
1
|
+
use magnus::{exception, Error, RString, Value};
|
2
2
|
use polars::io::mmap::MmapBytesReader;
|
3
3
|
use std::fs::File;
|
4
4
|
use std::io::Cursor;
|
@@ -9,9 +9,10 @@ use crate::RbResult;
|
|
9
9
|
pub fn get_file_like(f: Value, truncate: bool) -> RbResult<File> {
|
10
10
|
let str_slice = f.try_convert::<PathBuf>()?;
|
11
11
|
let f = if truncate {
|
12
|
-
File::create(str_slice).map_err(|e| Error::runtime_error(e.to_string()))?
|
12
|
+
File::create(str_slice)
|
13
|
+
.map_err(|e| Error::new(exception::runtime_error(), e.to_string()))?
|
13
14
|
} else {
|
14
|
-
File::open(str_slice).map_err(|e| Error::runtime_error(e.to_string()))?
|
15
|
+
File::open(str_slice).map_err(|e| Error::new(exception::runtime_error(), e.to_string()))?
|
15
16
|
};
|
16
17
|
Ok(f)
|
17
18
|
}
|
@@ -23,7 +24,7 @@ pub fn get_mmap_bytes_reader(rb_f: Value) -> RbResult<Box<dyn MmapBytesReader>>
|
|
23
24
|
Ok(Box::new(Cursor::new(bytes.to_vec())))
|
24
25
|
} else {
|
25
26
|
let p = rb_f.try_convert::<PathBuf>()?;
|
26
|
-
let f = File::open(p).map_err(|e| Error::runtime_error(e.to_string()))?;
|
27
|
+
let f = File::open(p).map_err(|e| Error::new(exception::runtime_error(), e.to_string()))?;
|
27
28
|
Ok(Box::new(f))
|
28
29
|
}
|
29
30
|
}
|
@@ -4,7 +4,7 @@ use polars::prelude::*;
|
|
4
4
|
use crate::lazy::dsl::RbExpr;
|
5
5
|
use crate::Wrap;
|
6
6
|
|
7
|
-
pub fn binary_lambda(_lambda: Value, _a: Series, _b: Series) -> PolarsResult<Series> {
|
7
|
+
pub fn binary_lambda(_lambda: Value, _a: Series, _b: Series) -> PolarsResult<Option<Series>> {
|
8
8
|
todo!();
|
9
9
|
}
|
10
10
|
|
@@ -1,4 +1,4 @@
|
|
1
|
-
use magnus::{RArray, RHash, Value};
|
1
|
+
use magnus::{IntoValue, RArray, RHash, Value};
|
2
2
|
use polars::io::RowCount;
|
3
3
|
use polars::lazy::frame::{LazyFrame, LazyGroupBy};
|
4
4
|
use polars::prelude::*;
|
@@ -140,7 +140,7 @@ impl RbLazyFrame {
|
|
140
140
|
.with_infer_schema_length(infer_schema_length)
|
141
141
|
.with_delimiter(delimiter)
|
142
142
|
.has_header(has_header)
|
143
|
-
.
|
143
|
+
.with_ignore_errors(ignore_errors)
|
144
144
|
.with_skip_rows(skip_rows)
|
145
145
|
.with_n_rows(n_rows)
|
146
146
|
.with_cache(cache)
|
@@ -180,6 +180,8 @@ impl RbLazyFrame {
|
|
180
180
|
rechunk,
|
181
181
|
row_count,
|
182
182
|
low_memory,
|
183
|
+
// TODO support cloud options
|
184
|
+
cloud_options: None,
|
183
185
|
};
|
184
186
|
let lf = LazyFrame::scan_parquet(path, args).map_err(RbPolarsErr::from)?;
|
185
187
|
Ok(lf.into())
|
@@ -254,6 +256,7 @@ impl RbLazyFrame {
|
|
254
256
|
SortOptions {
|
255
257
|
descending: reverse,
|
256
258
|
nulls_last,
|
259
|
+
multithreaded: true,
|
257
260
|
},
|
258
261
|
)
|
259
262
|
.into()
|
@@ -597,10 +600,10 @@ impl RbLazyFrame {
|
|
597
600
|
Ok(self.get_schema()?.iter_names().cloned().collect())
|
598
601
|
}
|
599
602
|
|
600
|
-
pub fn dtypes(&self) -> RbResult<
|
603
|
+
pub fn dtypes(&self) -> RbResult<RArray> {
|
601
604
|
let schema = self.get_schema()?;
|
602
|
-
let iter = schema.iter_dtypes().map(|dt| Wrap(dt.clone()).
|
603
|
-
Ok(iter
|
605
|
+
let iter = schema.iter_dtypes().map(|dt| Wrap(dt.clone()).into_value());
|
606
|
+
Ok(RArray::from_iter(iter))
|
604
607
|
}
|
605
608
|
|
606
609
|
pub fn schema(&self) -> RbResult<RHash> {
|
@@ -610,7 +613,10 @@ impl RbLazyFrame {
|
|
610
613
|
schema.iter_fields().for_each(|fld| {
|
611
614
|
// TODO remove unwrap
|
612
615
|
schema_dict
|
613
|
-
.aset::<String, Value>(
|
616
|
+
.aset::<String, Value>(
|
617
|
+
fld.name().clone(),
|
618
|
+
Wrap(fld.data_type().clone()).into_value(),
|
619
|
+
)
|
614
620
|
.unwrap();
|
615
621
|
});
|
616
622
|
Ok(schema_dict)
|