polars-df 0.2.5 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/Cargo.lock +290 -137
- data/Cargo.toml +1 -1
- data/ext/polars/Cargo.toml +5 -4
- data/ext/polars/src/apply/dataframe.rs +6 -6
- data/ext/polars/src/apply/series.rs +10 -10
- data/ext/polars/src/batched_csv.rs +6 -4
- data/ext/polars/src/conversion.rs +40 -13
- data/ext/polars/src/dataframe.rs +45 -43
- data/ext/polars/src/error.rs +8 -8
- data/ext/polars/src/file.rs +5 -4
- data/ext/polars/src/lazy/apply.rs +1 -1
- data/ext/polars/src/lazy/dataframe.rs +12 -6
- data/ext/polars/src/lazy/dsl.rs +99 -45
- data/ext/polars/src/lazy/meta.rs +10 -9
- data/ext/polars/src/lib.rs +28 -29
- data/ext/polars/src/object.rs +2 -1
- data/ext/polars/src/series.rs +23 -21
- data/lib/polars/cat_expr.rb +0 -4
- data/lib/polars/cat_name_space.rb +0 -4
- data/lib/polars/convert.rb +0 -7
- data/lib/polars/data_frame.rb +139 -204
- data/lib/polars/date_time_expr.rb +19 -151
- data/lib/polars/date_time_name_space.rb +17 -17
- data/lib/polars/expr.rb +68 -315
- data/lib/polars/group_by.rb +68 -51
- data/lib/polars/io.rb +1 -1
- data/lib/polars/lazy_frame.rb +1 -103
- data/lib/polars/lazy_functions.rb +0 -26
- data/lib/polars/lazy_group_by.rb +0 -8
- data/lib/polars/list_expr.rb +5 -27
- data/lib/polars/list_name_space.rb +5 -8
- data/lib/polars/series.rb +20 -16
- data/lib/polars/string_expr.rb +20 -76
- data/lib/polars/string_name_space.rb +5 -15
- data/lib/polars/struct_expr.rb +0 -2
- data/lib/polars/version.rb +1 -1
- metadata +3 -3
data/Cargo.toml
CHANGED
@@ -4,7 +4,7 @@ members = ["ext/polars"]
|
|
4
4
|
[patch.crates-io]
|
5
5
|
jsonpath_lib = { git = "https://github.com/ritchie46/jsonpath", rev = "24eaf0b4416edff38a4d1b6b17bc4b9f3f047b4b" }
|
6
6
|
halfbrown = { git = "https://github.com/Licenser/halfbrown", rev = "952023c5dd6461b009bb5ba66b9aa979bd75949f" }
|
7
|
-
arrow2 = { git = "https://github.com/ankane/arrow2", rev = "
|
7
|
+
arrow2 = { git = "https://github.com/ankane/arrow2", rev = "ef0270922a217070ba9942567c0ff3263ae8c531" }
|
8
8
|
|
9
9
|
[profile.release]
|
10
10
|
strip = true
|
data/ext/polars/Cargo.toml
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
[package]
|
2
2
|
name = "polars"
|
3
|
-
version = "0.
|
3
|
+
version = "0.3.0"
|
4
4
|
license = "MIT"
|
5
5
|
authors = ["Andrew Kane <andrew@ankane.org>"]
|
6
6
|
edition = "2021"
|
@@ -11,18 +11,19 @@ crate-type = ["cdylib"]
|
|
11
11
|
|
12
12
|
[dependencies]
|
13
13
|
ahash = "0.8"
|
14
|
-
magnus = "0.
|
15
|
-
polars-core = "0.
|
14
|
+
magnus = "0.5"
|
15
|
+
polars-core = "0.27.0"
|
16
16
|
serde_json = "1"
|
17
17
|
|
18
18
|
[dependencies.polars]
|
19
|
-
version = "0.
|
19
|
+
version = "0.27.0"
|
20
20
|
features = [
|
21
21
|
"abs",
|
22
22
|
"arange",
|
23
23
|
"arg_where",
|
24
24
|
"asof_join",
|
25
25
|
"avro",
|
26
|
+
"binary_encoding",
|
26
27
|
"concat_str",
|
27
28
|
"cse",
|
28
29
|
"csv-file",
|
@@ -1,4 +1,4 @@
|
|
1
|
-
use magnus::{class, RArray, TryConvert, Value};
|
1
|
+
use magnus::{class, IntoValue, RArray, TryConvert, Value};
|
2
2
|
use polars::prelude::*;
|
3
3
|
use polars_core::frame::row::{rows_to_schema_first_non_null, Row};
|
4
4
|
use polars_core::series::SeriesIter;
|
@@ -27,7 +27,7 @@ pub fn apply_lambda_unknown<'a>(
|
|
27
27
|
|
28
28
|
for _ in 0..df.height() {
|
29
29
|
let iter = iters.iter_mut().map(|it| Wrap(it.next().unwrap()));
|
30
|
-
let arg = (iter
|
30
|
+
let arg = (RArray::from_iter(iter),);
|
31
31
|
let out: Value = lambda.funcall("call", arg)?;
|
32
32
|
|
33
33
|
if out.is_nil() {
|
@@ -141,7 +141,7 @@ where
|
|
141
141
|
let mut iters = get_iters_skip(df, init_null_count + skip);
|
142
142
|
((init_null_count + skip)..df.height()).map(move |_| {
|
143
143
|
let iter = iters.iter_mut().map(|it| Wrap(it.next().unwrap()));
|
144
|
-
let tpl = (iter
|
144
|
+
let tpl = (RArray::from_iter(iter),);
|
145
145
|
match lambda.funcall::<_, _, Value>("call", tpl) {
|
146
146
|
Ok(val) => val.try_convert::<T>().ok(),
|
147
147
|
Err(e) => panic!("ruby function failed {}", e),
|
@@ -158,7 +158,7 @@ pub fn apply_lambda_with_primitive_out_type<D>(
|
|
158
158
|
) -> ChunkedArray<D>
|
159
159
|
where
|
160
160
|
D: RbArrowPrimitiveType,
|
161
|
-
D::Native:
|
161
|
+
D::Native: IntoValue + TryConvert,
|
162
162
|
{
|
163
163
|
let skip = usize::from(first_value.is_some());
|
164
164
|
if init_null_count == df.height() {
|
@@ -216,7 +216,7 @@ pub fn apply_lambda_with_list_out_type<'a>(
|
|
216
216
|
let mut iters = get_iters_skip(df, init_null_count + skip);
|
217
217
|
let iter = ((init_null_count + skip)..df.height()).map(|_| {
|
218
218
|
let iter = iters.iter_mut().map(|it| Wrap(it.next().unwrap()));
|
219
|
-
let tpl = (iter
|
219
|
+
let tpl = (RArray::from_iter(iter),);
|
220
220
|
match lambda.funcall::<_, _, Value>("call", tpl) {
|
221
221
|
Ok(val) => match val.funcall::<_, _, Value>("_s", ()) {
|
222
222
|
Ok(val) => val
|
@@ -254,7 +254,7 @@ pub fn apply_lambda_with_rows_output<'a>(
|
|
254
254
|
let mut iters = get_iters_skip(df, init_null_count + skip);
|
255
255
|
let mut row_iter = ((init_null_count + skip)..df.height()).map(|_| {
|
256
256
|
let iter = iters.iter_mut().map(|it| Wrap(it.next().unwrap()));
|
257
|
-
let tpl = (iter
|
257
|
+
let tpl = (RArray::from_iter(iter),);
|
258
258
|
match lambda.funcall::<_, _, Value>("call", tpl) {
|
259
259
|
Ok(val) => {
|
260
260
|
match val.try_convert::<RArray>().ok() {
|
@@ -1,4 +1,4 @@
|
|
1
|
-
use magnus::{class, RHash, TryConvert, Value};
|
1
|
+
use magnus::{class, IntoValue, RHash, TryConvert, Value};
|
2
2
|
use polars::prelude::*;
|
3
3
|
|
4
4
|
use super::*;
|
@@ -85,7 +85,7 @@ pub trait ApplyLambda<'a> {
|
|
85
85
|
) -> RbResult<ChunkedArray<D>>
|
86
86
|
where
|
87
87
|
D: RbArrowPrimitiveType,
|
88
|
-
D::Native:
|
88
|
+
D::Native: IntoValue + TryConvert;
|
89
89
|
|
90
90
|
/// Apply a lambda with a boolean output type
|
91
91
|
fn apply_lambda_with_bool_out_type(
|
@@ -130,14 +130,14 @@ pub trait ApplyLambda<'a> {
|
|
130
130
|
|
131
131
|
pub fn call_lambda<T>(lambda: Value, in_val: T) -> RbResult<Value>
|
132
132
|
where
|
133
|
-
T:
|
133
|
+
T: IntoValue,
|
134
134
|
{
|
135
135
|
lambda.funcall("call", (in_val,))
|
136
136
|
}
|
137
137
|
|
138
138
|
pub(crate) fn call_lambda_and_extract<T, S>(lambda: Value, in_val: T) -> RbResult<S>
|
139
139
|
where
|
140
|
-
T:
|
140
|
+
T: IntoValue,
|
141
141
|
S: TryConvert,
|
142
142
|
{
|
143
143
|
match call_lambda(lambda, in_val) {
|
@@ -148,7 +148,7 @@ where
|
|
148
148
|
|
149
149
|
fn call_lambda_series_out<T>(lambda: Value, in_val: T) -> RbResult<Series>
|
150
150
|
where
|
151
|
-
T:
|
151
|
+
T: IntoValue,
|
152
152
|
{
|
153
153
|
let out: Value = lambda.funcall("call", (in_val,))?;
|
154
154
|
let py_series: Value = out.funcall("_s", ())?;
|
@@ -216,7 +216,7 @@ impl<'a> ApplyLambda<'a> for BooleanChunked {
|
|
216
216
|
) -> RbResult<ChunkedArray<D>>
|
217
217
|
where
|
218
218
|
D: RbArrowPrimitiveType,
|
219
|
-
D::Native:
|
219
|
+
D::Native: IntoValue + TryConvert,
|
220
220
|
{
|
221
221
|
let skip = usize::from(first_value.is_some());
|
222
222
|
if init_null_count == self.len() {
|
@@ -435,7 +435,7 @@ impl<'a> ApplyLambda<'a> for BooleanChunked {
|
|
435
435
|
impl<'a, T> ApplyLambda<'a> for ChunkedArray<T>
|
436
436
|
where
|
437
437
|
T: RbArrowPrimitiveType + PolarsNumericType,
|
438
|
-
T::Native:
|
438
|
+
T::Native: IntoValue + TryConvert,
|
439
439
|
ChunkedArray<T>: IntoSeries,
|
440
440
|
{
|
441
441
|
fn apply_lambda_unknown(&'a self, lambda: Value) -> RbResult<RbSeries> {
|
@@ -493,7 +493,7 @@ where
|
|
493
493
|
) -> RbResult<ChunkedArray<D>>
|
494
494
|
where
|
495
495
|
D: RbArrowPrimitiveType,
|
496
|
-
D::Native:
|
496
|
+
D::Native: IntoValue + TryConvert,
|
497
497
|
{
|
498
498
|
let skip = usize::from(first_value.is_some());
|
499
499
|
if init_null_count == self.len() {
|
@@ -765,7 +765,7 @@ impl<'a> ApplyLambda<'a> for Utf8Chunked {
|
|
765
765
|
) -> RbResult<ChunkedArray<D>>
|
766
766
|
where
|
767
767
|
D: RbArrowPrimitiveType,
|
768
|
-
D::Native:
|
768
|
+
D::Native: IntoValue + TryConvert,
|
769
769
|
{
|
770
770
|
let skip = usize::from(first_value.is_some());
|
771
771
|
if init_null_count == self.len() {
|
@@ -1036,7 +1036,7 @@ impl<'a> ApplyLambda<'a> for StructChunked {
|
|
1036
1036
|
) -> RbResult<ChunkedArray<D>>
|
1037
1037
|
where
|
1038
1038
|
D: RbArrowPrimitiveType,
|
1039
|
-
D::Native:
|
1039
|
+
D::Native: IntoValue + TryConvert,
|
1040
1040
|
{
|
1041
1041
|
let names = self.fields().iter().map(|s| s.name()).collect::<Vec<_>>();
|
1042
1042
|
|
@@ -1,4 +1,4 @@
|
|
1
|
-
use magnus::Value;
|
1
|
+
use magnus::{RArray, Value};
|
2
2
|
use polars::io::mmap::MmapBytesReader;
|
3
3
|
use polars::io::RowCount;
|
4
4
|
use polars::prelude::read_impl::OwnedBatchedCsvReader;
|
@@ -84,7 +84,7 @@ impl RbBatchedCsv {
|
|
84
84
|
.with_n_rows(n_rows)
|
85
85
|
.with_delimiter(sep.as_bytes()[0])
|
86
86
|
.with_skip_rows(skip_rows)
|
87
|
-
.
|
87
|
+
.with_ignore_errors(ignore_errors)
|
88
88
|
.with_projection(projection)
|
89
89
|
.with_rechunk(rechunk)
|
90
90
|
.with_chunk_size(chunk_size)
|
@@ -109,12 +109,14 @@ impl RbBatchedCsv {
|
|
109
109
|
})
|
110
110
|
}
|
111
111
|
|
112
|
-
pub fn next_batches(&self, n: usize) -> RbResult<Option<
|
112
|
+
pub fn next_batches(&self, n: usize) -> RbResult<Option<RArray>> {
|
113
113
|
let batches = self
|
114
114
|
.reader
|
115
115
|
.borrow_mut()
|
116
116
|
.next_batches(n)
|
117
117
|
.map_err(RbPolarsErr::from)?;
|
118
|
-
Ok(batches.map(|batches|
|
118
|
+
Ok(batches.map(|batches| {
|
119
|
+
RArray::from_iter(batches.into_iter().map(|out| RbDataFrame::from(out.1)))
|
120
|
+
}))
|
119
121
|
}
|
120
122
|
}
|
@@ -1,6 +1,6 @@
|
|
1
1
|
use magnus::{
|
2
|
-
class, r_hash::ForEach,
|
3
|
-
Value, QNIL,
|
2
|
+
class, exception, r_hash::ForEach, ruby_handle::RubyHandle, Integer, IntoValue, Module, RArray,
|
3
|
+
RFloat, RHash, RString, Symbol, TryConvert, Value, QNIL,
|
4
4
|
};
|
5
5
|
use polars::chunked_array::object::PolarsObjectSafe;
|
6
6
|
use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
|
@@ -98,9 +98,9 @@ impl TryConvert for Wrap<NullValues> {
|
|
98
98
|
}
|
99
99
|
}
|
100
100
|
|
101
|
-
impl
|
102
|
-
fn
|
103
|
-
match
|
101
|
+
impl IntoValue for Wrap<AnyValue<'_>> {
|
102
|
+
fn into_value_with(self, _: &RubyHandle) -> Value {
|
103
|
+
match self.0 {
|
104
104
|
AnyValue::UInt8(v) => Value::from(v),
|
105
105
|
AnyValue::UInt16(v) => Value::from(v),
|
106
106
|
AnyValue::UInt32(v) => Value::from(v),
|
@@ -114,6 +114,8 @@ impl From<Wrap<AnyValue<'_>>> for Value {
|
|
114
114
|
AnyValue::Null => *QNIL,
|
115
115
|
AnyValue::Boolean(v) => Value::from(v),
|
116
116
|
AnyValue::Utf8(v) => Value::from(v),
|
117
|
+
AnyValue::Utf8Owned(_v) => todo!(),
|
118
|
+
AnyValue::Categorical(_idx, _rev, _arr) => todo!(),
|
117
119
|
AnyValue::Date(v) => class::time()
|
118
120
|
.funcall::<_, _, Value>("at", (v * 86400,))
|
119
121
|
.unwrap()
|
@@ -140,16 +142,24 @@ impl From<Wrap<AnyValue<'_>>> for Value {
|
|
140
142
|
t.funcall::<_, _, Value>("utc", ()).unwrap()
|
141
143
|
}
|
142
144
|
}
|
143
|
-
|
145
|
+
AnyValue::Duration(_v, _tu) => todo!(),
|
146
|
+
AnyValue::Time(_v) => todo!(),
|
147
|
+
AnyValue::List(v) => RbSeries::new(v).to_a().into_value(),
|
148
|
+
ref _av @ AnyValue::Struct(_, _, _flds) => todo!(),
|
149
|
+
AnyValue::StructOwned(_payload) => todo!(),
|
150
|
+
AnyValue::Object(_v) => todo!(),
|
151
|
+
AnyValue::ObjectOwned(_v) => todo!(),
|
152
|
+
AnyValue::Binary(_v) => todo!(),
|
153
|
+
AnyValue::BinaryOwned(_v) => todo!(),
|
144
154
|
}
|
145
155
|
}
|
146
156
|
}
|
147
157
|
|
148
|
-
impl
|
149
|
-
fn
|
158
|
+
impl IntoValue for Wrap<DataType> {
|
159
|
+
fn into_value_with(self, _: &RubyHandle) -> Value {
|
150
160
|
let pl = crate::rb_modules::polars();
|
151
161
|
|
152
|
-
match
|
162
|
+
match self.0 {
|
153
163
|
DataType::Int8 => pl.const_get::<_, Value>("Int8").unwrap(),
|
154
164
|
DataType::Int16 => pl.const_get::<_, Value>("Int16").unwrap(),
|
155
165
|
DataType::Int32 => pl.const_get::<_, Value>("Int32").unwrap(),
|
@@ -160,6 +170,7 @@ impl From<Wrap<DataType>> for Value {
|
|
160
170
|
DataType::UInt64 => pl.const_get::<_, Value>("UInt64").unwrap(),
|
161
171
|
DataType::Float32 => pl.const_get::<_, Value>("Float32").unwrap(),
|
162
172
|
DataType::Float64 => pl.const_get::<_, Value>("Float64").unwrap(),
|
173
|
+
DataType::Decimal128(_) => todo!(),
|
163
174
|
DataType::Boolean => pl.const_get::<_, Value>("Boolean").unwrap(),
|
164
175
|
DataType::Utf8 => pl.const_get::<_, Value>("Utf8").unwrap(),
|
165
176
|
DataType::Binary => pl.const_get::<_, Value>("Binary").unwrap(),
|
@@ -423,9 +434,9 @@ impl ObjectValue {
|
|
423
434
|
}
|
424
435
|
}
|
425
436
|
|
426
|
-
impl
|
427
|
-
fn
|
428
|
-
|
437
|
+
impl IntoValue for ObjectValue {
|
438
|
+
fn into_value_with(self, _: &RubyHandle) -> Value {
|
439
|
+
self.inner
|
429
440
|
}
|
430
441
|
}
|
431
442
|
|
@@ -767,6 +778,22 @@ impl TryConvert for Wrap<UniqueKeepStrategy> {
|
|
767
778
|
}
|
768
779
|
}
|
769
780
|
|
781
|
+
impl TryConvert for Wrap<SearchSortedSide> {
|
782
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
783
|
+
let parsed = match ob.try_convert::<String>()?.as_str() {
|
784
|
+
"any" => SearchSortedSide::Any,
|
785
|
+
"left" => SearchSortedSide::Left,
|
786
|
+
"right" => SearchSortedSide::Right,
|
787
|
+
v => {
|
788
|
+
return Err(RbValueError::new_err(format!(
|
789
|
+
"side must be one of {{'any', 'left', 'right'}}, got {v}",
|
790
|
+
)))
|
791
|
+
}
|
792
|
+
};
|
793
|
+
Ok(Wrap(parsed))
|
794
|
+
}
|
795
|
+
}
|
796
|
+
|
770
797
|
pub fn parse_fill_null_strategy(
|
771
798
|
strategy: &str,
|
772
799
|
limit: FillNullLimit,
|
@@ -780,7 +807,7 @@ pub fn parse_fill_null_strategy(
|
|
780
807
|
"zero" => FillNullStrategy::Zero,
|
781
808
|
"one" => FillNullStrategy::One,
|
782
809
|
e => {
|
783
|
-
return Err(magnus::Error::runtime_error(format!(
|
810
|
+
return Err(magnus::Error::new(exception::runtime_error(), format!(
|
784
811
|
"strategy must be one of {{'forward', 'backward', 'min', 'max', 'mean', 'zero', 'one'}}, got {}",
|
785
812
|
e,
|
786
813
|
)))
|
data/ext/polars/src/dataframe.rs
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
use magnus::{r_hash::ForEach, RArray, RHash, RString, Value};
|
1
|
+
use magnus::{r_hash::ForEach, IntoValue, RArray, RHash, RString, Value};
|
2
2
|
use polars::frame::row::{rows_to_schema_supertypes, Row};
|
3
3
|
use polars::frame::NullStrategy;
|
4
4
|
use polars::io::avro::AvroCompression;
|
@@ -68,7 +68,7 @@ impl RbDataFrame {
|
|
68
68
|
*dtype_ = dtype;
|
69
69
|
}
|
70
70
|
} else {
|
71
|
-
schema.with_column(name, dtype)
|
71
|
+
schema.with_column(name, dtype);
|
72
72
|
}
|
73
73
|
}
|
74
74
|
}
|
@@ -159,7 +159,7 @@ impl RbDataFrame {
|
|
159
159
|
.with_n_rows(n_rows)
|
160
160
|
.with_delimiter(sep.as_bytes()[0])
|
161
161
|
.with_skip_rows(skip_rows)
|
162
|
-
.
|
162
|
+
.with_ignore_errors(ignore_errors)
|
163
163
|
.with_projection(projection)
|
164
164
|
.with_rechunk(rechunk)
|
165
165
|
.with_chunk_size(chunk_size)
|
@@ -457,7 +457,7 @@ impl RbDataFrame {
|
|
457
457
|
} else {
|
458
458
|
idx as usize
|
459
459
|
};
|
460
|
-
RArray::
|
460
|
+
RArray::from_iter(
|
461
461
|
self.df
|
462
462
|
.borrow()
|
463
463
|
.get_columns()
|
@@ -467,36 +467,29 @@ impl RbDataFrame {
|
|
467
467
|
let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
|
468
468
|
obj.unwrap().to_object()
|
469
469
|
}
|
470
|
-
_ => Wrap(s.get(idx).unwrap()).
|
471
|
-
})
|
472
|
-
.collect(),
|
470
|
+
_ => Wrap(s.get(idx).unwrap()).into_value(),
|
471
|
+
}),
|
473
472
|
)
|
474
473
|
.into()
|
475
474
|
}
|
476
475
|
|
477
476
|
pub fn row_tuples(&self) -> Value {
|
478
477
|
let df = &self.df;
|
479
|
-
RArray::
|
480
|
-
(
|
481
|
-
.
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
|
487
|
-
.map(|
|
488
|
-
|
489
|
-
|
490
|
-
|
491
|
-
|
492
|
-
|
493
|
-
|
494
|
-
})
|
495
|
-
.collect(),
|
496
|
-
)
|
497
|
-
})
|
498
|
-
.collect(),
|
499
|
-
)
|
478
|
+
RArray::from_iter((0..df.borrow().height()).map(|idx| {
|
479
|
+
RArray::from_iter(
|
480
|
+
self.df
|
481
|
+
.borrow()
|
482
|
+
.get_columns()
|
483
|
+
.iter()
|
484
|
+
.map(|s| match s.dtype() {
|
485
|
+
DataType::Object(_) => {
|
486
|
+
let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
|
487
|
+
obj.unwrap().to_object()
|
488
|
+
}
|
489
|
+
_ => Wrap(s.get(idx).unwrap()).into_value(),
|
490
|
+
}),
|
491
|
+
)
|
492
|
+
}))
|
500
493
|
.into()
|
501
494
|
}
|
502
495
|
|
@@ -613,7 +606,7 @@ impl RbDataFrame {
|
|
613
606
|
format!("{}", self.df.borrow())
|
614
607
|
}
|
615
608
|
|
616
|
-
pub fn get_columns(&self) ->
|
609
|
+
pub fn get_columns(&self) -> RArray {
|
617
610
|
let cols = self.df.borrow().get_columns().clone();
|
618
611
|
to_rbseries_collection(cols)
|
619
612
|
}
|
@@ -635,12 +628,13 @@ impl RbDataFrame {
|
|
635
628
|
Ok(())
|
636
629
|
}
|
637
630
|
|
638
|
-
pub fn dtypes(&self) ->
|
639
|
-
|
640
|
-
.
|
641
|
-
|
642
|
-
|
643
|
-
|
631
|
+
pub fn dtypes(&self) -> RArray {
|
632
|
+
RArray::from_iter(
|
633
|
+
self.df
|
634
|
+
.borrow()
|
635
|
+
.iter()
|
636
|
+
.map(|s| Wrap(s.dtype().clone()).into_value()),
|
637
|
+
)
|
644
638
|
}
|
645
639
|
|
646
640
|
pub fn n_chunks(&self) -> usize {
|
@@ -777,6 +771,7 @@ impl RbDataFrame {
|
|
777
771
|
SortOptions {
|
778
772
|
descending: reverse,
|
779
773
|
nulls_last,
|
774
|
+
multithreaded: true,
|
780
775
|
},
|
781
776
|
)
|
782
777
|
.map_err(RbPolarsErr::from)?;
|
@@ -876,6 +871,7 @@ impl RbDataFrame {
|
|
876
871
|
Ok(RbDataFrame::new(df))
|
877
872
|
}
|
878
873
|
|
874
|
+
#[allow(clippy::too_many_arguments)]
|
879
875
|
pub fn pivot_expr(
|
880
876
|
&self,
|
881
877
|
values: Vec<String>,
|
@@ -884,6 +880,7 @@ impl RbDataFrame {
|
|
884
880
|
aggregate_expr: &RbExpr,
|
885
881
|
maintain_order: bool,
|
886
882
|
sort_columns: bool,
|
883
|
+
separator: Option<String>,
|
887
884
|
) -> RbResult<Self> {
|
888
885
|
let fun = match maintain_order {
|
889
886
|
true => pivot_stable,
|
@@ -896,19 +893,20 @@ impl RbDataFrame {
|
|
896
893
|
columns,
|
897
894
|
aggregate_expr.inner.clone(),
|
898
895
|
sort_columns,
|
896
|
+
separator.as_deref(),
|
899
897
|
)
|
900
898
|
.map_err(RbPolarsErr::from)?;
|
901
899
|
Ok(RbDataFrame::new(df))
|
902
900
|
}
|
903
901
|
|
904
|
-
pub fn partition_by(&self, groups: Vec<String>, stable: bool) -> RbResult<
|
902
|
+
pub fn partition_by(&self, groups: Vec<String>, stable: bool) -> RbResult<RArray> {
|
905
903
|
let out = if stable {
|
906
904
|
self.df.borrow().partition_by_stable(groups)
|
907
905
|
} else {
|
908
906
|
self.df.borrow().partition_by(groups)
|
909
907
|
}
|
910
908
|
.map_err(RbPolarsErr::from)?;
|
911
|
-
Ok(out.into_iter().map(RbDataFrame::new)
|
909
|
+
Ok(RArray::from_iter(out.into_iter().map(RbDataFrame::new)))
|
912
910
|
}
|
913
911
|
|
914
912
|
pub fn shift(&self, periods: i64) -> Self {
|
@@ -1003,13 +1001,17 @@ impl RbDataFrame {
|
|
1003
1001
|
Ok(df.into())
|
1004
1002
|
}
|
1005
1003
|
|
1006
|
-
pub fn to_dummies(
|
1004
|
+
pub fn to_dummies(
|
1005
|
+
&self,
|
1006
|
+
columns: Option<Vec<String>>,
|
1007
|
+
separator: Option<String>,
|
1008
|
+
) -> RbResult<Self> {
|
1007
1009
|
let df = match columns {
|
1008
|
-
Some(cols) => self
|
1009
|
-
.
|
1010
|
-
.
|
1011
|
-
|
1012
|
-
None => self.df.borrow().to_dummies(),
|
1010
|
+
Some(cols) => self.df.borrow().columns_to_dummies(
|
1011
|
+
cols.iter().map(|x| x as &str).collect(),
|
1012
|
+
separator.as_deref(),
|
1013
|
+
),
|
1014
|
+
None => self.df.borrow().to_dummies(separator.as_deref()),
|
1013
1015
|
}
|
1014
1016
|
.map_err(RbPolarsErr::from)?;
|
1015
1017
|
Ok(df.into())
|
data/ext/polars/src/error.rs
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
use magnus::exception
|
1
|
+
use magnus::exception;
|
2
2
|
use magnus::Error;
|
3
3
|
use polars::error::ArrowError;
|
4
4
|
use polars::prelude::PolarsError;
|
@@ -8,23 +8,23 @@ pub struct RbPolarsErr {}
|
|
8
8
|
impl RbPolarsErr {
|
9
9
|
// convert to Error instead of Self
|
10
10
|
pub fn from(e: PolarsError) -> Error {
|
11
|
-
Error::runtime_error(e.to_string())
|
11
|
+
Error::new(exception::runtime_error(), e.to_string())
|
12
12
|
}
|
13
13
|
|
14
14
|
pub fn arrow(e: ArrowError) -> Error {
|
15
|
-
Error::runtime_error(e.to_string())
|
15
|
+
Error::new(exception::runtime_error(), e.to_string())
|
16
16
|
}
|
17
17
|
|
18
18
|
pub fn io(e: std::io::Error) -> Error {
|
19
|
-
Error::runtime_error(e.to_string())
|
19
|
+
Error::new(exception::runtime_error(), e.to_string())
|
20
20
|
}
|
21
21
|
|
22
22
|
pub fn other(message: String) -> Error {
|
23
|
-
Error::runtime_error(message)
|
23
|
+
Error::new(exception::runtime_error(), message)
|
24
24
|
}
|
25
25
|
|
26
26
|
pub fn todo() -> Error {
|
27
|
-
Error::runtime_error("not implemented yet")
|
27
|
+
Error::new(exception::runtime_error(), "not implemented yet")
|
28
28
|
}
|
29
29
|
}
|
30
30
|
|
@@ -32,7 +32,7 @@ pub struct RbValueError {}
|
|
32
32
|
|
33
33
|
impl RbValueError {
|
34
34
|
pub fn new_err(message: String) -> Error {
|
35
|
-
Error::new(arg_error(), message)
|
35
|
+
Error::new(exception::arg_error(), message)
|
36
36
|
}
|
37
37
|
}
|
38
38
|
|
@@ -40,6 +40,6 @@ pub struct ComputeError {}
|
|
40
40
|
|
41
41
|
impl ComputeError {
|
42
42
|
pub fn new_err(message: String) -> Error {
|
43
|
-
Error::runtime_error(message)
|
43
|
+
Error::new(exception::runtime_error(), message)
|
44
44
|
}
|
45
45
|
}
|
data/ext/polars/src/file.rs
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
use magnus::{Error, RString, Value};
|
1
|
+
use magnus::{exception, Error, RString, Value};
|
2
2
|
use polars::io::mmap::MmapBytesReader;
|
3
3
|
use std::fs::File;
|
4
4
|
use std::io::Cursor;
|
@@ -9,9 +9,10 @@ use crate::RbResult;
|
|
9
9
|
pub fn get_file_like(f: Value, truncate: bool) -> RbResult<File> {
|
10
10
|
let str_slice = f.try_convert::<PathBuf>()?;
|
11
11
|
let f = if truncate {
|
12
|
-
File::create(str_slice)
|
12
|
+
File::create(str_slice)
|
13
|
+
.map_err(|e| Error::new(exception::runtime_error(), e.to_string()))?
|
13
14
|
} else {
|
14
|
-
File::open(str_slice).map_err(|e| Error::runtime_error(e.to_string()))?
|
15
|
+
File::open(str_slice).map_err(|e| Error::new(exception::runtime_error(), e.to_string()))?
|
15
16
|
};
|
16
17
|
Ok(f)
|
17
18
|
}
|
@@ -23,7 +24,7 @@ pub fn get_mmap_bytes_reader(rb_f: Value) -> RbResult<Box<dyn MmapBytesReader>>
|
|
23
24
|
Ok(Box::new(Cursor::new(bytes.to_vec())))
|
24
25
|
} else {
|
25
26
|
let p = rb_f.try_convert::<PathBuf>()?;
|
26
|
-
let f = File::open(p).map_err(|e| Error::runtime_error(e.to_string()))?;
|
27
|
+
let f = File::open(p).map_err(|e| Error::new(exception::runtime_error(), e.to_string()))?;
|
27
28
|
Ok(Box::new(f))
|
28
29
|
}
|
29
30
|
}
|
@@ -4,7 +4,7 @@ use polars::prelude::*;
|
|
4
4
|
use crate::lazy::dsl::RbExpr;
|
5
5
|
use crate::Wrap;
|
6
6
|
|
7
|
-
pub fn binary_lambda(_lambda: Value, _a: Series, _b: Series) -> PolarsResult<Series
|
7
|
+
pub fn binary_lambda(_lambda: Value, _a: Series, _b: Series) -> PolarsResult<Option<Series>> {
|
8
8
|
todo!();
|
9
9
|
}
|
10
10
|
|
@@ -1,4 +1,4 @@
|
|
1
|
-
use magnus::{RArray, RHash, Value};
|
1
|
+
use magnus::{IntoValue, RArray, RHash, Value};
|
2
2
|
use polars::io::RowCount;
|
3
3
|
use polars::lazy::frame::{LazyFrame, LazyGroupBy};
|
4
4
|
use polars::prelude::*;
|
@@ -140,7 +140,7 @@ impl RbLazyFrame {
|
|
140
140
|
.with_infer_schema_length(infer_schema_length)
|
141
141
|
.with_delimiter(delimiter)
|
142
142
|
.has_header(has_header)
|
143
|
-
.
|
143
|
+
.with_ignore_errors(ignore_errors)
|
144
144
|
.with_skip_rows(skip_rows)
|
145
145
|
.with_n_rows(n_rows)
|
146
146
|
.with_cache(cache)
|
@@ -180,6 +180,8 @@ impl RbLazyFrame {
|
|
180
180
|
rechunk,
|
181
181
|
row_count,
|
182
182
|
low_memory,
|
183
|
+
// TODO support cloud options
|
184
|
+
cloud_options: None,
|
183
185
|
};
|
184
186
|
let lf = LazyFrame::scan_parquet(path, args).map_err(RbPolarsErr::from)?;
|
185
187
|
Ok(lf.into())
|
@@ -254,6 +256,7 @@ impl RbLazyFrame {
|
|
254
256
|
SortOptions {
|
255
257
|
descending: reverse,
|
256
258
|
nulls_last,
|
259
|
+
multithreaded: true,
|
257
260
|
},
|
258
261
|
)
|
259
262
|
.into()
|
@@ -597,10 +600,10 @@ impl RbLazyFrame {
|
|
597
600
|
Ok(self.get_schema()?.iter_names().cloned().collect())
|
598
601
|
}
|
599
602
|
|
600
|
-
pub fn dtypes(&self) -> RbResult<
|
603
|
+
pub fn dtypes(&self) -> RbResult<RArray> {
|
601
604
|
let schema = self.get_schema()?;
|
602
|
-
let iter = schema.iter_dtypes().map(|dt| Wrap(dt.clone()).
|
603
|
-
Ok(iter
|
605
|
+
let iter = schema.iter_dtypes().map(|dt| Wrap(dt.clone()).into_value());
|
606
|
+
Ok(RArray::from_iter(iter))
|
604
607
|
}
|
605
608
|
|
606
609
|
pub fn schema(&self) -> RbResult<RHash> {
|
@@ -610,7 +613,10 @@ impl RbLazyFrame {
|
|
610
613
|
schema.iter_fields().for_each(|fld| {
|
611
614
|
// TODO remove unwrap
|
612
615
|
schema_dict
|
613
|
-
.aset::<String, Value>(
|
616
|
+
.aset::<String, Value>(
|
617
|
+
fld.name().clone(),
|
618
|
+
Wrap(fld.data_type().clone()).into_value(),
|
619
|
+
)
|
614
620
|
.unwrap();
|
615
621
|
});
|
616
622
|
Ok(schema_dict)
|