polars-df 0.11.0 → 0.12.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/Cargo.lock +360 -361
- data/ext/polars/Cargo.toml +10 -7
- data/ext/polars/src/batched_csv.rs +1 -1
- data/ext/polars/src/conversion/any_value.rs +261 -0
- data/ext/polars/src/conversion/chunked_array.rs +4 -4
- data/ext/polars/src/conversion/mod.rs +51 -10
- data/ext/polars/src/dataframe/construction.rs +6 -8
- data/ext/polars/src/dataframe/general.rs +19 -29
- data/ext/polars/src/dataframe/io.rs +43 -33
- data/ext/polars/src/error.rs +26 -4
- data/ext/polars/src/expr/categorical.rs +0 -10
- data/ext/polars/src/expr/datetime.rs +4 -12
- data/ext/polars/src/expr/general.rs +123 -110
- data/ext/polars/src/expr/mod.rs +2 -2
- data/ext/polars/src/expr/rolling.rs +17 -9
- data/ext/polars/src/expr/string.rs +2 -6
- data/ext/polars/src/functions/eager.rs +10 -10
- data/ext/polars/src/functions/lazy.rs +21 -21
- data/ext/polars/src/functions/range.rs +6 -12
- data/ext/polars/src/interop/numo/to_numo_series.rs +2 -1
- data/ext/polars/src/lazyframe/mod.rs +81 -98
- data/ext/polars/src/lib.rs +55 -45
- data/ext/polars/src/map/dataframe.rs +2 -2
- data/ext/polars/src/rb_modules.rs +25 -1
- data/ext/polars/src/series/aggregation.rs +4 -2
- data/ext/polars/src/series/arithmetic.rs +21 -11
- data/ext/polars/src/series/construction.rs +56 -38
- data/ext/polars/src/series/export.rs +1 -1
- data/ext/polars/src/series/mod.rs +31 -10
- data/ext/polars/src/sql.rs +3 -1
- data/lib/polars/array_expr.rb +4 -4
- data/lib/polars/batched_csv_reader.rb +2 -2
- data/lib/polars/cat_expr.rb +0 -36
- data/lib/polars/cat_name_space.rb +0 -37
- data/lib/polars/data_frame.rb +93 -101
- data/lib/polars/data_types.rb +1 -1
- data/lib/polars/date_time_expr.rb +525 -573
- data/lib/polars/date_time_name_space.rb +263 -464
- data/lib/polars/dynamic_group_by.rb +3 -3
- data/lib/polars/exceptions.rb +3 -0
- data/lib/polars/expr.rb +367 -330
- data/lib/polars/expr_dispatch.rb +1 -1
- data/lib/polars/functions/aggregation/horizontal.rb +8 -8
- data/lib/polars/functions/as_datatype.rb +63 -40
- data/lib/polars/functions/lazy.rb +63 -14
- data/lib/polars/functions/lit.rb +1 -1
- data/lib/polars/functions/range/date_range.rb +18 -77
- data/lib/polars/functions/range/datetime_range.rb +4 -4
- data/lib/polars/functions/range/int_range.rb +2 -2
- data/lib/polars/functions/range/time_range.rb +4 -4
- data/lib/polars/functions/repeat.rb +1 -1
- data/lib/polars/functions/whenthen.rb +1 -1
- data/lib/polars/io/csv.rb +8 -8
- data/lib/polars/io/ipc.rb +3 -3
- data/lib/polars/io/json.rb +13 -2
- data/lib/polars/io/ndjson.rb +15 -4
- data/lib/polars/io/parquet.rb +5 -4
- data/lib/polars/lazy_frame.rb +120 -106
- data/lib/polars/lazy_group_by.rb +1 -1
- data/lib/polars/list_expr.rb +11 -11
- data/lib/polars/list_name_space.rb +5 -1
- data/lib/polars/rolling_group_by.rb +5 -7
- data/lib/polars/series.rb +105 -189
- data/lib/polars/string_expr.rb +42 -67
- data/lib/polars/string_name_space.rb +5 -4
- data/lib/polars/testing.rb +2 -2
- data/lib/polars/utils/constants.rb +9 -0
- data/lib/polars/utils/convert.rb +97 -0
- data/lib/polars/utils/parse.rb +89 -0
- data/lib/polars/utils/various.rb +76 -0
- data/lib/polars/utils/wrap.rb +19 -0
- data/lib/polars/utils.rb +4 -330
- data/lib/polars/version.rb +1 -1
- data/lib/polars/whenthen.rb +6 -6
- data/lib/polars.rb +11 -0
- metadata +9 -4
- data/ext/polars/src/conversion/anyvalue.rs +0 -186
data/ext/polars/Cargo.toml
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
[package]
|
2
2
|
name = "polars"
|
3
|
-
version = "0.11.0"
|
3
|
+
version = "0.12.0"
|
4
4
|
license = "MIT"
|
5
5
|
authors = ["Andrew Kane <andrew@ankane.org>"]
|
6
6
|
edition = "2021"
|
@@ -14,15 +14,15 @@ crate-type = ["cdylib"]
|
|
14
14
|
ahash = "0.8"
|
15
15
|
chrono = "0.4"
|
16
16
|
either = "1.8"
|
17
|
-
magnus = "0.
|
18
|
-
polars-core = "=0.
|
19
|
-
polars-parquet = "=0.
|
20
|
-
polars-utils = "=0.
|
17
|
+
magnus = "0.7"
|
18
|
+
polars-core = "=0.41.3"
|
19
|
+
polars-parquet = "=0.41.3"
|
20
|
+
polars-utils = "=0.41.3"
|
21
21
|
serde_json = "1"
|
22
22
|
smartstring = "1"
|
23
23
|
|
24
24
|
[dependencies.polars]
|
25
|
-
version = "=0.
|
25
|
+
version = "=0.41.3"
|
26
26
|
features = [
|
27
27
|
"abs",
|
28
28
|
"approx_unique",
|
@@ -41,7 +41,6 @@ features = [
|
|
41
41
|
"cumulative_eval",
|
42
42
|
"cutqcut",
|
43
43
|
"dataframe_arithmetic",
|
44
|
-
"date_offset",
|
45
44
|
"diagonal_concat",
|
46
45
|
"diff",
|
47
46
|
"dot_product",
|
@@ -56,6 +55,7 @@ features = [
|
|
56
55
|
"interpolate",
|
57
56
|
"ipc",
|
58
57
|
"ipc_streaming",
|
58
|
+
"is_between",
|
59
59
|
"is_first_distinct",
|
60
60
|
"is_in",
|
61
61
|
"is_last_distinct",
|
@@ -74,6 +74,9 @@ features = [
|
|
74
74
|
"meta",
|
75
75
|
"mode",
|
76
76
|
"moment",
|
77
|
+
"month_start",
|
78
|
+
"month_end",
|
79
|
+
"offset_by",
|
77
80
|
"object",
|
78
81
|
"parquet",
|
79
82
|
"partition_by",
|
@@ -95,7 +95,7 @@ impl RbBatchedCsv {
|
|
95
95
|
.with_projection(projection.map(Arc::new))
|
96
96
|
.with_rechunk(rechunk)
|
97
97
|
.with_chunk_size(chunk_size)
|
98
|
-
.with_columns(columns.map(Arc::
|
98
|
+
.with_columns(columns.map(Arc::from))
|
99
99
|
.with_n_threads(n_threads)
|
100
100
|
.with_dtype_overwrite(overwrite_dtype_slice.map(Arc::new))
|
101
101
|
.with_low_memory(low_memory)
|
@@ -0,0 +1,261 @@
|
|
1
|
+
use magnus::encoding::{EncodingCapable, Index};
|
2
|
+
use magnus::{
|
3
|
+
class, prelude::*, r_hash::ForEach, IntoValue, RArray, RHash, RString, Ruby, TryConvert, Value,
|
4
|
+
};
|
5
|
+
use polars::prelude::*;
|
6
|
+
use polars_core::utils::any_values_to_supertype_and_n_dtypes;
|
7
|
+
|
8
|
+
use super::{struct_dict, ObjectValue, Wrap};
|
9
|
+
|
10
|
+
use crate::error::RbOverflowError;
|
11
|
+
use crate::rb_modules::utils;
|
12
|
+
use crate::{RbPolarsErr, RbResult, RbSeries};
|
13
|
+
|
14
|
+
impl IntoValue for Wrap<AnyValue<'_>> {
|
15
|
+
fn into_value_with(self, ruby: &Ruby) -> Value {
|
16
|
+
any_value_into_rb_object(self.0, ruby)
|
17
|
+
}
|
18
|
+
}
|
19
|
+
|
20
|
+
impl<'s> TryConvert for Wrap<AnyValue<'s>> {
|
21
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
22
|
+
rb_object_to_any_value(ob, true).map(Wrap)
|
23
|
+
}
|
24
|
+
}
|
25
|
+
|
26
|
+
pub(crate) fn any_value_into_rb_object(av: AnyValue, ruby: &Ruby) -> Value {
|
27
|
+
match av {
|
28
|
+
AnyValue::UInt8(v) => ruby.into_value(v),
|
29
|
+
AnyValue::UInt16(v) => ruby.into_value(v),
|
30
|
+
AnyValue::UInt32(v) => ruby.into_value(v),
|
31
|
+
AnyValue::UInt64(v) => ruby.into_value(v),
|
32
|
+
AnyValue::Int8(v) => ruby.into_value(v),
|
33
|
+
AnyValue::Int16(v) => ruby.into_value(v),
|
34
|
+
AnyValue::Int32(v) => ruby.into_value(v),
|
35
|
+
AnyValue::Int64(v) => ruby.into_value(v),
|
36
|
+
AnyValue::Float32(v) => ruby.into_value(v),
|
37
|
+
AnyValue::Float64(v) => ruby.into_value(v),
|
38
|
+
AnyValue::Null => ruby.qnil().as_value(),
|
39
|
+
AnyValue::Boolean(v) => ruby.into_value(v),
|
40
|
+
AnyValue::String(v) => ruby.into_value(v),
|
41
|
+
AnyValue::StringOwned(v) => ruby.into_value(v.as_str()),
|
42
|
+
AnyValue::Categorical(idx, rev, arr) | AnyValue::Enum(idx, rev, arr) => {
|
43
|
+
let s = if arr.is_null() {
|
44
|
+
rev.get(idx)
|
45
|
+
} else {
|
46
|
+
unsafe { arr.deref_unchecked().value(idx as usize) }
|
47
|
+
};
|
48
|
+
s.into_value()
|
49
|
+
}
|
50
|
+
AnyValue::Date(v) => utils().funcall("_to_ruby_date", (v,)).unwrap(),
|
51
|
+
AnyValue::Datetime(v, time_unit, time_zone) => {
|
52
|
+
let time_unit = time_unit.to_ascii();
|
53
|
+
utils()
|
54
|
+
.funcall("_to_ruby_datetime", (v, time_unit, time_zone.clone()))
|
55
|
+
.unwrap()
|
56
|
+
}
|
57
|
+
AnyValue::Duration(v, time_unit) => {
|
58
|
+
let time_unit = time_unit.to_ascii();
|
59
|
+
utils()
|
60
|
+
.funcall("_to_ruby_duration", (v, time_unit))
|
61
|
+
.unwrap()
|
62
|
+
}
|
63
|
+
AnyValue::Time(v) => utils().funcall("_to_ruby_time", (v,)).unwrap(),
|
64
|
+
AnyValue::Array(v, _) | AnyValue::List(v) => RbSeries::new(v).to_a().into_value(),
|
65
|
+
ref av @ AnyValue::Struct(_, _, flds) => struct_dict(av._iter_struct_av(), flds),
|
66
|
+
AnyValue::StructOwned(payload) => struct_dict(payload.0.into_iter(), &payload.1),
|
67
|
+
AnyValue::Object(v) => {
|
68
|
+
let object = v.as_any().downcast_ref::<ObjectValue>().unwrap();
|
69
|
+
object.to_object()
|
70
|
+
}
|
71
|
+
AnyValue::ObjectOwned(v) => {
|
72
|
+
let object = v.0.as_any().downcast_ref::<ObjectValue>().unwrap();
|
73
|
+
object.to_object()
|
74
|
+
}
|
75
|
+
AnyValue::Binary(v) => RString::from_slice(v).into_value(),
|
76
|
+
AnyValue::BinaryOwned(v) => RString::from_slice(&v).into_value(),
|
77
|
+
AnyValue::Decimal(v, scale) => utils()
|
78
|
+
.funcall("_to_ruby_decimal", (v.to_string(), -(scale as i32)))
|
79
|
+
.unwrap(),
|
80
|
+
}
|
81
|
+
}
|
82
|
+
|
83
|
+
pub(crate) fn rb_object_to_any_value<'s>(ob: Value, strict: bool) -> RbResult<AnyValue<'s>> {
|
84
|
+
// Conversion functions.
|
85
|
+
fn get_null(_ob: Value, _strict: bool) -> RbResult<AnyValue<'static>> {
|
86
|
+
Ok(AnyValue::Null)
|
87
|
+
}
|
88
|
+
|
89
|
+
fn get_bool(ob: Value, _strict: bool) -> RbResult<AnyValue<'static>> {
|
90
|
+
let b = bool::try_convert(ob)?;
|
91
|
+
Ok(AnyValue::Boolean(b))
|
92
|
+
}
|
93
|
+
|
94
|
+
fn get_int(ob: Value, strict: bool) -> RbResult<AnyValue<'static>> {
|
95
|
+
if let Ok(v) = i64::try_convert(ob) {
|
96
|
+
Ok(AnyValue::Int64(v))
|
97
|
+
} else if let Ok(v) = u64::try_convert(ob) {
|
98
|
+
Ok(AnyValue::UInt64(v))
|
99
|
+
} else if !strict {
|
100
|
+
let f = f64::try_convert(ob)?;
|
101
|
+
Ok(AnyValue::Float64(f))
|
102
|
+
} else {
|
103
|
+
Err(RbOverflowError::new_err(format!(
|
104
|
+
"int value too large for Polars integer types: {ob}"
|
105
|
+
)))
|
106
|
+
}
|
107
|
+
}
|
108
|
+
|
109
|
+
fn get_float(ob: Value, _strict: bool) -> RbResult<AnyValue<'static>> {
|
110
|
+
Ok(AnyValue::Float64(f64::try_convert(ob)?))
|
111
|
+
}
|
112
|
+
|
113
|
+
fn get_str(ob: Value, _strict: bool) -> RbResult<AnyValue<'static>> {
|
114
|
+
let v = RString::from_value(ob).unwrap();
|
115
|
+
if v.enc_get() == Index::utf8() {
|
116
|
+
Ok(AnyValue::StringOwned(v.to_string()?.into()))
|
117
|
+
} else {
|
118
|
+
Ok(AnyValue::BinaryOwned(unsafe { v.as_slice() }.to_vec()))
|
119
|
+
}
|
120
|
+
}
|
121
|
+
|
122
|
+
fn get_list(ob: Value, _strict: bool) -> RbResult<AnyValue<'static>> {
|
123
|
+
let v = RArray::from_value(ob).unwrap();
|
124
|
+
if v.is_empty() {
|
125
|
+
Ok(AnyValue::List(Series::new_empty("", &DataType::Null)))
|
126
|
+
} else {
|
127
|
+
let list = v;
|
128
|
+
|
129
|
+
let mut avs = Vec::with_capacity(25);
|
130
|
+
let mut iter = list.into_iter();
|
131
|
+
|
132
|
+
for item in (&mut iter).take(25) {
|
133
|
+
avs.push(Wrap::<AnyValue>::try_convert(item)?.0)
|
134
|
+
}
|
135
|
+
|
136
|
+
let (dtype, _n_types) =
|
137
|
+
any_values_to_supertype_and_n_dtypes(&avs).map_err(RbPolarsErr::from)?;
|
138
|
+
|
139
|
+
// push the rest
|
140
|
+
avs.reserve(list.len());
|
141
|
+
for item in iter {
|
142
|
+
avs.push(Wrap::<AnyValue>::try_convert(item)?.0)
|
143
|
+
}
|
144
|
+
|
145
|
+
let s = Series::from_any_values_and_dtype("", &avs, &dtype, true)
|
146
|
+
.map_err(RbPolarsErr::from)?;
|
147
|
+
Ok(AnyValue::List(s))
|
148
|
+
}
|
149
|
+
}
|
150
|
+
|
151
|
+
fn get_list_from_series(ob: Value, _strict: bool) -> RbResult<AnyValue<'static>> {
|
152
|
+
let s = super::get_series(ob)?;
|
153
|
+
Ok(AnyValue::List(s))
|
154
|
+
}
|
155
|
+
|
156
|
+
fn get_struct(ob: Value, _strict: bool) -> RbResult<AnyValue<'static>> {
|
157
|
+
let dict = RHash::from_value(ob).unwrap();
|
158
|
+
let len = dict.len();
|
159
|
+
let mut keys = Vec::with_capacity(len);
|
160
|
+
let mut vals = Vec::with_capacity(len);
|
161
|
+
dict.foreach(|k: Value, v: Value| {
|
162
|
+
let key = String::try_convert(k)?;
|
163
|
+
let val = Wrap::<AnyValue>::try_convert(v)?.0;
|
164
|
+
let dtype = DataType::from(&val);
|
165
|
+
keys.push(Field::new(&key, dtype));
|
166
|
+
vals.push(val);
|
167
|
+
Ok(ForEach::Continue)
|
168
|
+
})?;
|
169
|
+
Ok(AnyValue::StructOwned(Box::new((vals, keys))))
|
170
|
+
}
|
171
|
+
|
172
|
+
fn get_date(ob: Value, _strict: bool) -> RbResult<AnyValue<'static>> {
|
173
|
+
// convert to DateTime for UTC
|
174
|
+
let v = ob
|
175
|
+
.funcall::<_, _, Value>("to_datetime", ())?
|
176
|
+
.funcall::<_, _, Value>("to_time", ())?
|
177
|
+
.funcall::<_, _, i64>("to_i", ())?;
|
178
|
+
Ok(AnyValue::Date((v / 86400) as i32))
|
179
|
+
}
|
180
|
+
|
181
|
+
fn get_time(ob: Value, _strict: bool) -> RbResult<AnyValue<'static>> {
|
182
|
+
let sec = ob.funcall::<_, _, i64>("to_i", ())?;
|
183
|
+
let nsec = ob.funcall::<_, _, i64>("nsec", ())?;
|
184
|
+
let v = sec * 1_000_000_000 + nsec;
|
185
|
+
// TODO support time zone when possible
|
186
|
+
// https://github.com/pola-rs/polars/issues/9103
|
187
|
+
Ok(AnyValue::Datetime(v, TimeUnit::Nanoseconds, &None))
|
188
|
+
}
|
189
|
+
|
190
|
+
fn get_datetime(ob: Value, _strict: bool) -> RbResult<AnyValue<'static>> {
|
191
|
+
let sec: i64 = ob.funcall("to_i", ())?;
|
192
|
+
let nsec: i64 = ob.funcall("nsec", ())?;
|
193
|
+
Ok(AnyValue::Datetime(
|
194
|
+
sec * 1_000_000_000 + nsec,
|
195
|
+
TimeUnit::Nanoseconds,
|
196
|
+
&None,
|
197
|
+
))
|
198
|
+
}
|
199
|
+
|
200
|
+
fn get_decimal(ob: Value, _strict: bool) -> RbResult<AnyValue<'static>> {
|
201
|
+
fn abs_decimal_from_digits(digits: String, exp: i32) -> Option<(i128, usize)> {
|
202
|
+
let exp = exp - (digits.len() as i32);
|
203
|
+
match digits.parse::<i128>() {
|
204
|
+
Ok(mut v) => {
|
205
|
+
let scale = if exp > 0 {
|
206
|
+
v = 10_i128
|
207
|
+
.checked_pow(exp as u32)
|
208
|
+
.and_then(|factor| v.checked_mul(factor))?;
|
209
|
+
0
|
210
|
+
} else {
|
211
|
+
(-exp) as usize
|
212
|
+
};
|
213
|
+
Some((v, scale))
|
214
|
+
}
|
215
|
+
Err(_) => None,
|
216
|
+
}
|
217
|
+
}
|
218
|
+
|
219
|
+
let (sign, digits, _, exp): (i8, String, i32, i32) = ob.funcall("split", ()).unwrap();
|
220
|
+
let (mut v, scale) = abs_decimal_from_digits(digits, exp).ok_or_else(|| {
|
221
|
+
RbPolarsErr::other("BigDecimal is too large to fit in Decimal128".into())
|
222
|
+
})?;
|
223
|
+
if sign < 0 {
|
224
|
+
// TODO better error
|
225
|
+
v = v.checked_neg().unwrap();
|
226
|
+
}
|
227
|
+
Ok(AnyValue::Decimal(v, scale))
|
228
|
+
}
|
229
|
+
|
230
|
+
if ob.is_nil() {
|
231
|
+
get_null(ob, strict)
|
232
|
+
} else if ob.is_kind_of(class::true_class()) || ob.is_kind_of(class::false_class()) {
|
233
|
+
get_bool(ob, strict)
|
234
|
+
} else if ob.is_kind_of(class::integer()) {
|
235
|
+
get_int(ob, strict)
|
236
|
+
} else if ob.is_kind_of(class::float()) {
|
237
|
+
get_float(ob, strict)
|
238
|
+
} else if ob.is_kind_of(class::string()) {
|
239
|
+
get_str(ob, strict)
|
240
|
+
} else if ob.is_kind_of(class::array()) {
|
241
|
+
get_list(ob, strict)
|
242
|
+
} else if ob.is_kind_of(class::hash()) {
|
243
|
+
get_struct(ob, strict)
|
244
|
+
} else if ob.respond_to("_s", true)? {
|
245
|
+
get_list_from_series(ob, strict)
|
246
|
+
// call is_a? for ActiveSupport::TimeWithZone
|
247
|
+
} else if ob.funcall::<_, _, bool>("is_a?", (class::time(),))? {
|
248
|
+
get_time(ob, strict)
|
249
|
+
} else if ob.is_kind_of(crate::rb_modules::datetime()) {
|
250
|
+
get_datetime(ob, strict)
|
251
|
+
} else if ob.is_kind_of(crate::rb_modules::date()) {
|
252
|
+
get_date(ob, strict)
|
253
|
+
} else if ob.is_kind_of(crate::rb_modules::bigdecimal()) {
|
254
|
+
get_decimal(ob, strict)
|
255
|
+
} else {
|
256
|
+
Err(RbPolarsErr::other(format!(
|
257
|
+
"object type not supported {:?}",
|
258
|
+
ob
|
259
|
+
)))
|
260
|
+
}
|
261
|
+
}
|
@@ -11,8 +11,8 @@ impl TryConvert for Wrap<StringChunked> {
|
|
11
11
|
let (seq, len) = get_rbseq(obj)?;
|
12
12
|
let mut builder = StringChunkedBuilder::new("", len);
|
13
13
|
|
14
|
-
for res in seq.
|
15
|
-
let item = res
|
14
|
+
for res in seq.into_iter() {
|
15
|
+
let item = res;
|
16
16
|
match String::try_convert(item) {
|
17
17
|
Ok(val) => builder.append_value(&val),
|
18
18
|
Err(_) => builder.append_null(),
|
@@ -27,8 +27,8 @@ impl TryConvert for Wrap<BinaryChunked> {
|
|
27
27
|
let (seq, len) = get_rbseq(obj)?;
|
28
28
|
let mut builder = BinaryChunkedBuilder::new("", len);
|
29
29
|
|
30
|
-
for res in seq.
|
31
|
-
let item = res
|
30
|
+
for res in seq.into_iter() {
|
31
|
+
let item = res;
|
32
32
|
match RString::try_convert(item) {
|
33
33
|
Ok(val) => builder.append_value(unsafe { val.as_slice() }),
|
34
34
|
Err(_) => builder.append_null(),
|
@@ -1,4 +1,4 @@
|
|
1
|
-
pub(crate) mod anyvalue;
|
1
|
+
pub(crate) mod any_value;
|
2
2
|
mod chunked_array;
|
3
3
|
|
4
4
|
use std::fmt::{Debug, Display, Formatter};
|
@@ -7,7 +7,7 @@ use std::num::NonZeroUsize;
|
|
7
7
|
|
8
8
|
use magnus::{
|
9
9
|
class, exception, prelude::*, r_hash::ForEach, value::Opaque, IntoValue, Module, RArray, RHash,
|
10
|
-
Ruby, TryConvert, Value,
|
10
|
+
Ruby, Symbol, TryConvert, Value,
|
11
11
|
};
|
12
12
|
use polars::chunked_array::object::PolarsObjectSafe;
|
13
13
|
use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
|
@@ -75,7 +75,7 @@ pub(crate) fn get_df(obj: Value) -> RbResult<DataFrame> {
|
|
75
75
|
|
76
76
|
pub(crate) fn get_lf(obj: Value) -> RbResult<LazyFrame> {
|
77
77
|
let rbdf = obj.funcall::<_, _, &RbLazyFrame>("_ldf", ())?;
|
78
|
-
Ok(rbdf.ldf.clone())
|
78
|
+
Ok(rbdf.ldf.borrow().clone())
|
79
79
|
}
|
80
80
|
|
81
81
|
pub(crate) fn get_series(obj: Value) -> RbResult<Series> {
|
@@ -379,8 +379,8 @@ impl TryConvert for Wrap<DataType> {
|
|
379
379
|
"Polars::Struct" => {
|
380
380
|
let arr: RArray = ob.funcall("fields", ())?;
|
381
381
|
let mut fields = Vec::with_capacity(arr.len());
|
382
|
-
for v in arr.
|
383
|
-
fields.push(Wrap::<Field>::try_convert(v
|
382
|
+
for v in arr.into_iter() {
|
383
|
+
fields.push(Wrap::<Field>::try_convert(v)?.0);
|
384
384
|
}
|
385
385
|
DataType::Struct(fields)
|
386
386
|
}
|
@@ -427,11 +427,36 @@ impl TryConvert for Wrap<DataType> {
|
|
427
427
|
}
|
428
428
|
}
|
429
429
|
|
430
|
+
impl TryConvert for Wrap<StatisticsOptions> {
|
431
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
432
|
+
let mut statistics = StatisticsOptions::empty();
|
433
|
+
|
434
|
+
let dict = RHash::try_convert(ob)?;
|
435
|
+
dict.foreach(|key: Symbol, val: bool| {
|
436
|
+
match key.name()?.as_ref() {
|
437
|
+
"min" => statistics.min_value = val,
|
438
|
+
"max" => statistics.max_value = val,
|
439
|
+
"distinct_count" => statistics.distinct_count = val,
|
440
|
+
"null_count" => statistics.null_count = val,
|
441
|
+
_ => {
|
442
|
+
return Err(RbTypeError::new_err(format!(
|
443
|
+
"'{key}' is not a valid statistic option",
|
444
|
+
)))
|
445
|
+
}
|
446
|
+
}
|
447
|
+
Ok(ForEach::Continue)
|
448
|
+
})
|
449
|
+
.unwrap();
|
450
|
+
|
451
|
+
Ok(Wrap(statistics))
|
452
|
+
}
|
453
|
+
}
|
454
|
+
|
430
455
|
impl<'s> TryConvert for Wrap<Row<'s>> {
|
431
456
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
432
457
|
let mut vals: Vec<Wrap<AnyValue<'s>>> = Vec::new();
|
433
|
-
for item in RArray::try_convert(ob)?.
|
434
|
-
vals.push(Wrap::<AnyValue<'s>>::try_convert(item
|
458
|
+
for item in RArray::try_convert(ob)?.into_iter() {
|
459
|
+
vals.push(Wrap::<AnyValue<'s>>::try_convert(item)?);
|
435
460
|
}
|
436
461
|
let vals: Vec<AnyValue> = unsafe { std::mem::transmute(vals) };
|
437
462
|
Ok(Wrap(Row(vals)))
|
@@ -686,14 +711,13 @@ impl TryConvert for Wrap<JoinType> {
|
|
686
711
|
let parsed = match String::try_convert(ob)?.as_str() {
|
687
712
|
"inner" => JoinType::Inner,
|
688
713
|
"left" => JoinType::Left,
|
689
|
-
"
|
690
|
-
"outer_coalesce" => JoinType::Outer,
|
714
|
+
"full" => JoinType::Full,
|
691
715
|
"semi" => JoinType::Semi,
|
692
716
|
"anti" => JoinType::Anti,
|
693
717
|
"cross" => JoinType::Cross,
|
694
718
|
v => {
|
695
719
|
return Err(RbValueError::new_err(format!(
|
696
|
-
"how must be one of {{'inner', 'left', '
|
720
|
+
"how must be one of {{'inner', 'left', 'full', 'semi', 'anti', 'cross'}}, got {}",
|
697
721
|
v
|
698
722
|
)))
|
699
723
|
}
|
@@ -903,6 +927,23 @@ impl TryConvert for Wrap<SearchSortedSide> {
|
|
903
927
|
}
|
904
928
|
}
|
905
929
|
|
930
|
+
impl TryConvert for Wrap<ClosedInterval> {
|
931
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
932
|
+
let parsed = match String::try_convert(ob)?.as_str() {
|
933
|
+
"both" => ClosedInterval::Both,
|
934
|
+
"left" => ClosedInterval::Left,
|
935
|
+
"right" => ClosedInterval::Right,
|
936
|
+
"none" => ClosedInterval::None,
|
937
|
+
v => {
|
938
|
+
return Err(RbValueError::new_err(format!(
|
939
|
+
"`closed` must be one of {{'both', 'left', 'right', 'none'}}, got {v}",
|
940
|
+
)))
|
941
|
+
}
|
942
|
+
};
|
943
|
+
Ok(Wrap(parsed))
|
944
|
+
}
|
945
|
+
}
|
946
|
+
|
906
947
|
impl TryConvert for Wrap<WindowMapping> {
|
907
948
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
908
949
|
let parsed = match String::try_convert(ob)?.as_str() {
|
@@ -13,11 +13,11 @@ impl RbDataFrame {
|
|
13
13
|
schema: Option<Wrap<Schema>>,
|
14
14
|
) -> RbResult<Self> {
|
15
15
|
let mut data = Vec::with_capacity(rb_rows.len());
|
16
|
-
for v in rb_rows.
|
17
|
-
let rb_row = RArray::try_convert(v
|
16
|
+
for v in rb_rows.into_iter() {
|
17
|
+
let rb_row = RArray::try_convert(v)?;
|
18
18
|
let mut row = Vec::with_capacity(rb_row.len());
|
19
|
-
for val in rb_row.
|
20
|
-
row.push(Wrap::<AnyValue>::try_convert(val
|
19
|
+
for val in rb_row.into_iter() {
|
20
|
+
row.push(Wrap::<AnyValue>::try_convert(val)?.0);
|
21
21
|
}
|
22
22
|
data.push(Row(row));
|
23
23
|
}
|
@@ -130,8 +130,7 @@ where
|
|
130
130
|
fn dicts_to_rows<'a>(data: &Value, names: &'a [String], _strict: bool) -> RbResult<Vec<Row<'a>>> {
|
131
131
|
let (data, len) = get_rbseq(*data)?;
|
132
132
|
let mut rows = Vec::with_capacity(len);
|
133
|
-
for d in data.
|
134
|
-
let d = d?;
|
133
|
+
for d in data.into_iter() {
|
135
134
|
let d = RHash::try_convert(d)?;
|
136
135
|
|
137
136
|
let mut row = Vec::with_capacity(names.len());
|
@@ -170,8 +169,7 @@ fn infer_schema_names_from_data(
|
|
170
169
|
.unwrap_or(data_len);
|
171
170
|
|
172
171
|
let mut names = PlIndexSet::new();
|
173
|
-
for d in data.
|
174
|
-
let d = d?;
|
172
|
+
for d in data.into_iter().take(infer_schema_length) {
|
175
173
|
let d = RHash::try_convert(d)?;
|
176
174
|
d.foreach(|name: Value, _value: Value| {
|
177
175
|
if let Some(v) = Symbol::from_value(name) {
|
@@ -15,8 +15,8 @@ use crate::{RbDataFrame, RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
|
|
15
15
|
impl RbDataFrame {
|
16
16
|
pub fn init(columns: RArray) -> RbResult<Self> {
|
17
17
|
let mut cols = Vec::new();
|
18
|
-
for i in columns.
|
19
|
-
cols.push(<&RbSeries>::try_convert(i
|
18
|
+
for i in columns.into_iter() {
|
19
|
+
cols.push(<&RbSeries>::try_convert(i)?.series.borrow().clone());
|
20
20
|
}
|
21
21
|
let df = DataFrame::new(cols).map_err(RbPolarsErr::from)?;
|
22
22
|
Ok(RbDataFrame::new(df))
|
@@ -341,45 +341,42 @@ impl RbDataFrame {
|
|
341
341
|
RbDataFrame::new(self.df.borrow().clone())
|
342
342
|
}
|
343
343
|
|
344
|
-
pub fn
|
344
|
+
pub fn unpivot(
|
345
345
|
&self,
|
346
|
-
|
347
|
-
|
346
|
+
on: Vec<String>,
|
347
|
+
index: Vec<String>,
|
348
348
|
value_name: Option<String>,
|
349
349
|
variable_name: Option<String>,
|
350
350
|
) -> RbResult<Self> {
|
351
|
-
let args =
|
352
|
-
|
353
|
-
|
351
|
+
let args = UnpivotArgs {
|
352
|
+
on: strings_to_smartstrings(on),
|
353
|
+
index: strings_to_smartstrings(index),
|
354
354
|
value_name: value_name.map(|s| s.into()),
|
355
355
|
variable_name: variable_name.map(|s| s.into()),
|
356
356
|
streamable: false,
|
357
357
|
};
|
358
358
|
|
359
|
-
let df = self.df.borrow().
|
359
|
+
let df = self.df.borrow().unpivot2(args).map_err(RbPolarsErr::from)?;
|
360
360
|
Ok(RbDataFrame::new(df))
|
361
361
|
}
|
362
362
|
|
363
363
|
#[allow(clippy::too_many_arguments)]
|
364
364
|
pub fn pivot_expr(
|
365
365
|
&self,
|
366
|
-
|
367
|
-
|
366
|
+
on: Vec<String>,
|
367
|
+
index: Option<Vec<String>>,
|
368
368
|
values: Option<Vec<String>>,
|
369
369
|
maintain_order: bool,
|
370
370
|
sort_columns: bool,
|
371
371
|
aggregate_expr: Option<&RbExpr>,
|
372
372
|
separator: Option<String>,
|
373
373
|
) -> RbResult<Self> {
|
374
|
-
let fun =
|
375
|
-
true => pivot_stable,
|
376
|
-
false => pivot,
|
377
|
-
};
|
374
|
+
let fun = if maintain_order { pivot_stable } else { pivot };
|
378
375
|
let agg_expr = aggregate_expr.map(|aggregate_expr| aggregate_expr.inner.clone());
|
379
376
|
let df = fun(
|
380
377
|
&self.df.borrow(),
|
378
|
+
on,
|
381
379
|
index,
|
382
|
-
columns,
|
383
380
|
values,
|
384
381
|
sort_columns,
|
385
382
|
agg_expr,
|
@@ -569,23 +566,16 @@ impl RbDataFrame {
|
|
569
566
|
by: Vec<String>,
|
570
567
|
index_column: String,
|
571
568
|
every: String,
|
572
|
-
offset: String,
|
573
569
|
stable: bool,
|
574
570
|
) -> RbResult<Self> {
|
575
571
|
let out = if stable {
|
576
|
-
self.df
|
577
|
-
|
578
|
-
&index_column,
|
579
|
-
Duration::parse(&every),
|
580
|
-
Duration::parse(&offset),
|
581
|
-
)
|
572
|
+
self.df
|
573
|
+
.borrow()
|
574
|
+
.upsample_stable(by, &index_column, Duration::parse(&every))
|
582
575
|
} else {
|
583
|
-
self.df
|
584
|
-
|
585
|
-
&index_column,
|
586
|
-
Duration::parse(&every),
|
587
|
-
Duration::parse(&offset),
|
588
|
-
)
|
576
|
+
self.df
|
577
|
+
.borrow()
|
578
|
+
.upsample(by, &index_column, Duration::parse(&every))
|
589
579
|
};
|
590
580
|
let out = out.map_err(RbPolarsErr::from)?;
|
591
581
|
Ok(out.into())
|