polars-df 0.11.0 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/Cargo.lock +360 -361
- data/ext/polars/Cargo.toml +10 -7
- data/ext/polars/src/batched_csv.rs +1 -1
- data/ext/polars/src/conversion/any_value.rs +261 -0
- data/ext/polars/src/conversion/chunked_array.rs +4 -4
- data/ext/polars/src/conversion/mod.rs +51 -10
- data/ext/polars/src/dataframe/construction.rs +6 -8
- data/ext/polars/src/dataframe/general.rs +19 -29
- data/ext/polars/src/dataframe/io.rs +43 -33
- data/ext/polars/src/error.rs +26 -4
- data/ext/polars/src/expr/categorical.rs +0 -10
- data/ext/polars/src/expr/datetime.rs +4 -12
- data/ext/polars/src/expr/general.rs +123 -110
- data/ext/polars/src/expr/mod.rs +2 -2
- data/ext/polars/src/expr/rolling.rs +17 -9
- data/ext/polars/src/expr/string.rs +2 -6
- data/ext/polars/src/functions/eager.rs +10 -10
- data/ext/polars/src/functions/lazy.rs +21 -21
- data/ext/polars/src/functions/range.rs +6 -12
- data/ext/polars/src/interop/numo/to_numo_series.rs +2 -1
- data/ext/polars/src/lazyframe/mod.rs +81 -98
- data/ext/polars/src/lib.rs +55 -45
- data/ext/polars/src/map/dataframe.rs +2 -2
- data/ext/polars/src/rb_modules.rs +25 -1
- data/ext/polars/src/series/aggregation.rs +4 -2
- data/ext/polars/src/series/arithmetic.rs +21 -11
- data/ext/polars/src/series/construction.rs +56 -38
- data/ext/polars/src/series/export.rs +1 -1
- data/ext/polars/src/series/mod.rs +31 -10
- data/ext/polars/src/sql.rs +3 -1
- data/lib/polars/array_expr.rb +4 -4
- data/lib/polars/batched_csv_reader.rb +2 -2
- data/lib/polars/cat_expr.rb +0 -36
- data/lib/polars/cat_name_space.rb +0 -37
- data/lib/polars/data_frame.rb +93 -101
- data/lib/polars/data_types.rb +1 -1
- data/lib/polars/date_time_expr.rb +525 -573
- data/lib/polars/date_time_name_space.rb +263 -464
- data/lib/polars/dynamic_group_by.rb +3 -3
- data/lib/polars/exceptions.rb +3 -0
- data/lib/polars/expr.rb +367 -330
- data/lib/polars/expr_dispatch.rb +1 -1
- data/lib/polars/functions/aggregation/horizontal.rb +8 -8
- data/lib/polars/functions/as_datatype.rb +63 -40
- data/lib/polars/functions/lazy.rb +63 -14
- data/lib/polars/functions/lit.rb +1 -1
- data/lib/polars/functions/range/date_range.rb +18 -77
- data/lib/polars/functions/range/datetime_range.rb +4 -4
- data/lib/polars/functions/range/int_range.rb +2 -2
- data/lib/polars/functions/range/time_range.rb +4 -4
- data/lib/polars/functions/repeat.rb +1 -1
- data/lib/polars/functions/whenthen.rb +1 -1
- data/lib/polars/io/csv.rb +8 -8
- data/lib/polars/io/ipc.rb +3 -3
- data/lib/polars/io/json.rb +13 -2
- data/lib/polars/io/ndjson.rb +15 -4
- data/lib/polars/io/parquet.rb +5 -4
- data/lib/polars/lazy_frame.rb +120 -106
- data/lib/polars/lazy_group_by.rb +1 -1
- data/lib/polars/list_expr.rb +11 -11
- data/lib/polars/list_name_space.rb +5 -1
- data/lib/polars/rolling_group_by.rb +5 -7
- data/lib/polars/series.rb +105 -189
- data/lib/polars/string_expr.rb +42 -67
- data/lib/polars/string_name_space.rb +5 -4
- data/lib/polars/testing.rb +2 -2
- data/lib/polars/utils/constants.rb +9 -0
- data/lib/polars/utils/convert.rb +97 -0
- data/lib/polars/utils/parse.rb +89 -0
- data/lib/polars/utils/various.rb +76 -0
- data/lib/polars/utils/wrap.rb +19 -0
- data/lib/polars/utils.rb +4 -330
- data/lib/polars/version.rb +1 -1
- data/lib/polars/whenthen.rb +6 -6
- data/lib/polars.rb +11 -0
- metadata +9 -4
- data/ext/polars/src/conversion/anyvalue.rs +0 -186
data/ext/polars/Cargo.toml
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
[package]
|
2
2
|
name = "polars"
|
3
|
-
version = "0.11.0"
|
3
|
+
version = "0.12.0"
|
4
4
|
license = "MIT"
|
5
5
|
authors = ["Andrew Kane <andrew@ankane.org>"]
|
6
6
|
edition = "2021"
|
@@ -14,15 +14,15 @@ crate-type = ["cdylib"]
|
|
14
14
|
ahash = "0.8"
|
15
15
|
chrono = "0.4"
|
16
16
|
either = "1.8"
|
17
|
-
magnus = "0.
|
18
|
-
polars-core = "=0.
|
19
|
-
polars-parquet = "=0.
|
20
|
-
polars-utils = "=0.
|
17
|
+
magnus = "0.7"
|
18
|
+
polars-core = "=0.41.3"
|
19
|
+
polars-parquet = "=0.41.3"
|
20
|
+
polars-utils = "=0.41.3"
|
21
21
|
serde_json = "1"
|
22
22
|
smartstring = "1"
|
23
23
|
|
24
24
|
[dependencies.polars]
|
25
|
-
version = "=0.
|
25
|
+
version = "=0.41.3"
|
26
26
|
features = [
|
27
27
|
"abs",
|
28
28
|
"approx_unique",
|
@@ -41,7 +41,6 @@ features = [
|
|
41
41
|
"cumulative_eval",
|
42
42
|
"cutqcut",
|
43
43
|
"dataframe_arithmetic",
|
44
|
-
"date_offset",
|
45
44
|
"diagonal_concat",
|
46
45
|
"diff",
|
47
46
|
"dot_product",
|
@@ -56,6 +55,7 @@ features = [
|
|
56
55
|
"interpolate",
|
57
56
|
"ipc",
|
58
57
|
"ipc_streaming",
|
58
|
+
"is_between",
|
59
59
|
"is_first_distinct",
|
60
60
|
"is_in",
|
61
61
|
"is_last_distinct",
|
@@ -74,6 +74,9 @@ features = [
|
|
74
74
|
"meta",
|
75
75
|
"mode",
|
76
76
|
"moment",
|
77
|
+
"month_start",
|
78
|
+
"month_end",
|
79
|
+
"offset_by",
|
77
80
|
"object",
|
78
81
|
"parquet",
|
79
82
|
"partition_by",
|
@@ -95,7 +95,7 @@ impl RbBatchedCsv {
|
|
95
95
|
.with_projection(projection.map(Arc::new))
|
96
96
|
.with_rechunk(rechunk)
|
97
97
|
.with_chunk_size(chunk_size)
|
98
|
-
.with_columns(columns.map(Arc::
|
98
|
+
.with_columns(columns.map(Arc::from))
|
99
99
|
.with_n_threads(n_threads)
|
100
100
|
.with_dtype_overwrite(overwrite_dtype_slice.map(Arc::new))
|
101
101
|
.with_low_memory(low_memory)
|
@@ -0,0 +1,261 @@
|
|
1
|
+
use magnus::encoding::{EncodingCapable, Index};
|
2
|
+
use magnus::{
|
3
|
+
class, prelude::*, r_hash::ForEach, IntoValue, RArray, RHash, RString, Ruby, TryConvert, Value,
|
4
|
+
};
|
5
|
+
use polars::prelude::*;
|
6
|
+
use polars_core::utils::any_values_to_supertype_and_n_dtypes;
|
7
|
+
|
8
|
+
use super::{struct_dict, ObjectValue, Wrap};
|
9
|
+
|
10
|
+
use crate::error::RbOverflowError;
|
11
|
+
use crate::rb_modules::utils;
|
12
|
+
use crate::{RbPolarsErr, RbResult, RbSeries};
|
13
|
+
|
14
|
+
impl IntoValue for Wrap<AnyValue<'_>> {
|
15
|
+
fn into_value_with(self, ruby: &Ruby) -> Value {
|
16
|
+
any_value_into_rb_object(self.0, ruby)
|
17
|
+
}
|
18
|
+
}
|
19
|
+
|
20
|
+
impl<'s> TryConvert for Wrap<AnyValue<'s>> {
|
21
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
22
|
+
rb_object_to_any_value(ob, true).map(Wrap)
|
23
|
+
}
|
24
|
+
}
|
25
|
+
|
26
|
+
pub(crate) fn any_value_into_rb_object(av: AnyValue, ruby: &Ruby) -> Value {
|
27
|
+
match av {
|
28
|
+
AnyValue::UInt8(v) => ruby.into_value(v),
|
29
|
+
AnyValue::UInt16(v) => ruby.into_value(v),
|
30
|
+
AnyValue::UInt32(v) => ruby.into_value(v),
|
31
|
+
AnyValue::UInt64(v) => ruby.into_value(v),
|
32
|
+
AnyValue::Int8(v) => ruby.into_value(v),
|
33
|
+
AnyValue::Int16(v) => ruby.into_value(v),
|
34
|
+
AnyValue::Int32(v) => ruby.into_value(v),
|
35
|
+
AnyValue::Int64(v) => ruby.into_value(v),
|
36
|
+
AnyValue::Float32(v) => ruby.into_value(v),
|
37
|
+
AnyValue::Float64(v) => ruby.into_value(v),
|
38
|
+
AnyValue::Null => ruby.qnil().as_value(),
|
39
|
+
AnyValue::Boolean(v) => ruby.into_value(v),
|
40
|
+
AnyValue::String(v) => ruby.into_value(v),
|
41
|
+
AnyValue::StringOwned(v) => ruby.into_value(v.as_str()),
|
42
|
+
AnyValue::Categorical(idx, rev, arr) | AnyValue::Enum(idx, rev, arr) => {
|
43
|
+
let s = if arr.is_null() {
|
44
|
+
rev.get(idx)
|
45
|
+
} else {
|
46
|
+
unsafe { arr.deref_unchecked().value(idx as usize) }
|
47
|
+
};
|
48
|
+
s.into_value()
|
49
|
+
}
|
50
|
+
AnyValue::Date(v) => utils().funcall("_to_ruby_date", (v,)).unwrap(),
|
51
|
+
AnyValue::Datetime(v, time_unit, time_zone) => {
|
52
|
+
let time_unit = time_unit.to_ascii();
|
53
|
+
utils()
|
54
|
+
.funcall("_to_ruby_datetime", (v, time_unit, time_zone.clone()))
|
55
|
+
.unwrap()
|
56
|
+
}
|
57
|
+
AnyValue::Duration(v, time_unit) => {
|
58
|
+
let time_unit = time_unit.to_ascii();
|
59
|
+
utils()
|
60
|
+
.funcall("_to_ruby_duration", (v, time_unit))
|
61
|
+
.unwrap()
|
62
|
+
}
|
63
|
+
AnyValue::Time(v) => utils().funcall("_to_ruby_time", (v,)).unwrap(),
|
64
|
+
AnyValue::Array(v, _) | AnyValue::List(v) => RbSeries::new(v).to_a().into_value(),
|
65
|
+
ref av @ AnyValue::Struct(_, _, flds) => struct_dict(av._iter_struct_av(), flds),
|
66
|
+
AnyValue::StructOwned(payload) => struct_dict(payload.0.into_iter(), &payload.1),
|
67
|
+
AnyValue::Object(v) => {
|
68
|
+
let object = v.as_any().downcast_ref::<ObjectValue>().unwrap();
|
69
|
+
object.to_object()
|
70
|
+
}
|
71
|
+
AnyValue::ObjectOwned(v) => {
|
72
|
+
let object = v.0.as_any().downcast_ref::<ObjectValue>().unwrap();
|
73
|
+
object.to_object()
|
74
|
+
}
|
75
|
+
AnyValue::Binary(v) => RString::from_slice(v).into_value(),
|
76
|
+
AnyValue::BinaryOwned(v) => RString::from_slice(&v).into_value(),
|
77
|
+
AnyValue::Decimal(v, scale) => utils()
|
78
|
+
.funcall("_to_ruby_decimal", (v.to_string(), -(scale as i32)))
|
79
|
+
.unwrap(),
|
80
|
+
}
|
81
|
+
}
|
82
|
+
|
83
|
+
pub(crate) fn rb_object_to_any_value<'s>(ob: Value, strict: bool) -> RbResult<AnyValue<'s>> {
|
84
|
+
// Conversion functions.
|
85
|
+
fn get_null(_ob: Value, _strict: bool) -> RbResult<AnyValue<'static>> {
|
86
|
+
Ok(AnyValue::Null)
|
87
|
+
}
|
88
|
+
|
89
|
+
fn get_bool(ob: Value, _strict: bool) -> RbResult<AnyValue<'static>> {
|
90
|
+
let b = bool::try_convert(ob)?;
|
91
|
+
Ok(AnyValue::Boolean(b))
|
92
|
+
}
|
93
|
+
|
94
|
+
fn get_int(ob: Value, strict: bool) -> RbResult<AnyValue<'static>> {
|
95
|
+
if let Ok(v) = i64::try_convert(ob) {
|
96
|
+
Ok(AnyValue::Int64(v))
|
97
|
+
} else if let Ok(v) = u64::try_convert(ob) {
|
98
|
+
Ok(AnyValue::UInt64(v))
|
99
|
+
} else if !strict {
|
100
|
+
let f = f64::try_convert(ob)?;
|
101
|
+
Ok(AnyValue::Float64(f))
|
102
|
+
} else {
|
103
|
+
Err(RbOverflowError::new_err(format!(
|
104
|
+
"int value too large for Polars integer types: {ob}"
|
105
|
+
)))
|
106
|
+
}
|
107
|
+
}
|
108
|
+
|
109
|
+
fn get_float(ob: Value, _strict: bool) -> RbResult<AnyValue<'static>> {
|
110
|
+
Ok(AnyValue::Float64(f64::try_convert(ob)?))
|
111
|
+
}
|
112
|
+
|
113
|
+
fn get_str(ob: Value, _strict: bool) -> RbResult<AnyValue<'static>> {
|
114
|
+
let v = RString::from_value(ob).unwrap();
|
115
|
+
if v.enc_get() == Index::utf8() {
|
116
|
+
Ok(AnyValue::StringOwned(v.to_string()?.into()))
|
117
|
+
} else {
|
118
|
+
Ok(AnyValue::BinaryOwned(unsafe { v.as_slice() }.to_vec()))
|
119
|
+
}
|
120
|
+
}
|
121
|
+
|
122
|
+
fn get_list(ob: Value, _strict: bool) -> RbResult<AnyValue<'static>> {
|
123
|
+
let v = RArray::from_value(ob).unwrap();
|
124
|
+
if v.is_empty() {
|
125
|
+
Ok(AnyValue::List(Series::new_empty("", &DataType::Null)))
|
126
|
+
} else {
|
127
|
+
let list = v;
|
128
|
+
|
129
|
+
let mut avs = Vec::with_capacity(25);
|
130
|
+
let mut iter = list.into_iter();
|
131
|
+
|
132
|
+
for item in (&mut iter).take(25) {
|
133
|
+
avs.push(Wrap::<AnyValue>::try_convert(item)?.0)
|
134
|
+
}
|
135
|
+
|
136
|
+
let (dtype, _n_types) =
|
137
|
+
any_values_to_supertype_and_n_dtypes(&avs).map_err(RbPolarsErr::from)?;
|
138
|
+
|
139
|
+
// push the rest
|
140
|
+
avs.reserve(list.len());
|
141
|
+
for item in iter {
|
142
|
+
avs.push(Wrap::<AnyValue>::try_convert(item)?.0)
|
143
|
+
}
|
144
|
+
|
145
|
+
let s = Series::from_any_values_and_dtype("", &avs, &dtype, true)
|
146
|
+
.map_err(RbPolarsErr::from)?;
|
147
|
+
Ok(AnyValue::List(s))
|
148
|
+
}
|
149
|
+
}
|
150
|
+
|
151
|
+
fn get_list_from_series(ob: Value, _strict: bool) -> RbResult<AnyValue<'static>> {
|
152
|
+
let s = super::get_series(ob)?;
|
153
|
+
Ok(AnyValue::List(s))
|
154
|
+
}
|
155
|
+
|
156
|
+
fn get_struct(ob: Value, _strict: bool) -> RbResult<AnyValue<'static>> {
|
157
|
+
let dict = RHash::from_value(ob).unwrap();
|
158
|
+
let len = dict.len();
|
159
|
+
let mut keys = Vec::with_capacity(len);
|
160
|
+
let mut vals = Vec::with_capacity(len);
|
161
|
+
dict.foreach(|k: Value, v: Value| {
|
162
|
+
let key = String::try_convert(k)?;
|
163
|
+
let val = Wrap::<AnyValue>::try_convert(v)?.0;
|
164
|
+
let dtype = DataType::from(&val);
|
165
|
+
keys.push(Field::new(&key, dtype));
|
166
|
+
vals.push(val);
|
167
|
+
Ok(ForEach::Continue)
|
168
|
+
})?;
|
169
|
+
Ok(AnyValue::StructOwned(Box::new((vals, keys))))
|
170
|
+
}
|
171
|
+
|
172
|
+
fn get_date(ob: Value, _strict: bool) -> RbResult<AnyValue<'static>> {
|
173
|
+
// convert to DateTime for UTC
|
174
|
+
let v = ob
|
175
|
+
.funcall::<_, _, Value>("to_datetime", ())?
|
176
|
+
.funcall::<_, _, Value>("to_time", ())?
|
177
|
+
.funcall::<_, _, i64>("to_i", ())?;
|
178
|
+
Ok(AnyValue::Date((v / 86400) as i32))
|
179
|
+
}
|
180
|
+
|
181
|
+
fn get_time(ob: Value, _strict: bool) -> RbResult<AnyValue<'static>> {
|
182
|
+
let sec = ob.funcall::<_, _, i64>("to_i", ())?;
|
183
|
+
let nsec = ob.funcall::<_, _, i64>("nsec", ())?;
|
184
|
+
let v = sec * 1_000_000_000 + nsec;
|
185
|
+
// TODO support time zone when possible
|
186
|
+
// https://github.com/pola-rs/polars/issues/9103
|
187
|
+
Ok(AnyValue::Datetime(v, TimeUnit::Nanoseconds, &None))
|
188
|
+
}
|
189
|
+
|
190
|
+
fn get_datetime(ob: Value, _strict: bool) -> RbResult<AnyValue<'static>> {
|
191
|
+
let sec: i64 = ob.funcall("to_i", ())?;
|
192
|
+
let nsec: i64 = ob.funcall("nsec", ())?;
|
193
|
+
Ok(AnyValue::Datetime(
|
194
|
+
sec * 1_000_000_000 + nsec,
|
195
|
+
TimeUnit::Nanoseconds,
|
196
|
+
&None,
|
197
|
+
))
|
198
|
+
}
|
199
|
+
|
200
|
+
fn get_decimal(ob: Value, _strict: bool) -> RbResult<AnyValue<'static>> {
|
201
|
+
fn abs_decimal_from_digits(digits: String, exp: i32) -> Option<(i128, usize)> {
|
202
|
+
let exp = exp - (digits.len() as i32);
|
203
|
+
match digits.parse::<i128>() {
|
204
|
+
Ok(mut v) => {
|
205
|
+
let scale = if exp > 0 {
|
206
|
+
v = 10_i128
|
207
|
+
.checked_pow(exp as u32)
|
208
|
+
.and_then(|factor| v.checked_mul(factor))?;
|
209
|
+
0
|
210
|
+
} else {
|
211
|
+
(-exp) as usize
|
212
|
+
};
|
213
|
+
Some((v, scale))
|
214
|
+
}
|
215
|
+
Err(_) => None,
|
216
|
+
}
|
217
|
+
}
|
218
|
+
|
219
|
+
let (sign, digits, _, exp): (i8, String, i32, i32) = ob.funcall("split", ()).unwrap();
|
220
|
+
let (mut v, scale) = abs_decimal_from_digits(digits, exp).ok_or_else(|| {
|
221
|
+
RbPolarsErr::other("BigDecimal is too large to fit in Decimal128".into())
|
222
|
+
})?;
|
223
|
+
if sign < 0 {
|
224
|
+
// TODO better error
|
225
|
+
v = v.checked_neg().unwrap();
|
226
|
+
}
|
227
|
+
Ok(AnyValue::Decimal(v, scale))
|
228
|
+
}
|
229
|
+
|
230
|
+
if ob.is_nil() {
|
231
|
+
get_null(ob, strict)
|
232
|
+
} else if ob.is_kind_of(class::true_class()) || ob.is_kind_of(class::false_class()) {
|
233
|
+
get_bool(ob, strict)
|
234
|
+
} else if ob.is_kind_of(class::integer()) {
|
235
|
+
get_int(ob, strict)
|
236
|
+
} else if ob.is_kind_of(class::float()) {
|
237
|
+
get_float(ob, strict)
|
238
|
+
} else if ob.is_kind_of(class::string()) {
|
239
|
+
get_str(ob, strict)
|
240
|
+
} else if ob.is_kind_of(class::array()) {
|
241
|
+
get_list(ob, strict)
|
242
|
+
} else if ob.is_kind_of(class::hash()) {
|
243
|
+
get_struct(ob, strict)
|
244
|
+
} else if ob.respond_to("_s", true)? {
|
245
|
+
get_list_from_series(ob, strict)
|
246
|
+
// call is_a? for ActiveSupport::TimeWithZone
|
247
|
+
} else if ob.funcall::<_, _, bool>("is_a?", (class::time(),))? {
|
248
|
+
get_time(ob, strict)
|
249
|
+
} else if ob.is_kind_of(crate::rb_modules::datetime()) {
|
250
|
+
get_datetime(ob, strict)
|
251
|
+
} else if ob.is_kind_of(crate::rb_modules::date()) {
|
252
|
+
get_date(ob, strict)
|
253
|
+
} else if ob.is_kind_of(crate::rb_modules::bigdecimal()) {
|
254
|
+
get_decimal(ob, strict)
|
255
|
+
} else {
|
256
|
+
Err(RbPolarsErr::other(format!(
|
257
|
+
"object type not supported {:?}",
|
258
|
+
ob
|
259
|
+
)))
|
260
|
+
}
|
261
|
+
}
|
@@ -11,8 +11,8 @@ impl TryConvert for Wrap<StringChunked> {
|
|
11
11
|
let (seq, len) = get_rbseq(obj)?;
|
12
12
|
let mut builder = StringChunkedBuilder::new("", len);
|
13
13
|
|
14
|
-
for res in seq.
|
15
|
-
let item = res
|
14
|
+
for res in seq.into_iter() {
|
15
|
+
let item = res;
|
16
16
|
match String::try_convert(item) {
|
17
17
|
Ok(val) => builder.append_value(&val),
|
18
18
|
Err(_) => builder.append_null(),
|
@@ -27,8 +27,8 @@ impl TryConvert for Wrap<BinaryChunked> {
|
|
27
27
|
let (seq, len) = get_rbseq(obj)?;
|
28
28
|
let mut builder = BinaryChunkedBuilder::new("", len);
|
29
29
|
|
30
|
-
for res in seq.
|
31
|
-
let item = res
|
30
|
+
for res in seq.into_iter() {
|
31
|
+
let item = res;
|
32
32
|
match RString::try_convert(item) {
|
33
33
|
Ok(val) => builder.append_value(unsafe { val.as_slice() }),
|
34
34
|
Err(_) => builder.append_null(),
|
@@ -1,4 +1,4 @@
|
|
1
|
-
pub(crate) mod anyvalue;
|
1
|
+
pub(crate) mod any_value;
|
2
2
|
mod chunked_array;
|
3
3
|
|
4
4
|
use std::fmt::{Debug, Display, Formatter};
|
@@ -7,7 +7,7 @@ use std::num::NonZeroUsize;
|
|
7
7
|
|
8
8
|
use magnus::{
|
9
9
|
class, exception, prelude::*, r_hash::ForEach, value::Opaque, IntoValue, Module, RArray, RHash,
|
10
|
-
Ruby, TryConvert, Value,
|
10
|
+
Ruby, Symbol, TryConvert, Value,
|
11
11
|
};
|
12
12
|
use polars::chunked_array::object::PolarsObjectSafe;
|
13
13
|
use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
|
@@ -75,7 +75,7 @@ pub(crate) fn get_df(obj: Value) -> RbResult<DataFrame> {
|
|
75
75
|
|
76
76
|
pub(crate) fn get_lf(obj: Value) -> RbResult<LazyFrame> {
|
77
77
|
let rbdf = obj.funcall::<_, _, &RbLazyFrame>("_ldf", ())?;
|
78
|
-
Ok(rbdf.ldf.clone())
|
78
|
+
Ok(rbdf.ldf.borrow().clone())
|
79
79
|
}
|
80
80
|
|
81
81
|
pub(crate) fn get_series(obj: Value) -> RbResult<Series> {
|
@@ -379,8 +379,8 @@ impl TryConvert for Wrap<DataType> {
|
|
379
379
|
"Polars::Struct" => {
|
380
380
|
let arr: RArray = ob.funcall("fields", ())?;
|
381
381
|
let mut fields = Vec::with_capacity(arr.len());
|
382
|
-
for v in arr.
|
383
|
-
fields.push(Wrap::<Field>::try_convert(v
|
382
|
+
for v in arr.into_iter() {
|
383
|
+
fields.push(Wrap::<Field>::try_convert(v)?.0);
|
384
384
|
}
|
385
385
|
DataType::Struct(fields)
|
386
386
|
}
|
@@ -427,11 +427,36 @@ impl TryConvert for Wrap<DataType> {
|
|
427
427
|
}
|
428
428
|
}
|
429
429
|
|
430
|
+
impl TryConvert for Wrap<StatisticsOptions> {
|
431
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
432
|
+
let mut statistics = StatisticsOptions::empty();
|
433
|
+
|
434
|
+
let dict = RHash::try_convert(ob)?;
|
435
|
+
dict.foreach(|key: Symbol, val: bool| {
|
436
|
+
match key.name()?.as_ref() {
|
437
|
+
"min" => statistics.min_value = val,
|
438
|
+
"max" => statistics.max_value = val,
|
439
|
+
"distinct_count" => statistics.distinct_count = val,
|
440
|
+
"null_count" => statistics.null_count = val,
|
441
|
+
_ => {
|
442
|
+
return Err(RbTypeError::new_err(format!(
|
443
|
+
"'{key}' is not a valid statistic option",
|
444
|
+
)))
|
445
|
+
}
|
446
|
+
}
|
447
|
+
Ok(ForEach::Continue)
|
448
|
+
})
|
449
|
+
.unwrap();
|
450
|
+
|
451
|
+
Ok(Wrap(statistics))
|
452
|
+
}
|
453
|
+
}
|
454
|
+
|
430
455
|
impl<'s> TryConvert for Wrap<Row<'s>> {
|
431
456
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
432
457
|
let mut vals: Vec<Wrap<AnyValue<'s>>> = Vec::new();
|
433
|
-
for item in RArray::try_convert(ob)?.
|
434
|
-
vals.push(Wrap::<AnyValue<'s>>::try_convert(item
|
458
|
+
for item in RArray::try_convert(ob)?.into_iter() {
|
459
|
+
vals.push(Wrap::<AnyValue<'s>>::try_convert(item)?);
|
435
460
|
}
|
436
461
|
let vals: Vec<AnyValue> = unsafe { std::mem::transmute(vals) };
|
437
462
|
Ok(Wrap(Row(vals)))
|
@@ -686,14 +711,13 @@ impl TryConvert for Wrap<JoinType> {
|
|
686
711
|
let parsed = match String::try_convert(ob)?.as_str() {
|
687
712
|
"inner" => JoinType::Inner,
|
688
713
|
"left" => JoinType::Left,
|
689
|
-
"
|
690
|
-
"outer_coalesce" => JoinType::Outer,
|
714
|
+
"full" => JoinType::Full,
|
691
715
|
"semi" => JoinType::Semi,
|
692
716
|
"anti" => JoinType::Anti,
|
693
717
|
"cross" => JoinType::Cross,
|
694
718
|
v => {
|
695
719
|
return Err(RbValueError::new_err(format!(
|
696
|
-
"how must be one of {{'inner', 'left', '
|
720
|
+
"how must be one of {{'inner', 'left', 'full', 'semi', 'anti', 'cross'}}, got {}",
|
697
721
|
v
|
698
722
|
)))
|
699
723
|
}
|
@@ -903,6 +927,23 @@ impl TryConvert for Wrap<SearchSortedSide> {
|
|
903
927
|
}
|
904
928
|
}
|
905
929
|
|
930
|
+
impl TryConvert for Wrap<ClosedInterval> {
|
931
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
932
|
+
let parsed = match String::try_convert(ob)?.as_str() {
|
933
|
+
"both" => ClosedInterval::Both,
|
934
|
+
"left" => ClosedInterval::Left,
|
935
|
+
"right" => ClosedInterval::Right,
|
936
|
+
"none" => ClosedInterval::None,
|
937
|
+
v => {
|
938
|
+
return Err(RbValueError::new_err(format!(
|
939
|
+
"`closed` must be one of {{'both', 'left', 'right', 'none'}}, got {v}",
|
940
|
+
)))
|
941
|
+
}
|
942
|
+
};
|
943
|
+
Ok(Wrap(parsed))
|
944
|
+
}
|
945
|
+
}
|
946
|
+
|
906
947
|
impl TryConvert for Wrap<WindowMapping> {
|
907
948
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
908
949
|
let parsed = match String::try_convert(ob)?.as_str() {
|
@@ -13,11 +13,11 @@ impl RbDataFrame {
|
|
13
13
|
schema: Option<Wrap<Schema>>,
|
14
14
|
) -> RbResult<Self> {
|
15
15
|
let mut data = Vec::with_capacity(rb_rows.len());
|
16
|
-
for v in rb_rows.
|
17
|
-
let rb_row = RArray::try_convert(v
|
16
|
+
for v in rb_rows.into_iter() {
|
17
|
+
let rb_row = RArray::try_convert(v)?;
|
18
18
|
let mut row = Vec::with_capacity(rb_row.len());
|
19
|
-
for val in rb_row.
|
20
|
-
row.push(Wrap::<AnyValue>::try_convert(val
|
19
|
+
for val in rb_row.into_iter() {
|
20
|
+
row.push(Wrap::<AnyValue>::try_convert(val)?.0);
|
21
21
|
}
|
22
22
|
data.push(Row(row));
|
23
23
|
}
|
@@ -130,8 +130,7 @@ where
|
|
130
130
|
fn dicts_to_rows<'a>(data: &Value, names: &'a [String], _strict: bool) -> RbResult<Vec<Row<'a>>> {
|
131
131
|
let (data, len) = get_rbseq(*data)?;
|
132
132
|
let mut rows = Vec::with_capacity(len);
|
133
|
-
for d in data.
|
134
|
-
let d = d?;
|
133
|
+
for d in data.into_iter() {
|
135
134
|
let d = RHash::try_convert(d)?;
|
136
135
|
|
137
136
|
let mut row = Vec::with_capacity(names.len());
|
@@ -170,8 +169,7 @@ fn infer_schema_names_from_data(
|
|
170
169
|
.unwrap_or(data_len);
|
171
170
|
|
172
171
|
let mut names = PlIndexSet::new();
|
173
|
-
for d in data.
|
174
|
-
let d = d?;
|
172
|
+
for d in data.into_iter().take(infer_schema_length) {
|
175
173
|
let d = RHash::try_convert(d)?;
|
176
174
|
d.foreach(|name: Value, _value: Value| {
|
177
175
|
if let Some(v) = Symbol::from_value(name) {
|
@@ -15,8 +15,8 @@ use crate::{RbDataFrame, RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
|
|
15
15
|
impl RbDataFrame {
|
16
16
|
pub fn init(columns: RArray) -> RbResult<Self> {
|
17
17
|
let mut cols = Vec::new();
|
18
|
-
for i in columns.
|
19
|
-
cols.push(<&RbSeries>::try_convert(i
|
18
|
+
for i in columns.into_iter() {
|
19
|
+
cols.push(<&RbSeries>::try_convert(i)?.series.borrow().clone());
|
20
20
|
}
|
21
21
|
let df = DataFrame::new(cols).map_err(RbPolarsErr::from)?;
|
22
22
|
Ok(RbDataFrame::new(df))
|
@@ -341,45 +341,42 @@ impl RbDataFrame {
|
|
341
341
|
RbDataFrame::new(self.df.borrow().clone())
|
342
342
|
}
|
343
343
|
|
344
|
-
pub fn
|
344
|
+
pub fn unpivot(
|
345
345
|
&self,
|
346
|
-
|
347
|
-
|
346
|
+
on: Vec<String>,
|
347
|
+
index: Vec<String>,
|
348
348
|
value_name: Option<String>,
|
349
349
|
variable_name: Option<String>,
|
350
350
|
) -> RbResult<Self> {
|
351
|
-
let args =
|
352
|
-
|
353
|
-
|
351
|
+
let args = UnpivotArgs {
|
352
|
+
on: strings_to_smartstrings(on),
|
353
|
+
index: strings_to_smartstrings(index),
|
354
354
|
value_name: value_name.map(|s| s.into()),
|
355
355
|
variable_name: variable_name.map(|s| s.into()),
|
356
356
|
streamable: false,
|
357
357
|
};
|
358
358
|
|
359
|
-
let df = self.df.borrow().
|
359
|
+
let df = self.df.borrow().unpivot2(args).map_err(RbPolarsErr::from)?;
|
360
360
|
Ok(RbDataFrame::new(df))
|
361
361
|
}
|
362
362
|
|
363
363
|
#[allow(clippy::too_many_arguments)]
|
364
364
|
pub fn pivot_expr(
|
365
365
|
&self,
|
366
|
-
|
367
|
-
|
366
|
+
on: Vec<String>,
|
367
|
+
index: Option<Vec<String>>,
|
368
368
|
values: Option<Vec<String>>,
|
369
369
|
maintain_order: bool,
|
370
370
|
sort_columns: bool,
|
371
371
|
aggregate_expr: Option<&RbExpr>,
|
372
372
|
separator: Option<String>,
|
373
373
|
) -> RbResult<Self> {
|
374
|
-
let fun =
|
375
|
-
true => pivot_stable,
|
376
|
-
false => pivot,
|
377
|
-
};
|
374
|
+
let fun = if maintain_order { pivot_stable } else { pivot };
|
378
375
|
let agg_expr = aggregate_expr.map(|aggregate_expr| aggregate_expr.inner.clone());
|
379
376
|
let df = fun(
|
380
377
|
&self.df.borrow(),
|
378
|
+
on,
|
381
379
|
index,
|
382
|
-
columns,
|
383
380
|
values,
|
384
381
|
sort_columns,
|
385
382
|
agg_expr,
|
@@ -569,23 +566,16 @@ impl RbDataFrame {
|
|
569
566
|
by: Vec<String>,
|
570
567
|
index_column: String,
|
571
568
|
every: String,
|
572
|
-
offset: String,
|
573
569
|
stable: bool,
|
574
570
|
) -> RbResult<Self> {
|
575
571
|
let out = if stable {
|
576
|
-
self.df
|
577
|
-
|
578
|
-
&index_column,
|
579
|
-
Duration::parse(&every),
|
580
|
-
Duration::parse(&offset),
|
581
|
-
)
|
572
|
+
self.df
|
573
|
+
.borrow()
|
574
|
+
.upsample_stable(by, &index_column, Duration::parse(&every))
|
582
575
|
} else {
|
583
|
-
self.df
|
584
|
-
|
585
|
-
&index_column,
|
586
|
-
Duration::parse(&every),
|
587
|
-
Duration::parse(&offset),
|
588
|
-
)
|
576
|
+
self.df
|
577
|
+
.borrow()
|
578
|
+
.upsample(by, &index_column, Duration::parse(&every))
|
589
579
|
};
|
590
580
|
let out = out.map_err(RbPolarsErr::from)?;
|
591
581
|
Ok(out.into())
|