polars-df 0.8.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +42 -1
- data/Cargo.lock +159 -66
- data/Cargo.toml +0 -3
- data/LICENSE.txt +1 -1
- data/README.md +3 -2
- data/ext/polars/Cargo.toml +18 -8
- data/ext/polars/src/batched_csv.rs +7 -5
- data/ext/polars/src/conversion/anyvalue.rs +186 -0
- data/ext/polars/src/conversion/chunked_array.rs +140 -0
- data/ext/polars/src/{conversion.rs → conversion/mod.rs} +273 -342
- data/ext/polars/src/dataframe.rs +108 -66
- data/ext/polars/src/expr/array.rs +78 -0
- data/ext/polars/src/expr/datetime.rs +29 -58
- data/ext/polars/src/expr/general.rs +83 -36
- data/ext/polars/src/expr/list.rs +58 -6
- data/ext/polars/src/expr/meta.rs +48 -0
- data/ext/polars/src/expr/rolling.rs +1 -0
- data/ext/polars/src/expr/string.rs +62 -11
- data/ext/polars/src/expr/struct.rs +8 -4
- data/ext/polars/src/file.rs +158 -11
- data/ext/polars/src/functions/aggregation.rs +6 -0
- data/ext/polars/src/functions/lazy.rs +120 -50
- data/ext/polars/src/functions/meta.rs +45 -1
- data/ext/polars/src/functions/string_cache.rs +14 -0
- data/ext/polars/src/functions/whenthen.rs +47 -17
- data/ext/polars/src/{lazyframe.rs → lazyframe/mod.rs} +195 -40
- data/ext/polars/src/lib.rs +246 -179
- data/ext/polars/src/map/dataframe.rs +17 -9
- data/ext/polars/src/series/aggregation.rs +20 -0
- data/ext/polars/src/series/mod.rs +35 -4
- data/lib/polars/array_expr.rb +453 -0
- data/lib/polars/array_name_space.rb +346 -0
- data/lib/polars/batched_csv_reader.rb +4 -2
- data/lib/polars/cat_expr.rb +24 -0
- data/lib/polars/cat_name_space.rb +75 -0
- data/lib/polars/config.rb +2 -2
- data/lib/polars/data_frame.rb +306 -96
- data/lib/polars/data_types.rb +191 -28
- data/lib/polars/date_time_expr.rb +41 -18
- data/lib/polars/date_time_name_space.rb +9 -3
- data/lib/polars/exceptions.rb +12 -1
- data/lib/polars/expr.rb +898 -215
- data/lib/polars/functions/aggregation/horizontal.rb +246 -0
- data/lib/polars/functions/aggregation/vertical.rb +282 -0
- data/lib/polars/functions/as_datatype.rb +248 -0
- data/lib/polars/functions/col.rb +47 -0
- data/lib/polars/functions/eager.rb +182 -0
- data/lib/polars/functions/lazy.rb +1280 -0
- data/lib/polars/functions/len.rb +49 -0
- data/lib/polars/functions/lit.rb +35 -0
- data/lib/polars/functions/random.rb +16 -0
- data/lib/polars/functions/range/date_range.rb +103 -0
- data/lib/polars/functions/range/int_range.rb +51 -0
- data/lib/polars/functions/repeat.rb +144 -0
- data/lib/polars/functions/whenthen.rb +96 -0
- data/lib/polars/functions.rb +29 -416
- data/lib/polars/group_by.rb +2 -2
- data/lib/polars/io.rb +36 -31
- data/lib/polars/lazy_frame.rb +405 -88
- data/lib/polars/list_expr.rb +158 -8
- data/lib/polars/list_name_space.rb +102 -0
- data/lib/polars/meta_expr.rb +175 -7
- data/lib/polars/series.rb +282 -41
- data/lib/polars/string_cache.rb +75 -0
- data/lib/polars/string_expr.rb +413 -96
- data/lib/polars/string_name_space.rb +4 -4
- data/lib/polars/testing.rb +507 -0
- data/lib/polars/utils.rb +106 -8
- data/lib/polars/version.rb +1 -1
- data/lib/polars/whenthen.rb +83 -0
- data/lib/polars.rb +16 -4
- metadata +37 -8
- data/lib/polars/lazy_functions.rb +0 -1181
- data/lib/polars/when.rb +0 -16
- data/lib/polars/when_then.rb +0 -19
@@ -1,24 +1,28 @@
|
|
1
|
+
pub(crate) mod anyvalue;
|
2
|
+
mod chunked_array;
|
3
|
+
|
1
4
|
use std::fmt::{Debug, Display, Formatter};
|
2
5
|
use std::hash::{Hash, Hasher};
|
6
|
+
use std::num::NonZeroUsize;
|
3
7
|
|
4
|
-
use magnus::encoding::{EncodingCapable, Index};
|
5
8
|
use magnus::{
|
6
|
-
class, exception, prelude::*, r_hash::ForEach, value::Opaque,
|
7
|
-
|
9
|
+
class, exception, prelude::*, r_hash::ForEach, value::Opaque, IntoValue, Module, RArray, RHash,
|
10
|
+
Ruby, Symbol, TryConvert, Value,
|
8
11
|
};
|
9
12
|
use polars::chunked_array::object::PolarsObjectSafe;
|
10
13
|
use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
|
11
14
|
use polars::datatypes::AnyValue;
|
12
|
-
use polars::frame::row::
|
15
|
+
use polars::frame::row::Row;
|
13
16
|
use polars::frame::NullStrategy;
|
14
17
|
use polars::io::avro::AvroCompression;
|
15
18
|
use polars::prelude::*;
|
16
19
|
use polars::series::ops::NullBehavior;
|
17
|
-
use
|
20
|
+
use polars_core::utils::arrow::array::Array;
|
21
|
+
use polars_utils::total_ord::{TotalEq, TotalHash};
|
18
22
|
use smartstring::alias::String as SmartString;
|
19
23
|
|
20
24
|
use crate::object::OBJECT_NAME;
|
21
|
-
use crate::rb_modules::
|
25
|
+
use crate::rb_modules::series;
|
22
26
|
use crate::{RbDataFrame, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbTypeError, RbValueError};
|
23
27
|
|
24
28
|
pub(crate) fn slice_to_wrapped<T>(slice: &[T]) -> &[Wrap<T>] {
|
@@ -78,36 +82,11 @@ pub(crate) fn get_series(obj: Value) -> RbResult<Series> {
|
|
78
82
|
Ok(rbs.series.borrow().clone())
|
79
83
|
}
|
80
84
|
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
for res in seq.each() {
|
87
|
-
let item = res?;
|
88
|
-
match String::try_convert(item) {
|
89
|
-
Ok(val) => builder.append_value(&val),
|
90
|
-
Err(_) => builder.append_null(),
|
91
|
-
}
|
92
|
-
}
|
93
|
-
Ok(Wrap(builder.finish()))
|
94
|
-
}
|
95
|
-
}
|
96
|
-
|
97
|
-
impl TryConvert for Wrap<BinaryChunked> {
|
98
|
-
fn try_convert(obj: Value) -> RbResult<Self> {
|
99
|
-
let (seq, len) = get_rbseq(obj)?;
|
100
|
-
let mut builder = BinaryChunkedBuilder::new("", len, len * 25);
|
101
|
-
|
102
|
-
for res in seq.each() {
|
103
|
-
let item = res?;
|
104
|
-
match RString::try_convert(item) {
|
105
|
-
Ok(val) => builder.append_value(unsafe { val.as_slice() }),
|
106
|
-
Err(_) => builder.append_null(),
|
107
|
-
}
|
108
|
-
}
|
109
|
-
Ok(Wrap(builder.finish()))
|
110
|
-
}
|
85
|
+
pub(crate) fn to_series(s: RbSeries) -> Value {
|
86
|
+
let series = series();
|
87
|
+
series
|
88
|
+
.funcall::<_, _, Value>("_from_rbseries", (s,))
|
89
|
+
.unwrap()
|
111
90
|
}
|
112
91
|
|
113
92
|
impl TryConvert for Wrap<NullValues> {
|
@@ -134,102 +113,84 @@ fn struct_dict<'a>(vals: impl Iterator<Item = AnyValue<'a>>, flds: &[Field]) ->
|
|
134
113
|
dict.into_value()
|
135
114
|
}
|
136
115
|
|
137
|
-
impl IntoValue for Wrap<
|
138
|
-
fn into_value_with(self,
|
116
|
+
impl IntoValue for Wrap<DataType> {
|
117
|
+
fn into_value_with(self, _: &Ruby) -> Value {
|
118
|
+
let pl = crate::rb_modules::polars();
|
119
|
+
|
139
120
|
match self.0 {
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
AnyValue::UInt64(v) => ruby.into_value(v),
|
144
|
-
AnyValue::Int8(v) => ruby.into_value(v),
|
145
|
-
AnyValue::Int16(v) => ruby.into_value(v),
|
146
|
-
AnyValue::Int32(v) => ruby.into_value(v),
|
147
|
-
AnyValue::Int64(v) => ruby.into_value(v),
|
148
|
-
AnyValue::Float32(v) => ruby.into_value(v),
|
149
|
-
AnyValue::Float64(v) => ruby.into_value(v),
|
150
|
-
AnyValue::Null => ruby.qnil().as_value(),
|
151
|
-
AnyValue::Boolean(v) => ruby.into_value(v),
|
152
|
-
AnyValue::String(v) => ruby.into_value(v),
|
153
|
-
AnyValue::StringOwned(v) => ruby.into_value(v.as_str()),
|
154
|
-
AnyValue::Categorical(idx, rev, arr) => {
|
155
|
-
let s = if arr.is_null() {
|
156
|
-
rev.get(idx)
|
157
|
-
} else {
|
158
|
-
unsafe { arr.deref_unchecked().value(idx as usize) }
|
159
|
-
};
|
160
|
-
s.into_value()
|
121
|
+
DataType::Int8 => {
|
122
|
+
let class = pl.const_get::<_, Value>("Int8").unwrap();
|
123
|
+
class.funcall("new", ()).unwrap()
|
161
124
|
}
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
utils()
|
166
|
-
.funcall("_to_ruby_datetime", (v, time_unit, time_zone.clone()))
|
167
|
-
.unwrap()
|
125
|
+
DataType::Int16 => {
|
126
|
+
let class = pl.const_get::<_, Value>("Int16").unwrap();
|
127
|
+
class.funcall("new", ()).unwrap()
|
168
128
|
}
|
169
|
-
|
170
|
-
let
|
171
|
-
|
172
|
-
.funcall("_to_ruby_duration", (v, time_unit))
|
173
|
-
.unwrap()
|
129
|
+
DataType::Int32 => {
|
130
|
+
let class = pl.const_get::<_, Value>("Int32").unwrap();
|
131
|
+
class.funcall("new", ()).unwrap()
|
174
132
|
}
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
AnyValue::StructOwned(payload) => struct_dict(payload.0.into_iter(), &payload.1),
|
179
|
-
AnyValue::Object(v) => {
|
180
|
-
let object = v.as_any().downcast_ref::<ObjectValue>().unwrap();
|
181
|
-
object.to_object()
|
133
|
+
DataType::Int64 => {
|
134
|
+
let class = pl.const_get::<_, Value>("Int64").unwrap();
|
135
|
+
class.funcall("new", ()).unwrap()
|
182
136
|
}
|
183
|
-
|
184
|
-
let
|
185
|
-
|
137
|
+
DataType::UInt8 => {
|
138
|
+
let class = pl.const_get::<_, Value>("UInt8").unwrap();
|
139
|
+
class.funcall("new", ()).unwrap()
|
140
|
+
}
|
141
|
+
DataType::UInt16 => {
|
142
|
+
let class = pl.const_get::<_, Value>("UInt16").unwrap();
|
143
|
+
class.funcall("new", ()).unwrap()
|
144
|
+
}
|
145
|
+
DataType::UInt32 => {
|
146
|
+
let class = pl.const_get::<_, Value>("UInt32").unwrap();
|
147
|
+
class.funcall("new", ()).unwrap()
|
148
|
+
}
|
149
|
+
DataType::UInt64 => {
|
150
|
+
let class = pl.const_get::<_, Value>("UInt64").unwrap();
|
151
|
+
class.funcall("new", ()).unwrap()
|
152
|
+
}
|
153
|
+
DataType::Float32 => {
|
154
|
+
let class = pl.const_get::<_, Value>("Float32").unwrap();
|
155
|
+
class.funcall("new", ()).unwrap()
|
156
|
+
}
|
157
|
+
DataType::Float64 => {
|
158
|
+
let class = pl.const_get::<_, Value>("Float64").unwrap();
|
159
|
+
class.funcall("new", ()).unwrap()
|
186
160
|
}
|
187
|
-
AnyValue::Binary(v) => RString::from_slice(v).into_value(),
|
188
|
-
AnyValue::BinaryOwned(v) => RString::from_slice(&v).into_value(),
|
189
|
-
AnyValue::Decimal(v, scale) => utils()
|
190
|
-
.funcall("_to_ruby_decimal", (v.to_string(), -(scale as i32)))
|
191
|
-
.unwrap(),
|
192
|
-
}
|
193
|
-
}
|
194
|
-
}
|
195
|
-
|
196
|
-
impl IntoValue for Wrap<DataType> {
|
197
|
-
fn into_value_with(self, _: &Ruby) -> Value {
|
198
|
-
let pl = crate::rb_modules::polars();
|
199
|
-
|
200
|
-
match self.0 {
|
201
|
-
DataType::Int8 => pl.const_get::<_, Value>("Int8").unwrap(),
|
202
|
-
DataType::Int16 => pl.const_get::<_, Value>("Int16").unwrap(),
|
203
|
-
DataType::Int32 => pl.const_get::<_, Value>("Int32").unwrap(),
|
204
|
-
DataType::Int64 => pl.const_get::<_, Value>("Int64").unwrap(),
|
205
|
-
DataType::UInt8 => pl.const_get::<_, Value>("UInt8").unwrap(),
|
206
|
-
DataType::UInt16 => pl.const_get::<_, Value>("UInt16").unwrap(),
|
207
|
-
DataType::UInt32 => pl.const_get::<_, Value>("UInt32").unwrap(),
|
208
|
-
DataType::UInt64 => pl.const_get::<_, Value>("UInt64").unwrap(),
|
209
|
-
DataType::Float32 => pl.const_get::<_, Value>("Float32").unwrap(),
|
210
|
-
DataType::Float64 => pl.const_get::<_, Value>("Float64").unwrap(),
|
211
161
|
DataType::Decimal(precision, scale) => {
|
212
|
-
let
|
213
|
-
|
162
|
+
let class = pl.const_get::<_, Value>("Decimal").unwrap();
|
163
|
+
class
|
214
164
|
.funcall::<_, _, Value>("new", (precision, scale))
|
215
165
|
.unwrap()
|
216
166
|
}
|
217
|
-
DataType::Boolean =>
|
218
|
-
|
219
|
-
|
167
|
+
DataType::Boolean => {
|
168
|
+
let class = pl.const_get::<_, Value>("Boolean").unwrap();
|
169
|
+
class.funcall("new", ()).unwrap()
|
170
|
+
}
|
171
|
+
DataType::String => {
|
172
|
+
let class = pl.const_get::<_, Value>("String").unwrap();
|
173
|
+
class.funcall("new", ()).unwrap()
|
174
|
+
}
|
175
|
+
DataType::Binary => {
|
176
|
+
let class = pl.const_get::<_, Value>("Binary").unwrap();
|
177
|
+
class.funcall("new", ()).unwrap()
|
178
|
+
}
|
220
179
|
DataType::Array(inner, size) => {
|
180
|
+
let class = pl.const_get::<_, Value>("Array").unwrap();
|
221
181
|
let inner = Wrap(*inner);
|
222
|
-
let
|
223
|
-
|
224
|
-
.funcall::<_, _, Value>("new", (size, inner))
|
225
|
-
.unwrap()
|
182
|
+
let args = (inner, size);
|
183
|
+
class.funcall::<_, _, Value>("new", args).unwrap()
|
226
184
|
}
|
227
185
|
DataType::List(inner) => {
|
186
|
+
let class = pl.const_get::<_, Value>("List").unwrap();
|
228
187
|
let inner = Wrap(*inner);
|
229
|
-
|
230
|
-
|
188
|
+
class.funcall::<_, _, Value>("new", (inner,)).unwrap()
|
189
|
+
}
|
190
|
+
DataType::Date => {
|
191
|
+
let class = pl.const_get::<_, Value>("Date").unwrap();
|
192
|
+
class.funcall("new", ()).unwrap()
|
231
193
|
}
|
232
|
-
DataType::Date => pl.const_get::<_, Value>("Date").unwrap(),
|
233
194
|
DataType::Datetime(tu, tz) => {
|
234
195
|
let datetime_class = pl.const_get::<_, Value>("Datetime").unwrap();
|
235
196
|
datetime_class
|
@@ -242,9 +203,26 @@ impl IntoValue for Wrap<DataType> {
|
|
242
203
|
.funcall::<_, _, Value>("new", (tu.to_ascii(),))
|
243
204
|
.unwrap()
|
244
205
|
}
|
245
|
-
DataType::Object(_, _) =>
|
246
|
-
|
247
|
-
|
206
|
+
DataType::Object(_, _) => {
|
207
|
+
let class = pl.const_get::<_, Value>("Object").unwrap();
|
208
|
+
class.funcall("new", ()).unwrap()
|
209
|
+
}
|
210
|
+
DataType::Categorical(_, ordering) => {
|
211
|
+
let class = pl.const_get::<_, Value>("Categorical").unwrap();
|
212
|
+
class.funcall("new", (Wrap(ordering),)).unwrap()
|
213
|
+
}
|
214
|
+
DataType::Enum(rev_map, _) => {
|
215
|
+
// we should always have an initialized rev_map coming from rust
|
216
|
+
let categories = rev_map.as_ref().unwrap().get_categories();
|
217
|
+
let class = pl.const_get::<_, Value>("Enum").unwrap();
|
218
|
+
let s = Series::from_arrow("category", categories.to_boxed()).unwrap();
|
219
|
+
let series = to_series(s.into());
|
220
|
+
class.funcall::<_, _, Value>("new", (series,)).unwrap()
|
221
|
+
}
|
222
|
+
DataType::Time => {
|
223
|
+
let class = pl.const_get::<_, Value>("Time").unwrap();
|
224
|
+
class.funcall("new", ()).unwrap()
|
225
|
+
}
|
248
226
|
DataType::Struct(fields) => {
|
249
227
|
let field_class = pl.const_get::<_, Value>("Field").unwrap();
|
250
228
|
let iter = fields.iter().map(|fld| {
|
@@ -260,12 +238,31 @@ impl IntoValue for Wrap<DataType> {
|
|
260
238
|
.funcall::<_, _, Value>("new", (fields,))
|
261
239
|
.unwrap()
|
262
240
|
}
|
263
|
-
DataType::Null =>
|
264
|
-
|
241
|
+
DataType::Null => {
|
242
|
+
let class = pl.const_get::<_, Value>("Null").unwrap();
|
243
|
+
class.funcall("new", ()).unwrap()
|
244
|
+
}
|
245
|
+
DataType::Unknown => {
|
246
|
+
let class = pl.const_get::<_, Value>("Unknown").unwrap();
|
247
|
+
class.funcall("new", ()).unwrap()
|
248
|
+
}
|
249
|
+
DataType::BinaryOffset => {
|
250
|
+
unimplemented!()
|
251
|
+
}
|
265
252
|
}
|
266
253
|
}
|
267
254
|
}
|
268
255
|
|
256
|
+
impl IntoValue for Wrap<CategoricalOrdering> {
|
257
|
+
fn into_value_with(self, _: &Ruby) -> Value {
|
258
|
+
let ordering = match self.0 {
|
259
|
+
CategoricalOrdering::Physical => "physical",
|
260
|
+
CategoricalOrdering::Lexical => "lexical",
|
261
|
+
};
|
262
|
+
ordering.into_value()
|
263
|
+
}
|
264
|
+
}
|
265
|
+
|
269
266
|
impl IntoValue for Wrap<TimeUnit> {
|
270
267
|
fn into_value_with(self, _: &Ruby) -> Value {
|
271
268
|
let tu = match self.0 {
|
@@ -277,114 +274,6 @@ impl IntoValue for Wrap<TimeUnit> {
|
|
277
274
|
}
|
278
275
|
}
|
279
276
|
|
280
|
-
impl IntoValue for Wrap<&StringChunked> {
|
281
|
-
fn into_value_with(self, _: &Ruby) -> Value {
|
282
|
-
let iter = self.0.into_iter();
|
283
|
-
RArray::from_iter(iter).into_value()
|
284
|
-
}
|
285
|
-
}
|
286
|
-
|
287
|
-
impl IntoValue for Wrap<&BinaryChunked> {
|
288
|
-
fn into_value_with(self, _: &Ruby) -> Value {
|
289
|
-
let iter = self
|
290
|
-
.0
|
291
|
-
.into_iter()
|
292
|
-
.map(|opt_bytes| opt_bytes.map(RString::from_slice));
|
293
|
-
RArray::from_iter(iter).into_value()
|
294
|
-
}
|
295
|
-
}
|
296
|
-
|
297
|
-
impl IntoValue for Wrap<&StructChunked> {
|
298
|
-
fn into_value_with(self, _: &Ruby) -> Value {
|
299
|
-
let s = self.0.clone().into_series();
|
300
|
-
// todo! iterate its chunks and flatten.
|
301
|
-
// make series::iter() accept a chunk index.
|
302
|
-
let s = s.rechunk();
|
303
|
-
let iter = s.iter().map(|av| {
|
304
|
-
if let AnyValue::Struct(_, _, flds) = av {
|
305
|
-
struct_dict(av._iter_struct_av(), flds)
|
306
|
-
} else {
|
307
|
-
unreachable!()
|
308
|
-
}
|
309
|
-
});
|
310
|
-
|
311
|
-
RArray::from_iter(iter).into_value()
|
312
|
-
}
|
313
|
-
}
|
314
|
-
|
315
|
-
impl IntoValue for Wrap<&DurationChunked> {
|
316
|
-
fn into_value_with(self, _: &Ruby) -> Value {
|
317
|
-
let utils = utils();
|
318
|
-
let time_unit = Wrap(self.0.time_unit()).into_value();
|
319
|
-
let iter = self.0.into_iter().map(|opt_v| {
|
320
|
-
opt_v.map(|v| {
|
321
|
-
utils
|
322
|
-
.funcall::<_, _, Value>("_to_ruby_duration", (v, time_unit))
|
323
|
-
.unwrap()
|
324
|
-
})
|
325
|
-
});
|
326
|
-
RArray::from_iter(iter).into_value()
|
327
|
-
}
|
328
|
-
}
|
329
|
-
|
330
|
-
impl IntoValue for Wrap<&DatetimeChunked> {
|
331
|
-
fn into_value_with(self, _: &Ruby) -> Value {
|
332
|
-
let utils = utils();
|
333
|
-
let time_unit = Wrap(self.0.time_unit()).into_value();
|
334
|
-
let time_zone = self.0.time_zone().clone().into_value();
|
335
|
-
let iter = self.0.into_iter().map(|opt_v| {
|
336
|
-
opt_v.map(|v| {
|
337
|
-
utils
|
338
|
-
.funcall::<_, _, Value>("_to_ruby_datetime", (v, time_unit, time_zone))
|
339
|
-
.unwrap()
|
340
|
-
})
|
341
|
-
});
|
342
|
-
RArray::from_iter(iter).into_value()
|
343
|
-
}
|
344
|
-
}
|
345
|
-
|
346
|
-
impl IntoValue for Wrap<&TimeChunked> {
|
347
|
-
fn into_value_with(self, _: &Ruby) -> Value {
|
348
|
-
let utils = utils();
|
349
|
-
let iter = self.0.into_iter().map(|opt_v| {
|
350
|
-
opt_v.map(|v| utils.funcall::<_, _, Value>("_to_ruby_time", (v,)).unwrap())
|
351
|
-
});
|
352
|
-
RArray::from_iter(iter).into_value()
|
353
|
-
}
|
354
|
-
}
|
355
|
-
|
356
|
-
impl IntoValue for Wrap<&DateChunked> {
|
357
|
-
fn into_value_with(self, _: &Ruby) -> Value {
|
358
|
-
let utils = utils();
|
359
|
-
let iter = self.0.into_iter().map(|opt_v| {
|
360
|
-
opt_v.map(|v| utils.funcall::<_, _, Value>("_to_ruby_date", (v,)).unwrap())
|
361
|
-
});
|
362
|
-
RArray::from_iter(iter).into_value()
|
363
|
-
}
|
364
|
-
}
|
365
|
-
|
366
|
-
impl IntoValue for Wrap<&DecimalChunked> {
|
367
|
-
fn into_value_with(self, _: &Ruby) -> Value {
|
368
|
-
let utils = utils();
|
369
|
-
let rb_scale = (-(self.0.scale() as i32)).into_value();
|
370
|
-
let iter = self.0.into_iter().map(|opt_v| {
|
371
|
-
opt_v.map(|v| {
|
372
|
-
utils
|
373
|
-
.funcall::<_, _, Value>("_to_ruby_decimal", (v.to_string(), rb_scale))
|
374
|
-
.unwrap()
|
375
|
-
})
|
376
|
-
});
|
377
|
-
RArray::from_iter(iter).into_value()
|
378
|
-
}
|
379
|
-
}
|
380
|
-
|
381
|
-
fn abs_decimal_from_digits(digits: String, exp: i32) -> Option<(i128, usize)> {
|
382
|
-
match digits.parse::<i128>() {
|
383
|
-
Ok(v) => Some((v, ((digits.len() as i32) - exp) as usize)),
|
384
|
-
Err(_) => None,
|
385
|
-
}
|
386
|
-
}
|
387
|
-
|
388
277
|
impl TryConvert for Wrap<Field> {
|
389
278
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
390
279
|
let name: String = ob.funcall("name", ())?;
|
@@ -410,6 +299,7 @@ impl TryConvert for Wrap<DataType> {
|
|
410
299
|
"Polars::Binary" => DataType::Binary,
|
411
300
|
"Polars::Boolean" => DataType::Boolean,
|
412
301
|
"Polars::Categorical" => DataType::Categorical(None, Default::default()),
|
302
|
+
"Polars::Enum" => DataType::Enum(None, Default::default()),
|
413
303
|
"Polars::Date" => DataType::Date,
|
414
304
|
"Polars::Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
|
415
305
|
"Polars::Time" => DataType::Time,
|
@@ -431,6 +321,36 @@ impl TryConvert for Wrap<DataType> {
|
|
431
321
|
} else if String::try_convert(ob).is_err() {
|
432
322
|
let name = unsafe { ob.class().name() }.into_owned();
|
433
323
|
match name.as_str() {
|
324
|
+
"Polars::Int8" => DataType::Int8,
|
325
|
+
"Polars::Int16" => DataType::Int16,
|
326
|
+
"Polars::Int32" => DataType::Int32,
|
327
|
+
"Polars::Int64" => DataType::Int64,
|
328
|
+
"Polars::UInt8" => DataType::UInt8,
|
329
|
+
"Polars::UInt16" => DataType::UInt16,
|
330
|
+
"Polars::UInt32" => DataType::UInt32,
|
331
|
+
"Polars::UInt64" => DataType::UInt64,
|
332
|
+
"Polars::String" => DataType::String,
|
333
|
+
"Polars::Binary" => DataType::Binary,
|
334
|
+
"Polars::Boolean" => DataType::Boolean,
|
335
|
+
"Polars::Categorical" => {
|
336
|
+
let ordering = ob
|
337
|
+
.funcall::<_, _, Wrap<CategoricalOrdering>>("ordering", ())?
|
338
|
+
.0;
|
339
|
+
DataType::Categorical(None, ordering)
|
340
|
+
}
|
341
|
+
"Polars::Enum" => {
|
342
|
+
let categories = ob.funcall("categories", ()).unwrap();
|
343
|
+
let s = get_series(categories)?;
|
344
|
+
let ca = s.str().map_err(RbPolarsErr::from)?;
|
345
|
+
let categories = ca.downcast_iter().next().unwrap().clone();
|
346
|
+
create_enum_data_type(categories)
|
347
|
+
}
|
348
|
+
"Polars::Date" => DataType::Date,
|
349
|
+
"Polars::Time" => DataType::Time,
|
350
|
+
"Polars::Float32" => DataType::Float32,
|
351
|
+
"Polars::Float64" => DataType::Float64,
|
352
|
+
"Polars::Null" => DataType::Null,
|
353
|
+
"Polars::Unknown" => DataType::Unknown,
|
434
354
|
"Polars::Duration" => {
|
435
355
|
let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
|
436
356
|
let time_unit = Wrap::<TimeUnit>::try_convert(time_unit)?.0;
|
@@ -503,102 +423,6 @@ impl TryConvert for Wrap<DataType> {
|
|
503
423
|
}
|
504
424
|
}
|
505
425
|
|
506
|
-
impl<'s> TryConvert for Wrap<AnyValue<'s>> {
|
507
|
-
fn try_convert(ob: Value) -> RbResult<Self> {
|
508
|
-
if ob.is_kind_of(class::true_class()) || ob.is_kind_of(class::false_class()) {
|
509
|
-
Ok(AnyValue::Boolean(bool::try_convert(ob)?).into())
|
510
|
-
} else if let Some(v) = Integer::from_value(ob) {
|
511
|
-
Ok(AnyValue::Int64(v.to_i64()?).into())
|
512
|
-
} else if let Some(v) = Float::from_value(ob) {
|
513
|
-
Ok(AnyValue::Float64(v.to_f64()).into())
|
514
|
-
} else if let Some(v) = RString::from_value(ob) {
|
515
|
-
if v.enc_get() == Index::utf8() {
|
516
|
-
Ok(AnyValue::StringOwned(v.to_string()?.into()).into())
|
517
|
-
} else {
|
518
|
-
Ok(AnyValue::BinaryOwned(unsafe { v.as_slice() }.to_vec()).into())
|
519
|
-
}
|
520
|
-
// call is_a? for ActiveSupport::TimeWithZone
|
521
|
-
} else if ob.funcall::<_, _, bool>("is_a?", (class::time(),))? {
|
522
|
-
let sec = ob.funcall::<_, _, i64>("to_i", ())?;
|
523
|
-
let nsec = ob.funcall::<_, _, i64>("nsec", ())?;
|
524
|
-
let v = sec * 1_000_000_000 + nsec;
|
525
|
-
// TODO support time zone when possible
|
526
|
-
// https://github.com/pola-rs/polars/issues/9103
|
527
|
-
Ok(AnyValue::Datetime(v, TimeUnit::Nanoseconds, &None).into())
|
528
|
-
} else if ob.is_nil() {
|
529
|
-
Ok(AnyValue::Null.into())
|
530
|
-
} else if let Some(dict) = RHash::from_value(ob) {
|
531
|
-
let len = dict.len();
|
532
|
-
let mut keys = Vec::with_capacity(len);
|
533
|
-
let mut vals = Vec::with_capacity(len);
|
534
|
-
dict.foreach(|k: Value, v: Value| {
|
535
|
-
let key = String::try_convert(k)?;
|
536
|
-
let val = Wrap::<AnyValue>::try_convert(v)?.0;
|
537
|
-
let dtype = DataType::from(&val);
|
538
|
-
keys.push(Field::new(&key, dtype));
|
539
|
-
vals.push(val);
|
540
|
-
Ok(ForEach::Continue)
|
541
|
-
})?;
|
542
|
-
Ok(Wrap(AnyValue::StructOwned(Box::new((vals, keys)))))
|
543
|
-
} else if let Some(v) = RArray::from_value(ob) {
|
544
|
-
if v.is_empty() {
|
545
|
-
Ok(Wrap(AnyValue::List(Series::new_empty("", &DataType::Null))))
|
546
|
-
} else {
|
547
|
-
let list = v;
|
548
|
-
|
549
|
-
let mut avs = Vec::with_capacity(25);
|
550
|
-
let mut iter = list.each();
|
551
|
-
|
552
|
-
for item in (&mut iter).take(25) {
|
553
|
-
avs.push(Wrap::<AnyValue>::try_convert(item?)?.0)
|
554
|
-
}
|
555
|
-
|
556
|
-
let (dtype, _n_types) = any_values_to_dtype(&avs).map_err(RbPolarsErr::from)?;
|
557
|
-
|
558
|
-
// push the rest
|
559
|
-
avs.reserve(list.len());
|
560
|
-
for item in iter {
|
561
|
-
avs.push(Wrap::<AnyValue>::try_convert(item?)?.0)
|
562
|
-
}
|
563
|
-
|
564
|
-
let s = Series::from_any_values_and_dtype("", &avs, &dtype, true)
|
565
|
-
.map_err(RbPolarsErr::from)?;
|
566
|
-
Ok(Wrap(AnyValue::List(s)))
|
567
|
-
}
|
568
|
-
} else if ob.is_kind_of(crate::rb_modules::datetime()) {
|
569
|
-
let sec: i64 = ob.funcall("to_i", ())?;
|
570
|
-
let nsec: i64 = ob.funcall("nsec", ())?;
|
571
|
-
Ok(Wrap(AnyValue::Datetime(
|
572
|
-
sec * 1_000_000_000 + nsec,
|
573
|
-
TimeUnit::Nanoseconds,
|
574
|
-
&None,
|
575
|
-
)))
|
576
|
-
} else if ob.is_kind_of(crate::rb_modules::date()) {
|
577
|
-
// convert to DateTime for UTC
|
578
|
-
let v = ob
|
579
|
-
.funcall::<_, _, Value>("to_datetime", ())?
|
580
|
-
.funcall::<_, _, Value>("to_time", ())?
|
581
|
-
.funcall::<_, _, i64>("to_i", ())?;
|
582
|
-
Ok(Wrap(AnyValue::Date((v / 86400) as i32)))
|
583
|
-
} else if ob.is_kind_of(crate::rb_modules::bigdecimal()) {
|
584
|
-
let (sign, digits, _, exp): (i8, String, i32, i32) = ob.funcall("split", ()).unwrap();
|
585
|
-
let (mut v, scale) = abs_decimal_from_digits(digits, exp).ok_or_else(|| {
|
586
|
-
RbPolarsErr::other("BigDecimal is too large to fit in Decimal128".into())
|
587
|
-
})?;
|
588
|
-
if sign < 0 {
|
589
|
-
// TODO better error
|
590
|
-
v = v.checked_neg().unwrap();
|
591
|
-
}
|
592
|
-
Ok(Wrap(AnyValue::Decimal(v, scale)))
|
593
|
-
} else {
|
594
|
-
Err(RbPolarsErr::other(format!(
|
595
|
-
"object type not supported {:?}",
|
596
|
-
ob
|
597
|
-
)))
|
598
|
-
}
|
599
|
-
}
|
600
|
-
}
|
601
|
-
|
602
426
|
impl<'s> TryConvert for Wrap<Row<'s>> {
|
603
427
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
604
428
|
let mut vals: Vec<Wrap<AnyValue<'s>>> = Vec::new();
|
@@ -662,6 +486,15 @@ impl TotalEq for ObjectValue {
|
|
662
486
|
}
|
663
487
|
}
|
664
488
|
|
489
|
+
impl TotalHash for ObjectValue {
|
490
|
+
fn tot_hash<H>(&self, state: &mut H)
|
491
|
+
where
|
492
|
+
H: Hasher,
|
493
|
+
{
|
494
|
+
self.hash(state);
|
495
|
+
}
|
496
|
+
}
|
497
|
+
|
665
498
|
impl Display for ObjectValue {
|
666
499
|
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
667
500
|
write!(f, "{}", self.to_object())
|
@@ -715,24 +548,33 @@ impl Default for ObjectValue {
|
|
715
548
|
|
716
549
|
pub(crate) fn dicts_to_rows(
|
717
550
|
records: &Value,
|
718
|
-
infer_schema_len: usize
|
551
|
+
infer_schema_len: Option<usize>,
|
552
|
+
schema_columns: PlIndexSet<String>,
|
719
553
|
) -> RbResult<(Vec<Row>, Vec<String>)> {
|
554
|
+
let infer_schema_len = infer_schema_len.map(|n| std::cmp::max(1, n));
|
720
555
|
let (dicts, len) = get_rbseq(*records)?;
|
721
556
|
|
722
|
-
let
|
723
|
-
|
724
|
-
|
725
|
-
|
726
|
-
|
727
|
-
|
728
|
-
|
729
|
-
|
730
|
-
|
731
|
-
|
732
|
-
|
733
|
-
|
734
|
-
|
735
|
-
|
557
|
+
let key_names = {
|
558
|
+
if !schema_columns.is_empty() {
|
559
|
+
schema_columns
|
560
|
+
} else {
|
561
|
+
let mut inferred_keys = PlIndexSet::new();
|
562
|
+
for d in dicts.each().take(infer_schema_len.unwrap_or(usize::MAX)) {
|
563
|
+
let d = d?;
|
564
|
+
let d = RHash::try_convert(d)?;
|
565
|
+
|
566
|
+
d.foreach(|name: Value, _value: Value| {
|
567
|
+
if let Some(v) = Symbol::from_value(name) {
|
568
|
+
inferred_keys.insert(v.name()?.into());
|
569
|
+
} else {
|
570
|
+
inferred_keys.insert(String::try_convert(name)?);
|
571
|
+
};
|
572
|
+
Ok(ForEach::Continue)
|
573
|
+
})?;
|
574
|
+
}
|
575
|
+
inferred_keys
|
576
|
+
}
|
577
|
+
};
|
736
578
|
|
737
579
|
let mut rows = Vec::with_capacity(len);
|
738
580
|
|
@@ -895,8 +737,7 @@ impl TryConvert for Wrap<JoinType> {
|
|
895
737
|
"outer_coalesce" => JoinType::Outer { coalesce: true },
|
896
738
|
"semi" => JoinType::Semi,
|
897
739
|
"anti" => JoinType::Anti,
|
898
|
-
|
899
|
-
// "cross" => JoinType::Cross,
|
740
|
+
"cross" => JoinType::Cross,
|
900
741
|
v => {
|
901
742
|
return Err(RbValueError::new_err(format!(
|
902
743
|
"how must be one of {{'inner', 'left', 'outer', 'semi', 'anti', 'cross'}}, got {}",
|
@@ -940,6 +781,21 @@ impl TryConvert for Wrap<ListToStructWidthStrategy> {
|
|
940
781
|
}
|
941
782
|
}
|
942
783
|
|
784
|
+
impl TryConvert for Wrap<NonExistent> {
|
785
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
786
|
+
let parsed = match String::try_convert(ob)?.as_str() {
|
787
|
+
"null" => NonExistent::Null,
|
788
|
+
"raise" => NonExistent::Raise,
|
789
|
+
v => {
|
790
|
+
return Err(RbValueError::new_err(format!(
|
791
|
+
"`non_existent` must be one of {{'null', 'raise'}}, got {v}",
|
792
|
+
)))
|
793
|
+
}
|
794
|
+
};
|
795
|
+
Ok(Wrap(parsed))
|
796
|
+
}
|
797
|
+
}
|
798
|
+
|
943
799
|
impl TryConvert for Wrap<NullBehavior> {
|
944
800
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
945
801
|
let parsed = match String::try_convert(ob)?.as_str() {
|
@@ -1062,6 +918,22 @@ impl TryConvert for Wrap<UniqueKeepStrategy> {
|
|
1062
918
|
}
|
1063
919
|
}
|
1064
920
|
|
921
|
+
impl TryConvert for Wrap<IpcCompression> {
|
922
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
923
|
+
let parsed = match String::try_convert(ob)?.as_str() {
|
924
|
+
"lz4" => IpcCompression::LZ4,
|
925
|
+
"zstd" => IpcCompression::ZSTD,
|
926
|
+
v => {
|
927
|
+
return Err(RbValueError::new_err(format!(
|
928
|
+
"compression must be one of {{'lz4', 'zstd'}}, got {}",
|
929
|
+
v
|
930
|
+
)))
|
931
|
+
}
|
932
|
+
};
|
933
|
+
Ok(Wrap(parsed))
|
934
|
+
}
|
935
|
+
}
|
936
|
+
|
1065
937
|
impl TryConvert for Wrap<SearchSortedSide> {
|
1066
938
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
1067
939
|
let parsed = match String::try_convert(ob)?.as_str() {
|
@@ -1078,6 +950,56 @@ impl TryConvert for Wrap<SearchSortedSide> {
|
|
1078
950
|
}
|
1079
951
|
}
|
1080
952
|
|
953
|
+
impl TryConvert for Wrap<WindowMapping> {
|
954
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
955
|
+
let parsed = match String::try_convert(ob)?.as_str() {
|
956
|
+
"group_to_rows" => WindowMapping::GroupsToRows,
|
957
|
+
"join" => WindowMapping::Join,
|
958
|
+
"explode" => WindowMapping::Explode,
|
959
|
+
v => {
|
960
|
+
return Err(RbValueError::new_err(format!(
|
961
|
+
"`mapping_strategy` must be one of {{'group_to_rows', 'join', 'explode'}}, got {v}",
|
962
|
+
)))
|
963
|
+
}
|
964
|
+
};
|
965
|
+
Ok(Wrap(parsed))
|
966
|
+
}
|
967
|
+
}
|
968
|
+
|
969
|
+
impl TryConvert for Wrap<JoinValidation> {
|
970
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
971
|
+
let parsed = match String::try_convert(ob)?.as_str() {
|
972
|
+
"1:1" => JoinValidation::OneToOne,
|
973
|
+
"1:m" => JoinValidation::OneToMany,
|
974
|
+
"m:m" => JoinValidation::ManyToMany,
|
975
|
+
"m:1" => JoinValidation::ManyToOne,
|
976
|
+
v => {
|
977
|
+
return Err(RbValueError::new_err(format!(
|
978
|
+
"`validate` must be one of {{'m:m', 'm:1', '1:m', '1:1'}}, got {v}",
|
979
|
+
)))
|
980
|
+
}
|
981
|
+
};
|
982
|
+
Ok(Wrap(parsed))
|
983
|
+
}
|
984
|
+
}
|
985
|
+
|
986
|
+
impl TryConvert for Wrap<QuoteStyle> {
|
987
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
988
|
+
let parsed = match String::try_convert(ob)?.as_str() {
|
989
|
+
"always" => QuoteStyle::Always,
|
990
|
+
"necessary" => QuoteStyle::Necessary,
|
991
|
+
"non_numeric" => QuoteStyle::NonNumeric,
|
992
|
+
"never" => QuoteStyle::Never,
|
993
|
+
v => {
|
994
|
+
return Err(RbValueError::new_err(format!(
|
995
|
+
"`quote_style` must be one of {{'always', 'necessary', 'non_numeric', 'never'}}, got {v}",
|
996
|
+
)))
|
997
|
+
},
|
998
|
+
};
|
999
|
+
Ok(Wrap(parsed))
|
1000
|
+
}
|
1001
|
+
}
|
1002
|
+
|
1081
1003
|
pub fn parse_fill_null_strategy(
|
1082
1004
|
strategy: &str,
|
1083
1005
|
limit: FillNullLimit,
|
@@ -1150,3 +1072,12 @@ where
|
|
1150
1072
|
{
|
1151
1073
|
container.into_iter().map(|s| s.as_ref().into()).collect()
|
1152
1074
|
}
|
1075
|
+
|
1076
|
+
impl TryConvert for Wrap<NonZeroUsize> {
|
1077
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
1078
|
+
let v = usize::try_convert(ob)?;
|
1079
|
+
NonZeroUsize::new(v)
|
1080
|
+
.map(Wrap)
|
1081
|
+
.ok_or(RbValueError::new_err("must be non-zero".into()))
|
1082
|
+
}
|
1083
|
+
}
|