polars-df 0.8.0 → 0.10.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +42 -1
- data/Cargo.lock +159 -66
- data/Cargo.toml +0 -3
- data/LICENSE.txt +1 -1
- data/README.md +3 -2
- data/ext/polars/Cargo.toml +18 -8
- data/ext/polars/src/batched_csv.rs +7 -5
- data/ext/polars/src/conversion/anyvalue.rs +186 -0
- data/ext/polars/src/conversion/chunked_array.rs +140 -0
- data/ext/polars/src/{conversion.rs → conversion/mod.rs} +273 -342
- data/ext/polars/src/dataframe.rs +108 -66
- data/ext/polars/src/expr/array.rs +78 -0
- data/ext/polars/src/expr/datetime.rs +29 -58
- data/ext/polars/src/expr/general.rs +83 -36
- data/ext/polars/src/expr/list.rs +58 -6
- data/ext/polars/src/expr/meta.rs +48 -0
- data/ext/polars/src/expr/rolling.rs +1 -0
- data/ext/polars/src/expr/string.rs +62 -11
- data/ext/polars/src/expr/struct.rs +8 -4
- data/ext/polars/src/file.rs +158 -11
- data/ext/polars/src/functions/aggregation.rs +6 -0
- data/ext/polars/src/functions/lazy.rs +120 -50
- data/ext/polars/src/functions/meta.rs +45 -1
- data/ext/polars/src/functions/string_cache.rs +14 -0
- data/ext/polars/src/functions/whenthen.rs +47 -17
- data/ext/polars/src/{lazyframe.rs → lazyframe/mod.rs} +195 -40
- data/ext/polars/src/lib.rs +246 -179
- data/ext/polars/src/map/dataframe.rs +17 -9
- data/ext/polars/src/series/aggregation.rs +20 -0
- data/ext/polars/src/series/mod.rs +35 -4
- data/lib/polars/array_expr.rb +453 -0
- data/lib/polars/array_name_space.rb +346 -0
- data/lib/polars/batched_csv_reader.rb +4 -2
- data/lib/polars/cat_expr.rb +24 -0
- data/lib/polars/cat_name_space.rb +75 -0
- data/lib/polars/config.rb +2 -2
- data/lib/polars/data_frame.rb +306 -96
- data/lib/polars/data_types.rb +191 -28
- data/lib/polars/date_time_expr.rb +41 -18
- data/lib/polars/date_time_name_space.rb +9 -3
- data/lib/polars/exceptions.rb +12 -1
- data/lib/polars/expr.rb +898 -215
- data/lib/polars/functions/aggregation/horizontal.rb +246 -0
- data/lib/polars/functions/aggregation/vertical.rb +282 -0
- data/lib/polars/functions/as_datatype.rb +248 -0
- data/lib/polars/functions/col.rb +47 -0
- data/lib/polars/functions/eager.rb +182 -0
- data/lib/polars/functions/lazy.rb +1280 -0
- data/lib/polars/functions/len.rb +49 -0
- data/lib/polars/functions/lit.rb +35 -0
- data/lib/polars/functions/random.rb +16 -0
- data/lib/polars/functions/range/date_range.rb +103 -0
- data/lib/polars/functions/range/int_range.rb +51 -0
- data/lib/polars/functions/repeat.rb +144 -0
- data/lib/polars/functions/whenthen.rb +96 -0
- data/lib/polars/functions.rb +29 -416
- data/lib/polars/group_by.rb +2 -2
- data/lib/polars/io.rb +36 -31
- data/lib/polars/lazy_frame.rb +405 -88
- data/lib/polars/list_expr.rb +158 -8
- data/lib/polars/list_name_space.rb +102 -0
- data/lib/polars/meta_expr.rb +175 -7
- data/lib/polars/series.rb +282 -41
- data/lib/polars/string_cache.rb +75 -0
- data/lib/polars/string_expr.rb +413 -96
- data/lib/polars/string_name_space.rb +4 -4
- data/lib/polars/testing.rb +507 -0
- data/lib/polars/utils.rb +106 -8
- data/lib/polars/version.rb +1 -1
- data/lib/polars/whenthen.rb +83 -0
- data/lib/polars.rb +16 -4
- metadata +37 -8
- data/lib/polars/lazy_functions.rb +0 -1181
- data/lib/polars/when.rb +0 -16
- data/lib/polars/when_then.rb +0 -19
@@ -1,24 +1,28 @@
|
|
1
|
+
pub(crate) mod anyvalue;
|
2
|
+
mod chunked_array;
|
3
|
+
|
1
4
|
use std::fmt::{Debug, Display, Formatter};
|
2
5
|
use std::hash::{Hash, Hasher};
|
6
|
+
use std::num::NonZeroUsize;
|
3
7
|
|
4
|
-
use magnus::encoding::{EncodingCapable, Index};
|
5
8
|
use magnus::{
|
6
|
-
class, exception, prelude::*, r_hash::ForEach, value::Opaque,
|
7
|
-
|
9
|
+
class, exception, prelude::*, r_hash::ForEach, value::Opaque, IntoValue, Module, RArray, RHash,
|
10
|
+
Ruby, Symbol, TryConvert, Value,
|
8
11
|
};
|
9
12
|
use polars::chunked_array::object::PolarsObjectSafe;
|
10
13
|
use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
|
11
14
|
use polars::datatypes::AnyValue;
|
12
|
-
use polars::frame::row::
|
15
|
+
use polars::frame::row::Row;
|
13
16
|
use polars::frame::NullStrategy;
|
14
17
|
use polars::io::avro::AvroCompression;
|
15
18
|
use polars::prelude::*;
|
16
19
|
use polars::series::ops::NullBehavior;
|
17
|
-
use
|
20
|
+
use polars_core::utils::arrow::array::Array;
|
21
|
+
use polars_utils::total_ord::{TotalEq, TotalHash};
|
18
22
|
use smartstring::alias::String as SmartString;
|
19
23
|
|
20
24
|
use crate::object::OBJECT_NAME;
|
21
|
-
use crate::rb_modules::
|
25
|
+
use crate::rb_modules::series;
|
22
26
|
use crate::{RbDataFrame, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbTypeError, RbValueError};
|
23
27
|
|
24
28
|
pub(crate) fn slice_to_wrapped<T>(slice: &[T]) -> &[Wrap<T>] {
|
@@ -78,36 +82,11 @@ pub(crate) fn get_series(obj: Value) -> RbResult<Series> {
|
|
78
82
|
Ok(rbs.series.borrow().clone())
|
79
83
|
}
|
80
84
|
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
for res in seq.each() {
|
87
|
-
let item = res?;
|
88
|
-
match String::try_convert(item) {
|
89
|
-
Ok(val) => builder.append_value(&val),
|
90
|
-
Err(_) => builder.append_null(),
|
91
|
-
}
|
92
|
-
}
|
93
|
-
Ok(Wrap(builder.finish()))
|
94
|
-
}
|
95
|
-
}
|
96
|
-
|
97
|
-
impl TryConvert for Wrap<BinaryChunked> {
|
98
|
-
fn try_convert(obj: Value) -> RbResult<Self> {
|
99
|
-
let (seq, len) = get_rbseq(obj)?;
|
100
|
-
let mut builder = BinaryChunkedBuilder::new("", len, len * 25);
|
101
|
-
|
102
|
-
for res in seq.each() {
|
103
|
-
let item = res?;
|
104
|
-
match RString::try_convert(item) {
|
105
|
-
Ok(val) => builder.append_value(unsafe { val.as_slice() }),
|
106
|
-
Err(_) => builder.append_null(),
|
107
|
-
}
|
108
|
-
}
|
109
|
-
Ok(Wrap(builder.finish()))
|
110
|
-
}
|
85
|
+
pub(crate) fn to_series(s: RbSeries) -> Value {
|
86
|
+
let series = series();
|
87
|
+
series
|
88
|
+
.funcall::<_, _, Value>("_from_rbseries", (s,))
|
89
|
+
.unwrap()
|
111
90
|
}
|
112
91
|
|
113
92
|
impl TryConvert for Wrap<NullValues> {
|
@@ -134,102 +113,84 @@ fn struct_dict<'a>(vals: impl Iterator<Item = AnyValue<'a>>, flds: &[Field]) ->
|
|
134
113
|
dict.into_value()
|
135
114
|
}
|
136
115
|
|
137
|
-
impl IntoValue for Wrap<
|
138
|
-
fn into_value_with(self,
|
116
|
+
impl IntoValue for Wrap<DataType> {
|
117
|
+
fn into_value_with(self, _: &Ruby) -> Value {
|
118
|
+
let pl = crate::rb_modules::polars();
|
119
|
+
|
139
120
|
match self.0 {
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
AnyValue::UInt64(v) => ruby.into_value(v),
|
144
|
-
AnyValue::Int8(v) => ruby.into_value(v),
|
145
|
-
AnyValue::Int16(v) => ruby.into_value(v),
|
146
|
-
AnyValue::Int32(v) => ruby.into_value(v),
|
147
|
-
AnyValue::Int64(v) => ruby.into_value(v),
|
148
|
-
AnyValue::Float32(v) => ruby.into_value(v),
|
149
|
-
AnyValue::Float64(v) => ruby.into_value(v),
|
150
|
-
AnyValue::Null => ruby.qnil().as_value(),
|
151
|
-
AnyValue::Boolean(v) => ruby.into_value(v),
|
152
|
-
AnyValue::String(v) => ruby.into_value(v),
|
153
|
-
AnyValue::StringOwned(v) => ruby.into_value(v.as_str()),
|
154
|
-
AnyValue::Categorical(idx, rev, arr) => {
|
155
|
-
let s = if arr.is_null() {
|
156
|
-
rev.get(idx)
|
157
|
-
} else {
|
158
|
-
unsafe { arr.deref_unchecked().value(idx as usize) }
|
159
|
-
};
|
160
|
-
s.into_value()
|
121
|
+
DataType::Int8 => {
|
122
|
+
let class = pl.const_get::<_, Value>("Int8").unwrap();
|
123
|
+
class.funcall("new", ()).unwrap()
|
161
124
|
}
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
utils()
|
166
|
-
.funcall("_to_ruby_datetime", (v, time_unit, time_zone.clone()))
|
167
|
-
.unwrap()
|
125
|
+
DataType::Int16 => {
|
126
|
+
let class = pl.const_get::<_, Value>("Int16").unwrap();
|
127
|
+
class.funcall("new", ()).unwrap()
|
168
128
|
}
|
169
|
-
|
170
|
-
let
|
171
|
-
|
172
|
-
.funcall("_to_ruby_duration", (v, time_unit))
|
173
|
-
.unwrap()
|
129
|
+
DataType::Int32 => {
|
130
|
+
let class = pl.const_get::<_, Value>("Int32").unwrap();
|
131
|
+
class.funcall("new", ()).unwrap()
|
174
132
|
}
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
AnyValue::StructOwned(payload) => struct_dict(payload.0.into_iter(), &payload.1),
|
179
|
-
AnyValue::Object(v) => {
|
180
|
-
let object = v.as_any().downcast_ref::<ObjectValue>().unwrap();
|
181
|
-
object.to_object()
|
133
|
+
DataType::Int64 => {
|
134
|
+
let class = pl.const_get::<_, Value>("Int64").unwrap();
|
135
|
+
class.funcall("new", ()).unwrap()
|
182
136
|
}
|
183
|
-
|
184
|
-
let
|
185
|
-
|
137
|
+
DataType::UInt8 => {
|
138
|
+
let class = pl.const_get::<_, Value>("UInt8").unwrap();
|
139
|
+
class.funcall("new", ()).unwrap()
|
140
|
+
}
|
141
|
+
DataType::UInt16 => {
|
142
|
+
let class = pl.const_get::<_, Value>("UInt16").unwrap();
|
143
|
+
class.funcall("new", ()).unwrap()
|
144
|
+
}
|
145
|
+
DataType::UInt32 => {
|
146
|
+
let class = pl.const_get::<_, Value>("UInt32").unwrap();
|
147
|
+
class.funcall("new", ()).unwrap()
|
148
|
+
}
|
149
|
+
DataType::UInt64 => {
|
150
|
+
let class = pl.const_get::<_, Value>("UInt64").unwrap();
|
151
|
+
class.funcall("new", ()).unwrap()
|
152
|
+
}
|
153
|
+
DataType::Float32 => {
|
154
|
+
let class = pl.const_get::<_, Value>("Float32").unwrap();
|
155
|
+
class.funcall("new", ()).unwrap()
|
156
|
+
}
|
157
|
+
DataType::Float64 => {
|
158
|
+
let class = pl.const_get::<_, Value>("Float64").unwrap();
|
159
|
+
class.funcall("new", ()).unwrap()
|
186
160
|
}
|
187
|
-
AnyValue::Binary(v) => RString::from_slice(v).into_value(),
|
188
|
-
AnyValue::BinaryOwned(v) => RString::from_slice(&v).into_value(),
|
189
|
-
AnyValue::Decimal(v, scale) => utils()
|
190
|
-
.funcall("_to_ruby_decimal", (v.to_string(), -(scale as i32)))
|
191
|
-
.unwrap(),
|
192
|
-
}
|
193
|
-
}
|
194
|
-
}
|
195
|
-
|
196
|
-
impl IntoValue for Wrap<DataType> {
|
197
|
-
fn into_value_with(self, _: &Ruby) -> Value {
|
198
|
-
let pl = crate::rb_modules::polars();
|
199
|
-
|
200
|
-
match self.0 {
|
201
|
-
DataType::Int8 => pl.const_get::<_, Value>("Int8").unwrap(),
|
202
|
-
DataType::Int16 => pl.const_get::<_, Value>("Int16").unwrap(),
|
203
|
-
DataType::Int32 => pl.const_get::<_, Value>("Int32").unwrap(),
|
204
|
-
DataType::Int64 => pl.const_get::<_, Value>("Int64").unwrap(),
|
205
|
-
DataType::UInt8 => pl.const_get::<_, Value>("UInt8").unwrap(),
|
206
|
-
DataType::UInt16 => pl.const_get::<_, Value>("UInt16").unwrap(),
|
207
|
-
DataType::UInt32 => pl.const_get::<_, Value>("UInt32").unwrap(),
|
208
|
-
DataType::UInt64 => pl.const_get::<_, Value>("UInt64").unwrap(),
|
209
|
-
DataType::Float32 => pl.const_get::<_, Value>("Float32").unwrap(),
|
210
|
-
DataType::Float64 => pl.const_get::<_, Value>("Float64").unwrap(),
|
211
161
|
DataType::Decimal(precision, scale) => {
|
212
|
-
let
|
213
|
-
|
162
|
+
let class = pl.const_get::<_, Value>("Decimal").unwrap();
|
163
|
+
class
|
214
164
|
.funcall::<_, _, Value>("new", (precision, scale))
|
215
165
|
.unwrap()
|
216
166
|
}
|
217
|
-
DataType::Boolean =>
|
218
|
-
|
219
|
-
|
167
|
+
DataType::Boolean => {
|
168
|
+
let class = pl.const_get::<_, Value>("Boolean").unwrap();
|
169
|
+
class.funcall("new", ()).unwrap()
|
170
|
+
}
|
171
|
+
DataType::String => {
|
172
|
+
let class = pl.const_get::<_, Value>("String").unwrap();
|
173
|
+
class.funcall("new", ()).unwrap()
|
174
|
+
}
|
175
|
+
DataType::Binary => {
|
176
|
+
let class = pl.const_get::<_, Value>("Binary").unwrap();
|
177
|
+
class.funcall("new", ()).unwrap()
|
178
|
+
}
|
220
179
|
DataType::Array(inner, size) => {
|
180
|
+
let class = pl.const_get::<_, Value>("Array").unwrap();
|
221
181
|
let inner = Wrap(*inner);
|
222
|
-
let
|
223
|
-
|
224
|
-
.funcall::<_, _, Value>("new", (size, inner))
|
225
|
-
.unwrap()
|
182
|
+
let args = (inner, size);
|
183
|
+
class.funcall::<_, _, Value>("new", args).unwrap()
|
226
184
|
}
|
227
185
|
DataType::List(inner) => {
|
186
|
+
let class = pl.const_get::<_, Value>("List").unwrap();
|
228
187
|
let inner = Wrap(*inner);
|
229
|
-
|
230
|
-
|
188
|
+
class.funcall::<_, _, Value>("new", (inner,)).unwrap()
|
189
|
+
}
|
190
|
+
DataType::Date => {
|
191
|
+
let class = pl.const_get::<_, Value>("Date").unwrap();
|
192
|
+
class.funcall("new", ()).unwrap()
|
231
193
|
}
|
232
|
-
DataType::Date => pl.const_get::<_, Value>("Date").unwrap(),
|
233
194
|
DataType::Datetime(tu, tz) => {
|
234
195
|
let datetime_class = pl.const_get::<_, Value>("Datetime").unwrap();
|
235
196
|
datetime_class
|
@@ -242,9 +203,26 @@ impl IntoValue for Wrap<DataType> {
|
|
242
203
|
.funcall::<_, _, Value>("new", (tu.to_ascii(),))
|
243
204
|
.unwrap()
|
244
205
|
}
|
245
|
-
DataType::Object(_, _) =>
|
246
|
-
|
247
|
-
|
206
|
+
DataType::Object(_, _) => {
|
207
|
+
let class = pl.const_get::<_, Value>("Object").unwrap();
|
208
|
+
class.funcall("new", ()).unwrap()
|
209
|
+
}
|
210
|
+
DataType::Categorical(_, ordering) => {
|
211
|
+
let class = pl.const_get::<_, Value>("Categorical").unwrap();
|
212
|
+
class.funcall("new", (Wrap(ordering),)).unwrap()
|
213
|
+
}
|
214
|
+
DataType::Enum(rev_map, _) => {
|
215
|
+
// we should always have an initialized rev_map coming from rust
|
216
|
+
let categories = rev_map.as_ref().unwrap().get_categories();
|
217
|
+
let class = pl.const_get::<_, Value>("Enum").unwrap();
|
218
|
+
let s = Series::from_arrow("category", categories.to_boxed()).unwrap();
|
219
|
+
let series = to_series(s.into());
|
220
|
+
class.funcall::<_, _, Value>("new", (series,)).unwrap()
|
221
|
+
}
|
222
|
+
DataType::Time => {
|
223
|
+
let class = pl.const_get::<_, Value>("Time").unwrap();
|
224
|
+
class.funcall("new", ()).unwrap()
|
225
|
+
}
|
248
226
|
DataType::Struct(fields) => {
|
249
227
|
let field_class = pl.const_get::<_, Value>("Field").unwrap();
|
250
228
|
let iter = fields.iter().map(|fld| {
|
@@ -260,12 +238,31 @@ impl IntoValue for Wrap<DataType> {
|
|
260
238
|
.funcall::<_, _, Value>("new", (fields,))
|
261
239
|
.unwrap()
|
262
240
|
}
|
263
|
-
DataType::Null =>
|
264
|
-
|
241
|
+
DataType::Null => {
|
242
|
+
let class = pl.const_get::<_, Value>("Null").unwrap();
|
243
|
+
class.funcall("new", ()).unwrap()
|
244
|
+
}
|
245
|
+
DataType::Unknown => {
|
246
|
+
let class = pl.const_get::<_, Value>("Unknown").unwrap();
|
247
|
+
class.funcall("new", ()).unwrap()
|
248
|
+
}
|
249
|
+
DataType::BinaryOffset => {
|
250
|
+
unimplemented!()
|
251
|
+
}
|
265
252
|
}
|
266
253
|
}
|
267
254
|
}
|
268
255
|
|
256
|
+
impl IntoValue for Wrap<CategoricalOrdering> {
|
257
|
+
fn into_value_with(self, _: &Ruby) -> Value {
|
258
|
+
let ordering = match self.0 {
|
259
|
+
CategoricalOrdering::Physical => "physical",
|
260
|
+
CategoricalOrdering::Lexical => "lexical",
|
261
|
+
};
|
262
|
+
ordering.into_value()
|
263
|
+
}
|
264
|
+
}
|
265
|
+
|
269
266
|
impl IntoValue for Wrap<TimeUnit> {
|
270
267
|
fn into_value_with(self, _: &Ruby) -> Value {
|
271
268
|
let tu = match self.0 {
|
@@ -277,114 +274,6 @@ impl IntoValue for Wrap<TimeUnit> {
|
|
277
274
|
}
|
278
275
|
}
|
279
276
|
|
280
|
-
impl IntoValue for Wrap<&StringChunked> {
|
281
|
-
fn into_value_with(self, _: &Ruby) -> Value {
|
282
|
-
let iter = self.0.into_iter();
|
283
|
-
RArray::from_iter(iter).into_value()
|
284
|
-
}
|
285
|
-
}
|
286
|
-
|
287
|
-
impl IntoValue for Wrap<&BinaryChunked> {
|
288
|
-
fn into_value_with(self, _: &Ruby) -> Value {
|
289
|
-
let iter = self
|
290
|
-
.0
|
291
|
-
.into_iter()
|
292
|
-
.map(|opt_bytes| opt_bytes.map(RString::from_slice));
|
293
|
-
RArray::from_iter(iter).into_value()
|
294
|
-
}
|
295
|
-
}
|
296
|
-
|
297
|
-
impl IntoValue for Wrap<&StructChunked> {
|
298
|
-
fn into_value_with(self, _: &Ruby) -> Value {
|
299
|
-
let s = self.0.clone().into_series();
|
300
|
-
// todo! iterate its chunks and flatten.
|
301
|
-
// make series::iter() accept a chunk index.
|
302
|
-
let s = s.rechunk();
|
303
|
-
let iter = s.iter().map(|av| {
|
304
|
-
if let AnyValue::Struct(_, _, flds) = av {
|
305
|
-
struct_dict(av._iter_struct_av(), flds)
|
306
|
-
} else {
|
307
|
-
unreachable!()
|
308
|
-
}
|
309
|
-
});
|
310
|
-
|
311
|
-
RArray::from_iter(iter).into_value()
|
312
|
-
}
|
313
|
-
}
|
314
|
-
|
315
|
-
impl IntoValue for Wrap<&DurationChunked> {
|
316
|
-
fn into_value_with(self, _: &Ruby) -> Value {
|
317
|
-
let utils = utils();
|
318
|
-
let time_unit = Wrap(self.0.time_unit()).into_value();
|
319
|
-
let iter = self.0.into_iter().map(|opt_v| {
|
320
|
-
opt_v.map(|v| {
|
321
|
-
utils
|
322
|
-
.funcall::<_, _, Value>("_to_ruby_duration", (v, time_unit))
|
323
|
-
.unwrap()
|
324
|
-
})
|
325
|
-
});
|
326
|
-
RArray::from_iter(iter).into_value()
|
327
|
-
}
|
328
|
-
}
|
329
|
-
|
330
|
-
impl IntoValue for Wrap<&DatetimeChunked> {
|
331
|
-
fn into_value_with(self, _: &Ruby) -> Value {
|
332
|
-
let utils = utils();
|
333
|
-
let time_unit = Wrap(self.0.time_unit()).into_value();
|
334
|
-
let time_zone = self.0.time_zone().clone().into_value();
|
335
|
-
let iter = self.0.into_iter().map(|opt_v| {
|
336
|
-
opt_v.map(|v| {
|
337
|
-
utils
|
338
|
-
.funcall::<_, _, Value>("_to_ruby_datetime", (v, time_unit, time_zone))
|
339
|
-
.unwrap()
|
340
|
-
})
|
341
|
-
});
|
342
|
-
RArray::from_iter(iter).into_value()
|
343
|
-
}
|
344
|
-
}
|
345
|
-
|
346
|
-
impl IntoValue for Wrap<&TimeChunked> {
|
347
|
-
fn into_value_with(self, _: &Ruby) -> Value {
|
348
|
-
let utils = utils();
|
349
|
-
let iter = self.0.into_iter().map(|opt_v| {
|
350
|
-
opt_v.map(|v| utils.funcall::<_, _, Value>("_to_ruby_time", (v,)).unwrap())
|
351
|
-
});
|
352
|
-
RArray::from_iter(iter).into_value()
|
353
|
-
}
|
354
|
-
}
|
355
|
-
|
356
|
-
impl IntoValue for Wrap<&DateChunked> {
|
357
|
-
fn into_value_with(self, _: &Ruby) -> Value {
|
358
|
-
let utils = utils();
|
359
|
-
let iter = self.0.into_iter().map(|opt_v| {
|
360
|
-
opt_v.map(|v| utils.funcall::<_, _, Value>("_to_ruby_date", (v,)).unwrap())
|
361
|
-
});
|
362
|
-
RArray::from_iter(iter).into_value()
|
363
|
-
}
|
364
|
-
}
|
365
|
-
|
366
|
-
impl IntoValue for Wrap<&DecimalChunked> {
|
367
|
-
fn into_value_with(self, _: &Ruby) -> Value {
|
368
|
-
let utils = utils();
|
369
|
-
let rb_scale = (-(self.0.scale() as i32)).into_value();
|
370
|
-
let iter = self.0.into_iter().map(|opt_v| {
|
371
|
-
opt_v.map(|v| {
|
372
|
-
utils
|
373
|
-
.funcall::<_, _, Value>("_to_ruby_decimal", (v.to_string(), rb_scale))
|
374
|
-
.unwrap()
|
375
|
-
})
|
376
|
-
});
|
377
|
-
RArray::from_iter(iter).into_value()
|
378
|
-
}
|
379
|
-
}
|
380
|
-
|
381
|
-
fn abs_decimal_from_digits(digits: String, exp: i32) -> Option<(i128, usize)> {
|
382
|
-
match digits.parse::<i128>() {
|
383
|
-
Ok(v) => Some((v, ((digits.len() as i32) - exp) as usize)),
|
384
|
-
Err(_) => None,
|
385
|
-
}
|
386
|
-
}
|
387
|
-
|
388
277
|
impl TryConvert for Wrap<Field> {
|
389
278
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
390
279
|
let name: String = ob.funcall("name", ())?;
|
@@ -410,6 +299,7 @@ impl TryConvert for Wrap<DataType> {
|
|
410
299
|
"Polars::Binary" => DataType::Binary,
|
411
300
|
"Polars::Boolean" => DataType::Boolean,
|
412
301
|
"Polars::Categorical" => DataType::Categorical(None, Default::default()),
|
302
|
+
"Polars::Enum" => DataType::Enum(None, Default::default()),
|
413
303
|
"Polars::Date" => DataType::Date,
|
414
304
|
"Polars::Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
|
415
305
|
"Polars::Time" => DataType::Time,
|
@@ -431,6 +321,36 @@ impl TryConvert for Wrap<DataType> {
|
|
431
321
|
} else if String::try_convert(ob).is_err() {
|
432
322
|
let name = unsafe { ob.class().name() }.into_owned();
|
433
323
|
match name.as_str() {
|
324
|
+
"Polars::Int8" => DataType::Int8,
|
325
|
+
"Polars::Int16" => DataType::Int16,
|
326
|
+
"Polars::Int32" => DataType::Int32,
|
327
|
+
"Polars::Int64" => DataType::Int64,
|
328
|
+
"Polars::UInt8" => DataType::UInt8,
|
329
|
+
"Polars::UInt16" => DataType::UInt16,
|
330
|
+
"Polars::UInt32" => DataType::UInt32,
|
331
|
+
"Polars::UInt64" => DataType::UInt64,
|
332
|
+
"Polars::String" => DataType::String,
|
333
|
+
"Polars::Binary" => DataType::Binary,
|
334
|
+
"Polars::Boolean" => DataType::Boolean,
|
335
|
+
"Polars::Categorical" => {
|
336
|
+
let ordering = ob
|
337
|
+
.funcall::<_, _, Wrap<CategoricalOrdering>>("ordering", ())?
|
338
|
+
.0;
|
339
|
+
DataType::Categorical(None, ordering)
|
340
|
+
}
|
341
|
+
"Polars::Enum" => {
|
342
|
+
let categories = ob.funcall("categories", ()).unwrap();
|
343
|
+
let s = get_series(categories)?;
|
344
|
+
let ca = s.str().map_err(RbPolarsErr::from)?;
|
345
|
+
let categories = ca.downcast_iter().next().unwrap().clone();
|
346
|
+
create_enum_data_type(categories)
|
347
|
+
}
|
348
|
+
"Polars::Date" => DataType::Date,
|
349
|
+
"Polars::Time" => DataType::Time,
|
350
|
+
"Polars::Float32" => DataType::Float32,
|
351
|
+
"Polars::Float64" => DataType::Float64,
|
352
|
+
"Polars::Null" => DataType::Null,
|
353
|
+
"Polars::Unknown" => DataType::Unknown,
|
434
354
|
"Polars::Duration" => {
|
435
355
|
let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
|
436
356
|
let time_unit = Wrap::<TimeUnit>::try_convert(time_unit)?.0;
|
@@ -503,102 +423,6 @@ impl TryConvert for Wrap<DataType> {
|
|
503
423
|
}
|
504
424
|
}
|
505
425
|
|
506
|
-
impl<'s> TryConvert for Wrap<AnyValue<'s>> {
|
507
|
-
fn try_convert(ob: Value) -> RbResult<Self> {
|
508
|
-
if ob.is_kind_of(class::true_class()) || ob.is_kind_of(class::false_class()) {
|
509
|
-
Ok(AnyValue::Boolean(bool::try_convert(ob)?).into())
|
510
|
-
} else if let Some(v) = Integer::from_value(ob) {
|
511
|
-
Ok(AnyValue::Int64(v.to_i64()?).into())
|
512
|
-
} else if let Some(v) = Float::from_value(ob) {
|
513
|
-
Ok(AnyValue::Float64(v.to_f64()).into())
|
514
|
-
} else if let Some(v) = RString::from_value(ob) {
|
515
|
-
if v.enc_get() == Index::utf8() {
|
516
|
-
Ok(AnyValue::StringOwned(v.to_string()?.into()).into())
|
517
|
-
} else {
|
518
|
-
Ok(AnyValue::BinaryOwned(unsafe { v.as_slice() }.to_vec()).into())
|
519
|
-
}
|
520
|
-
// call is_a? for ActiveSupport::TimeWithZone
|
521
|
-
} else if ob.funcall::<_, _, bool>("is_a?", (class::time(),))? {
|
522
|
-
let sec = ob.funcall::<_, _, i64>("to_i", ())?;
|
523
|
-
let nsec = ob.funcall::<_, _, i64>("nsec", ())?;
|
524
|
-
let v = sec * 1_000_000_000 + nsec;
|
525
|
-
// TODO support time zone when possible
|
526
|
-
// https://github.com/pola-rs/polars/issues/9103
|
527
|
-
Ok(AnyValue::Datetime(v, TimeUnit::Nanoseconds, &None).into())
|
528
|
-
} else if ob.is_nil() {
|
529
|
-
Ok(AnyValue::Null.into())
|
530
|
-
} else if let Some(dict) = RHash::from_value(ob) {
|
531
|
-
let len = dict.len();
|
532
|
-
let mut keys = Vec::with_capacity(len);
|
533
|
-
let mut vals = Vec::with_capacity(len);
|
534
|
-
dict.foreach(|k: Value, v: Value| {
|
535
|
-
let key = String::try_convert(k)?;
|
536
|
-
let val = Wrap::<AnyValue>::try_convert(v)?.0;
|
537
|
-
let dtype = DataType::from(&val);
|
538
|
-
keys.push(Field::new(&key, dtype));
|
539
|
-
vals.push(val);
|
540
|
-
Ok(ForEach::Continue)
|
541
|
-
})?;
|
542
|
-
Ok(Wrap(AnyValue::StructOwned(Box::new((vals, keys)))))
|
543
|
-
} else if let Some(v) = RArray::from_value(ob) {
|
544
|
-
if v.is_empty() {
|
545
|
-
Ok(Wrap(AnyValue::List(Series::new_empty("", &DataType::Null))))
|
546
|
-
} else {
|
547
|
-
let list = v;
|
548
|
-
|
549
|
-
let mut avs = Vec::with_capacity(25);
|
550
|
-
let mut iter = list.each();
|
551
|
-
|
552
|
-
for item in (&mut iter).take(25) {
|
553
|
-
avs.push(Wrap::<AnyValue>::try_convert(item?)?.0)
|
554
|
-
}
|
555
|
-
|
556
|
-
let (dtype, _n_types) = any_values_to_dtype(&avs).map_err(RbPolarsErr::from)?;
|
557
|
-
|
558
|
-
// push the rest
|
559
|
-
avs.reserve(list.len());
|
560
|
-
for item in iter {
|
561
|
-
avs.push(Wrap::<AnyValue>::try_convert(item?)?.0)
|
562
|
-
}
|
563
|
-
|
564
|
-
let s = Series::from_any_values_and_dtype("", &avs, &dtype, true)
|
565
|
-
.map_err(RbPolarsErr::from)?;
|
566
|
-
Ok(Wrap(AnyValue::List(s)))
|
567
|
-
}
|
568
|
-
} else if ob.is_kind_of(crate::rb_modules::datetime()) {
|
569
|
-
let sec: i64 = ob.funcall("to_i", ())?;
|
570
|
-
let nsec: i64 = ob.funcall("nsec", ())?;
|
571
|
-
Ok(Wrap(AnyValue::Datetime(
|
572
|
-
sec * 1_000_000_000 + nsec,
|
573
|
-
TimeUnit::Nanoseconds,
|
574
|
-
&None,
|
575
|
-
)))
|
576
|
-
} else if ob.is_kind_of(crate::rb_modules::date()) {
|
577
|
-
// convert to DateTime for UTC
|
578
|
-
let v = ob
|
579
|
-
.funcall::<_, _, Value>("to_datetime", ())?
|
580
|
-
.funcall::<_, _, Value>("to_time", ())?
|
581
|
-
.funcall::<_, _, i64>("to_i", ())?;
|
582
|
-
Ok(Wrap(AnyValue::Date((v / 86400) as i32)))
|
583
|
-
} else if ob.is_kind_of(crate::rb_modules::bigdecimal()) {
|
584
|
-
let (sign, digits, _, exp): (i8, String, i32, i32) = ob.funcall("split", ()).unwrap();
|
585
|
-
let (mut v, scale) = abs_decimal_from_digits(digits, exp).ok_or_else(|| {
|
586
|
-
RbPolarsErr::other("BigDecimal is too large to fit in Decimal128".into())
|
587
|
-
})?;
|
588
|
-
if sign < 0 {
|
589
|
-
// TODO better error
|
590
|
-
v = v.checked_neg().unwrap();
|
591
|
-
}
|
592
|
-
Ok(Wrap(AnyValue::Decimal(v, scale)))
|
593
|
-
} else {
|
594
|
-
Err(RbPolarsErr::other(format!(
|
595
|
-
"object type not supported {:?}",
|
596
|
-
ob
|
597
|
-
)))
|
598
|
-
}
|
599
|
-
}
|
600
|
-
}
|
601
|
-
|
602
426
|
impl<'s> TryConvert for Wrap<Row<'s>> {
|
603
427
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
604
428
|
let mut vals: Vec<Wrap<AnyValue<'s>>> = Vec::new();
|
@@ -662,6 +486,15 @@ impl TotalEq for ObjectValue {
|
|
662
486
|
}
|
663
487
|
}
|
664
488
|
|
489
|
+
impl TotalHash for ObjectValue {
|
490
|
+
fn tot_hash<H>(&self, state: &mut H)
|
491
|
+
where
|
492
|
+
H: Hasher,
|
493
|
+
{
|
494
|
+
self.hash(state);
|
495
|
+
}
|
496
|
+
}
|
497
|
+
|
665
498
|
impl Display for ObjectValue {
|
666
499
|
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
667
500
|
write!(f, "{}", self.to_object())
|
@@ -715,24 +548,33 @@ impl Default for ObjectValue {
|
|
715
548
|
|
716
549
|
pub(crate) fn dicts_to_rows(
|
717
550
|
records: &Value,
|
718
|
-
infer_schema_len: usize
|
551
|
+
infer_schema_len: Option<usize>,
|
552
|
+
schema_columns: PlIndexSet<String>,
|
719
553
|
) -> RbResult<(Vec<Row>, Vec<String>)> {
|
554
|
+
let infer_schema_len = infer_schema_len.map(|n| std::cmp::max(1, n));
|
720
555
|
let (dicts, len) = get_rbseq(*records)?;
|
721
556
|
|
722
|
-
let
|
723
|
-
|
724
|
-
|
725
|
-
|
726
|
-
|
727
|
-
|
728
|
-
|
729
|
-
|
730
|
-
|
731
|
-
|
732
|
-
|
733
|
-
|
734
|
-
|
735
|
-
|
557
|
+
let key_names = {
|
558
|
+
if !schema_columns.is_empty() {
|
559
|
+
schema_columns
|
560
|
+
} else {
|
561
|
+
let mut inferred_keys = PlIndexSet::new();
|
562
|
+
for d in dicts.each().take(infer_schema_len.unwrap_or(usize::MAX)) {
|
563
|
+
let d = d?;
|
564
|
+
let d = RHash::try_convert(d)?;
|
565
|
+
|
566
|
+
d.foreach(|name: Value, _value: Value| {
|
567
|
+
if let Some(v) = Symbol::from_value(name) {
|
568
|
+
inferred_keys.insert(v.name()?.into());
|
569
|
+
} else {
|
570
|
+
inferred_keys.insert(String::try_convert(name)?);
|
571
|
+
};
|
572
|
+
Ok(ForEach::Continue)
|
573
|
+
})?;
|
574
|
+
}
|
575
|
+
inferred_keys
|
576
|
+
}
|
577
|
+
};
|
736
578
|
|
737
579
|
let mut rows = Vec::with_capacity(len);
|
738
580
|
|
@@ -895,8 +737,7 @@ impl TryConvert for Wrap<JoinType> {
|
|
895
737
|
"outer_coalesce" => JoinType::Outer { coalesce: true },
|
896
738
|
"semi" => JoinType::Semi,
|
897
739
|
"anti" => JoinType::Anti,
|
898
|
-
|
899
|
-
// "cross" => JoinType::Cross,
|
740
|
+
"cross" => JoinType::Cross,
|
900
741
|
v => {
|
901
742
|
return Err(RbValueError::new_err(format!(
|
902
743
|
"how must be one of {{'inner', 'left', 'outer', 'semi', 'anti', 'cross'}}, got {}",
|
@@ -940,6 +781,21 @@ impl TryConvert for Wrap<ListToStructWidthStrategy> {
|
|
940
781
|
}
|
941
782
|
}
|
942
783
|
|
784
|
+
impl TryConvert for Wrap<NonExistent> {
|
785
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
786
|
+
let parsed = match String::try_convert(ob)?.as_str() {
|
787
|
+
"null" => NonExistent::Null,
|
788
|
+
"raise" => NonExistent::Raise,
|
789
|
+
v => {
|
790
|
+
return Err(RbValueError::new_err(format!(
|
791
|
+
"`non_existent` must be one of {{'null', 'raise'}}, got {v}",
|
792
|
+
)))
|
793
|
+
}
|
794
|
+
};
|
795
|
+
Ok(Wrap(parsed))
|
796
|
+
}
|
797
|
+
}
|
798
|
+
|
943
799
|
impl TryConvert for Wrap<NullBehavior> {
|
944
800
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
945
801
|
let parsed = match String::try_convert(ob)?.as_str() {
|
@@ -1062,6 +918,22 @@ impl TryConvert for Wrap<UniqueKeepStrategy> {
|
|
1062
918
|
}
|
1063
919
|
}
|
1064
920
|
|
921
|
+
impl TryConvert for Wrap<IpcCompression> {
|
922
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
923
|
+
let parsed = match String::try_convert(ob)?.as_str() {
|
924
|
+
"lz4" => IpcCompression::LZ4,
|
925
|
+
"zstd" => IpcCompression::ZSTD,
|
926
|
+
v => {
|
927
|
+
return Err(RbValueError::new_err(format!(
|
928
|
+
"compression must be one of {{'lz4', 'zstd'}}, got {}",
|
929
|
+
v
|
930
|
+
)))
|
931
|
+
}
|
932
|
+
};
|
933
|
+
Ok(Wrap(parsed))
|
934
|
+
}
|
935
|
+
}
|
936
|
+
|
1065
937
|
impl TryConvert for Wrap<SearchSortedSide> {
|
1066
938
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
1067
939
|
let parsed = match String::try_convert(ob)?.as_str() {
|
@@ -1078,6 +950,56 @@ impl TryConvert for Wrap<SearchSortedSide> {
|
|
1078
950
|
}
|
1079
951
|
}
|
1080
952
|
|
953
|
+
impl TryConvert for Wrap<WindowMapping> {
|
954
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
955
|
+
let parsed = match String::try_convert(ob)?.as_str() {
|
956
|
+
"group_to_rows" => WindowMapping::GroupsToRows,
|
957
|
+
"join" => WindowMapping::Join,
|
958
|
+
"explode" => WindowMapping::Explode,
|
959
|
+
v => {
|
960
|
+
return Err(RbValueError::new_err(format!(
|
961
|
+
"`mapping_strategy` must be one of {{'group_to_rows', 'join', 'explode'}}, got {v}",
|
962
|
+
)))
|
963
|
+
}
|
964
|
+
};
|
965
|
+
Ok(Wrap(parsed))
|
966
|
+
}
|
967
|
+
}
|
968
|
+
|
969
|
+
impl TryConvert for Wrap<JoinValidation> {
|
970
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
971
|
+
let parsed = match String::try_convert(ob)?.as_str() {
|
972
|
+
"1:1" => JoinValidation::OneToOne,
|
973
|
+
"1:m" => JoinValidation::OneToMany,
|
974
|
+
"m:m" => JoinValidation::ManyToMany,
|
975
|
+
"m:1" => JoinValidation::ManyToOne,
|
976
|
+
v => {
|
977
|
+
return Err(RbValueError::new_err(format!(
|
978
|
+
"`validate` must be one of {{'m:m', 'm:1', '1:m', '1:1'}}, got {v}",
|
979
|
+
)))
|
980
|
+
}
|
981
|
+
};
|
982
|
+
Ok(Wrap(parsed))
|
983
|
+
}
|
984
|
+
}
|
985
|
+
|
986
|
+
impl TryConvert for Wrap<QuoteStyle> {
|
987
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
988
|
+
let parsed = match String::try_convert(ob)?.as_str() {
|
989
|
+
"always" => QuoteStyle::Always,
|
990
|
+
"necessary" => QuoteStyle::Necessary,
|
991
|
+
"non_numeric" => QuoteStyle::NonNumeric,
|
992
|
+
"never" => QuoteStyle::Never,
|
993
|
+
v => {
|
994
|
+
return Err(RbValueError::new_err(format!(
|
995
|
+
"`quote_style` must be one of {{'always', 'necessary', 'non_numeric', 'never'}}, got {v}",
|
996
|
+
)))
|
997
|
+
},
|
998
|
+
};
|
999
|
+
Ok(Wrap(parsed))
|
1000
|
+
}
|
1001
|
+
}
|
1002
|
+
|
1081
1003
|
pub fn parse_fill_null_strategy(
|
1082
1004
|
strategy: &str,
|
1083
1005
|
limit: FillNullLimit,
|
@@ -1150,3 +1072,12 @@ where
|
|
1150
1072
|
{
|
1151
1073
|
container.into_iter().map(|s| s.as_ref().into()).collect()
|
1152
1074
|
}
|
1075
|
+
|
1076
|
+
impl TryConvert for Wrap<NonZeroUsize> {
|
1077
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
1078
|
+
let v = usize::try_convert(ob)?;
|
1079
|
+
NonZeroUsize::new(v)
|
1080
|
+
.map(Wrap)
|
1081
|
+
.ok_or(RbValueError::new_err("must be non-zero".into()))
|
1082
|
+
}
|
1083
|
+
}
|