polars-df 0.8.0 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +30 -1
- data/Cargo.lock +107 -59
- data/Cargo.toml +0 -3
- data/LICENSE.txt +1 -1
- data/README.md +2 -2
- data/ext/polars/Cargo.toml +15 -7
- data/ext/polars/src/batched_csv.rs +4 -4
- data/ext/polars/src/conversion/anyvalue.rs +185 -0
- data/ext/polars/src/conversion/chunked_array.rs +140 -0
- data/ext/polars/src/{conversion.rs → conversion/mod.rs} +260 -340
- data/ext/polars/src/dataframe.rs +69 -53
- data/ext/polars/src/expr/array.rs +74 -0
- data/ext/polars/src/expr/datetime.rs +22 -56
- data/ext/polars/src/expr/general.rs +61 -33
- data/ext/polars/src/expr/list.rs +52 -4
- data/ext/polars/src/expr/meta.rs +48 -0
- data/ext/polars/src/expr/rolling.rs +1 -0
- data/ext/polars/src/expr/string.rs +59 -8
- data/ext/polars/src/expr/struct.rs +8 -4
- data/ext/polars/src/functions/aggregation.rs +6 -0
- data/ext/polars/src/functions/lazy.rs +103 -48
- data/ext/polars/src/functions/meta.rs +45 -1
- data/ext/polars/src/functions/string_cache.rs +14 -0
- data/ext/polars/src/{lazyframe.rs → lazyframe/mod.rs} +138 -22
- data/ext/polars/src/lib.rs +226 -168
- data/ext/polars/src/series/aggregation.rs +20 -0
- data/ext/polars/src/series/mod.rs +25 -4
- data/lib/polars/array_expr.rb +449 -0
- data/lib/polars/array_name_space.rb +346 -0
- data/lib/polars/cat_expr.rb +24 -0
- data/lib/polars/cat_name_space.rb +75 -0
- data/lib/polars/config.rb +2 -2
- data/lib/polars/data_frame.rb +179 -43
- data/lib/polars/data_types.rb +191 -28
- data/lib/polars/date_time_expr.rb +31 -14
- data/lib/polars/exceptions.rb +12 -1
- data/lib/polars/expr.rb +866 -186
- data/lib/polars/functions/aggregation/horizontal.rb +246 -0
- data/lib/polars/functions/aggregation/vertical.rb +282 -0
- data/lib/polars/functions/as_datatype.rb +248 -0
- data/lib/polars/functions/col.rb +47 -0
- data/lib/polars/functions/eager.rb +182 -0
- data/lib/polars/functions/lazy.rb +1280 -0
- data/lib/polars/functions/len.rb +49 -0
- data/lib/polars/functions/lit.rb +35 -0
- data/lib/polars/functions/random.rb +16 -0
- data/lib/polars/functions/range/date_range.rb +103 -0
- data/lib/polars/functions/range/int_range.rb +51 -0
- data/lib/polars/functions/repeat.rb +144 -0
- data/lib/polars/functions/whenthen.rb +27 -0
- data/lib/polars/functions.rb +29 -416
- data/lib/polars/group_by.rb +2 -2
- data/lib/polars/io.rb +18 -25
- data/lib/polars/lazy_frame.rb +367 -53
- data/lib/polars/list_expr.rb +152 -6
- data/lib/polars/list_name_space.rb +102 -0
- data/lib/polars/meta_expr.rb +175 -7
- data/lib/polars/series.rb +273 -34
- data/lib/polars/string_cache.rb +75 -0
- data/lib/polars/string_expr.rb +412 -96
- data/lib/polars/string_name_space.rb +4 -4
- data/lib/polars/testing.rb +507 -0
- data/lib/polars/utils.rb +52 -8
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +15 -2
- metadata +35 -5
- data/lib/polars/lazy_functions.rb +0 -1181
@@ -1,24 +1,28 @@
|
|
1
|
+
pub(crate) mod anyvalue;
|
2
|
+
mod chunked_array;
|
3
|
+
|
1
4
|
use std::fmt::{Debug, Display, Formatter};
|
2
5
|
use std::hash::{Hash, Hasher};
|
6
|
+
use std::num::NonZeroUsize;
|
3
7
|
|
4
|
-
use magnus::encoding::{EncodingCapable, Index};
|
5
8
|
use magnus::{
|
6
|
-
class, exception, prelude::*, r_hash::ForEach, value::Opaque,
|
7
|
-
|
9
|
+
class, exception, prelude::*, r_hash::ForEach, value::Opaque, IntoValue, Module, RArray, RHash,
|
10
|
+
Ruby, Symbol, TryConvert, Value,
|
8
11
|
};
|
9
12
|
use polars::chunked_array::object::PolarsObjectSafe;
|
10
13
|
use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
|
11
14
|
use polars::datatypes::AnyValue;
|
12
|
-
use polars::frame::row::
|
15
|
+
use polars::frame::row::Row;
|
13
16
|
use polars::frame::NullStrategy;
|
14
17
|
use polars::io::avro::AvroCompression;
|
15
18
|
use polars::prelude::*;
|
16
19
|
use polars::series::ops::NullBehavior;
|
17
|
-
use
|
20
|
+
use polars_core::utils::arrow::array::Array;
|
21
|
+
use polars_utils::total_ord::{TotalEq, TotalHash};
|
18
22
|
use smartstring::alias::String as SmartString;
|
19
23
|
|
20
24
|
use crate::object::OBJECT_NAME;
|
21
|
-
use crate::rb_modules::
|
25
|
+
use crate::rb_modules::series;
|
22
26
|
use crate::{RbDataFrame, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbTypeError, RbValueError};
|
23
27
|
|
24
28
|
pub(crate) fn slice_to_wrapped<T>(slice: &[T]) -> &[Wrap<T>] {
|
@@ -78,36 +82,11 @@ pub(crate) fn get_series(obj: Value) -> RbResult<Series> {
|
|
78
82
|
Ok(rbs.series.borrow().clone())
|
79
83
|
}
|
80
84
|
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
for res in seq.each() {
|
87
|
-
let item = res?;
|
88
|
-
match String::try_convert(item) {
|
89
|
-
Ok(val) => builder.append_value(&val),
|
90
|
-
Err(_) => builder.append_null(),
|
91
|
-
}
|
92
|
-
}
|
93
|
-
Ok(Wrap(builder.finish()))
|
94
|
-
}
|
95
|
-
}
|
96
|
-
|
97
|
-
impl TryConvert for Wrap<BinaryChunked> {
|
98
|
-
fn try_convert(obj: Value) -> RbResult<Self> {
|
99
|
-
let (seq, len) = get_rbseq(obj)?;
|
100
|
-
let mut builder = BinaryChunkedBuilder::new("", len, len * 25);
|
101
|
-
|
102
|
-
for res in seq.each() {
|
103
|
-
let item = res?;
|
104
|
-
match RString::try_convert(item) {
|
105
|
-
Ok(val) => builder.append_value(unsafe { val.as_slice() }),
|
106
|
-
Err(_) => builder.append_null(),
|
107
|
-
}
|
108
|
-
}
|
109
|
-
Ok(Wrap(builder.finish()))
|
110
|
-
}
|
85
|
+
pub(crate) fn to_series(s: RbSeries) -> Value {
|
86
|
+
let series = series();
|
87
|
+
series
|
88
|
+
.funcall::<_, _, Value>("_from_rbseries", (s,))
|
89
|
+
.unwrap()
|
111
90
|
}
|
112
91
|
|
113
92
|
impl TryConvert for Wrap<NullValues> {
|
@@ -134,102 +113,84 @@ fn struct_dict<'a>(vals: impl Iterator<Item = AnyValue<'a>>, flds: &[Field]) ->
|
|
134
113
|
dict.into_value()
|
135
114
|
}
|
136
115
|
|
137
|
-
impl IntoValue for Wrap<
|
138
|
-
fn into_value_with(self,
|
116
|
+
impl IntoValue for Wrap<DataType> {
|
117
|
+
fn into_value_with(self, _: &Ruby) -> Value {
|
118
|
+
let pl = crate::rb_modules::polars();
|
119
|
+
|
139
120
|
match self.0 {
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
AnyValue::UInt64(v) => ruby.into_value(v),
|
144
|
-
AnyValue::Int8(v) => ruby.into_value(v),
|
145
|
-
AnyValue::Int16(v) => ruby.into_value(v),
|
146
|
-
AnyValue::Int32(v) => ruby.into_value(v),
|
147
|
-
AnyValue::Int64(v) => ruby.into_value(v),
|
148
|
-
AnyValue::Float32(v) => ruby.into_value(v),
|
149
|
-
AnyValue::Float64(v) => ruby.into_value(v),
|
150
|
-
AnyValue::Null => ruby.qnil().as_value(),
|
151
|
-
AnyValue::Boolean(v) => ruby.into_value(v),
|
152
|
-
AnyValue::String(v) => ruby.into_value(v),
|
153
|
-
AnyValue::StringOwned(v) => ruby.into_value(v.as_str()),
|
154
|
-
AnyValue::Categorical(idx, rev, arr) => {
|
155
|
-
let s = if arr.is_null() {
|
156
|
-
rev.get(idx)
|
157
|
-
} else {
|
158
|
-
unsafe { arr.deref_unchecked().value(idx as usize) }
|
159
|
-
};
|
160
|
-
s.into_value()
|
121
|
+
DataType::Int8 => {
|
122
|
+
let class = pl.const_get::<_, Value>("Int8").unwrap();
|
123
|
+
class.funcall("new", ()).unwrap()
|
161
124
|
}
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
utils()
|
166
|
-
.funcall("_to_ruby_datetime", (v, time_unit, time_zone.clone()))
|
167
|
-
.unwrap()
|
125
|
+
DataType::Int16 => {
|
126
|
+
let class = pl.const_get::<_, Value>("Int16").unwrap();
|
127
|
+
class.funcall("new", ()).unwrap()
|
168
128
|
}
|
169
|
-
|
170
|
-
let
|
171
|
-
|
172
|
-
.funcall("_to_ruby_duration", (v, time_unit))
|
173
|
-
.unwrap()
|
129
|
+
DataType::Int32 => {
|
130
|
+
let class = pl.const_get::<_, Value>("Int32").unwrap();
|
131
|
+
class.funcall("new", ()).unwrap()
|
174
132
|
}
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
AnyValue::StructOwned(payload) => struct_dict(payload.0.into_iter(), &payload.1),
|
179
|
-
AnyValue::Object(v) => {
|
180
|
-
let object = v.as_any().downcast_ref::<ObjectValue>().unwrap();
|
181
|
-
object.to_object()
|
133
|
+
DataType::Int64 => {
|
134
|
+
let class = pl.const_get::<_, Value>("Int64").unwrap();
|
135
|
+
class.funcall("new", ()).unwrap()
|
182
136
|
}
|
183
|
-
|
184
|
-
let
|
185
|
-
|
137
|
+
DataType::UInt8 => {
|
138
|
+
let class = pl.const_get::<_, Value>("UInt8").unwrap();
|
139
|
+
class.funcall("new", ()).unwrap()
|
140
|
+
}
|
141
|
+
DataType::UInt16 => {
|
142
|
+
let class = pl.const_get::<_, Value>("UInt16").unwrap();
|
143
|
+
class.funcall("new", ()).unwrap()
|
144
|
+
}
|
145
|
+
DataType::UInt32 => {
|
146
|
+
let class = pl.const_get::<_, Value>("UInt32").unwrap();
|
147
|
+
class.funcall("new", ()).unwrap()
|
148
|
+
}
|
149
|
+
DataType::UInt64 => {
|
150
|
+
let class = pl.const_get::<_, Value>("UInt64").unwrap();
|
151
|
+
class.funcall("new", ()).unwrap()
|
152
|
+
}
|
153
|
+
DataType::Float32 => {
|
154
|
+
let class = pl.const_get::<_, Value>("Float32").unwrap();
|
155
|
+
class.funcall("new", ()).unwrap()
|
156
|
+
}
|
157
|
+
DataType::Float64 => {
|
158
|
+
let class = pl.const_get::<_, Value>("Float64").unwrap();
|
159
|
+
class.funcall("new", ()).unwrap()
|
186
160
|
}
|
187
|
-
AnyValue::Binary(v) => RString::from_slice(v).into_value(),
|
188
|
-
AnyValue::BinaryOwned(v) => RString::from_slice(&v).into_value(),
|
189
|
-
AnyValue::Decimal(v, scale) => utils()
|
190
|
-
.funcall("_to_ruby_decimal", (v.to_string(), -(scale as i32)))
|
191
|
-
.unwrap(),
|
192
|
-
}
|
193
|
-
}
|
194
|
-
}
|
195
|
-
|
196
|
-
impl IntoValue for Wrap<DataType> {
|
197
|
-
fn into_value_with(self, _: &Ruby) -> Value {
|
198
|
-
let pl = crate::rb_modules::polars();
|
199
|
-
|
200
|
-
match self.0 {
|
201
|
-
DataType::Int8 => pl.const_get::<_, Value>("Int8").unwrap(),
|
202
|
-
DataType::Int16 => pl.const_get::<_, Value>("Int16").unwrap(),
|
203
|
-
DataType::Int32 => pl.const_get::<_, Value>("Int32").unwrap(),
|
204
|
-
DataType::Int64 => pl.const_get::<_, Value>("Int64").unwrap(),
|
205
|
-
DataType::UInt8 => pl.const_get::<_, Value>("UInt8").unwrap(),
|
206
|
-
DataType::UInt16 => pl.const_get::<_, Value>("UInt16").unwrap(),
|
207
|
-
DataType::UInt32 => pl.const_get::<_, Value>("UInt32").unwrap(),
|
208
|
-
DataType::UInt64 => pl.const_get::<_, Value>("UInt64").unwrap(),
|
209
|
-
DataType::Float32 => pl.const_get::<_, Value>("Float32").unwrap(),
|
210
|
-
DataType::Float64 => pl.const_get::<_, Value>("Float64").unwrap(),
|
211
161
|
DataType::Decimal(precision, scale) => {
|
212
|
-
let
|
213
|
-
|
162
|
+
let class = pl.const_get::<_, Value>("Decimal").unwrap();
|
163
|
+
class
|
214
164
|
.funcall::<_, _, Value>("new", (precision, scale))
|
215
165
|
.unwrap()
|
216
166
|
}
|
217
|
-
DataType::Boolean =>
|
218
|
-
|
219
|
-
|
167
|
+
DataType::Boolean => {
|
168
|
+
let class = pl.const_get::<_, Value>("Boolean").unwrap();
|
169
|
+
class.funcall("new", ()).unwrap()
|
170
|
+
}
|
171
|
+
DataType::String => {
|
172
|
+
let class = pl.const_get::<_, Value>("String").unwrap();
|
173
|
+
class.funcall("new", ()).unwrap()
|
174
|
+
}
|
175
|
+
DataType::Binary => {
|
176
|
+
let class = pl.const_get::<_, Value>("Binary").unwrap();
|
177
|
+
class.funcall("new", ()).unwrap()
|
178
|
+
}
|
220
179
|
DataType::Array(inner, size) => {
|
180
|
+
let class = pl.const_get::<_, Value>("Array").unwrap();
|
221
181
|
let inner = Wrap(*inner);
|
222
|
-
let
|
223
|
-
|
224
|
-
.funcall::<_, _, Value>("new", (size, inner))
|
225
|
-
.unwrap()
|
182
|
+
let args = (inner, size);
|
183
|
+
class.funcall::<_, _, Value>("new", args).unwrap()
|
226
184
|
}
|
227
185
|
DataType::List(inner) => {
|
186
|
+
let class = pl.const_get::<_, Value>("List").unwrap();
|
228
187
|
let inner = Wrap(*inner);
|
229
|
-
|
230
|
-
|
188
|
+
class.funcall::<_, _, Value>("new", (inner,)).unwrap()
|
189
|
+
}
|
190
|
+
DataType::Date => {
|
191
|
+
let class = pl.const_get::<_, Value>("Date").unwrap();
|
192
|
+
class.funcall("new", ()).unwrap()
|
231
193
|
}
|
232
|
-
DataType::Date => pl.const_get::<_, Value>("Date").unwrap(),
|
233
194
|
DataType::Datetime(tu, tz) => {
|
234
195
|
let datetime_class = pl.const_get::<_, Value>("Datetime").unwrap();
|
235
196
|
datetime_class
|
@@ -242,9 +203,29 @@ impl IntoValue for Wrap<DataType> {
|
|
242
203
|
.funcall::<_, _, Value>("new", (tu.to_ascii(),))
|
243
204
|
.unwrap()
|
244
205
|
}
|
245
|
-
DataType::Object(_, _) =>
|
246
|
-
|
247
|
-
|
206
|
+
DataType::Object(_, _) => {
|
207
|
+
let class = pl.const_get::<_, Value>("Object").unwrap();
|
208
|
+
class.funcall("new", ()).unwrap()
|
209
|
+
}
|
210
|
+
DataType::Categorical(_, ordering) => {
|
211
|
+
let class = pl.const_get::<_, Value>("Categorical").unwrap();
|
212
|
+
class.funcall("new", (Wrap(ordering),)).unwrap()
|
213
|
+
}
|
214
|
+
DataType::Enum(rev_map, _) => {
|
215
|
+
// we should always have an initialized rev_map coming from rust
|
216
|
+
let categories = rev_map.as_ref().unwrap().get_categories();
|
217
|
+
let class = pl.const_get::<_, Value>("Enum").unwrap();
|
218
|
+
let s = Series::from_arrow("category", categories.to_boxed()).unwrap();
|
219
|
+
let series = to_series(s.into());
|
220
|
+
class
|
221
|
+
.funcall::<_, _, Value>("new", (series,))
|
222
|
+
.unwrap()
|
223
|
+
.into()
|
224
|
+
}
|
225
|
+
DataType::Time => {
|
226
|
+
let class = pl.const_get::<_, Value>("Time").unwrap();
|
227
|
+
class.funcall("new", ()).unwrap()
|
228
|
+
}
|
248
229
|
DataType::Struct(fields) => {
|
249
230
|
let field_class = pl.const_get::<_, Value>("Field").unwrap();
|
250
231
|
let iter = fields.iter().map(|fld| {
|
@@ -260,12 +241,31 @@ impl IntoValue for Wrap<DataType> {
|
|
260
241
|
.funcall::<_, _, Value>("new", (fields,))
|
261
242
|
.unwrap()
|
262
243
|
}
|
263
|
-
DataType::Null =>
|
264
|
-
|
244
|
+
DataType::Null => {
|
245
|
+
let class = pl.const_get::<_, Value>("Null").unwrap();
|
246
|
+
class.funcall("new", ()).unwrap()
|
247
|
+
}
|
248
|
+
DataType::Unknown => {
|
249
|
+
let class = pl.const_get::<_, Value>("Unknown").unwrap();
|
250
|
+
class.funcall("new", ()).unwrap()
|
251
|
+
}
|
252
|
+
DataType::BinaryOffset => {
|
253
|
+
unimplemented!()
|
254
|
+
}
|
265
255
|
}
|
266
256
|
}
|
267
257
|
}
|
268
258
|
|
259
|
+
impl IntoValue for Wrap<CategoricalOrdering> {
|
260
|
+
fn into_value_with(self, _: &Ruby) -> Value {
|
261
|
+
let ordering = match self.0 {
|
262
|
+
CategoricalOrdering::Physical => "physical",
|
263
|
+
CategoricalOrdering::Lexical => "lexical",
|
264
|
+
};
|
265
|
+
ordering.into_value()
|
266
|
+
}
|
267
|
+
}
|
268
|
+
|
269
269
|
impl IntoValue for Wrap<TimeUnit> {
|
270
270
|
fn into_value_with(self, _: &Ruby) -> Value {
|
271
271
|
let tu = match self.0 {
|
@@ -277,114 +277,6 @@ impl IntoValue for Wrap<TimeUnit> {
|
|
277
277
|
}
|
278
278
|
}
|
279
279
|
|
280
|
-
impl IntoValue for Wrap<&StringChunked> {
|
281
|
-
fn into_value_with(self, _: &Ruby) -> Value {
|
282
|
-
let iter = self.0.into_iter();
|
283
|
-
RArray::from_iter(iter).into_value()
|
284
|
-
}
|
285
|
-
}
|
286
|
-
|
287
|
-
impl IntoValue for Wrap<&BinaryChunked> {
|
288
|
-
fn into_value_with(self, _: &Ruby) -> Value {
|
289
|
-
let iter = self
|
290
|
-
.0
|
291
|
-
.into_iter()
|
292
|
-
.map(|opt_bytes| opt_bytes.map(RString::from_slice));
|
293
|
-
RArray::from_iter(iter).into_value()
|
294
|
-
}
|
295
|
-
}
|
296
|
-
|
297
|
-
impl IntoValue for Wrap<&StructChunked> {
|
298
|
-
fn into_value_with(self, _: &Ruby) -> Value {
|
299
|
-
let s = self.0.clone().into_series();
|
300
|
-
// todo! iterate its chunks and flatten.
|
301
|
-
// make series::iter() accept a chunk index.
|
302
|
-
let s = s.rechunk();
|
303
|
-
let iter = s.iter().map(|av| {
|
304
|
-
if let AnyValue::Struct(_, _, flds) = av {
|
305
|
-
struct_dict(av._iter_struct_av(), flds)
|
306
|
-
} else {
|
307
|
-
unreachable!()
|
308
|
-
}
|
309
|
-
});
|
310
|
-
|
311
|
-
RArray::from_iter(iter).into_value()
|
312
|
-
}
|
313
|
-
}
|
314
|
-
|
315
|
-
impl IntoValue for Wrap<&DurationChunked> {
|
316
|
-
fn into_value_with(self, _: &Ruby) -> Value {
|
317
|
-
let utils = utils();
|
318
|
-
let time_unit = Wrap(self.0.time_unit()).into_value();
|
319
|
-
let iter = self.0.into_iter().map(|opt_v| {
|
320
|
-
opt_v.map(|v| {
|
321
|
-
utils
|
322
|
-
.funcall::<_, _, Value>("_to_ruby_duration", (v, time_unit))
|
323
|
-
.unwrap()
|
324
|
-
})
|
325
|
-
});
|
326
|
-
RArray::from_iter(iter).into_value()
|
327
|
-
}
|
328
|
-
}
|
329
|
-
|
330
|
-
impl IntoValue for Wrap<&DatetimeChunked> {
|
331
|
-
fn into_value_with(self, _: &Ruby) -> Value {
|
332
|
-
let utils = utils();
|
333
|
-
let time_unit = Wrap(self.0.time_unit()).into_value();
|
334
|
-
let time_zone = self.0.time_zone().clone().into_value();
|
335
|
-
let iter = self.0.into_iter().map(|opt_v| {
|
336
|
-
opt_v.map(|v| {
|
337
|
-
utils
|
338
|
-
.funcall::<_, _, Value>("_to_ruby_datetime", (v, time_unit, time_zone))
|
339
|
-
.unwrap()
|
340
|
-
})
|
341
|
-
});
|
342
|
-
RArray::from_iter(iter).into_value()
|
343
|
-
}
|
344
|
-
}
|
345
|
-
|
346
|
-
impl IntoValue for Wrap<&TimeChunked> {
|
347
|
-
fn into_value_with(self, _: &Ruby) -> Value {
|
348
|
-
let utils = utils();
|
349
|
-
let iter = self.0.into_iter().map(|opt_v| {
|
350
|
-
opt_v.map(|v| utils.funcall::<_, _, Value>("_to_ruby_time", (v,)).unwrap())
|
351
|
-
});
|
352
|
-
RArray::from_iter(iter).into_value()
|
353
|
-
}
|
354
|
-
}
|
355
|
-
|
356
|
-
impl IntoValue for Wrap<&DateChunked> {
|
357
|
-
fn into_value_with(self, _: &Ruby) -> Value {
|
358
|
-
let utils = utils();
|
359
|
-
let iter = self.0.into_iter().map(|opt_v| {
|
360
|
-
opt_v.map(|v| utils.funcall::<_, _, Value>("_to_ruby_date", (v,)).unwrap())
|
361
|
-
});
|
362
|
-
RArray::from_iter(iter).into_value()
|
363
|
-
}
|
364
|
-
}
|
365
|
-
|
366
|
-
impl IntoValue for Wrap<&DecimalChunked> {
|
367
|
-
fn into_value_with(self, _: &Ruby) -> Value {
|
368
|
-
let utils = utils();
|
369
|
-
let rb_scale = (-(self.0.scale() as i32)).into_value();
|
370
|
-
let iter = self.0.into_iter().map(|opt_v| {
|
371
|
-
opt_v.map(|v| {
|
372
|
-
utils
|
373
|
-
.funcall::<_, _, Value>("_to_ruby_decimal", (v.to_string(), rb_scale))
|
374
|
-
.unwrap()
|
375
|
-
})
|
376
|
-
});
|
377
|
-
RArray::from_iter(iter).into_value()
|
378
|
-
}
|
379
|
-
}
|
380
|
-
|
381
|
-
fn abs_decimal_from_digits(digits: String, exp: i32) -> Option<(i128, usize)> {
|
382
|
-
match digits.parse::<i128>() {
|
383
|
-
Ok(v) => Some((v, ((digits.len() as i32) - exp) as usize)),
|
384
|
-
Err(_) => None,
|
385
|
-
}
|
386
|
-
}
|
387
|
-
|
388
280
|
impl TryConvert for Wrap<Field> {
|
389
281
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
390
282
|
let name: String = ob.funcall("name", ())?;
|
@@ -410,6 +302,7 @@ impl TryConvert for Wrap<DataType> {
|
|
410
302
|
"Polars::Binary" => DataType::Binary,
|
411
303
|
"Polars::Boolean" => DataType::Boolean,
|
412
304
|
"Polars::Categorical" => DataType::Categorical(None, Default::default()),
|
305
|
+
"Polars::Enum" => DataType::Enum(None, Default::default()),
|
413
306
|
"Polars::Date" => DataType::Date,
|
414
307
|
"Polars::Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
|
415
308
|
"Polars::Time" => DataType::Time,
|
@@ -431,6 +324,36 @@ impl TryConvert for Wrap<DataType> {
|
|
431
324
|
} else if String::try_convert(ob).is_err() {
|
432
325
|
let name = unsafe { ob.class().name() }.into_owned();
|
433
326
|
match name.as_str() {
|
327
|
+
"Polars::Int8" => DataType::Int8,
|
328
|
+
"Polars::Int16" => DataType::Int16,
|
329
|
+
"Polars::Int32" => DataType::Int32,
|
330
|
+
"Polars::Int64" => DataType::Int64,
|
331
|
+
"Polars::UInt8" => DataType::UInt8,
|
332
|
+
"Polars::UInt16" => DataType::UInt16,
|
333
|
+
"Polars::UInt32" => DataType::UInt32,
|
334
|
+
"Polars::UInt64" => DataType::UInt64,
|
335
|
+
"Polars::String" => DataType::String,
|
336
|
+
"Polars::Binary" => DataType::Binary,
|
337
|
+
"Polars::Boolean" => DataType::Boolean,
|
338
|
+
"Polars::Categorical" => {
|
339
|
+
let ordering = ob
|
340
|
+
.funcall::<_, _, Wrap<CategoricalOrdering>>("ordering", ())?
|
341
|
+
.0;
|
342
|
+
DataType::Categorical(None, ordering)
|
343
|
+
}
|
344
|
+
"Polars::Enum" => {
|
345
|
+
let categories = ob.funcall("categories", ()).unwrap();
|
346
|
+
let s = get_series(categories)?;
|
347
|
+
let ca = s.str().map_err(RbPolarsErr::from)?;
|
348
|
+
let categories = ca.downcast_iter().next().unwrap().clone();
|
349
|
+
create_enum_data_type(categories)
|
350
|
+
}
|
351
|
+
"Polars::Date" => DataType::Date,
|
352
|
+
"Polars::Time" => DataType::Time,
|
353
|
+
"Polars::Float32" => DataType::Float32,
|
354
|
+
"Polars::Float64" => DataType::Float64,
|
355
|
+
"Polars::Null" => DataType::Null,
|
356
|
+
"Polars::Unknown" => DataType::Unknown,
|
434
357
|
"Polars::Duration" => {
|
435
358
|
let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
|
436
359
|
let time_unit = Wrap::<TimeUnit>::try_convert(time_unit)?.0;
|
@@ -503,102 +426,6 @@ impl TryConvert for Wrap<DataType> {
|
|
503
426
|
}
|
504
427
|
}
|
505
428
|
|
506
|
-
impl<'s> TryConvert for Wrap<AnyValue<'s>> {
|
507
|
-
fn try_convert(ob: Value) -> RbResult<Self> {
|
508
|
-
if ob.is_kind_of(class::true_class()) || ob.is_kind_of(class::false_class()) {
|
509
|
-
Ok(AnyValue::Boolean(bool::try_convert(ob)?).into())
|
510
|
-
} else if let Some(v) = Integer::from_value(ob) {
|
511
|
-
Ok(AnyValue::Int64(v.to_i64()?).into())
|
512
|
-
} else if let Some(v) = Float::from_value(ob) {
|
513
|
-
Ok(AnyValue::Float64(v.to_f64()).into())
|
514
|
-
} else if let Some(v) = RString::from_value(ob) {
|
515
|
-
if v.enc_get() == Index::utf8() {
|
516
|
-
Ok(AnyValue::StringOwned(v.to_string()?.into()).into())
|
517
|
-
} else {
|
518
|
-
Ok(AnyValue::BinaryOwned(unsafe { v.as_slice() }.to_vec()).into())
|
519
|
-
}
|
520
|
-
// call is_a? for ActiveSupport::TimeWithZone
|
521
|
-
} else if ob.funcall::<_, _, bool>("is_a?", (class::time(),))? {
|
522
|
-
let sec = ob.funcall::<_, _, i64>("to_i", ())?;
|
523
|
-
let nsec = ob.funcall::<_, _, i64>("nsec", ())?;
|
524
|
-
let v = sec * 1_000_000_000 + nsec;
|
525
|
-
// TODO support time zone when possible
|
526
|
-
// https://github.com/pola-rs/polars/issues/9103
|
527
|
-
Ok(AnyValue::Datetime(v, TimeUnit::Nanoseconds, &None).into())
|
528
|
-
} else if ob.is_nil() {
|
529
|
-
Ok(AnyValue::Null.into())
|
530
|
-
} else if let Some(dict) = RHash::from_value(ob) {
|
531
|
-
let len = dict.len();
|
532
|
-
let mut keys = Vec::with_capacity(len);
|
533
|
-
let mut vals = Vec::with_capacity(len);
|
534
|
-
dict.foreach(|k: Value, v: Value| {
|
535
|
-
let key = String::try_convert(k)?;
|
536
|
-
let val = Wrap::<AnyValue>::try_convert(v)?.0;
|
537
|
-
let dtype = DataType::from(&val);
|
538
|
-
keys.push(Field::new(&key, dtype));
|
539
|
-
vals.push(val);
|
540
|
-
Ok(ForEach::Continue)
|
541
|
-
})?;
|
542
|
-
Ok(Wrap(AnyValue::StructOwned(Box::new((vals, keys)))))
|
543
|
-
} else if let Some(v) = RArray::from_value(ob) {
|
544
|
-
if v.is_empty() {
|
545
|
-
Ok(Wrap(AnyValue::List(Series::new_empty("", &DataType::Null))))
|
546
|
-
} else {
|
547
|
-
let list = v;
|
548
|
-
|
549
|
-
let mut avs = Vec::with_capacity(25);
|
550
|
-
let mut iter = list.each();
|
551
|
-
|
552
|
-
for item in (&mut iter).take(25) {
|
553
|
-
avs.push(Wrap::<AnyValue>::try_convert(item?)?.0)
|
554
|
-
}
|
555
|
-
|
556
|
-
let (dtype, _n_types) = any_values_to_dtype(&avs).map_err(RbPolarsErr::from)?;
|
557
|
-
|
558
|
-
// push the rest
|
559
|
-
avs.reserve(list.len());
|
560
|
-
for item in iter {
|
561
|
-
avs.push(Wrap::<AnyValue>::try_convert(item?)?.0)
|
562
|
-
}
|
563
|
-
|
564
|
-
let s = Series::from_any_values_and_dtype("", &avs, &dtype, true)
|
565
|
-
.map_err(RbPolarsErr::from)?;
|
566
|
-
Ok(Wrap(AnyValue::List(s)))
|
567
|
-
}
|
568
|
-
} else if ob.is_kind_of(crate::rb_modules::datetime()) {
|
569
|
-
let sec: i64 = ob.funcall("to_i", ())?;
|
570
|
-
let nsec: i64 = ob.funcall("nsec", ())?;
|
571
|
-
Ok(Wrap(AnyValue::Datetime(
|
572
|
-
sec * 1_000_000_000 + nsec,
|
573
|
-
TimeUnit::Nanoseconds,
|
574
|
-
&None,
|
575
|
-
)))
|
576
|
-
} else if ob.is_kind_of(crate::rb_modules::date()) {
|
577
|
-
// convert to DateTime for UTC
|
578
|
-
let v = ob
|
579
|
-
.funcall::<_, _, Value>("to_datetime", ())?
|
580
|
-
.funcall::<_, _, Value>("to_time", ())?
|
581
|
-
.funcall::<_, _, i64>("to_i", ())?;
|
582
|
-
Ok(Wrap(AnyValue::Date((v / 86400) as i32)))
|
583
|
-
} else if ob.is_kind_of(crate::rb_modules::bigdecimal()) {
|
584
|
-
let (sign, digits, _, exp): (i8, String, i32, i32) = ob.funcall("split", ()).unwrap();
|
585
|
-
let (mut v, scale) = abs_decimal_from_digits(digits, exp).ok_or_else(|| {
|
586
|
-
RbPolarsErr::other("BigDecimal is too large to fit in Decimal128".into())
|
587
|
-
})?;
|
588
|
-
if sign < 0 {
|
589
|
-
// TODO better error
|
590
|
-
v = v.checked_neg().unwrap();
|
591
|
-
}
|
592
|
-
Ok(Wrap(AnyValue::Decimal(v, scale)))
|
593
|
-
} else {
|
594
|
-
Err(RbPolarsErr::other(format!(
|
595
|
-
"object type not supported {:?}",
|
596
|
-
ob
|
597
|
-
)))
|
598
|
-
}
|
599
|
-
}
|
600
|
-
}
|
601
|
-
|
602
429
|
impl<'s> TryConvert for Wrap<Row<'s>> {
|
603
430
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
604
431
|
let mut vals: Vec<Wrap<AnyValue<'s>>> = Vec::new();
|
@@ -662,6 +489,15 @@ impl TotalEq for ObjectValue {
|
|
662
489
|
}
|
663
490
|
}
|
664
491
|
|
492
|
+
impl TotalHash for ObjectValue {
|
493
|
+
fn tot_hash<H>(&self, state: &mut H)
|
494
|
+
where
|
495
|
+
H: Hasher,
|
496
|
+
{
|
497
|
+
self.hash(state);
|
498
|
+
}
|
499
|
+
}
|
500
|
+
|
665
501
|
impl Display for ObjectValue {
|
666
502
|
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
667
503
|
write!(f, "{}", self.to_object())
|
@@ -715,24 +551,33 @@ impl Default for ObjectValue {
|
|
715
551
|
|
716
552
|
pub(crate) fn dicts_to_rows(
|
717
553
|
records: &Value,
|
718
|
-
infer_schema_len: usize
|
554
|
+
infer_schema_len: Option<usize>,
|
555
|
+
schema_columns: PlIndexSet<String>,
|
719
556
|
) -> RbResult<(Vec<Row>, Vec<String>)> {
|
557
|
+
let infer_schema_len = infer_schema_len.map(|n| std::cmp::max(1, n));
|
720
558
|
let (dicts, len) = get_rbseq(*records)?;
|
721
559
|
|
722
|
-
let
|
723
|
-
|
724
|
-
|
725
|
-
|
726
|
-
|
727
|
-
|
728
|
-
|
729
|
-
|
730
|
-
|
731
|
-
|
732
|
-
|
733
|
-
|
734
|
-
|
735
|
-
|
560
|
+
let key_names = {
|
561
|
+
if !schema_columns.is_empty() {
|
562
|
+
schema_columns
|
563
|
+
} else {
|
564
|
+
let mut inferred_keys = PlIndexSet::new();
|
565
|
+
for d in dicts.each().take(infer_schema_len.unwrap_or(usize::MAX)) {
|
566
|
+
let d = d?;
|
567
|
+
let d = RHash::try_convert(d)?;
|
568
|
+
|
569
|
+
d.foreach(|name: Value, _value: Value| {
|
570
|
+
if let Some(v) = Symbol::from_value(name) {
|
571
|
+
inferred_keys.insert(v.name()?.into());
|
572
|
+
} else {
|
573
|
+
inferred_keys.insert(String::try_convert(name)?);
|
574
|
+
};
|
575
|
+
Ok(ForEach::Continue)
|
576
|
+
})?;
|
577
|
+
}
|
578
|
+
inferred_keys
|
579
|
+
}
|
580
|
+
};
|
736
581
|
|
737
582
|
let mut rows = Vec::with_capacity(len);
|
738
583
|
|
@@ -1062,6 +907,22 @@ impl TryConvert for Wrap<UniqueKeepStrategy> {
|
|
1062
907
|
}
|
1063
908
|
}
|
1064
909
|
|
910
|
+
impl TryConvert for Wrap<IpcCompression> {
|
911
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
912
|
+
let parsed = match String::try_convert(ob)?.as_str() {
|
913
|
+
"lz4" => IpcCompression::LZ4,
|
914
|
+
"zstd" => IpcCompression::ZSTD,
|
915
|
+
v => {
|
916
|
+
return Err(RbValueError::new_err(format!(
|
917
|
+
"compression must be one of {{'lz4', 'zstd'}}, got {}",
|
918
|
+
v
|
919
|
+
)))
|
920
|
+
}
|
921
|
+
};
|
922
|
+
Ok(Wrap(parsed))
|
923
|
+
}
|
924
|
+
}
|
925
|
+
|
1065
926
|
impl TryConvert for Wrap<SearchSortedSide> {
|
1066
927
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
1067
928
|
let parsed = match String::try_convert(ob)?.as_str() {
|
@@ -1078,6 +939,56 @@ impl TryConvert for Wrap<SearchSortedSide> {
|
|
1078
939
|
}
|
1079
940
|
}
|
1080
941
|
|
942
|
+
impl TryConvert for Wrap<WindowMapping> {
|
943
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
944
|
+
let parsed = match String::try_convert(ob)?.as_str() {
|
945
|
+
"group_to_rows" => WindowMapping::GroupsToRows,
|
946
|
+
"join" => WindowMapping::Join,
|
947
|
+
"explode" => WindowMapping::Explode,
|
948
|
+
v => {
|
949
|
+
return Err(RbValueError::new_err(format!(
|
950
|
+
"`mapping_strategy` must be one of {{'group_to_rows', 'join', 'explode'}}, got {v}",
|
951
|
+
)))
|
952
|
+
}
|
953
|
+
};
|
954
|
+
Ok(Wrap(parsed))
|
955
|
+
}
|
956
|
+
}
|
957
|
+
|
958
|
+
impl TryConvert for Wrap<JoinValidation> {
|
959
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
960
|
+
let parsed = match String::try_convert(ob)?.as_str() {
|
961
|
+
"1:1" => JoinValidation::OneToOne,
|
962
|
+
"1:m" => JoinValidation::OneToMany,
|
963
|
+
"m:m" => JoinValidation::ManyToMany,
|
964
|
+
"m:1" => JoinValidation::ManyToOne,
|
965
|
+
v => {
|
966
|
+
return Err(RbValueError::new_err(format!(
|
967
|
+
"`validate` must be one of {{'m:m', 'm:1', '1:m', '1:1'}}, got {v}",
|
968
|
+
)))
|
969
|
+
}
|
970
|
+
};
|
971
|
+
Ok(Wrap(parsed))
|
972
|
+
}
|
973
|
+
}
|
974
|
+
|
975
|
+
impl TryConvert for Wrap<QuoteStyle> {
|
976
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
977
|
+
let parsed = match String::try_convert(ob)?.as_str() {
|
978
|
+
"always" => QuoteStyle::Always,
|
979
|
+
"necessary" => QuoteStyle::Necessary,
|
980
|
+
"non_numeric" => QuoteStyle::NonNumeric,
|
981
|
+
"never" => QuoteStyle::Never,
|
982
|
+
v => {
|
983
|
+
return Err(RbValueError::new_err(format!(
|
984
|
+
"`quote_style` must be one of {{'always', 'necessary', 'non_numeric', 'never'}}, got {v}",
|
985
|
+
)))
|
986
|
+
},
|
987
|
+
};
|
988
|
+
Ok(Wrap(parsed))
|
989
|
+
}
|
990
|
+
}
|
991
|
+
|
1081
992
|
pub fn parse_fill_null_strategy(
|
1082
993
|
strategy: &str,
|
1083
994
|
limit: FillNullLimit,
|
@@ -1150,3 +1061,12 @@ where
|
|
1150
1061
|
{
|
1151
1062
|
container.into_iter().map(|s| s.as_ref().into()).collect()
|
1152
1063
|
}
|
1064
|
+
|
1065
|
+
impl TryConvert for Wrap<NonZeroUsize> {
|
1066
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
1067
|
+
let v = usize::try_convert(ob)?;
|
1068
|
+
NonZeroUsize::new(v)
|
1069
|
+
.map(|v| Wrap(v))
|
1070
|
+
.ok_or(RbValueError::new_err("must be non-zero".into()))
|
1071
|
+
}
|
1072
|
+
}
|