polars-df 0.8.0 → 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +30 -1
- data/Cargo.lock +107 -59
- data/Cargo.toml +0 -3
- data/LICENSE.txt +1 -1
- data/README.md +2 -2
- data/ext/polars/Cargo.toml +15 -7
- data/ext/polars/src/batched_csv.rs +4 -4
- data/ext/polars/src/conversion/anyvalue.rs +185 -0
- data/ext/polars/src/conversion/chunked_array.rs +140 -0
- data/ext/polars/src/{conversion.rs → conversion/mod.rs} +260 -340
- data/ext/polars/src/dataframe.rs +69 -53
- data/ext/polars/src/expr/array.rs +74 -0
- data/ext/polars/src/expr/datetime.rs +22 -56
- data/ext/polars/src/expr/general.rs +61 -33
- data/ext/polars/src/expr/list.rs +52 -4
- data/ext/polars/src/expr/meta.rs +48 -0
- data/ext/polars/src/expr/rolling.rs +1 -0
- data/ext/polars/src/expr/string.rs +59 -8
- data/ext/polars/src/expr/struct.rs +8 -4
- data/ext/polars/src/functions/aggregation.rs +6 -0
- data/ext/polars/src/functions/lazy.rs +103 -48
- data/ext/polars/src/functions/meta.rs +45 -1
- data/ext/polars/src/functions/string_cache.rs +14 -0
- data/ext/polars/src/{lazyframe.rs → lazyframe/mod.rs} +138 -22
- data/ext/polars/src/lib.rs +226 -168
- data/ext/polars/src/series/aggregation.rs +20 -0
- data/ext/polars/src/series/mod.rs +25 -4
- data/lib/polars/array_expr.rb +449 -0
- data/lib/polars/array_name_space.rb +346 -0
- data/lib/polars/cat_expr.rb +24 -0
- data/lib/polars/cat_name_space.rb +75 -0
- data/lib/polars/config.rb +2 -2
- data/lib/polars/data_frame.rb +179 -43
- data/lib/polars/data_types.rb +191 -28
- data/lib/polars/date_time_expr.rb +31 -14
- data/lib/polars/exceptions.rb +12 -1
- data/lib/polars/expr.rb +866 -186
- data/lib/polars/functions/aggregation/horizontal.rb +246 -0
- data/lib/polars/functions/aggregation/vertical.rb +282 -0
- data/lib/polars/functions/as_datatype.rb +248 -0
- data/lib/polars/functions/col.rb +47 -0
- data/lib/polars/functions/eager.rb +182 -0
- data/lib/polars/functions/lazy.rb +1280 -0
- data/lib/polars/functions/len.rb +49 -0
- data/lib/polars/functions/lit.rb +35 -0
- data/lib/polars/functions/random.rb +16 -0
- data/lib/polars/functions/range/date_range.rb +103 -0
- data/lib/polars/functions/range/int_range.rb +51 -0
- data/lib/polars/functions/repeat.rb +144 -0
- data/lib/polars/functions/whenthen.rb +27 -0
- data/lib/polars/functions.rb +29 -416
- data/lib/polars/group_by.rb +2 -2
- data/lib/polars/io.rb +18 -25
- data/lib/polars/lazy_frame.rb +367 -53
- data/lib/polars/list_expr.rb +152 -6
- data/lib/polars/list_name_space.rb +102 -0
- data/lib/polars/meta_expr.rb +175 -7
- data/lib/polars/series.rb +273 -34
- data/lib/polars/string_cache.rb +75 -0
- data/lib/polars/string_expr.rb +412 -96
- data/lib/polars/string_name_space.rb +4 -4
- data/lib/polars/testing.rb +507 -0
- data/lib/polars/utils.rb +52 -8
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +15 -2
- metadata +35 -5
- data/lib/polars/lazy_functions.rb +0 -1181
@@ -1,24 +1,28 @@
|
|
1
|
+
pub(crate) mod anyvalue;
|
2
|
+
mod chunked_array;
|
3
|
+
|
1
4
|
use std::fmt::{Debug, Display, Formatter};
|
2
5
|
use std::hash::{Hash, Hasher};
|
6
|
+
use std::num::NonZeroUsize;
|
3
7
|
|
4
|
-
use magnus::encoding::{EncodingCapable, Index};
|
5
8
|
use magnus::{
|
6
|
-
class, exception, prelude::*, r_hash::ForEach, value::Opaque,
|
7
|
-
|
9
|
+
class, exception, prelude::*, r_hash::ForEach, value::Opaque, IntoValue, Module, RArray, RHash,
|
10
|
+
Ruby, Symbol, TryConvert, Value,
|
8
11
|
};
|
9
12
|
use polars::chunked_array::object::PolarsObjectSafe;
|
10
13
|
use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
|
11
14
|
use polars::datatypes::AnyValue;
|
12
|
-
use polars::frame::row::
|
15
|
+
use polars::frame::row::Row;
|
13
16
|
use polars::frame::NullStrategy;
|
14
17
|
use polars::io::avro::AvroCompression;
|
15
18
|
use polars::prelude::*;
|
16
19
|
use polars::series::ops::NullBehavior;
|
17
|
-
use
|
20
|
+
use polars_core::utils::arrow::array::Array;
|
21
|
+
use polars_utils::total_ord::{TotalEq, TotalHash};
|
18
22
|
use smartstring::alias::String as SmartString;
|
19
23
|
|
20
24
|
use crate::object::OBJECT_NAME;
|
21
|
-
use crate::rb_modules::
|
25
|
+
use crate::rb_modules::series;
|
22
26
|
use crate::{RbDataFrame, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbTypeError, RbValueError};
|
23
27
|
|
24
28
|
pub(crate) fn slice_to_wrapped<T>(slice: &[T]) -> &[Wrap<T>] {
|
@@ -78,36 +82,11 @@ pub(crate) fn get_series(obj: Value) -> RbResult<Series> {
|
|
78
82
|
Ok(rbs.series.borrow().clone())
|
79
83
|
}
|
80
84
|
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
for res in seq.each() {
|
87
|
-
let item = res?;
|
88
|
-
match String::try_convert(item) {
|
89
|
-
Ok(val) => builder.append_value(&val),
|
90
|
-
Err(_) => builder.append_null(),
|
91
|
-
}
|
92
|
-
}
|
93
|
-
Ok(Wrap(builder.finish()))
|
94
|
-
}
|
95
|
-
}
|
96
|
-
|
97
|
-
impl TryConvert for Wrap<BinaryChunked> {
|
98
|
-
fn try_convert(obj: Value) -> RbResult<Self> {
|
99
|
-
let (seq, len) = get_rbseq(obj)?;
|
100
|
-
let mut builder = BinaryChunkedBuilder::new("", len, len * 25);
|
101
|
-
|
102
|
-
for res in seq.each() {
|
103
|
-
let item = res?;
|
104
|
-
match RString::try_convert(item) {
|
105
|
-
Ok(val) => builder.append_value(unsafe { val.as_slice() }),
|
106
|
-
Err(_) => builder.append_null(),
|
107
|
-
}
|
108
|
-
}
|
109
|
-
Ok(Wrap(builder.finish()))
|
110
|
-
}
|
85
|
+
pub(crate) fn to_series(s: RbSeries) -> Value {
|
86
|
+
let series = series();
|
87
|
+
series
|
88
|
+
.funcall::<_, _, Value>("_from_rbseries", (s,))
|
89
|
+
.unwrap()
|
111
90
|
}
|
112
91
|
|
113
92
|
impl TryConvert for Wrap<NullValues> {
|
@@ -134,102 +113,84 @@ fn struct_dict<'a>(vals: impl Iterator<Item = AnyValue<'a>>, flds: &[Field]) ->
|
|
134
113
|
dict.into_value()
|
135
114
|
}
|
136
115
|
|
137
|
-
impl IntoValue for Wrap<
|
138
|
-
fn into_value_with(self,
|
116
|
+
impl IntoValue for Wrap<DataType> {
|
117
|
+
fn into_value_with(self, _: &Ruby) -> Value {
|
118
|
+
let pl = crate::rb_modules::polars();
|
119
|
+
|
139
120
|
match self.0 {
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
AnyValue::UInt64(v) => ruby.into_value(v),
|
144
|
-
AnyValue::Int8(v) => ruby.into_value(v),
|
145
|
-
AnyValue::Int16(v) => ruby.into_value(v),
|
146
|
-
AnyValue::Int32(v) => ruby.into_value(v),
|
147
|
-
AnyValue::Int64(v) => ruby.into_value(v),
|
148
|
-
AnyValue::Float32(v) => ruby.into_value(v),
|
149
|
-
AnyValue::Float64(v) => ruby.into_value(v),
|
150
|
-
AnyValue::Null => ruby.qnil().as_value(),
|
151
|
-
AnyValue::Boolean(v) => ruby.into_value(v),
|
152
|
-
AnyValue::String(v) => ruby.into_value(v),
|
153
|
-
AnyValue::StringOwned(v) => ruby.into_value(v.as_str()),
|
154
|
-
AnyValue::Categorical(idx, rev, arr) => {
|
155
|
-
let s = if arr.is_null() {
|
156
|
-
rev.get(idx)
|
157
|
-
} else {
|
158
|
-
unsafe { arr.deref_unchecked().value(idx as usize) }
|
159
|
-
};
|
160
|
-
s.into_value()
|
121
|
+
DataType::Int8 => {
|
122
|
+
let class = pl.const_get::<_, Value>("Int8").unwrap();
|
123
|
+
class.funcall("new", ()).unwrap()
|
161
124
|
}
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
utils()
|
166
|
-
.funcall("_to_ruby_datetime", (v, time_unit, time_zone.clone()))
|
167
|
-
.unwrap()
|
125
|
+
DataType::Int16 => {
|
126
|
+
let class = pl.const_get::<_, Value>("Int16").unwrap();
|
127
|
+
class.funcall("new", ()).unwrap()
|
168
128
|
}
|
169
|
-
|
170
|
-
let
|
171
|
-
|
172
|
-
.funcall("_to_ruby_duration", (v, time_unit))
|
173
|
-
.unwrap()
|
129
|
+
DataType::Int32 => {
|
130
|
+
let class = pl.const_get::<_, Value>("Int32").unwrap();
|
131
|
+
class.funcall("new", ()).unwrap()
|
174
132
|
}
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
AnyValue::StructOwned(payload) => struct_dict(payload.0.into_iter(), &payload.1),
|
179
|
-
AnyValue::Object(v) => {
|
180
|
-
let object = v.as_any().downcast_ref::<ObjectValue>().unwrap();
|
181
|
-
object.to_object()
|
133
|
+
DataType::Int64 => {
|
134
|
+
let class = pl.const_get::<_, Value>("Int64").unwrap();
|
135
|
+
class.funcall("new", ()).unwrap()
|
182
136
|
}
|
183
|
-
|
184
|
-
let
|
185
|
-
|
137
|
+
DataType::UInt8 => {
|
138
|
+
let class = pl.const_get::<_, Value>("UInt8").unwrap();
|
139
|
+
class.funcall("new", ()).unwrap()
|
140
|
+
}
|
141
|
+
DataType::UInt16 => {
|
142
|
+
let class = pl.const_get::<_, Value>("UInt16").unwrap();
|
143
|
+
class.funcall("new", ()).unwrap()
|
144
|
+
}
|
145
|
+
DataType::UInt32 => {
|
146
|
+
let class = pl.const_get::<_, Value>("UInt32").unwrap();
|
147
|
+
class.funcall("new", ()).unwrap()
|
148
|
+
}
|
149
|
+
DataType::UInt64 => {
|
150
|
+
let class = pl.const_get::<_, Value>("UInt64").unwrap();
|
151
|
+
class.funcall("new", ()).unwrap()
|
152
|
+
}
|
153
|
+
DataType::Float32 => {
|
154
|
+
let class = pl.const_get::<_, Value>("Float32").unwrap();
|
155
|
+
class.funcall("new", ()).unwrap()
|
156
|
+
}
|
157
|
+
DataType::Float64 => {
|
158
|
+
let class = pl.const_get::<_, Value>("Float64").unwrap();
|
159
|
+
class.funcall("new", ()).unwrap()
|
186
160
|
}
|
187
|
-
AnyValue::Binary(v) => RString::from_slice(v).into_value(),
|
188
|
-
AnyValue::BinaryOwned(v) => RString::from_slice(&v).into_value(),
|
189
|
-
AnyValue::Decimal(v, scale) => utils()
|
190
|
-
.funcall("_to_ruby_decimal", (v.to_string(), -(scale as i32)))
|
191
|
-
.unwrap(),
|
192
|
-
}
|
193
|
-
}
|
194
|
-
}
|
195
|
-
|
196
|
-
impl IntoValue for Wrap<DataType> {
|
197
|
-
fn into_value_with(self, _: &Ruby) -> Value {
|
198
|
-
let pl = crate::rb_modules::polars();
|
199
|
-
|
200
|
-
match self.0 {
|
201
|
-
DataType::Int8 => pl.const_get::<_, Value>("Int8").unwrap(),
|
202
|
-
DataType::Int16 => pl.const_get::<_, Value>("Int16").unwrap(),
|
203
|
-
DataType::Int32 => pl.const_get::<_, Value>("Int32").unwrap(),
|
204
|
-
DataType::Int64 => pl.const_get::<_, Value>("Int64").unwrap(),
|
205
|
-
DataType::UInt8 => pl.const_get::<_, Value>("UInt8").unwrap(),
|
206
|
-
DataType::UInt16 => pl.const_get::<_, Value>("UInt16").unwrap(),
|
207
|
-
DataType::UInt32 => pl.const_get::<_, Value>("UInt32").unwrap(),
|
208
|
-
DataType::UInt64 => pl.const_get::<_, Value>("UInt64").unwrap(),
|
209
|
-
DataType::Float32 => pl.const_get::<_, Value>("Float32").unwrap(),
|
210
|
-
DataType::Float64 => pl.const_get::<_, Value>("Float64").unwrap(),
|
211
161
|
DataType::Decimal(precision, scale) => {
|
212
|
-
let
|
213
|
-
|
162
|
+
let class = pl.const_get::<_, Value>("Decimal").unwrap();
|
163
|
+
class
|
214
164
|
.funcall::<_, _, Value>("new", (precision, scale))
|
215
165
|
.unwrap()
|
216
166
|
}
|
217
|
-
DataType::Boolean =>
|
218
|
-
|
219
|
-
|
167
|
+
DataType::Boolean => {
|
168
|
+
let class = pl.const_get::<_, Value>("Boolean").unwrap();
|
169
|
+
class.funcall("new", ()).unwrap()
|
170
|
+
}
|
171
|
+
DataType::String => {
|
172
|
+
let class = pl.const_get::<_, Value>("String").unwrap();
|
173
|
+
class.funcall("new", ()).unwrap()
|
174
|
+
}
|
175
|
+
DataType::Binary => {
|
176
|
+
let class = pl.const_get::<_, Value>("Binary").unwrap();
|
177
|
+
class.funcall("new", ()).unwrap()
|
178
|
+
}
|
220
179
|
DataType::Array(inner, size) => {
|
180
|
+
let class = pl.const_get::<_, Value>("Array").unwrap();
|
221
181
|
let inner = Wrap(*inner);
|
222
|
-
let
|
223
|
-
|
224
|
-
.funcall::<_, _, Value>("new", (size, inner))
|
225
|
-
.unwrap()
|
182
|
+
let args = (inner, size);
|
183
|
+
class.funcall::<_, _, Value>("new", args).unwrap()
|
226
184
|
}
|
227
185
|
DataType::List(inner) => {
|
186
|
+
let class = pl.const_get::<_, Value>("List").unwrap();
|
228
187
|
let inner = Wrap(*inner);
|
229
|
-
|
230
|
-
|
188
|
+
class.funcall::<_, _, Value>("new", (inner,)).unwrap()
|
189
|
+
}
|
190
|
+
DataType::Date => {
|
191
|
+
let class = pl.const_get::<_, Value>("Date").unwrap();
|
192
|
+
class.funcall("new", ()).unwrap()
|
231
193
|
}
|
232
|
-
DataType::Date => pl.const_get::<_, Value>("Date").unwrap(),
|
233
194
|
DataType::Datetime(tu, tz) => {
|
234
195
|
let datetime_class = pl.const_get::<_, Value>("Datetime").unwrap();
|
235
196
|
datetime_class
|
@@ -242,9 +203,29 @@ impl IntoValue for Wrap<DataType> {
|
|
242
203
|
.funcall::<_, _, Value>("new", (tu.to_ascii(),))
|
243
204
|
.unwrap()
|
244
205
|
}
|
245
|
-
DataType::Object(_, _) =>
|
246
|
-
|
247
|
-
|
206
|
+
DataType::Object(_, _) => {
|
207
|
+
let class = pl.const_get::<_, Value>("Object").unwrap();
|
208
|
+
class.funcall("new", ()).unwrap()
|
209
|
+
}
|
210
|
+
DataType::Categorical(_, ordering) => {
|
211
|
+
let class = pl.const_get::<_, Value>("Categorical").unwrap();
|
212
|
+
class.funcall("new", (Wrap(ordering),)).unwrap()
|
213
|
+
}
|
214
|
+
DataType::Enum(rev_map, _) => {
|
215
|
+
// we should always have an initialized rev_map coming from rust
|
216
|
+
let categories = rev_map.as_ref().unwrap().get_categories();
|
217
|
+
let class = pl.const_get::<_, Value>("Enum").unwrap();
|
218
|
+
let s = Series::from_arrow("category", categories.to_boxed()).unwrap();
|
219
|
+
let series = to_series(s.into());
|
220
|
+
class
|
221
|
+
.funcall::<_, _, Value>("new", (series,))
|
222
|
+
.unwrap()
|
223
|
+
.into()
|
224
|
+
}
|
225
|
+
DataType::Time => {
|
226
|
+
let class = pl.const_get::<_, Value>("Time").unwrap();
|
227
|
+
class.funcall("new", ()).unwrap()
|
228
|
+
}
|
248
229
|
DataType::Struct(fields) => {
|
249
230
|
let field_class = pl.const_get::<_, Value>("Field").unwrap();
|
250
231
|
let iter = fields.iter().map(|fld| {
|
@@ -260,12 +241,31 @@ impl IntoValue for Wrap<DataType> {
|
|
260
241
|
.funcall::<_, _, Value>("new", (fields,))
|
261
242
|
.unwrap()
|
262
243
|
}
|
263
|
-
DataType::Null =>
|
264
|
-
|
244
|
+
DataType::Null => {
|
245
|
+
let class = pl.const_get::<_, Value>("Null").unwrap();
|
246
|
+
class.funcall("new", ()).unwrap()
|
247
|
+
}
|
248
|
+
DataType::Unknown => {
|
249
|
+
let class = pl.const_get::<_, Value>("Unknown").unwrap();
|
250
|
+
class.funcall("new", ()).unwrap()
|
251
|
+
}
|
252
|
+
DataType::BinaryOffset => {
|
253
|
+
unimplemented!()
|
254
|
+
}
|
265
255
|
}
|
266
256
|
}
|
267
257
|
}
|
268
258
|
|
259
|
+
impl IntoValue for Wrap<CategoricalOrdering> {
|
260
|
+
fn into_value_with(self, _: &Ruby) -> Value {
|
261
|
+
let ordering = match self.0 {
|
262
|
+
CategoricalOrdering::Physical => "physical",
|
263
|
+
CategoricalOrdering::Lexical => "lexical",
|
264
|
+
};
|
265
|
+
ordering.into_value()
|
266
|
+
}
|
267
|
+
}
|
268
|
+
|
269
269
|
impl IntoValue for Wrap<TimeUnit> {
|
270
270
|
fn into_value_with(self, _: &Ruby) -> Value {
|
271
271
|
let tu = match self.0 {
|
@@ -277,114 +277,6 @@ impl IntoValue for Wrap<TimeUnit> {
|
|
277
277
|
}
|
278
278
|
}
|
279
279
|
|
280
|
-
impl IntoValue for Wrap<&StringChunked> {
|
281
|
-
fn into_value_with(self, _: &Ruby) -> Value {
|
282
|
-
let iter = self.0.into_iter();
|
283
|
-
RArray::from_iter(iter).into_value()
|
284
|
-
}
|
285
|
-
}
|
286
|
-
|
287
|
-
impl IntoValue for Wrap<&BinaryChunked> {
|
288
|
-
fn into_value_with(self, _: &Ruby) -> Value {
|
289
|
-
let iter = self
|
290
|
-
.0
|
291
|
-
.into_iter()
|
292
|
-
.map(|opt_bytes| opt_bytes.map(RString::from_slice));
|
293
|
-
RArray::from_iter(iter).into_value()
|
294
|
-
}
|
295
|
-
}
|
296
|
-
|
297
|
-
impl IntoValue for Wrap<&StructChunked> {
|
298
|
-
fn into_value_with(self, _: &Ruby) -> Value {
|
299
|
-
let s = self.0.clone().into_series();
|
300
|
-
// todo! iterate its chunks and flatten.
|
301
|
-
// make series::iter() accept a chunk index.
|
302
|
-
let s = s.rechunk();
|
303
|
-
let iter = s.iter().map(|av| {
|
304
|
-
if let AnyValue::Struct(_, _, flds) = av {
|
305
|
-
struct_dict(av._iter_struct_av(), flds)
|
306
|
-
} else {
|
307
|
-
unreachable!()
|
308
|
-
}
|
309
|
-
});
|
310
|
-
|
311
|
-
RArray::from_iter(iter).into_value()
|
312
|
-
}
|
313
|
-
}
|
314
|
-
|
315
|
-
impl IntoValue for Wrap<&DurationChunked> {
|
316
|
-
fn into_value_with(self, _: &Ruby) -> Value {
|
317
|
-
let utils = utils();
|
318
|
-
let time_unit = Wrap(self.0.time_unit()).into_value();
|
319
|
-
let iter = self.0.into_iter().map(|opt_v| {
|
320
|
-
opt_v.map(|v| {
|
321
|
-
utils
|
322
|
-
.funcall::<_, _, Value>("_to_ruby_duration", (v, time_unit))
|
323
|
-
.unwrap()
|
324
|
-
})
|
325
|
-
});
|
326
|
-
RArray::from_iter(iter).into_value()
|
327
|
-
}
|
328
|
-
}
|
329
|
-
|
330
|
-
impl IntoValue for Wrap<&DatetimeChunked> {
|
331
|
-
fn into_value_with(self, _: &Ruby) -> Value {
|
332
|
-
let utils = utils();
|
333
|
-
let time_unit = Wrap(self.0.time_unit()).into_value();
|
334
|
-
let time_zone = self.0.time_zone().clone().into_value();
|
335
|
-
let iter = self.0.into_iter().map(|opt_v| {
|
336
|
-
opt_v.map(|v| {
|
337
|
-
utils
|
338
|
-
.funcall::<_, _, Value>("_to_ruby_datetime", (v, time_unit, time_zone))
|
339
|
-
.unwrap()
|
340
|
-
})
|
341
|
-
});
|
342
|
-
RArray::from_iter(iter).into_value()
|
343
|
-
}
|
344
|
-
}
|
345
|
-
|
346
|
-
impl IntoValue for Wrap<&TimeChunked> {
|
347
|
-
fn into_value_with(self, _: &Ruby) -> Value {
|
348
|
-
let utils = utils();
|
349
|
-
let iter = self.0.into_iter().map(|opt_v| {
|
350
|
-
opt_v.map(|v| utils.funcall::<_, _, Value>("_to_ruby_time", (v,)).unwrap())
|
351
|
-
});
|
352
|
-
RArray::from_iter(iter).into_value()
|
353
|
-
}
|
354
|
-
}
|
355
|
-
|
356
|
-
impl IntoValue for Wrap<&DateChunked> {
|
357
|
-
fn into_value_with(self, _: &Ruby) -> Value {
|
358
|
-
let utils = utils();
|
359
|
-
let iter = self.0.into_iter().map(|opt_v| {
|
360
|
-
opt_v.map(|v| utils.funcall::<_, _, Value>("_to_ruby_date", (v,)).unwrap())
|
361
|
-
});
|
362
|
-
RArray::from_iter(iter).into_value()
|
363
|
-
}
|
364
|
-
}
|
365
|
-
|
366
|
-
impl IntoValue for Wrap<&DecimalChunked> {
|
367
|
-
fn into_value_with(self, _: &Ruby) -> Value {
|
368
|
-
let utils = utils();
|
369
|
-
let rb_scale = (-(self.0.scale() as i32)).into_value();
|
370
|
-
let iter = self.0.into_iter().map(|opt_v| {
|
371
|
-
opt_v.map(|v| {
|
372
|
-
utils
|
373
|
-
.funcall::<_, _, Value>("_to_ruby_decimal", (v.to_string(), rb_scale))
|
374
|
-
.unwrap()
|
375
|
-
})
|
376
|
-
});
|
377
|
-
RArray::from_iter(iter).into_value()
|
378
|
-
}
|
379
|
-
}
|
380
|
-
|
381
|
-
fn abs_decimal_from_digits(digits: String, exp: i32) -> Option<(i128, usize)> {
|
382
|
-
match digits.parse::<i128>() {
|
383
|
-
Ok(v) => Some((v, ((digits.len() as i32) - exp) as usize)),
|
384
|
-
Err(_) => None,
|
385
|
-
}
|
386
|
-
}
|
387
|
-
|
388
280
|
impl TryConvert for Wrap<Field> {
|
389
281
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
390
282
|
let name: String = ob.funcall("name", ())?;
|
@@ -410,6 +302,7 @@ impl TryConvert for Wrap<DataType> {
|
|
410
302
|
"Polars::Binary" => DataType::Binary,
|
411
303
|
"Polars::Boolean" => DataType::Boolean,
|
412
304
|
"Polars::Categorical" => DataType::Categorical(None, Default::default()),
|
305
|
+
"Polars::Enum" => DataType::Enum(None, Default::default()),
|
413
306
|
"Polars::Date" => DataType::Date,
|
414
307
|
"Polars::Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
|
415
308
|
"Polars::Time" => DataType::Time,
|
@@ -431,6 +324,36 @@ impl TryConvert for Wrap<DataType> {
|
|
431
324
|
} else if String::try_convert(ob).is_err() {
|
432
325
|
let name = unsafe { ob.class().name() }.into_owned();
|
433
326
|
match name.as_str() {
|
327
|
+
"Polars::Int8" => DataType::Int8,
|
328
|
+
"Polars::Int16" => DataType::Int16,
|
329
|
+
"Polars::Int32" => DataType::Int32,
|
330
|
+
"Polars::Int64" => DataType::Int64,
|
331
|
+
"Polars::UInt8" => DataType::UInt8,
|
332
|
+
"Polars::UInt16" => DataType::UInt16,
|
333
|
+
"Polars::UInt32" => DataType::UInt32,
|
334
|
+
"Polars::UInt64" => DataType::UInt64,
|
335
|
+
"Polars::String" => DataType::String,
|
336
|
+
"Polars::Binary" => DataType::Binary,
|
337
|
+
"Polars::Boolean" => DataType::Boolean,
|
338
|
+
"Polars::Categorical" => {
|
339
|
+
let ordering = ob
|
340
|
+
.funcall::<_, _, Wrap<CategoricalOrdering>>("ordering", ())?
|
341
|
+
.0;
|
342
|
+
DataType::Categorical(None, ordering)
|
343
|
+
}
|
344
|
+
"Polars::Enum" => {
|
345
|
+
let categories = ob.funcall("categories", ()).unwrap();
|
346
|
+
let s = get_series(categories)?;
|
347
|
+
let ca = s.str().map_err(RbPolarsErr::from)?;
|
348
|
+
let categories = ca.downcast_iter().next().unwrap().clone();
|
349
|
+
create_enum_data_type(categories)
|
350
|
+
}
|
351
|
+
"Polars::Date" => DataType::Date,
|
352
|
+
"Polars::Time" => DataType::Time,
|
353
|
+
"Polars::Float32" => DataType::Float32,
|
354
|
+
"Polars::Float64" => DataType::Float64,
|
355
|
+
"Polars::Null" => DataType::Null,
|
356
|
+
"Polars::Unknown" => DataType::Unknown,
|
434
357
|
"Polars::Duration" => {
|
435
358
|
let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
|
436
359
|
let time_unit = Wrap::<TimeUnit>::try_convert(time_unit)?.0;
|
@@ -503,102 +426,6 @@ impl TryConvert for Wrap<DataType> {
|
|
503
426
|
}
|
504
427
|
}
|
505
428
|
|
506
|
-
impl<'s> TryConvert for Wrap<AnyValue<'s>> {
|
507
|
-
fn try_convert(ob: Value) -> RbResult<Self> {
|
508
|
-
if ob.is_kind_of(class::true_class()) || ob.is_kind_of(class::false_class()) {
|
509
|
-
Ok(AnyValue::Boolean(bool::try_convert(ob)?).into())
|
510
|
-
} else if let Some(v) = Integer::from_value(ob) {
|
511
|
-
Ok(AnyValue::Int64(v.to_i64()?).into())
|
512
|
-
} else if let Some(v) = Float::from_value(ob) {
|
513
|
-
Ok(AnyValue::Float64(v.to_f64()).into())
|
514
|
-
} else if let Some(v) = RString::from_value(ob) {
|
515
|
-
if v.enc_get() == Index::utf8() {
|
516
|
-
Ok(AnyValue::StringOwned(v.to_string()?.into()).into())
|
517
|
-
} else {
|
518
|
-
Ok(AnyValue::BinaryOwned(unsafe { v.as_slice() }.to_vec()).into())
|
519
|
-
}
|
520
|
-
// call is_a? for ActiveSupport::TimeWithZone
|
521
|
-
} else if ob.funcall::<_, _, bool>("is_a?", (class::time(),))? {
|
522
|
-
let sec = ob.funcall::<_, _, i64>("to_i", ())?;
|
523
|
-
let nsec = ob.funcall::<_, _, i64>("nsec", ())?;
|
524
|
-
let v = sec * 1_000_000_000 + nsec;
|
525
|
-
// TODO support time zone when possible
|
526
|
-
// https://github.com/pola-rs/polars/issues/9103
|
527
|
-
Ok(AnyValue::Datetime(v, TimeUnit::Nanoseconds, &None).into())
|
528
|
-
} else if ob.is_nil() {
|
529
|
-
Ok(AnyValue::Null.into())
|
530
|
-
} else if let Some(dict) = RHash::from_value(ob) {
|
531
|
-
let len = dict.len();
|
532
|
-
let mut keys = Vec::with_capacity(len);
|
533
|
-
let mut vals = Vec::with_capacity(len);
|
534
|
-
dict.foreach(|k: Value, v: Value| {
|
535
|
-
let key = String::try_convert(k)?;
|
536
|
-
let val = Wrap::<AnyValue>::try_convert(v)?.0;
|
537
|
-
let dtype = DataType::from(&val);
|
538
|
-
keys.push(Field::new(&key, dtype));
|
539
|
-
vals.push(val);
|
540
|
-
Ok(ForEach::Continue)
|
541
|
-
})?;
|
542
|
-
Ok(Wrap(AnyValue::StructOwned(Box::new((vals, keys)))))
|
543
|
-
} else if let Some(v) = RArray::from_value(ob) {
|
544
|
-
if v.is_empty() {
|
545
|
-
Ok(Wrap(AnyValue::List(Series::new_empty("", &DataType::Null))))
|
546
|
-
} else {
|
547
|
-
let list = v;
|
548
|
-
|
549
|
-
let mut avs = Vec::with_capacity(25);
|
550
|
-
let mut iter = list.each();
|
551
|
-
|
552
|
-
for item in (&mut iter).take(25) {
|
553
|
-
avs.push(Wrap::<AnyValue>::try_convert(item?)?.0)
|
554
|
-
}
|
555
|
-
|
556
|
-
let (dtype, _n_types) = any_values_to_dtype(&avs).map_err(RbPolarsErr::from)?;
|
557
|
-
|
558
|
-
// push the rest
|
559
|
-
avs.reserve(list.len());
|
560
|
-
for item in iter {
|
561
|
-
avs.push(Wrap::<AnyValue>::try_convert(item?)?.0)
|
562
|
-
}
|
563
|
-
|
564
|
-
let s = Series::from_any_values_and_dtype("", &avs, &dtype, true)
|
565
|
-
.map_err(RbPolarsErr::from)?;
|
566
|
-
Ok(Wrap(AnyValue::List(s)))
|
567
|
-
}
|
568
|
-
} else if ob.is_kind_of(crate::rb_modules::datetime()) {
|
569
|
-
let sec: i64 = ob.funcall("to_i", ())?;
|
570
|
-
let nsec: i64 = ob.funcall("nsec", ())?;
|
571
|
-
Ok(Wrap(AnyValue::Datetime(
|
572
|
-
sec * 1_000_000_000 + nsec,
|
573
|
-
TimeUnit::Nanoseconds,
|
574
|
-
&None,
|
575
|
-
)))
|
576
|
-
} else if ob.is_kind_of(crate::rb_modules::date()) {
|
577
|
-
// convert to DateTime for UTC
|
578
|
-
let v = ob
|
579
|
-
.funcall::<_, _, Value>("to_datetime", ())?
|
580
|
-
.funcall::<_, _, Value>("to_time", ())?
|
581
|
-
.funcall::<_, _, i64>("to_i", ())?;
|
582
|
-
Ok(Wrap(AnyValue::Date((v / 86400) as i32)))
|
583
|
-
} else if ob.is_kind_of(crate::rb_modules::bigdecimal()) {
|
584
|
-
let (sign, digits, _, exp): (i8, String, i32, i32) = ob.funcall("split", ()).unwrap();
|
585
|
-
let (mut v, scale) = abs_decimal_from_digits(digits, exp).ok_or_else(|| {
|
586
|
-
RbPolarsErr::other("BigDecimal is too large to fit in Decimal128".into())
|
587
|
-
})?;
|
588
|
-
if sign < 0 {
|
589
|
-
// TODO better error
|
590
|
-
v = v.checked_neg().unwrap();
|
591
|
-
}
|
592
|
-
Ok(Wrap(AnyValue::Decimal(v, scale)))
|
593
|
-
} else {
|
594
|
-
Err(RbPolarsErr::other(format!(
|
595
|
-
"object type not supported {:?}",
|
596
|
-
ob
|
597
|
-
)))
|
598
|
-
}
|
599
|
-
}
|
600
|
-
}
|
601
|
-
|
602
429
|
impl<'s> TryConvert for Wrap<Row<'s>> {
|
603
430
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
604
431
|
let mut vals: Vec<Wrap<AnyValue<'s>>> = Vec::new();
|
@@ -662,6 +489,15 @@ impl TotalEq for ObjectValue {
|
|
662
489
|
}
|
663
490
|
}
|
664
491
|
|
492
|
+
impl TotalHash for ObjectValue {
|
493
|
+
fn tot_hash<H>(&self, state: &mut H)
|
494
|
+
where
|
495
|
+
H: Hasher,
|
496
|
+
{
|
497
|
+
self.hash(state);
|
498
|
+
}
|
499
|
+
}
|
500
|
+
|
665
501
|
impl Display for ObjectValue {
|
666
502
|
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
667
503
|
write!(f, "{}", self.to_object())
|
@@ -715,24 +551,33 @@ impl Default for ObjectValue {
|
|
715
551
|
|
716
552
|
pub(crate) fn dicts_to_rows(
|
717
553
|
records: &Value,
|
718
|
-
infer_schema_len: usize
|
554
|
+
infer_schema_len: Option<usize>,
|
555
|
+
schema_columns: PlIndexSet<String>,
|
719
556
|
) -> RbResult<(Vec<Row>, Vec<String>)> {
|
557
|
+
let infer_schema_len = infer_schema_len.map(|n| std::cmp::max(1, n));
|
720
558
|
let (dicts, len) = get_rbseq(*records)?;
|
721
559
|
|
722
|
-
let
|
723
|
-
|
724
|
-
|
725
|
-
|
726
|
-
|
727
|
-
|
728
|
-
|
729
|
-
|
730
|
-
|
731
|
-
|
732
|
-
|
733
|
-
|
734
|
-
|
735
|
-
|
560
|
+
let key_names = {
|
561
|
+
if !schema_columns.is_empty() {
|
562
|
+
schema_columns
|
563
|
+
} else {
|
564
|
+
let mut inferred_keys = PlIndexSet::new();
|
565
|
+
for d in dicts.each().take(infer_schema_len.unwrap_or(usize::MAX)) {
|
566
|
+
let d = d?;
|
567
|
+
let d = RHash::try_convert(d)?;
|
568
|
+
|
569
|
+
d.foreach(|name: Value, _value: Value| {
|
570
|
+
if let Some(v) = Symbol::from_value(name) {
|
571
|
+
inferred_keys.insert(v.name()?.into());
|
572
|
+
} else {
|
573
|
+
inferred_keys.insert(String::try_convert(name)?);
|
574
|
+
};
|
575
|
+
Ok(ForEach::Continue)
|
576
|
+
})?;
|
577
|
+
}
|
578
|
+
inferred_keys
|
579
|
+
}
|
580
|
+
};
|
736
581
|
|
737
582
|
let mut rows = Vec::with_capacity(len);
|
738
583
|
|
@@ -1062,6 +907,22 @@ impl TryConvert for Wrap<UniqueKeepStrategy> {
|
|
1062
907
|
}
|
1063
908
|
}
|
1064
909
|
|
910
|
+
impl TryConvert for Wrap<IpcCompression> {
|
911
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
912
|
+
let parsed = match String::try_convert(ob)?.as_str() {
|
913
|
+
"lz4" => IpcCompression::LZ4,
|
914
|
+
"zstd" => IpcCompression::ZSTD,
|
915
|
+
v => {
|
916
|
+
return Err(RbValueError::new_err(format!(
|
917
|
+
"compression must be one of {{'lz4', 'zstd'}}, got {}",
|
918
|
+
v
|
919
|
+
)))
|
920
|
+
}
|
921
|
+
};
|
922
|
+
Ok(Wrap(parsed))
|
923
|
+
}
|
924
|
+
}
|
925
|
+
|
1065
926
|
impl TryConvert for Wrap<SearchSortedSide> {
|
1066
927
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
1067
928
|
let parsed = match String::try_convert(ob)?.as_str() {
|
@@ -1078,6 +939,56 @@ impl TryConvert for Wrap<SearchSortedSide> {
|
|
1078
939
|
}
|
1079
940
|
}
|
1080
941
|
|
942
|
+
impl TryConvert for Wrap<WindowMapping> {
|
943
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
944
|
+
let parsed = match String::try_convert(ob)?.as_str() {
|
945
|
+
"group_to_rows" => WindowMapping::GroupsToRows,
|
946
|
+
"join" => WindowMapping::Join,
|
947
|
+
"explode" => WindowMapping::Explode,
|
948
|
+
v => {
|
949
|
+
return Err(RbValueError::new_err(format!(
|
950
|
+
"`mapping_strategy` must be one of {{'group_to_rows', 'join', 'explode'}}, got {v}",
|
951
|
+
)))
|
952
|
+
}
|
953
|
+
};
|
954
|
+
Ok(Wrap(parsed))
|
955
|
+
}
|
956
|
+
}
|
957
|
+
|
958
|
+
impl TryConvert for Wrap<JoinValidation> {
|
959
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
960
|
+
let parsed = match String::try_convert(ob)?.as_str() {
|
961
|
+
"1:1" => JoinValidation::OneToOne,
|
962
|
+
"1:m" => JoinValidation::OneToMany,
|
963
|
+
"m:m" => JoinValidation::ManyToMany,
|
964
|
+
"m:1" => JoinValidation::ManyToOne,
|
965
|
+
v => {
|
966
|
+
return Err(RbValueError::new_err(format!(
|
967
|
+
"`validate` must be one of {{'m:m', 'm:1', '1:m', '1:1'}}, got {v}",
|
968
|
+
)))
|
969
|
+
}
|
970
|
+
};
|
971
|
+
Ok(Wrap(parsed))
|
972
|
+
}
|
973
|
+
}
|
974
|
+
|
975
|
+
impl TryConvert for Wrap<QuoteStyle> {
|
976
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
977
|
+
let parsed = match String::try_convert(ob)?.as_str() {
|
978
|
+
"always" => QuoteStyle::Always,
|
979
|
+
"necessary" => QuoteStyle::Necessary,
|
980
|
+
"non_numeric" => QuoteStyle::NonNumeric,
|
981
|
+
"never" => QuoteStyle::Never,
|
982
|
+
v => {
|
983
|
+
return Err(RbValueError::new_err(format!(
|
984
|
+
"`quote_style` must be one of {{'always', 'necessary', 'non_numeric', 'never'}}, got {v}",
|
985
|
+
)))
|
986
|
+
},
|
987
|
+
};
|
988
|
+
Ok(Wrap(parsed))
|
989
|
+
}
|
990
|
+
}
|
991
|
+
|
1081
992
|
pub fn parse_fill_null_strategy(
|
1082
993
|
strategy: &str,
|
1083
994
|
limit: FillNullLimit,
|
@@ -1150,3 +1061,12 @@ where
|
|
1150
1061
|
{
|
1151
1062
|
container.into_iter().map(|s| s.as_ref().into()).collect()
|
1152
1063
|
}
|
1064
|
+
|
1065
|
+
impl TryConvert for Wrap<NonZeroUsize> {
|
1066
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
1067
|
+
let v = usize::try_convert(ob)?;
|
1068
|
+
NonZeroUsize::new(v)
|
1069
|
+
.map(|v| Wrap(v))
|
1070
|
+
.ok_or(RbValueError::new_err("must be non-zero".into()))
|
1071
|
+
}
|
1072
|
+
}
|