polars-df 0.7.0 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +41 -0
- data/Cargo.lock +353 -237
- data/Cargo.toml +0 -3
- data/LICENSE.txt +1 -1
- data/README.md +2 -2
- data/ext/polars/Cargo.toml +17 -6
- data/ext/polars/src/batched_csv.rs +6 -7
- data/ext/polars/src/conversion/anyvalue.rs +185 -0
- data/ext/polars/src/conversion/chunked_array.rs +140 -0
- data/ext/polars/src/{conversion.rs → conversion/mod.rs} +268 -347
- data/ext/polars/src/dataframe.rs +96 -116
- data/ext/polars/src/expr/array.rs +74 -0
- data/ext/polars/src/expr/categorical.rs +8 -1
- data/ext/polars/src/expr/datetime.rs +22 -56
- data/ext/polars/src/expr/general.rs +124 -37
- data/ext/polars/src/expr/list.rs +52 -4
- data/ext/polars/src/expr/meta.rs +48 -0
- data/ext/polars/src/expr/rolling.rs +16 -10
- data/ext/polars/src/expr/string.rs +68 -17
- data/ext/polars/src/expr/struct.rs +8 -4
- data/ext/polars/src/functions/aggregation.rs +6 -0
- data/ext/polars/src/functions/lazy.rs +103 -48
- data/ext/polars/src/functions/meta.rs +45 -1
- data/ext/polars/src/functions/range.rs +5 -10
- data/ext/polars/src/functions/string_cache.rs +14 -0
- data/ext/polars/src/{lazyframe.rs → lazyframe/mod.rs} +166 -41
- data/ext/polars/src/lib.rs +245 -187
- data/ext/polars/src/map/dataframe.rs +1 -1
- data/ext/polars/src/map/mod.rs +2 -2
- data/ext/polars/src/map/series.rs +6 -6
- data/ext/polars/src/object.rs +0 -30
- data/ext/polars/src/on_startup.rs +32 -0
- data/ext/polars/src/series/aggregation.rs +23 -0
- data/ext/polars/src/series/construction.rs +1 -1
- data/ext/polars/src/series/export.rs +2 -2
- data/ext/polars/src/{series.rs → series/mod.rs} +45 -21
- data/ext/polars/src/series/{set_at_idx.rs → scatter.rs} +18 -18
- data/ext/polars/src/utils.rs +1 -1
- data/lib/polars/array_expr.rb +449 -0
- data/lib/polars/array_name_space.rb +346 -0
- data/lib/polars/cat_expr.rb +24 -0
- data/lib/polars/cat_name_space.rb +75 -0
- data/lib/polars/config.rb +2 -2
- data/lib/polars/data_frame.rb +248 -108
- data/lib/polars/data_types.rb +195 -29
- data/lib/polars/date_time_expr.rb +41 -24
- data/lib/polars/date_time_name_space.rb +12 -12
- data/lib/polars/exceptions.rb +12 -1
- data/lib/polars/expr.rb +1080 -195
- data/lib/polars/functions/aggregation/horizontal.rb +246 -0
- data/lib/polars/functions/aggregation/vertical.rb +282 -0
- data/lib/polars/functions/as_datatype.rb +248 -0
- data/lib/polars/functions/col.rb +47 -0
- data/lib/polars/functions/eager.rb +182 -0
- data/lib/polars/functions/lazy.rb +1280 -0
- data/lib/polars/functions/len.rb +49 -0
- data/lib/polars/functions/lit.rb +35 -0
- data/lib/polars/functions/random.rb +16 -0
- data/lib/polars/functions/range/date_range.rb +103 -0
- data/lib/polars/functions/range/int_range.rb +51 -0
- data/lib/polars/functions/repeat.rb +144 -0
- data/lib/polars/functions/whenthen.rb +27 -0
- data/lib/polars/functions.rb +29 -416
- data/lib/polars/group_by.rb +3 -3
- data/lib/polars/io.rb +21 -28
- data/lib/polars/lazy_frame.rb +390 -76
- data/lib/polars/list_expr.rb +152 -6
- data/lib/polars/list_name_space.rb +102 -0
- data/lib/polars/meta_expr.rb +175 -7
- data/lib/polars/series.rb +557 -59
- data/lib/polars/sql_context.rb +1 -1
- data/lib/polars/string_cache.rb +75 -0
- data/lib/polars/string_expr.rb +412 -96
- data/lib/polars/string_name_space.rb +4 -4
- data/lib/polars/struct_expr.rb +1 -1
- data/lib/polars/struct_name_space.rb +1 -1
- data/lib/polars/testing.rb +507 -0
- data/lib/polars/utils.rb +64 -20
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +15 -2
- metadata +40 -9
- data/lib/polars/lazy_functions.rb +0 -1197
@@ -1,24 +1,28 @@
|
|
1
|
+
pub(crate) mod anyvalue;
|
2
|
+
mod chunked_array;
|
3
|
+
|
1
4
|
use std::fmt::{Debug, Display, Formatter};
|
2
5
|
use std::hash::{Hash, Hasher};
|
6
|
+
use std::num::NonZeroUsize;
|
3
7
|
|
4
|
-
use magnus::encoding::{EncodingCapable, Index};
|
5
8
|
use magnus::{
|
6
|
-
class, exception, prelude::*, r_hash::ForEach, value::Opaque,
|
7
|
-
|
9
|
+
class, exception, prelude::*, r_hash::ForEach, value::Opaque, IntoValue, Module, RArray, RHash,
|
10
|
+
Ruby, Symbol, TryConvert, Value,
|
8
11
|
};
|
9
12
|
use polars::chunked_array::object::PolarsObjectSafe;
|
10
13
|
use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
|
11
14
|
use polars::datatypes::AnyValue;
|
12
|
-
use polars::frame::row::
|
15
|
+
use polars::frame::row::Row;
|
13
16
|
use polars::frame::NullStrategy;
|
14
17
|
use polars::io::avro::AvroCompression;
|
15
18
|
use polars::prelude::*;
|
16
19
|
use polars::series::ops::NullBehavior;
|
17
|
-
use polars_core::utils::arrow::
|
20
|
+
use polars_core::utils::arrow::array::Array;
|
21
|
+
use polars_utils::total_ord::{TotalEq, TotalHash};
|
18
22
|
use smartstring::alias::String as SmartString;
|
19
23
|
|
20
24
|
use crate::object::OBJECT_NAME;
|
21
|
-
use crate::rb_modules::
|
25
|
+
use crate::rb_modules::series;
|
22
26
|
use crate::{RbDataFrame, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbTypeError, RbValueError};
|
23
27
|
|
24
28
|
pub(crate) fn slice_to_wrapped<T>(slice: &[T]) -> &[Wrap<T>] {
|
@@ -78,36 +82,11 @@ pub(crate) fn get_series(obj: Value) -> RbResult<Series> {
|
|
78
82
|
Ok(rbs.series.borrow().clone())
|
79
83
|
}
|
80
84
|
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
for res in seq.each() {
|
87
|
-
let item = res?;
|
88
|
-
match String::try_convert(item) {
|
89
|
-
Ok(val) => builder.append_value(&val),
|
90
|
-
Err(_) => builder.append_null(),
|
91
|
-
}
|
92
|
-
}
|
93
|
-
Ok(Wrap(builder.finish()))
|
94
|
-
}
|
95
|
-
}
|
96
|
-
|
97
|
-
impl TryConvert for Wrap<BinaryChunked> {
|
98
|
-
fn try_convert(obj: Value) -> RbResult<Self> {
|
99
|
-
let (seq, len) = get_rbseq(obj)?;
|
100
|
-
let mut builder = BinaryChunkedBuilder::new("", len, len * 25);
|
101
|
-
|
102
|
-
for res in seq.each() {
|
103
|
-
let item = res?;
|
104
|
-
match RString::try_convert(item) {
|
105
|
-
Ok(val) => builder.append_value(unsafe { val.as_slice() }),
|
106
|
-
Err(_) => builder.append_null(),
|
107
|
-
}
|
108
|
-
}
|
109
|
-
Ok(Wrap(builder.finish()))
|
110
|
-
}
|
85
|
+
pub(crate) fn to_series(s: RbSeries) -> Value {
|
86
|
+
let series = series();
|
87
|
+
series
|
88
|
+
.funcall::<_, _, Value>("_from_rbseries", (s,))
|
89
|
+
.unwrap()
|
111
90
|
}
|
112
91
|
|
113
92
|
impl TryConvert for Wrap<NullValues> {
|
@@ -134,102 +113,84 @@ fn struct_dict<'a>(vals: impl Iterator<Item = AnyValue<'a>>, flds: &[Field]) ->
|
|
134
113
|
dict.into_value()
|
135
114
|
}
|
136
115
|
|
137
|
-
impl IntoValue for Wrap<
|
138
|
-
fn into_value_with(self,
|
116
|
+
impl IntoValue for Wrap<DataType> {
|
117
|
+
fn into_value_with(self, _: &Ruby) -> Value {
|
118
|
+
let pl = crate::rb_modules::polars();
|
119
|
+
|
139
120
|
match self.0 {
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
AnyValue::UInt64(v) => ruby.into_value(v),
|
144
|
-
AnyValue::Int8(v) => ruby.into_value(v),
|
145
|
-
AnyValue::Int16(v) => ruby.into_value(v),
|
146
|
-
AnyValue::Int32(v) => ruby.into_value(v),
|
147
|
-
AnyValue::Int64(v) => ruby.into_value(v),
|
148
|
-
AnyValue::Float32(v) => ruby.into_value(v),
|
149
|
-
AnyValue::Float64(v) => ruby.into_value(v),
|
150
|
-
AnyValue::Null => ruby.qnil().as_value(),
|
151
|
-
AnyValue::Boolean(v) => ruby.into_value(v),
|
152
|
-
AnyValue::Utf8(v) => ruby.into_value(v),
|
153
|
-
AnyValue::Utf8Owned(v) => ruby.into_value(v.as_str()),
|
154
|
-
AnyValue::Categorical(idx, rev, arr) => {
|
155
|
-
let s = if arr.is_null() {
|
156
|
-
rev.get(idx)
|
157
|
-
} else {
|
158
|
-
unsafe { arr.deref_unchecked().value(idx as usize) }
|
159
|
-
};
|
160
|
-
s.into_value()
|
121
|
+
DataType::Int8 => {
|
122
|
+
let class = pl.const_get::<_, Value>("Int8").unwrap();
|
123
|
+
class.funcall("new", ()).unwrap()
|
161
124
|
}
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
utils()
|
166
|
-
.funcall("_to_ruby_datetime", (v, time_unit, time_zone.clone()))
|
167
|
-
.unwrap()
|
125
|
+
DataType::Int16 => {
|
126
|
+
let class = pl.const_get::<_, Value>("Int16").unwrap();
|
127
|
+
class.funcall("new", ()).unwrap()
|
168
128
|
}
|
169
|
-
|
170
|
-
let
|
171
|
-
|
172
|
-
.funcall("_to_ruby_duration", (v, time_unit))
|
173
|
-
.unwrap()
|
129
|
+
DataType::Int32 => {
|
130
|
+
let class = pl.const_get::<_, Value>("Int32").unwrap();
|
131
|
+
class.funcall("new", ()).unwrap()
|
174
132
|
}
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
AnyValue::StructOwned(payload) => struct_dict(payload.0.into_iter(), &payload.1),
|
179
|
-
AnyValue::Object(v) => {
|
180
|
-
let object = v.as_any().downcast_ref::<ObjectValue>().unwrap();
|
181
|
-
object.to_object()
|
133
|
+
DataType::Int64 => {
|
134
|
+
let class = pl.const_get::<_, Value>("Int64").unwrap();
|
135
|
+
class.funcall("new", ()).unwrap()
|
182
136
|
}
|
183
|
-
|
184
|
-
let
|
185
|
-
|
137
|
+
DataType::UInt8 => {
|
138
|
+
let class = pl.const_get::<_, Value>("UInt8").unwrap();
|
139
|
+
class.funcall("new", ()).unwrap()
|
140
|
+
}
|
141
|
+
DataType::UInt16 => {
|
142
|
+
let class = pl.const_get::<_, Value>("UInt16").unwrap();
|
143
|
+
class.funcall("new", ()).unwrap()
|
144
|
+
}
|
145
|
+
DataType::UInt32 => {
|
146
|
+
let class = pl.const_get::<_, Value>("UInt32").unwrap();
|
147
|
+
class.funcall("new", ()).unwrap()
|
148
|
+
}
|
149
|
+
DataType::UInt64 => {
|
150
|
+
let class = pl.const_get::<_, Value>("UInt64").unwrap();
|
151
|
+
class.funcall("new", ()).unwrap()
|
152
|
+
}
|
153
|
+
DataType::Float32 => {
|
154
|
+
let class = pl.const_get::<_, Value>("Float32").unwrap();
|
155
|
+
class.funcall("new", ()).unwrap()
|
156
|
+
}
|
157
|
+
DataType::Float64 => {
|
158
|
+
let class = pl.const_get::<_, Value>("Float64").unwrap();
|
159
|
+
class.funcall("new", ()).unwrap()
|
186
160
|
}
|
187
|
-
AnyValue::Binary(v) => RString::from_slice(v).into_value(),
|
188
|
-
AnyValue::BinaryOwned(v) => RString::from_slice(&v).into_value(),
|
189
|
-
AnyValue::Decimal(v, scale) => utils()
|
190
|
-
.funcall("_to_ruby_decimal", (v.to_string(), -(scale as i32)))
|
191
|
-
.unwrap(),
|
192
|
-
}
|
193
|
-
}
|
194
|
-
}
|
195
|
-
|
196
|
-
impl IntoValue for Wrap<DataType> {
|
197
|
-
fn into_value_with(self, _: &Ruby) -> Value {
|
198
|
-
let pl = crate::rb_modules::polars();
|
199
|
-
|
200
|
-
match self.0 {
|
201
|
-
DataType::Int8 => pl.const_get::<_, Value>("Int8").unwrap(),
|
202
|
-
DataType::Int16 => pl.const_get::<_, Value>("Int16").unwrap(),
|
203
|
-
DataType::Int32 => pl.const_get::<_, Value>("Int32").unwrap(),
|
204
|
-
DataType::Int64 => pl.const_get::<_, Value>("Int64").unwrap(),
|
205
|
-
DataType::UInt8 => pl.const_get::<_, Value>("UInt8").unwrap(),
|
206
|
-
DataType::UInt16 => pl.const_get::<_, Value>("UInt16").unwrap(),
|
207
|
-
DataType::UInt32 => pl.const_get::<_, Value>("UInt32").unwrap(),
|
208
|
-
DataType::UInt64 => pl.const_get::<_, Value>("UInt64").unwrap(),
|
209
|
-
DataType::Float32 => pl.const_get::<_, Value>("Float32").unwrap(),
|
210
|
-
DataType::Float64 => pl.const_get::<_, Value>("Float64").unwrap(),
|
211
161
|
DataType::Decimal(precision, scale) => {
|
212
|
-
let
|
213
|
-
|
162
|
+
let class = pl.const_get::<_, Value>("Decimal").unwrap();
|
163
|
+
class
|
214
164
|
.funcall::<_, _, Value>("new", (precision, scale))
|
215
165
|
.unwrap()
|
216
166
|
}
|
217
|
-
DataType::Boolean =>
|
218
|
-
|
219
|
-
|
167
|
+
DataType::Boolean => {
|
168
|
+
let class = pl.const_get::<_, Value>("Boolean").unwrap();
|
169
|
+
class.funcall("new", ()).unwrap()
|
170
|
+
}
|
171
|
+
DataType::String => {
|
172
|
+
let class = pl.const_get::<_, Value>("String").unwrap();
|
173
|
+
class.funcall("new", ()).unwrap()
|
174
|
+
}
|
175
|
+
DataType::Binary => {
|
176
|
+
let class = pl.const_get::<_, Value>("Binary").unwrap();
|
177
|
+
class.funcall("new", ()).unwrap()
|
178
|
+
}
|
220
179
|
DataType::Array(inner, size) => {
|
180
|
+
let class = pl.const_get::<_, Value>("Array").unwrap();
|
221
181
|
let inner = Wrap(*inner);
|
222
|
-
let
|
223
|
-
|
224
|
-
.funcall::<_, _, Value>("new", (size, inner))
|
225
|
-
.unwrap()
|
182
|
+
let args = (inner, size);
|
183
|
+
class.funcall::<_, _, Value>("new", args).unwrap()
|
226
184
|
}
|
227
185
|
DataType::List(inner) => {
|
186
|
+
let class = pl.const_get::<_, Value>("List").unwrap();
|
228
187
|
let inner = Wrap(*inner);
|
229
|
-
|
230
|
-
|
188
|
+
class.funcall::<_, _, Value>("new", (inner,)).unwrap()
|
189
|
+
}
|
190
|
+
DataType::Date => {
|
191
|
+
let class = pl.const_get::<_, Value>("Date").unwrap();
|
192
|
+
class.funcall("new", ()).unwrap()
|
231
193
|
}
|
232
|
-
DataType::Date => pl.const_get::<_, Value>("Date").unwrap(),
|
233
194
|
DataType::Datetime(tu, tz) => {
|
234
195
|
let datetime_class = pl.const_get::<_, Value>("Datetime").unwrap();
|
235
196
|
datetime_class
|
@@ -242,9 +203,29 @@ impl IntoValue for Wrap<DataType> {
|
|
242
203
|
.funcall::<_, _, Value>("new", (tu.to_ascii(),))
|
243
204
|
.unwrap()
|
244
205
|
}
|
245
|
-
DataType::Object(_) =>
|
246
|
-
|
247
|
-
|
206
|
+
DataType::Object(_, _) => {
|
207
|
+
let class = pl.const_get::<_, Value>("Object").unwrap();
|
208
|
+
class.funcall("new", ()).unwrap()
|
209
|
+
}
|
210
|
+
DataType::Categorical(_, ordering) => {
|
211
|
+
let class = pl.const_get::<_, Value>("Categorical").unwrap();
|
212
|
+
class.funcall("new", (Wrap(ordering),)).unwrap()
|
213
|
+
}
|
214
|
+
DataType::Enum(rev_map, _) => {
|
215
|
+
// we should always have an initialized rev_map coming from rust
|
216
|
+
let categories = rev_map.as_ref().unwrap().get_categories();
|
217
|
+
let class = pl.const_get::<_, Value>("Enum").unwrap();
|
218
|
+
let s = Series::from_arrow("category", categories.to_boxed()).unwrap();
|
219
|
+
let series = to_series(s.into());
|
220
|
+
class
|
221
|
+
.funcall::<_, _, Value>("new", (series,))
|
222
|
+
.unwrap()
|
223
|
+
.into()
|
224
|
+
}
|
225
|
+
DataType::Time => {
|
226
|
+
let class = pl.const_get::<_, Value>("Time").unwrap();
|
227
|
+
class.funcall("new", ()).unwrap()
|
228
|
+
}
|
248
229
|
DataType::Struct(fields) => {
|
249
230
|
let field_class = pl.const_get::<_, Value>("Field").unwrap();
|
250
231
|
let iter = fields.iter().map(|fld| {
|
@@ -260,12 +241,31 @@ impl IntoValue for Wrap<DataType> {
|
|
260
241
|
.funcall::<_, _, Value>("new", (fields,))
|
261
242
|
.unwrap()
|
262
243
|
}
|
263
|
-
DataType::Null =>
|
264
|
-
|
244
|
+
DataType::Null => {
|
245
|
+
let class = pl.const_get::<_, Value>("Null").unwrap();
|
246
|
+
class.funcall("new", ()).unwrap()
|
247
|
+
}
|
248
|
+
DataType::Unknown => {
|
249
|
+
let class = pl.const_get::<_, Value>("Unknown").unwrap();
|
250
|
+
class.funcall("new", ()).unwrap()
|
251
|
+
}
|
252
|
+
DataType::BinaryOffset => {
|
253
|
+
unimplemented!()
|
254
|
+
}
|
265
255
|
}
|
266
256
|
}
|
267
257
|
}
|
268
258
|
|
259
|
+
impl IntoValue for Wrap<CategoricalOrdering> {
|
260
|
+
fn into_value_with(self, _: &Ruby) -> Value {
|
261
|
+
let ordering = match self.0 {
|
262
|
+
CategoricalOrdering::Physical => "physical",
|
263
|
+
CategoricalOrdering::Lexical => "lexical",
|
264
|
+
};
|
265
|
+
ordering.into_value()
|
266
|
+
}
|
267
|
+
}
|
268
|
+
|
269
269
|
impl IntoValue for Wrap<TimeUnit> {
|
270
270
|
fn into_value_with(self, _: &Ruby) -> Value {
|
271
271
|
let tu = match self.0 {
|
@@ -277,114 +277,6 @@ impl IntoValue for Wrap<TimeUnit> {
|
|
277
277
|
}
|
278
278
|
}
|
279
279
|
|
280
|
-
impl IntoValue for Wrap<&Utf8Chunked> {
|
281
|
-
fn into_value_with(self, _: &Ruby) -> Value {
|
282
|
-
let iter = self.0.into_iter();
|
283
|
-
RArray::from_iter(iter).into_value()
|
284
|
-
}
|
285
|
-
}
|
286
|
-
|
287
|
-
impl IntoValue for Wrap<&BinaryChunked> {
|
288
|
-
fn into_value_with(self, _: &Ruby) -> Value {
|
289
|
-
let iter = self
|
290
|
-
.0
|
291
|
-
.into_iter()
|
292
|
-
.map(|opt_bytes| opt_bytes.map(RString::from_slice));
|
293
|
-
RArray::from_iter(iter).into_value()
|
294
|
-
}
|
295
|
-
}
|
296
|
-
|
297
|
-
impl IntoValue for Wrap<&StructChunked> {
|
298
|
-
fn into_value_with(self, _: &Ruby) -> Value {
|
299
|
-
let s = self.0.clone().into_series();
|
300
|
-
// todo! iterate its chunks and flatten.
|
301
|
-
// make series::iter() accept a chunk index.
|
302
|
-
let s = s.rechunk();
|
303
|
-
let iter = s.iter().map(|av| {
|
304
|
-
if let AnyValue::Struct(_, _, flds) = av {
|
305
|
-
struct_dict(av._iter_struct_av(), flds)
|
306
|
-
} else {
|
307
|
-
unreachable!()
|
308
|
-
}
|
309
|
-
});
|
310
|
-
|
311
|
-
RArray::from_iter(iter).into_value()
|
312
|
-
}
|
313
|
-
}
|
314
|
-
|
315
|
-
impl IntoValue for Wrap<&DurationChunked> {
|
316
|
-
fn into_value_with(self, _: &Ruby) -> Value {
|
317
|
-
let utils = utils();
|
318
|
-
let time_unit = Wrap(self.0.time_unit()).into_value();
|
319
|
-
let iter = self.0.into_iter().map(|opt_v| {
|
320
|
-
opt_v.map(|v| {
|
321
|
-
utils
|
322
|
-
.funcall::<_, _, Value>("_to_ruby_duration", (v, time_unit))
|
323
|
-
.unwrap()
|
324
|
-
})
|
325
|
-
});
|
326
|
-
RArray::from_iter(iter).into_value()
|
327
|
-
}
|
328
|
-
}
|
329
|
-
|
330
|
-
impl IntoValue for Wrap<&DatetimeChunked> {
|
331
|
-
fn into_value_with(self, _: &Ruby) -> Value {
|
332
|
-
let utils = utils();
|
333
|
-
let time_unit = Wrap(self.0.time_unit()).into_value();
|
334
|
-
let time_zone = self.0.time_zone().clone().into_value();
|
335
|
-
let iter = self.0.into_iter().map(|opt_v| {
|
336
|
-
opt_v.map(|v| {
|
337
|
-
utils
|
338
|
-
.funcall::<_, _, Value>("_to_ruby_datetime", (v, time_unit, time_zone))
|
339
|
-
.unwrap()
|
340
|
-
})
|
341
|
-
});
|
342
|
-
RArray::from_iter(iter).into_value()
|
343
|
-
}
|
344
|
-
}
|
345
|
-
|
346
|
-
impl IntoValue for Wrap<&TimeChunked> {
|
347
|
-
fn into_value_with(self, _: &Ruby) -> Value {
|
348
|
-
let utils = utils();
|
349
|
-
let iter = self.0.into_iter().map(|opt_v| {
|
350
|
-
opt_v.map(|v| utils.funcall::<_, _, Value>("_to_ruby_time", (v,)).unwrap())
|
351
|
-
});
|
352
|
-
RArray::from_iter(iter).into_value()
|
353
|
-
}
|
354
|
-
}
|
355
|
-
|
356
|
-
impl IntoValue for Wrap<&DateChunked> {
|
357
|
-
fn into_value_with(self, _: &Ruby) -> Value {
|
358
|
-
let utils = utils();
|
359
|
-
let iter = self.0.into_iter().map(|opt_v| {
|
360
|
-
opt_v.map(|v| utils.funcall::<_, _, Value>("_to_ruby_date", (v,)).unwrap())
|
361
|
-
});
|
362
|
-
RArray::from_iter(iter).into_value()
|
363
|
-
}
|
364
|
-
}
|
365
|
-
|
366
|
-
impl IntoValue for Wrap<&DecimalChunked> {
|
367
|
-
fn into_value_with(self, _: &Ruby) -> Value {
|
368
|
-
let utils = utils();
|
369
|
-
let rb_scale = (-(self.0.scale() as i32)).into_value();
|
370
|
-
let iter = self.0.into_iter().map(|opt_v| {
|
371
|
-
opt_v.map(|v| {
|
372
|
-
utils
|
373
|
-
.funcall::<_, _, Value>("_to_ruby_decimal", (v.to_string(), rb_scale))
|
374
|
-
.unwrap()
|
375
|
-
})
|
376
|
-
});
|
377
|
-
RArray::from_iter(iter).into_value()
|
378
|
-
}
|
379
|
-
}
|
380
|
-
|
381
|
-
fn abs_decimal_from_digits(digits: String, exp: i32) -> Option<(i128, usize)> {
|
382
|
-
match digits.parse::<i128>() {
|
383
|
-
Ok(v) => Some((v, ((digits.len() as i32) - exp) as usize)),
|
384
|
-
Err(_) => None,
|
385
|
-
}
|
386
|
-
}
|
387
|
-
|
388
280
|
impl TryConvert for Wrap<Field> {
|
389
281
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
390
282
|
let name: String = ob.funcall("name", ())?;
|
@@ -406,10 +298,11 @@ impl TryConvert for Wrap<DataType> {
|
|
406
298
|
"Polars::Int16" => DataType::Int16,
|
407
299
|
"Polars::Int32" => DataType::Int32,
|
408
300
|
"Polars::Int64" => DataType::Int64,
|
409
|
-
"Polars::
|
301
|
+
"Polars::String" => DataType::String,
|
410
302
|
"Polars::Binary" => DataType::Binary,
|
411
303
|
"Polars::Boolean" => DataType::Boolean,
|
412
|
-
"Polars::Categorical" => DataType::Categorical(None),
|
304
|
+
"Polars::Categorical" => DataType::Categorical(None, Default::default()),
|
305
|
+
"Polars::Enum" => DataType::Enum(None, Default::default()),
|
413
306
|
"Polars::Date" => DataType::Date,
|
414
307
|
"Polars::Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
|
415
308
|
"Polars::Time" => DataType::Time,
|
@@ -417,7 +310,7 @@ impl TryConvert for Wrap<DataType> {
|
|
417
310
|
"Polars::Decimal" => DataType::Decimal(None, None),
|
418
311
|
"Polars::Float32" => DataType::Float32,
|
419
312
|
"Polars::Float64" => DataType::Float64,
|
420
|
-
"Polars::Object" => DataType::Object(OBJECT_NAME),
|
313
|
+
"Polars::Object" => DataType::Object(OBJECT_NAME, None),
|
421
314
|
"Polars::List" => DataType::List(Box::new(DataType::Null)),
|
422
315
|
"Polars::Null" => DataType::Null,
|
423
316
|
"Polars::Unknown" => DataType::Unknown,
|
@@ -431,6 +324,36 @@ impl TryConvert for Wrap<DataType> {
|
|
431
324
|
} else if String::try_convert(ob).is_err() {
|
432
325
|
let name = unsafe { ob.class().name() }.into_owned();
|
433
326
|
match name.as_str() {
|
327
|
+
"Polars::Int8" => DataType::Int8,
|
328
|
+
"Polars::Int16" => DataType::Int16,
|
329
|
+
"Polars::Int32" => DataType::Int32,
|
330
|
+
"Polars::Int64" => DataType::Int64,
|
331
|
+
"Polars::UInt8" => DataType::UInt8,
|
332
|
+
"Polars::UInt16" => DataType::UInt16,
|
333
|
+
"Polars::UInt32" => DataType::UInt32,
|
334
|
+
"Polars::UInt64" => DataType::UInt64,
|
335
|
+
"Polars::String" => DataType::String,
|
336
|
+
"Polars::Binary" => DataType::Binary,
|
337
|
+
"Polars::Boolean" => DataType::Boolean,
|
338
|
+
"Polars::Categorical" => {
|
339
|
+
let ordering = ob
|
340
|
+
.funcall::<_, _, Wrap<CategoricalOrdering>>("ordering", ())?
|
341
|
+
.0;
|
342
|
+
DataType::Categorical(None, ordering)
|
343
|
+
}
|
344
|
+
"Polars::Enum" => {
|
345
|
+
let categories = ob.funcall("categories", ()).unwrap();
|
346
|
+
let s = get_series(categories)?;
|
347
|
+
let ca = s.str().map_err(RbPolarsErr::from)?;
|
348
|
+
let categories = ca.downcast_iter().next().unwrap().clone();
|
349
|
+
create_enum_data_type(categories)
|
350
|
+
}
|
351
|
+
"Polars::Date" => DataType::Date,
|
352
|
+
"Polars::Time" => DataType::Time,
|
353
|
+
"Polars::Float32" => DataType::Float32,
|
354
|
+
"Polars::Float64" => DataType::Float64,
|
355
|
+
"Polars::Null" => DataType::Null,
|
356
|
+
"Polars::Unknown" => DataType::Unknown,
|
434
357
|
"Polars::Duration" => {
|
435
358
|
let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
|
436
359
|
let time_unit = Wrap::<TimeUnit>::try_convert(time_unit)?.0;
|
@@ -477,17 +400,17 @@ impl TryConvert for Wrap<DataType> {
|
|
477
400
|
"i16" => DataType::Int16,
|
478
401
|
"i32" => DataType::Int32,
|
479
402
|
"i64" => DataType::Int64,
|
480
|
-
"str" => DataType::
|
403
|
+
"str" => DataType::String,
|
481
404
|
"bin" => DataType::Binary,
|
482
405
|
"bool" => DataType::Boolean,
|
483
|
-
"cat" => DataType::Categorical(None),
|
406
|
+
"cat" => DataType::Categorical(None, Default::default()),
|
484
407
|
"date" => DataType::Date,
|
485
408
|
"datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
|
486
409
|
"f32" => DataType::Float32,
|
487
410
|
"time" => DataType::Time,
|
488
411
|
"dur" => DataType::Duration(TimeUnit::Microseconds),
|
489
412
|
"f64" => DataType::Float64,
|
490
|
-
"obj" => DataType::Object(OBJECT_NAME),
|
413
|
+
"obj" => DataType::Object(OBJECT_NAME, None),
|
491
414
|
"list" => DataType::List(Box::new(DataType::Boolean)),
|
492
415
|
"null" => DataType::Null,
|
493
416
|
"unk" => DataType::Unknown,
|
@@ -503,102 +426,6 @@ impl TryConvert for Wrap<DataType> {
|
|
503
426
|
}
|
504
427
|
}
|
505
428
|
|
506
|
-
impl<'s> TryConvert for Wrap<AnyValue<'s>> {
|
507
|
-
fn try_convert(ob: Value) -> RbResult<Self> {
|
508
|
-
if ob.is_kind_of(class::true_class()) || ob.is_kind_of(class::false_class()) {
|
509
|
-
Ok(AnyValue::Boolean(bool::try_convert(ob)?).into())
|
510
|
-
} else if let Some(v) = Integer::from_value(ob) {
|
511
|
-
Ok(AnyValue::Int64(v.to_i64()?).into())
|
512
|
-
} else if let Some(v) = Float::from_value(ob) {
|
513
|
-
Ok(AnyValue::Float64(v.to_f64()).into())
|
514
|
-
} else if let Some(v) = RString::from_value(ob) {
|
515
|
-
if v.enc_get() == Index::utf8() {
|
516
|
-
Ok(AnyValue::Utf8Owned(v.to_string()?.into()).into())
|
517
|
-
} else {
|
518
|
-
Ok(AnyValue::BinaryOwned(unsafe { v.as_slice() }.to_vec()).into())
|
519
|
-
}
|
520
|
-
// call is_a? for ActiveSupport::TimeWithZone
|
521
|
-
} else if ob.funcall::<_, _, bool>("is_a?", (class::time(),))? {
|
522
|
-
let sec = ob.funcall::<_, _, i64>("to_i", ())?;
|
523
|
-
let nsec = ob.funcall::<_, _, i64>("nsec", ())?;
|
524
|
-
let v = sec * 1_000_000_000 + nsec;
|
525
|
-
// TODO support time zone when possible
|
526
|
-
// https://github.com/pola-rs/polars/issues/9103
|
527
|
-
Ok(AnyValue::Datetime(v, TimeUnit::Nanoseconds, &None).into())
|
528
|
-
} else if ob.is_nil() {
|
529
|
-
Ok(AnyValue::Null.into())
|
530
|
-
} else if let Some(dict) = RHash::from_value(ob) {
|
531
|
-
let len = dict.len();
|
532
|
-
let mut keys = Vec::with_capacity(len);
|
533
|
-
let mut vals = Vec::with_capacity(len);
|
534
|
-
dict.foreach(|k: Value, v: Value| {
|
535
|
-
let key = String::try_convert(k)?;
|
536
|
-
let val = Wrap::<AnyValue>::try_convert(v)?.0;
|
537
|
-
let dtype = DataType::from(&val);
|
538
|
-
keys.push(Field::new(&key, dtype));
|
539
|
-
vals.push(val);
|
540
|
-
Ok(ForEach::Continue)
|
541
|
-
})?;
|
542
|
-
Ok(Wrap(AnyValue::StructOwned(Box::new((vals, keys)))))
|
543
|
-
} else if let Some(v) = RArray::from_value(ob) {
|
544
|
-
if v.is_empty() {
|
545
|
-
Ok(Wrap(AnyValue::List(Series::new_empty("", &DataType::Null))))
|
546
|
-
} else {
|
547
|
-
let list = v;
|
548
|
-
|
549
|
-
let mut avs = Vec::with_capacity(25);
|
550
|
-
let mut iter = list.each();
|
551
|
-
|
552
|
-
for item in (&mut iter).take(25) {
|
553
|
-
avs.push(Wrap::<AnyValue>::try_convert(item?)?.0)
|
554
|
-
}
|
555
|
-
|
556
|
-
let (dtype, _n_types) = any_values_to_dtype(&avs).map_err(RbPolarsErr::from)?;
|
557
|
-
|
558
|
-
// push the rest
|
559
|
-
avs.reserve(list.len());
|
560
|
-
for item in iter {
|
561
|
-
avs.push(Wrap::<AnyValue>::try_convert(item?)?.0)
|
562
|
-
}
|
563
|
-
|
564
|
-
let s = Series::from_any_values_and_dtype("", &avs, &dtype, true)
|
565
|
-
.map_err(RbPolarsErr::from)?;
|
566
|
-
Ok(Wrap(AnyValue::List(s)))
|
567
|
-
}
|
568
|
-
} else if ob.is_kind_of(crate::rb_modules::datetime()) {
|
569
|
-
let sec: i64 = ob.funcall("to_i", ())?;
|
570
|
-
let nsec: i64 = ob.funcall("nsec", ())?;
|
571
|
-
Ok(Wrap(AnyValue::Datetime(
|
572
|
-
sec * 1_000_000_000 + nsec,
|
573
|
-
TimeUnit::Nanoseconds,
|
574
|
-
&None,
|
575
|
-
)))
|
576
|
-
} else if ob.is_kind_of(crate::rb_modules::date()) {
|
577
|
-
// convert to DateTime for UTC
|
578
|
-
let v = ob
|
579
|
-
.funcall::<_, _, Value>("to_datetime", ())?
|
580
|
-
.funcall::<_, _, Value>("to_time", ())?
|
581
|
-
.funcall::<_, _, i64>("to_i", ())?;
|
582
|
-
Ok(Wrap(AnyValue::Date((v / 86400) as i32)))
|
583
|
-
} else if ob.is_kind_of(crate::rb_modules::bigdecimal()) {
|
584
|
-
let (sign, digits, _, exp): (i8, String, i32, i32) = ob.funcall("split", ()).unwrap();
|
585
|
-
let (mut v, scale) = abs_decimal_from_digits(digits, exp).ok_or_else(|| {
|
586
|
-
RbPolarsErr::other("BigDecimal is too large to fit in Decimal128".into())
|
587
|
-
})?;
|
588
|
-
if sign < 0 {
|
589
|
-
// TODO better error
|
590
|
-
v = v.checked_neg().unwrap();
|
591
|
-
}
|
592
|
-
Ok(Wrap(AnyValue::Decimal(v, scale)))
|
593
|
-
} else {
|
594
|
-
Err(RbPolarsErr::other(format!(
|
595
|
-
"object type not supported {:?}",
|
596
|
-
ob
|
597
|
-
)))
|
598
|
-
}
|
599
|
-
}
|
600
|
-
}
|
601
|
-
|
602
429
|
impl<'s> TryConvert for Wrap<Row<'s>> {
|
603
430
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
604
431
|
let mut vals: Vec<Wrap<AnyValue<'s>>> = Vec::new();
|
@@ -662,6 +489,15 @@ impl TotalEq for ObjectValue {
|
|
662
489
|
}
|
663
490
|
}
|
664
491
|
|
492
|
+
impl TotalHash for ObjectValue {
|
493
|
+
fn tot_hash<H>(&self, state: &mut H)
|
494
|
+
where
|
495
|
+
H: Hasher,
|
496
|
+
{
|
497
|
+
self.hash(state);
|
498
|
+
}
|
499
|
+
}
|
500
|
+
|
665
501
|
impl Display for ObjectValue {
|
666
502
|
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
667
503
|
write!(f, "{}", self.to_object())
|
@@ -715,24 +551,33 @@ impl Default for ObjectValue {
|
|
715
551
|
|
716
552
|
pub(crate) fn dicts_to_rows(
|
717
553
|
records: &Value,
|
718
|
-
infer_schema_len: usize
|
554
|
+
infer_schema_len: Option<usize>,
|
555
|
+
schema_columns: PlIndexSet<String>,
|
719
556
|
) -> RbResult<(Vec<Row>, Vec<String>)> {
|
557
|
+
let infer_schema_len = infer_schema_len.map(|n| std::cmp::max(1, n));
|
720
558
|
let (dicts, len) = get_rbseq(*records)?;
|
721
559
|
|
722
|
-
let
|
723
|
-
|
724
|
-
|
725
|
-
|
726
|
-
|
727
|
-
|
728
|
-
|
729
|
-
|
730
|
-
|
731
|
-
|
732
|
-
|
733
|
-
|
734
|
-
|
735
|
-
|
560
|
+
let key_names = {
|
561
|
+
if !schema_columns.is_empty() {
|
562
|
+
schema_columns
|
563
|
+
} else {
|
564
|
+
let mut inferred_keys = PlIndexSet::new();
|
565
|
+
for d in dicts.each().take(infer_schema_len.unwrap_or(usize::MAX)) {
|
566
|
+
let d = d?;
|
567
|
+
let d = RHash::try_convert(d)?;
|
568
|
+
|
569
|
+
d.foreach(|name: Value, _value: Value| {
|
570
|
+
if let Some(v) = Symbol::from_value(name) {
|
571
|
+
inferred_keys.insert(v.name()?.into());
|
572
|
+
} else {
|
573
|
+
inferred_keys.insert(String::try_convert(name)?);
|
574
|
+
};
|
575
|
+
Ok(ForEach::Continue)
|
576
|
+
})?;
|
577
|
+
}
|
578
|
+
inferred_keys
|
579
|
+
}
|
580
|
+
};
|
736
581
|
|
737
582
|
let mut rows = Vec::with_capacity(len);
|
738
583
|
|
@@ -891,7 +736,8 @@ impl TryConvert for Wrap<JoinType> {
|
|
891
736
|
let parsed = match String::try_convert(ob)?.as_str() {
|
892
737
|
"inner" => JoinType::Inner,
|
893
738
|
"left" => JoinType::Left,
|
894
|
-
"outer" => JoinType::Outer,
|
739
|
+
"outer" => JoinType::Outer { coalesce: false },
|
740
|
+
"outer_coalesce" => JoinType::Outer { coalesce: true },
|
895
741
|
"semi" => JoinType::Semi,
|
896
742
|
"anti" => JoinType::Anti,
|
897
743
|
// #[cfg(feature = "cross_join")]
|
@@ -1061,6 +907,22 @@ impl TryConvert for Wrap<UniqueKeepStrategy> {
|
|
1061
907
|
}
|
1062
908
|
}
|
1063
909
|
|
910
|
+
impl TryConvert for Wrap<IpcCompression> {
|
911
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
912
|
+
let parsed = match String::try_convert(ob)?.as_str() {
|
913
|
+
"lz4" => IpcCompression::LZ4,
|
914
|
+
"zstd" => IpcCompression::ZSTD,
|
915
|
+
v => {
|
916
|
+
return Err(RbValueError::new_err(format!(
|
917
|
+
"compression must be one of {{'lz4', 'zstd'}}, got {}",
|
918
|
+
v
|
919
|
+
)))
|
920
|
+
}
|
921
|
+
};
|
922
|
+
Ok(Wrap(parsed))
|
923
|
+
}
|
924
|
+
}
|
925
|
+
|
1064
926
|
impl TryConvert for Wrap<SearchSortedSide> {
|
1065
927
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
1066
928
|
let parsed = match String::try_convert(ob)?.as_str() {
|
@@ -1077,6 +939,56 @@ impl TryConvert for Wrap<SearchSortedSide> {
|
|
1077
939
|
}
|
1078
940
|
}
|
1079
941
|
|
942
|
+
impl TryConvert for Wrap<WindowMapping> {
|
943
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
944
|
+
let parsed = match String::try_convert(ob)?.as_str() {
|
945
|
+
"group_to_rows" => WindowMapping::GroupsToRows,
|
946
|
+
"join" => WindowMapping::Join,
|
947
|
+
"explode" => WindowMapping::Explode,
|
948
|
+
v => {
|
949
|
+
return Err(RbValueError::new_err(format!(
|
950
|
+
"`mapping_strategy` must be one of {{'group_to_rows', 'join', 'explode'}}, got {v}",
|
951
|
+
)))
|
952
|
+
}
|
953
|
+
};
|
954
|
+
Ok(Wrap(parsed))
|
955
|
+
}
|
956
|
+
}
|
957
|
+
|
958
|
+
impl TryConvert for Wrap<JoinValidation> {
|
959
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
960
|
+
let parsed = match String::try_convert(ob)?.as_str() {
|
961
|
+
"1:1" => JoinValidation::OneToOne,
|
962
|
+
"1:m" => JoinValidation::OneToMany,
|
963
|
+
"m:m" => JoinValidation::ManyToMany,
|
964
|
+
"m:1" => JoinValidation::ManyToOne,
|
965
|
+
v => {
|
966
|
+
return Err(RbValueError::new_err(format!(
|
967
|
+
"`validate` must be one of {{'m:m', 'm:1', '1:m', '1:1'}}, got {v}",
|
968
|
+
)))
|
969
|
+
}
|
970
|
+
};
|
971
|
+
Ok(Wrap(parsed))
|
972
|
+
}
|
973
|
+
}
|
974
|
+
|
975
|
+
impl TryConvert for Wrap<QuoteStyle> {
|
976
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
977
|
+
let parsed = match String::try_convert(ob)?.as_str() {
|
978
|
+
"always" => QuoteStyle::Always,
|
979
|
+
"necessary" => QuoteStyle::Necessary,
|
980
|
+
"non_numeric" => QuoteStyle::NonNumeric,
|
981
|
+
"never" => QuoteStyle::Never,
|
982
|
+
v => {
|
983
|
+
return Err(RbValueError::new_err(format!(
|
984
|
+
"`quote_style` must be one of {{'always', 'necessary', 'non_numeric', 'never'}}, got {v}",
|
985
|
+
)))
|
986
|
+
},
|
987
|
+
};
|
988
|
+
Ok(Wrap(parsed))
|
989
|
+
}
|
990
|
+
}
|
991
|
+
|
1080
992
|
pub fn parse_fill_null_strategy(
|
1081
993
|
strategy: &str,
|
1082
994
|
limit: FillNullLimit,
|
@@ -1149,3 +1061,12 @@ where
|
|
1149
1061
|
{
|
1150
1062
|
container.into_iter().map(|s| s.as_ref().into()).collect()
|
1151
1063
|
}
|
1064
|
+
|
1065
|
+
impl TryConvert for Wrap<NonZeroUsize> {
|
1066
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
1067
|
+
let v = usize::try_convert(ob)?;
|
1068
|
+
NonZeroUsize::new(v)
|
1069
|
+
.map(|v| Wrap(v))
|
1070
|
+
.ok_or(RbValueError::new_err("must be non-zero".into()))
|
1071
|
+
}
|
1072
|
+
}
|