polars-df 0.4.0 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +26 -0
- data/Cargo.lock +447 -410
- data/Cargo.toml +0 -1
- data/README.md +6 -5
- data/ext/polars/Cargo.toml +10 -5
- data/ext/polars/src/apply/dataframe.rs +2 -2
- data/ext/polars/src/{lazy/apply.rs → apply/lazy.rs} +1 -2
- data/ext/polars/src/apply/mod.rs +8 -3
- data/ext/polars/src/batched_csv.rs +7 -5
- data/ext/polars/src/conversion.rs +269 -59
- data/ext/polars/src/dataframe.rs +38 -40
- data/ext/polars/src/error.rs +6 -2
- data/ext/polars/src/expr/array.rs +15 -0
- data/ext/polars/src/expr/binary.rs +69 -0
- data/ext/polars/src/expr/categorical.rs +10 -0
- data/ext/polars/src/expr/datetime.rs +223 -0
- data/ext/polars/src/expr/general.rs +963 -0
- data/ext/polars/src/expr/list.rs +151 -0
- data/ext/polars/src/{lazy → expr}/meta.rs +16 -6
- data/ext/polars/src/expr/string.rs +314 -0
- data/ext/polars/src/expr/struct.rs +15 -0
- data/ext/polars/src/expr.rs +34 -0
- data/ext/polars/src/functions/eager.rs +93 -0
- data/ext/polars/src/functions/io.rs +34 -0
- data/ext/polars/src/functions/lazy.rs +249 -0
- data/ext/polars/src/functions/meta.rs +8 -0
- data/ext/polars/src/functions/mod.rs +5 -0
- data/ext/polars/src/functions/whenthen.rs +43 -0
- data/ext/polars/src/{lazy/dataframe.rs → lazyframe.rs} +26 -35
- data/ext/polars/src/lazygroupby.rs +29 -0
- data/ext/polars/src/lib.rs +223 -316
- data/ext/polars/src/object.rs +1 -1
- data/ext/polars/src/rb_modules.rs +12 -0
- data/ext/polars/src/series/aggregation.rs +83 -0
- data/ext/polars/src/series/arithmetic.rs +88 -0
- data/ext/polars/src/series/comparison.rs +251 -0
- data/ext/polars/src/series/construction.rs +190 -0
- data/ext/polars/src/series.rs +151 -551
- data/lib/polars/array_expr.rb +84 -0
- data/lib/polars/array_name_space.rb +77 -0
- data/lib/polars/batched_csv_reader.rb +1 -1
- data/lib/polars/convert.rb +2 -2
- data/lib/polars/data_frame.rb +289 -96
- data/lib/polars/data_types.rb +169 -33
- data/lib/polars/date_time_expr.rb +142 -2
- data/lib/polars/date_time_name_space.rb +17 -3
- data/lib/polars/expr.rb +145 -78
- data/lib/polars/functions.rb +0 -1
- data/lib/polars/group_by.rb +1 -22
- data/lib/polars/lazy_frame.rb +84 -31
- data/lib/polars/lazy_functions.rb +71 -32
- data/lib/polars/list_expr.rb +94 -45
- data/lib/polars/list_name_space.rb +13 -13
- data/lib/polars/rolling_group_by.rb +4 -2
- data/lib/polars/series.rb +249 -87
- data/lib/polars/string_expr.rb +277 -45
- data/lib/polars/string_name_space.rb +137 -22
- data/lib/polars/struct_name_space.rb +32 -0
- data/lib/polars/utils.rb +138 -54
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +5 -2
- metadata +29 -11
- data/ext/polars/src/lazy/dsl.rs +0 -1775
- data/ext/polars/src/lazy/mod.rs +0 -5
- data/ext/polars/src/lazy/utils.rs +0 -13
- data/ext/polars/src/list_construction.rs +0 -100
- /data/ext/polars/src/{numo.rs → series/export.rs} +0 -0
- /data/ext/polars/src/{set.rs → series/set_at_idx.rs} +0 -0
data/Cargo.toml
CHANGED
@@ -3,7 +3,6 @@ members = ["ext/polars"]
|
|
3
3
|
|
4
4
|
[patch.crates-io]
|
5
5
|
jsonpath_lib = { git = "https://github.com/ritchie46/jsonpath", rev = "24eaf0b4416edff38a4d1b6b17bc4b9f3f047b4b" }
|
6
|
-
halfbrown = { git = "https://github.com/Licenser/halfbrown", rev = "952023c5dd6461b009bb5ba66b9aa979bd75949f" }
|
7
6
|
|
8
7
|
[profile.release]
|
9
8
|
strip = true
|
data/README.md
CHANGED
@@ -25,7 +25,7 @@ Polars.read_csv("iris.csv")
|
|
25
25
|
.collect
|
26
26
|
```
|
27
27
|
|
28
|
-
You can follow [Polars tutorials](https://pola-rs.github.io/polars-book/user-guide/
|
28
|
+
You can follow [Polars tutorials](https://pola-rs.github.io/polars-book/user-guide/) and convert the code to Ruby in many cases. Feel free to open an issue if you run into problems.
|
29
29
|
|
30
30
|
## Reference
|
31
31
|
|
@@ -58,9 +58,9 @@ Polars.scan_parquet("file.parquet")
|
|
58
58
|
From Active Record
|
59
59
|
|
60
60
|
```ruby
|
61
|
-
Polars.
|
61
|
+
Polars.read_database(User.all)
|
62
62
|
# or
|
63
|
-
Polars.
|
63
|
+
Polars.read_database("SELECT * FROM users")
|
64
64
|
```
|
65
65
|
|
66
66
|
From JSON
|
@@ -348,7 +348,7 @@ df.to_numo
|
|
348
348
|
You can specify column types when creating a data frame
|
349
349
|
|
350
350
|
```ruby
|
351
|
-
Polars::DataFrame.new(data,
|
351
|
+
Polars::DataFrame.new(data, schema: {"a" => Polars::Int32, "b" => Polars::Float32})
|
352
352
|
```
|
353
353
|
|
354
354
|
Supported types are:
|
@@ -357,8 +357,9 @@ Supported types are:
|
|
357
357
|
- float - `Float64`, `Float32`
|
358
358
|
- integer - `Int64`, `Int32`, `Int16`, `Int8`
|
359
359
|
- unsigned integer - `UInt64`, `UInt32`, `UInt16`, `UInt8`
|
360
|
-
- string - `Utf8`, `Categorical`
|
360
|
+
- string - `Utf8`, `Binary`, `Categorical`
|
361
361
|
- temporal - `Date`, `Datetime`, `Time`, `Duration`
|
362
|
+
- other - `Object`, `List`, `Struct`, `Array` [unreleased]
|
362
363
|
|
363
364
|
Get column types
|
364
365
|
|
data/ext/polars/Cargo.toml
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
[package]
|
2
2
|
name = "polars"
|
3
|
-
version = "0.
|
3
|
+
version = "0.6.0"
|
4
4
|
license = "MIT"
|
5
5
|
authors = ["Andrew Kane <andrew@ankane.org>"]
|
6
6
|
edition = "2021"
|
@@ -11,23 +11,24 @@ crate-type = ["cdylib"]
|
|
11
11
|
|
12
12
|
[dependencies]
|
13
13
|
ahash = "0.8"
|
14
|
+
chrono = "=0.4.24"
|
14
15
|
magnus = "0.5"
|
15
|
-
polars-core = "0.
|
16
|
+
polars-core = "0.31.1"
|
16
17
|
serde_json = "1"
|
17
18
|
smartstring = "1"
|
18
19
|
|
19
20
|
[dependencies.polars]
|
20
|
-
version = "0.
|
21
|
+
version = "0.31.1"
|
21
22
|
features = [
|
22
23
|
"abs",
|
23
|
-
"
|
24
|
+
"approx_unique",
|
24
25
|
"arg_where",
|
25
26
|
"asof_join",
|
26
27
|
"avro",
|
27
28
|
"binary_encoding",
|
28
29
|
"concat_str",
|
29
30
|
"cse",
|
30
|
-
"csv
|
31
|
+
"csv",
|
31
32
|
"cum_agg",
|
32
33
|
"cumulative_eval",
|
33
34
|
"dataframe_arithmetic",
|
@@ -49,7 +50,9 @@ features = [
|
|
49
50
|
"json",
|
50
51
|
"lazy",
|
51
52
|
"lazy_regex",
|
53
|
+
"list_count",
|
52
54
|
"list_eval",
|
55
|
+
"list_take",
|
53
56
|
"list_to_struct",
|
54
57
|
"log",
|
55
58
|
"meta",
|
@@ -65,6 +68,7 @@ features = [
|
|
65
68
|
"propagate_nans",
|
66
69
|
"random",
|
67
70
|
"rank",
|
71
|
+
"range",
|
68
72
|
"reinterpret",
|
69
73
|
"repeat_by",
|
70
74
|
"rolling_window",
|
@@ -75,6 +79,7 @@ features = [
|
|
75
79
|
"serde-lazy",
|
76
80
|
"sign",
|
77
81
|
"string_encoding",
|
82
|
+
"string_from_radix",
|
78
83
|
"string_justify",
|
79
84
|
"strings",
|
80
85
|
"timezones",
|
@@ -202,8 +202,8 @@ pub fn apply_lambda_with_utf8_out_type(
|
|
202
202
|
}
|
203
203
|
|
204
204
|
/// Apply a lambda with list output type
|
205
|
-
pub fn apply_lambda_with_list_out_type
|
206
|
-
df: &
|
205
|
+
pub fn apply_lambda_with_list_out_type(
|
206
|
+
df: &DataFrame,
|
207
207
|
lambda: Value,
|
208
208
|
init_null_count: usize,
|
209
209
|
first_value: Option<&Series>,
|
data/ext/polars/src/apply/mod.rs
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
pub mod dataframe;
|
2
|
+
pub mod lazy;
|
2
3
|
pub mod series;
|
3
4
|
|
4
5
|
use magnus::{RHash, Value};
|
@@ -236,15 +237,19 @@ fn iterator_to_list(
|
|
236
237
|
for _ in 0..init_null_count {
|
237
238
|
builder.append_null()
|
238
239
|
}
|
239
|
-
builder
|
240
|
+
builder
|
241
|
+
.append_opt_series(first_value)
|
242
|
+
.map_err(RbPolarsErr::from)?;
|
240
243
|
for opt_val in it {
|
241
244
|
match opt_val {
|
242
245
|
None => builder.append_null(),
|
243
246
|
Some(s) => {
|
244
247
|
if s.len() == 0 && s.dtype() != dt {
|
245
|
-
builder
|
248
|
+
builder
|
249
|
+
.append_series(&Series::full_null("", 0, dt))
|
250
|
+
.unwrap()
|
246
251
|
} else {
|
247
|
-
builder.append_series(&s)
|
252
|
+
builder.append_series(&s).map_err(RbPolarsErr::from)?
|
248
253
|
}
|
249
254
|
}
|
250
255
|
}
|
@@ -68,11 +68,13 @@ impl RbBatchedCsv {
|
|
68
68
|
};
|
69
69
|
|
70
70
|
let overwrite_dtype = overwrite_dtype.map(|overwrite_dtype| {
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
71
|
+
overwrite_dtype
|
72
|
+
.iter()
|
73
|
+
.map(|(name, dtype)| {
|
74
|
+
let dtype = dtype.0.clone();
|
75
|
+
Field::new(name, dtype)
|
76
|
+
})
|
77
|
+
.collect::<Schema>()
|
76
78
|
});
|
77
79
|
|
78
80
|
let overwrite_dtype_slice = overwrite_dtype_slice.map(|overwrite_dtype| {
|
@@ -1,9 +1,10 @@
|
|
1
1
|
use std::fmt::{Display, Formatter};
|
2
2
|
use std::hash::{Hash, Hasher};
|
3
3
|
|
4
|
+
use magnus::encoding::{EncodingCapable, Index};
|
4
5
|
use magnus::{
|
5
|
-
class, exception, r_hash::ForEach, ruby_handle::RubyHandle, Integer, IntoValue, Module,
|
6
|
-
|
6
|
+
class, exception, r_hash::ForEach, ruby_handle::RubyHandle, Float, Integer, IntoValue, Module,
|
7
|
+
RArray, RHash, RString, Symbol, TryConvert, Value, QNIL,
|
7
8
|
};
|
8
9
|
use polars::chunked_array::object::PolarsObjectSafe;
|
9
10
|
use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
|
@@ -15,7 +16,9 @@ use polars::prelude::*;
|
|
15
16
|
use polars::series::ops::NullBehavior;
|
16
17
|
use smartstring::alias::String as SmartString;
|
17
18
|
|
18
|
-
use crate::
|
19
|
+
use crate::object::OBJECT_NAME;
|
20
|
+
use crate::rb_modules::utils;
|
21
|
+
use crate::{RbDataFrame, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbTypeError, RbValueError};
|
19
22
|
|
20
23
|
pub(crate) fn slice_to_wrapped<T>(slice: &[T]) -> &[Wrap<T>] {
|
21
24
|
// Safety:
|
@@ -23,6 +26,12 @@ pub(crate) fn slice_to_wrapped<T>(slice: &[T]) -> &[Wrap<T>] {
|
|
23
26
|
unsafe { std::mem::transmute(slice) }
|
24
27
|
}
|
25
28
|
|
29
|
+
pub(crate) fn slice_extract_wrapped<T>(slice: &[Wrap<T>]) -> &[T] {
|
30
|
+
// Safety:
|
31
|
+
// Wrap is transparent.
|
32
|
+
unsafe { std::mem::transmute(slice) }
|
33
|
+
}
|
34
|
+
|
26
35
|
pub(crate) fn vec_extract_wrapped<T>(buf: Vec<Wrap<T>>) -> Vec<T> {
|
27
36
|
// Safety:
|
28
37
|
// Wrap is transparent.
|
@@ -141,48 +150,29 @@ impl IntoValue for Wrap<AnyValue<'_>> {
|
|
141
150
|
AnyValue::Boolean(v) => Value::from(v),
|
142
151
|
AnyValue::Utf8(v) => Value::from(v),
|
143
152
|
AnyValue::Utf8Owned(v) => Value::from(v.as_str()),
|
144
|
-
AnyValue::Categorical(
|
145
|
-
|
146
|
-
|
147
|
-
.unwrap()
|
148
|
-
.funcall::<_, _, Value>("utc", ())
|
149
|
-
.unwrap()
|
150
|
-
.funcall::<_, _, Value>("to_date", ())
|
151
|
-
.unwrap(),
|
152
|
-
AnyValue::Datetime(v, tu, tz) => {
|
153
|
-
let t = match tu {
|
154
|
-
TimeUnit::Nanoseconds => {
|
155
|
-
let sec = v / 1000000000;
|
156
|
-
let subsec = v % 1000000000;
|
157
|
-
class::time()
|
158
|
-
.funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("nsec")))
|
159
|
-
.unwrap()
|
160
|
-
}
|
161
|
-
TimeUnit::Microseconds => {
|
162
|
-
let sec = v / 1000000;
|
163
|
-
let subsec = v % 1000000;
|
164
|
-
class::time()
|
165
|
-
.funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("usec")))
|
166
|
-
.unwrap()
|
167
|
-
}
|
168
|
-
TimeUnit::Milliseconds => {
|
169
|
-
let sec = v / 1000;
|
170
|
-
let subsec = v % 1000;
|
171
|
-
class::time()
|
172
|
-
.funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("millisecond")))
|
173
|
-
.unwrap()
|
174
|
-
}
|
175
|
-
};
|
176
|
-
|
177
|
-
if tz.is_some() {
|
178
|
-
todo!();
|
153
|
+
AnyValue::Categorical(idx, rev, arr) => {
|
154
|
+
let s = if arr.is_null() {
|
155
|
+
rev.get(idx)
|
179
156
|
} else {
|
180
|
-
|
181
|
-
}
|
157
|
+
unsafe { arr.deref_unchecked().value(idx as usize) }
|
158
|
+
};
|
159
|
+
s.into_value()
|
160
|
+
}
|
161
|
+
AnyValue::Date(v) => utils().funcall("_to_ruby_date", (v,)).unwrap(),
|
162
|
+
AnyValue::Datetime(v, time_unit, time_zone) => {
|
163
|
+
let time_unit = time_unit.to_ascii();
|
164
|
+
utils()
|
165
|
+
.funcall("_to_ruby_datetime", (v, time_unit, time_zone.clone()))
|
166
|
+
.unwrap()
|
167
|
+
}
|
168
|
+
AnyValue::Duration(v, time_unit) => {
|
169
|
+
let time_unit = time_unit.to_ascii();
|
170
|
+
utils()
|
171
|
+
.funcall("_to_ruby_duration", (v, time_unit))
|
172
|
+
.unwrap()
|
182
173
|
}
|
183
|
-
AnyValue::
|
184
|
-
AnyValue::
|
185
|
-
AnyValue::List(v) => RbSeries::new(v).to_a().into_value(),
|
174
|
+
AnyValue::Time(v) => utils().funcall("_to_ruby_time", (v,)).unwrap(),
|
175
|
+
AnyValue::Array(v, _) | AnyValue::List(v) => RbSeries::new(v).to_a().into_value(),
|
186
176
|
ref av @ AnyValue::Struct(_, _, flds) => struct_dict(av._iter_struct_av(), flds),
|
187
177
|
AnyValue::StructOwned(payload) => struct_dict(payload.0.into_iter(), &payload.1),
|
188
178
|
AnyValue::Object(v) => {
|
@@ -195,7 +185,9 @@ impl IntoValue for Wrap<AnyValue<'_>> {
|
|
195
185
|
}
|
196
186
|
AnyValue::Binary(v) => RString::from_slice(v).into_value(),
|
197
187
|
AnyValue::BinaryOwned(v) => RString::from_slice(&v).into_value(),
|
198
|
-
AnyValue::Decimal(
|
188
|
+
AnyValue::Decimal(v, scale) => utils()
|
189
|
+
.funcall("_to_ruby_decimal", (v.to_string(), -(scale as i32)))
|
190
|
+
.unwrap(),
|
199
191
|
}
|
200
192
|
}
|
201
193
|
}
|
@@ -215,10 +207,22 @@ impl IntoValue for Wrap<DataType> {
|
|
215
207
|
DataType::UInt64 => pl.const_get::<_, Value>("UInt64").unwrap(),
|
216
208
|
DataType::Float32 => pl.const_get::<_, Value>("Float32").unwrap(),
|
217
209
|
DataType::Float64 => pl.const_get::<_, Value>("Float64").unwrap(),
|
218
|
-
DataType::Decimal(
|
210
|
+
DataType::Decimal(precision, scale) => {
|
211
|
+
let decimal_class = pl.const_get::<_, Value>("Decimal").unwrap();
|
212
|
+
decimal_class
|
213
|
+
.funcall::<_, _, Value>("new", (precision, scale))
|
214
|
+
.unwrap()
|
215
|
+
}
|
219
216
|
DataType::Boolean => pl.const_get::<_, Value>("Boolean").unwrap(),
|
220
217
|
DataType::Utf8 => pl.const_get::<_, Value>("Utf8").unwrap(),
|
221
218
|
DataType::Binary => pl.const_get::<_, Value>("Binary").unwrap(),
|
219
|
+
DataType::Array(inner, size) => {
|
220
|
+
let inner = Wrap(*inner);
|
221
|
+
let list_class = pl.const_get::<_, Value>("Array").unwrap();
|
222
|
+
list_class
|
223
|
+
.funcall::<_, _, Value>("new", (size, inner))
|
224
|
+
.unwrap()
|
225
|
+
}
|
222
226
|
DataType::List(inner) => {
|
223
227
|
let inner = Wrap(*inner);
|
224
228
|
let list_class = pl.const_get::<_, Value>("List").unwrap();
|
@@ -261,6 +265,133 @@ impl IntoValue for Wrap<DataType> {
|
|
261
265
|
}
|
262
266
|
}
|
263
267
|
|
268
|
+
impl IntoValue for Wrap<TimeUnit> {
|
269
|
+
fn into_value_with(self, _: &RubyHandle) -> Value {
|
270
|
+
let tu = match self.0 {
|
271
|
+
TimeUnit::Nanoseconds => "ns",
|
272
|
+
TimeUnit::Microseconds => "us",
|
273
|
+
TimeUnit::Milliseconds => "ms",
|
274
|
+
};
|
275
|
+
tu.into_value()
|
276
|
+
}
|
277
|
+
}
|
278
|
+
|
279
|
+
impl IntoValue for Wrap<&Utf8Chunked> {
|
280
|
+
fn into_value_with(self, _: &RubyHandle) -> Value {
|
281
|
+
let iter = self.0.into_iter();
|
282
|
+
RArray::from_iter(iter).into_value()
|
283
|
+
}
|
284
|
+
}
|
285
|
+
|
286
|
+
impl IntoValue for Wrap<&BinaryChunked> {
|
287
|
+
fn into_value_with(self, _: &RubyHandle) -> Value {
|
288
|
+
let iter = self
|
289
|
+
.0
|
290
|
+
.into_iter()
|
291
|
+
.map(|opt_bytes| opt_bytes.map(RString::from_slice));
|
292
|
+
RArray::from_iter(iter).into_value()
|
293
|
+
}
|
294
|
+
}
|
295
|
+
|
296
|
+
impl IntoValue for Wrap<&StructChunked> {
|
297
|
+
fn into_value_with(self, _: &RubyHandle) -> Value {
|
298
|
+
let s = self.0.clone().into_series();
|
299
|
+
// todo! iterate its chunks and flatten.
|
300
|
+
// make series::iter() accept a chunk index.
|
301
|
+
let s = s.rechunk();
|
302
|
+
let iter = s.iter().map(|av| {
|
303
|
+
if let AnyValue::Struct(_, _, flds) = av {
|
304
|
+
struct_dict(av._iter_struct_av(), flds)
|
305
|
+
} else {
|
306
|
+
unreachable!()
|
307
|
+
}
|
308
|
+
});
|
309
|
+
|
310
|
+
RArray::from_iter(iter).into_value()
|
311
|
+
}
|
312
|
+
}
|
313
|
+
|
314
|
+
impl IntoValue for Wrap<&DurationChunked> {
|
315
|
+
fn into_value_with(self, _: &RubyHandle) -> Value {
|
316
|
+
let utils = utils();
|
317
|
+
let time_unit = Wrap(self.0.time_unit()).into_value();
|
318
|
+
let iter = self.0.into_iter().map(|opt_v| {
|
319
|
+
opt_v.map(|v| {
|
320
|
+
utils
|
321
|
+
.funcall::<_, _, Value>("_to_ruby_duration", (v, time_unit))
|
322
|
+
.unwrap()
|
323
|
+
})
|
324
|
+
});
|
325
|
+
RArray::from_iter(iter).into_value()
|
326
|
+
}
|
327
|
+
}
|
328
|
+
|
329
|
+
impl IntoValue for Wrap<&DatetimeChunked> {
|
330
|
+
fn into_value_with(self, _: &RubyHandle) -> Value {
|
331
|
+
let utils = utils();
|
332
|
+
let time_unit = Wrap(self.0.time_unit()).into_value();
|
333
|
+
let time_zone = self.0.time_zone().clone().into_value();
|
334
|
+
let iter = self.0.into_iter().map(|opt_v| {
|
335
|
+
opt_v.map(|v| {
|
336
|
+
utils
|
337
|
+
.funcall::<_, _, Value>("_to_ruby_datetime", (v, time_unit, time_zone))
|
338
|
+
.unwrap()
|
339
|
+
})
|
340
|
+
});
|
341
|
+
RArray::from_iter(iter).into_value()
|
342
|
+
}
|
343
|
+
}
|
344
|
+
|
345
|
+
impl IntoValue for Wrap<&TimeChunked> {
|
346
|
+
fn into_value_with(self, _: &RubyHandle) -> Value {
|
347
|
+
let utils = utils();
|
348
|
+
let iter = self.0.into_iter().map(|opt_v| {
|
349
|
+
opt_v.map(|v| utils.funcall::<_, _, Value>("_to_ruby_time", (v,)).unwrap())
|
350
|
+
});
|
351
|
+
RArray::from_iter(iter).into_value()
|
352
|
+
}
|
353
|
+
}
|
354
|
+
|
355
|
+
impl IntoValue for Wrap<&DateChunked> {
|
356
|
+
fn into_value_with(self, _: &RubyHandle) -> Value {
|
357
|
+
let utils = utils();
|
358
|
+
let iter = self.0.into_iter().map(|opt_v| {
|
359
|
+
opt_v.map(|v| utils.funcall::<_, _, Value>("_to_ruby_date", (v,)).unwrap())
|
360
|
+
});
|
361
|
+
RArray::from_iter(iter).into_value()
|
362
|
+
}
|
363
|
+
}
|
364
|
+
|
365
|
+
impl IntoValue for Wrap<&DecimalChunked> {
|
366
|
+
fn into_value_with(self, _: &RubyHandle) -> Value {
|
367
|
+
let utils = utils();
|
368
|
+
let rb_scale = (-(self.0.scale() as i32)).into_value();
|
369
|
+
let iter = self.0.into_iter().map(|opt_v| {
|
370
|
+
opt_v.map(|v| {
|
371
|
+
utils
|
372
|
+
.funcall::<_, _, Value>("_to_ruby_decimal", (v.to_string(), rb_scale))
|
373
|
+
.unwrap()
|
374
|
+
})
|
375
|
+
});
|
376
|
+
RArray::from_iter(iter).into_value()
|
377
|
+
}
|
378
|
+
}
|
379
|
+
|
380
|
+
fn abs_decimal_from_digits(digits: String, exp: i32) -> Option<(i128, usize)> {
|
381
|
+
match digits.parse::<i128>() {
|
382
|
+
Ok(v) => Some((v, ((digits.len() as i32) - exp) as usize)),
|
383
|
+
Err(_) => None,
|
384
|
+
}
|
385
|
+
}
|
386
|
+
|
387
|
+
impl TryConvert for Wrap<Field> {
|
388
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
389
|
+
let name: String = ob.funcall("name", ())?;
|
390
|
+
let dtype: Wrap<DataType> = ob.funcall("dtype", ())?;
|
391
|
+
Ok(Wrap(Field::new(&name, dtype.0)))
|
392
|
+
}
|
393
|
+
}
|
394
|
+
|
264
395
|
impl TryConvert for Wrap<DataType> {
|
265
396
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
266
397
|
let dtype = if ob.is_kind_of(class::class()) {
|
@@ -282,10 +413,11 @@ impl TryConvert for Wrap<DataType> {
|
|
282
413
|
"Polars::Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
|
283
414
|
"Polars::Time" => DataType::Time,
|
284
415
|
"Polars::Duration" => DataType::Duration(TimeUnit::Microseconds),
|
416
|
+
"Polars::Decimal" => DataType::Decimal(None, None),
|
285
417
|
"Polars::Float32" => DataType::Float32,
|
286
418
|
"Polars::Float64" => DataType::Float64,
|
287
|
-
|
288
|
-
"Polars::List" => DataType::List(Box::new(DataType::
|
419
|
+
"Polars::Object" => DataType::Object(OBJECT_NAME),
|
420
|
+
"Polars::List" => DataType::List(Box::new(DataType::Null)),
|
289
421
|
"Polars::Null" => DataType::Null,
|
290
422
|
"Polars::Unknown" => DataType::Unknown,
|
291
423
|
dt => {
|
@@ -294,6 +426,47 @@ impl TryConvert for Wrap<DataType> {
|
|
294
426
|
)))
|
295
427
|
}
|
296
428
|
}
|
429
|
+
// TODO improve
|
430
|
+
} else if ob.try_convert::<String>().is_err() {
|
431
|
+
let name = unsafe { ob.class().name() }.into_owned();
|
432
|
+
match name.as_str() {
|
433
|
+
"Polars::Duration" => {
|
434
|
+
let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
|
435
|
+
let time_unit = time_unit.try_convert::<Wrap<TimeUnit>>()?.0;
|
436
|
+
DataType::Duration(time_unit)
|
437
|
+
}
|
438
|
+
"Polars::Datetime" => {
|
439
|
+
let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
|
440
|
+
let time_unit = time_unit.try_convert::<Wrap<TimeUnit>>()?.0;
|
441
|
+
let time_zone: Value = ob.funcall("time_zone", ()).unwrap();
|
442
|
+
let time_zone = time_zone.try_convert()?;
|
443
|
+
DataType::Datetime(time_unit, time_zone)
|
444
|
+
}
|
445
|
+
"Polars::Decimal" => {
|
446
|
+
let precision = ob.funcall::<_, _, Value>("precision", ())?.try_convert()?;
|
447
|
+
let scale = ob.funcall::<_, _, Value>("scale", ())?.try_convert()?;
|
448
|
+
DataType::Decimal(precision, Some(scale))
|
449
|
+
}
|
450
|
+
"Polars::List" => {
|
451
|
+
let inner: Value = ob.funcall("inner", ()).unwrap();
|
452
|
+
let inner = inner.try_convert::<Wrap<DataType>>()?;
|
453
|
+
DataType::List(Box::new(inner.0))
|
454
|
+
}
|
455
|
+
"Polars::Struct" => {
|
456
|
+
let arr: RArray = ob.funcall("fields", ())?;
|
457
|
+
let mut fields = Vec::with_capacity(arr.len());
|
458
|
+
for v in arr.each() {
|
459
|
+
fields.push(v?.try_convert::<Wrap<Field>>()?.0);
|
460
|
+
}
|
461
|
+
DataType::Struct(fields)
|
462
|
+
}
|
463
|
+
dt => {
|
464
|
+
return Err(RbTypeError::new_err(format!(
|
465
|
+
"A {dt} object is not a correct polars DataType. \
|
466
|
+
Hint: use the class without instantiating it.",
|
467
|
+
)))
|
468
|
+
}
|
469
|
+
}
|
297
470
|
} else {
|
298
471
|
match ob.try_convert::<String>()?.as_str() {
|
299
472
|
"u8" => DataType::UInt8,
|
@@ -336,16 +509,21 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
|
|
336
509
|
Ok(AnyValue::Boolean(ob.try_convert::<bool>()?).into())
|
337
510
|
} else if let Some(v) = Integer::from_value(ob) {
|
338
511
|
Ok(AnyValue::Int64(v.to_i64()?).into())
|
339
|
-
} else if let Some(v) =
|
512
|
+
} else if let Some(v) = Float::from_value(ob) {
|
340
513
|
Ok(AnyValue::Float64(v.to_f64()).into())
|
341
514
|
} else if let Some(v) = RString::from_value(ob) {
|
342
|
-
|
515
|
+
if v.enc_get() == Index::utf8() {
|
516
|
+
Ok(AnyValue::Utf8Owned(v.to_string()?.into()).into())
|
517
|
+
} else {
|
518
|
+
Ok(AnyValue::BinaryOwned(unsafe { v.as_slice() }.to_vec()).into())
|
519
|
+
}
|
343
520
|
// call is_a? for ActiveSupport::TimeWithZone
|
344
521
|
} else if ob.funcall::<_, _, bool>("is_a?", (class::time(),))? {
|
345
522
|
let sec = ob.funcall::<_, _, i64>("to_i", ())?;
|
346
523
|
let nsec = ob.funcall::<_, _, i64>("nsec", ())?;
|
347
524
|
let v = sec * 1_000_000_000 + nsec;
|
348
|
-
// TODO support time zone
|
525
|
+
// TODO support time zone when possible
|
526
|
+
// https://github.com/pola-rs/polars/issues/9103
|
349
527
|
Ok(AnyValue::Datetime(v, TimeUnit::Nanoseconds, &None).into())
|
350
528
|
} else if ob.is_nil() {
|
351
529
|
Ok(AnyValue::Null.into())
|
@@ -366,17 +544,35 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
|
|
366
544
|
if v.is_empty() {
|
367
545
|
Ok(Wrap(AnyValue::List(Series::new_empty("", &DataType::Null))))
|
368
546
|
} else {
|
369
|
-
let
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
.
|
547
|
+
let list = v;
|
548
|
+
|
549
|
+
let mut avs = Vec::with_capacity(25);
|
550
|
+
let mut iter = list.each();
|
551
|
+
|
552
|
+
for item in (&mut iter).take(25) {
|
553
|
+
avs.push(item?.try_convert::<Wrap<AnyValue>>()?.0)
|
554
|
+
}
|
555
|
+
|
556
|
+
let (dtype, _n_types) = any_values_to_dtype(&avs).map_err(RbPolarsErr::from)?;
|
557
|
+
|
558
|
+
// push the rest
|
559
|
+
avs.reserve(list.len());
|
560
|
+
for item in iter {
|
561
|
+
avs.push(item?.try_convert::<Wrap<AnyValue>>()?.0)
|
562
|
+
}
|
563
|
+
|
376
564
|
let s = Series::from_any_values_and_dtype("", &avs, &dtype, true)
|
377
565
|
.map_err(RbPolarsErr::from)?;
|
378
566
|
Ok(Wrap(AnyValue::List(s)))
|
379
567
|
}
|
568
|
+
} else if ob.is_kind_of(crate::rb_modules::datetime()) {
|
569
|
+
let sec: i64 = ob.funcall("to_i", ())?;
|
570
|
+
let nsec: i64 = ob.funcall("nsec", ())?;
|
571
|
+
Ok(Wrap(AnyValue::Datetime(
|
572
|
+
sec * 1_000_000_000 + nsec,
|
573
|
+
TimeUnit::Nanoseconds,
|
574
|
+
&None,
|
575
|
+
)))
|
380
576
|
} else if ob.is_kind_of(crate::rb_modules::date()) {
|
381
577
|
// convert to DateTime for UTC
|
382
578
|
let v = ob
|
@@ -384,6 +580,20 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
|
|
384
580
|
.funcall::<_, _, Value>("to_time", ())?
|
385
581
|
.funcall::<_, _, i64>("to_i", ())?;
|
386
582
|
Ok(Wrap(AnyValue::Date((v / 86400) as i32)))
|
583
|
+
} else if ob.is_kind_of(crate::rb_modules::bigdecimal()) {
|
584
|
+
let (sign, digits, _, exp): (i8, String, i32, i32) = ob
|
585
|
+
.funcall::<_, _, Value>("split", ())
|
586
|
+
.unwrap()
|
587
|
+
.try_convert()
|
588
|
+
.unwrap();
|
589
|
+
let (mut v, scale) = abs_decimal_from_digits(digits, exp).ok_or_else(|| {
|
590
|
+
RbPolarsErr::other("BigDecimal is too large to fit in Decimal128".into())
|
591
|
+
})?;
|
592
|
+
if sign < 0 {
|
593
|
+
// TODO better error
|
594
|
+
v = v.checked_neg().unwrap();
|
595
|
+
}
|
596
|
+
Ok(Wrap(AnyValue::Decimal(v, scale)))
|
387
597
|
} else {
|
388
598
|
Err(RbPolarsErr::other(format!(
|
389
599
|
"object type not supported {:?}",
|
@@ -410,12 +620,12 @@ impl TryConvert for Wrap<Schema> {
|
|
410
620
|
|
411
621
|
let mut schema = Vec::new();
|
412
622
|
dict.foreach(|key: String, val: Wrap<DataType>| {
|
413
|
-
schema.push(Field::new(&key, val.0));
|
623
|
+
schema.push(Ok(Field::new(&key, val.0)));
|
414
624
|
Ok(ForEach::Continue)
|
415
625
|
})
|
416
626
|
.unwrap();
|
417
627
|
|
418
|
-
Ok(Wrap(schema.into_iter().
|
628
|
+
Ok(Wrap(schema.into_iter().collect::<RbResult<Schema>>()?))
|
419
629
|
}
|
420
630
|
}
|
421
631
|
|