polars-df 0.4.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +26 -0
- data/Cargo.lock +447 -410
- data/Cargo.toml +0 -1
- data/README.md +6 -5
- data/ext/polars/Cargo.toml +10 -5
- data/ext/polars/src/apply/dataframe.rs +2 -2
- data/ext/polars/src/{lazy/apply.rs → apply/lazy.rs} +1 -2
- data/ext/polars/src/apply/mod.rs +8 -3
- data/ext/polars/src/batched_csv.rs +7 -5
- data/ext/polars/src/conversion.rs +269 -59
- data/ext/polars/src/dataframe.rs +38 -40
- data/ext/polars/src/error.rs +6 -2
- data/ext/polars/src/expr/array.rs +15 -0
- data/ext/polars/src/expr/binary.rs +69 -0
- data/ext/polars/src/expr/categorical.rs +10 -0
- data/ext/polars/src/expr/datetime.rs +223 -0
- data/ext/polars/src/expr/general.rs +963 -0
- data/ext/polars/src/expr/list.rs +151 -0
- data/ext/polars/src/{lazy → expr}/meta.rs +16 -6
- data/ext/polars/src/expr/string.rs +314 -0
- data/ext/polars/src/expr/struct.rs +15 -0
- data/ext/polars/src/expr.rs +34 -0
- data/ext/polars/src/functions/eager.rs +93 -0
- data/ext/polars/src/functions/io.rs +34 -0
- data/ext/polars/src/functions/lazy.rs +249 -0
- data/ext/polars/src/functions/meta.rs +8 -0
- data/ext/polars/src/functions/mod.rs +5 -0
- data/ext/polars/src/functions/whenthen.rs +43 -0
- data/ext/polars/src/{lazy/dataframe.rs → lazyframe.rs} +26 -35
- data/ext/polars/src/lazygroupby.rs +29 -0
- data/ext/polars/src/lib.rs +223 -316
- data/ext/polars/src/object.rs +1 -1
- data/ext/polars/src/rb_modules.rs +12 -0
- data/ext/polars/src/series/aggregation.rs +83 -0
- data/ext/polars/src/series/arithmetic.rs +88 -0
- data/ext/polars/src/series/comparison.rs +251 -0
- data/ext/polars/src/series/construction.rs +190 -0
- data/ext/polars/src/series.rs +151 -551
- data/lib/polars/array_expr.rb +84 -0
- data/lib/polars/array_name_space.rb +77 -0
- data/lib/polars/batched_csv_reader.rb +1 -1
- data/lib/polars/convert.rb +2 -2
- data/lib/polars/data_frame.rb +289 -96
- data/lib/polars/data_types.rb +169 -33
- data/lib/polars/date_time_expr.rb +142 -2
- data/lib/polars/date_time_name_space.rb +17 -3
- data/lib/polars/expr.rb +145 -78
- data/lib/polars/functions.rb +0 -1
- data/lib/polars/group_by.rb +1 -22
- data/lib/polars/lazy_frame.rb +84 -31
- data/lib/polars/lazy_functions.rb +71 -32
- data/lib/polars/list_expr.rb +94 -45
- data/lib/polars/list_name_space.rb +13 -13
- data/lib/polars/rolling_group_by.rb +4 -2
- data/lib/polars/series.rb +249 -87
- data/lib/polars/string_expr.rb +277 -45
- data/lib/polars/string_name_space.rb +137 -22
- data/lib/polars/struct_name_space.rb +32 -0
- data/lib/polars/utils.rb +138 -54
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +5 -2
- metadata +29 -11
- data/ext/polars/src/lazy/dsl.rs +0 -1775
- data/ext/polars/src/lazy/mod.rs +0 -5
- data/ext/polars/src/lazy/utils.rs +0 -13
- data/ext/polars/src/list_construction.rs +0 -100
- /data/ext/polars/src/{numo.rs → series/export.rs} +0 -0
- /data/ext/polars/src/{set.rs → series/set_at_idx.rs} +0 -0
data/Cargo.toml
CHANGED
@@ -3,7 +3,6 @@ members = ["ext/polars"]
|
|
3
3
|
|
4
4
|
[patch.crates-io]
|
5
5
|
jsonpath_lib = { git = "https://github.com/ritchie46/jsonpath", rev = "24eaf0b4416edff38a4d1b6b17bc4b9f3f047b4b" }
|
6
|
-
halfbrown = { git = "https://github.com/Licenser/halfbrown", rev = "952023c5dd6461b009bb5ba66b9aa979bd75949f" }
|
7
6
|
|
8
7
|
[profile.release]
|
9
8
|
strip = true
|
data/README.md
CHANGED
@@ -25,7 +25,7 @@ Polars.read_csv("iris.csv")
|
|
25
25
|
.collect
|
26
26
|
```
|
27
27
|
|
28
|
-
You can follow [Polars tutorials](https://pola-rs.github.io/polars-book/user-guide/
|
28
|
+
You can follow [Polars tutorials](https://pola-rs.github.io/polars-book/user-guide/) and convert the code to Ruby in many cases. Feel free to open an issue if you run into problems.
|
29
29
|
|
30
30
|
## Reference
|
31
31
|
|
@@ -58,9 +58,9 @@ Polars.scan_parquet("file.parquet")
|
|
58
58
|
From Active Record
|
59
59
|
|
60
60
|
```ruby
|
61
|
-
Polars.
|
61
|
+
Polars.read_database(User.all)
|
62
62
|
# or
|
63
|
-
Polars.
|
63
|
+
Polars.read_database("SELECT * FROM users")
|
64
64
|
```
|
65
65
|
|
66
66
|
From JSON
|
@@ -348,7 +348,7 @@ df.to_numo
|
|
348
348
|
You can specify column types when creating a data frame
|
349
349
|
|
350
350
|
```ruby
|
351
|
-
Polars::DataFrame.new(data,
|
351
|
+
Polars::DataFrame.new(data, schema: {"a" => Polars::Int32, "b" => Polars::Float32})
|
352
352
|
```
|
353
353
|
|
354
354
|
Supported types are:
|
@@ -357,8 +357,9 @@ Supported types are:
|
|
357
357
|
- float - `Float64`, `Float32`
|
358
358
|
- integer - `Int64`, `Int32`, `Int16`, `Int8`
|
359
359
|
- unsigned integer - `UInt64`, `UInt32`, `UInt16`, `UInt8`
|
360
|
-
- string - `Utf8`, `Categorical`
|
360
|
+
- string - `Utf8`, `Binary`, `Categorical`
|
361
361
|
- temporal - `Date`, `Datetime`, `Time`, `Duration`
|
362
|
+
- other - `Object`, `List`, `Struct`, `Array` [unreleased]
|
362
363
|
|
363
364
|
Get column types
|
364
365
|
|
data/ext/polars/Cargo.toml
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
[package]
|
2
2
|
name = "polars"
|
3
|
-
version = "0.
|
3
|
+
version = "0.6.0"
|
4
4
|
license = "MIT"
|
5
5
|
authors = ["Andrew Kane <andrew@ankane.org>"]
|
6
6
|
edition = "2021"
|
@@ -11,23 +11,24 @@ crate-type = ["cdylib"]
|
|
11
11
|
|
12
12
|
[dependencies]
|
13
13
|
ahash = "0.8"
|
14
|
+
chrono = "=0.4.24"
|
14
15
|
magnus = "0.5"
|
15
|
-
polars-core = "0.
|
16
|
+
polars-core = "0.31.1"
|
16
17
|
serde_json = "1"
|
17
18
|
smartstring = "1"
|
18
19
|
|
19
20
|
[dependencies.polars]
|
20
|
-
version = "0.
|
21
|
+
version = "0.31.1"
|
21
22
|
features = [
|
22
23
|
"abs",
|
23
|
-
"
|
24
|
+
"approx_unique",
|
24
25
|
"arg_where",
|
25
26
|
"asof_join",
|
26
27
|
"avro",
|
27
28
|
"binary_encoding",
|
28
29
|
"concat_str",
|
29
30
|
"cse",
|
30
|
-
"csv
|
31
|
+
"csv",
|
31
32
|
"cum_agg",
|
32
33
|
"cumulative_eval",
|
33
34
|
"dataframe_arithmetic",
|
@@ -49,7 +50,9 @@ features = [
|
|
49
50
|
"json",
|
50
51
|
"lazy",
|
51
52
|
"lazy_regex",
|
53
|
+
"list_count",
|
52
54
|
"list_eval",
|
55
|
+
"list_take",
|
53
56
|
"list_to_struct",
|
54
57
|
"log",
|
55
58
|
"meta",
|
@@ -65,6 +68,7 @@ features = [
|
|
65
68
|
"propagate_nans",
|
66
69
|
"random",
|
67
70
|
"rank",
|
71
|
+
"range",
|
68
72
|
"reinterpret",
|
69
73
|
"repeat_by",
|
70
74
|
"rolling_window",
|
@@ -75,6 +79,7 @@ features = [
|
|
75
79
|
"serde-lazy",
|
76
80
|
"sign",
|
77
81
|
"string_encoding",
|
82
|
+
"string_from_radix",
|
78
83
|
"string_justify",
|
79
84
|
"strings",
|
80
85
|
"timezones",
|
@@ -202,8 +202,8 @@ pub fn apply_lambda_with_utf8_out_type(
|
|
202
202
|
}
|
203
203
|
|
204
204
|
/// Apply a lambda with list output type
|
205
|
-
pub fn apply_lambda_with_list_out_type
|
206
|
-
df: &
|
205
|
+
pub fn apply_lambda_with_list_out_type(
|
206
|
+
df: &DataFrame,
|
207
207
|
lambda: Value,
|
208
208
|
init_null_count: usize,
|
209
209
|
first_value: Option<&Series>,
|
data/ext/polars/src/apply/mod.rs
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
pub mod dataframe;
|
2
|
+
pub mod lazy;
|
2
3
|
pub mod series;
|
3
4
|
|
4
5
|
use magnus::{RHash, Value};
|
@@ -236,15 +237,19 @@ fn iterator_to_list(
|
|
236
237
|
for _ in 0..init_null_count {
|
237
238
|
builder.append_null()
|
238
239
|
}
|
239
|
-
builder
|
240
|
+
builder
|
241
|
+
.append_opt_series(first_value)
|
242
|
+
.map_err(RbPolarsErr::from)?;
|
240
243
|
for opt_val in it {
|
241
244
|
match opt_val {
|
242
245
|
None => builder.append_null(),
|
243
246
|
Some(s) => {
|
244
247
|
if s.len() == 0 && s.dtype() != dt {
|
245
|
-
builder
|
248
|
+
builder
|
249
|
+
.append_series(&Series::full_null("", 0, dt))
|
250
|
+
.unwrap()
|
246
251
|
} else {
|
247
|
-
builder.append_series(&s)
|
252
|
+
builder.append_series(&s).map_err(RbPolarsErr::from)?
|
248
253
|
}
|
249
254
|
}
|
250
255
|
}
|
@@ -68,11 +68,13 @@ impl RbBatchedCsv {
|
|
68
68
|
};
|
69
69
|
|
70
70
|
let overwrite_dtype = overwrite_dtype.map(|overwrite_dtype| {
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
71
|
+
overwrite_dtype
|
72
|
+
.iter()
|
73
|
+
.map(|(name, dtype)| {
|
74
|
+
let dtype = dtype.0.clone();
|
75
|
+
Field::new(name, dtype)
|
76
|
+
})
|
77
|
+
.collect::<Schema>()
|
76
78
|
});
|
77
79
|
|
78
80
|
let overwrite_dtype_slice = overwrite_dtype_slice.map(|overwrite_dtype| {
|
@@ -1,9 +1,10 @@
|
|
1
1
|
use std::fmt::{Display, Formatter};
|
2
2
|
use std::hash::{Hash, Hasher};
|
3
3
|
|
4
|
+
use magnus::encoding::{EncodingCapable, Index};
|
4
5
|
use magnus::{
|
5
|
-
class, exception, r_hash::ForEach, ruby_handle::RubyHandle, Integer, IntoValue, Module,
|
6
|
-
|
6
|
+
class, exception, r_hash::ForEach, ruby_handle::RubyHandle, Float, Integer, IntoValue, Module,
|
7
|
+
RArray, RHash, RString, Symbol, TryConvert, Value, QNIL,
|
7
8
|
};
|
8
9
|
use polars::chunked_array::object::PolarsObjectSafe;
|
9
10
|
use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
|
@@ -15,7 +16,9 @@ use polars::prelude::*;
|
|
15
16
|
use polars::series::ops::NullBehavior;
|
16
17
|
use smartstring::alias::String as SmartString;
|
17
18
|
|
18
|
-
use crate::
|
19
|
+
use crate::object::OBJECT_NAME;
|
20
|
+
use crate::rb_modules::utils;
|
21
|
+
use crate::{RbDataFrame, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbTypeError, RbValueError};
|
19
22
|
|
20
23
|
pub(crate) fn slice_to_wrapped<T>(slice: &[T]) -> &[Wrap<T>] {
|
21
24
|
// Safety:
|
@@ -23,6 +26,12 @@ pub(crate) fn slice_to_wrapped<T>(slice: &[T]) -> &[Wrap<T>] {
|
|
23
26
|
unsafe { std::mem::transmute(slice) }
|
24
27
|
}
|
25
28
|
|
29
|
+
pub(crate) fn slice_extract_wrapped<T>(slice: &[Wrap<T>]) -> &[T] {
|
30
|
+
// Safety:
|
31
|
+
// Wrap is transparent.
|
32
|
+
unsafe { std::mem::transmute(slice) }
|
33
|
+
}
|
34
|
+
|
26
35
|
pub(crate) fn vec_extract_wrapped<T>(buf: Vec<Wrap<T>>) -> Vec<T> {
|
27
36
|
// Safety:
|
28
37
|
// Wrap is transparent.
|
@@ -141,48 +150,29 @@ impl IntoValue for Wrap<AnyValue<'_>> {
|
|
141
150
|
AnyValue::Boolean(v) => Value::from(v),
|
142
151
|
AnyValue::Utf8(v) => Value::from(v),
|
143
152
|
AnyValue::Utf8Owned(v) => Value::from(v.as_str()),
|
144
|
-
AnyValue::Categorical(
|
145
|
-
|
146
|
-
|
147
|
-
.unwrap()
|
148
|
-
.funcall::<_, _, Value>("utc", ())
|
149
|
-
.unwrap()
|
150
|
-
.funcall::<_, _, Value>("to_date", ())
|
151
|
-
.unwrap(),
|
152
|
-
AnyValue::Datetime(v, tu, tz) => {
|
153
|
-
let t = match tu {
|
154
|
-
TimeUnit::Nanoseconds => {
|
155
|
-
let sec = v / 1000000000;
|
156
|
-
let subsec = v % 1000000000;
|
157
|
-
class::time()
|
158
|
-
.funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("nsec")))
|
159
|
-
.unwrap()
|
160
|
-
}
|
161
|
-
TimeUnit::Microseconds => {
|
162
|
-
let sec = v / 1000000;
|
163
|
-
let subsec = v % 1000000;
|
164
|
-
class::time()
|
165
|
-
.funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("usec")))
|
166
|
-
.unwrap()
|
167
|
-
}
|
168
|
-
TimeUnit::Milliseconds => {
|
169
|
-
let sec = v / 1000;
|
170
|
-
let subsec = v % 1000;
|
171
|
-
class::time()
|
172
|
-
.funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("millisecond")))
|
173
|
-
.unwrap()
|
174
|
-
}
|
175
|
-
};
|
176
|
-
|
177
|
-
if tz.is_some() {
|
178
|
-
todo!();
|
153
|
+
AnyValue::Categorical(idx, rev, arr) => {
|
154
|
+
let s = if arr.is_null() {
|
155
|
+
rev.get(idx)
|
179
156
|
} else {
|
180
|
-
|
181
|
-
}
|
157
|
+
unsafe { arr.deref_unchecked().value(idx as usize) }
|
158
|
+
};
|
159
|
+
s.into_value()
|
160
|
+
}
|
161
|
+
AnyValue::Date(v) => utils().funcall("_to_ruby_date", (v,)).unwrap(),
|
162
|
+
AnyValue::Datetime(v, time_unit, time_zone) => {
|
163
|
+
let time_unit = time_unit.to_ascii();
|
164
|
+
utils()
|
165
|
+
.funcall("_to_ruby_datetime", (v, time_unit, time_zone.clone()))
|
166
|
+
.unwrap()
|
167
|
+
}
|
168
|
+
AnyValue::Duration(v, time_unit) => {
|
169
|
+
let time_unit = time_unit.to_ascii();
|
170
|
+
utils()
|
171
|
+
.funcall("_to_ruby_duration", (v, time_unit))
|
172
|
+
.unwrap()
|
182
173
|
}
|
183
|
-
AnyValue::
|
184
|
-
AnyValue::
|
185
|
-
AnyValue::List(v) => RbSeries::new(v).to_a().into_value(),
|
174
|
+
AnyValue::Time(v) => utils().funcall("_to_ruby_time", (v,)).unwrap(),
|
175
|
+
AnyValue::Array(v, _) | AnyValue::List(v) => RbSeries::new(v).to_a().into_value(),
|
186
176
|
ref av @ AnyValue::Struct(_, _, flds) => struct_dict(av._iter_struct_av(), flds),
|
187
177
|
AnyValue::StructOwned(payload) => struct_dict(payload.0.into_iter(), &payload.1),
|
188
178
|
AnyValue::Object(v) => {
|
@@ -195,7 +185,9 @@ impl IntoValue for Wrap<AnyValue<'_>> {
|
|
195
185
|
}
|
196
186
|
AnyValue::Binary(v) => RString::from_slice(v).into_value(),
|
197
187
|
AnyValue::BinaryOwned(v) => RString::from_slice(&v).into_value(),
|
198
|
-
AnyValue::Decimal(
|
188
|
+
AnyValue::Decimal(v, scale) => utils()
|
189
|
+
.funcall("_to_ruby_decimal", (v.to_string(), -(scale as i32)))
|
190
|
+
.unwrap(),
|
199
191
|
}
|
200
192
|
}
|
201
193
|
}
|
@@ -215,10 +207,22 @@ impl IntoValue for Wrap<DataType> {
|
|
215
207
|
DataType::UInt64 => pl.const_get::<_, Value>("UInt64").unwrap(),
|
216
208
|
DataType::Float32 => pl.const_get::<_, Value>("Float32").unwrap(),
|
217
209
|
DataType::Float64 => pl.const_get::<_, Value>("Float64").unwrap(),
|
218
|
-
DataType::Decimal(
|
210
|
+
DataType::Decimal(precision, scale) => {
|
211
|
+
let decimal_class = pl.const_get::<_, Value>("Decimal").unwrap();
|
212
|
+
decimal_class
|
213
|
+
.funcall::<_, _, Value>("new", (precision, scale))
|
214
|
+
.unwrap()
|
215
|
+
}
|
219
216
|
DataType::Boolean => pl.const_get::<_, Value>("Boolean").unwrap(),
|
220
217
|
DataType::Utf8 => pl.const_get::<_, Value>("Utf8").unwrap(),
|
221
218
|
DataType::Binary => pl.const_get::<_, Value>("Binary").unwrap(),
|
219
|
+
DataType::Array(inner, size) => {
|
220
|
+
let inner = Wrap(*inner);
|
221
|
+
let list_class = pl.const_get::<_, Value>("Array").unwrap();
|
222
|
+
list_class
|
223
|
+
.funcall::<_, _, Value>("new", (size, inner))
|
224
|
+
.unwrap()
|
225
|
+
}
|
222
226
|
DataType::List(inner) => {
|
223
227
|
let inner = Wrap(*inner);
|
224
228
|
let list_class = pl.const_get::<_, Value>("List").unwrap();
|
@@ -261,6 +265,133 @@ impl IntoValue for Wrap<DataType> {
|
|
261
265
|
}
|
262
266
|
}
|
263
267
|
|
268
|
+
impl IntoValue for Wrap<TimeUnit> {
|
269
|
+
fn into_value_with(self, _: &RubyHandle) -> Value {
|
270
|
+
let tu = match self.0 {
|
271
|
+
TimeUnit::Nanoseconds => "ns",
|
272
|
+
TimeUnit::Microseconds => "us",
|
273
|
+
TimeUnit::Milliseconds => "ms",
|
274
|
+
};
|
275
|
+
tu.into_value()
|
276
|
+
}
|
277
|
+
}
|
278
|
+
|
279
|
+
impl IntoValue for Wrap<&Utf8Chunked> {
|
280
|
+
fn into_value_with(self, _: &RubyHandle) -> Value {
|
281
|
+
let iter = self.0.into_iter();
|
282
|
+
RArray::from_iter(iter).into_value()
|
283
|
+
}
|
284
|
+
}
|
285
|
+
|
286
|
+
impl IntoValue for Wrap<&BinaryChunked> {
|
287
|
+
fn into_value_with(self, _: &RubyHandle) -> Value {
|
288
|
+
let iter = self
|
289
|
+
.0
|
290
|
+
.into_iter()
|
291
|
+
.map(|opt_bytes| opt_bytes.map(RString::from_slice));
|
292
|
+
RArray::from_iter(iter).into_value()
|
293
|
+
}
|
294
|
+
}
|
295
|
+
|
296
|
+
impl IntoValue for Wrap<&StructChunked> {
|
297
|
+
fn into_value_with(self, _: &RubyHandle) -> Value {
|
298
|
+
let s = self.0.clone().into_series();
|
299
|
+
// todo! iterate its chunks and flatten.
|
300
|
+
// make series::iter() accept a chunk index.
|
301
|
+
let s = s.rechunk();
|
302
|
+
let iter = s.iter().map(|av| {
|
303
|
+
if let AnyValue::Struct(_, _, flds) = av {
|
304
|
+
struct_dict(av._iter_struct_av(), flds)
|
305
|
+
} else {
|
306
|
+
unreachable!()
|
307
|
+
}
|
308
|
+
});
|
309
|
+
|
310
|
+
RArray::from_iter(iter).into_value()
|
311
|
+
}
|
312
|
+
}
|
313
|
+
|
314
|
+
impl IntoValue for Wrap<&DurationChunked> {
|
315
|
+
fn into_value_with(self, _: &RubyHandle) -> Value {
|
316
|
+
let utils = utils();
|
317
|
+
let time_unit = Wrap(self.0.time_unit()).into_value();
|
318
|
+
let iter = self.0.into_iter().map(|opt_v| {
|
319
|
+
opt_v.map(|v| {
|
320
|
+
utils
|
321
|
+
.funcall::<_, _, Value>("_to_ruby_duration", (v, time_unit))
|
322
|
+
.unwrap()
|
323
|
+
})
|
324
|
+
});
|
325
|
+
RArray::from_iter(iter).into_value()
|
326
|
+
}
|
327
|
+
}
|
328
|
+
|
329
|
+
impl IntoValue for Wrap<&DatetimeChunked> {
|
330
|
+
fn into_value_with(self, _: &RubyHandle) -> Value {
|
331
|
+
let utils = utils();
|
332
|
+
let time_unit = Wrap(self.0.time_unit()).into_value();
|
333
|
+
let time_zone = self.0.time_zone().clone().into_value();
|
334
|
+
let iter = self.0.into_iter().map(|opt_v| {
|
335
|
+
opt_v.map(|v| {
|
336
|
+
utils
|
337
|
+
.funcall::<_, _, Value>("_to_ruby_datetime", (v, time_unit, time_zone))
|
338
|
+
.unwrap()
|
339
|
+
})
|
340
|
+
});
|
341
|
+
RArray::from_iter(iter).into_value()
|
342
|
+
}
|
343
|
+
}
|
344
|
+
|
345
|
+
impl IntoValue for Wrap<&TimeChunked> {
|
346
|
+
fn into_value_with(self, _: &RubyHandle) -> Value {
|
347
|
+
let utils = utils();
|
348
|
+
let iter = self.0.into_iter().map(|opt_v| {
|
349
|
+
opt_v.map(|v| utils.funcall::<_, _, Value>("_to_ruby_time", (v,)).unwrap())
|
350
|
+
});
|
351
|
+
RArray::from_iter(iter).into_value()
|
352
|
+
}
|
353
|
+
}
|
354
|
+
|
355
|
+
impl IntoValue for Wrap<&DateChunked> {
|
356
|
+
fn into_value_with(self, _: &RubyHandle) -> Value {
|
357
|
+
let utils = utils();
|
358
|
+
let iter = self.0.into_iter().map(|opt_v| {
|
359
|
+
opt_v.map(|v| utils.funcall::<_, _, Value>("_to_ruby_date", (v,)).unwrap())
|
360
|
+
});
|
361
|
+
RArray::from_iter(iter).into_value()
|
362
|
+
}
|
363
|
+
}
|
364
|
+
|
365
|
+
impl IntoValue for Wrap<&DecimalChunked> {
|
366
|
+
fn into_value_with(self, _: &RubyHandle) -> Value {
|
367
|
+
let utils = utils();
|
368
|
+
let rb_scale = (-(self.0.scale() as i32)).into_value();
|
369
|
+
let iter = self.0.into_iter().map(|opt_v| {
|
370
|
+
opt_v.map(|v| {
|
371
|
+
utils
|
372
|
+
.funcall::<_, _, Value>("_to_ruby_decimal", (v.to_string(), rb_scale))
|
373
|
+
.unwrap()
|
374
|
+
})
|
375
|
+
});
|
376
|
+
RArray::from_iter(iter).into_value()
|
377
|
+
}
|
378
|
+
}
|
379
|
+
|
380
|
+
fn abs_decimal_from_digits(digits: String, exp: i32) -> Option<(i128, usize)> {
|
381
|
+
match digits.parse::<i128>() {
|
382
|
+
Ok(v) => Some((v, ((digits.len() as i32) - exp) as usize)),
|
383
|
+
Err(_) => None,
|
384
|
+
}
|
385
|
+
}
|
386
|
+
|
387
|
+
impl TryConvert for Wrap<Field> {
|
388
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
389
|
+
let name: String = ob.funcall("name", ())?;
|
390
|
+
let dtype: Wrap<DataType> = ob.funcall("dtype", ())?;
|
391
|
+
Ok(Wrap(Field::new(&name, dtype.0)))
|
392
|
+
}
|
393
|
+
}
|
394
|
+
|
264
395
|
impl TryConvert for Wrap<DataType> {
|
265
396
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
266
397
|
let dtype = if ob.is_kind_of(class::class()) {
|
@@ -282,10 +413,11 @@ impl TryConvert for Wrap<DataType> {
|
|
282
413
|
"Polars::Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
|
283
414
|
"Polars::Time" => DataType::Time,
|
284
415
|
"Polars::Duration" => DataType::Duration(TimeUnit::Microseconds),
|
416
|
+
"Polars::Decimal" => DataType::Decimal(None, None),
|
285
417
|
"Polars::Float32" => DataType::Float32,
|
286
418
|
"Polars::Float64" => DataType::Float64,
|
287
|
-
|
288
|
-
"Polars::List" => DataType::List(Box::new(DataType::
|
419
|
+
"Polars::Object" => DataType::Object(OBJECT_NAME),
|
420
|
+
"Polars::List" => DataType::List(Box::new(DataType::Null)),
|
289
421
|
"Polars::Null" => DataType::Null,
|
290
422
|
"Polars::Unknown" => DataType::Unknown,
|
291
423
|
dt => {
|
@@ -294,6 +426,47 @@ impl TryConvert for Wrap<DataType> {
|
|
294
426
|
)))
|
295
427
|
}
|
296
428
|
}
|
429
|
+
// TODO improve
|
430
|
+
} else if ob.try_convert::<String>().is_err() {
|
431
|
+
let name = unsafe { ob.class().name() }.into_owned();
|
432
|
+
match name.as_str() {
|
433
|
+
"Polars::Duration" => {
|
434
|
+
let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
|
435
|
+
let time_unit = time_unit.try_convert::<Wrap<TimeUnit>>()?.0;
|
436
|
+
DataType::Duration(time_unit)
|
437
|
+
}
|
438
|
+
"Polars::Datetime" => {
|
439
|
+
let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
|
440
|
+
let time_unit = time_unit.try_convert::<Wrap<TimeUnit>>()?.0;
|
441
|
+
let time_zone: Value = ob.funcall("time_zone", ()).unwrap();
|
442
|
+
let time_zone = time_zone.try_convert()?;
|
443
|
+
DataType::Datetime(time_unit, time_zone)
|
444
|
+
}
|
445
|
+
"Polars::Decimal" => {
|
446
|
+
let precision = ob.funcall::<_, _, Value>("precision", ())?.try_convert()?;
|
447
|
+
let scale = ob.funcall::<_, _, Value>("scale", ())?.try_convert()?;
|
448
|
+
DataType::Decimal(precision, Some(scale))
|
449
|
+
}
|
450
|
+
"Polars::List" => {
|
451
|
+
let inner: Value = ob.funcall("inner", ()).unwrap();
|
452
|
+
let inner = inner.try_convert::<Wrap<DataType>>()?;
|
453
|
+
DataType::List(Box::new(inner.0))
|
454
|
+
}
|
455
|
+
"Polars::Struct" => {
|
456
|
+
let arr: RArray = ob.funcall("fields", ())?;
|
457
|
+
let mut fields = Vec::with_capacity(arr.len());
|
458
|
+
for v in arr.each() {
|
459
|
+
fields.push(v?.try_convert::<Wrap<Field>>()?.0);
|
460
|
+
}
|
461
|
+
DataType::Struct(fields)
|
462
|
+
}
|
463
|
+
dt => {
|
464
|
+
return Err(RbTypeError::new_err(format!(
|
465
|
+
"A {dt} object is not a correct polars DataType. \
|
466
|
+
Hint: use the class without instantiating it.",
|
467
|
+
)))
|
468
|
+
}
|
469
|
+
}
|
297
470
|
} else {
|
298
471
|
match ob.try_convert::<String>()?.as_str() {
|
299
472
|
"u8" => DataType::UInt8,
|
@@ -336,16 +509,21 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
|
|
336
509
|
Ok(AnyValue::Boolean(ob.try_convert::<bool>()?).into())
|
337
510
|
} else if let Some(v) = Integer::from_value(ob) {
|
338
511
|
Ok(AnyValue::Int64(v.to_i64()?).into())
|
339
|
-
} else if let Some(v) =
|
512
|
+
} else if let Some(v) = Float::from_value(ob) {
|
340
513
|
Ok(AnyValue::Float64(v.to_f64()).into())
|
341
514
|
} else if let Some(v) = RString::from_value(ob) {
|
342
|
-
|
515
|
+
if v.enc_get() == Index::utf8() {
|
516
|
+
Ok(AnyValue::Utf8Owned(v.to_string()?.into()).into())
|
517
|
+
} else {
|
518
|
+
Ok(AnyValue::BinaryOwned(unsafe { v.as_slice() }.to_vec()).into())
|
519
|
+
}
|
343
520
|
// call is_a? for ActiveSupport::TimeWithZone
|
344
521
|
} else if ob.funcall::<_, _, bool>("is_a?", (class::time(),))? {
|
345
522
|
let sec = ob.funcall::<_, _, i64>("to_i", ())?;
|
346
523
|
let nsec = ob.funcall::<_, _, i64>("nsec", ())?;
|
347
524
|
let v = sec * 1_000_000_000 + nsec;
|
348
|
-
// TODO support time zone
|
525
|
+
// TODO support time zone when possible
|
526
|
+
// https://github.com/pola-rs/polars/issues/9103
|
349
527
|
Ok(AnyValue::Datetime(v, TimeUnit::Nanoseconds, &None).into())
|
350
528
|
} else if ob.is_nil() {
|
351
529
|
Ok(AnyValue::Null.into())
|
@@ -366,17 +544,35 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
|
|
366
544
|
if v.is_empty() {
|
367
545
|
Ok(Wrap(AnyValue::List(Series::new_empty("", &DataType::Null))))
|
368
546
|
} else {
|
369
|
-
let
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
.
|
547
|
+
let list = v;
|
548
|
+
|
549
|
+
let mut avs = Vec::with_capacity(25);
|
550
|
+
let mut iter = list.each();
|
551
|
+
|
552
|
+
for item in (&mut iter).take(25) {
|
553
|
+
avs.push(item?.try_convert::<Wrap<AnyValue>>()?.0)
|
554
|
+
}
|
555
|
+
|
556
|
+
let (dtype, _n_types) = any_values_to_dtype(&avs).map_err(RbPolarsErr::from)?;
|
557
|
+
|
558
|
+
// push the rest
|
559
|
+
avs.reserve(list.len());
|
560
|
+
for item in iter {
|
561
|
+
avs.push(item?.try_convert::<Wrap<AnyValue>>()?.0)
|
562
|
+
}
|
563
|
+
|
376
564
|
let s = Series::from_any_values_and_dtype("", &avs, &dtype, true)
|
377
565
|
.map_err(RbPolarsErr::from)?;
|
378
566
|
Ok(Wrap(AnyValue::List(s)))
|
379
567
|
}
|
568
|
+
} else if ob.is_kind_of(crate::rb_modules::datetime()) {
|
569
|
+
let sec: i64 = ob.funcall("to_i", ())?;
|
570
|
+
let nsec: i64 = ob.funcall("nsec", ())?;
|
571
|
+
Ok(Wrap(AnyValue::Datetime(
|
572
|
+
sec * 1_000_000_000 + nsec,
|
573
|
+
TimeUnit::Nanoseconds,
|
574
|
+
&None,
|
575
|
+
)))
|
380
576
|
} else if ob.is_kind_of(crate::rb_modules::date()) {
|
381
577
|
// convert to DateTime for UTC
|
382
578
|
let v = ob
|
@@ -384,6 +580,20 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
|
|
384
580
|
.funcall::<_, _, Value>("to_time", ())?
|
385
581
|
.funcall::<_, _, i64>("to_i", ())?;
|
386
582
|
Ok(Wrap(AnyValue::Date((v / 86400) as i32)))
|
583
|
+
} else if ob.is_kind_of(crate::rb_modules::bigdecimal()) {
|
584
|
+
let (sign, digits, _, exp): (i8, String, i32, i32) = ob
|
585
|
+
.funcall::<_, _, Value>("split", ())
|
586
|
+
.unwrap()
|
587
|
+
.try_convert()
|
588
|
+
.unwrap();
|
589
|
+
let (mut v, scale) = abs_decimal_from_digits(digits, exp).ok_or_else(|| {
|
590
|
+
RbPolarsErr::other("BigDecimal is too large to fit in Decimal128".into())
|
591
|
+
})?;
|
592
|
+
if sign < 0 {
|
593
|
+
// TODO better error
|
594
|
+
v = v.checked_neg().unwrap();
|
595
|
+
}
|
596
|
+
Ok(Wrap(AnyValue::Decimal(v, scale)))
|
387
597
|
} else {
|
388
598
|
Err(RbPolarsErr::other(format!(
|
389
599
|
"object type not supported {:?}",
|
@@ -410,12 +620,12 @@ impl TryConvert for Wrap<Schema> {
|
|
410
620
|
|
411
621
|
let mut schema = Vec::new();
|
412
622
|
dict.foreach(|key: String, val: Wrap<DataType>| {
|
413
|
-
schema.push(Field::new(&key, val.0));
|
623
|
+
schema.push(Ok(Field::new(&key, val.0)));
|
414
624
|
Ok(ForEach::Continue)
|
415
625
|
})
|
416
626
|
.unwrap();
|
417
627
|
|
418
|
-
Ok(Wrap(schema.into_iter().
|
628
|
+
Ok(Wrap(schema.into_iter().collect::<RbResult<Schema>>()?))
|
419
629
|
}
|
420
630
|
}
|
421
631
|
|