polars-df 0.3.1 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +24 -1
- data/Cargo.lock +486 -380
- data/Cargo.toml +0 -2
- data/README.md +31 -2
- data/ext/polars/Cargo.toml +10 -4
- data/ext/polars/src/apply/dataframe.rs +2 -2
- data/ext/polars/src/{lazy/apply.rs → apply/lazy.rs} +1 -2
- data/ext/polars/src/apply/mod.rs +1 -0
- data/ext/polars/src/batched_csv.rs +36 -19
- data/ext/polars/src/conversion.rs +159 -16
- data/ext/polars/src/dataframe.rs +51 -52
- data/ext/polars/src/error.rs +0 -4
- data/ext/polars/src/expr/binary.rs +69 -0
- data/ext/polars/src/expr/categorical.rs +10 -0
- data/ext/polars/src/expr/datetime.rs +223 -0
- data/ext/polars/src/{lazy/dsl.rs → expr/general.rs} +22 -799
- data/ext/polars/src/expr/list.rs +146 -0
- data/ext/polars/src/{lazy → expr}/meta.rs +16 -6
- data/ext/polars/src/expr/string.rs +313 -0
- data/ext/polars/src/expr/struct.rs +15 -0
- data/ext/polars/src/expr.rs +33 -0
- data/ext/polars/src/functions/eager.rs +93 -0
- data/ext/polars/src/functions/io.rs +34 -0
- data/ext/polars/src/functions/lazy.rs +209 -0
- data/ext/polars/src/functions/meta.rs +8 -0
- data/ext/polars/src/functions/mod.rs +5 -0
- data/ext/polars/src/functions/whenthen.rs +43 -0
- data/ext/polars/src/{lazy/dataframe.rs → lazyframe.rs} +58 -45
- data/ext/polars/src/lazygroupby.rs +29 -0
- data/ext/polars/src/lib.rs +216 -300
- data/ext/polars/src/rb_modules.rs +8 -0
- data/ext/polars/src/series/aggregation.rs +83 -0
- data/ext/polars/src/series/arithmetic.rs +88 -0
- data/ext/polars/src/series/comparison.rs +251 -0
- data/ext/polars/src/series/construction.rs +164 -0
- data/ext/polars/src/series.rs +103 -531
- data/lib/polars/batched_csv_reader.rb +1 -1
- data/lib/polars/binary_expr.rb +77 -0
- data/lib/polars/binary_name_space.rb +66 -0
- data/lib/polars/convert.rb +2 -2
- data/lib/polars/data_frame.rb +263 -87
- data/lib/polars/data_types.rb +6 -4
- data/lib/polars/date_time_expr.rb +148 -8
- data/lib/polars/expr.rb +78 -11
- data/lib/polars/io.rb +73 -62
- data/lib/polars/lazy_frame.rb +107 -10
- data/lib/polars/lazy_functions.rb +7 -3
- data/lib/polars/list_expr.rb +70 -21
- data/lib/polars/list_name_space.rb +2 -2
- data/lib/polars/series.rb +190 -74
- data/lib/polars/string_expr.rb +150 -44
- data/lib/polars/string_name_space.rb +4 -4
- data/lib/polars/struct_name_space.rb +32 -0
- data/lib/polars/utils.rb +51 -9
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +4 -2
- metadata +29 -12
- data/ext/polars/src/lazy/mod.rs +0 -5
- data/ext/polars/src/lazy/utils.rs +0 -13
- data/ext/polars/src/list_construction.rs +0 -100
- /data/ext/polars/src/{numo.rs → series/export.rs} +0 -0
- /data/ext/polars/src/{set.rs → series/set_at_idx.rs} +0 -0
data/Cargo.toml
CHANGED
@@ -3,8 +3,6 @@ members = ["ext/polars"]
|
|
3
3
|
|
4
4
|
[patch.crates-io]
|
5
5
|
jsonpath_lib = { git = "https://github.com/ritchie46/jsonpath", rev = "24eaf0b4416edff38a4d1b6b17bc4b9f3f047b4b" }
|
6
|
-
halfbrown = { git = "https://github.com/Licenser/halfbrown", rev = "952023c5dd6461b009bb5ba66b9aa979bd75949f" }
|
7
|
-
arrow2 = { git = "https://github.com/ankane/arrow2", rev = "ef0270922a217070ba9942567c0ff3263ae8c531" }
|
8
6
|
|
9
7
|
[profile.release]
|
10
8
|
strip = true
|
data/README.md
CHANGED
@@ -50,14 +50,43 @@ From Parquet
|
|
50
50
|
|
51
51
|
```ruby
|
52
52
|
Polars.read_parquet("file.parquet")
|
53
|
+
|
54
|
+
# or lazily with
|
55
|
+
Polars.scan_parquet("file.parquet")
|
53
56
|
```
|
54
57
|
|
55
58
|
From Active Record
|
56
59
|
|
57
60
|
```ruby
|
58
|
-
Polars.
|
61
|
+
Polars.read_database(User.all)
|
62
|
+
# or
|
63
|
+
Polars.read_database("SELECT * FROM users")
|
64
|
+
```
|
65
|
+
|
66
|
+
From JSON
|
67
|
+
|
68
|
+
```ruby
|
69
|
+
Polars.read_json("file.json")
|
59
70
|
# or
|
60
|
-
Polars.
|
71
|
+
Polars.read_ndjson("file.ndjson")
|
72
|
+
|
73
|
+
# or lazily with
|
74
|
+
Polars.scan_ndjson("file.ndjson")
|
75
|
+
```
|
76
|
+
|
77
|
+
From Feather / Arrow IPC
|
78
|
+
|
79
|
+
```ruby
|
80
|
+
Polars.read_ipc("file.arrow")
|
81
|
+
|
82
|
+
# or lazily with
|
83
|
+
Polars.scan_ipc("file.arrow")
|
84
|
+
```
|
85
|
+
|
86
|
+
From Avro
|
87
|
+
|
88
|
+
```ruby
|
89
|
+
Polars.read_avro("file.avro")
|
61
90
|
```
|
62
91
|
|
63
92
|
From a hash
|
data/ext/polars/Cargo.toml
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
[package]
|
2
2
|
name = "polars"
|
3
|
-
version = "0.3.1"
|
3
|
+
version = "0.5.0"
|
4
4
|
license = "MIT"
|
5
5
|
authors = ["Andrew Kane <andrew@ankane.org>"]
|
6
6
|
edition = "2021"
|
@@ -12,13 +12,15 @@ crate-type = ["cdylib"]
|
|
12
12
|
[dependencies]
|
13
13
|
ahash = "0.8"
|
14
14
|
magnus = "0.5"
|
15
|
-
polars-core = "0.
|
15
|
+
polars-core = "0.29.0"
|
16
16
|
serde_json = "1"
|
17
|
+
smartstring = "1"
|
17
18
|
|
18
19
|
[dependencies.polars]
|
19
|
-
version = "0.
|
20
|
+
version = "0.29.0"
|
20
21
|
features = [
|
21
22
|
"abs",
|
23
|
+
"approx_unique",
|
22
24
|
"arange",
|
23
25
|
"arg_where",
|
24
26
|
"asof_join",
|
@@ -26,7 +28,7 @@ features = [
|
|
26
28
|
"binary_encoding",
|
27
29
|
"concat_str",
|
28
30
|
"cse",
|
29
|
-
"csv
|
31
|
+
"csv",
|
30
32
|
"cum_agg",
|
31
33
|
"cumulative_eval",
|
32
34
|
"dataframe_arithmetic",
|
@@ -44,10 +46,13 @@ features = [
|
|
44
46
|
"ipc",
|
45
47
|
"is_first",
|
46
48
|
"is_in",
|
49
|
+
"is_unique",
|
47
50
|
"json",
|
48
51
|
"lazy",
|
49
52
|
"lazy_regex",
|
53
|
+
"list_count",
|
50
54
|
"list_eval",
|
55
|
+
"list_take",
|
51
56
|
"list_to_struct",
|
52
57
|
"log",
|
53
58
|
"meta",
|
@@ -73,6 +78,7 @@ features = [
|
|
73
78
|
"serde-lazy",
|
74
79
|
"sign",
|
75
80
|
"string_encoding",
|
81
|
+
"string_from_radix",
|
76
82
|
"string_justify",
|
77
83
|
"strings",
|
78
84
|
"timezones",
|
data/ext/polars/src/apply/dataframe.rs
CHANGED
@@ -202,8 +202,8 @@ pub fn apply_lambda_with_utf8_out_type(
|
|
202
202
|
}
|
203
203
|
|
204
204
|
/// Apply a lambda with list output type
|
205
|
-
pub fn apply_lambda_with_list_out_type
|
206
|
-
df: &
|
205
|
+
pub fn apply_lambda_with_list_out_type(
|
206
|
+
df: &DataFrame,
|
207
207
|
lambda: Value,
|
208
208
|
init_null_count: usize,
|
209
209
|
first_value: Option<&Series>,
|
data/ext/polars/src/apply/mod.rs
CHANGED
data/ext/polars/src/batched_csv.rs
CHANGED
@@ -7,11 +7,17 @@ use std::cell::RefCell;
|
|
7
7
|
use std::path::PathBuf;
|
8
8
|
|
9
9
|
use crate::conversion::*;
|
10
|
+
use crate::prelude::read_impl::OwnedBatchedCsvReaderMmap;
|
10
11
|
use crate::{RbDataFrame, RbPolarsErr, RbResult};
|
11
12
|
|
13
|
+
pub enum BatchedReader {
|
14
|
+
MMap(OwnedBatchedCsvReaderMmap),
|
15
|
+
Read(OwnedBatchedCsvReader),
|
16
|
+
}
|
17
|
+
|
12
18
|
#[magnus::wrap(class = "Polars::RbBatchedCsv")]
|
13
19
|
pub struct RbBatchedCsv {
|
14
|
-
pub reader: RefCell<OwnedBatchedCsvReader>,
|
20
|
+
pub reader: RefCell<BatchedReader>,
|
15
21
|
}
|
16
22
|
|
17
23
|
impl RbBatchedCsv {
|
@@ -38,7 +44,7 @@ impl RbBatchedCsv {
|
|
38
44
|
let comment_char: Option<String> = arguments[16].try_convert()?;
|
39
45
|
let quote_char: Option<String> = arguments[17].try_convert()?;
|
40
46
|
let null_values: Option<Wrap<NullValues>> = arguments[18].try_convert()?;
|
41
|
-
let
|
47
|
+
let try_parse_dates: bool = arguments[19].try_convert()?;
|
42
48
|
let skip_rows_after_header: usize = arguments[20].try_convert()?;
|
43
49
|
let row_count: Option<(String, IdxSize)> = arguments[21].try_convert()?;
|
44
50
|
let sample_size: usize = arguments[22].try_convert()?;
|
@@ -62,11 +68,13 @@ impl RbBatchedCsv {
|
|
62
68
|
};
|
63
69
|
|
64
70
|
let overwrite_dtype = overwrite_dtype.map(|overwrite_dtype| {
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
71
|
+
overwrite_dtype
|
72
|
+
.iter()
|
73
|
+
.map(|(name, dtype)| {
|
74
|
+
let dtype = dtype.0.clone();
|
75
|
+
Field::new(name, dtype)
|
76
|
+
})
|
77
|
+
.collect::<Schema>()
|
70
78
|
});
|
71
79
|
|
72
80
|
let overwrite_dtype_slice = overwrite_dtype_slice.map(|overwrite_dtype| {
|
@@ -95,14 +103,24 @@ impl RbBatchedCsv {
|
|
95
103
|
.low_memory(low_memory)
|
96
104
|
.with_comment_char(comment_char)
|
97
105
|
.with_null_values(null_values)
|
98
|
-
.
|
106
|
+
.with_try_parse_dates(try_parse_dates)
|
99
107
|
.with_quote_char(quote_char)
|
100
108
|
.with_end_of_line_char(eol_char)
|
101
109
|
.with_skip_rows_after_header(skip_rows_after_header)
|
102
110
|
.with_row_count(row_count)
|
103
|
-
.sample_size(sample_size)
|
104
|
-
|
105
|
-
|
111
|
+
.sample_size(sample_size);
|
112
|
+
|
113
|
+
let reader = if low_memory {
|
114
|
+
let reader = reader
|
115
|
+
.batched_read(overwrite_dtype.map(Arc::new))
|
116
|
+
.map_err(RbPolarsErr::from)?;
|
117
|
+
BatchedReader::Read(reader)
|
118
|
+
} else {
|
119
|
+
let reader = reader
|
120
|
+
.batched_mmap(overwrite_dtype.map(Arc::new))
|
121
|
+
.map_err(RbPolarsErr::from)?;
|
122
|
+
BatchedReader::MMap(reader)
|
123
|
+
};
|
106
124
|
|
107
125
|
Ok(RbBatchedCsv {
|
108
126
|
reader: RefCell::new(reader),
|
@@ -110,13 +128,12 @@ impl RbBatchedCsv {
|
|
110
128
|
}
|
111
129
|
|
112
130
|
pub fn next_batches(&self, n: usize) -> RbResult<Option<RArray>> {
|
113
|
-
let batches = self
|
114
|
-
.
|
115
|
-
.
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
}))
|
131
|
+
let batches = match &mut *self.reader.borrow_mut() {
|
132
|
+
BatchedReader::MMap(reader) => reader.next_batches(n),
|
133
|
+
BatchedReader::Read(reader) => reader.next_batches(n),
|
134
|
+
}
|
135
|
+
.map_err(RbPolarsErr::from)?;
|
136
|
+
|
137
|
+
Ok(batches.map(|batches| RArray::from_iter(batches.into_iter().map(RbDataFrame::from))))
|
121
138
|
}
|
122
139
|
}
|
data/ext/polars/src/conversion.rs
CHANGED
@@ -1,7 +1,11 @@
|
|
1
|
+
use std::fmt::{Display, Formatter};
|
2
|
+
use std::hash::{Hash, Hasher};
|
3
|
+
|
1
4
|
use magnus::{
|
2
5
|
class, exception, r_hash::ForEach, ruby_handle::RubyHandle, Integer, IntoValue, Module, RArray,
|
3
6
|
RFloat, RHash, RString, Symbol, TryConvert, Value, QNIL,
|
4
7
|
};
|
8
|
+
use magnus::encoding::{EncodingCapable, Index};
|
5
9
|
use polars::chunked_array::object::PolarsObjectSafe;
|
6
10
|
use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
|
7
11
|
use polars::datatypes::AnyValue;
|
@@ -10,9 +14,9 @@ use polars::frame::NullStrategy;
|
|
10
14
|
use polars::io::avro::AvroCompression;
|
11
15
|
use polars::prelude::*;
|
12
16
|
use polars::series::ops::NullBehavior;
|
13
|
-
use
|
14
|
-
use std::hash::{Hash, Hasher};
|
17
|
+
use smartstring::alias::String as SmartString;
|
15
18
|
|
19
|
+
use crate::rb_modules::utils;
|
16
20
|
use crate::{RbDataFrame, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbValueError};
|
17
21
|
|
18
22
|
pub(crate) fn slice_to_wrapped<T>(slice: &[T]) -> &[Wrap<T>] {
|
@@ -21,6 +25,12 @@ pub(crate) fn slice_to_wrapped<T>(slice: &[T]) -> &[Wrap<T>] {
|
|
21
25
|
unsafe { std::mem::transmute(slice) }
|
22
26
|
}
|
23
27
|
|
28
|
+
pub(crate) fn slice_extract_wrapped<T>(slice: &[Wrap<T>]) -> &[T] {
|
29
|
+
// Safety:
|
30
|
+
// Wrap is transparent.
|
31
|
+
unsafe { std::mem::transmute(slice) }
|
32
|
+
}
|
33
|
+
|
24
34
|
pub(crate) fn vec_extract_wrapped<T>(buf: Vec<Wrap<T>>) -> Vec<T> {
|
25
35
|
// Safety:
|
26
36
|
// Wrap is transparent.
|
@@ -82,6 +92,22 @@ impl TryConvert for Wrap<Utf8Chunked> {
|
|
82
92
|
}
|
83
93
|
}
|
84
94
|
|
95
|
+
impl TryConvert for Wrap<BinaryChunked> {
|
96
|
+
fn try_convert(obj: Value) -> RbResult<Self> {
|
97
|
+
let (seq, len) = get_rbseq(obj)?;
|
98
|
+
let mut builder = BinaryChunkedBuilder::new("", len, len * 25);
|
99
|
+
|
100
|
+
for res in seq.each() {
|
101
|
+
let item = res?;
|
102
|
+
match item.try_convert::<RString>() {
|
103
|
+
Ok(val) => builder.append_value(unsafe { val.as_slice() }),
|
104
|
+
Err(_) => builder.append_null(),
|
105
|
+
}
|
106
|
+
}
|
107
|
+
Ok(Wrap(builder.finish()))
|
108
|
+
}
|
109
|
+
}
|
110
|
+
|
85
111
|
impl TryConvert for Wrap<NullValues> {
|
86
112
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
87
113
|
if let Ok(s) = ob.try_convert::<String>() {
|
@@ -98,6 +124,14 @@ impl TryConvert for Wrap<NullValues> {
|
|
98
124
|
}
|
99
125
|
}
|
100
126
|
|
127
|
+
fn struct_dict<'a>(vals: impl Iterator<Item = AnyValue<'a>>, flds: &[Field]) -> Value {
|
128
|
+
let dict = RHash::new();
|
129
|
+
for (fld, val) in flds.iter().zip(vals) {
|
130
|
+
dict.aset(fld.name().as_str(), Wrap(val)).unwrap()
|
131
|
+
}
|
132
|
+
dict.into_value()
|
133
|
+
}
|
134
|
+
|
101
135
|
impl IntoValue for Wrap<AnyValue<'_>> {
|
102
136
|
fn into_value_with(self, _: &RubyHandle) -> Value {
|
103
137
|
match self.0 {
|
@@ -114,7 +148,7 @@ impl IntoValue for Wrap<AnyValue<'_>> {
|
|
114
148
|
AnyValue::Null => *QNIL,
|
115
149
|
AnyValue::Boolean(v) => Value::from(v),
|
116
150
|
AnyValue::Utf8(v) => Value::from(v),
|
117
|
-
AnyValue::Utf8Owned(
|
151
|
+
AnyValue::Utf8Owned(v) => Value::from(v.as_str()),
|
118
152
|
AnyValue::Categorical(_idx, _rev, _arr) => todo!(),
|
119
153
|
AnyValue::Date(v) => class::time()
|
120
154
|
.funcall::<_, _, Value>("at", (v * 86400,))
|
@@ -154,15 +188,25 @@ impl IntoValue for Wrap<AnyValue<'_>> {
|
|
154
188
|
t.funcall::<_, _, Value>("utc", ()).unwrap()
|
155
189
|
}
|
156
190
|
}
|
157
|
-
AnyValue::Duration(
|
191
|
+
AnyValue::Duration(v, tu) => {
|
192
|
+
let tu = tu.to_ascii();
|
193
|
+
utils().funcall("_to_ruby_duration", (v, tu)).unwrap()
|
194
|
+
}
|
158
195
|
AnyValue::Time(_v) => todo!(),
|
159
196
|
AnyValue::List(v) => RbSeries::new(v).to_a().into_value(),
|
160
|
-
ref
|
161
|
-
AnyValue::StructOwned(
|
162
|
-
AnyValue::Object(
|
163
|
-
|
164
|
-
|
165
|
-
|
197
|
+
ref av @ AnyValue::Struct(_, _, flds) => struct_dict(av._iter_struct_av(), flds),
|
198
|
+
AnyValue::StructOwned(payload) => struct_dict(payload.0.into_iter(), &payload.1),
|
199
|
+
AnyValue::Object(v) => {
|
200
|
+
let object = v.as_any().downcast_ref::<ObjectValue>().unwrap();
|
201
|
+
object.inner
|
202
|
+
}
|
203
|
+
AnyValue::ObjectOwned(v) => {
|
204
|
+
let object = v.0.as_any().downcast_ref::<ObjectValue>().unwrap();
|
205
|
+
object.inner
|
206
|
+
}
|
207
|
+
AnyValue::Binary(v) => RString::from_slice(v).into_value(),
|
208
|
+
AnyValue::BinaryOwned(v) => RString::from_slice(&v).into_value(),
|
209
|
+
AnyValue::Decimal(_v, _scale) => todo!(),
|
166
210
|
}
|
167
211
|
}
|
168
212
|
}
|
@@ -182,7 +226,7 @@ impl IntoValue for Wrap<DataType> {
|
|
182
226
|
DataType::UInt64 => pl.const_get::<_, Value>("UInt64").unwrap(),
|
183
227
|
DataType::Float32 => pl.const_get::<_, Value>("Float32").unwrap(),
|
184
228
|
DataType::Float64 => pl.const_get::<_, Value>("Float64").unwrap(),
|
185
|
-
DataType::
|
229
|
+
DataType::Decimal(_precision, _scale) => todo!(),
|
186
230
|
DataType::Boolean => pl.const_get::<_, Value>("Boolean").unwrap(),
|
187
231
|
DataType::Utf8 => pl.const_get::<_, Value>("Utf8").unwrap(),
|
188
232
|
DataType::Binary => pl.const_get::<_, Value>("Binary").unwrap(),
|
@@ -210,7 +254,7 @@ impl IntoValue for Wrap<DataType> {
|
|
210
254
|
DataType::Struct(fields) => {
|
211
255
|
let field_class = pl.const_get::<_, Value>("Field").unwrap();
|
212
256
|
let iter = fields.iter().map(|fld| {
|
213
|
-
let name = fld.name().
|
257
|
+
let name = fld.name().as_str();
|
214
258
|
let dtype = Wrap(fld.data_type().clone());
|
215
259
|
field_class
|
216
260
|
.funcall::<_, _, Value>("new", (name, dtype))
|
@@ -228,6 +272,75 @@ impl IntoValue for Wrap<DataType> {
|
|
228
272
|
}
|
229
273
|
}
|
230
274
|
|
275
|
+
impl IntoValue for Wrap<TimeUnit> {
|
276
|
+
fn into_value_with(self, _: &RubyHandle) -> Value {
|
277
|
+
let tu = match self.0 {
|
278
|
+
TimeUnit::Nanoseconds => "ns",
|
279
|
+
TimeUnit::Microseconds => "us",
|
280
|
+
TimeUnit::Milliseconds => "ms",
|
281
|
+
};
|
282
|
+
tu.into_value()
|
283
|
+
}
|
284
|
+
}
|
285
|
+
|
286
|
+
impl IntoValue for Wrap<&Utf8Chunked> {
|
287
|
+
fn into_value_with(self, _: &RubyHandle) -> Value {
|
288
|
+
let iter = self.0.into_iter();
|
289
|
+
RArray::from_iter(iter).into_value()
|
290
|
+
}
|
291
|
+
}
|
292
|
+
|
293
|
+
impl IntoValue for Wrap<&BinaryChunked> {
|
294
|
+
fn into_value_with(self, _: &RubyHandle) -> Value {
|
295
|
+
let iter = self
|
296
|
+
.0
|
297
|
+
.into_iter()
|
298
|
+
.map(|opt_bytes| opt_bytes.map(RString::from_slice));
|
299
|
+
RArray::from_iter(iter).into_value()
|
300
|
+
}
|
301
|
+
}
|
302
|
+
|
303
|
+
impl IntoValue for Wrap<&StructChunked> {
|
304
|
+
fn into_value_with(self, _: &RubyHandle) -> Value {
|
305
|
+
let s = self.0.clone().into_series();
|
306
|
+
// todo! iterate its chunks and flatten.
|
307
|
+
// make series::iter() accept a chunk index.
|
308
|
+
let s = s.rechunk();
|
309
|
+
let iter = s.iter().map(|av| {
|
310
|
+
if let AnyValue::Struct(_, _, flds) = av {
|
311
|
+
struct_dict(av._iter_struct_av(), flds)
|
312
|
+
} else {
|
313
|
+
unreachable!()
|
314
|
+
}
|
315
|
+
});
|
316
|
+
|
317
|
+
RArray::from_iter(iter).into_value()
|
318
|
+
}
|
319
|
+
}
|
320
|
+
|
321
|
+
impl IntoValue for Wrap<&DurationChunked> {
|
322
|
+
fn into_value_with(self, _: &RubyHandle) -> Value {
|
323
|
+
let utils = utils();
|
324
|
+
let tu = Wrap(self.0.time_unit()).into_value();
|
325
|
+
let iter = self.0.into_iter().map(|opt_v| {
|
326
|
+
opt_v.map(|v| {
|
327
|
+
utils
|
328
|
+
.funcall::<_, _, Value>("_to_ruby_duration", (v, tu))
|
329
|
+
.unwrap()
|
330
|
+
})
|
331
|
+
});
|
332
|
+
RArray::from_iter(iter).into_value()
|
333
|
+
}
|
334
|
+
}
|
335
|
+
|
336
|
+
impl TryConvert for Wrap<Field> {
|
337
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
338
|
+
let name: String = ob.funcall("name", ())?;
|
339
|
+
let dtype: Wrap<DataType> = ob.funcall("dtype", ())?;
|
340
|
+
Ok(Wrap(Field::new(&name, dtype.0)))
|
341
|
+
}
|
342
|
+
}
|
343
|
+
|
231
344
|
impl TryConvert for Wrap<DataType> {
|
232
345
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
233
346
|
let dtype = if ob.is_kind_of(class::class()) {
|
@@ -261,6 +374,20 @@ impl TryConvert for Wrap<DataType> {
|
|
261
374
|
)))
|
262
375
|
}
|
263
376
|
}
|
377
|
+
// TODO improve
|
378
|
+
} else if ob.try_convert::<String>().is_err() {
|
379
|
+
let name = unsafe { ob.class().name() }.into_owned();
|
380
|
+
match name.as_str() {
|
381
|
+
"Polars::Struct" => {
|
382
|
+
let arr: RArray = ob.funcall("fields", ())?;
|
383
|
+
let mut fields = Vec::with_capacity(arr.len());
|
384
|
+
for v in arr.each() {
|
385
|
+
fields.push(v?.try_convert::<Wrap<Field>>()?.0);
|
386
|
+
}
|
387
|
+
DataType::Struct(fields)
|
388
|
+
}
|
389
|
+
_ => todo!(),
|
390
|
+
}
|
264
391
|
} else {
|
265
392
|
match ob.try_convert::<String>()?.as_str() {
|
266
393
|
"u8" => DataType::UInt8,
|
@@ -306,7 +433,11 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
|
|
306
433
|
} else if let Some(v) = RFloat::from_value(ob) {
|
307
434
|
Ok(AnyValue::Float64(v.to_f64()).into())
|
308
435
|
} else if let Some(v) = RString::from_value(ob) {
|
309
|
-
|
436
|
+
if v.enc_get() == Index::utf8() {
|
437
|
+
Ok(AnyValue::Utf8Owned(v.to_string()?.into()).into())
|
438
|
+
} else {
|
439
|
+
Ok(AnyValue::BinaryOwned(unsafe { v.as_slice() }.to_vec()).into())
|
440
|
+
}
|
310
441
|
// call is_a? for ActiveSupport::TimeWithZone
|
311
442
|
} else if ob.funcall::<_, _, bool>("is_a?", (class::time(),))? {
|
312
443
|
let sec = ob.funcall::<_, _, i64>("to_i", ())?;
|
@@ -340,10 +471,14 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
|
|
340
471
|
let n = 25;
|
341
472
|
let dtype = any_values_to_dtype(&avs[..std::cmp::min(avs.len(), n)])
|
342
473
|
.map_err(RbPolarsErr::from)?;
|
343
|
-
let s = Series::from_any_values_and_dtype("", &avs, &dtype)
|
474
|
+
let s = Series::from_any_values_and_dtype("", &avs, &dtype, true)
|
344
475
|
.map_err(RbPolarsErr::from)?;
|
345
476
|
Ok(Wrap(AnyValue::List(s)))
|
346
477
|
}
|
478
|
+
} else if ob.is_kind_of(crate::rb_modules::datetime()) {
|
479
|
+
let sec: i64 = ob.funcall("to_i", ())?;
|
480
|
+
let nsec: i64 = ob.funcall("nsec", ())?;
|
481
|
+
Ok(Wrap(AnyValue::Datetime(sec * 1_000_000_000 + nsec, TimeUnit::Nanoseconds, &None)))
|
347
482
|
} else if ob.is_kind_of(crate::rb_modules::date()) {
|
348
483
|
// convert to DateTime for UTC
|
349
484
|
let v = ob
|
@@ -377,12 +512,12 @@ impl TryConvert for Wrap<Schema> {
|
|
377
512
|
|
378
513
|
let mut schema = Vec::new();
|
379
514
|
dict.foreach(|key: String, val: Wrap<DataType>| {
|
380
|
-
schema.push(Field::new(&key, val.0));
|
515
|
+
schema.push(Ok(Field::new(&key, val.0)));
|
381
516
|
Ok(ForEach::Continue)
|
382
517
|
})
|
383
518
|
.unwrap();
|
384
519
|
|
385
|
-
Ok(Wrap(schema.into_iter().
|
520
|
+
Ok(Wrap(schema.into_iter().collect::<RbResult<Schema>>()?))
|
386
521
|
}
|
387
522
|
}
|
388
523
|
|
@@ -870,3 +1005,11 @@ pub fn parse_parquet_compression(
|
|
870
1005
|
};
|
871
1006
|
Ok(parsed)
|
872
1007
|
}
|
1008
|
+
|
1009
|
+
pub(crate) fn strings_to_smartstrings<I, S>(container: I) -> Vec<SmartString>
|
1010
|
+
where
|
1011
|
+
I: IntoIterator<Item = S>,
|
1012
|
+
S: AsRef<str>,
|
1013
|
+
{
|
1014
|
+
container.into_iter().map(|s| s.as_ref().into()).collect()
|
1015
|
+
}
|