polars-df 0.3.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +24 -1
- data/Cargo.lock +486 -380
- data/Cargo.toml +0 -2
- data/README.md +31 -2
- data/ext/polars/Cargo.toml +10 -4
- data/ext/polars/src/apply/dataframe.rs +2 -2
- data/ext/polars/src/{lazy/apply.rs → apply/lazy.rs} +1 -2
- data/ext/polars/src/apply/mod.rs +1 -0
- data/ext/polars/src/batched_csv.rs +36 -19
- data/ext/polars/src/conversion.rs +159 -16
- data/ext/polars/src/dataframe.rs +51 -52
- data/ext/polars/src/error.rs +0 -4
- data/ext/polars/src/expr/binary.rs +69 -0
- data/ext/polars/src/expr/categorical.rs +10 -0
- data/ext/polars/src/expr/datetime.rs +223 -0
- data/ext/polars/src/{lazy/dsl.rs → expr/general.rs} +22 -799
- data/ext/polars/src/expr/list.rs +146 -0
- data/ext/polars/src/{lazy → expr}/meta.rs +16 -6
- data/ext/polars/src/expr/string.rs +313 -0
- data/ext/polars/src/expr/struct.rs +15 -0
- data/ext/polars/src/expr.rs +33 -0
- data/ext/polars/src/functions/eager.rs +93 -0
- data/ext/polars/src/functions/io.rs +34 -0
- data/ext/polars/src/functions/lazy.rs +209 -0
- data/ext/polars/src/functions/meta.rs +8 -0
- data/ext/polars/src/functions/mod.rs +5 -0
- data/ext/polars/src/functions/whenthen.rs +43 -0
- data/ext/polars/src/{lazy/dataframe.rs → lazyframe.rs} +58 -45
- data/ext/polars/src/lazygroupby.rs +29 -0
- data/ext/polars/src/lib.rs +216 -300
- data/ext/polars/src/rb_modules.rs +8 -0
- data/ext/polars/src/series/aggregation.rs +83 -0
- data/ext/polars/src/series/arithmetic.rs +88 -0
- data/ext/polars/src/series/comparison.rs +251 -0
- data/ext/polars/src/series/construction.rs +164 -0
- data/ext/polars/src/series.rs +103 -531
- data/lib/polars/batched_csv_reader.rb +1 -1
- data/lib/polars/binary_expr.rb +77 -0
- data/lib/polars/binary_name_space.rb +66 -0
- data/lib/polars/convert.rb +2 -2
- data/lib/polars/data_frame.rb +263 -87
- data/lib/polars/data_types.rb +6 -4
- data/lib/polars/date_time_expr.rb +148 -8
- data/lib/polars/expr.rb +78 -11
- data/lib/polars/io.rb +73 -62
- data/lib/polars/lazy_frame.rb +107 -10
- data/lib/polars/lazy_functions.rb +7 -3
- data/lib/polars/list_expr.rb +70 -21
- data/lib/polars/list_name_space.rb +2 -2
- data/lib/polars/series.rb +190 -74
- data/lib/polars/string_expr.rb +150 -44
- data/lib/polars/string_name_space.rb +4 -4
- data/lib/polars/struct_name_space.rb +32 -0
- data/lib/polars/utils.rb +51 -9
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +4 -2
- metadata +29 -12
- data/ext/polars/src/lazy/mod.rs +0 -5
- data/ext/polars/src/lazy/utils.rs +0 -13
- data/ext/polars/src/list_construction.rs +0 -100
- data/ext/polars/src/{numo.rs → series/export.rs} +0 -0
- data/ext/polars/src/{set.rs → series/set_at_idx.rs} +0 -0
data/Cargo.toml
CHANGED
@@ -3,8 +3,6 @@ members = ["ext/polars"]
|
|
3
3
|
|
4
4
|
[patch.crates-io]
|
5
5
|
jsonpath_lib = { git = "https://github.com/ritchie46/jsonpath", rev = "24eaf0b4416edff38a4d1b6b17bc4b9f3f047b4b" }
|
6
|
-
halfbrown = { git = "https://github.com/Licenser/halfbrown", rev = "952023c5dd6461b009bb5ba66b9aa979bd75949f" }
|
7
|
-
arrow2 = { git = "https://github.com/ankane/arrow2", rev = "ef0270922a217070ba9942567c0ff3263ae8c531" }
|
8
6
|
|
9
7
|
[profile.release]
|
10
8
|
strip = true
|
data/README.md
CHANGED
@@ -50,14 +50,43 @@ From Parquet
|
|
50
50
|
|
51
51
|
```ruby
|
52
52
|
Polars.read_parquet("file.parquet")
|
53
|
+
|
54
|
+
# or lazily with
|
55
|
+
Polars.scan_parquet("file.parquet")
|
53
56
|
```
|
54
57
|
|
55
58
|
From Active Record
|
56
59
|
|
57
60
|
```ruby
|
58
|
-
Polars.
|
61
|
+
Polars.read_database(User.all)
|
62
|
+
# or
|
63
|
+
Polars.read_database("SELECT * FROM users")
|
64
|
+
```
|
65
|
+
|
66
|
+
From JSON
|
67
|
+
|
68
|
+
```ruby
|
69
|
+
Polars.read_json("file.json")
|
59
70
|
# or
|
60
|
-
Polars.
|
71
|
+
Polars.read_ndjson("file.ndjson")
|
72
|
+
|
73
|
+
# or lazily with
|
74
|
+
Polars.scan_ndjson("file.ndjson")
|
75
|
+
```
|
76
|
+
|
77
|
+
From Feather / Arrow IPC
|
78
|
+
|
79
|
+
```ruby
|
80
|
+
Polars.read_ipc("file.arrow")
|
81
|
+
|
82
|
+
# or lazily with
|
83
|
+
Polars.scan_ipc("file.arrow")
|
84
|
+
```
|
85
|
+
|
86
|
+
From Avro
|
87
|
+
|
88
|
+
```ruby
|
89
|
+
Polars.read_avro("file.avro")
|
61
90
|
```
|
62
91
|
|
63
92
|
From a hash
|
data/ext/polars/Cargo.toml
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
[package]
|
2
2
|
name = "polars"
|
3
|
-
version = "0.
|
3
|
+
version = "0.5.0"
|
4
4
|
license = "MIT"
|
5
5
|
authors = ["Andrew Kane <andrew@ankane.org>"]
|
6
6
|
edition = "2021"
|
@@ -12,13 +12,15 @@ crate-type = ["cdylib"]
|
|
12
12
|
[dependencies]
|
13
13
|
ahash = "0.8"
|
14
14
|
magnus = "0.5"
|
15
|
-
polars-core = "0.
|
15
|
+
polars-core = "0.29.0"
|
16
16
|
serde_json = "1"
|
17
|
+
smartstring = "1"
|
17
18
|
|
18
19
|
[dependencies.polars]
|
19
|
-
version = "0.
|
20
|
+
version = "0.29.0"
|
20
21
|
features = [
|
21
22
|
"abs",
|
23
|
+
"approx_unique",
|
22
24
|
"arange",
|
23
25
|
"arg_where",
|
24
26
|
"asof_join",
|
@@ -26,7 +28,7 @@ features = [
|
|
26
28
|
"binary_encoding",
|
27
29
|
"concat_str",
|
28
30
|
"cse",
|
29
|
-
"csv
|
31
|
+
"csv",
|
30
32
|
"cum_agg",
|
31
33
|
"cumulative_eval",
|
32
34
|
"dataframe_arithmetic",
|
@@ -44,10 +46,13 @@ features = [
|
|
44
46
|
"ipc",
|
45
47
|
"is_first",
|
46
48
|
"is_in",
|
49
|
+
"is_unique",
|
47
50
|
"json",
|
48
51
|
"lazy",
|
49
52
|
"lazy_regex",
|
53
|
+
"list_count",
|
50
54
|
"list_eval",
|
55
|
+
"list_take",
|
51
56
|
"list_to_struct",
|
52
57
|
"log",
|
53
58
|
"meta",
|
@@ -73,6 +78,7 @@ features = [
|
|
73
78
|
"serde-lazy",
|
74
79
|
"sign",
|
75
80
|
"string_encoding",
|
81
|
+
"string_from_radix",
|
76
82
|
"string_justify",
|
77
83
|
"strings",
|
78
84
|
"timezones",
|
data/ext/polars/src/apply/dataframe.rs
CHANGED
@@ -202,8 +202,8 @@ pub fn apply_lambda_with_list_out_type(
|
|
202
202
|
}
|
203
203
|
|
204
204
|
/// Apply a lambda with list output type
|
205
|
-
pub fn apply_lambda_with_list_out_type
|
206
|
-
df: &
|
205
|
+
pub fn apply_lambda_with_list_out_type(
|
206
|
+
df: &DataFrame,
|
207
207
|
lambda: Value,
|
208
208
|
init_null_count: usize,
|
209
209
|
first_value: Option<&Series>,
|
data/ext/polars/src/apply/mod.rs
CHANGED
data/ext/polars/src/batched_csv.rs
CHANGED
@@ -7,11 +7,17 @@ use std::cell::RefCell;
|
|
7
7
|
use std::path::PathBuf;
|
8
8
|
|
9
9
|
use crate::conversion::*;
|
10
|
+
use crate::prelude::read_impl::OwnedBatchedCsvReaderMmap;
|
10
11
|
use crate::{RbDataFrame, RbPolarsErr, RbResult};
|
11
12
|
|
13
|
+
pub enum BatchedReader {
|
14
|
+
MMap(OwnedBatchedCsvReaderMmap),
|
15
|
+
Read(OwnedBatchedCsvReader),
|
16
|
+
}
|
17
|
+
|
12
18
|
#[magnus::wrap(class = "Polars::RbBatchedCsv")]
|
13
19
|
pub struct RbBatchedCsv {
|
14
|
-
pub reader: RefCell<
|
20
|
+
pub reader: RefCell<BatchedReader>,
|
15
21
|
}
|
16
22
|
|
17
23
|
impl RbBatchedCsv {
|
@@ -38,7 +44,7 @@ impl RbBatchedCsv {
|
|
38
44
|
let comment_char: Option<String> = arguments[16].try_convert()?;
|
39
45
|
let quote_char: Option<String> = arguments[17].try_convert()?;
|
40
46
|
let null_values: Option<Wrap<NullValues>> = arguments[18].try_convert()?;
|
41
|
-
let
|
47
|
+
let try_parse_dates: bool = arguments[19].try_convert()?;
|
42
48
|
let skip_rows_after_header: usize = arguments[20].try_convert()?;
|
43
49
|
let row_count: Option<(String, IdxSize)> = arguments[21].try_convert()?;
|
44
50
|
let sample_size: usize = arguments[22].try_convert()?;
|
@@ -62,11 +68,13 @@ impl RbBatchedCsv {
|
|
62
68
|
};
|
63
69
|
|
64
70
|
let overwrite_dtype = overwrite_dtype.map(|overwrite_dtype| {
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
71
|
+
overwrite_dtype
|
72
|
+
.iter()
|
73
|
+
.map(|(name, dtype)| {
|
74
|
+
let dtype = dtype.0.clone();
|
75
|
+
Field::new(name, dtype)
|
76
|
+
})
|
77
|
+
.collect::<Schema>()
|
70
78
|
});
|
71
79
|
|
72
80
|
let overwrite_dtype_slice = overwrite_dtype_slice.map(|overwrite_dtype| {
|
@@ -95,14 +103,24 @@ impl RbBatchedCsv {
|
|
95
103
|
.low_memory(low_memory)
|
96
104
|
.with_comment_char(comment_char)
|
97
105
|
.with_null_values(null_values)
|
98
|
-
.
|
106
|
+
.with_try_parse_dates(try_parse_dates)
|
99
107
|
.with_quote_char(quote_char)
|
100
108
|
.with_end_of_line_char(eol_char)
|
101
109
|
.with_skip_rows_after_header(skip_rows_after_header)
|
102
110
|
.with_row_count(row_count)
|
103
|
-
.sample_size(sample_size)
|
104
|
-
|
105
|
-
|
111
|
+
.sample_size(sample_size);
|
112
|
+
|
113
|
+
let reader = if low_memory {
|
114
|
+
let reader = reader
|
115
|
+
.batched_read(overwrite_dtype.map(Arc::new))
|
116
|
+
.map_err(RbPolarsErr::from)?;
|
117
|
+
BatchedReader::Read(reader)
|
118
|
+
} else {
|
119
|
+
let reader = reader
|
120
|
+
.batched_mmap(overwrite_dtype.map(Arc::new))
|
121
|
+
.map_err(RbPolarsErr::from)?;
|
122
|
+
BatchedReader::MMap(reader)
|
123
|
+
};
|
106
124
|
|
107
125
|
Ok(RbBatchedCsv {
|
108
126
|
reader: RefCell::new(reader),
|
@@ -110,13 +128,12 @@ impl RbBatchedCsv {
|
|
110
128
|
}
|
111
129
|
|
112
130
|
pub fn next_batches(&self, n: usize) -> RbResult<Option<RArray>> {
|
113
|
-
let batches = self
|
114
|
-
.
|
115
|
-
.
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
}))
|
131
|
+
let batches = match &mut *self.reader.borrow_mut() {
|
132
|
+
BatchedReader::MMap(reader) => reader.next_batches(n),
|
133
|
+
BatchedReader::Read(reader) => reader.next_batches(n),
|
134
|
+
}
|
135
|
+
.map_err(RbPolarsErr::from)?;
|
136
|
+
|
137
|
+
Ok(batches.map(|batches| RArray::from_iter(batches.into_iter().map(RbDataFrame::from))))
|
121
138
|
}
|
122
139
|
}
|
data/ext/polars/src/conversion.rs
CHANGED
@@ -1,7 +1,11 @@
|
|
1
|
+
use std::fmt::{Display, Formatter};
|
2
|
+
use std::hash::{Hash, Hasher};
|
3
|
+
|
1
4
|
use magnus::{
|
2
5
|
class, exception, r_hash::ForEach, ruby_handle::RubyHandle, Integer, IntoValue, Module, RArray,
|
3
6
|
RFloat, RHash, RString, Symbol, TryConvert, Value, QNIL,
|
4
7
|
};
|
8
|
+
use magnus::encoding::{EncodingCapable, Index};
|
5
9
|
use polars::chunked_array::object::PolarsObjectSafe;
|
6
10
|
use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
|
7
11
|
use polars::datatypes::AnyValue;
|
@@ -10,9 +14,9 @@ use polars::frame::NullStrategy;
|
|
10
14
|
use polars::io::avro::AvroCompression;
|
11
15
|
use polars::prelude::*;
|
12
16
|
use polars::series::ops::NullBehavior;
|
13
|
-
use
|
14
|
-
use std::hash::{Hash, Hasher};
|
17
|
+
use smartstring::alias::String as SmartString;
|
15
18
|
|
19
|
+
use crate::rb_modules::utils;
|
16
20
|
use crate::{RbDataFrame, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbValueError};
|
17
21
|
|
18
22
|
pub(crate) fn slice_to_wrapped<T>(slice: &[T]) -> &[Wrap<T>] {
|
@@ -21,6 +25,12 @@ pub(crate) fn slice_to_wrapped<T>(slice: &[T]) -> &[Wrap<T>] {
|
|
21
25
|
unsafe { std::mem::transmute(slice) }
|
22
26
|
}
|
23
27
|
|
28
|
+
pub(crate) fn slice_extract_wrapped<T>(slice: &[Wrap<T>]) -> &[T] {
|
29
|
+
// Safety:
|
30
|
+
// Wrap is transparent.
|
31
|
+
unsafe { std::mem::transmute(slice) }
|
32
|
+
}
|
33
|
+
|
24
34
|
pub(crate) fn vec_extract_wrapped<T>(buf: Vec<Wrap<T>>) -> Vec<T> {
|
25
35
|
// Safety:
|
26
36
|
// Wrap is transparent.
|
@@ -82,6 +92,22 @@ impl TryConvert for Wrap<Utf8Chunked> {
|
|
82
92
|
}
|
83
93
|
}
|
84
94
|
|
95
|
+
impl TryConvert for Wrap<BinaryChunked> {
|
96
|
+
fn try_convert(obj: Value) -> RbResult<Self> {
|
97
|
+
let (seq, len) = get_rbseq(obj)?;
|
98
|
+
let mut builder = BinaryChunkedBuilder::new("", len, len * 25);
|
99
|
+
|
100
|
+
for res in seq.each() {
|
101
|
+
let item = res?;
|
102
|
+
match item.try_convert::<RString>() {
|
103
|
+
Ok(val) => builder.append_value(unsafe { val.as_slice() }),
|
104
|
+
Err(_) => builder.append_null(),
|
105
|
+
}
|
106
|
+
}
|
107
|
+
Ok(Wrap(builder.finish()))
|
108
|
+
}
|
109
|
+
}
|
110
|
+
|
85
111
|
impl TryConvert for Wrap<NullValues> {
|
86
112
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
87
113
|
if let Ok(s) = ob.try_convert::<String>() {
|
@@ -98,6 +124,14 @@ impl TryConvert for Wrap<NullValues> {
|
|
98
124
|
}
|
99
125
|
}
|
100
126
|
|
127
|
+
fn struct_dict<'a>(vals: impl Iterator<Item = AnyValue<'a>>, flds: &[Field]) -> Value {
|
128
|
+
let dict = RHash::new();
|
129
|
+
for (fld, val) in flds.iter().zip(vals) {
|
130
|
+
dict.aset(fld.name().as_str(), Wrap(val)).unwrap()
|
131
|
+
}
|
132
|
+
dict.into_value()
|
133
|
+
}
|
134
|
+
|
101
135
|
impl IntoValue for Wrap<AnyValue<'_>> {
|
102
136
|
fn into_value_with(self, _: &RubyHandle) -> Value {
|
103
137
|
match self.0 {
|
@@ -114,7 +148,7 @@ impl IntoValue for Wrap<AnyValue<'_>> {
|
|
114
148
|
AnyValue::Null => *QNIL,
|
115
149
|
AnyValue::Boolean(v) => Value::from(v),
|
116
150
|
AnyValue::Utf8(v) => Value::from(v),
|
117
|
-
AnyValue::Utf8Owned(
|
151
|
+
AnyValue::Utf8Owned(v) => Value::from(v.as_str()),
|
118
152
|
AnyValue::Categorical(_idx, _rev, _arr) => todo!(),
|
119
153
|
AnyValue::Date(v) => class::time()
|
120
154
|
.funcall::<_, _, Value>("at", (v * 86400,))
|
@@ -154,15 +188,25 @@ impl IntoValue for Wrap<AnyValue<'_>> {
|
|
154
188
|
t.funcall::<_, _, Value>("utc", ()).unwrap()
|
155
189
|
}
|
156
190
|
}
|
157
|
-
AnyValue::Duration(
|
191
|
+
AnyValue::Duration(v, tu) => {
|
192
|
+
let tu = tu.to_ascii();
|
193
|
+
utils().funcall("_to_ruby_duration", (v, tu)).unwrap()
|
194
|
+
}
|
158
195
|
AnyValue::Time(_v) => todo!(),
|
159
196
|
AnyValue::List(v) => RbSeries::new(v).to_a().into_value(),
|
160
|
-
ref
|
161
|
-
AnyValue::StructOwned(
|
162
|
-
AnyValue::Object(
|
163
|
-
|
164
|
-
|
165
|
-
|
197
|
+
ref av @ AnyValue::Struct(_, _, flds) => struct_dict(av._iter_struct_av(), flds),
|
198
|
+
AnyValue::StructOwned(payload) => struct_dict(payload.0.into_iter(), &payload.1),
|
199
|
+
AnyValue::Object(v) => {
|
200
|
+
let object = v.as_any().downcast_ref::<ObjectValue>().unwrap();
|
201
|
+
object.inner
|
202
|
+
}
|
203
|
+
AnyValue::ObjectOwned(v) => {
|
204
|
+
let object = v.0.as_any().downcast_ref::<ObjectValue>().unwrap();
|
205
|
+
object.inner
|
206
|
+
}
|
207
|
+
AnyValue::Binary(v) => RString::from_slice(v).into_value(),
|
208
|
+
AnyValue::BinaryOwned(v) => RString::from_slice(&v).into_value(),
|
209
|
+
AnyValue::Decimal(_v, _scale) => todo!(),
|
166
210
|
}
|
167
211
|
}
|
168
212
|
}
|
@@ -182,7 +226,7 @@ impl IntoValue for Wrap<DataType> {
|
|
182
226
|
DataType::UInt64 => pl.const_get::<_, Value>("UInt64").unwrap(),
|
183
227
|
DataType::Float32 => pl.const_get::<_, Value>("Float32").unwrap(),
|
184
228
|
DataType::Float64 => pl.const_get::<_, Value>("Float64").unwrap(),
|
185
|
-
DataType::
|
229
|
+
DataType::Decimal(_precision, _scale) => todo!(),
|
186
230
|
DataType::Boolean => pl.const_get::<_, Value>("Boolean").unwrap(),
|
187
231
|
DataType::Utf8 => pl.const_get::<_, Value>("Utf8").unwrap(),
|
188
232
|
DataType::Binary => pl.const_get::<_, Value>("Binary").unwrap(),
|
@@ -210,7 +254,7 @@ impl IntoValue for Wrap<DataType> {
|
|
210
254
|
DataType::Struct(fields) => {
|
211
255
|
let field_class = pl.const_get::<_, Value>("Field").unwrap();
|
212
256
|
let iter = fields.iter().map(|fld| {
|
213
|
-
let name = fld.name().
|
257
|
+
let name = fld.name().as_str();
|
214
258
|
let dtype = Wrap(fld.data_type().clone());
|
215
259
|
field_class
|
216
260
|
.funcall::<_, _, Value>("new", (name, dtype))
|
@@ -228,6 +272,75 @@ impl IntoValue for Wrap<DataType> {
|
|
228
272
|
}
|
229
273
|
}
|
230
274
|
|
275
|
+
impl IntoValue for Wrap<TimeUnit> {
|
276
|
+
fn into_value_with(self, _: &RubyHandle) -> Value {
|
277
|
+
let tu = match self.0 {
|
278
|
+
TimeUnit::Nanoseconds => "ns",
|
279
|
+
TimeUnit::Microseconds => "us",
|
280
|
+
TimeUnit::Milliseconds => "ms",
|
281
|
+
};
|
282
|
+
tu.into_value()
|
283
|
+
}
|
284
|
+
}
|
285
|
+
|
286
|
+
impl IntoValue for Wrap<&Utf8Chunked> {
|
287
|
+
fn into_value_with(self, _: &RubyHandle) -> Value {
|
288
|
+
let iter = self.0.into_iter();
|
289
|
+
RArray::from_iter(iter).into_value()
|
290
|
+
}
|
291
|
+
}
|
292
|
+
|
293
|
+
impl IntoValue for Wrap<&BinaryChunked> {
|
294
|
+
fn into_value_with(self, _: &RubyHandle) -> Value {
|
295
|
+
let iter = self
|
296
|
+
.0
|
297
|
+
.into_iter()
|
298
|
+
.map(|opt_bytes| opt_bytes.map(RString::from_slice));
|
299
|
+
RArray::from_iter(iter).into_value()
|
300
|
+
}
|
301
|
+
}
|
302
|
+
|
303
|
+
impl IntoValue for Wrap<&StructChunked> {
|
304
|
+
fn into_value_with(self, _: &RubyHandle) -> Value {
|
305
|
+
let s = self.0.clone().into_series();
|
306
|
+
// todo! iterate its chunks and flatten.
|
307
|
+
// make series::iter() accept a chunk index.
|
308
|
+
let s = s.rechunk();
|
309
|
+
let iter = s.iter().map(|av| {
|
310
|
+
if let AnyValue::Struct(_, _, flds) = av {
|
311
|
+
struct_dict(av._iter_struct_av(), flds)
|
312
|
+
} else {
|
313
|
+
unreachable!()
|
314
|
+
}
|
315
|
+
});
|
316
|
+
|
317
|
+
RArray::from_iter(iter).into_value()
|
318
|
+
}
|
319
|
+
}
|
320
|
+
|
321
|
+
impl IntoValue for Wrap<&DurationChunked> {
|
322
|
+
fn into_value_with(self, _: &RubyHandle) -> Value {
|
323
|
+
let utils = utils();
|
324
|
+
let tu = Wrap(self.0.time_unit()).into_value();
|
325
|
+
let iter = self.0.into_iter().map(|opt_v| {
|
326
|
+
opt_v.map(|v| {
|
327
|
+
utils
|
328
|
+
.funcall::<_, _, Value>("_to_ruby_duration", (v, tu))
|
329
|
+
.unwrap()
|
330
|
+
})
|
331
|
+
});
|
332
|
+
RArray::from_iter(iter).into_value()
|
333
|
+
}
|
334
|
+
}
|
335
|
+
|
336
|
+
impl TryConvert for Wrap<Field> {
|
337
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
338
|
+
let name: String = ob.funcall("name", ())?;
|
339
|
+
let dtype: Wrap<DataType> = ob.funcall("dtype", ())?;
|
340
|
+
Ok(Wrap(Field::new(&name, dtype.0)))
|
341
|
+
}
|
342
|
+
}
|
343
|
+
|
231
344
|
impl TryConvert for Wrap<DataType> {
|
232
345
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
233
346
|
let dtype = if ob.is_kind_of(class::class()) {
|
@@ -261,6 +374,20 @@ impl TryConvert for Wrap<DataType> {
|
|
261
374
|
)))
|
262
375
|
}
|
263
376
|
}
|
377
|
+
// TODO improve
|
378
|
+
} else if ob.try_convert::<String>().is_err() {
|
379
|
+
let name = unsafe { ob.class().name() }.into_owned();
|
380
|
+
match name.as_str() {
|
381
|
+
"Polars::Struct" => {
|
382
|
+
let arr: RArray = ob.funcall("fields", ())?;
|
383
|
+
let mut fields = Vec::with_capacity(arr.len());
|
384
|
+
for v in arr.each() {
|
385
|
+
fields.push(v?.try_convert::<Wrap<Field>>()?.0);
|
386
|
+
}
|
387
|
+
DataType::Struct(fields)
|
388
|
+
}
|
389
|
+
_ => todo!(),
|
390
|
+
}
|
264
391
|
} else {
|
265
392
|
match ob.try_convert::<String>()?.as_str() {
|
266
393
|
"u8" => DataType::UInt8,
|
@@ -306,7 +433,11 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
|
|
306
433
|
} else if let Some(v) = RFloat::from_value(ob) {
|
307
434
|
Ok(AnyValue::Float64(v.to_f64()).into())
|
308
435
|
} else if let Some(v) = RString::from_value(ob) {
|
309
|
-
|
436
|
+
if v.enc_get() == Index::utf8() {
|
437
|
+
Ok(AnyValue::Utf8Owned(v.to_string()?.into()).into())
|
438
|
+
} else {
|
439
|
+
Ok(AnyValue::BinaryOwned(unsafe { v.as_slice() }.to_vec()).into())
|
440
|
+
}
|
310
441
|
// call is_a? for ActiveSupport::TimeWithZone
|
311
442
|
} else if ob.funcall::<_, _, bool>("is_a?", (class::time(),))? {
|
312
443
|
let sec = ob.funcall::<_, _, i64>("to_i", ())?;
|
@@ -340,10 +471,14 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
|
|
340
471
|
let n = 25;
|
341
472
|
let dtype = any_values_to_dtype(&avs[..std::cmp::min(avs.len(), n)])
|
342
473
|
.map_err(RbPolarsErr::from)?;
|
343
|
-
let s = Series::from_any_values_and_dtype("", &avs, &dtype)
|
474
|
+
let s = Series::from_any_values_and_dtype("", &avs, &dtype, true)
|
344
475
|
.map_err(RbPolarsErr::from)?;
|
345
476
|
Ok(Wrap(AnyValue::List(s)))
|
346
477
|
}
|
478
|
+
} else if ob.is_kind_of(crate::rb_modules::datetime()) {
|
479
|
+
let sec: i64 = ob.funcall("to_i", ())?;
|
480
|
+
let nsec: i64 = ob.funcall("nsec", ())?;
|
481
|
+
Ok(Wrap(AnyValue::Datetime(sec * 1_000_000_000 + nsec, TimeUnit::Nanoseconds, &None)))
|
347
482
|
} else if ob.is_kind_of(crate::rb_modules::date()) {
|
348
483
|
// convert to DateTime for UTC
|
349
484
|
let v = ob
|
@@ -377,12 +512,12 @@ impl TryConvert for Wrap<Schema> {
|
|
377
512
|
|
378
513
|
let mut schema = Vec::new();
|
379
514
|
dict.foreach(|key: String, val: Wrap<DataType>| {
|
380
|
-
schema.push(Field::new(&key, val.0));
|
515
|
+
schema.push(Ok(Field::new(&key, val.0)));
|
381
516
|
Ok(ForEach::Continue)
|
382
517
|
})
|
383
518
|
.unwrap();
|
384
519
|
|
385
|
-
Ok(Wrap(schema.into_iter().
|
520
|
+
Ok(Wrap(schema.into_iter().collect::<RbResult<Schema>>()?))
|
386
521
|
}
|
387
522
|
}
|
388
523
|
|
@@ -870,3 +1005,11 @@ pub fn parse_parquet_compression(
|
|
870
1005
|
};
|
871
1006
|
Ok(parsed)
|
872
1007
|
}
|
1008
|
+
|
1009
|
+
pub(crate) fn strings_to_smartstrings<I, S>(container: I) -> Vec<SmartString>
|
1010
|
+
where
|
1011
|
+
I: IntoIterator<Item = S>,
|
1012
|
+
S: AsRef<str>,
|
1013
|
+
{
|
1014
|
+
container.into_iter().map(|s| s.as_ref().into()).collect()
|
1015
|
+
}
|