polars-df 0.10.0 → 0.12.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +27 -0
- data/Cargo.lock +392 -351
- data/README.md +6 -6
- data/ext/polars/Cargo.toml +12 -7
- data/ext/polars/src/batched_csv.rs +53 -52
- data/ext/polars/src/conversion/any_value.rs +261 -0
- data/ext/polars/src/conversion/chunked_array.rs +4 -4
- data/ext/polars/src/conversion/mod.rs +60 -66
- data/ext/polars/src/dataframe/construction.rs +184 -0
- data/ext/polars/src/dataframe/export.rs +48 -0
- data/ext/polars/src/dataframe/general.rs +597 -0
- data/ext/polars/src/dataframe/io.rs +473 -0
- data/ext/polars/src/dataframe/mod.rs +26 -0
- data/ext/polars/src/error.rs +26 -4
- data/ext/polars/src/expr/categorical.rs +0 -10
- data/ext/polars/src/expr/datetime.rs +4 -8
- data/ext/polars/src/expr/general.rs +129 -94
- data/ext/polars/src/expr/mod.rs +2 -2
- data/ext/polars/src/expr/rolling.rs +201 -77
- data/ext/polars/src/expr/string.rs +11 -36
- data/ext/polars/src/functions/eager.rs +10 -10
- data/ext/polars/src/functions/lazy.rs +23 -21
- data/ext/polars/src/functions/range.rs +69 -1
- data/ext/polars/src/interop/mod.rs +1 -0
- data/ext/polars/src/interop/numo/mod.rs +2 -0
- data/ext/polars/src/interop/numo/to_numo_df.rs +23 -0
- data/ext/polars/src/interop/numo/to_numo_series.rs +61 -0
- data/ext/polars/src/lazyframe/mod.rs +135 -136
- data/ext/polars/src/lib.rs +94 -59
- data/ext/polars/src/map/dataframe.rs +2 -2
- data/ext/polars/src/map/lazy.rs +5 -25
- data/ext/polars/src/map/series.rs +7 -1
- data/ext/polars/src/rb_modules.rs +25 -1
- data/ext/polars/src/series/aggregation.rs +49 -30
- data/ext/polars/src/series/arithmetic.rs +21 -11
- data/ext/polars/src/series/construction.rs +56 -38
- data/ext/polars/src/series/export.rs +131 -49
- data/ext/polars/src/series/mod.rs +32 -141
- data/ext/polars/src/sql.rs +3 -1
- data/lib/polars/array_expr.rb +4 -4
- data/lib/polars/batched_csv_reader.rb +11 -5
- data/lib/polars/cat_expr.rb +0 -36
- data/lib/polars/cat_name_space.rb +0 -37
- data/lib/polars/convert.rb +6 -1
- data/lib/polars/data_frame.rb +176 -403
- data/lib/polars/data_types.rb +1 -1
- data/lib/polars/date_time_expr.rb +525 -572
- data/lib/polars/date_time_name_space.rb +263 -460
- data/lib/polars/dynamic_group_by.rb +5 -5
- data/lib/polars/exceptions.rb +7 -0
- data/lib/polars/expr.rb +1394 -243
- data/lib/polars/expr_dispatch.rb +1 -1
- data/lib/polars/functions/aggregation/horizontal.rb +8 -8
- data/lib/polars/functions/as_datatype.rb +63 -40
- data/lib/polars/functions/lazy.rb +63 -14
- data/lib/polars/functions/lit.rb +1 -1
- data/lib/polars/functions/range/date_range.rb +90 -57
- data/lib/polars/functions/range/datetime_range.rb +149 -0
- data/lib/polars/functions/range/int_range.rb +2 -2
- data/lib/polars/functions/range/time_range.rb +141 -0
- data/lib/polars/functions/repeat.rb +1 -1
- data/lib/polars/functions/whenthen.rb +1 -1
- data/lib/polars/group_by.rb +88 -23
- data/lib/polars/io/avro.rb +24 -0
- data/lib/polars/{io.rb → io/csv.rb} +299 -493
- data/lib/polars/io/database.rb +73 -0
- data/lib/polars/io/ipc.rb +247 -0
- data/lib/polars/io/json.rb +29 -0
- data/lib/polars/io/ndjson.rb +80 -0
- data/lib/polars/io/parquet.rb +227 -0
- data/lib/polars/lazy_frame.rb +143 -272
- data/lib/polars/lazy_group_by.rb +100 -3
- data/lib/polars/list_expr.rb +11 -11
- data/lib/polars/list_name_space.rb +5 -1
- data/lib/polars/rolling_group_by.rb +7 -9
- data/lib/polars/series.rb +103 -187
- data/lib/polars/string_expr.rb +78 -102
- data/lib/polars/string_name_space.rb +5 -4
- data/lib/polars/testing.rb +2 -2
- data/lib/polars/utils/constants.rb +9 -0
- data/lib/polars/utils/convert.rb +97 -0
- data/lib/polars/utils/parse.rb +89 -0
- data/lib/polars/utils/various.rb +76 -0
- data/lib/polars/utils/wrap.rb +19 -0
- data/lib/polars/utils.rb +8 -300
- data/lib/polars/version.rb +1 -1
- data/lib/polars/whenthen.rb +6 -6
- data/lib/polars.rb +20 -1
- metadata +28 -7
- data/ext/polars/src/conversion/anyvalue.rs +0 -186
- data/ext/polars/src/dataframe.rs +0 -1208
data/README.md
CHANGED
@@ -1,8 +1,8 @@
|
|
1
|
-
# Polars
|
1
|
+
# Ruby Polars
|
2
2
|
|
3
3
|
:fire: Blazingly fast DataFrames for Ruby, powered by [Polars](https://github.com/pola-rs/polars)
|
4
4
|
|
5
|
-
[![Build Status](https://github.com/ankane/polars
|
5
|
+
[![Build Status](https://github.com/ankane/ruby-polars/actions/workflows/build.yml/badge.svg)](https://github.com/ankane/ruby-polars/actions)
|
6
6
|
|
7
7
|
## Installation
|
8
8
|
|
@@ -420,16 +420,16 @@ View the [changelog](CHANGELOG.md)
|
|
420
420
|
|
421
421
|
Everyone is encouraged to help improve this project. Here are a few ways you can help:
|
422
422
|
|
423
|
-
- [Report bugs](https://github.com/ankane/polars
|
424
|
-
- Fix bugs and [submit pull requests](https://github.com/ankane/polars
|
423
|
+
- [Report bugs](https://github.com/ankane/ruby-polars/issues)
|
424
|
+
- Fix bugs and [submit pull requests](https://github.com/ankane/ruby-polars/pulls)
|
425
425
|
- Write, clarify, or fix documentation
|
426
426
|
- Suggest or add new features
|
427
427
|
|
428
428
|
To get started with development:
|
429
429
|
|
430
430
|
```sh
|
431
|
-
git clone https://github.com/ankane/polars
|
432
|
-
cd polars
|
431
|
+
git clone https://github.com/ankane/ruby-polars.git
|
432
|
+
cd ruby-polars
|
433
433
|
bundle install
|
434
434
|
bundle exec rake compile
|
435
435
|
bundle exec rake test
|
data/ext/polars/Cargo.toml
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
[package]
|
2
2
|
name = "polars"
|
3
|
-
version = "0.
|
3
|
+
version = "0.12.0"
|
4
4
|
license = "MIT"
|
5
5
|
authors = ["Andrew Kane <andrew@ankane.org>"]
|
6
6
|
edition = "2021"
|
@@ -14,15 +14,15 @@ crate-type = ["cdylib"]
|
|
14
14
|
ahash = "0.8"
|
15
15
|
chrono = "0.4"
|
16
16
|
either = "1.8"
|
17
|
-
magnus = "0.
|
18
|
-
polars-core = "=0.
|
19
|
-
polars-parquet = "=0.
|
20
|
-
polars-utils = "=0.
|
17
|
+
magnus = "0.7"
|
18
|
+
polars-core = "=0.41.3"
|
19
|
+
polars-parquet = "=0.41.3"
|
20
|
+
polars-utils = "=0.41.3"
|
21
21
|
serde_json = "1"
|
22
22
|
smartstring = "1"
|
23
23
|
|
24
24
|
[dependencies.polars]
|
25
|
-
version = "=0.
|
25
|
+
version = "=0.41.3"
|
26
26
|
features = [
|
27
27
|
"abs",
|
28
28
|
"approx_unique",
|
@@ -41,7 +41,6 @@ features = [
|
|
41
41
|
"cumulative_eval",
|
42
42
|
"cutqcut",
|
43
43
|
"dataframe_arithmetic",
|
44
|
-
"date_offset",
|
45
44
|
"diagonal_concat",
|
46
45
|
"diff",
|
47
46
|
"dot_product",
|
@@ -55,6 +54,8 @@ features = [
|
|
55
54
|
"fmt",
|
56
55
|
"interpolate",
|
57
56
|
"ipc",
|
57
|
+
"ipc_streaming",
|
58
|
+
"is_between",
|
58
59
|
"is_first_distinct",
|
59
60
|
"is_in",
|
60
61
|
"is_last_distinct",
|
@@ -73,6 +74,9 @@ features = [
|
|
73
74
|
"meta",
|
74
75
|
"mode",
|
75
76
|
"moment",
|
77
|
+
"month_start",
|
78
|
+
"month_end",
|
79
|
+
"offset_by",
|
76
80
|
"object",
|
77
81
|
"parquet",
|
78
82
|
"partition_by",
|
@@ -91,6 +95,7 @@ features = [
|
|
91
95
|
"replace",
|
92
96
|
"rle",
|
93
97
|
"rolling_window",
|
98
|
+
"rolling_window_by",
|
94
99
|
"round_series",
|
95
100
|
"row_hash",
|
96
101
|
"search_sorted",
|
@@ -1,23 +1,19 @@
|
|
1
|
+
use std::cell::RefCell;
|
2
|
+
use std::path::PathBuf;
|
3
|
+
use std::sync::Mutex;
|
4
|
+
|
1
5
|
use magnus::{prelude::*, RArray, Value};
|
6
|
+
use polars::io::csv::read::OwnedBatchedCsvReader;
|
2
7
|
use polars::io::mmap::MmapBytesReader;
|
3
8
|
use polars::io::RowIndex;
|
4
|
-
use polars::prelude::read_impl::OwnedBatchedCsvReader;
|
5
9
|
use polars::prelude::*;
|
6
|
-
use std::cell::RefCell;
|
7
|
-
use std::path::PathBuf;
|
8
10
|
|
9
11
|
use crate::conversion::*;
|
10
|
-
use crate::prelude::read_impl::OwnedBatchedCsvReaderMmap;
|
11
12
|
use crate::{RbDataFrame, RbPolarsErr, RbResult};
|
12
13
|
|
13
|
-
pub enum BatchedReader {
|
14
|
-
MMap(OwnedBatchedCsvReaderMmap),
|
15
|
-
Read(OwnedBatchedCsvReader),
|
16
|
-
}
|
17
|
-
|
18
14
|
#[magnus::wrap(class = "Polars::RbBatchedCsv")]
|
19
15
|
pub struct RbBatchedCsv {
|
20
|
-
pub reader: RefCell<
|
16
|
+
pub reader: RefCell<Mutex<OwnedBatchedCsvReader>>,
|
21
17
|
}
|
22
18
|
|
23
19
|
impl RbBatchedCsv {
|
@@ -44,19 +40,23 @@ impl RbBatchedCsv {
|
|
44
40
|
let comment_prefix = Option::<String>::try_convert(arguments[16])?;
|
45
41
|
let quote_char = Option::<String>::try_convert(arguments[17])?;
|
46
42
|
let null_values = Option::<Wrap<NullValues>>::try_convert(arguments[18])?;
|
47
|
-
let
|
48
|
-
let
|
49
|
-
let
|
50
|
-
let
|
51
|
-
let
|
52
|
-
let
|
43
|
+
let missing_utf8_is_empty_string = bool::try_convert(arguments[19])?;
|
44
|
+
let try_parse_dates = bool::try_convert(arguments[20])?;
|
45
|
+
let skip_rows_after_header = usize::try_convert(arguments[21])?;
|
46
|
+
let row_index = Option::<(String, IdxSize)>::try_convert(arguments[22])?;
|
47
|
+
let sample_size = usize::try_convert(arguments[23])?;
|
48
|
+
let eol_char = String::try_convert(arguments[24])?;
|
49
|
+
let raise_if_empty = bool::try_convert(arguments[25])?;
|
50
|
+
let truncate_ragged_lines = bool::try_convert(arguments[26])?;
|
51
|
+
let decimal_comma = bool::try_convert(arguments[27])?;
|
53
52
|
// end arguments
|
54
53
|
|
55
54
|
let null_values = null_values.map(|w| w.0);
|
56
55
|
let eol_char = eol_char.as_bytes()[0];
|
57
|
-
|
58
|
-
|
59
|
-
|
56
|
+
let row_index = row_index.map(|(name, offset)| RowIndex {
|
57
|
+
name: Arc::from(name.as_str()),
|
58
|
+
offset,
|
59
|
+
});
|
60
60
|
let quote_char = if let Some(s) = quote_char {
|
61
61
|
if s.is_empty() {
|
62
62
|
None
|
@@ -86,54 +86,55 @@ impl RbBatchedCsv {
|
|
86
86
|
|
87
87
|
let file = std::fs::File::open(path).map_err(RbPolarsErr::io)?;
|
88
88
|
let reader = Box::new(file) as Box<dyn MmapBytesReader>;
|
89
|
-
let reader =
|
90
|
-
.
|
91
|
-
.
|
89
|
+
let reader = CsvReadOptions::default()
|
90
|
+
.with_infer_schema_length(infer_schema_length)
|
91
|
+
.with_has_header(has_header)
|
92
92
|
.with_n_rows(n_rows)
|
93
|
-
.with_separator(separator.as_bytes()[0])
|
94
93
|
.with_skip_rows(skip_rows)
|
95
94
|
.with_ignore_errors(ignore_errors)
|
96
|
-
.with_projection(projection)
|
95
|
+
.with_projection(projection.map(Arc::new))
|
97
96
|
.with_rechunk(rechunk)
|
98
97
|
.with_chunk_size(chunk_size)
|
99
|
-
.
|
100
|
-
.with_columns(columns)
|
98
|
+
.with_columns(columns.map(Arc::from))
|
101
99
|
.with_n_threads(n_threads)
|
102
|
-
.
|
103
|
-
.
|
104
|
-
.with_comment_prefix(comment_prefix.as_deref())
|
105
|
-
.with_null_values(null_values)
|
106
|
-
.with_try_parse_dates(try_parse_dates)
|
107
|
-
.with_quote_char(quote_char)
|
108
|
-
.with_end_of_line_char(eol_char)
|
100
|
+
.with_dtype_overwrite(overwrite_dtype_slice.map(Arc::new))
|
101
|
+
.with_low_memory(low_memory)
|
109
102
|
.with_skip_rows_after_header(skip_rows_after_header)
|
110
103
|
.with_row_index(row_index)
|
111
|
-
.
|
112
|
-
.
|
104
|
+
.with_sample_size(sample_size)
|
105
|
+
.with_raise_if_empty(raise_if_empty)
|
106
|
+
.with_parse_options(
|
107
|
+
CsvParseOptions::default()
|
108
|
+
.with_separator(separator.as_bytes()[0])
|
109
|
+
.with_encoding(encoding.0)
|
110
|
+
.with_missing_is_null(!missing_utf8_is_empty_string)
|
111
|
+
.with_comment_prefix(comment_prefix.as_deref())
|
112
|
+
.with_null_values(null_values)
|
113
|
+
.with_try_parse_dates(try_parse_dates)
|
114
|
+
.with_quote_char(quote_char)
|
115
|
+
.with_eol_char(eol_char)
|
116
|
+
.with_truncate_ragged_lines(truncate_ragged_lines)
|
117
|
+
.with_decimal_comma(decimal_comma),
|
118
|
+
)
|
119
|
+
.into_reader_with_file_handle(reader);
|
113
120
|
|
114
|
-
let reader =
|
115
|
-
|
116
|
-
|
117
|
-
.map_err(RbPolarsErr::from)?;
|
118
|
-
BatchedReader::Read(reader)
|
119
|
-
} else {
|
120
|
-
let reader = reader
|
121
|
-
.batched_mmap(overwrite_dtype.map(Arc::new))
|
122
|
-
.map_err(RbPolarsErr::from)?;
|
123
|
-
BatchedReader::MMap(reader)
|
124
|
-
};
|
121
|
+
let reader = reader
|
122
|
+
.batched(overwrite_dtype.map(Arc::new))
|
123
|
+
.map_err(RbPolarsErr::from)?;
|
125
124
|
|
126
125
|
Ok(RbBatchedCsv {
|
127
|
-
reader: RefCell::new(reader),
|
126
|
+
reader: RefCell::new(Mutex::new(reader)),
|
128
127
|
})
|
129
128
|
}
|
130
129
|
|
131
130
|
pub fn next_batches(&self, n: usize) -> RbResult<Option<RArray>> {
|
132
|
-
let
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
131
|
+
let reader = &self.reader;
|
132
|
+
let batches = reader
|
133
|
+
.borrow()
|
134
|
+
.lock()
|
135
|
+
.map_err(|e| RbPolarsErr::other(e.to_string()))?
|
136
|
+
.next_batches(n)
|
137
|
+
.map_err(RbPolarsErr::from)?;
|
137
138
|
|
138
139
|
Ok(batches.map(|batches| RArray::from_iter(batches.into_iter().map(RbDataFrame::from))))
|
139
140
|
}
|
@@ -0,0 +1,261 @@
|
|
1
|
+
use magnus::encoding::{EncodingCapable, Index};
|
2
|
+
use magnus::{
|
3
|
+
class, prelude::*, r_hash::ForEach, IntoValue, RArray, RHash, RString, Ruby, TryConvert, Value,
|
4
|
+
};
|
5
|
+
use polars::prelude::*;
|
6
|
+
use polars_core::utils::any_values_to_supertype_and_n_dtypes;
|
7
|
+
|
8
|
+
use super::{struct_dict, ObjectValue, Wrap};
|
9
|
+
|
10
|
+
use crate::error::RbOverflowError;
|
11
|
+
use crate::rb_modules::utils;
|
12
|
+
use crate::{RbPolarsErr, RbResult, RbSeries};
|
13
|
+
|
14
|
+
impl IntoValue for Wrap<AnyValue<'_>> {
|
15
|
+
fn into_value_with(self, ruby: &Ruby) -> Value {
|
16
|
+
any_value_into_rb_object(self.0, ruby)
|
17
|
+
}
|
18
|
+
}
|
19
|
+
|
20
|
+
impl<'s> TryConvert for Wrap<AnyValue<'s>> {
|
21
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
22
|
+
rb_object_to_any_value(ob, true).map(Wrap)
|
23
|
+
}
|
24
|
+
}
|
25
|
+
|
26
|
+
pub(crate) fn any_value_into_rb_object(av: AnyValue, ruby: &Ruby) -> Value {
|
27
|
+
match av {
|
28
|
+
AnyValue::UInt8(v) => ruby.into_value(v),
|
29
|
+
AnyValue::UInt16(v) => ruby.into_value(v),
|
30
|
+
AnyValue::UInt32(v) => ruby.into_value(v),
|
31
|
+
AnyValue::UInt64(v) => ruby.into_value(v),
|
32
|
+
AnyValue::Int8(v) => ruby.into_value(v),
|
33
|
+
AnyValue::Int16(v) => ruby.into_value(v),
|
34
|
+
AnyValue::Int32(v) => ruby.into_value(v),
|
35
|
+
AnyValue::Int64(v) => ruby.into_value(v),
|
36
|
+
AnyValue::Float32(v) => ruby.into_value(v),
|
37
|
+
AnyValue::Float64(v) => ruby.into_value(v),
|
38
|
+
AnyValue::Null => ruby.qnil().as_value(),
|
39
|
+
AnyValue::Boolean(v) => ruby.into_value(v),
|
40
|
+
AnyValue::String(v) => ruby.into_value(v),
|
41
|
+
AnyValue::StringOwned(v) => ruby.into_value(v.as_str()),
|
42
|
+
AnyValue::Categorical(idx, rev, arr) | AnyValue::Enum(idx, rev, arr) => {
|
43
|
+
let s = if arr.is_null() {
|
44
|
+
rev.get(idx)
|
45
|
+
} else {
|
46
|
+
unsafe { arr.deref_unchecked().value(idx as usize) }
|
47
|
+
};
|
48
|
+
s.into_value()
|
49
|
+
}
|
50
|
+
AnyValue::Date(v) => utils().funcall("_to_ruby_date", (v,)).unwrap(),
|
51
|
+
AnyValue::Datetime(v, time_unit, time_zone) => {
|
52
|
+
let time_unit = time_unit.to_ascii();
|
53
|
+
utils()
|
54
|
+
.funcall("_to_ruby_datetime", (v, time_unit, time_zone.clone()))
|
55
|
+
.unwrap()
|
56
|
+
}
|
57
|
+
AnyValue::Duration(v, time_unit) => {
|
58
|
+
let time_unit = time_unit.to_ascii();
|
59
|
+
utils()
|
60
|
+
.funcall("_to_ruby_duration", (v, time_unit))
|
61
|
+
.unwrap()
|
62
|
+
}
|
63
|
+
AnyValue::Time(v) => utils().funcall("_to_ruby_time", (v,)).unwrap(),
|
64
|
+
AnyValue::Array(v, _) | AnyValue::List(v) => RbSeries::new(v).to_a().into_value(),
|
65
|
+
ref av @ AnyValue::Struct(_, _, flds) => struct_dict(av._iter_struct_av(), flds),
|
66
|
+
AnyValue::StructOwned(payload) => struct_dict(payload.0.into_iter(), &payload.1),
|
67
|
+
AnyValue::Object(v) => {
|
68
|
+
let object = v.as_any().downcast_ref::<ObjectValue>().unwrap();
|
69
|
+
object.to_object()
|
70
|
+
}
|
71
|
+
AnyValue::ObjectOwned(v) => {
|
72
|
+
let object = v.0.as_any().downcast_ref::<ObjectValue>().unwrap();
|
73
|
+
object.to_object()
|
74
|
+
}
|
75
|
+
AnyValue::Binary(v) => RString::from_slice(v).into_value(),
|
76
|
+
AnyValue::BinaryOwned(v) => RString::from_slice(&v).into_value(),
|
77
|
+
AnyValue::Decimal(v, scale) => utils()
|
78
|
+
.funcall("_to_ruby_decimal", (v.to_string(), -(scale as i32)))
|
79
|
+
.unwrap(),
|
80
|
+
}
|
81
|
+
}
|
82
|
+
|
83
|
+
pub(crate) fn rb_object_to_any_value<'s>(ob: Value, strict: bool) -> RbResult<AnyValue<'s>> {
|
84
|
+
// Conversion functions.
|
85
|
+
fn get_null(_ob: Value, _strict: bool) -> RbResult<AnyValue<'static>> {
|
86
|
+
Ok(AnyValue::Null)
|
87
|
+
}
|
88
|
+
|
89
|
+
fn get_bool(ob: Value, _strict: bool) -> RbResult<AnyValue<'static>> {
|
90
|
+
let b = bool::try_convert(ob)?;
|
91
|
+
Ok(AnyValue::Boolean(b))
|
92
|
+
}
|
93
|
+
|
94
|
+
fn get_int(ob: Value, strict: bool) -> RbResult<AnyValue<'static>> {
|
95
|
+
if let Ok(v) = i64::try_convert(ob) {
|
96
|
+
Ok(AnyValue::Int64(v))
|
97
|
+
} else if let Ok(v) = u64::try_convert(ob) {
|
98
|
+
Ok(AnyValue::UInt64(v))
|
99
|
+
} else if !strict {
|
100
|
+
let f = f64::try_convert(ob)?;
|
101
|
+
Ok(AnyValue::Float64(f))
|
102
|
+
} else {
|
103
|
+
Err(RbOverflowError::new_err(format!(
|
104
|
+
"int value too large for Polars integer types: {ob}"
|
105
|
+
)))
|
106
|
+
}
|
107
|
+
}
|
108
|
+
|
109
|
+
fn get_float(ob: Value, _strict: bool) -> RbResult<AnyValue<'static>> {
|
110
|
+
Ok(AnyValue::Float64(f64::try_convert(ob)?))
|
111
|
+
}
|
112
|
+
|
113
|
+
fn get_str(ob: Value, _strict: bool) -> RbResult<AnyValue<'static>> {
|
114
|
+
let v = RString::from_value(ob).unwrap();
|
115
|
+
if v.enc_get() == Index::utf8() {
|
116
|
+
Ok(AnyValue::StringOwned(v.to_string()?.into()))
|
117
|
+
} else {
|
118
|
+
Ok(AnyValue::BinaryOwned(unsafe { v.as_slice() }.to_vec()))
|
119
|
+
}
|
120
|
+
}
|
121
|
+
|
122
|
+
fn get_list(ob: Value, _strict: bool) -> RbResult<AnyValue<'static>> {
|
123
|
+
let v = RArray::from_value(ob).unwrap();
|
124
|
+
if v.is_empty() {
|
125
|
+
Ok(AnyValue::List(Series::new_empty("", &DataType::Null)))
|
126
|
+
} else {
|
127
|
+
let list = v;
|
128
|
+
|
129
|
+
let mut avs = Vec::with_capacity(25);
|
130
|
+
let mut iter = list.into_iter();
|
131
|
+
|
132
|
+
for item in (&mut iter).take(25) {
|
133
|
+
avs.push(Wrap::<AnyValue>::try_convert(item)?.0)
|
134
|
+
}
|
135
|
+
|
136
|
+
let (dtype, _n_types) =
|
137
|
+
any_values_to_supertype_and_n_dtypes(&avs).map_err(RbPolarsErr::from)?;
|
138
|
+
|
139
|
+
// push the rest
|
140
|
+
avs.reserve(list.len());
|
141
|
+
for item in iter {
|
142
|
+
avs.push(Wrap::<AnyValue>::try_convert(item)?.0)
|
143
|
+
}
|
144
|
+
|
145
|
+
let s = Series::from_any_values_and_dtype("", &avs, &dtype, true)
|
146
|
+
.map_err(RbPolarsErr::from)?;
|
147
|
+
Ok(AnyValue::List(s))
|
148
|
+
}
|
149
|
+
}
|
150
|
+
|
151
|
+
fn get_list_from_series(ob: Value, _strict: bool) -> RbResult<AnyValue<'static>> {
|
152
|
+
let s = super::get_series(ob)?;
|
153
|
+
Ok(AnyValue::List(s))
|
154
|
+
}
|
155
|
+
|
156
|
+
fn get_struct(ob: Value, _strict: bool) -> RbResult<AnyValue<'static>> {
|
157
|
+
let dict = RHash::from_value(ob).unwrap();
|
158
|
+
let len = dict.len();
|
159
|
+
let mut keys = Vec::with_capacity(len);
|
160
|
+
let mut vals = Vec::with_capacity(len);
|
161
|
+
dict.foreach(|k: Value, v: Value| {
|
162
|
+
let key = String::try_convert(k)?;
|
163
|
+
let val = Wrap::<AnyValue>::try_convert(v)?.0;
|
164
|
+
let dtype = DataType::from(&val);
|
165
|
+
keys.push(Field::new(&key, dtype));
|
166
|
+
vals.push(val);
|
167
|
+
Ok(ForEach::Continue)
|
168
|
+
})?;
|
169
|
+
Ok(AnyValue::StructOwned(Box::new((vals, keys))))
|
170
|
+
}
|
171
|
+
|
172
|
+
fn get_date(ob: Value, _strict: bool) -> RbResult<AnyValue<'static>> {
|
173
|
+
// convert to DateTime for UTC
|
174
|
+
let v = ob
|
175
|
+
.funcall::<_, _, Value>("to_datetime", ())?
|
176
|
+
.funcall::<_, _, Value>("to_time", ())?
|
177
|
+
.funcall::<_, _, i64>("to_i", ())?;
|
178
|
+
Ok(AnyValue::Date((v / 86400) as i32))
|
179
|
+
}
|
180
|
+
|
181
|
+
fn get_time(ob: Value, _strict: bool) -> RbResult<AnyValue<'static>> {
|
182
|
+
let sec = ob.funcall::<_, _, i64>("to_i", ())?;
|
183
|
+
let nsec = ob.funcall::<_, _, i64>("nsec", ())?;
|
184
|
+
let v = sec * 1_000_000_000 + nsec;
|
185
|
+
// TODO support time zone when possible
|
186
|
+
// https://github.com/pola-rs/polars/issues/9103
|
187
|
+
Ok(AnyValue::Datetime(v, TimeUnit::Nanoseconds, &None))
|
188
|
+
}
|
189
|
+
|
190
|
+
fn get_datetime(ob: Value, _strict: bool) -> RbResult<AnyValue<'static>> {
|
191
|
+
let sec: i64 = ob.funcall("to_i", ())?;
|
192
|
+
let nsec: i64 = ob.funcall("nsec", ())?;
|
193
|
+
Ok(AnyValue::Datetime(
|
194
|
+
sec * 1_000_000_000 + nsec,
|
195
|
+
TimeUnit::Nanoseconds,
|
196
|
+
&None,
|
197
|
+
))
|
198
|
+
}
|
199
|
+
|
200
|
+
fn get_decimal(ob: Value, _strict: bool) -> RbResult<AnyValue<'static>> {
|
201
|
+
fn abs_decimal_from_digits(digits: String, exp: i32) -> Option<(i128, usize)> {
|
202
|
+
let exp = exp - (digits.len() as i32);
|
203
|
+
match digits.parse::<i128>() {
|
204
|
+
Ok(mut v) => {
|
205
|
+
let scale = if exp > 0 {
|
206
|
+
v = 10_i128
|
207
|
+
.checked_pow(exp as u32)
|
208
|
+
.and_then(|factor| v.checked_mul(factor))?;
|
209
|
+
0
|
210
|
+
} else {
|
211
|
+
(-exp) as usize
|
212
|
+
};
|
213
|
+
Some((v, scale))
|
214
|
+
}
|
215
|
+
Err(_) => None,
|
216
|
+
}
|
217
|
+
}
|
218
|
+
|
219
|
+
let (sign, digits, _, exp): (i8, String, i32, i32) = ob.funcall("split", ()).unwrap();
|
220
|
+
let (mut v, scale) = abs_decimal_from_digits(digits, exp).ok_or_else(|| {
|
221
|
+
RbPolarsErr::other("BigDecimal is too large to fit in Decimal128".into())
|
222
|
+
})?;
|
223
|
+
if sign < 0 {
|
224
|
+
// TODO better error
|
225
|
+
v = v.checked_neg().unwrap();
|
226
|
+
}
|
227
|
+
Ok(AnyValue::Decimal(v, scale))
|
228
|
+
}
|
229
|
+
|
230
|
+
if ob.is_nil() {
|
231
|
+
get_null(ob, strict)
|
232
|
+
} else if ob.is_kind_of(class::true_class()) || ob.is_kind_of(class::false_class()) {
|
233
|
+
get_bool(ob, strict)
|
234
|
+
} else if ob.is_kind_of(class::integer()) {
|
235
|
+
get_int(ob, strict)
|
236
|
+
} else if ob.is_kind_of(class::float()) {
|
237
|
+
get_float(ob, strict)
|
238
|
+
} else if ob.is_kind_of(class::string()) {
|
239
|
+
get_str(ob, strict)
|
240
|
+
} else if ob.is_kind_of(class::array()) {
|
241
|
+
get_list(ob, strict)
|
242
|
+
} else if ob.is_kind_of(class::hash()) {
|
243
|
+
get_struct(ob, strict)
|
244
|
+
} else if ob.respond_to("_s", true)? {
|
245
|
+
get_list_from_series(ob, strict)
|
246
|
+
// call is_a? for ActiveSupport::TimeWithZone
|
247
|
+
} else if ob.funcall::<_, _, bool>("is_a?", (class::time(),))? {
|
248
|
+
get_time(ob, strict)
|
249
|
+
} else if ob.is_kind_of(crate::rb_modules::datetime()) {
|
250
|
+
get_datetime(ob, strict)
|
251
|
+
} else if ob.is_kind_of(crate::rb_modules::date()) {
|
252
|
+
get_date(ob, strict)
|
253
|
+
} else if ob.is_kind_of(crate::rb_modules::bigdecimal()) {
|
254
|
+
get_decimal(ob, strict)
|
255
|
+
} else {
|
256
|
+
Err(RbPolarsErr::other(format!(
|
257
|
+
"object type not supported {:?}",
|
258
|
+
ob
|
259
|
+
)))
|
260
|
+
}
|
261
|
+
}
|
@@ -11,8 +11,8 @@ impl TryConvert for Wrap<StringChunked> {
|
|
11
11
|
let (seq, len) = get_rbseq(obj)?;
|
12
12
|
let mut builder = StringChunkedBuilder::new("", len);
|
13
13
|
|
14
|
-
for res in seq.
|
15
|
-
let item = res
|
14
|
+
for res in seq.into_iter() {
|
15
|
+
let item = res;
|
16
16
|
match String::try_convert(item) {
|
17
17
|
Ok(val) => builder.append_value(&val),
|
18
18
|
Err(_) => builder.append_null(),
|
@@ -27,8 +27,8 @@ impl TryConvert for Wrap<BinaryChunked> {
|
|
27
27
|
let (seq, len) = get_rbseq(obj)?;
|
28
28
|
let mut builder = BinaryChunkedBuilder::new("", len);
|
29
29
|
|
30
|
-
for res in seq.
|
31
|
-
let item = res
|
30
|
+
for res in seq.into_iter() {
|
31
|
+
let item = res;
|
32
32
|
match RString::try_convert(item) {
|
33
33
|
Ok(val) => builder.append_value(unsafe { val.as_slice() }),
|
34
34
|
Err(_) => builder.append_null(),
|