polars-df 0.10.0 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +27 -0
- data/Cargo.lock +392 -351
- data/README.md +6 -6
- data/ext/polars/Cargo.toml +12 -7
- data/ext/polars/src/batched_csv.rs +53 -52
- data/ext/polars/src/conversion/any_value.rs +261 -0
- data/ext/polars/src/conversion/chunked_array.rs +4 -4
- data/ext/polars/src/conversion/mod.rs +60 -66
- data/ext/polars/src/dataframe/construction.rs +184 -0
- data/ext/polars/src/dataframe/export.rs +48 -0
- data/ext/polars/src/dataframe/general.rs +597 -0
- data/ext/polars/src/dataframe/io.rs +473 -0
- data/ext/polars/src/dataframe/mod.rs +26 -0
- data/ext/polars/src/error.rs +26 -4
- data/ext/polars/src/expr/categorical.rs +0 -10
- data/ext/polars/src/expr/datetime.rs +4 -8
- data/ext/polars/src/expr/general.rs +129 -94
- data/ext/polars/src/expr/mod.rs +2 -2
- data/ext/polars/src/expr/rolling.rs +201 -77
- data/ext/polars/src/expr/string.rs +11 -36
- data/ext/polars/src/functions/eager.rs +10 -10
- data/ext/polars/src/functions/lazy.rs +23 -21
- data/ext/polars/src/functions/range.rs +69 -1
- data/ext/polars/src/interop/mod.rs +1 -0
- data/ext/polars/src/interop/numo/mod.rs +2 -0
- data/ext/polars/src/interop/numo/to_numo_df.rs +23 -0
- data/ext/polars/src/interop/numo/to_numo_series.rs +61 -0
- data/ext/polars/src/lazyframe/mod.rs +135 -136
- data/ext/polars/src/lib.rs +94 -59
- data/ext/polars/src/map/dataframe.rs +2 -2
- data/ext/polars/src/map/lazy.rs +5 -25
- data/ext/polars/src/map/series.rs +7 -1
- data/ext/polars/src/rb_modules.rs +25 -1
- data/ext/polars/src/series/aggregation.rs +49 -30
- data/ext/polars/src/series/arithmetic.rs +21 -11
- data/ext/polars/src/series/construction.rs +56 -38
- data/ext/polars/src/series/export.rs +131 -49
- data/ext/polars/src/series/mod.rs +32 -141
- data/ext/polars/src/sql.rs +3 -1
- data/lib/polars/array_expr.rb +4 -4
- data/lib/polars/batched_csv_reader.rb +11 -5
- data/lib/polars/cat_expr.rb +0 -36
- data/lib/polars/cat_name_space.rb +0 -37
- data/lib/polars/convert.rb +6 -1
- data/lib/polars/data_frame.rb +176 -403
- data/lib/polars/data_types.rb +1 -1
- data/lib/polars/date_time_expr.rb +525 -572
- data/lib/polars/date_time_name_space.rb +263 -460
- data/lib/polars/dynamic_group_by.rb +5 -5
- data/lib/polars/exceptions.rb +7 -0
- data/lib/polars/expr.rb +1394 -243
- data/lib/polars/expr_dispatch.rb +1 -1
- data/lib/polars/functions/aggregation/horizontal.rb +8 -8
- data/lib/polars/functions/as_datatype.rb +63 -40
- data/lib/polars/functions/lazy.rb +63 -14
- data/lib/polars/functions/lit.rb +1 -1
- data/lib/polars/functions/range/date_range.rb +90 -57
- data/lib/polars/functions/range/datetime_range.rb +149 -0
- data/lib/polars/functions/range/int_range.rb +2 -2
- data/lib/polars/functions/range/time_range.rb +141 -0
- data/lib/polars/functions/repeat.rb +1 -1
- data/lib/polars/functions/whenthen.rb +1 -1
- data/lib/polars/group_by.rb +88 -23
- data/lib/polars/io/avro.rb +24 -0
- data/lib/polars/{io.rb → io/csv.rb} +299 -493
- data/lib/polars/io/database.rb +73 -0
- data/lib/polars/io/ipc.rb +247 -0
- data/lib/polars/io/json.rb +29 -0
- data/lib/polars/io/ndjson.rb +80 -0
- data/lib/polars/io/parquet.rb +227 -0
- data/lib/polars/lazy_frame.rb +143 -272
- data/lib/polars/lazy_group_by.rb +100 -3
- data/lib/polars/list_expr.rb +11 -11
- data/lib/polars/list_name_space.rb +5 -1
- data/lib/polars/rolling_group_by.rb +7 -9
- data/lib/polars/series.rb +103 -187
- data/lib/polars/string_expr.rb +78 -102
- data/lib/polars/string_name_space.rb +5 -4
- data/lib/polars/testing.rb +2 -2
- data/lib/polars/utils/constants.rb +9 -0
- data/lib/polars/utils/convert.rb +97 -0
- data/lib/polars/utils/parse.rb +89 -0
- data/lib/polars/utils/various.rb +76 -0
- data/lib/polars/utils/wrap.rb +19 -0
- data/lib/polars/utils.rb +8 -300
- data/lib/polars/version.rb +1 -1
- data/lib/polars/whenthen.rb +6 -6
- data/lib/polars.rb +20 -1
- metadata +28 -7
- data/ext/polars/src/conversion/anyvalue.rs +0 -186
- data/ext/polars/src/dataframe.rs +0 -1208
data/README.md
CHANGED
@@ -1,8 +1,8 @@
|
|
1
|
-
# Polars
|
1
|
+
# Ruby Polars
|
2
2
|
|
3
3
|
:fire: Blazingly fast DataFrames for Ruby, powered by [Polars](https://github.com/pola-rs/polars)
|
4
4
|
|
5
|
-
[](https://github.com/ankane/ruby-polars/actions)
|
6
6
|
|
7
7
|
## Installation
|
8
8
|
|
@@ -420,16 +420,16 @@ View the [changelog](CHANGELOG.md)
|
|
420
420
|
|
421
421
|
Everyone is encouraged to help improve this project. Here are a few ways you can help:
|
422
422
|
|
423
|
-
- [Report bugs](https://github.com/ankane/polars-ruby/issues)
|
424
|
-
- Fix bugs and [submit pull requests](https://github.com/ankane/polars-ruby/pulls)
|
423
|
+
- [Report bugs](https://github.com/ankane/ruby-polars/issues)
|
424
|
+
- Fix bugs and [submit pull requests](https://github.com/ankane/ruby-polars/pulls)
|
425
425
|
- Write, clarify, or fix documentation
|
426
426
|
- Suggest or add new features
|
427
427
|
|
428
428
|
To get started with development:
|
429
429
|
|
430
430
|
```sh
|
431
|
-
git clone https://github.com/ankane/polars
|
432
|
-
cd polars
|
431
|
+
git clone https://github.com/ankane/ruby-polars.git
|
432
|
+
cd ruby-polars
|
433
433
|
bundle install
|
434
434
|
bundle exec rake compile
|
435
435
|
bundle exec rake test
|
data/ext/polars/Cargo.toml
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
[package]
|
2
2
|
name = "polars"
|
3
|
-
version = "0.10.0"
|
3
|
+
version = "0.12.0"
|
4
4
|
license = "MIT"
|
5
5
|
authors = ["Andrew Kane <andrew@ankane.org>"]
|
6
6
|
edition = "2021"
|
@@ -14,15 +14,15 @@ crate-type = ["cdylib"]
|
|
14
14
|
ahash = "0.8"
|
15
15
|
chrono = "0.4"
|
16
16
|
either = "1.8"
|
17
|
-
magnus = "0.
|
18
|
-
polars-core = "=0.
|
19
|
-
polars-parquet = "=0.
|
20
|
-
polars-utils = "=0.
|
17
|
+
magnus = "0.7"
|
18
|
+
polars-core = "=0.41.3"
|
19
|
+
polars-parquet = "=0.41.3"
|
20
|
+
polars-utils = "=0.41.3"
|
21
21
|
serde_json = "1"
|
22
22
|
smartstring = "1"
|
23
23
|
|
24
24
|
[dependencies.polars]
|
25
|
-
version = "=0.
|
25
|
+
version = "=0.41.3"
|
26
26
|
features = [
|
27
27
|
"abs",
|
28
28
|
"approx_unique",
|
@@ -41,7 +41,6 @@ features = [
|
|
41
41
|
"cumulative_eval",
|
42
42
|
"cutqcut",
|
43
43
|
"dataframe_arithmetic",
|
44
|
-
"date_offset",
|
45
44
|
"diagonal_concat",
|
46
45
|
"diff",
|
47
46
|
"dot_product",
|
@@ -55,6 +54,8 @@ features = [
|
|
55
54
|
"fmt",
|
56
55
|
"interpolate",
|
57
56
|
"ipc",
|
57
|
+
"ipc_streaming",
|
58
|
+
"is_between",
|
58
59
|
"is_first_distinct",
|
59
60
|
"is_in",
|
60
61
|
"is_last_distinct",
|
@@ -73,6 +74,9 @@ features = [
|
|
73
74
|
"meta",
|
74
75
|
"mode",
|
75
76
|
"moment",
|
77
|
+
"month_start",
|
78
|
+
"month_end",
|
79
|
+
"offset_by",
|
76
80
|
"object",
|
77
81
|
"parquet",
|
78
82
|
"partition_by",
|
@@ -91,6 +95,7 @@ features = [
|
|
91
95
|
"replace",
|
92
96
|
"rle",
|
93
97
|
"rolling_window",
|
98
|
+
"rolling_window_by",
|
94
99
|
"round_series",
|
95
100
|
"row_hash",
|
96
101
|
"search_sorted",
|
data/ext/polars/src/batched_csv.rs
CHANGED
@@ -1,23 +1,19 @@
|
|
1
|
+
use std::cell::RefCell;
|
2
|
+
use std::path::PathBuf;
|
3
|
+
use std::sync::Mutex;
|
4
|
+
|
1
5
|
use magnus::{prelude::*, RArray, Value};
|
6
|
+
use polars::io::csv::read::OwnedBatchedCsvReader;
|
2
7
|
use polars::io::mmap::MmapBytesReader;
|
3
8
|
use polars::io::RowIndex;
|
4
|
-
use polars::prelude::read_impl::OwnedBatchedCsvReader;
|
5
9
|
use polars::prelude::*;
|
6
|
-
use std::cell::RefCell;
|
7
|
-
use std::path::PathBuf;
|
8
10
|
|
9
11
|
use crate::conversion::*;
|
10
|
-
use crate::prelude::read_impl::OwnedBatchedCsvReaderMmap;
|
11
12
|
use crate::{RbDataFrame, RbPolarsErr, RbResult};
|
12
13
|
|
13
|
-
pub enum BatchedReader {
|
14
|
-
MMap(OwnedBatchedCsvReaderMmap),
|
15
|
-
Read(OwnedBatchedCsvReader),
|
16
|
-
}
|
17
|
-
|
18
14
|
#[magnus::wrap(class = "Polars::RbBatchedCsv")]
|
19
15
|
pub struct RbBatchedCsv {
|
20
|
-
pub reader: RefCell<BatchedReader>,
|
16
|
+
pub reader: RefCell<Mutex<OwnedBatchedCsvReader>>,
|
21
17
|
}
|
22
18
|
|
23
19
|
impl RbBatchedCsv {
|
@@ -44,19 +40,23 @@ impl RbBatchedCsv {
|
|
44
40
|
let comment_prefix = Option::<String>::try_convert(arguments[16])?;
|
45
41
|
let quote_char = Option::<String>::try_convert(arguments[17])?;
|
46
42
|
let null_values = Option::<Wrap<NullValues>>::try_convert(arguments[18])?;
|
47
|
-
let
|
48
|
-
let
|
49
|
-
let
|
50
|
-
let
|
51
|
-
let
|
52
|
-
let
|
43
|
+
let missing_utf8_is_empty_string = bool::try_convert(arguments[19])?;
|
44
|
+
let try_parse_dates = bool::try_convert(arguments[20])?;
|
45
|
+
let skip_rows_after_header = usize::try_convert(arguments[21])?;
|
46
|
+
let row_index = Option::<(String, IdxSize)>::try_convert(arguments[22])?;
|
47
|
+
let sample_size = usize::try_convert(arguments[23])?;
|
48
|
+
let eol_char = String::try_convert(arguments[24])?;
|
49
|
+
let raise_if_empty = bool::try_convert(arguments[25])?;
|
50
|
+
let truncate_ragged_lines = bool::try_convert(arguments[26])?;
|
51
|
+
let decimal_comma = bool::try_convert(arguments[27])?;
|
53
52
|
// end arguments
|
54
53
|
|
55
54
|
let null_values = null_values.map(|w| w.0);
|
56
55
|
let eol_char = eol_char.as_bytes()[0];
|
57
|
-
|
58
|
-
|
59
|
-
|
56
|
+
let row_index = row_index.map(|(name, offset)| RowIndex {
|
57
|
+
name: Arc::from(name.as_str()),
|
58
|
+
offset,
|
59
|
+
});
|
60
60
|
let quote_char = if let Some(s) = quote_char {
|
61
61
|
if s.is_empty() {
|
62
62
|
None
|
@@ -86,54 +86,55 @@ impl RbBatchedCsv {
|
|
86
86
|
|
87
87
|
let file = std::fs::File::open(path).map_err(RbPolarsErr::io)?;
|
88
88
|
let reader = Box::new(file) as Box<dyn MmapBytesReader>;
|
89
|
-
let reader =
|
90
|
-
.
|
91
|
-
.
|
89
|
+
let reader = CsvReadOptions::default()
|
90
|
+
.with_infer_schema_length(infer_schema_length)
|
91
|
+
.with_has_header(has_header)
|
92
92
|
.with_n_rows(n_rows)
|
93
|
-
.with_separator(separator.as_bytes()[0])
|
94
93
|
.with_skip_rows(skip_rows)
|
95
94
|
.with_ignore_errors(ignore_errors)
|
96
|
-
.with_projection(projection)
|
95
|
+
.with_projection(projection.map(Arc::new))
|
97
96
|
.with_rechunk(rechunk)
|
98
97
|
.with_chunk_size(chunk_size)
|
99
|
-
.
|
100
|
-
.with_columns(columns)
|
98
|
+
.with_columns(columns.map(Arc::from))
|
101
99
|
.with_n_threads(n_threads)
|
102
|
-
.
|
103
|
-
.
|
104
|
-
.with_comment_prefix(comment_prefix.as_deref())
|
105
|
-
.with_null_values(null_values)
|
106
|
-
.with_try_parse_dates(try_parse_dates)
|
107
|
-
.with_quote_char(quote_char)
|
108
|
-
.with_end_of_line_char(eol_char)
|
100
|
+
.with_dtype_overwrite(overwrite_dtype_slice.map(Arc::new))
|
101
|
+
.with_low_memory(low_memory)
|
109
102
|
.with_skip_rows_after_header(skip_rows_after_header)
|
110
103
|
.with_row_index(row_index)
|
111
|
-
.
|
112
|
-
.
|
104
|
+
.with_sample_size(sample_size)
|
105
|
+
.with_raise_if_empty(raise_if_empty)
|
106
|
+
.with_parse_options(
|
107
|
+
CsvParseOptions::default()
|
108
|
+
.with_separator(separator.as_bytes()[0])
|
109
|
+
.with_encoding(encoding.0)
|
110
|
+
.with_missing_is_null(!missing_utf8_is_empty_string)
|
111
|
+
.with_comment_prefix(comment_prefix.as_deref())
|
112
|
+
.with_null_values(null_values)
|
113
|
+
.with_try_parse_dates(try_parse_dates)
|
114
|
+
.with_quote_char(quote_char)
|
115
|
+
.with_eol_char(eol_char)
|
116
|
+
.with_truncate_ragged_lines(truncate_ragged_lines)
|
117
|
+
.with_decimal_comma(decimal_comma),
|
118
|
+
)
|
119
|
+
.into_reader_with_file_handle(reader);
|
113
120
|
|
114
|
-
let reader =
|
115
|
-
|
116
|
-
|
117
|
-
.map_err(RbPolarsErr::from)?;
|
118
|
-
BatchedReader::Read(reader)
|
119
|
-
} else {
|
120
|
-
let reader = reader
|
121
|
-
.batched_mmap(overwrite_dtype.map(Arc::new))
|
122
|
-
.map_err(RbPolarsErr::from)?;
|
123
|
-
BatchedReader::MMap(reader)
|
124
|
-
};
|
121
|
+
let reader = reader
|
122
|
+
.batched(overwrite_dtype.map(Arc::new))
|
123
|
+
.map_err(RbPolarsErr::from)?;
|
125
124
|
|
126
125
|
Ok(RbBatchedCsv {
|
127
|
-
reader: RefCell::new(reader),
|
126
|
+
reader: RefCell::new(Mutex::new(reader)),
|
128
127
|
})
|
129
128
|
}
|
130
129
|
|
131
130
|
pub fn next_batches(&self, n: usize) -> RbResult<Option<RArray>> {
|
132
|
-
let
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
131
|
+
let reader = &self.reader;
|
132
|
+
let batches = reader
|
133
|
+
.borrow()
|
134
|
+
.lock()
|
135
|
+
.map_err(|e| RbPolarsErr::other(e.to_string()))?
|
136
|
+
.next_batches(n)
|
137
|
+
.map_err(RbPolarsErr::from)?;
|
137
138
|
|
138
139
|
Ok(batches.map(|batches| RArray::from_iter(batches.into_iter().map(RbDataFrame::from))))
|
139
140
|
}
|
data/ext/polars/src/conversion/any_value.rs
ADDED
@@ -0,0 +1,261 @@
|
|
1
|
+
use magnus::encoding::{EncodingCapable, Index};
|
2
|
+
use magnus::{
|
3
|
+
class, prelude::*, r_hash::ForEach, IntoValue, RArray, RHash, RString, Ruby, TryConvert, Value,
|
4
|
+
};
|
5
|
+
use polars::prelude::*;
|
6
|
+
use polars_core::utils::any_values_to_supertype_and_n_dtypes;
|
7
|
+
|
8
|
+
use super::{struct_dict, ObjectValue, Wrap};
|
9
|
+
|
10
|
+
use crate::error::RbOverflowError;
|
11
|
+
use crate::rb_modules::utils;
|
12
|
+
use crate::{RbPolarsErr, RbResult, RbSeries};
|
13
|
+
|
14
|
+
impl IntoValue for Wrap<AnyValue<'_>> {
|
15
|
+
fn into_value_with(self, ruby: &Ruby) -> Value {
|
16
|
+
any_value_into_rb_object(self.0, ruby)
|
17
|
+
}
|
18
|
+
}
|
19
|
+
|
20
|
+
impl<'s> TryConvert for Wrap<AnyValue<'s>> {
|
21
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
22
|
+
rb_object_to_any_value(ob, true).map(Wrap)
|
23
|
+
}
|
24
|
+
}
|
25
|
+
|
26
|
+
pub(crate) fn any_value_into_rb_object(av: AnyValue, ruby: &Ruby) -> Value {
|
27
|
+
match av {
|
28
|
+
AnyValue::UInt8(v) => ruby.into_value(v),
|
29
|
+
AnyValue::UInt16(v) => ruby.into_value(v),
|
30
|
+
AnyValue::UInt32(v) => ruby.into_value(v),
|
31
|
+
AnyValue::UInt64(v) => ruby.into_value(v),
|
32
|
+
AnyValue::Int8(v) => ruby.into_value(v),
|
33
|
+
AnyValue::Int16(v) => ruby.into_value(v),
|
34
|
+
AnyValue::Int32(v) => ruby.into_value(v),
|
35
|
+
AnyValue::Int64(v) => ruby.into_value(v),
|
36
|
+
AnyValue::Float32(v) => ruby.into_value(v),
|
37
|
+
AnyValue::Float64(v) => ruby.into_value(v),
|
38
|
+
AnyValue::Null => ruby.qnil().as_value(),
|
39
|
+
AnyValue::Boolean(v) => ruby.into_value(v),
|
40
|
+
AnyValue::String(v) => ruby.into_value(v),
|
41
|
+
AnyValue::StringOwned(v) => ruby.into_value(v.as_str()),
|
42
|
+
AnyValue::Categorical(idx, rev, arr) | AnyValue::Enum(idx, rev, arr) => {
|
43
|
+
let s = if arr.is_null() {
|
44
|
+
rev.get(idx)
|
45
|
+
} else {
|
46
|
+
unsafe { arr.deref_unchecked().value(idx as usize) }
|
47
|
+
};
|
48
|
+
s.into_value()
|
49
|
+
}
|
50
|
+
AnyValue::Date(v) => utils().funcall("_to_ruby_date", (v,)).unwrap(),
|
51
|
+
AnyValue::Datetime(v, time_unit, time_zone) => {
|
52
|
+
let time_unit = time_unit.to_ascii();
|
53
|
+
utils()
|
54
|
+
.funcall("_to_ruby_datetime", (v, time_unit, time_zone.clone()))
|
55
|
+
.unwrap()
|
56
|
+
}
|
57
|
+
AnyValue::Duration(v, time_unit) => {
|
58
|
+
let time_unit = time_unit.to_ascii();
|
59
|
+
utils()
|
60
|
+
.funcall("_to_ruby_duration", (v, time_unit))
|
61
|
+
.unwrap()
|
62
|
+
}
|
63
|
+
AnyValue::Time(v) => utils().funcall("_to_ruby_time", (v,)).unwrap(),
|
64
|
+
AnyValue::Array(v, _) | AnyValue::List(v) => RbSeries::new(v).to_a().into_value(),
|
65
|
+
ref av @ AnyValue::Struct(_, _, flds) => struct_dict(av._iter_struct_av(), flds),
|
66
|
+
AnyValue::StructOwned(payload) => struct_dict(payload.0.into_iter(), &payload.1),
|
67
|
+
AnyValue::Object(v) => {
|
68
|
+
let object = v.as_any().downcast_ref::<ObjectValue>().unwrap();
|
69
|
+
object.to_object()
|
70
|
+
}
|
71
|
+
AnyValue::ObjectOwned(v) => {
|
72
|
+
let object = v.0.as_any().downcast_ref::<ObjectValue>().unwrap();
|
73
|
+
object.to_object()
|
74
|
+
}
|
75
|
+
AnyValue::Binary(v) => RString::from_slice(v).into_value(),
|
76
|
+
AnyValue::BinaryOwned(v) => RString::from_slice(&v).into_value(),
|
77
|
+
AnyValue::Decimal(v, scale) => utils()
|
78
|
+
.funcall("_to_ruby_decimal", (v.to_string(), -(scale as i32)))
|
79
|
+
.unwrap(),
|
80
|
+
}
|
81
|
+
}
|
82
|
+
|
83
|
+
pub(crate) fn rb_object_to_any_value<'s>(ob: Value, strict: bool) -> RbResult<AnyValue<'s>> {
|
84
|
+
// Conversion functions.
|
85
|
+
fn get_null(_ob: Value, _strict: bool) -> RbResult<AnyValue<'static>> {
|
86
|
+
Ok(AnyValue::Null)
|
87
|
+
}
|
88
|
+
|
89
|
+
fn get_bool(ob: Value, _strict: bool) -> RbResult<AnyValue<'static>> {
|
90
|
+
let b = bool::try_convert(ob)?;
|
91
|
+
Ok(AnyValue::Boolean(b))
|
92
|
+
}
|
93
|
+
|
94
|
+
fn get_int(ob: Value, strict: bool) -> RbResult<AnyValue<'static>> {
|
95
|
+
if let Ok(v) = i64::try_convert(ob) {
|
96
|
+
Ok(AnyValue::Int64(v))
|
97
|
+
} else if let Ok(v) = u64::try_convert(ob) {
|
98
|
+
Ok(AnyValue::UInt64(v))
|
99
|
+
} else if !strict {
|
100
|
+
let f = f64::try_convert(ob)?;
|
101
|
+
Ok(AnyValue::Float64(f))
|
102
|
+
} else {
|
103
|
+
Err(RbOverflowError::new_err(format!(
|
104
|
+
"int value too large for Polars integer types: {ob}"
|
105
|
+
)))
|
106
|
+
}
|
107
|
+
}
|
108
|
+
|
109
|
+
fn get_float(ob: Value, _strict: bool) -> RbResult<AnyValue<'static>> {
|
110
|
+
Ok(AnyValue::Float64(f64::try_convert(ob)?))
|
111
|
+
}
|
112
|
+
|
113
|
+
fn get_str(ob: Value, _strict: bool) -> RbResult<AnyValue<'static>> {
|
114
|
+
let v = RString::from_value(ob).unwrap();
|
115
|
+
if v.enc_get() == Index::utf8() {
|
116
|
+
Ok(AnyValue::StringOwned(v.to_string()?.into()))
|
117
|
+
} else {
|
118
|
+
Ok(AnyValue::BinaryOwned(unsafe { v.as_slice() }.to_vec()))
|
119
|
+
}
|
120
|
+
}
|
121
|
+
|
122
|
+
fn get_list(ob: Value, _strict: bool) -> RbResult<AnyValue<'static>> {
|
123
|
+
let v = RArray::from_value(ob).unwrap();
|
124
|
+
if v.is_empty() {
|
125
|
+
Ok(AnyValue::List(Series::new_empty("", &DataType::Null)))
|
126
|
+
} else {
|
127
|
+
let list = v;
|
128
|
+
|
129
|
+
let mut avs = Vec::with_capacity(25);
|
130
|
+
let mut iter = list.into_iter();
|
131
|
+
|
132
|
+
for item in (&mut iter).take(25) {
|
133
|
+
avs.push(Wrap::<AnyValue>::try_convert(item)?.0)
|
134
|
+
}
|
135
|
+
|
136
|
+
let (dtype, _n_types) =
|
137
|
+
any_values_to_supertype_and_n_dtypes(&avs).map_err(RbPolarsErr::from)?;
|
138
|
+
|
139
|
+
// push the rest
|
140
|
+
avs.reserve(list.len());
|
141
|
+
for item in iter {
|
142
|
+
avs.push(Wrap::<AnyValue>::try_convert(item)?.0)
|
143
|
+
}
|
144
|
+
|
145
|
+
let s = Series::from_any_values_and_dtype("", &avs, &dtype, true)
|
146
|
+
.map_err(RbPolarsErr::from)?;
|
147
|
+
Ok(AnyValue::List(s))
|
148
|
+
}
|
149
|
+
}
|
150
|
+
|
151
|
+
fn get_list_from_series(ob: Value, _strict: bool) -> RbResult<AnyValue<'static>> {
|
152
|
+
let s = super::get_series(ob)?;
|
153
|
+
Ok(AnyValue::List(s))
|
154
|
+
}
|
155
|
+
|
156
|
+
fn get_struct(ob: Value, _strict: bool) -> RbResult<AnyValue<'static>> {
|
157
|
+
let dict = RHash::from_value(ob).unwrap();
|
158
|
+
let len = dict.len();
|
159
|
+
let mut keys = Vec::with_capacity(len);
|
160
|
+
let mut vals = Vec::with_capacity(len);
|
161
|
+
dict.foreach(|k: Value, v: Value| {
|
162
|
+
let key = String::try_convert(k)?;
|
163
|
+
let val = Wrap::<AnyValue>::try_convert(v)?.0;
|
164
|
+
let dtype = DataType::from(&val);
|
165
|
+
keys.push(Field::new(&key, dtype));
|
166
|
+
vals.push(val);
|
167
|
+
Ok(ForEach::Continue)
|
168
|
+
})?;
|
169
|
+
Ok(AnyValue::StructOwned(Box::new((vals, keys))))
|
170
|
+
}
|
171
|
+
|
172
|
+
fn get_date(ob: Value, _strict: bool) -> RbResult<AnyValue<'static>> {
|
173
|
+
// convert to DateTime for UTC
|
174
|
+
let v = ob
|
175
|
+
.funcall::<_, _, Value>("to_datetime", ())?
|
176
|
+
.funcall::<_, _, Value>("to_time", ())?
|
177
|
+
.funcall::<_, _, i64>("to_i", ())?;
|
178
|
+
Ok(AnyValue::Date((v / 86400) as i32))
|
179
|
+
}
|
180
|
+
|
181
|
+
fn get_time(ob: Value, _strict: bool) -> RbResult<AnyValue<'static>> {
|
182
|
+
let sec = ob.funcall::<_, _, i64>("to_i", ())?;
|
183
|
+
let nsec = ob.funcall::<_, _, i64>("nsec", ())?;
|
184
|
+
let v = sec * 1_000_000_000 + nsec;
|
185
|
+
// TODO support time zone when possible
|
186
|
+
// https://github.com/pola-rs/polars/issues/9103
|
187
|
+
Ok(AnyValue::Datetime(v, TimeUnit::Nanoseconds, &None))
|
188
|
+
}
|
189
|
+
|
190
|
+
fn get_datetime(ob: Value, _strict: bool) -> RbResult<AnyValue<'static>> {
|
191
|
+
let sec: i64 = ob.funcall("to_i", ())?;
|
192
|
+
let nsec: i64 = ob.funcall("nsec", ())?;
|
193
|
+
Ok(AnyValue::Datetime(
|
194
|
+
sec * 1_000_000_000 + nsec,
|
195
|
+
TimeUnit::Nanoseconds,
|
196
|
+
&None,
|
197
|
+
))
|
198
|
+
}
|
199
|
+
|
200
|
+
fn get_decimal(ob: Value, _strict: bool) -> RbResult<AnyValue<'static>> {
|
201
|
+
fn abs_decimal_from_digits(digits: String, exp: i32) -> Option<(i128, usize)> {
|
202
|
+
let exp = exp - (digits.len() as i32);
|
203
|
+
match digits.parse::<i128>() {
|
204
|
+
Ok(mut v) => {
|
205
|
+
let scale = if exp > 0 {
|
206
|
+
v = 10_i128
|
207
|
+
.checked_pow(exp as u32)
|
208
|
+
.and_then(|factor| v.checked_mul(factor))?;
|
209
|
+
0
|
210
|
+
} else {
|
211
|
+
(-exp) as usize
|
212
|
+
};
|
213
|
+
Some((v, scale))
|
214
|
+
}
|
215
|
+
Err(_) => None,
|
216
|
+
}
|
217
|
+
}
|
218
|
+
|
219
|
+
let (sign, digits, _, exp): (i8, String, i32, i32) = ob.funcall("split", ()).unwrap();
|
220
|
+
let (mut v, scale) = abs_decimal_from_digits(digits, exp).ok_or_else(|| {
|
221
|
+
RbPolarsErr::other("BigDecimal is too large to fit in Decimal128".into())
|
222
|
+
})?;
|
223
|
+
if sign < 0 {
|
224
|
+
// TODO better error
|
225
|
+
v = v.checked_neg().unwrap();
|
226
|
+
}
|
227
|
+
Ok(AnyValue::Decimal(v, scale))
|
228
|
+
}
|
229
|
+
|
230
|
+
if ob.is_nil() {
|
231
|
+
get_null(ob, strict)
|
232
|
+
} else if ob.is_kind_of(class::true_class()) || ob.is_kind_of(class::false_class()) {
|
233
|
+
get_bool(ob, strict)
|
234
|
+
} else if ob.is_kind_of(class::integer()) {
|
235
|
+
get_int(ob, strict)
|
236
|
+
} else if ob.is_kind_of(class::float()) {
|
237
|
+
get_float(ob, strict)
|
238
|
+
} else if ob.is_kind_of(class::string()) {
|
239
|
+
get_str(ob, strict)
|
240
|
+
} else if ob.is_kind_of(class::array()) {
|
241
|
+
get_list(ob, strict)
|
242
|
+
} else if ob.is_kind_of(class::hash()) {
|
243
|
+
get_struct(ob, strict)
|
244
|
+
} else if ob.respond_to("_s", true)? {
|
245
|
+
get_list_from_series(ob, strict)
|
246
|
+
// call is_a? for ActiveSupport::TimeWithZone
|
247
|
+
} else if ob.funcall::<_, _, bool>("is_a?", (class::time(),))? {
|
248
|
+
get_time(ob, strict)
|
249
|
+
} else if ob.is_kind_of(crate::rb_modules::datetime()) {
|
250
|
+
get_datetime(ob, strict)
|
251
|
+
} else if ob.is_kind_of(crate::rb_modules::date()) {
|
252
|
+
get_date(ob, strict)
|
253
|
+
} else if ob.is_kind_of(crate::rb_modules::bigdecimal()) {
|
254
|
+
get_decimal(ob, strict)
|
255
|
+
} else {
|
256
|
+
Err(RbPolarsErr::other(format!(
|
257
|
+
"object type not supported {:?}",
|
258
|
+
ob
|
259
|
+
)))
|
260
|
+
}
|
261
|
+
}
|
data/ext/polars/src/conversion/chunked_array.rs
CHANGED
@@ -11,8 +11,8 @@ impl TryConvert for Wrap<StringChunked> {
|
|
11
11
|
let (seq, len) = get_rbseq(obj)?;
|
12
12
|
let mut builder = StringChunkedBuilder::new("", len);
|
13
13
|
|
14
|
-
for res in seq.
|
15
|
-
let item = res
|
14
|
+
for res in seq.into_iter() {
|
15
|
+
let item = res;
|
16
16
|
match String::try_convert(item) {
|
17
17
|
Ok(val) => builder.append_value(&val),
|
18
18
|
Err(_) => builder.append_null(),
|
@@ -27,8 +27,8 @@ impl TryConvert for Wrap<BinaryChunked> {
|
|
27
27
|
let (seq, len) = get_rbseq(obj)?;
|
28
28
|
let mut builder = BinaryChunkedBuilder::new("", len);
|
29
29
|
|
30
|
-
for res in seq.
|
31
|
-
let item = res
|
30
|
+
for res in seq.into_iter() {
|
31
|
+
let item = res;
|
32
32
|
match RString::try_convert(item) {
|
33
33
|
Ok(val) => builder.append_value(unsafe { val.as_slice() }),
|
34
34
|
Err(_) => builder.append_null(),
|