polars-df 0.14.0 → 0.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +25 -0
- data/Cargo.lock +1296 -283
- data/LICENSE.txt +1 -0
- data/README.md +1 -2
- data/ext/polars/Cargo.toml +15 -5
- data/ext/polars/src/batched_csv.rs +7 -10
- data/ext/polars/src/conversion/any_value.rs +31 -21
- data/ext/polars/src/conversion/mod.rs +125 -28
- data/ext/polars/src/dataframe/construction.rs +0 -3
- data/ext/polars/src/dataframe/export.rs +9 -2
- data/ext/polars/src/dataframe/general.rs +16 -11
- data/ext/polars/src/dataframe/io.rs +73 -169
- data/ext/polars/src/dataframe/mod.rs +1 -0
- data/ext/polars/src/dataframe/serde.rs +15 -0
- data/ext/polars/src/error.rs +31 -48
- data/ext/polars/src/exceptions.rs +24 -0
- data/ext/polars/src/expr/binary.rs +4 -42
- data/ext/polars/src/expr/datetime.rs +5 -4
- data/ext/polars/src/expr/general.rs +13 -22
- data/ext/polars/src/expr/list.rs +18 -11
- data/ext/polars/src/expr/rolling.rs +6 -7
- data/ext/polars/src/expr/string.rs +9 -36
- data/ext/polars/src/file.rs +59 -22
- data/ext/polars/src/functions/business.rs +15 -0
- data/ext/polars/src/functions/lazy.rs +17 -8
- data/ext/polars/src/functions/mod.rs +1 -0
- data/ext/polars/src/interop/arrow/mod.rs +1 -0
- data/ext/polars/src/interop/arrow/to_ruby.rs +83 -0
- data/ext/polars/src/interop/mod.rs +1 -0
- data/ext/polars/src/lazyframe/general.rs +877 -0
- data/ext/polars/src/lazyframe/mod.rs +3 -827
- data/ext/polars/src/lazyframe/serde.rs +31 -0
- data/ext/polars/src/lib.rs +45 -14
- data/ext/polars/src/map/dataframe.rs +10 -6
- data/ext/polars/src/map/lazy.rs +65 -4
- data/ext/polars/src/map/mod.rs +9 -8
- data/ext/polars/src/on_startup.rs +1 -1
- data/ext/polars/src/series/aggregation.rs +1 -5
- data/ext/polars/src/series/arithmetic.rs +10 -10
- data/ext/polars/src/series/construction.rs +2 -2
- data/ext/polars/src/series/export.rs +1 -1
- data/ext/polars/src/series/general.rs +643 -0
- data/ext/polars/src/series/import.rs +55 -0
- data/ext/polars/src/series/mod.rs +11 -638
- data/ext/polars/src/series/scatter.rs +2 -2
- data/ext/polars/src/utils.rs +0 -20
- data/lib/polars/batched_csv_reader.rb +0 -2
- data/lib/polars/binary_expr.rb +133 -9
- data/lib/polars/binary_name_space.rb +101 -6
- data/lib/polars/config.rb +4 -0
- data/lib/polars/data_frame.rb +275 -52
- data/lib/polars/data_type_group.rb +28 -0
- data/lib/polars/data_types.rb +2 -0
- data/lib/polars/date_time_expr.rb +244 -0
- data/lib/polars/date_time_name_space.rb +87 -0
- data/lib/polars/expr.rb +103 -2
- data/lib/polars/functions/as_datatype.rb +51 -2
- data/lib/polars/functions/col.rb +1 -1
- data/lib/polars/functions/eager.rb +1 -3
- data/lib/polars/functions/lazy.rb +88 -10
- data/lib/polars/functions/range/time_range.rb +21 -21
- data/lib/polars/io/csv.rb +14 -16
- data/lib/polars/io/database.rb +2 -2
- data/lib/polars/io/ipc.rb +14 -4
- data/lib/polars/io/ndjson.rb +10 -0
- data/lib/polars/io/parquet.rb +168 -111
- data/lib/polars/lazy_frame.rb +649 -15
- data/lib/polars/list_name_space.rb +169 -0
- data/lib/polars/selectors.rb +1144 -0
- data/lib/polars/series.rb +465 -35
- data/lib/polars/string_cache.rb +27 -1
- data/lib/polars/string_expr.rb +0 -1
- data/lib/polars/string_name_space.rb +73 -3
- data/lib/polars/struct_name_space.rb +31 -7
- data/lib/polars/utils/various.rb +5 -1
- data/lib/polars/utils.rb +45 -10
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +2 -1
- metadata +14 -4
- data/lib/polars/functions.rb +0 -57
data/LICENSE.txt
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
Copyright (c) 2020 Ritchie Vink
|
2
2
|
Copyright (c) 2022-2024 Andrew Kane
|
3
|
+
Some portions Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
3
4
|
|
4
5
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
5
6
|
of this software and associated documentation files (the "Software"), to deal
|
data/README.md
CHANGED
@@ -17,8 +17,7 @@ gem "polars-df"
|
|
17
17
|
This library follows the [Polars Python API](https://pola-rs.github.io/polars/py-polars/html/reference/index.html).
|
18
18
|
|
19
19
|
```ruby
|
20
|
-
Polars.
|
21
|
-
.lazy
|
20
|
+
Polars.scan_csv("iris.csv")
|
22
21
|
.filter(Polars.col("sepal_length") > 5)
|
23
22
|
.group_by("species")
|
24
23
|
.agg(Polars.all.sum)
|
data/ext/polars/Cargo.toml
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
[package]
|
2
2
|
name = "polars"
|
3
|
-
version = "0.
|
3
|
+
version = "0.15.0"
|
4
4
|
license = "MIT"
|
5
5
|
authors = ["Andrew Kane <andrew@ankane.org>"]
|
6
6
|
edition = "2021"
|
@@ -12,16 +12,20 @@ crate-type = ["cdylib"]
|
|
12
12
|
|
13
13
|
[dependencies]
|
14
14
|
ahash = "0.8"
|
15
|
+
arrow = { package = "polars-arrow", version = "=0.44.2" }
|
16
|
+
bytes = "1"
|
15
17
|
chrono = "0.4"
|
16
18
|
either = "1.8"
|
17
19
|
magnus = "0.7"
|
18
|
-
polars-core = "=0.
|
19
|
-
polars-
|
20
|
-
polars-
|
20
|
+
polars-core = "=0.44.2"
|
21
|
+
polars-plan = "=0.44.2"
|
22
|
+
polars-parquet = "=0.44.2"
|
23
|
+
polars-utils = "=0.44.2"
|
24
|
+
regex = "1"
|
21
25
|
serde_json = "1"
|
22
26
|
|
23
27
|
[dependencies.polars]
|
24
|
-
version = "=0.
|
28
|
+
version = "=0.44.2"
|
25
29
|
features = [
|
26
30
|
"abs",
|
27
31
|
"approx_unique",
|
@@ -30,7 +34,11 @@ features = [
|
|
30
34
|
"array_count",
|
31
35
|
"asof_join",
|
32
36
|
"avro",
|
37
|
+
"aws",
|
38
|
+
"azure",
|
33
39
|
"binary_encoding",
|
40
|
+
"business",
|
41
|
+
"cloud",
|
34
42
|
"concat_str",
|
35
43
|
"cov",
|
36
44
|
"cross_join",
|
@@ -51,6 +59,8 @@ features = [
|
|
51
59
|
"extract_jsonpath",
|
52
60
|
"find_many",
|
53
61
|
"fmt",
|
62
|
+
"gcp",
|
63
|
+
"http",
|
54
64
|
"interpolate",
|
55
65
|
"ipc",
|
56
66
|
"ipc_streaming",
|
@@ -34,8 +34,7 @@ impl RbBatchedCsv {
|
|
34
34
|
let n_threads = Option::<usize>::try_convert(arguments[11])?;
|
35
35
|
let path = PathBuf::try_convert(arguments[12])?;
|
36
36
|
let overwrite_dtype = Option::<Vec<(String, Wrap<DataType>)>>::try_convert(arguments[13])?;
|
37
|
-
|
38
|
-
let overwrite_dtype_slice = Option::<Vec<Wrap<DataType>>>::None; // Option::<Vec<Wrap<DataType>>>::try_convert(arguments[14])?;
|
37
|
+
let overwrite_dtype_slice = Option::<Vec<Wrap<DataType>>>::try_convert(arguments[14])?;
|
39
38
|
let low_memory = bool::try_convert(arguments[15])?;
|
40
39
|
let comment_prefix = Option::<String>::try_convert(arguments[16])?;
|
41
40
|
let quote_char = Option::<String>::try_convert(arguments[17])?;
|
@@ -44,11 +43,10 @@ impl RbBatchedCsv {
|
|
44
43
|
let try_parse_dates = bool::try_convert(arguments[20])?;
|
45
44
|
let skip_rows_after_header = usize::try_convert(arguments[21])?;
|
46
45
|
let row_index = Option::<(String, IdxSize)>::try_convert(arguments[22])?;
|
47
|
-
let
|
48
|
-
let
|
49
|
-
let
|
50
|
-
let
|
51
|
-
let decimal_comma = bool::try_convert(arguments[27])?;
|
46
|
+
let eol_char = String::try_convert(arguments[23])?;
|
47
|
+
let raise_if_empty = bool::try_convert(arguments[24])?;
|
48
|
+
let truncate_ragged_lines = bool::try_convert(arguments[25])?;
|
49
|
+
let decimal_comma = bool::try_convert(arguments[26])?;
|
52
50
|
// end arguments
|
53
51
|
|
54
52
|
let null_values = null_values.map(|w| w.0);
|
@@ -84,7 +82,7 @@ impl RbBatchedCsv {
|
|
84
82
|
.collect::<Vec<_>>()
|
85
83
|
});
|
86
84
|
|
87
|
-
let file = std::fs::File::open(path).map_err(RbPolarsErr::
|
85
|
+
let file = std::fs::File::open(path).map_err(RbPolarsErr::from)?;
|
88
86
|
let reader = Box::new(file) as Box<dyn MmapBytesReader>;
|
89
87
|
let reader = CsvReadOptions::default()
|
90
88
|
.with_infer_schema_length(infer_schema_length)
|
@@ -101,7 +99,6 @@ impl RbBatchedCsv {
|
|
101
99
|
.with_low_memory(low_memory)
|
102
100
|
.with_skip_rows_after_header(skip_rows_after_header)
|
103
101
|
.with_row_index(row_index)
|
104
|
-
.with_sample_size(sample_size)
|
105
102
|
.with_raise_if_empty(raise_if_empty)
|
106
103
|
.with_parse_options(
|
107
104
|
CsvParseOptions::default()
|
@@ -132,7 +129,7 @@ impl RbBatchedCsv {
|
|
132
129
|
let batches = reader
|
133
130
|
.borrow()
|
134
131
|
.lock()
|
135
|
-
.map_err(|e| RbPolarsErr::
|
132
|
+
.map_err(|e| RbPolarsErr::Other(e.to_string()))?
|
136
133
|
.next_batches(n)
|
137
134
|
.map_err(RbPolarsErr::from)?;
|
138
135
|
|
@@ -7,9 +7,9 @@ use polars_core::utils::any_values_to_supertype_and_n_dtypes;
|
|
7
7
|
|
8
8
|
use super::{struct_dict, ObjectValue, Wrap};
|
9
9
|
|
10
|
-
use crate::
|
10
|
+
use crate::exceptions::RbOverflowError;
|
11
11
|
use crate::rb_modules::utils;
|
12
|
-
use crate::{RbPolarsErr, RbResult, RbSeries};
|
12
|
+
use crate::{RbErr, RbPolarsErr, RbResult, RbSeries};
|
13
13
|
|
14
14
|
impl IntoValue for Wrap<AnyValue<'_>> {
|
15
15
|
fn into_value_with(self, ruby: &Ruby) -> Value {
|
@@ -47,15 +47,20 @@ pub(crate) fn any_value_into_rb_object(av: AnyValue, ruby: &Ruby) -> Value {
|
|
47
47
|
};
|
48
48
|
s.into_value()
|
49
49
|
}
|
50
|
+
AnyValue::CategoricalOwned(idx, rev, arr) | AnyValue::EnumOwned(idx, rev, arr) => {
|
51
|
+
let s = if arr.is_null() {
|
52
|
+
rev.get(idx)
|
53
|
+
} else {
|
54
|
+
unsafe { arr.deref_unchecked().value(idx as usize) }
|
55
|
+
};
|
56
|
+
s.into_value()
|
57
|
+
}
|
50
58
|
AnyValue::Date(v) => utils().funcall("_to_ruby_date", (v,)).unwrap(),
|
51
59
|
AnyValue::Datetime(v, time_unit, time_zone) => {
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
(v, time_unit, time_zone.as_ref().map(|v| v.to_string())),
|
57
|
-
)
|
58
|
-
.unwrap()
|
60
|
+
datetime_to_rb_object(v, time_unit, time_zone)
|
61
|
+
}
|
62
|
+
AnyValue::DatetimeOwned(v, time_unit, time_zone) => {
|
63
|
+
datetime_to_rb_object(v, time_unit, time_zone.as_ref().map(AsRef::as_ref))
|
59
64
|
}
|
60
65
|
AnyValue::Duration(v, time_unit) => {
|
61
66
|
let time_unit = time_unit.to_ascii();
|
@@ -69,11 +74,11 @@ pub(crate) fn any_value_into_rb_object(av: AnyValue, ruby: &Ruby) -> Value {
|
|
69
74
|
AnyValue::StructOwned(payload) => struct_dict(payload.0.into_iter(), &payload.1),
|
70
75
|
AnyValue::Object(v) => {
|
71
76
|
let object = v.as_any().downcast_ref::<ObjectValue>().unwrap();
|
72
|
-
object.
|
77
|
+
object.to_value()
|
73
78
|
}
|
74
79
|
AnyValue::ObjectOwned(v) => {
|
75
80
|
let object = v.0.as_any().downcast_ref::<ObjectValue>().unwrap();
|
76
|
-
object.
|
81
|
+
object.to_value()
|
77
82
|
}
|
78
83
|
AnyValue::Binary(v) => RString::from_slice(v).into_value(),
|
79
84
|
AnyValue::BinaryOwned(v) => RString::from_slice(&v).into_value(),
|
@@ -83,6 +88,13 @@ pub(crate) fn any_value_into_rb_object(av: AnyValue, ruby: &Ruby) -> Value {
|
|
83
88
|
}
|
84
89
|
}
|
85
90
|
|
91
|
+
fn datetime_to_rb_object(v: i64, tu: TimeUnit, tz: Option<&TimeZone>) -> Value {
|
92
|
+
let tu = tu.to_ascii();
|
93
|
+
utils()
|
94
|
+
.funcall("_to_ruby_datetime", (v, tu, tz.map(|v| v.to_string())))
|
95
|
+
.unwrap()
|
96
|
+
}
|
97
|
+
|
86
98
|
pub(crate) fn rb_object_to_any_value<'s>(ob: Value, strict: bool) -> RbResult<AnyValue<'s>> {
|
87
99
|
// Conversion functions.
|
88
100
|
fn get_null(_ob: Value, _strict: bool) -> RbResult<AnyValue<'static>> {
|
@@ -164,9 +176,8 @@ pub(crate) fn rb_object_to_any_value<'s>(ob: Value, strict: bool) -> RbResult<An
|
|
164
176
|
let len = dict.len();
|
165
177
|
let mut keys = Vec::with_capacity(len);
|
166
178
|
let mut vals = Vec::with_capacity(len);
|
167
|
-
dict.foreach(|
|
168
|
-
let
|
169
|
-
let val = Wrap::<AnyValue>::try_convert(v)?.0;
|
179
|
+
dict.foreach(|key: String, val: Wrap<AnyValue>| {
|
180
|
+
let val = val.0;
|
170
181
|
let dtype = DataType::from(&val);
|
171
182
|
keys.push(Field::new(key.into(), dtype));
|
172
183
|
vals.push(val);
|
@@ -190,7 +201,7 @@ pub(crate) fn rb_object_to_any_value<'s>(ob: Value, strict: bool) -> RbResult<An
|
|
190
201
|
let v = sec * 1_000_000_000 + nsec;
|
191
202
|
// TODO support time zone when possible
|
192
203
|
// https://github.com/pola-rs/polars/issues/9103
|
193
|
-
Ok(AnyValue::Datetime(v, TimeUnit::Nanoseconds,
|
204
|
+
Ok(AnyValue::Datetime(v, TimeUnit::Nanoseconds, None))
|
194
205
|
}
|
195
206
|
|
196
207
|
fn get_datetime(ob: Value, _strict: bool) -> RbResult<AnyValue<'static>> {
|
@@ -199,7 +210,7 @@ pub(crate) fn rb_object_to_any_value<'s>(ob: Value, strict: bool) -> RbResult<An
|
|
199
210
|
Ok(AnyValue::Datetime(
|
200
211
|
sec * 1_000_000_000 + nsec,
|
201
212
|
TimeUnit::Nanoseconds,
|
202
|
-
|
213
|
+
None,
|
203
214
|
))
|
204
215
|
}
|
205
216
|
|
@@ -224,7 +235,9 @@ pub(crate) fn rb_object_to_any_value<'s>(ob: Value, strict: bool) -> RbResult<An
|
|
224
235
|
|
225
236
|
let (sign, digits, _, exp): (i8, String, i32, i32) = ob.funcall("split", ()).unwrap();
|
226
237
|
let (mut v, scale) = abs_decimal_from_digits(digits, exp).ok_or_else(|| {
|
227
|
-
RbPolarsErr::
|
238
|
+
RbErr::from(RbPolarsErr::Other(
|
239
|
+
"BigDecimal is too large to fit in Decimal128".into(),
|
240
|
+
))
|
228
241
|
})?;
|
229
242
|
if sign < 0 {
|
230
243
|
// TODO better error
|
@@ -259,9 +272,6 @@ pub(crate) fn rb_object_to_any_value<'s>(ob: Value, strict: bool) -> RbResult<An
|
|
259
272
|
} else if ob.is_kind_of(crate::rb_modules::bigdecimal()) {
|
260
273
|
get_decimal(ob, strict)
|
261
274
|
} else {
|
262
|
-
Err(RbPolarsErr::
|
263
|
-
"object type not supported {:?}",
|
264
|
-
ob
|
265
|
-
)))
|
275
|
+
Err(RbPolarsErr::Other(format!("object type not supported {:?}", ob)).into())
|
266
276
|
}
|
267
277
|
}
|
@@ -2,12 +2,14 @@ pub(crate) mod any_value;
|
|
2
2
|
mod chunked_array;
|
3
3
|
|
4
4
|
use std::fmt::{Debug, Display, Formatter};
|
5
|
+
use std::fs::File;
|
5
6
|
use std::hash::{Hash, Hasher};
|
6
7
|
use std::num::NonZeroUsize;
|
8
|
+
use std::path::PathBuf;
|
7
9
|
|
8
10
|
use magnus::{
|
9
|
-
class, exception, prelude::*, r_hash::ForEach, value::Opaque,
|
10
|
-
Ruby, Symbol, TryConvert, Value,
|
11
|
+
class, exception, prelude::*, r_hash::ForEach, try_convert::TryConvertOwned, value::Opaque,
|
12
|
+
IntoValue, Module, RArray, RHash, Ruby, Symbol, TryConvert, Value,
|
11
13
|
};
|
12
14
|
use polars::chunked_array::object::PolarsObjectSafe;
|
13
15
|
use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
|
@@ -15,12 +17,15 @@ use polars::datatypes::AnyValue;
|
|
15
17
|
use polars::frame::row::Row;
|
16
18
|
use polars::frame::NullStrategy;
|
17
19
|
use polars::io::avro::AvroCompression;
|
20
|
+
use polars::io::cloud::CloudOptions;
|
18
21
|
use polars::prelude::*;
|
19
22
|
use polars::series::ops::NullBehavior;
|
20
23
|
use polars_core::utils::arrow::array::Array;
|
21
24
|
use polars_core::utils::materialize_dyn_int;
|
25
|
+
use polars_plan::plans::ScanSources;
|
22
26
|
use polars_utils::total_ord::{TotalEq, TotalHash};
|
23
27
|
|
28
|
+
use crate::file::{get_ruby_scan_source_input, RubyScanSourceInput};
|
24
29
|
use crate::object::OBJECT_NAME;
|
25
30
|
use crate::rb_modules::series;
|
26
31
|
use crate::{RbDataFrame, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbTypeError, RbValueError};
|
@@ -104,9 +109,10 @@ impl TryConvert for Wrap<NullValues> {
|
|
104
109
|
.collect(),
|
105
110
|
)))
|
106
111
|
} else {
|
107
|
-
Err(
|
108
|
-
"could not extract value from null_values argument".into()
|
109
|
-
|
112
|
+
Err(
|
113
|
+
RbPolarsErr::Other("could not extract value from null_values argument".into())
|
114
|
+
.into(),
|
115
|
+
)
|
110
116
|
}
|
111
117
|
}
|
112
118
|
}
|
@@ -328,7 +334,6 @@ impl TryConvert for Wrap<DataType> {
|
|
328
334
|
)))
|
329
335
|
}
|
330
336
|
}
|
331
|
-
// TODO improve
|
332
337
|
} else if String::try_convert(ob).is_err() {
|
333
338
|
let name = unsafe { ob.class().name() }.into_owned();
|
334
339
|
match name.as_str() {
|
@@ -434,6 +439,8 @@ impl TryConvert for Wrap<DataType> {
|
|
434
439
|
}
|
435
440
|
}
|
436
441
|
|
442
|
+
unsafe impl TryConvertOwned for Wrap<DataType> {}
|
443
|
+
|
437
444
|
impl TryConvert for Wrap<StatisticsOptions> {
|
438
445
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
439
446
|
let mut statistics = StatisticsOptions::empty();
|
@@ -452,8 +459,7 @@ impl TryConvert for Wrap<StatisticsOptions> {
|
|
452
459
|
}
|
453
460
|
}
|
454
461
|
Ok(ForEach::Continue)
|
455
|
-
})
|
456
|
-
.unwrap();
|
462
|
+
})?;
|
457
463
|
|
458
464
|
Ok(Wrap(statistics))
|
459
465
|
}
|
@@ -478,13 +484,75 @@ impl TryConvert for Wrap<Schema> {
|
|
478
484
|
dict.foreach(|key: String, val: Wrap<DataType>| {
|
479
485
|
schema.push(Ok(Field::new((&*key).into(), val.0)));
|
480
486
|
Ok(ForEach::Continue)
|
481
|
-
})
|
482
|
-
.unwrap();
|
487
|
+
})?;
|
483
488
|
|
484
489
|
Ok(Wrap(schema.into_iter().collect::<RbResult<Schema>>()?))
|
485
490
|
}
|
486
491
|
}
|
487
492
|
|
493
|
+
impl TryConvert for Wrap<ScanSources> {
|
494
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
495
|
+
let list = RArray::try_convert(ob)?;
|
496
|
+
|
497
|
+
if list.is_empty() {
|
498
|
+
return Ok(Wrap(ScanSources::default()));
|
499
|
+
}
|
500
|
+
|
501
|
+
enum MutableSources {
|
502
|
+
Paths(Vec<PathBuf>),
|
503
|
+
Files(Vec<File>),
|
504
|
+
Buffers(Vec<bytes::Bytes>),
|
505
|
+
}
|
506
|
+
|
507
|
+
let num_items = list.len();
|
508
|
+
let mut iter = list
|
509
|
+
.into_iter()
|
510
|
+
.map(|val| get_ruby_scan_source_input(val, false));
|
511
|
+
|
512
|
+
let Some(first) = iter.next() else {
|
513
|
+
return Ok(Wrap(ScanSources::default()));
|
514
|
+
};
|
515
|
+
|
516
|
+
let mut sources = match first? {
|
517
|
+
RubyScanSourceInput::Path(path) => {
|
518
|
+
let mut sources = Vec::with_capacity(num_items);
|
519
|
+
sources.push(path);
|
520
|
+
MutableSources::Paths(sources)
|
521
|
+
}
|
522
|
+
RubyScanSourceInput::File(file) => {
|
523
|
+
let mut sources = Vec::with_capacity(num_items);
|
524
|
+
sources.push(file);
|
525
|
+
MutableSources::Files(sources)
|
526
|
+
}
|
527
|
+
RubyScanSourceInput::Buffer(buffer) => {
|
528
|
+
let mut sources = Vec::with_capacity(num_items);
|
529
|
+
sources.push(buffer);
|
530
|
+
MutableSources::Buffers(sources)
|
531
|
+
}
|
532
|
+
};
|
533
|
+
|
534
|
+
for source in iter {
|
535
|
+
match (&mut sources, source?) {
|
536
|
+
(MutableSources::Paths(v), RubyScanSourceInput::Path(p)) => v.push(p),
|
537
|
+
(MutableSources::Files(v), RubyScanSourceInput::File(f)) => v.push(f),
|
538
|
+
(MutableSources::Buffers(v), RubyScanSourceInput::Buffer(f)) => v.push(f),
|
539
|
+
_ => {
|
540
|
+
return Err(RbTypeError::new_err(
|
541
|
+
"Cannot combine in-memory bytes, paths and files for scan sources"
|
542
|
+
.to_string(),
|
543
|
+
))
|
544
|
+
}
|
545
|
+
}
|
546
|
+
}
|
547
|
+
|
548
|
+
Ok(Wrap(match sources {
|
549
|
+
MutableSources::Paths(i) => ScanSources::Paths(i.into()),
|
550
|
+
MutableSources::Files(i) => ScanSources::Files(i.into()),
|
551
|
+
MutableSources::Buffers(i) => ScanSources::Buffers(i.into()),
|
552
|
+
}))
|
553
|
+
}
|
554
|
+
}
|
555
|
+
|
488
556
|
#[derive(Clone)]
|
489
557
|
pub struct ObjectValue {
|
490
558
|
pub inner: Opaque<Value>,
|
@@ -493,7 +561,7 @@ pub struct ObjectValue {
|
|
493
561
|
impl Debug for ObjectValue {
|
494
562
|
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
495
563
|
f.debug_struct("ObjectValue")
|
496
|
-
.field("inner", &self.
|
564
|
+
.field("inner", &self.to_value())
|
497
565
|
.finish()
|
498
566
|
}
|
499
567
|
}
|
@@ -501,7 +569,7 @@ impl Debug for ObjectValue {
|
|
501
569
|
impl Hash for ObjectValue {
|
502
570
|
fn hash<H: Hasher>(&self, state: &mut H) {
|
503
571
|
let h = self
|
504
|
-
.
|
572
|
+
.to_value()
|
505
573
|
.funcall::<_, _, isize>("hash", ())
|
506
574
|
.expect("should be hashable");
|
507
575
|
state.write_isize(h)
|
@@ -512,7 +580,7 @@ impl Eq for ObjectValue {}
|
|
512
580
|
|
513
581
|
impl PartialEq for ObjectValue {
|
514
582
|
fn eq(&self, other: &Self) -> bool {
|
515
|
-
self.
|
583
|
+
self.to_value().eql(other.to_value()).unwrap_or(false)
|
516
584
|
}
|
517
585
|
}
|
518
586
|
|
@@ -533,7 +601,7 @@ impl TotalHash for ObjectValue {
|
|
533
601
|
|
534
602
|
impl Display for ObjectValue {
|
535
603
|
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
536
|
-
write!(f, "{}", self.
|
604
|
+
write!(f, "{}", self.to_value())
|
537
605
|
}
|
538
606
|
}
|
539
607
|
|
@@ -561,16 +629,15 @@ impl From<&dyn PolarsObjectSafe> for &ObjectValue {
|
|
561
629
|
}
|
562
630
|
}
|
563
631
|
|
564
|
-
// TODO remove
|
565
632
|
impl ObjectValue {
|
566
|
-
pub fn
|
567
|
-
|
633
|
+
pub fn to_value(&self) -> Value {
|
634
|
+
self.clone().into_value()
|
568
635
|
}
|
569
636
|
}
|
570
637
|
|
571
638
|
impl IntoValue for ObjectValue {
|
572
|
-
fn into_value_with(self,
|
573
|
-
self.
|
639
|
+
fn into_value_with(self, ruby: &Ruby) -> Value {
|
640
|
+
ruby.get_inner(self.inner)
|
574
641
|
}
|
575
642
|
}
|
576
643
|
|
@@ -587,10 +654,10 @@ impl TryConvert for Wrap<AsofStrategy> {
|
|
587
654
|
let parsed = match String::try_convert(ob)?.as_str() {
|
588
655
|
"backward" => AsofStrategy::Backward,
|
589
656
|
"forward" => AsofStrategy::Forward,
|
657
|
+
"nearest" => AsofStrategy::Nearest,
|
590
658
|
v => {
|
591
659
|
return Err(RbValueError::new_err(format!(
|
592
|
-
"strategy must be one of {{'backward', 'forward'}}, got {}",
|
593
|
-
v
|
660
|
+
"asof `strategy` must be one of {{'backward', 'forward', 'nearest'}}, got {v}",
|
594
661
|
)))
|
595
662
|
}
|
596
663
|
};
|
@@ -830,14 +897,14 @@ impl TryConvert for Wrap<ParallelStrategy> {
|
|
830
897
|
}
|
831
898
|
}
|
832
899
|
|
833
|
-
impl TryConvert for Wrap<
|
900
|
+
impl TryConvert for Wrap<QuantileMethod> {
|
834
901
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
835
902
|
let parsed = match String::try_convert(ob)?.as_str() {
|
836
|
-
"lower" =>
|
837
|
-
"higher" =>
|
838
|
-
"nearest" =>
|
839
|
-
"linear" =>
|
840
|
-
"midpoint" =>
|
903
|
+
"lower" => QuantileMethod::Lower,
|
904
|
+
"higher" => QuantileMethod::Higher,
|
905
|
+
"nearest" => QuantileMethod::Nearest,
|
906
|
+
"linear" => QuantileMethod::Linear,
|
907
|
+
"midpoint" => QuantileMethod::Midpoint,
|
841
908
|
v => {
|
842
909
|
return Err(RbValueError::new_err(format!(
|
843
910
|
"interpolation must be one of {{'lower', 'higher', 'nearest', 'linear', 'midpoint'}}, got {}",
|
@@ -1001,6 +1068,11 @@ impl TryConvert for Wrap<QuoteStyle> {
|
|
1001
1068
|
}
|
1002
1069
|
}
|
1003
1070
|
|
1071
|
+
pub(crate) fn parse_cloud_options(uri: &str, kv: Vec<(String, String)>) -> RbResult<CloudOptions> {
|
1072
|
+
let out = CloudOptions::from_untyped_config(uri, kv).map_err(RbPolarsErr::from)?;
|
1073
|
+
Ok(out)
|
1074
|
+
}
|
1075
|
+
|
1004
1076
|
pub fn parse_fill_null_strategy(
|
1005
1077
|
strategy: &str,
|
1006
1078
|
limit: FillNullLimit,
|
@@ -1071,7 +1143,7 @@ impl TryConvert for Wrap<NonZeroUsize> {
|
|
1071
1143
|
let v = usize::try_convert(ob)?;
|
1072
1144
|
NonZeroUsize::new(v)
|
1073
1145
|
.map(Wrap)
|
1074
|
-
.ok_or(RbValueError::new_err("must be non-zero"
|
1146
|
+
.ok_or(RbValueError::new_err("must be non-zero"))
|
1075
1147
|
}
|
1076
1148
|
}
|
1077
1149
|
|
@@ -1085,3 +1157,28 @@ where
|
|
1085
1157
|
.map(|s| PlSmallStr::from_str(s.as_ref()))
|
1086
1158
|
.collect()
|
1087
1159
|
}
|
1160
|
+
|
1161
|
+
#[derive(Debug, Copy, Clone)]
|
1162
|
+
pub struct RbCompatLevel(pub CompatLevel);
|
1163
|
+
|
1164
|
+
impl TryConvert for RbCompatLevel {
|
1165
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
1166
|
+
Ok(RbCompatLevel(if let Ok(level) = u16::try_convert(ob) {
|
1167
|
+
if let Ok(compat_level) = CompatLevel::with_level(level) {
|
1168
|
+
compat_level
|
1169
|
+
} else {
|
1170
|
+
return Err(RbValueError::new_err("invalid compat level".to_string()));
|
1171
|
+
}
|
1172
|
+
} else if let Ok(future) = bool::try_convert(ob) {
|
1173
|
+
if future {
|
1174
|
+
CompatLevel::newest()
|
1175
|
+
} else {
|
1176
|
+
CompatLevel::oldest()
|
1177
|
+
}
|
1178
|
+
} else {
|
1179
|
+
return Err(RbTypeError::new_err(
|
1180
|
+
"'compat_level' argument accepts int or bool".to_string(),
|
1181
|
+
));
|
1182
|
+
}))
|
1183
|
+
}
|
1184
|
+
}
|
@@ -54,9 +54,6 @@ fn finish_from_rows(
|
|
54
54
|
schema_overrides: Option<Schema>,
|
55
55
|
infer_schema_length: Option<usize>,
|
56
56
|
) -> RbResult<RbDataFrame> {
|
57
|
-
// Object builder must be registered
|
58
|
-
crate::on_startup::register_object_builder();
|
59
|
-
|
60
57
|
let mut schema = if let Some(mut schema) = schema {
|
61
58
|
resolve_schema_overrides(&mut schema, schema_overrides);
|
62
59
|
update_schema_from_rows(&mut schema, &rows, infer_schema_length)?;
|
@@ -2,6 +2,8 @@ use magnus::{prelude::*, IntoValue, RArray, Value};
|
|
2
2
|
|
3
3
|
use super::*;
|
4
4
|
use crate::conversion::{ObjectValue, Wrap};
|
5
|
+
use crate::interop::arrow::to_ruby::dataframe_to_stream;
|
6
|
+
use crate::RbResult;
|
5
7
|
|
6
8
|
impl RbDataFrame {
|
7
9
|
pub fn row_tuple(&self, idx: i64) -> Value {
|
@@ -18,7 +20,7 @@ impl RbDataFrame {
|
|
18
20
|
.map(|s| match s.dtype() {
|
19
21
|
DataType::Object(_, _) => {
|
20
22
|
let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
|
21
|
-
obj.unwrap().
|
23
|
+
obj.unwrap().to_value()
|
22
24
|
}
|
23
25
|
_ => Wrap(s.get(idx).unwrap()).into_value(),
|
24
26
|
}),
|
@@ -37,7 +39,7 @@ impl RbDataFrame {
|
|
37
39
|
.map(|s| match s.dtype() {
|
38
40
|
DataType::Object(_, _) => {
|
39
41
|
let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
|
40
|
-
obj.unwrap().
|
42
|
+
obj.unwrap().to_value()
|
41
43
|
}
|
42
44
|
_ => Wrap(s.get(idx).unwrap()).into_value(),
|
43
45
|
}),
|
@@ -45,4 +47,9 @@ impl RbDataFrame {
|
|
45
47
|
}))
|
46
48
|
.as_value()
|
47
49
|
}
|
50
|
+
|
51
|
+
pub fn __arrow_c_stream__(&self) -> RbResult<Value> {
|
52
|
+
self.df.borrow_mut().align_chunks();
|
53
|
+
dataframe_to_stream(&self.df.borrow())
|
54
|
+
}
|
48
55
|
}
|
@@ -10,14 +10,14 @@ use crate::map::dataframe::{
|
|
10
10
|
apply_lambda_with_utf8_out_type,
|
11
11
|
};
|
12
12
|
use crate::prelude::strings_to_pl_smallstr;
|
13
|
-
use crate::series::{
|
13
|
+
use crate::series::{to_rbseries, to_series};
|
14
14
|
use crate::{RbDataFrame, RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
|
15
15
|
|
16
16
|
impl RbDataFrame {
|
17
17
|
pub fn init(columns: RArray) -> RbResult<Self> {
|
18
18
|
let mut cols = Vec::new();
|
19
19
|
for i in columns.into_iter() {
|
20
|
-
cols.push(<&RbSeries>::try_convert(i)?.series.borrow().clone());
|
20
|
+
cols.push(<&RbSeries>::try_convert(i)?.series.borrow().clone().into());
|
21
21
|
}
|
22
22
|
let df = DataFrame::new(cols).map_err(RbPolarsErr::from)?;
|
23
23
|
Ok(RbDataFrame::new(df))
|
@@ -128,7 +128,7 @@ impl RbDataFrame {
|
|
128
128
|
|
129
129
|
pub fn get_columns(&self) -> RArray {
|
130
130
|
let cols = self.df.borrow().get_columns().to_vec();
|
131
|
-
|
131
|
+
to_rbseries(cols)
|
132
132
|
}
|
133
133
|
|
134
134
|
pub fn columns(&self) -> Vec<String> {
|
@@ -174,7 +174,8 @@ impl RbDataFrame {
|
|
174
174
|
}
|
175
175
|
|
176
176
|
pub fn hstack(&self, columns: RArray) -> RbResult<Self> {
|
177
|
-
let columns =
|
177
|
+
let columns = to_series(columns)?;
|
178
|
+
let columns = columns.into_iter().map(Into::into).collect::<Vec<_>>();
|
178
179
|
let df = self
|
179
180
|
.df
|
180
181
|
.borrow()
|
@@ -184,7 +185,8 @@ impl RbDataFrame {
|
|
184
185
|
}
|
185
186
|
|
186
187
|
pub fn hstack_mut(&self, columns: RArray) -> RbResult<()> {
|
187
|
-
let columns =
|
188
|
+
let columns = to_series(columns)?;
|
189
|
+
let columns = columns.into_iter().map(Into::into).collect::<Vec<_>>();
|
188
190
|
self.df
|
189
191
|
.borrow_mut()
|
190
192
|
.hstack_mut(&columns)
|
@@ -223,6 +225,7 @@ impl RbDataFrame {
|
|
223
225
|
.borrow_mut()
|
224
226
|
.drop_in_place(&name)
|
225
227
|
.map_err(RbPolarsErr::from)?;
|
228
|
+
let s = s.take_materialized_series();
|
226
229
|
Ok(RbSeries::new(s))
|
227
230
|
}
|
228
231
|
|
@@ -230,7 +233,7 @@ impl RbDataFrame {
|
|
230
233
|
self.df
|
231
234
|
.borrow()
|
232
235
|
.select_at_idx(idx)
|
233
|
-
.map(|s| RbSeries::new(s.clone()))
|
236
|
+
.map(|s| RbSeries::new(s.as_materialized_series().clone()))
|
234
237
|
}
|
235
238
|
|
236
239
|
pub fn get_column_index(&self, name: String) -> Option<usize> {
|
@@ -238,11 +241,13 @@ impl RbDataFrame {
|
|
238
241
|
}
|
239
242
|
|
240
243
|
pub fn get_column(&self, name: String) -> RbResult<RbSeries> {
|
241
|
-
self
|
244
|
+
let series = self
|
245
|
+
.df
|
242
246
|
.borrow()
|
243
247
|
.column(&name)
|
244
|
-
.map(|s| RbSeries::new(s.clone()))
|
245
|
-
.map_err(RbPolarsErr::from)
|
248
|
+
.map(|s| RbSeries::new(s.as_materialized_series().clone()))
|
249
|
+
.map_err(RbPolarsErr::from)?;
|
250
|
+
Ok(series)
|
246
251
|
}
|
247
252
|
|
248
253
|
pub fn select(&self, selection: Vec<String>) -> RbResult<Self> {
|
@@ -411,7 +416,7 @@ impl RbDataFrame {
|
|
411
416
|
.borrow()
|
412
417
|
.max_horizontal()
|
413
418
|
.map_err(RbPolarsErr::from)?;
|
414
|
-
Ok(s.map(|s| s.into()))
|
419
|
+
Ok(s.map(|s| s.take_materialized_series().into()))
|
415
420
|
}
|
416
421
|
|
417
422
|
pub fn min_horizontal(&self) -> RbResult<Option<RbSeries>> {
|
@@ -420,7 +425,7 @@ impl RbDataFrame {
|
|
420
425
|
.borrow()
|
421
426
|
.min_horizontal()
|
422
427
|
.map_err(RbPolarsErr::from)?;
|
423
|
-
Ok(s.map(|s| s.into()))
|
428
|
+
Ok(s.map(|s| s.take_materialized_series().into()))
|
424
429
|
}
|
425
430
|
|
426
431
|
pub fn sum_horizontal(&self, ignore_nulls: bool) -> RbResult<Option<RbSeries>> {
|