polars-df 0.22.0 → 0.23.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/Cargo.lock +112 -89
- data/README.md +8 -7
- data/ext/polars/Cargo.toml +13 -12
- data/ext/polars/src/conversion/any_value.rs +14 -8
- data/ext/polars/src/conversion/chunked_array.rs +5 -2
- data/ext/polars/src/conversion/mod.rs +27 -19
- data/ext/polars/src/dataframe/construction.rs +1 -14
- data/ext/polars/src/dataframe/general.rs +0 -5
- data/ext/polars/src/expr/datetime.rs +22 -14
- data/ext/polars/src/file.rs +5 -5
- data/ext/polars/src/io/mod.rs +22 -8
- data/ext/polars/src/lazyframe/general.rs +27 -41
- data/ext/polars/src/lazyframe/optflags.rs +0 -1
- data/ext/polars/src/lib.rs +10 -18
- data/ext/polars/src/series/aggregation.rs +8 -1
- data/ext/polars/src/series/construction.rs +1 -0
- data/ext/polars/src/series/export.rs +1 -0
- data/ext/polars/src/series/general.rs +0 -1
- data/lib/polars/data_frame.rb +11 -9
- data/lib/polars/data_types.rb +9 -1
- data/lib/polars/date_time_expr.rb +35 -14
- data/lib/polars/expr.rb +2 -2
- data/lib/polars/iceberg_dataset.rb +113 -0
- data/lib/polars/io/iceberg.rb +8 -1
- data/lib/polars/io/ipc.rb +28 -49
- data/lib/polars/io/scan_options.rb +9 -3
- data/lib/polars/io/utils.rb +17 -0
- data/lib/polars/lazy_frame.rb +5 -2
- data/lib/polars/scan_cast_options.rb +4 -1
- data/lib/polars/selectors.rb +8 -8
- data/lib/polars/series.rb +23 -1
- data/lib/polars/string_expr.rb +1 -1
- data/lib/polars/string_name_space.rb +1 -1
- data/lib/polars/utils/convert.rb +2 -2
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +2 -0
- metadata +3 -1
|
@@ -3,6 +3,7 @@ use magnus::{
|
|
|
3
3
|
IntoValue, RArray, RHash, RString, Ruby, TryConvert, Value, prelude::*, r_hash::ForEach,
|
|
4
4
|
};
|
|
5
5
|
use polars::prelude::*;
|
|
6
|
+
use polars_compute::decimal::{DEC128_MAX_PREC, DecimalFmtBuffer, dec128_fits};
|
|
6
7
|
use polars_core::utils::any_values_to_supertype_and_n_dtypes;
|
|
7
8
|
|
|
8
9
|
use super::datetime::datetime_to_rb_object;
|
|
@@ -30,6 +31,7 @@ pub(crate) fn any_value_into_rb_object(av: AnyValue, ruby: &Ruby) -> Value {
|
|
|
30
31
|
AnyValue::UInt16(v) => ruby.into_value(v),
|
|
31
32
|
AnyValue::UInt32(v) => ruby.into_value(v),
|
|
32
33
|
AnyValue::UInt64(v) => ruby.into_value(v),
|
|
34
|
+
AnyValue::UInt128(v) => ruby.into_value(v),
|
|
33
35
|
AnyValue::Int8(v) => ruby.into_value(v),
|
|
34
36
|
AnyValue::Int16(v) => ruby.into_value(v),
|
|
35
37
|
AnyValue::Int32(v) => ruby.into_value(v),
|
|
@@ -74,9 +76,11 @@ pub(crate) fn any_value_into_rb_object(av: AnyValue, ruby: &Ruby) -> Value {
|
|
|
74
76
|
}
|
|
75
77
|
AnyValue::Binary(v) => ruby.str_from_slice(v).as_value(),
|
|
76
78
|
AnyValue::BinaryOwned(v) => ruby.str_from_slice(&v).as_value(),
|
|
77
|
-
AnyValue::Decimal(v, scale) =>
|
|
78
|
-
|
|
79
|
-
.
|
|
79
|
+
AnyValue::Decimal(v, prec, scale) => {
|
|
80
|
+
let mut buf = DecimalFmtBuffer::new();
|
|
81
|
+
let s = buf.format_dec128(v, scale, false, false);
|
|
82
|
+
pl_utils().funcall("_to_ruby_decimal", (prec, s)).unwrap()
|
|
83
|
+
}
|
|
80
84
|
}
|
|
81
85
|
}
|
|
82
86
|
|
|
@@ -94,8 +98,12 @@ pub(crate) fn rb_object_to_any_value<'s>(ob: Value, strict: bool) -> RbResult<An
|
|
|
94
98
|
fn get_int(ob: Value, strict: bool) -> RbResult<AnyValue<'static>> {
|
|
95
99
|
if let Ok(v) = i64::try_convert(ob) {
|
|
96
100
|
Ok(AnyValue::Int64(v))
|
|
101
|
+
} else if let Ok(v) = i128::try_convert(ob) {
|
|
102
|
+
Ok(AnyValue::Int128(v))
|
|
97
103
|
} else if let Ok(v) = u64::try_convert(ob) {
|
|
98
104
|
Ok(AnyValue::UInt64(v))
|
|
105
|
+
} else if let Ok(v) = u128::try_convert(ob) {
|
|
106
|
+
Ok(AnyValue::UInt128(v))
|
|
99
107
|
} else if !strict {
|
|
100
108
|
let f = f64::try_convert(ob)?;
|
|
101
109
|
Ok(AnyValue::Float64(f))
|
|
@@ -206,14 +214,12 @@ pub(crate) fn rb_object_to_any_value<'s>(ob: Value, strict: bool) -> RbResult<An
|
|
|
206
214
|
match digits.parse::<i128>() {
|
|
207
215
|
Ok(mut v) => {
|
|
208
216
|
let scale = if exp > 0 {
|
|
209
|
-
v = 10_i128
|
|
210
|
-
.checked_pow(exp as u32)
|
|
211
|
-
.and_then(|factor| v.checked_mul(factor))?;
|
|
217
|
+
v = 10_i128.checked_pow(exp as u32)?.checked_mul(v)?;
|
|
212
218
|
0
|
|
213
219
|
} else {
|
|
214
220
|
(-exp) as usize
|
|
215
221
|
};
|
|
216
|
-
|
|
222
|
+
dec128_fits(v, DEC128_MAX_PREC).then_some((v, scale))
|
|
217
223
|
}
|
|
218
224
|
Err(_) => None,
|
|
219
225
|
}
|
|
@@ -229,7 +235,7 @@ pub(crate) fn rb_object_to_any_value<'s>(ob: Value, strict: bool) -> RbResult<An
|
|
|
229
235
|
// TODO better error
|
|
230
236
|
v = v.checked_neg().unwrap();
|
|
231
237
|
}
|
|
232
|
-
Ok(AnyValue::Decimal(v, scale))
|
|
238
|
+
Ok(AnyValue::Decimal(v, DEC128_MAX_PREC, scale))
|
|
233
239
|
}
|
|
234
240
|
|
|
235
241
|
let ruby = Ruby::get_with(ob);
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
use magnus::{IntoValue, RString, Ruby, TryConvert, Value, prelude::*};
|
|
2
2
|
use polars::prelude::*;
|
|
3
|
+
use polars_compute::decimal::DecimalFmtBuffer;
|
|
3
4
|
|
|
4
5
|
use super::{Wrap, get_rbseq, struct_dict};
|
|
5
6
|
|
|
@@ -129,11 +130,13 @@ impl IntoValue for Wrap<&DateChunked> {
|
|
|
129
130
|
impl IntoValue for Wrap<&DecimalChunked> {
|
|
130
131
|
fn into_value_with(self, ruby: &Ruby) -> Value {
|
|
131
132
|
let utils = pl_utils();
|
|
132
|
-
let
|
|
133
|
+
let rb_precision = self.0.precision().into_value_with(ruby);
|
|
134
|
+
let mut buf = DecimalFmtBuffer::new();
|
|
133
135
|
let iter = self.0.physical().into_iter().map(|opt_v| {
|
|
134
136
|
opt_v.map(|v| {
|
|
137
|
+
let s = buf.format_dec128(v, self.0.scale(), false, false);
|
|
135
138
|
utils
|
|
136
|
-
.funcall::<_, _, Value>("_to_ruby_decimal", (
|
|
139
|
+
.funcall::<_, _, Value>("_to_ruby_decimal", (rb_precision, s))
|
|
137
140
|
.unwrap()
|
|
138
141
|
})
|
|
139
142
|
});
|
|
@@ -25,6 +25,7 @@ use polars::prelude::default_values::{
|
|
|
25
25
|
use polars::prelude::deletion::DeletionFilesList;
|
|
26
26
|
use polars::prelude::*;
|
|
27
27
|
use polars::series::ops::NullBehavior;
|
|
28
|
+
use polars_compute::decimal::dec128_verify_prec_scale;
|
|
28
29
|
use polars_core::schema::iceberg::IcebergSchema;
|
|
29
30
|
use polars_core::utils::arrow::array::Array;
|
|
30
31
|
use polars_core::utils::materialize_dyn_int;
|
|
@@ -174,6 +175,10 @@ impl IntoValue for Wrap<DataType> {
|
|
|
174
175
|
let class = pl.const_get::<_, Value>("UInt64").unwrap();
|
|
175
176
|
class.funcall("new", ()).unwrap()
|
|
176
177
|
}
|
|
178
|
+
DataType::UInt128 => {
|
|
179
|
+
let class = pl.const_get::<_, Value>("UInt128").unwrap();
|
|
180
|
+
class.funcall("new", ()).unwrap()
|
|
181
|
+
}
|
|
177
182
|
DataType::Float32 => {
|
|
178
183
|
let class = pl.const_get::<_, Value>("Float32").unwrap();
|
|
179
184
|
class.funcall("new", ()).unwrap()
|
|
@@ -350,7 +355,11 @@ impl TryConvert for Wrap<DataType> {
|
|
|
350
355
|
"Polars::Time" => DataType::Time,
|
|
351
356
|
"Polars::Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
|
|
352
357
|
"Polars::Duration" => DataType::Duration(TimeUnit::Microseconds),
|
|
353
|
-
"Polars::Decimal" =>
|
|
358
|
+
"Polars::Decimal" => {
|
|
359
|
+
return Err(RbTypeError::new_err(
|
|
360
|
+
"Decimal without precision/scale set is not a valid Polars datatype",
|
|
361
|
+
));
|
|
362
|
+
}
|
|
354
363
|
"Polars::List" => DataType::List(Box::new(DataType::Null)),
|
|
355
364
|
"Polars::Array" => DataType::Array(Box::new(DataType::Null), 0),
|
|
356
365
|
"Polars::Struct" => DataType::Struct(vec![]),
|
|
@@ -415,7 +424,8 @@ impl TryConvert for Wrap<DataType> {
|
|
|
415
424
|
"Polars::Decimal" => {
|
|
416
425
|
let precision = ob.funcall("precision", ())?;
|
|
417
426
|
let scale = ob.funcall("scale", ())?;
|
|
418
|
-
|
|
427
|
+
dec128_verify_prec_scale(precision, scale).map_err(to_rb_err)?;
|
|
428
|
+
DataType::Decimal(precision, scale)
|
|
419
429
|
}
|
|
420
430
|
"Polars::List" => {
|
|
421
431
|
let inner: Value = ob.funcall("inner", ()).unwrap();
|
|
@@ -882,7 +892,7 @@ impl TryConvert for Wrap<Option<IpcCompression>> {
|
|
|
882
892
|
let parsed = match String::try_convert(ob)?.as_str() {
|
|
883
893
|
"uncompressed" => None,
|
|
884
894
|
"lz4" => Some(IpcCompression::LZ4),
|
|
885
|
-
"zstd" => Some(IpcCompression::ZSTD),
|
|
895
|
+
"zstd" => Some(IpcCompression::ZSTD(Default::default())),
|
|
886
896
|
v => {
|
|
887
897
|
return Err(RbValueError::new_err(format!(
|
|
888
898
|
"compression must be one of {{'uncompressed', 'lz4', 'zstd'}}, got {v}"
|
|
@@ -1091,21 +1101,6 @@ impl TryConvert for Wrap<UniqueKeepStrategy> {
|
|
|
1091
1101
|
}
|
|
1092
1102
|
}
|
|
1093
1103
|
|
|
1094
|
-
impl TryConvert for Wrap<IpcCompression> {
|
|
1095
|
-
fn try_convert(ob: Value) -> RbResult<Self> {
|
|
1096
|
-
let parsed = match String::try_convert(ob)?.as_str() {
|
|
1097
|
-
"lz4" => IpcCompression::LZ4,
|
|
1098
|
-
"zstd" => IpcCompression::ZSTD,
|
|
1099
|
-
v => {
|
|
1100
|
-
return Err(RbValueError::new_err(format!(
|
|
1101
|
-
"compression must be one of {{'lz4', 'zstd'}}, got {v}"
|
|
1102
|
-
)));
|
|
1103
|
-
}
|
|
1104
|
-
};
|
|
1105
|
-
Ok(Wrap(parsed))
|
|
1106
|
-
}
|
|
1107
|
-
}
|
|
1108
|
-
|
|
1109
1104
|
impl TryConvert for Wrap<SearchSortedSide> {
|
|
1110
1105
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
|
1111
1106
|
let parsed = match String::try_convert(ob)?.as_str() {
|
|
@@ -1208,7 +1203,8 @@ impl TryConvert for Wrap<QuoteStyle> {
|
|
|
1208
1203
|
}
|
|
1209
1204
|
|
|
1210
1205
|
pub(crate) fn parse_cloud_options(uri: &str, kv: Vec<(String, String)>) -> RbResult<CloudOptions> {
|
|
1211
|
-
let out = CloudOptions::from_untyped_config(uri, kv)
|
|
1206
|
+
let out = CloudOptions::from_untyped_config(CloudScheme::from_uri(uri).as_ref(), kv)
|
|
1207
|
+
.map_err(RbPolarsErr::from)?;
|
|
1212
1208
|
Ok(out)
|
|
1213
1209
|
}
|
|
1214
1210
|
|
|
@@ -1307,6 +1303,17 @@ impl TryConvert for Wrap<CastColumnsPolicy> {
|
|
|
1307
1303
|
}
|
|
1308
1304
|
};
|
|
1309
1305
|
|
|
1306
|
+
let categorical_to_string =
|
|
1307
|
+
match &*ob.funcall::<_, _, String>("categorical_to_string", ())? {
|
|
1308
|
+
"allow" => true,
|
|
1309
|
+
"forbid" => false,
|
|
1310
|
+
v => {
|
|
1311
|
+
return Err(RbValueError::new_err(format!(
|
|
1312
|
+
"unknown option for categorical_to_string: {v}"
|
|
1313
|
+
)));
|
|
1314
|
+
}
|
|
1315
|
+
};
|
|
1316
|
+
|
|
1310
1317
|
return Ok(Wrap(CastColumnsPolicy {
|
|
1311
1318
|
integer_upcast,
|
|
1312
1319
|
float_upcast,
|
|
@@ -1315,6 +1322,7 @@ impl TryConvert for Wrap<CastColumnsPolicy> {
|
|
|
1315
1322
|
datetime_microseconds_downcast: false,
|
|
1316
1323
|
datetime_convert_timezone,
|
|
1317
1324
|
null_upcast: true,
|
|
1325
|
+
categorical_to_string,
|
|
1318
1326
|
missing_struct_fields,
|
|
1319
1327
|
extra_struct_fields,
|
|
1320
1328
|
}));
|
|
@@ -54,7 +54,7 @@ fn finish_from_rows(
|
|
|
54
54
|
schema_overrides: Option<Schema>,
|
|
55
55
|
infer_schema_length: Option<usize>,
|
|
56
56
|
) -> RbResult<RbDataFrame> {
|
|
57
|
-
let
|
|
57
|
+
let schema = if let Some(mut schema) = schema {
|
|
58
58
|
resolve_schema_overrides(&mut schema, schema_overrides);
|
|
59
59
|
update_schema_from_rows(&mut schema, &rows, infer_schema_length)?;
|
|
60
60
|
schema
|
|
@@ -62,11 +62,6 @@ fn finish_from_rows(
|
|
|
62
62
|
rows_to_schema_supertypes(&rows, infer_schema_length).map_err(RbPolarsErr::from)?
|
|
63
63
|
};
|
|
64
64
|
|
|
65
|
-
// TODO: Remove this step when Decimals are supported properly.
|
|
66
|
-
// Erasing the decimal precision/scale here will just require us to infer it again later.
|
|
67
|
-
// https://github.com/pola-rs/polars/issues/14427
|
|
68
|
-
erase_decimal_precision_scale(&mut schema);
|
|
69
|
-
|
|
70
65
|
let df = DataFrame::from_rows_and_schema(&rows, &schema).map_err(RbPolarsErr::from)?;
|
|
71
66
|
Ok(df.into())
|
|
72
67
|
}
|
|
@@ -106,14 +101,6 @@ fn resolve_schema_overrides(schema: &mut Schema, schema_overrides: Option<Schema
|
|
|
106
101
|
}
|
|
107
102
|
}
|
|
108
103
|
|
|
109
|
-
fn erase_decimal_precision_scale(schema: &mut Schema) {
|
|
110
|
-
for dtype in schema.iter_values_mut() {
|
|
111
|
-
if let DataType::Decimal(_, _) = dtype {
|
|
112
|
-
*dtype = DataType::Decimal(None, None)
|
|
113
|
-
}
|
|
114
|
-
}
|
|
115
|
-
}
|
|
116
|
-
|
|
117
104
|
fn columns_names_to_empty_schema<'a, I>(column_names: I) -> Schema
|
|
118
105
|
where
|
|
119
106
|
I: IntoIterator<Item = &'a str>,
|
|
@@ -552,11 +552,6 @@ impl RbDataFrame {
|
|
|
552
552
|
s.into_series().into()
|
|
553
553
|
}
|
|
554
554
|
|
|
555
|
-
pub fn unnest(&self, names: Vec<String>) -> RbResult<Self> {
|
|
556
|
-
let df = self.df.borrow().unnest(names).map_err(RbPolarsErr::from)?;
|
|
557
|
-
Ok(df.into())
|
|
558
|
-
}
|
|
559
|
-
|
|
560
555
|
pub fn clear(&self) -> Self {
|
|
561
556
|
self.df.borrow().clear().into()
|
|
562
557
|
}
|
|
@@ -218,31 +218,39 @@ impl RbExpr {
|
|
|
218
218
|
self.inner.clone().dt().timestamp(tu.0).into()
|
|
219
219
|
}
|
|
220
220
|
|
|
221
|
-
pub fn dt_total_days(&self) -> Self {
|
|
222
|
-
self.inner.clone().dt().total_days().into()
|
|
221
|
+
pub fn dt_total_days(&self, fractional: bool) -> Self {
|
|
222
|
+
self.inner.clone().dt().total_days(fractional).into()
|
|
223
223
|
}
|
|
224
224
|
|
|
225
|
-
pub fn dt_total_hours(&self) -> Self {
|
|
226
|
-
self.inner.clone().dt().total_hours().into()
|
|
225
|
+
pub fn dt_total_hours(&self, fractional: bool) -> Self {
|
|
226
|
+
self.inner.clone().dt().total_hours(fractional).into()
|
|
227
227
|
}
|
|
228
228
|
|
|
229
|
-
pub fn dt_total_minutes(&self) -> Self {
|
|
230
|
-
self.inner.clone().dt().total_minutes().into()
|
|
229
|
+
pub fn dt_total_minutes(&self, fractional: bool) -> Self {
|
|
230
|
+
self.inner.clone().dt().total_minutes(fractional).into()
|
|
231
231
|
}
|
|
232
232
|
|
|
233
|
-
pub fn dt_total_seconds(&self) -> Self {
|
|
234
|
-
self.inner.clone().dt().total_seconds().into()
|
|
233
|
+
pub fn dt_total_seconds(&self, fractional: bool) -> Self {
|
|
234
|
+
self.inner.clone().dt().total_seconds(fractional).into()
|
|
235
235
|
}
|
|
236
236
|
|
|
237
|
-
pub fn dt_total_milliseconds(&self) -> Self {
|
|
238
|
-
self.inner
|
|
237
|
+
pub fn dt_total_milliseconds(&self, fractional: bool) -> Self {
|
|
238
|
+
self.inner
|
|
239
|
+
.clone()
|
|
240
|
+
.dt()
|
|
241
|
+
.total_milliseconds(fractional)
|
|
242
|
+
.into()
|
|
239
243
|
}
|
|
240
244
|
|
|
241
|
-
pub fn dt_total_microseconds(&self) -> Self {
|
|
242
|
-
self.inner
|
|
245
|
+
pub fn dt_total_microseconds(&self, fractional: bool) -> Self {
|
|
246
|
+
self.inner
|
|
247
|
+
.clone()
|
|
248
|
+
.dt()
|
|
249
|
+
.total_microseconds(fractional)
|
|
250
|
+
.into()
|
|
243
251
|
}
|
|
244
252
|
|
|
245
|
-
pub fn dt_total_nanoseconds(&self) -> Self {
|
|
246
|
-
self.inner.clone().dt().total_nanoseconds().into()
|
|
253
|
+
pub fn dt_total_nanoseconds(&self, fractional: bool) -> Self {
|
|
254
|
+
self.inner.clone().dt().total_nanoseconds(fractional).into()
|
|
247
255
|
}
|
|
248
256
|
}
|
data/ext/polars/src/file.rs
CHANGED
|
@@ -145,13 +145,13 @@ impl Seek for RbFileLikeObject {
|
|
|
145
145
|
SeekFrom::End(i) => (2, i),
|
|
146
146
|
};
|
|
147
147
|
|
|
148
|
-
let
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
.funcall("seek", (offset, whence))
|
|
148
|
+
let inner = Ruby::get().unwrap().get_inner(self.inner);
|
|
149
|
+
|
|
150
|
+
inner
|
|
151
|
+
.funcall::<_, _, Value>("seek", (offset, whence))
|
|
152
152
|
.map_err(rberr_to_io_err)?;
|
|
153
153
|
|
|
154
|
-
|
|
154
|
+
inner.funcall("tell", ()).map_err(rberr_to_io_err)
|
|
155
155
|
}
|
|
156
156
|
}
|
|
157
157
|
|
data/ext/polars/src/io/mod.rs
CHANGED
|
@@ -5,7 +5,7 @@ use polars::prelude::default_values::DefaultFieldValues;
|
|
|
5
5
|
use polars::prelude::deletion::DeletionFilesList;
|
|
6
6
|
use polars::prelude::{
|
|
7
7
|
CastColumnsPolicy, ColumnMapping, ExtraColumnsPolicy, MissingColumnsPolicy, PlSmallStr, Schema,
|
|
8
|
-
UnifiedScanArgs,
|
|
8
|
+
TableStatistics, UnifiedScanArgs,
|
|
9
9
|
};
|
|
10
10
|
use polars_io::{HiveOptions, RowIndex};
|
|
11
11
|
use polars_utils::IdxSize;
|
|
@@ -24,6 +24,12 @@ impl TryConvert for RbScanOptions {
|
|
|
24
24
|
}
|
|
25
25
|
}
|
|
26
26
|
|
|
27
|
+
impl TryConvert for Wrap<TableStatistics> {
|
|
28
|
+
fn try_convert(_ob: Value) -> RbResult<Self> {
|
|
29
|
+
todo!();
|
|
30
|
+
}
|
|
31
|
+
}
|
|
32
|
+
|
|
27
33
|
impl RbScanOptions {
|
|
28
34
|
pub fn extract_unified_scan_args(
|
|
29
35
|
&self,
|
|
@@ -38,6 +44,10 @@ impl RbScanOptions {
|
|
|
38
44
|
let include_file_paths: Option<Wrap<PlSmallStr>> =
|
|
39
45
|
self.0.funcall("include_file_paths", ())?;
|
|
40
46
|
let glob: bool = self.0.funcall("glob", ())?;
|
|
47
|
+
let hidden_file_prefix: Option<Vec<String>> = self.0.funcall("hidden_file_prefix", ())?;
|
|
48
|
+
let column_mapping: Option<Wrap<ColumnMapping>> = self.0.funcall("column_mapping", ())?;
|
|
49
|
+
let default_values: Option<Wrap<DefaultFieldValues>> =
|
|
50
|
+
self.0.funcall("default_values", ())?;
|
|
41
51
|
let hive_partitioning: Option<bool> = self.0.funcall("hive_partitioning", ())?;
|
|
42
52
|
let hive_schema: Option<Wrap<Schema>> = self.0.funcall("hive_schema", ())?;
|
|
43
53
|
let try_parse_hive_dates: bool = self.0.funcall("try_parse_hive_dates", ())?;
|
|
@@ -48,9 +58,9 @@ impl RbScanOptions {
|
|
|
48
58
|
let retries: usize = self.0.funcall("retries", ())?;
|
|
49
59
|
let deletion_files: Option<Wrap<DeletionFilesList>> =
|
|
50
60
|
self.0.funcall("deletion_files", ())?;
|
|
51
|
-
let
|
|
52
|
-
|
|
53
|
-
|
|
61
|
+
let table_statistics: Option<Wrap<TableStatistics>> =
|
|
62
|
+
self.0.funcall("table_statistics", ())?;
|
|
63
|
+
let row_count: Option<(u64, u64)> = self.0.funcall("row_count", ())?;
|
|
54
64
|
|
|
55
65
|
let cloud_options = storage_options;
|
|
56
66
|
|
|
@@ -89,7 +99,13 @@ impl RbScanOptions {
|
|
|
89
99
|
rechunk,
|
|
90
100
|
cache,
|
|
91
101
|
glob,
|
|
102
|
+
hidden_file_prefix: hidden_file_prefix
|
|
103
|
+
.map(|x| x.into_iter().map(|x| (*x).into()).collect()),
|
|
92
104
|
projection: None,
|
|
105
|
+
column_mapping: column_mapping.map(|x| x.0),
|
|
106
|
+
default_values: default_values
|
|
107
|
+
.map(|x| x.0)
|
|
108
|
+
.filter(|DefaultFieldValues::Iceberg(v)| !v.is_empty()),
|
|
93
109
|
row_index,
|
|
94
110
|
pre_slice: pre_slice.map(Slice::from),
|
|
95
111
|
cast_columns_policy: cast_options.0,
|
|
@@ -97,10 +113,8 @@ impl RbScanOptions {
|
|
|
97
113
|
extra_columns_policy: extra_columns.0,
|
|
98
114
|
include_file_paths: include_file_paths.map(|x| x.0),
|
|
99
115
|
deletion_files: DeletionFilesList::filter_empty(deletion_files.map(|x| x.0)),
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
.map(|x| x.0)
|
|
103
|
-
.filter(|DefaultFieldValues::Iceberg(v)| !v.is_empty()),
|
|
116
|
+
table_statistics: table_statistics.map(|x| x.0),
|
|
117
|
+
row_count,
|
|
104
118
|
};
|
|
105
119
|
|
|
106
120
|
Ok(unified_scan_args)
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
use magnus::{IntoValue, RArray, RHash, Ruby, TryConvert, Value, r_hash::ForEach, typed_data::Obj};
|
|
2
|
-
use polars::io::
|
|
2
|
+
use polars::io::RowIndex;
|
|
3
3
|
use polars::lazy::frame::LazyFrame;
|
|
4
4
|
use polars::prelude::*;
|
|
5
5
|
use polars_plan::dsl::ScanSources;
|
|
@@ -18,7 +18,10 @@ use crate::{RbDataFrame, RbExpr, RbLazyFrame, RbLazyGroupBy, RbPolarsErr, RbResu
|
|
|
18
18
|
fn rbobject_to_first_path_and_scan_sources(obj: Value) -> RbResult<(Option<PlPath>, ScanSources)> {
|
|
19
19
|
use crate::file::{RubyScanSourceInput, get_ruby_scan_source_input};
|
|
20
20
|
Ok(match get_ruby_scan_source_input(obj, false)? {
|
|
21
|
-
RubyScanSourceInput::Path(path) => (
|
|
21
|
+
RubyScanSourceInput::Path(path) => (
|
|
22
|
+
Some(path.clone()),
|
|
23
|
+
ScanSources::Paths(FromIterator::from_iter([path])),
|
|
24
|
+
),
|
|
22
25
|
RubyScanSourceInput::File(file) => (None, ScanSources::Files([file].into())),
|
|
23
26
|
RubyScanSourceInput::Buffer(buff) => (None, ScanSources::Buffers([buff].into())),
|
|
24
27
|
})
|
|
@@ -180,48 +183,28 @@ impl RbLazyFrame {
|
|
|
180
183
|
Ok(lf.into())
|
|
181
184
|
}
|
|
182
185
|
|
|
183
|
-
#[allow(clippy::too_many_arguments)]
|
|
184
186
|
pub fn new_from_ipc(
|
|
185
|
-
source: Option<Value>,
|
|
186
187
|
sources: Wrap<ScanSources>,
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
rechunk: bool,
|
|
190
|
-
row_index: Option<(String, IdxSize)>,
|
|
191
|
-
hive_partitioning: Option<bool>,
|
|
192
|
-
hive_schema: Option<Wrap<Schema>>,
|
|
193
|
-
try_parse_hive_dates: bool,
|
|
194
|
-
include_file_paths: Option<String>,
|
|
188
|
+
scan_options: RbScanOptions,
|
|
189
|
+
file_cache_ttl: Option<u64>,
|
|
195
190
|
) -> RbResult<Self> {
|
|
196
|
-
let
|
|
197
|
-
name: name.into(),
|
|
198
|
-
offset,
|
|
199
|
-
});
|
|
191
|
+
let options = IpcScanOptions;
|
|
200
192
|
|
|
201
|
-
let
|
|
202
|
-
|
|
203
|
-
hive_start_idx: 0,
|
|
204
|
-
schema: hive_schema.map(|x| Arc::new(x.0)),
|
|
205
|
-
try_parse_dates: try_parse_hive_dates,
|
|
206
|
-
};
|
|
193
|
+
let sources = sources.0;
|
|
194
|
+
let first_path = sources.first_path().map(|p| p.into_owned());
|
|
207
195
|
|
|
208
|
-
let
|
|
209
|
-
|
|
210
|
-
cache,
|
|
211
|
-
rechunk,
|
|
212
|
-
row_index,
|
|
213
|
-
cloud_options: None,
|
|
214
|
-
hive_options,
|
|
215
|
-
include_file_paths: include_file_paths.map(|x| x.into()),
|
|
216
|
-
};
|
|
196
|
+
let mut unified_scan_args =
|
|
197
|
+
scan_options.extract_unified_scan_args(first_path.as_ref().map(|p| p.as_ref()))?;
|
|
217
198
|
|
|
218
|
-
let
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
199
|
+
if let Some(file_cache_ttl) = file_cache_ttl {
|
|
200
|
+
unified_scan_args
|
|
201
|
+
.cloud_options
|
|
202
|
+
.get_or_insert_default()
|
|
203
|
+
.file_cache_ttl = file_cache_ttl;
|
|
204
|
+
}
|
|
223
205
|
|
|
224
|
-
let lf = LazyFrame::scan_ipc_sources(sources,
|
|
206
|
+
let lf = LazyFrame::scan_ipc_sources(sources, options, unified_scan_args)
|
|
207
|
+
.map_err(RbPolarsErr::from)?;
|
|
225
208
|
Ok(lf.into())
|
|
226
209
|
}
|
|
227
210
|
|
|
@@ -406,13 +389,13 @@ impl RbLazyFrame {
|
|
|
406
389
|
pub fn sink_ipc(
|
|
407
390
|
&self,
|
|
408
391
|
target: SinkTarget,
|
|
409
|
-
compression: Option<
|
|
392
|
+
compression: Wrap<Option<IpcCompression>>,
|
|
410
393
|
cloud_options: Option<Vec<(String, String)>>,
|
|
411
394
|
retries: usize,
|
|
412
395
|
sink_options: Wrap<SinkOptions>,
|
|
413
396
|
) -> RbResult<RbLazyFrame> {
|
|
414
397
|
let options = IpcWriterOptions {
|
|
415
|
-
compression: compression.
|
|
398
|
+
compression: compression.0,
|
|
416
399
|
..Default::default()
|
|
417
400
|
};
|
|
418
401
|
|
|
@@ -949,11 +932,14 @@ impl RbLazyFrame {
|
|
|
949
932
|
Ok(schema_dict)
|
|
950
933
|
}
|
|
951
934
|
|
|
952
|
-
pub fn unnest(&self, columns: &RbSelector) -> Self {
|
|
935
|
+
pub fn unnest(&self, columns: &RbSelector, separator: Option<String>) -> Self {
|
|
953
936
|
self.ldf
|
|
954
937
|
.borrow()
|
|
955
938
|
.clone()
|
|
956
|
-
.unnest(
|
|
939
|
+
.unnest(
|
|
940
|
+
columns.inner.clone(),
|
|
941
|
+
separator.as_deref().map(PlSmallStr::from_str),
|
|
942
|
+
)
|
|
957
943
|
.into()
|
|
958
944
|
}
|
|
959
945
|
|
|
@@ -49,7 +49,6 @@ flag_getter_setters! {
|
|
|
49
49
|
(SLICE_PUSHDOWN, get_slice_pushdown, set_slice_pushdown, clear=true)
|
|
50
50
|
(COMM_SUBPLAN_ELIM, get_comm_subplan_elim, set_comm_subplan_elim, clear=true)
|
|
51
51
|
(COMM_SUBEXPR_ELIM, get_comm_subexpr_elim, set_comm_subexpr_elim, clear=true)
|
|
52
|
-
(COLLAPSE_JOINS, get_collapse_joins, set_collapse_joins, clear=true)
|
|
53
52
|
(CHECK_ORDER_OBSERVE, get_check_order_observe, set_check_order_observe, clear=true)
|
|
54
53
|
(FAST_PROJECTION, get_fast_projection, set_fast_projection, clear=true)
|
|
55
54
|
|
data/ext/polars/src/lib.rs
CHANGED
|
@@ -156,7 +156,6 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
|
156
156
|
class.define_method("transpose", method!(RbDataFrame::transpose, 2))?;
|
|
157
157
|
class.define_method("upsample", method!(RbDataFrame::upsample, 4))?;
|
|
158
158
|
class.define_method("to_struct", method!(RbDataFrame::to_struct, 1))?;
|
|
159
|
-
class.define_method("unnest", method!(RbDataFrame::unnest, 1))?;
|
|
160
159
|
class.define_method("clear", method!(RbDataFrame::clear, 0))?;
|
|
161
160
|
class.define_method(
|
|
162
161
|
"serialize_binary",
|
|
@@ -432,21 +431,21 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
|
432
431
|
class.define_method("dt_millisecond", method!(RbExpr::dt_millisecond, 0))?;
|
|
433
432
|
class.define_method("dt_microsecond", method!(RbExpr::dt_microsecond, 0))?;
|
|
434
433
|
class.define_method("dt_nanosecond", method!(RbExpr::dt_nanosecond, 0))?;
|
|
435
|
-
class.define_method("dt_total_days", method!(RbExpr::dt_total_days,
|
|
436
|
-
class.define_method("dt_total_hours", method!(RbExpr::dt_total_hours,
|
|
437
|
-
class.define_method("dt_total_minutes", method!(RbExpr::dt_total_minutes,
|
|
438
|
-
class.define_method("dt_total_seconds", method!(RbExpr::dt_total_seconds,
|
|
434
|
+
class.define_method("dt_total_days", method!(RbExpr::dt_total_days, 1))?;
|
|
435
|
+
class.define_method("dt_total_hours", method!(RbExpr::dt_total_hours, 1))?;
|
|
436
|
+
class.define_method("dt_total_minutes", method!(RbExpr::dt_total_minutes, 1))?;
|
|
437
|
+
class.define_method("dt_total_seconds", method!(RbExpr::dt_total_seconds, 1))?;
|
|
439
438
|
class.define_method(
|
|
440
439
|
"dt_total_nanoseconds",
|
|
441
|
-
method!(RbExpr::dt_total_nanoseconds,
|
|
440
|
+
method!(RbExpr::dt_total_nanoseconds, 1),
|
|
442
441
|
)?;
|
|
443
442
|
class.define_method(
|
|
444
443
|
"dt_total_microseconds",
|
|
445
|
-
method!(RbExpr::dt_total_microseconds,
|
|
444
|
+
method!(RbExpr::dt_total_microseconds, 1),
|
|
446
445
|
)?;
|
|
447
446
|
class.define_method(
|
|
448
447
|
"dt_total_milliseconds",
|
|
449
|
-
method!(RbExpr::dt_total_milliseconds,
|
|
448
|
+
method!(RbExpr::dt_total_milliseconds, 1),
|
|
450
449
|
)?;
|
|
451
450
|
class.define_method("dt_timestamp", method!(RbExpr::dt_timestamp, 1))?;
|
|
452
451
|
class.define_method("dt_to_string", method!(RbExpr::dt_to_string, 1))?;
|
|
@@ -845,7 +844,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
|
845
844
|
"new_from_parquet",
|
|
846
845
|
function!(RbLazyFrame::new_from_parquet, 6),
|
|
847
846
|
)?;
|
|
848
|
-
class.define_singleton_method("new_from_ipc", function!(RbLazyFrame::new_from_ipc,
|
|
847
|
+
class.define_singleton_method("new_from_ipc", function!(RbLazyFrame::new_from_ipc, 3))?;
|
|
849
848
|
class.define_method("write_json", method!(RbLazyFrame::write_json, 1))?;
|
|
850
849
|
class.define_method("describe_plan", method!(RbLazyFrame::describe_plan, 0))?;
|
|
851
850
|
class.define_method(
|
|
@@ -912,7 +911,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
|
912
911
|
class.define_method("cast_all", method!(RbLazyFrame::cast_all, 2))?;
|
|
913
912
|
class.define_method("_clone", method!(RbLazyFrame::clone, 0))?;
|
|
914
913
|
class.define_method("collect_schema", method!(RbLazyFrame::collect_schema, 0))?;
|
|
915
|
-
class.define_method("unnest", method!(RbLazyFrame::unnest,
|
|
914
|
+
class.define_method("unnest", method!(RbLazyFrame::unnest, 2))?;
|
|
916
915
|
class.define_method("count", method!(RbLazyFrame::count, 0))?;
|
|
917
916
|
class.define_method("merge_sorted", method!(RbLazyFrame::merge_sorted, 2))?;
|
|
918
917
|
|
|
@@ -927,6 +926,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
|
927
926
|
class.define_singleton_method("new_opt_u16", function!(RbSeries::new_opt_u16, 3))?;
|
|
928
927
|
class.define_singleton_method("new_opt_u32", function!(RbSeries::new_opt_u32, 3))?;
|
|
929
928
|
class.define_singleton_method("new_opt_u64", function!(RbSeries::new_opt_u64, 3))?;
|
|
929
|
+
class.define_singleton_method("new_opt_u128", function!(RbSeries::new_opt_u128, 3))?;
|
|
930
930
|
class.define_singleton_method("new_opt_i8", function!(RbSeries::new_opt_i8, 3))?;
|
|
931
931
|
class.define_singleton_method("new_opt_i16", function!(RbSeries::new_opt_i16, 3))?;
|
|
932
932
|
class.define_singleton_method("new_opt_i32", function!(RbSeries::new_opt_i32, 3))?;
|
|
@@ -1402,14 +1402,6 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
|
1402
1402
|
"set_comm_subexpr_elim",
|
|
1403
1403
|
method!(RbOptFlags::set_comm_subexpr_elim, 1),
|
|
1404
1404
|
)?;
|
|
1405
|
-
class.define_method(
|
|
1406
|
-
"get_collapse_joins",
|
|
1407
|
-
method!(RbOptFlags::get_collapse_joins, 0),
|
|
1408
|
-
)?;
|
|
1409
|
-
class.define_method(
|
|
1410
|
-
"set_collapse_joins",
|
|
1411
|
-
method!(RbOptFlags::set_collapse_joins, 1),
|
|
1412
|
-
)?;
|
|
1413
1405
|
class.define_method(
|
|
1414
1406
|
"get_check_order_observe",
|
|
1415
1407
|
method!(RbOptFlags::get_check_order_observe, 0),
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
use crate::error::RbPolarsErr;
|
|
2
2
|
use crate::prelude::*;
|
|
3
|
+
use crate::utils::to_rb_err;
|
|
3
4
|
use crate::{RbResult, RbSeries};
|
|
4
5
|
use magnus::{IntoValue, Ruby, Value};
|
|
5
6
|
|
|
@@ -58,12 +59,18 @@ impl RbSeries {
|
|
|
58
59
|
.cast(&DataType::UInt8)
|
|
59
60
|
.unwrap()
|
|
60
61
|
.mean_reduce()
|
|
62
|
+
.map_err(to_rb_err)?
|
|
61
63
|
.as_any_value(),
|
|
62
64
|
)
|
|
63
65
|
.into_value_with(ruby)),
|
|
64
66
|
// For non-numeric output types we require mean_reduce.
|
|
65
67
|
dt if dt.is_temporal() => Ok(Wrap(
|
|
66
|
-
rb_self
|
|
68
|
+
rb_self
|
|
69
|
+
.series
|
|
70
|
+
.borrow()
|
|
71
|
+
.mean_reduce()
|
|
72
|
+
.map_err(to_rb_err)?
|
|
73
|
+
.as_any_value(),
|
|
67
74
|
)
|
|
68
75
|
.into_value_with(ruby)),
|
|
69
76
|
_ => Ok(rb_self.series.borrow().mean().into_value_with(ruby)),
|
|
@@ -75,6 +75,7 @@ init_method_opt!(new_opt_u8, UInt8Type, u8);
|
|
|
75
75
|
init_method_opt!(new_opt_u16, UInt16Type, u16);
|
|
76
76
|
init_method_opt!(new_opt_u32, UInt32Type, u32);
|
|
77
77
|
init_method_opt!(new_opt_u64, UInt64Type, u64);
|
|
78
|
+
init_method_opt!(new_opt_u128, UInt128Type, u128);
|
|
78
79
|
init_method_opt!(new_opt_i8, Int8Type, i8);
|
|
79
80
|
init_method_opt!(new_opt_i16, Int16Type, i16);
|
|
80
81
|
init_method_opt!(new_opt_i32, Int32Type, i32);
|
|
@@ -18,6 +18,7 @@ impl RbSeries {
|
|
|
18
18
|
DataType::UInt16 => ruby.ary_from_iter(series.u16().unwrap()).as_value(),
|
|
19
19
|
DataType::UInt32 => ruby.ary_from_iter(series.u32().unwrap()).as_value(),
|
|
20
20
|
DataType::UInt64 => ruby.ary_from_iter(series.u64().unwrap()).as_value(),
|
|
21
|
+
DataType::UInt128 => ruby.ary_from_iter(series.u128().unwrap()).as_value(),
|
|
21
22
|
DataType::Int8 => ruby.ary_from_iter(series.i8().unwrap()).as_value(),
|
|
22
23
|
DataType::Int16 => ruby.ary_from_iter(series.i16().unwrap()).as_value(),
|
|
23
24
|
DataType::Int32 => ruby.ary_from_iter(series.i32().unwrap()).as_value(),
|