polars-df 0.25.0 → 0.26.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +33 -0
- data/Cargo.lock +270 -97
- data/LICENSE.txt +1 -1
- data/README.md +1 -3
- data/ext/polars/Cargo.toml +19 -18
- data/ext/polars/src/catalog/unity.rs +15 -20
- data/ext/polars/src/conversion/any_value.rs +53 -29
- data/ext/polars/src/conversion/chunked_array.rs +58 -56
- data/ext/polars/src/conversion/datetime.rs +58 -7
- data/ext/polars/src/conversion/mod.rs +200 -150
- data/ext/polars/src/dataframe/export.rs +15 -12
- data/ext/polars/src/dataframe/general.rs +25 -7
- data/ext/polars/src/dataframe/map.rs +6 -4
- data/ext/polars/src/error.rs +1 -1
- data/ext/polars/src/expr/array.rs +0 -24
- data/ext/polars/src/expr/datatype.rs +13 -3
- data/ext/polars/src/expr/datetime.rs +4 -4
- data/ext/polars/src/expr/general.rs +35 -15
- data/ext/polars/src/expr/list.rs +0 -26
- data/ext/polars/src/expr/rolling.rs +24 -0
- data/ext/polars/src/functions/business.rs +2 -2
- data/ext/polars/src/functions/io.rs +4 -3
- data/ext/polars/src/functions/lazy.rs +65 -46
- data/ext/polars/src/functions/meta.rs +6 -5
- data/ext/polars/src/functions/mod.rs +0 -1
- data/ext/polars/src/functions/range.rs +13 -0
- data/ext/polars/src/functions/utils.rs +4 -2
- data/ext/polars/src/interop/arrow/mod.rs +4 -2
- data/ext/polars/src/interop/arrow/to_rb.rs +1 -1
- data/ext/polars/src/interop/numo/to_numo_series.rs +26 -25
- data/ext/polars/src/io/scan_options.rs +6 -3
- data/ext/polars/src/io/sink_options.rs +2 -0
- data/ext/polars/src/lazyframe/general.rs +243 -17
- data/ext/polars/src/lazyframe/optflags.rs +2 -1
- data/ext/polars/src/lib.rs +39 -35
- data/ext/polars/src/map/lazy.rs +5 -2
- data/ext/polars/src/map/series.rs +19 -18
- data/ext/polars/src/on_startup.rs +25 -6
- data/ext/polars/src/ruby/numo.rs +3 -4
- data/ext/polars/src/ruby/plan_callback.rs +1 -4
- data/ext/polars/src/ruby/rb_modules.rs +2 -4
- data/ext/polars/src/ruby/ruby_udf.rs +7 -9
- data/ext/polars/src/ruby/utils.rs +12 -1
- data/ext/polars/src/series/aggregation.rs +13 -1
- data/ext/polars/src/series/construction.rs +31 -50
- data/ext/polars/src/series/export.rs +33 -38
- data/ext/polars/src/series/general.rs +6 -6
- data/ext/polars/src/series/map.rs +3 -2
- data/ext/polars/src/series/scatter.rs +4 -4
- data/ext/polars/src/utils.rs +31 -7
- data/lib/polars/array_expr.rb +23 -7
- data/lib/polars/array_name_space.rb +16 -2
- data/lib/polars/binary_name_space.rb +32 -0
- data/lib/polars/collect_batches.rb +4 -0
- data/lib/polars/data_frame.rb +144 -11
- data/lib/polars/data_type_group.rb +5 -0
- data/lib/polars/date_time_expr.rb +91 -3
- data/lib/polars/date_time_name_space.rb +7 -1
- data/lib/polars/expr.rb +247 -44
- data/lib/polars/functions/business.rb +2 -2
- data/lib/polars/functions/datatype.rb +30 -0
- data/lib/polars/functions/eager.rb +80 -7
- data/lib/polars/functions/lazy.rb +97 -2
- data/lib/polars/functions/range/linear_space.rb +118 -0
- data/lib/polars/io/csv.rb +27 -5
- data/lib/polars/io/database.rb +2 -3
- data/lib/polars/io/ipc.rb +2 -2
- data/lib/polars/io/lines.rb +172 -0
- data/lib/polars/io/parquet.rb +1 -1
- data/lib/polars/io/sink_options.rb +5 -2
- data/lib/polars/lazy_frame.rb +517 -14
- data/lib/polars/list_expr.rb +21 -7
- data/lib/polars/list_name_space.rb +16 -2
- data/lib/polars/query_opt_flags.rb +23 -5
- data/lib/polars/selectors.rb +2 -2
- data/lib/polars/series.rb +176 -19
- data/lib/polars/sql_context.rb +2 -2
- data/lib/polars/string_cache.rb +19 -72
- data/lib/polars/string_expr.rb +1 -7
- data/lib/polars/string_name_space.rb +1 -7
- data/lib/polars/utils/construction/series.rb +24 -39
- data/lib/polars/utils/convert.rb +16 -6
- data/lib/polars/utils/parse.rb +7 -0
- data/lib/polars/utils/reduce_balanced.rb +43 -0
- data/lib/polars/utils/various.rb +5 -0
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +2 -1
- metadata +4 -17
- data/ext/polars/src/functions/string_cache.rs +0 -24
|
@@ -16,6 +16,7 @@ use magnus::{
|
|
|
16
16
|
use polars::chunked_array::object::PolarsObjectSafe;
|
|
17
17
|
use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
|
|
18
18
|
use polars::datatypes::AnyValue;
|
|
19
|
+
use polars::frame::PivotColumnNaming;
|
|
19
20
|
use polars::frame::row::Row;
|
|
20
21
|
use polars::io::avro::AvroCompression;
|
|
21
22
|
use polars::prelude::default_values::{
|
|
@@ -36,20 +37,12 @@ use polars_utils::total_ord::{TotalEq, TotalHash};
|
|
|
36
37
|
use crate::file::{RubyScanSourceInput, get_ruby_scan_source_input};
|
|
37
38
|
use crate::object::OBJECT_NAME;
|
|
38
39
|
use crate::rb_modules::pl_series;
|
|
40
|
+
use crate::ruby::gvl::GvlExt;
|
|
41
|
+
use crate::ruby::utils::TryIntoValue;
|
|
39
42
|
use crate::utils::to_rb_err;
|
|
40
|
-
use crate::{
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
// Safety:
|
|
44
|
-
// Wrap is transparent.
|
|
45
|
-
unsafe { std::mem::transmute(slice) }
|
|
46
|
-
}
|
|
47
|
-
|
|
48
|
-
pub(crate) fn vec_extract_wrapped<T>(buf: Vec<Wrap<T>>) -> Vec<T> {
|
|
49
|
-
// Safety:
|
|
50
|
-
// Wrap is transparent.
|
|
51
|
-
unsafe { std::mem::transmute(buf) }
|
|
52
|
-
}
|
|
43
|
+
use crate::{
|
|
44
|
+
RbDataFrame, RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbTypeError, RbValueError,
|
|
45
|
+
};
|
|
53
46
|
|
|
54
47
|
#[repr(transparent)]
|
|
55
48
|
pub struct Wrap<T>(pub T);
|
|
@@ -90,11 +83,9 @@ pub(crate) fn get_series(obj: Value) -> RbResult<Series> {
|
|
|
90
83
|
Ok(rbs.series.read().clone())
|
|
91
84
|
}
|
|
92
85
|
|
|
93
|
-
pub(crate) fn to_series(rb: &Ruby, s: RbSeries) -> Value {
|
|
86
|
+
pub(crate) fn to_series(rb: &Ruby, s: RbSeries) -> RbResult<Value> {
|
|
94
87
|
let series = pl_series(rb);
|
|
95
|
-
series
|
|
96
|
-
.funcall::<_, _, Value>("_from_rbseries", (s,))
|
|
97
|
-
.unwrap()
|
|
88
|
+
series.funcall::<_, _, Value>("_from_rbseries", (s,))
|
|
98
89
|
}
|
|
99
90
|
|
|
100
91
|
impl TryConvert for Wrap<PlSmallStr> {
|
|
@@ -126,140 +117,135 @@ impl TryConvert for Wrap<NullValues> {
|
|
|
126
117
|
}
|
|
127
118
|
}
|
|
128
119
|
|
|
129
|
-
fn struct_dict<'a>(
|
|
120
|
+
fn struct_dict<'a>(
|
|
121
|
+
ruby: &Ruby,
|
|
122
|
+
vals: impl Iterator<Item = AnyValue<'a>>,
|
|
123
|
+
flds: &[Field],
|
|
124
|
+
) -> RbResult<Value> {
|
|
130
125
|
let dict = ruby.hash_new();
|
|
131
126
|
for (fld, val) in flds.iter().zip(vals) {
|
|
132
|
-
dict.aset(fld.name().as_str(), Wrap(val)
|
|
127
|
+
dict.aset(fld.name().as_str(), Wrap(val).try_into_value_with(ruby)?)?;
|
|
133
128
|
}
|
|
134
|
-
dict.as_value()
|
|
129
|
+
Ok(dict.as_value())
|
|
135
130
|
}
|
|
136
131
|
|
|
137
|
-
impl
|
|
138
|
-
fn
|
|
132
|
+
impl TryIntoValue for Wrap<Series> {
|
|
133
|
+
fn try_into_value_with(self, ruby: &Ruby) -> RbResult<Value> {
|
|
139
134
|
to_series(ruby, RbSeries::new(self.0))
|
|
140
135
|
}
|
|
141
136
|
}
|
|
142
137
|
|
|
143
|
-
impl
|
|
144
|
-
fn
|
|
138
|
+
impl TryIntoValue for Wrap<DataType> {
|
|
139
|
+
fn try_into_value_with(self, ruby: &Ruby) -> RbResult<Value> {
|
|
145
140
|
let pl = crate::rb_modules::polars(ruby);
|
|
146
141
|
|
|
147
142
|
match self.0 {
|
|
148
143
|
DataType::Int8 => {
|
|
149
|
-
let class = pl.const_get::<_, Value>("Int8")
|
|
150
|
-
class.funcall("new", ())
|
|
144
|
+
let class = pl.const_get::<_, Value>("Int8")?;
|
|
145
|
+
class.funcall("new", ())
|
|
151
146
|
}
|
|
152
147
|
DataType::Int16 => {
|
|
153
|
-
let class = pl.const_get::<_, Value>("Int16")
|
|
154
|
-
class.funcall("new", ())
|
|
148
|
+
let class = pl.const_get::<_, Value>("Int16")?;
|
|
149
|
+
class.funcall("new", ())
|
|
155
150
|
}
|
|
156
151
|
DataType::Int32 => {
|
|
157
|
-
let class = pl.const_get::<_, Value>("Int32")
|
|
158
|
-
class.funcall("new", ())
|
|
152
|
+
let class = pl.const_get::<_, Value>("Int32")?;
|
|
153
|
+
class.funcall("new", ())
|
|
159
154
|
}
|
|
160
155
|
DataType::Int64 => {
|
|
161
|
-
let class = pl.const_get::<_, Value>("Int64")
|
|
162
|
-
class.funcall("new", ())
|
|
156
|
+
let class = pl.const_get::<_, Value>("Int64")?;
|
|
157
|
+
class.funcall("new", ())
|
|
163
158
|
}
|
|
164
159
|
DataType::Int128 => {
|
|
165
|
-
let class = pl.const_get::<_, Value>("Int128")
|
|
166
|
-
class.funcall("new", ())
|
|
160
|
+
let class = pl.const_get::<_, Value>("Int128")?;
|
|
161
|
+
class.funcall("new", ())
|
|
167
162
|
}
|
|
168
163
|
DataType::UInt8 => {
|
|
169
|
-
let class = pl.const_get::<_, Value>("UInt8")
|
|
170
|
-
class.funcall("new", ())
|
|
164
|
+
let class = pl.const_get::<_, Value>("UInt8")?;
|
|
165
|
+
class.funcall("new", ())
|
|
171
166
|
}
|
|
172
167
|
DataType::UInt16 => {
|
|
173
|
-
let class = pl.const_get::<_, Value>("UInt16")
|
|
174
|
-
class.funcall("new", ())
|
|
168
|
+
let class = pl.const_get::<_, Value>("UInt16")?;
|
|
169
|
+
class.funcall("new", ())
|
|
175
170
|
}
|
|
176
171
|
DataType::UInt32 => {
|
|
177
|
-
let class = pl.const_get::<_, Value>("UInt32")
|
|
178
|
-
class.funcall("new", ())
|
|
172
|
+
let class = pl.const_get::<_, Value>("UInt32")?;
|
|
173
|
+
class.funcall("new", ())
|
|
179
174
|
}
|
|
180
175
|
DataType::UInt64 => {
|
|
181
|
-
let class = pl.const_get::<_, Value>("UInt64")
|
|
182
|
-
class.funcall("new", ())
|
|
176
|
+
let class = pl.const_get::<_, Value>("UInt64")?;
|
|
177
|
+
class.funcall("new", ())
|
|
183
178
|
}
|
|
184
179
|
DataType::UInt128 => {
|
|
185
|
-
let class = pl.const_get::<_, Value>("UInt128")
|
|
186
|
-
class.funcall("new", ())
|
|
180
|
+
let class = pl.const_get::<_, Value>("UInt128")?;
|
|
181
|
+
class.funcall("new", ())
|
|
187
182
|
}
|
|
188
183
|
DataType::Float16 => {
|
|
189
|
-
let class = pl.const_get::<_, Value>("Float16")
|
|
190
|
-
class.funcall("new", ())
|
|
184
|
+
let class = pl.const_get::<_, Value>("Float16")?;
|
|
185
|
+
class.funcall("new", ())
|
|
191
186
|
}
|
|
192
187
|
DataType::Float32 => {
|
|
193
|
-
let class = pl.const_get::<_, Value>("Float32")
|
|
194
|
-
class.funcall("new", ())
|
|
188
|
+
let class = pl.const_get::<_, Value>("Float32")?;
|
|
189
|
+
class.funcall("new", ())
|
|
195
190
|
}
|
|
196
191
|
DataType::Float64 | DataType::Unknown(UnknownKind::Float) => {
|
|
197
|
-
let class = pl.const_get::<_, Value>("Float64")
|
|
198
|
-
class.funcall("new", ())
|
|
192
|
+
let class = pl.const_get::<_, Value>("Float64")?;
|
|
193
|
+
class.funcall("new", ())
|
|
199
194
|
}
|
|
200
195
|
DataType::Decimal(precision, scale) => {
|
|
201
|
-
let class = pl.const_get::<_, Value>("Decimal")
|
|
202
|
-
class
|
|
203
|
-
.funcall::<_, _, Value>("new", (precision, scale))
|
|
204
|
-
.unwrap()
|
|
196
|
+
let class = pl.const_get::<_, Value>("Decimal")?;
|
|
197
|
+
class.funcall::<_, _, Value>("new", (precision, scale))
|
|
205
198
|
}
|
|
206
199
|
DataType::Boolean => {
|
|
207
|
-
let class = pl.const_get::<_, Value>("Boolean")
|
|
208
|
-
class.funcall("new", ())
|
|
200
|
+
let class = pl.const_get::<_, Value>("Boolean")?;
|
|
201
|
+
class.funcall("new", ())
|
|
209
202
|
}
|
|
210
203
|
DataType::String | DataType::Unknown(UnknownKind::Str) => {
|
|
211
|
-
let class = pl.const_get::<_, Value>("String")
|
|
212
|
-
class.funcall("new", ())
|
|
204
|
+
let class = pl.const_get::<_, Value>("String")?;
|
|
205
|
+
class.funcall("new", ())
|
|
213
206
|
}
|
|
214
207
|
DataType::Binary => {
|
|
215
|
-
let class = pl.const_get::<_, Value>("Binary")
|
|
216
|
-
class.funcall("new", ())
|
|
208
|
+
let class = pl.const_get::<_, Value>("Binary")?;
|
|
209
|
+
class.funcall("new", ())
|
|
217
210
|
}
|
|
218
211
|
DataType::Array(inner, size) => {
|
|
219
|
-
let class = pl.const_get::<_, Value>("Array")
|
|
220
|
-
let inner = Wrap(*inner)
|
|
212
|
+
let class = pl.const_get::<_, Value>("Array")?;
|
|
213
|
+
let inner = Wrap(*inner).try_into_value_with(ruby)?;
|
|
221
214
|
let args = (inner, size);
|
|
222
|
-
class.funcall::<_, _, Value>("new", args)
|
|
215
|
+
class.funcall::<_, _, Value>("new", args)
|
|
223
216
|
}
|
|
224
217
|
DataType::List(inner) => {
|
|
225
|
-
let class = pl.const_get::<_, Value>("List")
|
|
226
|
-
let inner = Wrap(*inner)
|
|
227
|
-
class.funcall::<_, _, Value>("new", (inner,))
|
|
218
|
+
let class = pl.const_get::<_, Value>("List")?;
|
|
219
|
+
let inner = Wrap(*inner).try_into_value_with(ruby)?;
|
|
220
|
+
class.funcall::<_, _, Value>("new", (inner,))
|
|
228
221
|
}
|
|
229
222
|
DataType::Date => {
|
|
230
|
-
let class = pl.const_get::<_, Value>("Date")
|
|
231
|
-
class.funcall("new", ())
|
|
223
|
+
let class = pl.const_get::<_, Value>("Date")?;
|
|
224
|
+
class.funcall("new", ())
|
|
232
225
|
}
|
|
233
226
|
DataType::Datetime(tu, tz) => {
|
|
234
|
-
let datetime_class = pl.const_get::<_, Value>("Datetime")
|
|
235
|
-
datetime_class
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
)
|
|
240
|
-
.unwrap()
|
|
227
|
+
let datetime_class = pl.const_get::<_, Value>("Datetime")?;
|
|
228
|
+
datetime_class.funcall::<_, _, Value>(
|
|
229
|
+
"new",
|
|
230
|
+
(tu.to_ascii(), tz.as_deref().map(|x| x.as_str())),
|
|
231
|
+
)
|
|
241
232
|
}
|
|
242
233
|
DataType::Duration(tu) => {
|
|
243
|
-
let duration_class = pl.const_get::<_, Value>("Duration")
|
|
244
|
-
duration_class
|
|
245
|
-
.funcall::<_, _, Value>("new", (tu.to_ascii(),))
|
|
246
|
-
.unwrap()
|
|
234
|
+
let duration_class = pl.const_get::<_, Value>("Duration")?;
|
|
235
|
+
duration_class.funcall::<_, _, Value>("new", (tu.to_ascii(),))
|
|
247
236
|
}
|
|
248
237
|
DataType::Object(_) => {
|
|
249
|
-
let class = pl.const_get::<_, Value>("Object")
|
|
250
|
-
class.funcall("new", ())
|
|
238
|
+
let class = pl.const_get::<_, Value>("Object")?;
|
|
239
|
+
class.funcall("new", ())
|
|
251
240
|
}
|
|
252
241
|
DataType::Categorical(cats, _) => {
|
|
253
|
-
let categories_class = pl.const_get::<_, Value>("Categories")
|
|
254
|
-
let categorical_class = pl.const_get::<_, Value>("Categorical")
|
|
242
|
+
let categories_class = pl.const_get::<_, Value>("Categories")?;
|
|
243
|
+
let categorical_class = pl.const_get::<_, Value>("Categorical")?;
|
|
255
244
|
let categories: Value = categories_class
|
|
256
|
-
.funcall("_from_rb_categories", (RbCategories::from(cats.clone()),))
|
|
257
|
-
.unwrap();
|
|
245
|
+
.funcall("_from_rb_categories", (RbCategories::from(cats.clone()),))?;
|
|
258
246
|
let kwargs = ruby.hash_new();
|
|
259
|
-
kwargs
|
|
260
|
-
|
|
261
|
-
.unwrap();
|
|
262
|
-
categorical_class.funcall("new", (kwargs,)).unwrap()
|
|
247
|
+
kwargs.aset(ruby.to_symbol("categories"), categories)?;
|
|
248
|
+
categorical_class.funcall("new", (kwargs,))
|
|
263
249
|
}
|
|
264
250
|
DataType::Enum(_, mapping) => {
|
|
265
251
|
let categories = unsafe {
|
|
@@ -268,42 +254,38 @@ impl IntoValue for Wrap<DataType> {
|
|
|
268
254
|
vec![mapping.to_arrow(true)],
|
|
269
255
|
)
|
|
270
256
|
};
|
|
271
|
-
let class = pl.const_get::<_, Value>("Enum")
|
|
272
|
-
let series = to_series(ruby, categories.into_series().into())
|
|
273
|
-
class.funcall::<_, _, Value>("new", (series,))
|
|
257
|
+
let class = pl.const_get::<_, Value>("Enum")?;
|
|
258
|
+
let series = to_series(ruby, categories.into_series().into())?;
|
|
259
|
+
class.funcall::<_, _, Value>("new", (series,))
|
|
274
260
|
}
|
|
275
261
|
DataType::Time => {
|
|
276
|
-
let class = pl.const_get::<_, Value>("Time")
|
|
277
|
-
class.funcall("new", ())
|
|
262
|
+
let class = pl.const_get::<_, Value>("Time")?;
|
|
263
|
+
class.funcall("new", ())
|
|
278
264
|
}
|
|
279
265
|
DataType::Struct(fields) => {
|
|
280
|
-
let field_class = pl.const_get::<_, Value>("Field")
|
|
266
|
+
let field_class = pl.const_get::<_, Value>("Field")?;
|
|
281
267
|
let iter = fields.iter().map(|fld| {
|
|
282
268
|
let name = fld.name().as_str();
|
|
283
|
-
let dtype = Wrap(fld.dtype().clone());
|
|
284
|
-
field_class
|
|
285
|
-
.funcall::<_, _, Value>("new", (name, dtype))
|
|
286
|
-
.unwrap()
|
|
269
|
+
let dtype = Wrap(fld.dtype().clone()).try_into_value_with(ruby);
|
|
270
|
+
dtype.and_then(|dt| field_class.funcall::<_, _, Value>("new", (name, dt)))
|
|
287
271
|
});
|
|
288
|
-
let fields = ruby.
|
|
289
|
-
let struct_class = pl.const_get::<_, Value>("Struct")
|
|
290
|
-
struct_class
|
|
291
|
-
.funcall::<_, _, Value>("new", (fields,))
|
|
292
|
-
.unwrap()
|
|
272
|
+
let fields = ruby.ary_try_from_iter(iter)?;
|
|
273
|
+
let struct_class = pl.const_get::<_, Value>("Struct")?;
|
|
274
|
+
struct_class.funcall::<_, _, Value>("new", (fields,))
|
|
293
275
|
}
|
|
294
276
|
DataType::Null => {
|
|
295
|
-
let class = pl.const_get::<_, Value>("Null")
|
|
296
|
-
class.funcall("new", ())
|
|
277
|
+
let class = pl.const_get::<_, Value>("Null")?;
|
|
278
|
+
class.funcall("new", ())
|
|
297
279
|
}
|
|
298
280
|
DataType::Extension(_typ, _storage) => {
|
|
299
281
|
todo!();
|
|
300
282
|
}
|
|
301
283
|
DataType::Unknown(UnknownKind::Int(v)) => {
|
|
302
|
-
Wrap(materialize_dyn_int(v).dtype()).
|
|
284
|
+
Wrap(materialize_dyn_int(v).dtype()).try_into_value_with(ruby)
|
|
303
285
|
}
|
|
304
286
|
DataType::Unknown(_) => {
|
|
305
|
-
let class = pl.const_get::<_, Value>("Unknown")
|
|
306
|
-
class.funcall("new", ())
|
|
287
|
+
let class = pl.const_get::<_, Value>("Unknown")?;
|
|
288
|
+
class.funcall("new", ())
|
|
307
289
|
}
|
|
308
290
|
DataType::BinaryOffset => {
|
|
309
291
|
unimplemented!()
|
|
@@ -409,13 +391,12 @@ impl TryConvert for Wrap<DataType> {
|
|
|
409
391
|
"Polars::String" => DataType::String,
|
|
410
392
|
"Polars::Binary" => DataType::Binary,
|
|
411
393
|
"Polars::Categorical" => {
|
|
412
|
-
let categories: Value = ob.funcall("categories", ())
|
|
413
|
-
let rb_categories: &RbCategories =
|
|
414
|
-
categories.funcall("_categories", ()).unwrap();
|
|
394
|
+
let categories: Value = ob.funcall("categories", ())?;
|
|
395
|
+
let rb_categories: &RbCategories = categories.funcall("_categories", ())?;
|
|
415
396
|
DataType::from_categories(rb_categories.categories().clone())
|
|
416
397
|
}
|
|
417
398
|
"Polars::Enum" => {
|
|
418
|
-
let categories: Value = ob.funcall("categories", ())
|
|
399
|
+
let categories: Value = ob.funcall("categories", ())?;
|
|
419
400
|
let s = get_series(categories)?;
|
|
420
401
|
let ca = s.str().map_err(RbPolarsErr::from)?;
|
|
421
402
|
let categories = ca.downcast_iter().next().unwrap().clone();
|
|
@@ -427,7 +408,7 @@ impl TryConvert for Wrap<DataType> {
|
|
|
427
408
|
"Polars::Date" => DataType::Date,
|
|
428
409
|
"Polars::Time" => DataType::Time,
|
|
429
410
|
"Polars::Datetime" => {
|
|
430
|
-
let time_unit: Value = ob.funcall("time_unit", ())
|
|
411
|
+
let time_unit: Value = ob.funcall("time_unit", ())?;
|
|
431
412
|
let time_unit = Wrap::<TimeUnit>::try_convert(time_unit)?.0;
|
|
432
413
|
let time_zone: Option<String> = ob.funcall("time_zone", ())?;
|
|
433
414
|
DataType::Datetime(
|
|
@@ -436,7 +417,7 @@ impl TryConvert for Wrap<DataType> {
|
|
|
436
417
|
)
|
|
437
418
|
}
|
|
438
419
|
"Polars::Duration" => {
|
|
439
|
-
let time_unit: Value = ob.funcall("time_unit", ())
|
|
420
|
+
let time_unit: Value = ob.funcall("time_unit", ())?;
|
|
440
421
|
let time_unit = Wrap::<TimeUnit>::try_convert(time_unit)?.0;
|
|
441
422
|
DataType::Duration(time_unit)
|
|
442
423
|
}
|
|
@@ -447,13 +428,13 @@ impl TryConvert for Wrap<DataType> {
|
|
|
447
428
|
DataType::Decimal(precision, scale)
|
|
448
429
|
}
|
|
449
430
|
"Polars::List" => {
|
|
450
|
-
let inner: Value = ob.funcall("inner", ())
|
|
431
|
+
let inner: Value = ob.funcall("inner", ())?;
|
|
451
432
|
let inner = Wrap::<DataType>::try_convert(inner)?;
|
|
452
433
|
DataType::List(Box::new(inner.0))
|
|
453
434
|
}
|
|
454
435
|
"Polars::Array" => {
|
|
455
|
-
let inner: Value = ob.funcall("inner", ())
|
|
456
|
-
let size: Value = ob.funcall("size", ())
|
|
436
|
+
let inner: Value = ob.funcall("inner", ())?;
|
|
437
|
+
let size: Value = ob.funcall("size", ())?;
|
|
457
438
|
let inner = Wrap::<DataType>::try_convert(inner)?;
|
|
458
439
|
let size = usize::try_convert(size)?;
|
|
459
440
|
DataType::Array(Box::new(inner.0), size)
|
|
@@ -648,6 +629,16 @@ impl TryConvert for Wrap<ScanSources> {
|
|
|
648
629
|
}
|
|
649
630
|
}
|
|
650
631
|
|
|
632
|
+
impl TryIntoValue for Wrap<Schema> {
|
|
633
|
+
fn try_into_value_with(self, ruby: &Ruby) -> RbResult<Value> {
|
|
634
|
+
let dict = ruby.hash_new();
|
|
635
|
+
for (k, v) in self.0.iter() {
|
|
636
|
+
dict.aset(k.as_str(), Wrap(v.clone()).try_into_value_with(ruby)?)?;
|
|
637
|
+
}
|
|
638
|
+
Ok(dict.as_value())
|
|
639
|
+
}
|
|
640
|
+
}
|
|
641
|
+
|
|
651
642
|
#[derive(Clone)]
|
|
652
643
|
pub struct ObjectValue {
|
|
653
644
|
pub inner: Opaque<Value>,
|
|
@@ -655,18 +646,17 @@ pub struct ObjectValue {
|
|
|
655
646
|
|
|
656
647
|
impl Debug for ObjectValue {
|
|
657
648
|
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
|
658
|
-
f
|
|
659
|
-
.field("inner", &self.to_value())
|
|
660
|
-
.finish()
|
|
649
|
+
write!(f, "{}", self)
|
|
661
650
|
}
|
|
662
651
|
}
|
|
663
652
|
|
|
664
653
|
impl Hash for ObjectValue {
|
|
665
654
|
fn hash<H: Hasher>(&self, state: &mut H) {
|
|
666
|
-
let h =
|
|
667
|
-
.
|
|
668
|
-
|
|
669
|
-
|
|
655
|
+
let h = Ruby::attach(|rb| {
|
|
656
|
+
rb.get_inner(self.inner)
|
|
657
|
+
.funcall::<_, _, isize>("hash", ())
|
|
658
|
+
.expect("should be hashable")
|
|
659
|
+
});
|
|
670
660
|
state.write_isize(h)
|
|
671
661
|
}
|
|
672
662
|
}
|
|
@@ -675,7 +665,11 @@ impl Eq for ObjectValue {}
|
|
|
675
665
|
|
|
676
666
|
impl PartialEq for ObjectValue {
|
|
677
667
|
fn eq(&self, other: &Self) -> bool {
|
|
678
|
-
|
|
668
|
+
Ruby::attach(|ruby| {
|
|
669
|
+
ruby.get_inner(self.inner)
|
|
670
|
+
.eql(ruby.get_inner(other.inner))
|
|
671
|
+
.unwrap_or(false)
|
|
672
|
+
})
|
|
679
673
|
}
|
|
680
674
|
}
|
|
681
675
|
|
|
@@ -696,7 +690,10 @@ impl TotalHash for ObjectValue {
|
|
|
696
690
|
|
|
697
691
|
impl Display for ObjectValue {
|
|
698
692
|
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
|
699
|
-
|
|
693
|
+
Ruby::attach(|rb| {
|
|
694
|
+
let v = rb.get_inner(self.inner);
|
|
695
|
+
write!(f, "{}", v)
|
|
696
|
+
})
|
|
700
697
|
}
|
|
701
698
|
}
|
|
702
699
|
|
|
@@ -724,12 +721,6 @@ impl From<&dyn PolarsObjectSafe> for &ObjectValue {
|
|
|
724
721
|
}
|
|
725
722
|
}
|
|
726
723
|
|
|
727
|
-
impl ObjectValue {
|
|
728
|
-
pub fn to_value(&self) -> Value {
|
|
729
|
-
self.clone().into_value_with(&Ruby::get().unwrap())
|
|
730
|
-
}
|
|
731
|
-
}
|
|
732
|
-
|
|
733
724
|
impl IntoValue for ObjectValue {
|
|
734
725
|
fn into_value_with(self, ruby: &Ruby) -> Value {
|
|
735
726
|
ruby.get_inner(self.inner)
|
|
@@ -738,9 +729,9 @@ impl IntoValue for ObjectValue {
|
|
|
738
729
|
|
|
739
730
|
impl Default for ObjectValue {
|
|
740
731
|
fn default() -> Self {
|
|
741
|
-
ObjectValue {
|
|
742
|
-
inner:
|
|
743
|
-
}
|
|
732
|
+
Ruby::attach(|rb| ObjectValue {
|
|
733
|
+
inner: rb.qnil().as_value().into(),
|
|
734
|
+
})
|
|
744
735
|
}
|
|
745
736
|
}
|
|
746
737
|
|
|
@@ -1125,6 +1116,21 @@ impl TryConvert for Wrap<SearchSortedSide> {
|
|
|
1125
1116
|
}
|
|
1126
1117
|
}
|
|
1127
1118
|
|
|
1119
|
+
impl TryConvert for Wrap<PivotColumnNaming> {
|
|
1120
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
|
1121
|
+
let parsed = match String::try_convert(ob)?.as_str() {
|
|
1122
|
+
"auto" => PivotColumnNaming::Auto,
|
|
1123
|
+
"combine" => PivotColumnNaming::Combine,
|
|
1124
|
+
v => {
|
|
1125
|
+
return Err(RbValueError::new_err(format!(
|
|
1126
|
+
"`column_naming` must be one of {{'auto', 'combine'}}, got {v}",
|
|
1127
|
+
)));
|
|
1128
|
+
}
|
|
1129
|
+
};
|
|
1130
|
+
Ok(Wrap(parsed))
|
|
1131
|
+
}
|
|
1132
|
+
}
|
|
1133
|
+
|
|
1128
1134
|
impl TryConvert for Wrap<ClosedInterval> {
|
|
1129
1135
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
|
1130
1136
|
let parsed = match String::try_convert(ob)?.as_str() {
|
|
@@ -1234,15 +1240,25 @@ impl TryConvert for Wrap<CastColumnsPolicy> {
|
|
|
1234
1240
|
return Ok(out);
|
|
1235
1241
|
}
|
|
1236
1242
|
|
|
1237
|
-
let integer_upcast =
|
|
1238
|
-
|
|
1239
|
-
|
|
1240
|
-
|
|
1241
|
-
|
|
1242
|
-
|
|
1243
|
-
|
|
1243
|
+
let mut integer_upcast = false;
|
|
1244
|
+
let mut integer_to_float_cast = false;
|
|
1245
|
+
|
|
1246
|
+
let integer_cast_object: Value = ob.funcall("integer_cast", ())?;
|
|
1247
|
+
|
|
1248
|
+
parse_multiple_options("integer_cast", integer_cast_object, |v| {
|
|
1249
|
+
match v {
|
|
1250
|
+
"upcast" => integer_upcast = true,
|
|
1251
|
+
"allow-float" => integer_to_float_cast = true,
|
|
1252
|
+
"forbid" => {}
|
|
1253
|
+
v => {
|
|
1254
|
+
return Err(RbValueError::new_err(format!(
|
|
1255
|
+
"unknown option for integer_cast: {v}"
|
|
1256
|
+
)));
|
|
1257
|
+
}
|
|
1244
1258
|
}
|
|
1245
|
-
|
|
1259
|
+
|
|
1260
|
+
Ok(())
|
|
1261
|
+
})?;
|
|
1246
1262
|
|
|
1247
1263
|
let mut float_upcast = false;
|
|
1248
1264
|
let mut float_downcast = false;
|
|
@@ -1318,6 +1334,7 @@ impl TryConvert for Wrap<CastColumnsPolicy> {
|
|
|
1318
1334
|
|
|
1319
1335
|
return Ok(Wrap(CastColumnsPolicy {
|
|
1320
1336
|
integer_upcast,
|
|
1337
|
+
integer_to_float_cast,
|
|
1321
1338
|
float_upcast,
|
|
1322
1339
|
float_downcast,
|
|
1323
1340
|
datetime_nanoseconds_downcast,
|
|
@@ -1364,12 +1381,9 @@ pub fn parse_fill_null_strategy(
|
|
|
1364
1381
|
"zero" => FillNullStrategy::Zero,
|
|
1365
1382
|
"one" => FillNullStrategy::One,
|
|
1366
1383
|
e => {
|
|
1367
|
-
return Err(
|
|
1368
|
-
|
|
1369
|
-
|
|
1370
|
-
"strategy must be one of {{'forward', 'backward', 'min', 'max', 'mean', 'zero', 'one'}}, got {e}",
|
|
1371
|
-
),
|
|
1372
|
-
));
|
|
1384
|
+
return Err(RbValueError::new_err(format!(
|
|
1385
|
+
"`strategy` must be one of {{'forward', 'backward', 'min', 'max', 'mean', 'zero', 'one'}}, got {e}",
|
|
1386
|
+
)));
|
|
1373
1387
|
}
|
|
1374
1388
|
};
|
|
1375
1389
|
Ok(parsed)
|
|
@@ -1491,6 +1505,21 @@ impl TryConvert for Wrap<Option<TimeZone>> {
|
|
|
1491
1505
|
|
|
1492
1506
|
unsafe impl TryConvertOwned for Wrap<Option<TimeZone>> {}
|
|
1493
1507
|
|
|
1508
|
+
impl TryConvert for Wrap<UpcastOrForbid> {
|
|
1509
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
|
1510
|
+
let parsed = match String::try_convert(ob)?.as_str() {
|
|
1511
|
+
"upcast" => UpcastOrForbid::Upcast,
|
|
1512
|
+
"forbid" => UpcastOrForbid::Forbid,
|
|
1513
|
+
v => {
|
|
1514
|
+
return Err(RbValueError::new_err(format!(
|
|
1515
|
+
"cast parameter must be one of {{'upcast', 'forbid'}}, got {v}",
|
|
1516
|
+
)));
|
|
1517
|
+
}
|
|
1518
|
+
};
|
|
1519
|
+
Ok(Wrap(parsed))
|
|
1520
|
+
}
|
|
1521
|
+
}
|
|
1522
|
+
|
|
1494
1523
|
impl TryConvert for Wrap<ExtraColumnsPolicy> {
|
|
1495
1524
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
|
1496
1525
|
let parsed = match String::try_convert(ob)?.as_str() {
|
|
@@ -1521,6 +1550,27 @@ impl TryConvert for Wrap<MissingColumnsPolicy> {
|
|
|
1521
1550
|
}
|
|
1522
1551
|
}
|
|
1523
1552
|
|
|
1553
|
+
impl TryConvert for Wrap<MissingColumnsPolicyOrExpr> {
|
|
1554
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
|
1555
|
+
if let Ok(rbexpr) = <&RbExpr>::try_convert(ob) {
|
|
1556
|
+
return Ok(Wrap(MissingColumnsPolicyOrExpr::InsertWith(
|
|
1557
|
+
rbexpr.inner.clone(),
|
|
1558
|
+
)));
|
|
1559
|
+
}
|
|
1560
|
+
|
|
1561
|
+
let parsed = match String::try_convert(ob)?.as_str() {
|
|
1562
|
+
"insert" => MissingColumnsPolicyOrExpr::Insert,
|
|
1563
|
+
"raise" => MissingColumnsPolicyOrExpr::Raise,
|
|
1564
|
+
v => {
|
|
1565
|
+
return Err(RbValueError::new_err(format!(
|
|
1566
|
+
"missing column/field parameter must be one of {{'insert', 'raise', expression}}, got {v}",
|
|
1567
|
+
)));
|
|
1568
|
+
}
|
|
1569
|
+
};
|
|
1570
|
+
Ok(Wrap(parsed))
|
|
1571
|
+
}
|
|
1572
|
+
}
|
|
1573
|
+
|
|
1524
1574
|
impl TryConvert for Wrap<ColumnMapping> {
|
|
1525
1575
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
|
1526
1576
|
let (column_mapping_type, ob) = <(String, Value)>::try_convert(ob)?;
|
|
@@ -1,39 +1,42 @@
|
|
|
1
|
-
use magnus::{
|
|
1
|
+
use magnus::{Ruby, Value, prelude::*};
|
|
2
2
|
|
|
3
3
|
use super::*;
|
|
4
4
|
use crate::RbResult;
|
|
5
5
|
use crate::conversion::{ObjectValue, Wrap};
|
|
6
6
|
use crate::interop::arrow::to_rb::dataframe_to_stream;
|
|
7
|
+
use crate::ruby::utils::TryIntoValue;
|
|
7
8
|
|
|
8
9
|
impl RbDataFrame {
|
|
9
|
-
pub fn row_tuple(ruby: &Ruby, self_: &Self, idx: i64) -> Value {
|
|
10
|
+
pub fn row_tuple(ruby: &Ruby, self_: &Self, idx: i64) -> RbResult<Value> {
|
|
10
11
|
let idx = if idx < 0 {
|
|
11
12
|
(self_.df.read().height() as i64 + idx) as usize
|
|
12
13
|
} else {
|
|
13
14
|
idx as usize
|
|
14
15
|
};
|
|
15
|
-
ruby.
|
|
16
|
+
ruby.ary_try_from_iter(self_.df.read().columns().iter().map(|s| match s.dtype() {
|
|
16
17
|
DataType::Object(_) => {
|
|
17
18
|
let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
|
|
18
|
-
|
|
19
|
+
// TODO remove unwrap and clone
|
|
20
|
+
obj.unwrap().clone().try_into_value_with(ruby)
|
|
19
21
|
}
|
|
20
|
-
_ => Wrap(s.get(idx).unwrap()).
|
|
22
|
+
_ => Wrap(s.get(idx).unwrap()).try_into_value_with(ruby),
|
|
21
23
|
}))
|
|
22
|
-
.as_value()
|
|
24
|
+
.map(|v| v.as_value())
|
|
23
25
|
}
|
|
24
26
|
|
|
25
|
-
pub fn row_tuples(ruby: &Ruby, self_: &Self) -> Value {
|
|
27
|
+
pub fn row_tuples(ruby: &Ruby, self_: &Self) -> RbResult<Value> {
|
|
26
28
|
let df = &self_.df;
|
|
27
|
-
ruby.
|
|
28
|
-
ruby.
|
|
29
|
+
ruby.ary_try_from_iter((0..df.read().height()).map(|idx| {
|
|
30
|
+
ruby.ary_try_from_iter(self_.df.read().columns().iter().map(|s| match s.dtype() {
|
|
29
31
|
DataType::Object(_) => {
|
|
30
32
|
let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
|
|
31
|
-
|
|
33
|
+
// TODO remove unwrap and clone
|
|
34
|
+
obj.unwrap().clone().try_into_value_with(ruby)
|
|
32
35
|
}
|
|
33
|
-
_ => Wrap(s.get(idx).unwrap()).
|
|
36
|
+
_ => Wrap(s.get(idx).unwrap()).try_into_value_with(ruby),
|
|
34
37
|
}))
|
|
35
38
|
}))
|
|
36
|
-
.as_value()
|
|
39
|
+
.map(|v| v.as_value())
|
|
37
40
|
}
|
|
38
41
|
|
|
39
42
|
pub fn __arrow_c_stream__(ruby: &Ruby, self_: &Self) -> RbResult<Value> {
|