polars-df 0.20.0 → 0.21.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +27 -0
- data/Cargo.lock +192 -186
- data/LICENSE.txt +1 -1
- data/ext/polars/Cargo.toml +19 -9
- data/ext/polars/src/batched_csv.rs +2 -2
- data/ext/polars/src/catalog/mod.rs +1 -0
- data/ext/polars/src/catalog/unity.rs +450 -0
- data/ext/polars/src/conversion/any_value.rs +9 -19
- data/ext/polars/src/conversion/categorical.rs +30 -0
- data/ext/polars/src/conversion/chunked_array.rs +8 -8
- data/ext/polars/src/conversion/mod.rs +275 -109
- data/ext/polars/src/dataframe/construction.rs +2 -2
- data/ext/polars/src/dataframe/export.rs +2 -2
- data/ext/polars/src/dataframe/general.rs +4 -2
- data/ext/polars/src/dataframe/io.rs +2 -2
- data/ext/polars/src/exceptions.rs +2 -1
- data/ext/polars/src/expr/array.rs +73 -4
- data/ext/polars/src/expr/binary.rs +26 -1
- data/ext/polars/src/expr/bitwise.rs +39 -0
- data/ext/polars/src/expr/categorical.rs +20 -0
- data/ext/polars/src/expr/datatype.rs +37 -0
- data/ext/polars/src/expr/datetime.rs +58 -0
- data/ext/polars/src/expr/general.rs +106 -22
- data/ext/polars/src/expr/list.rs +45 -2
- data/ext/polars/src/expr/meta.rs +5 -28
- data/ext/polars/src/expr/mod.rs +4 -1
- data/ext/polars/src/expr/name.rs +10 -2
- data/ext/polars/src/expr/rolling.rs +21 -1
- data/ext/polars/src/expr/selector.rs +219 -0
- data/ext/polars/src/expr/string.rs +73 -6
- data/ext/polars/src/expr/struct.rs +9 -1
- data/ext/polars/src/file.rs +11 -5
- data/ext/polars/src/functions/io.rs +21 -11
- data/ext/polars/src/functions/lazy.rs +26 -54
- data/ext/polars/src/functions/meta.rs +2 -2
- data/ext/polars/src/functions/misc.rs +1 -1
- data/ext/polars/src/functions/string_cache.rs +4 -5
- data/ext/polars/src/interop/numo/numo_rs.rs +1 -1
- data/ext/polars/src/interop/numo/to_numo_series.rs +1 -1
- data/ext/polars/src/io/mod.rs +102 -0
- data/ext/polars/src/lazyframe/general.rs +124 -111
- data/ext/polars/src/lazyframe/serde.rs +1 -1
- data/ext/polars/src/lazyframe/sink.rs +6 -6
- data/ext/polars/src/lib.rs +216 -29
- data/ext/polars/src/map/dataframe.rs +9 -9
- data/ext/polars/src/map/lazy.rs +1 -1
- data/ext/polars/src/map/mod.rs +31 -19
- data/ext/polars/src/map/series.rs +9 -9
- data/ext/polars/src/on_startup.rs +5 -2
- data/ext/polars/src/rb_modules.rs +1 -1
- data/ext/polars/src/series/aggregation.rs +44 -0
- data/ext/polars/src/series/construction.rs +11 -7
- data/ext/polars/src/series/export.rs +6 -4
- data/ext/polars/src/series/general.rs +75 -210
- data/ext/polars/src/series/import.rs +2 -2
- data/ext/polars/src/series/map.rs +227 -0
- data/ext/polars/src/series/mod.rs +2 -1
- data/ext/polars/src/series/scatter.rs +1 -1
- data/ext/polars/src/utils.rs +10 -2
- data/lib/polars/array_expr.rb +382 -3
- data/lib/polars/array_name_space.rb +281 -0
- data/lib/polars/binary_expr.rb +67 -0
- data/lib/polars/binary_name_space.rb +43 -0
- data/lib/polars/cat_expr.rb +224 -0
- data/lib/polars/cat_name_space.rb +130 -32
- data/lib/polars/catalog/unity/catalog_info.rb +20 -0
- data/lib/polars/catalog/unity/column_info.rb +31 -0
- data/lib/polars/catalog/unity/namespace_info.rb +21 -0
- data/lib/polars/catalog/unity/table_info.rb +50 -0
- data/lib/polars/catalog.rb +448 -0
- data/lib/polars/config.rb +2 -2
- data/lib/polars/convert.rb +12 -2
- data/lib/polars/data_frame.rb +834 -48
- data/lib/polars/data_type_expr.rb +52 -0
- data/lib/polars/data_types.rb +61 -5
- data/lib/polars/date_time_expr.rb +251 -0
- data/lib/polars/date_time_name_space.rb +299 -0
- data/lib/polars/exceptions.rb +7 -2
- data/lib/polars/expr.rb +1247 -211
- data/lib/polars/functions/col.rb +6 -5
- data/lib/polars/functions/datatype.rb +21 -0
- data/lib/polars/functions/lazy.rb +127 -15
- data/lib/polars/functions/repeat.rb +4 -0
- data/lib/polars/io/csv.rb +19 -1
- data/lib/polars/io/json.rb +16 -0
- data/lib/polars/io/ndjson.rb +13 -0
- data/lib/polars/io/parquet.rb +70 -66
- data/lib/polars/io/scan_options.rb +47 -0
- data/lib/polars/lazy_frame.rb +1099 -95
- data/lib/polars/list_expr.rb +400 -11
- data/lib/polars/list_name_space.rb +321 -5
- data/lib/polars/meta_expr.rb +71 -22
- data/lib/polars/name_expr.rb +36 -0
- data/lib/polars/scan_cast_options.rb +64 -0
- data/lib/polars/schema.rb +84 -3
- data/lib/polars/selector.rb +210 -0
- data/lib/polars/selectors.rb +932 -203
- data/lib/polars/series.rb +1083 -63
- data/lib/polars/string_expr.rb +435 -9
- data/lib/polars/string_name_space.rb +729 -45
- data/lib/polars/struct_expr.rb +103 -0
- data/lib/polars/struct_name_space.rb +19 -1
- data/lib/polars/utils/parse.rb +40 -0
- data/lib/polars/utils/various.rb +18 -1
- data/lib/polars/utils.rb +9 -1
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +10 -0
- metadata +20 -2
@@ -1,13 +1,13 @@
|
|
1
1
|
use magnus::encoding::{self, EncodingCapable};
|
2
2
|
use magnus::{
|
3
|
-
|
3
|
+
Float, Integer, RArray, RString, Ruby, Value, class, prelude::*, typed_data::Obj, value::Opaque,
|
4
4
|
};
|
5
5
|
use polars::lazy::dsl;
|
6
6
|
use polars::prelude::*;
|
7
7
|
|
8
|
-
use crate::conversion::{get_lf, get_rbseq, Wrap};
|
8
|
+
use crate::conversion::{Wrap, get_lf, get_rbseq};
|
9
|
+
use crate::expr::datatype::RbDataTypeExpr;
|
9
10
|
use crate::map::lazy::binary_lambda;
|
10
|
-
use crate::prelude::vec_extract_wrapped;
|
11
11
|
use crate::rb_exprs_to_exprs;
|
12
12
|
use crate::{RbDataFrame, RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbValueError};
|
13
13
|
|
@@ -85,6 +85,10 @@ pub fn as_struct(exprs: RArray) -> RbResult<RbExpr> {
|
|
85
85
|
Ok(dsl::as_struct(exprs).into())
|
86
86
|
}
|
87
87
|
|
88
|
+
pub fn field(names: Vec<String>) -> RbExpr {
|
89
|
+
dsl::Expr::Field(names.into_iter().map(|x| x.into()).collect()).into()
|
90
|
+
}
|
91
|
+
|
88
92
|
pub fn coalesce(exprs: RArray) -> RbResult<RbExpr> {
|
89
93
|
let exprs = rb_exprs_to_exprs(exprs)?;
|
90
94
|
Ok(dsl::coalesce(&exprs).into())
|
@@ -103,10 +107,6 @@ pub fn collect_all(lfs: RArray) -> RbResult<RArray> {
|
|
103
107
|
})))
|
104
108
|
}
|
105
109
|
|
106
|
-
pub fn cols(names: Vec<String>) -> RbExpr {
|
107
|
-
dsl::cols(names).into()
|
108
|
-
}
|
109
|
-
|
110
110
|
pub fn concat_lf(
|
111
111
|
lfs: Value,
|
112
112
|
rechunk: bool,
|
@@ -166,20 +166,24 @@ pub fn cum_fold(
|
|
166
166
|
acc: &RbExpr,
|
167
167
|
lambda: Value,
|
168
168
|
exprs: RArray,
|
169
|
+
returns_scalar: bool,
|
170
|
+
return_dtype: Option<&RbDataTypeExpr>,
|
169
171
|
include_init: bool,
|
170
172
|
) -> RbResult<RbExpr> {
|
171
173
|
let exprs = rb_exprs_to_exprs(exprs)?;
|
172
174
|
let lambda = Opaque::from(lambda);
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
175
|
+
let func = PlanCallback::new(move |(a, b): (Series, Series)| {
|
176
|
+
binary_lambda(Ruby::get().unwrap().get_inner(lambda), a, b).map(|v| v.unwrap())
|
177
|
+
});
|
178
|
+
Ok(dsl::cum_fold_exprs(
|
179
|
+
acc.inner.clone(),
|
180
|
+
func,
|
181
|
+
exprs,
|
182
|
+
returns_scalar,
|
183
|
+
return_dtype.map(|v| v.inner.clone()),
|
184
|
+
include_init,
|
185
|
+
)
|
186
|
+
.into())
|
183
187
|
}
|
184
188
|
|
185
189
|
pub fn concat_lf_diagonal(
|
@@ -220,24 +224,6 @@ pub fn concat_lf_horizontal(lfs: RArray, parallel: bool) -> RbResult<RbLazyFrame
|
|
220
224
|
Ok(lf.into())
|
221
225
|
}
|
222
226
|
|
223
|
-
pub fn dtype_cols(dtypes: RArray) -> RbResult<RbExpr> {
|
224
|
-
let dtypes = dtypes
|
225
|
-
.into_iter()
|
226
|
-
.map(Wrap::<DataType>::try_convert)
|
227
|
-
.collect::<RbResult<Vec<Wrap<DataType>>>>()?;
|
228
|
-
let dtypes = vec_extract_wrapped(dtypes);
|
229
|
-
Ok(dsl::dtype_cols(dtypes).into())
|
230
|
-
}
|
231
|
-
|
232
|
-
pub fn index_cols(indices: Vec<i64>) -> RbExpr {
|
233
|
-
if indices.len() == 1 {
|
234
|
-
dsl::nth(indices[0])
|
235
|
-
} else {
|
236
|
-
dsl::index_cols(indices)
|
237
|
-
}
|
238
|
-
.into()
|
239
|
-
}
|
240
|
-
|
241
227
|
#[allow(clippy::too_many_arguments)]
|
242
228
|
pub fn duration(
|
243
229
|
weeks: Option<&RbExpr>,
|
@@ -274,42 +260,28 @@ pub fn duration(
|
|
274
260
|
dsl::duration(args).into()
|
275
261
|
}
|
276
262
|
|
277
|
-
pub fn first() -> RbExpr {
|
278
|
-
dsl::first().into()
|
279
|
-
}
|
280
|
-
|
281
263
|
pub fn fold(
|
282
264
|
acc: &RbExpr,
|
283
265
|
lambda: Value,
|
284
266
|
exprs: RArray,
|
285
267
|
returns_scalar: bool,
|
286
|
-
return_dtype: Option<Wrap<DataType>>,
|
268
|
+
return_dtype: Option<&RbDataTypeExpr>,
|
287
269
|
) -> RbResult<RbExpr> {
|
288
270
|
let exprs = rb_exprs_to_exprs(exprs)?;
|
289
271
|
let lambda = Opaque::from(lambda);
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
Ruby::get().unwrap().get_inner(lambda),
|
294
|
-
a.take_materialized_series(),
|
295
|
-
b.take_materialized_series(),
|
296
|
-
)
|
297
|
-
.map(|v| v.map(Column::from))
|
298
|
-
};
|
272
|
+
let func = PlanCallback::new(move |(a, b): (Series, Series)| {
|
273
|
+
binary_lambda(Ruby::get().unwrap().get_inner(lambda), a, b).map(|v| v.unwrap())
|
274
|
+
});
|
299
275
|
Ok(dsl::fold_exprs(
|
300
276
|
acc.inner.clone(),
|
301
277
|
func,
|
302
278
|
exprs,
|
303
279
|
returns_scalar,
|
304
|
-
return_dtype.map(|w| w.0),
|
280
|
+
return_dtype.map(|w| w.inner.clone()),
|
305
281
|
)
|
306
282
|
.into())
|
307
283
|
}
|
308
284
|
|
309
|
-
pub fn last() -> RbExpr {
|
310
|
-
dsl::last().into()
|
311
|
-
}
|
312
|
-
|
313
285
|
pub fn lit(value: Value, allow_object: bool, is_scalar: bool) -> RbResult<RbExpr> {
|
314
286
|
if value.is_kind_of(class::true_class()) || value.is_kind_of(class::false_class()) {
|
315
287
|
Ok(dsl::lit(bool::try_convert(value)?).into())
|
@@ -1,8 +1,8 @@
|
|
1
1
|
use magnus::{IntoValue, Value};
|
2
2
|
use polars_core;
|
3
|
+
use polars_core::POOL;
|
3
4
|
use polars_core::fmt::FloatFmt;
|
4
5
|
use polars_core::prelude::IDX_DTYPE;
|
5
|
-
use polars_core::POOL;
|
6
6
|
|
7
7
|
use crate::conversion::Wrap;
|
8
8
|
use crate::{RbResult, RbValueError};
|
@@ -22,7 +22,7 @@ pub fn set_float_fmt(fmt: String) -> RbResult<()> {
|
|
22
22
|
e => {
|
23
23
|
return Err(RbValueError::new_err(format!(
|
24
24
|
"fmt must be one of {{'full', 'mixed'}}, got {e}",
|
25
|
-
)))
|
25
|
+
)));
|
26
26
|
}
|
27
27
|
};
|
28
28
|
polars_core::fmt::set_float_fmt(fmt);
|
@@ -1,17 +1,17 @@
|
|
1
1
|
use crate::RbResult;
|
2
2
|
use magnus::{RArray, Ruby, Value};
|
3
|
-
use polars_core::StringCacheHolder;
|
4
3
|
|
5
4
|
pub fn enable_string_cache() {
|
6
|
-
|
5
|
+
// The string cache no longer exists.
|
7
6
|
}
|
8
7
|
|
9
8
|
pub fn disable_string_cache() {
|
10
|
-
|
9
|
+
// The string cache no longer exists.
|
11
10
|
}
|
12
11
|
|
13
12
|
pub fn using_string_cache() -> bool {
|
14
|
-
|
13
|
+
// The string cache no longer exists.
|
14
|
+
true
|
15
15
|
}
|
16
16
|
|
17
17
|
#[magnus::wrap(class = "Polars::RbStringCacheHolder")]
|
@@ -19,7 +19,6 @@ pub struct RbStringCacheHolder {}
|
|
19
19
|
|
20
20
|
impl RbStringCacheHolder {
|
21
21
|
pub fn hold() -> RbResult<Value> {
|
22
|
-
let _hold = StringCacheHolder::hold();
|
23
22
|
Ruby::get().unwrap().yield_splat(RArray::new())
|
24
23
|
}
|
25
24
|
}
|
@@ -3,10 +3,10 @@ use num_traits::{Float, NumCast};
|
|
3
3
|
use polars_core::prelude::*;
|
4
4
|
|
5
5
|
use super::numo_rs::{Element, RbArray1};
|
6
|
+
use crate::RbResult;
|
6
7
|
use crate::error::RbPolarsErr;
|
7
8
|
use crate::raise_err;
|
8
9
|
use crate::series::RbSeries;
|
9
|
-
use crate::RbResult;
|
10
10
|
|
11
11
|
impl RbSeries {
|
12
12
|
/// Convert this Series to a Numo array.
|
@@ -0,0 +1,102 @@
|
|
1
|
+
use std::sync::Arc;
|
2
|
+
|
3
|
+
use magnus::{TryConvert, Value, value::ReprValue};
|
4
|
+
use polars::prelude::deletion::DeletionFilesList;
|
5
|
+
use polars::prelude::{
|
6
|
+
CastColumnsPolicy, ColumnMapping, ExtraColumnsPolicy, MissingColumnsPolicy, PlSmallStr, Schema,
|
7
|
+
UnifiedScanArgs,
|
8
|
+
};
|
9
|
+
use polars_io::{HiveOptions, RowIndex};
|
10
|
+
use polars_utils::IdxSize;
|
11
|
+
use polars_utils::plpath::PlPathRef;
|
12
|
+
use polars_utils::slice_enum::Slice;
|
13
|
+
|
14
|
+
use crate::RbResult;
|
15
|
+
use crate::prelude::Wrap;
|
16
|
+
|
17
|
+
/// Interface to `class ScanOptions` on the Ruby side
|
18
|
+
pub struct RbScanOptions(Value);
|
19
|
+
|
20
|
+
impl TryConvert for RbScanOptions {
|
21
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
22
|
+
Ok(Self(ob))
|
23
|
+
}
|
24
|
+
}
|
25
|
+
|
26
|
+
impl RbScanOptions {
|
27
|
+
pub fn extract_unified_scan_args(
|
28
|
+
&self,
|
29
|
+
// For cloud_options init
|
30
|
+
first_path: Option<PlPathRef>,
|
31
|
+
) -> RbResult<UnifiedScanArgs> {
|
32
|
+
let row_index: Option<(Wrap<PlSmallStr>, IdxSize)> = self.0.funcall("row_index", ())?;
|
33
|
+
let pre_slice: Option<(i64, usize)> = self.0.funcall("pre_slice", ())?;
|
34
|
+
let cast_options: Wrap<CastColumnsPolicy> = self.0.funcall("cast_options", ())?;
|
35
|
+
let extra_columns: Wrap<ExtraColumnsPolicy> = self.0.funcall("extra_columns", ())?;
|
36
|
+
let missing_columns: Wrap<MissingColumnsPolicy> = self.0.funcall("missing_columns", ())?;
|
37
|
+
let include_file_paths: Option<Wrap<PlSmallStr>> =
|
38
|
+
self.0.funcall("include_file_paths", ())?;
|
39
|
+
let glob: bool = self.0.funcall("glob", ())?;
|
40
|
+
let hive_partitioning: Option<bool> = self.0.funcall("hive_partitioning", ())?;
|
41
|
+
let hive_schema: Option<Wrap<Schema>> = self.0.funcall("hive_schema", ())?;
|
42
|
+
let try_parse_hive_dates: bool = self.0.funcall("try_parse_hive_dates", ())?;
|
43
|
+
let rechunk: bool = self.0.funcall("rechunk", ())?;
|
44
|
+
let cache: bool = self.0.funcall("cache", ())?;
|
45
|
+
let storage_options: Option<Vec<(String, String)>> =
|
46
|
+
self.0.funcall("storage_options", ())?;
|
47
|
+
let retries: usize = self.0.funcall("retries", ())?;
|
48
|
+
let deletion_files: Option<Wrap<DeletionFilesList>> =
|
49
|
+
self.0.funcall("deletion_files", ())?;
|
50
|
+
let column_mapping: Option<Wrap<ColumnMapping>> = self.0.funcall("column_mapping", ())?;
|
51
|
+
|
52
|
+
let cloud_options = storage_options;
|
53
|
+
|
54
|
+
let cloud_options = if let Some(first_path) = first_path {
|
55
|
+
use crate::prelude::parse_cloud_options;
|
56
|
+
|
57
|
+
let first_path_url = first_path.to_str();
|
58
|
+
let cloud_options =
|
59
|
+
parse_cloud_options(first_path_url, cloud_options.unwrap_or_default())?;
|
60
|
+
|
61
|
+
Some(cloud_options.with_max_retries(retries))
|
62
|
+
} else {
|
63
|
+
None
|
64
|
+
};
|
65
|
+
|
66
|
+
let hive_schema = hive_schema.map(|s| Arc::new(s.0));
|
67
|
+
|
68
|
+
let row_index = row_index.map(|(name, offset)| RowIndex {
|
69
|
+
name: name.0,
|
70
|
+
offset,
|
71
|
+
});
|
72
|
+
|
73
|
+
let hive_options = HiveOptions {
|
74
|
+
enabled: hive_partitioning,
|
75
|
+
hive_start_idx: 0,
|
76
|
+
schema: hive_schema,
|
77
|
+
try_parse_dates: try_parse_hive_dates,
|
78
|
+
};
|
79
|
+
|
80
|
+
let unified_scan_args = UnifiedScanArgs {
|
81
|
+
// Schema is currently still stored inside the options per scan type, but we do eventually
|
82
|
+
// want to put it here instead.
|
83
|
+
schema: None,
|
84
|
+
cloud_options,
|
85
|
+
hive_options,
|
86
|
+
rechunk,
|
87
|
+
cache,
|
88
|
+
glob,
|
89
|
+
projection: None,
|
90
|
+
row_index,
|
91
|
+
pre_slice: pre_slice.map(Slice::from),
|
92
|
+
cast_columns_policy: cast_options.0,
|
93
|
+
missing_columns_policy: missing_columns.0,
|
94
|
+
extra_columns_policy: extra_columns.0,
|
95
|
+
include_file_paths: include_file_paths.map(|x| x.0),
|
96
|
+
deletion_files: DeletionFilesList::filter_empty(deletion_files.map(|x| x.0)),
|
97
|
+
column_mapping: column_mapping.map(|x| x.0),
|
98
|
+
};
|
99
|
+
|
100
|
+
Ok(unified_scan_args)
|
101
|
+
}
|
102
|
+
}
|