polars-df 0.23.0 → 0.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +127 -1
- data/Cargo.lock +72 -58
- data/README.md +31 -27
- data/ext/polars/Cargo.toml +15 -6
- data/ext/polars/src/batched_csv.rs +35 -39
- data/ext/polars/src/c_api/allocator.rs +7 -0
- data/ext/polars/src/c_api/mod.rs +1 -0
- data/ext/polars/src/catalog/unity.rs +123 -101
- data/ext/polars/src/conversion/any_value.rs +13 -17
- data/ext/polars/src/conversion/chunked_array.rs +5 -5
- data/ext/polars/src/conversion/datetime.rs +3 -2
- data/ext/polars/src/conversion/mod.rs +50 -45
- data/ext/polars/src/dataframe/export.rs +13 -13
- data/ext/polars/src/dataframe/general.rs +223 -223
- data/ext/polars/src/dataframe/io.rs +27 -141
- data/ext/polars/src/dataframe/mod.rs +13 -5
- data/ext/polars/src/dataframe/serde.rs +1 -1
- data/ext/polars/src/error.rs +44 -7
- data/ext/polars/src/exceptions.rs +45 -12
- data/ext/polars/src/expr/array.rs +12 -0
- data/ext/polars/src/expr/datatype.rs +2 -2
- data/ext/polars/src/expr/datetime.rs +4 -5
- data/ext/polars/src/expr/general.rs +49 -13
- data/ext/polars/src/expr/list.rs +4 -0
- data/ext/polars/src/expr/meta.rs +8 -3
- data/ext/polars/src/expr/mod.rs +22 -6
- data/ext/polars/src/expr/name.rs +19 -8
- data/ext/polars/src/expr/rolling.rs +50 -1
- data/ext/polars/src/expr/string.rs +0 -1
- data/ext/polars/src/expr/struct.rs +7 -2
- data/ext/polars/src/file.rs +136 -103
- data/ext/polars/src/functions/aggregation.rs +9 -8
- data/ext/polars/src/functions/io.rs +81 -10
- data/ext/polars/src/functions/lazy.rs +95 -21
- data/ext/polars/src/functions/mod.rs +2 -0
- data/ext/polars/src/functions/range.rs +19 -3
- data/ext/polars/src/functions/strings.rs +6 -0
- data/ext/polars/src/functions/utils.rs +6 -0
- data/ext/polars/src/interop/arrow/mod.rs +50 -1
- data/ext/polars/src/interop/arrow/{to_ruby.rs → to_rb.rs} +30 -0
- data/ext/polars/src/interop/arrow/to_rust.rs +43 -0
- data/ext/polars/src/interop/numo/to_numo_df.rs +1 -1
- data/ext/polars/src/interop/numo/to_numo_series.rs +1 -1
- data/ext/polars/src/lazyframe/exitable.rs +39 -0
- data/ext/polars/src/lazyframe/general.rs +340 -236
- data/ext/polars/src/lazyframe/mod.rs +46 -10
- data/ext/polars/src/lazyframe/optflags.rs +5 -4
- data/ext/polars/src/lazyframe/serde.rs +11 -3
- data/ext/polars/src/lazyframe/sink.rs +10 -5
- data/ext/polars/src/lazygroupby.rs +6 -7
- data/ext/polars/src/lib.rs +141 -76
- data/ext/polars/src/map/dataframe.rs +12 -12
- data/ext/polars/src/map/lazy.rs +7 -5
- data/ext/polars/src/map/mod.rs +15 -8
- data/ext/polars/src/map/series.rs +3 -3
- data/ext/polars/src/on_startup.rs +16 -8
- data/ext/polars/src/prelude.rs +1 -0
- data/ext/polars/src/rb_modules.rs +19 -49
- data/ext/polars/src/series/aggregation.rs +79 -140
- data/ext/polars/src/series/arithmetic.rs +16 -22
- data/ext/polars/src/series/comparison.rs +101 -222
- data/ext/polars/src/series/construction.rs +17 -18
- data/ext/polars/src/series/export.rs +1 -1
- data/ext/polars/src/series/general.rs +254 -289
- data/ext/polars/src/series/import.rs +17 -0
- data/ext/polars/src/series/map.rs +178 -160
- data/ext/polars/src/series/mod.rs +28 -12
- data/ext/polars/src/series/scatter.rs +12 -9
- data/ext/polars/src/sql.rs +16 -9
- data/ext/polars/src/testing/frame.rs +31 -0
- data/ext/polars/src/testing/mod.rs +5 -0
- data/ext/polars/src/testing/series.rs +31 -0
- data/ext/polars/src/timeout.rs +105 -0
- data/ext/polars/src/utils.rs +159 -1
- data/lib/polars/array_expr.rb +81 -12
- data/lib/polars/array_name_space.rb +74 -7
- data/lib/polars/batched_csv_reader.rb +21 -21
- data/lib/polars/binary_name_space.rb +1 -1
- data/lib/polars/cat_expr.rb +7 -7
- data/lib/polars/config.rb +1 -1
- data/lib/polars/convert.rb +189 -34
- data/lib/polars/data_frame.rb +1066 -831
- data/lib/polars/data_frame_plot.rb +173 -0
- data/lib/polars/data_type_group.rb +1 -0
- data/lib/polars/data_types.rb +31 -12
- data/lib/polars/date_time_expr.rb +51 -69
- data/lib/polars/date_time_name_space.rb +80 -112
- data/lib/polars/dynamic_group_by.rb +7 -7
- data/lib/polars/exceptions.rb +50 -10
- data/lib/polars/expr.rb +470 -517
- data/lib/polars/functions/aggregation/horizontal.rb +0 -1
- data/lib/polars/functions/aggregation/vertical.rb +2 -3
- data/lib/polars/functions/as_datatype.rb +290 -8
- data/lib/polars/functions/eager.rb +204 -10
- data/lib/polars/functions/escape_regex.rb +21 -0
- data/lib/polars/functions/lazy.rb +409 -169
- data/lib/polars/functions/lit.rb +17 -1
- data/lib/polars/functions/range/int_range.rb +74 -2
- data/lib/polars/functions/range/linear_space.rb +77 -0
- data/lib/polars/functions/range/time_range.rb +1 -1
- data/lib/polars/functions/repeat.rb +3 -12
- data/lib/polars/functions/whenthen.rb +2 -2
- data/lib/polars/group_by.rb +72 -20
- data/lib/polars/iceberg_dataset.rb +1 -6
- data/lib/polars/in_process_query.rb +37 -0
- data/lib/polars/io/cloud.rb +18 -0
- data/lib/polars/io/csv.rb +265 -126
- data/lib/polars/io/database.rb +0 -1
- data/lib/polars/io/delta.rb +15 -7
- data/lib/polars/io/ipc.rb +24 -17
- data/lib/polars/io/ndjson.rb +161 -24
- data/lib/polars/io/parquet.rb +101 -38
- data/lib/polars/lazy_frame.rb +849 -558
- data/lib/polars/lazy_group_by.rb +327 -2
- data/lib/polars/list_expr.rb +94 -16
- data/lib/polars/list_name_space.rb +88 -24
- data/lib/polars/meta_expr.rb +42 -1
- data/lib/polars/name_expr.rb +41 -4
- data/lib/polars/query_opt_flags.rb +198 -2
- data/lib/polars/rolling_group_by.rb +3 -3
- data/lib/polars/schema.rb +21 -3
- data/lib/polars/selector.rb +37 -2
- data/lib/polars/selectors.rb +45 -9
- data/lib/polars/series.rb +1156 -728
- data/lib/polars/series_plot.rb +72 -0
- data/lib/polars/slice.rb +1 -1
- data/lib/polars/sql_context.rb +11 -4
- data/lib/polars/string_expr.rb +59 -68
- data/lib/polars/string_name_space.rb +51 -87
- data/lib/polars/struct_expr.rb +36 -18
- data/lib/polars/testing.rb +24 -273
- data/lib/polars/utils/constants.rb +2 -0
- data/lib/polars/utils/construction/data_frame.rb +410 -0
- data/lib/polars/utils/construction/series.rb +364 -0
- data/lib/polars/utils/construction/utils.rb +9 -0
- data/lib/polars/utils/deprecation.rb +11 -0
- data/lib/polars/utils/serde.rb +8 -3
- data/lib/polars/utils/unstable.rb +19 -0
- data/lib/polars/utils/various.rb +59 -0
- data/lib/polars/utils.rb +46 -47
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +47 -1
- metadata +25 -6
- data/ext/polars/src/allocator.rs +0 -13
- data/lib/polars/plot.rb +0 -109
|
@@ -2,12 +2,15 @@ use std::io::BufReader;
|
|
|
2
2
|
|
|
3
3
|
use magnus::{RHash, Ruby, Value};
|
|
4
4
|
use polars::prelude::ArrowSchema;
|
|
5
|
+
use polars::prelude::PlPathRef;
|
|
6
|
+
use polars_io::cloud::CloudOptions;
|
|
5
7
|
|
|
6
8
|
use crate::conversion::Wrap;
|
|
7
9
|
use crate::file::{EitherRustRubyFile, get_either_file};
|
|
10
|
+
use crate::utils::EnterPolarsExt;
|
|
8
11
|
use crate::{RbPolarsErr, RbResult};
|
|
9
12
|
|
|
10
|
-
pub fn read_ipc_schema(rb_f: Value) -> RbResult<RHash> {
|
|
13
|
+
pub fn read_ipc_schema(rb: &Ruby, rb_f: Value) -> RbResult<RHash> {
|
|
11
14
|
use arrow::io::ipc::read::read_file_metadata;
|
|
12
15
|
let metadata = match get_either_file(rb_f, false)? {
|
|
13
16
|
EitherRustRubyFile::Rust(r) => {
|
|
@@ -16,26 +19,70 @@ pub fn read_ipc_schema(rb_f: Value) -> RbResult<RHash> {
|
|
|
16
19
|
EitherRustRubyFile::Rb(mut r) => read_file_metadata(&mut r).map_err(RbPolarsErr::from)?,
|
|
17
20
|
};
|
|
18
21
|
|
|
19
|
-
let
|
|
20
|
-
let dict = ruby.hash_new();
|
|
22
|
+
let dict = rb.hash_new();
|
|
21
23
|
fields_to_rbdict(&metadata.schema, &dict)?;
|
|
22
24
|
Ok(dict)
|
|
23
25
|
}
|
|
24
26
|
|
|
25
|
-
pub fn read_parquet_metadata(
|
|
27
|
+
pub fn read_parquet_metadata(
|
|
28
|
+
rb: &Ruby,
|
|
29
|
+
rb_f: Value,
|
|
30
|
+
storage_options: Option<Vec<(String, String)>>,
|
|
31
|
+
credential_provider: Option<Value>,
|
|
32
|
+
retries: usize,
|
|
33
|
+
) -> RbResult<RHash> {
|
|
34
|
+
use std::io::Cursor;
|
|
35
|
+
|
|
36
|
+
use polars_io::pl_async::get_runtime;
|
|
26
37
|
use polars_parquet::read::read_metadata;
|
|
27
38
|
use polars_parquet::read::schema::read_custom_key_value_metadata;
|
|
39
|
+
use polars_utils::plpath::PlPath;
|
|
28
40
|
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
41
|
+
use crate::file::{RubyScanSourceInput, get_ruby_scan_source_input};
|
|
42
|
+
|
|
43
|
+
let metadata = match get_ruby_scan_source_input(rb_f, false)? {
|
|
44
|
+
RubyScanSourceInput::Buffer(buf) => {
|
|
45
|
+
read_metadata(&mut Cursor::new(buf)).map_err(RbPolarsErr::from)?
|
|
46
|
+
}
|
|
47
|
+
RubyScanSourceInput::Path(p) => {
|
|
48
|
+
let cloud_options = parse_cloud_options(
|
|
49
|
+
Some(p.as_ref()),
|
|
50
|
+
storage_options,
|
|
51
|
+
credential_provider,
|
|
52
|
+
retries,
|
|
53
|
+
)?;
|
|
54
|
+
match p {
|
|
55
|
+
PlPath::Local(local) => {
|
|
56
|
+
let file = polars_utils::open_file(&local).map_err(RbPolarsErr::from)?;
|
|
57
|
+
read_metadata(&mut BufReader::new(file)).map_err(RbPolarsErr::from)?
|
|
58
|
+
}
|
|
59
|
+
PlPath::Cloud(_) => {
|
|
60
|
+
use polars::prelude::ParquetObjectStore;
|
|
61
|
+
use polars_error::PolarsResult;
|
|
62
|
+
|
|
63
|
+
rb.detach(|| {
|
|
64
|
+
get_runtime().block_on(async {
|
|
65
|
+
let mut reader = ParquetObjectStore::from_uri(
|
|
66
|
+
p.as_ref(),
|
|
67
|
+
cloud_options.as_ref(),
|
|
68
|
+
None,
|
|
69
|
+
)
|
|
70
|
+
.await?;
|
|
71
|
+
let result = reader.get_metadata().await?;
|
|
72
|
+
PolarsResult::Ok((**result).clone())
|
|
73
|
+
})
|
|
74
|
+
})
|
|
75
|
+
.map_err(RbPolarsErr::from)?
|
|
76
|
+
}
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
RubyScanSourceInput::File(f) => {
|
|
80
|
+
read_metadata(&mut BufReader::new(f)).map_err(RbPolarsErr::from)?
|
|
32
81
|
}
|
|
33
|
-
EitherRustRubyFile::Rb(mut r) => read_metadata(&mut r).map_err(RbPolarsErr::from)?,
|
|
34
82
|
};
|
|
35
83
|
|
|
36
84
|
let key_value_metadata = read_custom_key_value_metadata(metadata.key_value_metadata());
|
|
37
|
-
let
|
|
38
|
-
let dict = ruby.hash_new();
|
|
85
|
+
let dict = rb.hash_new();
|
|
39
86
|
for (key, value) in key_value_metadata.into_iter() {
|
|
40
87
|
dict.aset(key.as_str(), value.as_str())?;
|
|
41
88
|
}
|
|
@@ -66,3 +113,27 @@ fn fields_to_rbdict(schema: &ArrowSchema, dict: &RHash) -> RbResult<()> {
|
|
|
66
113
|
}
|
|
67
114
|
Ok(())
|
|
68
115
|
}
|
|
116
|
+
|
|
117
|
+
pub fn parse_cloud_options<'a>(
|
|
118
|
+
first_path: Option<PlPathRef<'a>>,
|
|
119
|
+
storage_options: Option<Vec<(String, String)>>,
|
|
120
|
+
credential_provider: Option<Value>,
|
|
121
|
+
retries: usize,
|
|
122
|
+
) -> RbResult<Option<CloudOptions>> {
|
|
123
|
+
let result = if let Some(first_path) = first_path {
|
|
124
|
+
use crate::prelude::parse_cloud_options;
|
|
125
|
+
|
|
126
|
+
let first_path_url = first_path.to_str();
|
|
127
|
+
let cloud_options =
|
|
128
|
+
parse_cloud_options(first_path_url, storage_options.unwrap_or_default())?;
|
|
129
|
+
|
|
130
|
+
Some(
|
|
131
|
+
cloud_options
|
|
132
|
+
.with_max_retries(retries)
|
|
133
|
+
.with_credential_provider(credential_provider.map(|_| todo!())),
|
|
134
|
+
)
|
|
135
|
+
} else {
|
|
136
|
+
None
|
|
137
|
+
};
|
|
138
|
+
Ok(result)
|
|
139
|
+
}
|
|
@@ -6,9 +6,11 @@ use polars::lazy::dsl;
|
|
|
6
6
|
use polars::prelude::*;
|
|
7
7
|
|
|
8
8
|
use crate::conversion::{Wrap, get_lf, get_rbseq};
|
|
9
|
+
use crate::expr::ToExprs;
|
|
9
10
|
use crate::expr::datatype::RbDataTypeExpr;
|
|
11
|
+
use crate::lazyframe::RbOptFlags;
|
|
10
12
|
use crate::map::lazy::binary_lambda;
|
|
11
|
-
use crate::
|
|
13
|
+
use crate::utils::{EnterPolarsExt, RubyAttach};
|
|
12
14
|
use crate::{RbDataFrame, RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbValueError};
|
|
13
15
|
|
|
14
16
|
macro_rules! set_unwrapped_or_0 {
|
|
@@ -62,7 +64,7 @@ pub fn arg_sort_by(
|
|
|
62
64
|
multithreaded: bool,
|
|
63
65
|
maintain_order: bool,
|
|
64
66
|
) -> RbResult<RbExpr> {
|
|
65
|
-
let by =
|
|
67
|
+
let by = by.to_exprs()?;
|
|
66
68
|
Ok(dsl::arg_sort_by(
|
|
67
69
|
by,
|
|
68
70
|
SortMultipleOptions {
|
|
@@ -81,7 +83,12 @@ pub fn arg_where(condition: &RbExpr) -> RbExpr {
|
|
|
81
83
|
}
|
|
82
84
|
|
|
83
85
|
pub fn as_struct(exprs: RArray) -> RbResult<RbExpr> {
|
|
84
|
-
let exprs =
|
|
86
|
+
let exprs = exprs.to_exprs()?;
|
|
87
|
+
if exprs.is_empty() {
|
|
88
|
+
return Err(RbValueError::new_err(
|
|
89
|
+
"expected at least 1 expression in 'as_struct'",
|
|
90
|
+
));
|
|
91
|
+
}
|
|
85
92
|
Ok(dsl::as_struct(exprs).into())
|
|
86
93
|
}
|
|
87
94
|
|
|
@@ -90,7 +97,7 @@ pub fn field(names: Vec<String>) -> RbExpr {
|
|
|
90
97
|
}
|
|
91
98
|
|
|
92
99
|
pub fn coalesce(exprs: RArray) -> RbResult<RbExpr> {
|
|
93
|
-
let exprs =
|
|
100
|
+
let exprs = exprs.to_exprs()?;
|
|
94
101
|
Ok(dsl::coalesce(&exprs).into())
|
|
95
102
|
}
|
|
96
103
|
|
|
@@ -98,13 +105,25 @@ pub fn col(name: String) -> RbExpr {
|
|
|
98
105
|
dsl::col(&name).into()
|
|
99
106
|
}
|
|
100
107
|
|
|
101
|
-
|
|
108
|
+
fn lfs_to_plans(lfs: RArray) -> RbResult<Vec<DslPlan>> {
|
|
102
109
|
let lfs = lfs.typecheck::<Obj<RbLazyFrame>>()?;
|
|
110
|
+
Ok(lfs
|
|
111
|
+
.into_iter()
|
|
112
|
+
.map(|lf| lf.ldf.read().logical_plan.clone())
|
|
113
|
+
.collect())
|
|
114
|
+
}
|
|
103
115
|
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
116
|
+
pub fn collect_all(
|
|
117
|
+
ruby: &Ruby,
|
|
118
|
+
lfs: RArray,
|
|
119
|
+
engine: Wrap<Engine>,
|
|
120
|
+
optflags: &RbOptFlags,
|
|
121
|
+
) -> RbResult<RArray> {
|
|
122
|
+
let plans = lfs_to_plans(lfs)?;
|
|
123
|
+
let dfs = ruby.enter_polars(|| {
|
|
124
|
+
LazyFrame::collect_all_with_engine(plans, engine.0, optflags.clone().inner.into_inner())
|
|
125
|
+
})?;
|
|
126
|
+
Ok(ruby.ary_from_iter(dfs.into_iter().map(Into::<RbDataFrame>::into)))
|
|
108
127
|
}
|
|
109
128
|
|
|
110
129
|
pub fn concat_lf(
|
|
@@ -136,13 +155,19 @@ pub fn concat_lf(
|
|
|
136
155
|
}
|
|
137
156
|
|
|
138
157
|
pub fn concat_list(s: RArray) -> RbResult<RbExpr> {
|
|
139
|
-
let s =
|
|
158
|
+
let s = s.to_exprs()?;
|
|
140
159
|
let expr = dsl::concat_list(s).map_err(RbPolarsErr::from)?;
|
|
141
160
|
Ok(expr.into())
|
|
142
161
|
}
|
|
143
162
|
|
|
163
|
+
pub fn concat_arr(s: RArray) -> RbResult<RbExpr> {
|
|
164
|
+
let s = s.to_exprs()?;
|
|
165
|
+
let expr = dsl::concat_arr(s).map_err(RbPolarsErr::from)?;
|
|
166
|
+
Ok(expr.into())
|
|
167
|
+
}
|
|
168
|
+
|
|
144
169
|
pub fn concat_str(s: RArray, separator: String, ignore_nulls: bool) -> RbResult<RbExpr> {
|
|
145
|
-
let s =
|
|
170
|
+
let s = s.to_exprs()?;
|
|
146
171
|
Ok(dsl::concat_str(s, &separator, ignore_nulls).into())
|
|
147
172
|
}
|
|
148
173
|
|
|
@@ -158,10 +183,6 @@ pub fn arctan2(y: &RbExpr, x: &RbExpr) -> RbExpr {
|
|
|
158
183
|
y.inner.clone().arctan2(x.inner.clone()).into()
|
|
159
184
|
}
|
|
160
185
|
|
|
161
|
-
pub fn arctan2d(y: &RbExpr, x: &RbExpr) -> RbExpr {
|
|
162
|
-
y.inner.clone().arctan2(x.inner.clone()).degrees().into()
|
|
163
|
-
}
|
|
164
|
-
|
|
165
186
|
pub fn cum_fold(
|
|
166
187
|
acc: &RbExpr,
|
|
167
188
|
lambda: Value,
|
|
@@ -170,10 +191,10 @@ pub fn cum_fold(
|
|
|
170
191
|
return_dtype: Option<&RbDataTypeExpr>,
|
|
171
192
|
include_init: bool,
|
|
172
193
|
) -> RbResult<RbExpr> {
|
|
173
|
-
let exprs =
|
|
194
|
+
let exprs = exprs.to_exprs()?;
|
|
174
195
|
let lambda = Opaque::from(lambda);
|
|
175
196
|
let func = PlanCallback::new(move |(a, b): (Series, Series)| {
|
|
176
|
-
|
|
197
|
+
Ruby::attach(|rb| binary_lambda(rb.get_inner(lambda), a, b).map(|v| v.unwrap()))
|
|
177
198
|
});
|
|
178
199
|
Ok(dsl::cum_fold_exprs(
|
|
179
200
|
acc.inner.clone(),
|
|
@@ -186,6 +207,60 @@ pub fn cum_fold(
|
|
|
186
207
|
.into())
|
|
187
208
|
}
|
|
188
209
|
|
|
210
|
+
pub fn cum_reduce(
|
|
211
|
+
lambda: Value,
|
|
212
|
+
exprs: RArray,
|
|
213
|
+
returns_scalar: bool,
|
|
214
|
+
return_dtype: Option<&RbDataTypeExpr>,
|
|
215
|
+
) -> RbResult<RbExpr> {
|
|
216
|
+
let exprs = exprs.to_exprs()?;
|
|
217
|
+
let lambda = Opaque::from(lambda);
|
|
218
|
+
let func = PlanCallback::new(move |(a, b): (Series, Series)| {
|
|
219
|
+
Ruby::attach(|rb| binary_lambda(rb.get_inner(lambda), a, b).map(|v| v.unwrap()))
|
|
220
|
+
});
|
|
221
|
+
Ok(dsl::cum_reduce_exprs(
|
|
222
|
+
func,
|
|
223
|
+
exprs,
|
|
224
|
+
returns_scalar,
|
|
225
|
+
return_dtype.map(|v| v.inner.clone()),
|
|
226
|
+
)
|
|
227
|
+
.into())
|
|
228
|
+
}
|
|
229
|
+
|
|
230
|
+
pub fn datetime(
|
|
231
|
+
year: &RbExpr,
|
|
232
|
+
month: &RbExpr,
|
|
233
|
+
day: &RbExpr,
|
|
234
|
+
hour: Option<&RbExpr>,
|
|
235
|
+
minute: Option<&RbExpr>,
|
|
236
|
+
second: Option<&RbExpr>,
|
|
237
|
+
microsecond: Option<&RbExpr>,
|
|
238
|
+
time_unit: Wrap<TimeUnit>,
|
|
239
|
+
time_zone: Wrap<Option<TimeZone>>,
|
|
240
|
+
ambiguous: &RbExpr,
|
|
241
|
+
) -> RbExpr {
|
|
242
|
+
let year = year.inner.clone();
|
|
243
|
+
let month = month.inner.clone();
|
|
244
|
+
let day = day.inner.clone();
|
|
245
|
+
set_unwrapped_or_0!(hour, minute, second, microsecond);
|
|
246
|
+
let ambiguous = ambiguous.inner.clone();
|
|
247
|
+
let time_unit = time_unit.0;
|
|
248
|
+
let time_zone = time_zone.0;
|
|
249
|
+
let args = DatetimeArgs {
|
|
250
|
+
year,
|
|
251
|
+
month,
|
|
252
|
+
day,
|
|
253
|
+
hour,
|
|
254
|
+
minute,
|
|
255
|
+
second,
|
|
256
|
+
microsecond,
|
|
257
|
+
time_unit,
|
|
258
|
+
time_zone,
|
|
259
|
+
ambiguous,
|
|
260
|
+
};
|
|
261
|
+
dsl::datetime(args).into()
|
|
262
|
+
}
|
|
263
|
+
|
|
189
264
|
pub fn concat_lf_diagonal(
|
|
190
265
|
lfs: RArray,
|
|
191
266
|
rechunk: bool,
|
|
@@ -224,7 +299,6 @@ pub fn concat_lf_horizontal(lfs: RArray, parallel: bool) -> RbResult<RbLazyFrame
|
|
|
224
299
|
Ok(lf.into())
|
|
225
300
|
}
|
|
226
301
|
|
|
227
|
-
#[allow(clippy::too_many_arguments)]
|
|
228
302
|
pub fn duration(
|
|
229
303
|
weeks: Option<&RbExpr>,
|
|
230
304
|
days: Option<&RbExpr>,
|
|
@@ -267,10 +341,10 @@ pub fn fold(
|
|
|
267
341
|
returns_scalar: bool,
|
|
268
342
|
return_dtype: Option<&RbDataTypeExpr>,
|
|
269
343
|
) -> RbResult<RbExpr> {
|
|
270
|
-
let exprs =
|
|
344
|
+
let exprs = exprs.to_exprs()?;
|
|
271
345
|
let lambda = Opaque::from(lambda);
|
|
272
346
|
let func = PlanCallback::new(move |(a, b): (Series, Series)| {
|
|
273
|
-
|
|
347
|
+
Ruby::attach(|rb| binary_lambda(rb.get_inner(lambda), a, b).map(|v| v.unwrap()))
|
|
274
348
|
});
|
|
275
349
|
Ok(dsl::fold_exprs(
|
|
276
350
|
acc.inner.clone(),
|
|
@@ -309,7 +383,7 @@ pub fn lit(value: Value, allow_object: bool, is_scalar: bool) -> RbResult<RbExpr
|
|
|
309
383
|
Ok(dsl::lit(unsafe { v.as_slice() }).into())
|
|
310
384
|
}
|
|
311
385
|
} else if let Ok(series) = Obj::<RbSeries>::try_convert(value) {
|
|
312
|
-
let s = series.series.
|
|
386
|
+
let s = series.series.read();
|
|
313
387
|
if is_scalar {
|
|
314
388
|
let av = s
|
|
315
389
|
.get(0)
|
|
@@ -3,7 +3,7 @@ use polars_core::datatypes::{TimeUnit, TimeZone};
|
|
|
3
3
|
|
|
4
4
|
use crate::conversion::Wrap;
|
|
5
5
|
use crate::prelude::*;
|
|
6
|
-
use crate::{RbExpr, RbResult};
|
|
6
|
+
use crate::{RbDataTypeExpr, RbExpr, RbPolarsErr, RbResult};
|
|
7
7
|
|
|
8
8
|
pub fn int_range(start: &RbExpr, end: &RbExpr, step: i64, dtype: Wrap<DataType>) -> RbExpr {
|
|
9
9
|
let start = start.inner.clone();
|
|
@@ -16,9 +16,9 @@ pub fn int_ranges(
|
|
|
16
16
|
start: &RbExpr,
|
|
17
17
|
end: &RbExpr,
|
|
18
18
|
step: &RbExpr,
|
|
19
|
-
dtype:
|
|
19
|
+
dtype: &RbDataTypeExpr,
|
|
20
20
|
) -> RbResult<RbExpr> {
|
|
21
|
-
let dtype = dtype.
|
|
21
|
+
let dtype = dtype.inner.clone();
|
|
22
22
|
Ok(dsl::int_ranges(
|
|
23
23
|
start.inner.clone(),
|
|
24
24
|
end.inner.clone(),
|
|
@@ -113,3 +113,19 @@ pub fn time_ranges(
|
|
|
113
113
|
let closed = closed.0;
|
|
114
114
|
dsl::time_ranges(start, end, every, closed).into()
|
|
115
115
|
}
|
|
116
|
+
|
|
117
|
+
pub fn linear_spaces(
|
|
118
|
+
start: &RbExpr,
|
|
119
|
+
end: &RbExpr,
|
|
120
|
+
num_samples: &RbExpr,
|
|
121
|
+
closed: Wrap<ClosedInterval>,
|
|
122
|
+
as_array: bool,
|
|
123
|
+
) -> RbResult<RbExpr> {
|
|
124
|
+
let start = start.inner.clone();
|
|
125
|
+
let end = end.inner.clone();
|
|
126
|
+
let num_samples = num_samples.inner.clone();
|
|
127
|
+
let closed = closed.0;
|
|
128
|
+
let out =
|
|
129
|
+
dsl::linear_spaces(start, end, num_samples, closed, as_array).map_err(RbPolarsErr::from)?;
|
|
130
|
+
Ok(out.into())
|
|
131
|
+
}
|
|
@@ -1 +1,50 @@
|
|
|
1
|
-
|
|
1
|
+
use magnus::prelude::*;
|
|
2
|
+
use magnus::{RHash, Value};
|
|
3
|
+
use polars::prelude::{ArrowDataType, DataType};
|
|
4
|
+
use polars_error::polars_err;
|
|
5
|
+
|
|
6
|
+
use crate::interop::arrow::to_rust::normalize_arrow_fields;
|
|
7
|
+
use crate::prelude::Wrap;
|
|
8
|
+
use crate::series::import_schema_rbcapsule;
|
|
9
|
+
use crate::utils::to_rb_err;
|
|
10
|
+
use crate::{RbResult, RbValueError};
|
|
11
|
+
|
|
12
|
+
pub mod to_rb;
|
|
13
|
+
pub mod to_rust;
|
|
14
|
+
|
|
15
|
+
pub fn init_polars_schema_from_arrow_c_schema(
|
|
16
|
+
polars_schema: RHash,
|
|
17
|
+
schema_object: Value,
|
|
18
|
+
) -> RbResult<()> {
|
|
19
|
+
let schema_capsule = schema_object.funcall("arrow_c_schema", ())?;
|
|
20
|
+
|
|
21
|
+
let field = import_schema_rbcapsule(schema_capsule)?;
|
|
22
|
+
let field = normalize_arrow_fields(&field);
|
|
23
|
+
|
|
24
|
+
let ArrowDataType::Struct(fields) = field.dtype else {
|
|
25
|
+
return Err(RbValueError::new_err(format!(
|
|
26
|
+
"arrow_c_schema of object passed to Polars::Schema did not return struct dtype: \
|
|
27
|
+
object: {}, dtype: {:?}",
|
|
28
|
+
schema_object, &field.dtype
|
|
29
|
+
)));
|
|
30
|
+
};
|
|
31
|
+
|
|
32
|
+
for field in fields {
|
|
33
|
+
let dtype = DataType::from_arrow_field(&field);
|
|
34
|
+
|
|
35
|
+
let name = field.name.as_str();
|
|
36
|
+
let dtype = Wrap(dtype);
|
|
37
|
+
|
|
38
|
+
if polars_schema.get(name).is_some() {
|
|
39
|
+
return Err(to_rb_err(polars_err!(
|
|
40
|
+
Duplicate:
|
|
41
|
+
"arrow schema contained duplicate name: {}",
|
|
42
|
+
name
|
|
43
|
+
)));
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
polars_schema.aset(name, dtype)?;
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
Ok(())
|
|
50
|
+
}
|
|
@@ -20,6 +20,17 @@ impl RbArrowArrayStream {
|
|
|
20
20
|
}
|
|
21
21
|
}
|
|
22
22
|
|
|
23
|
+
#[magnus::wrap(class = "Polars::ArrowSchema")]
|
|
24
|
+
pub struct RbArrowSchema {
|
|
25
|
+
pub(crate) schema: ffi::ArrowSchema,
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
impl RbArrowSchema {
|
|
29
|
+
pub fn to_i(&self) -> usize {
|
|
30
|
+
(&self.schema as *const _) as usize
|
|
31
|
+
}
|
|
32
|
+
}
|
|
33
|
+
|
|
23
34
|
pub(crate) fn dataframe_to_stream(df: &DataFrame, ruby: &Ruby) -> RbResult<Value> {
|
|
24
35
|
let iter = Box::new(DataFrameStreamIterator::new(df));
|
|
25
36
|
let field = iter.field();
|
|
@@ -27,6 +38,25 @@ pub(crate) fn dataframe_to_stream(df: &DataFrame, ruby: &Ruby) -> RbResult<Value
|
|
|
27
38
|
Ok(RbArrowArrayStream { stream }.into_value_with(ruby))
|
|
28
39
|
}
|
|
29
40
|
|
|
41
|
+
pub(crate) fn polars_schema_to_rbcapsule(
|
|
42
|
+
ruby: &Ruby,
|
|
43
|
+
schema: crate::prelude::Wrap<polars::prelude::Schema>,
|
|
44
|
+
) -> RbResult<Value> {
|
|
45
|
+
let schema: arrow::ffi::ArrowSchema = arrow::ffi::export_field_to_c(&ArrowField::new(
|
|
46
|
+
PlSmallStr::EMPTY,
|
|
47
|
+
ArrowDataType::Struct(
|
|
48
|
+
schema
|
|
49
|
+
.0
|
|
50
|
+
.iter_fields()
|
|
51
|
+
.map(|x| x.to_arrow(CompatLevel::newest()))
|
|
52
|
+
.collect(),
|
|
53
|
+
),
|
|
54
|
+
false,
|
|
55
|
+
));
|
|
56
|
+
|
|
57
|
+
Ok(RbArrowSchema { schema }.into_value_with(ruby))
|
|
58
|
+
}
|
|
59
|
+
|
|
30
60
|
pub struct DataFrameStreamIterator {
|
|
31
61
|
columns: Vec<Series>,
|
|
32
62
|
dtype: ArrowDataType,
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
use polars_core::prelude::*;
|
|
2
|
+
|
|
3
|
+
pub(crate) fn normalize_arrow_fields(field: &ArrowField) -> ArrowField {
|
|
4
|
+
// normalize fields with extension dtypes that are otherwise standard dtypes associated
|
|
5
|
+
// with (for us) irrelevant metadata; recreate the field using the inner (standard) dtype
|
|
6
|
+
match field {
|
|
7
|
+
ArrowField {
|
|
8
|
+
dtype: ArrowDataType::Struct(fields),
|
|
9
|
+
..
|
|
10
|
+
} => {
|
|
11
|
+
let mut normalized = false;
|
|
12
|
+
let normalized_fields: Vec<_> = fields
|
|
13
|
+
.iter()
|
|
14
|
+
.map(|f| {
|
|
15
|
+
// note: google bigquery column data is returned as a standard arrow dtype, but the
|
|
16
|
+
// sql type it was loaded from is associated as metadata (resulting in an extension dtype)
|
|
17
|
+
if let ArrowDataType::Extension(ext_type) = &f.dtype
|
|
18
|
+
&& ext_type.name.starts_with("google:sqlType:")
|
|
19
|
+
{
|
|
20
|
+
normalized = true;
|
|
21
|
+
return ArrowField::new(
|
|
22
|
+
f.name.clone(),
|
|
23
|
+
ext_type.inner.clone(),
|
|
24
|
+
f.is_nullable,
|
|
25
|
+
);
|
|
26
|
+
}
|
|
27
|
+
f.clone()
|
|
28
|
+
})
|
|
29
|
+
.collect();
|
|
30
|
+
|
|
31
|
+
if normalized {
|
|
32
|
+
ArrowField::new(
|
|
33
|
+
field.name.clone(),
|
|
34
|
+
ArrowDataType::Struct(normalized_fields),
|
|
35
|
+
field.is_nullable,
|
|
36
|
+
)
|
|
37
|
+
} else {
|
|
38
|
+
field.clone()
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
_ => field.clone(),
|
|
42
|
+
}
|
|
43
|
+
}
|
|
@@ -6,7 +6,7 @@ use crate::dataframe::RbDataFrame;
|
|
|
6
6
|
impl RbDataFrame {
|
|
7
7
|
pub fn to_numo(&self) -> Option<Value> {
|
|
8
8
|
let mut st = None;
|
|
9
|
-
for s in self.df.
|
|
9
|
+
for s in self.df.read().iter() {
|
|
10
10
|
let dt_i = s.dtype();
|
|
11
11
|
match st {
|
|
12
12
|
None => st = Some(dt_i.clone()),
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
use magnus::Ruby;
|
|
2
|
+
use polars::prelude::*;
|
|
3
|
+
|
|
4
|
+
use super::RbLazyFrame;
|
|
5
|
+
use crate::utils::EnterPolarsExt;
|
|
6
|
+
use crate::{RbDataFrame, RbResult};
|
|
7
|
+
|
|
8
|
+
impl RbLazyFrame {
|
|
9
|
+
pub fn collect_concurrently(ruby: &Ruby, self_: &Self) -> RbResult<RbInProcessQuery> {
|
|
10
|
+
let ipq = ruby.enter_polars(|| {
|
|
11
|
+
let ldf = self_.ldf.read().clone();
|
|
12
|
+
ldf.collect_concurrently()
|
|
13
|
+
})?;
|
|
14
|
+
Ok(RbInProcessQuery { ipq })
|
|
15
|
+
}
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
#[magnus::wrap(class = "Polars::RbInProcessQuery")]
|
|
19
|
+
#[repr(transparent)]
|
|
20
|
+
#[derive(Clone)]
|
|
21
|
+
pub struct RbInProcessQuery {
|
|
22
|
+
pub ipq: InProcessQuery,
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
impl RbInProcessQuery {
|
|
26
|
+
pub fn cancel(ruby: &Ruby, self_: &Self) -> RbResult<()> {
|
|
27
|
+
ruby.enter_polars_ok(|| self_.ipq.cancel())
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
pub fn fetch(ruby: &Ruby, self_: &Self) -> RbResult<Option<RbDataFrame>> {
|
|
31
|
+
let out = ruby.enter_polars(|| self_.ipq.fetch().transpose())?;
|
|
32
|
+
Ok(out.map(|df| df.into()))
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
pub fn fetch_blocking(ruby: &Ruby, self_: &Self) -> RbResult<RbDataFrame> {
|
|
36
|
+
let out = ruby.enter_polars(|| self_.ipq.fetch_blocking())?;
|
|
37
|
+
Ok(out.into())
|
|
38
|
+
}
|
|
39
|
+
}
|