polars-df 0.23.0 → 0.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +127 -1
- data/Cargo.lock +72 -58
- data/README.md +31 -27
- data/ext/polars/Cargo.toml +15 -6
- data/ext/polars/src/batched_csv.rs +35 -39
- data/ext/polars/src/c_api/allocator.rs +7 -0
- data/ext/polars/src/c_api/mod.rs +1 -0
- data/ext/polars/src/catalog/unity.rs +123 -101
- data/ext/polars/src/conversion/any_value.rs +13 -17
- data/ext/polars/src/conversion/chunked_array.rs +5 -5
- data/ext/polars/src/conversion/datetime.rs +3 -2
- data/ext/polars/src/conversion/mod.rs +50 -45
- data/ext/polars/src/dataframe/export.rs +13 -13
- data/ext/polars/src/dataframe/general.rs +223 -223
- data/ext/polars/src/dataframe/io.rs +27 -141
- data/ext/polars/src/dataframe/mod.rs +13 -5
- data/ext/polars/src/dataframe/serde.rs +1 -1
- data/ext/polars/src/error.rs +44 -7
- data/ext/polars/src/exceptions.rs +45 -12
- data/ext/polars/src/expr/array.rs +12 -0
- data/ext/polars/src/expr/datatype.rs +2 -2
- data/ext/polars/src/expr/datetime.rs +4 -5
- data/ext/polars/src/expr/general.rs +49 -13
- data/ext/polars/src/expr/list.rs +4 -0
- data/ext/polars/src/expr/meta.rs +8 -3
- data/ext/polars/src/expr/mod.rs +22 -6
- data/ext/polars/src/expr/name.rs +19 -8
- data/ext/polars/src/expr/rolling.rs +50 -1
- data/ext/polars/src/expr/string.rs +0 -1
- data/ext/polars/src/expr/struct.rs +7 -2
- data/ext/polars/src/file.rs +136 -103
- data/ext/polars/src/functions/aggregation.rs +9 -8
- data/ext/polars/src/functions/io.rs +81 -10
- data/ext/polars/src/functions/lazy.rs +95 -21
- data/ext/polars/src/functions/mod.rs +2 -0
- data/ext/polars/src/functions/range.rs +19 -3
- data/ext/polars/src/functions/strings.rs +6 -0
- data/ext/polars/src/functions/utils.rs +6 -0
- data/ext/polars/src/interop/arrow/mod.rs +50 -1
- data/ext/polars/src/interop/arrow/{to_ruby.rs → to_rb.rs} +30 -0
- data/ext/polars/src/interop/arrow/to_rust.rs +43 -0
- data/ext/polars/src/interop/numo/to_numo_df.rs +1 -1
- data/ext/polars/src/interop/numo/to_numo_series.rs +1 -1
- data/ext/polars/src/lazyframe/exitable.rs +39 -0
- data/ext/polars/src/lazyframe/general.rs +340 -236
- data/ext/polars/src/lazyframe/mod.rs +46 -10
- data/ext/polars/src/lazyframe/optflags.rs +5 -4
- data/ext/polars/src/lazyframe/serde.rs +11 -3
- data/ext/polars/src/lazyframe/sink.rs +10 -5
- data/ext/polars/src/lazygroupby.rs +6 -7
- data/ext/polars/src/lib.rs +141 -76
- data/ext/polars/src/map/dataframe.rs +12 -12
- data/ext/polars/src/map/lazy.rs +7 -5
- data/ext/polars/src/map/mod.rs +15 -8
- data/ext/polars/src/map/series.rs +3 -3
- data/ext/polars/src/on_startup.rs +16 -8
- data/ext/polars/src/prelude.rs +1 -0
- data/ext/polars/src/rb_modules.rs +19 -49
- data/ext/polars/src/series/aggregation.rs +79 -140
- data/ext/polars/src/series/arithmetic.rs +16 -22
- data/ext/polars/src/series/comparison.rs +101 -222
- data/ext/polars/src/series/construction.rs +17 -18
- data/ext/polars/src/series/export.rs +1 -1
- data/ext/polars/src/series/general.rs +254 -289
- data/ext/polars/src/series/import.rs +17 -0
- data/ext/polars/src/series/map.rs +178 -160
- data/ext/polars/src/series/mod.rs +28 -12
- data/ext/polars/src/series/scatter.rs +12 -9
- data/ext/polars/src/sql.rs +16 -9
- data/ext/polars/src/testing/frame.rs +31 -0
- data/ext/polars/src/testing/mod.rs +5 -0
- data/ext/polars/src/testing/series.rs +31 -0
- data/ext/polars/src/timeout.rs +105 -0
- data/ext/polars/src/utils.rs +159 -1
- data/lib/polars/array_expr.rb +81 -12
- data/lib/polars/array_name_space.rb +74 -7
- data/lib/polars/batched_csv_reader.rb +21 -21
- data/lib/polars/binary_name_space.rb +1 -1
- data/lib/polars/cat_expr.rb +7 -7
- data/lib/polars/config.rb +1 -1
- data/lib/polars/convert.rb +189 -34
- data/lib/polars/data_frame.rb +1066 -831
- data/lib/polars/data_frame_plot.rb +173 -0
- data/lib/polars/data_type_group.rb +1 -0
- data/lib/polars/data_types.rb +31 -12
- data/lib/polars/date_time_expr.rb +51 -69
- data/lib/polars/date_time_name_space.rb +80 -112
- data/lib/polars/dynamic_group_by.rb +7 -7
- data/lib/polars/exceptions.rb +50 -10
- data/lib/polars/expr.rb +470 -517
- data/lib/polars/functions/aggregation/horizontal.rb +0 -1
- data/lib/polars/functions/aggregation/vertical.rb +2 -3
- data/lib/polars/functions/as_datatype.rb +290 -8
- data/lib/polars/functions/eager.rb +204 -10
- data/lib/polars/functions/escape_regex.rb +21 -0
- data/lib/polars/functions/lazy.rb +409 -169
- data/lib/polars/functions/lit.rb +17 -1
- data/lib/polars/functions/range/int_range.rb +74 -2
- data/lib/polars/functions/range/linear_space.rb +77 -0
- data/lib/polars/functions/range/time_range.rb +1 -1
- data/lib/polars/functions/repeat.rb +3 -12
- data/lib/polars/functions/whenthen.rb +2 -2
- data/lib/polars/group_by.rb +72 -20
- data/lib/polars/iceberg_dataset.rb +1 -6
- data/lib/polars/in_process_query.rb +37 -0
- data/lib/polars/io/cloud.rb +18 -0
- data/lib/polars/io/csv.rb +265 -126
- data/lib/polars/io/database.rb +0 -1
- data/lib/polars/io/delta.rb +15 -7
- data/lib/polars/io/ipc.rb +24 -17
- data/lib/polars/io/ndjson.rb +161 -24
- data/lib/polars/io/parquet.rb +101 -38
- data/lib/polars/lazy_frame.rb +849 -558
- data/lib/polars/lazy_group_by.rb +327 -2
- data/lib/polars/list_expr.rb +94 -16
- data/lib/polars/list_name_space.rb +88 -24
- data/lib/polars/meta_expr.rb +42 -1
- data/lib/polars/name_expr.rb +41 -4
- data/lib/polars/query_opt_flags.rb +198 -2
- data/lib/polars/rolling_group_by.rb +3 -3
- data/lib/polars/schema.rb +21 -3
- data/lib/polars/selector.rb +37 -2
- data/lib/polars/selectors.rb +45 -9
- data/lib/polars/series.rb +1156 -728
- data/lib/polars/series_plot.rb +72 -0
- data/lib/polars/slice.rb +1 -1
- data/lib/polars/sql_context.rb +11 -4
- data/lib/polars/string_expr.rb +59 -68
- data/lib/polars/string_name_space.rb +51 -87
- data/lib/polars/struct_expr.rb +36 -18
- data/lib/polars/testing.rb +24 -273
- data/lib/polars/utils/constants.rb +2 -0
- data/lib/polars/utils/construction/data_frame.rb +410 -0
- data/lib/polars/utils/construction/series.rb +364 -0
- data/lib/polars/utils/construction/utils.rb +9 -0
- data/lib/polars/utils/deprecation.rb +11 -0
- data/lib/polars/utils/serde.rb +8 -3
- data/lib/polars/utils/unstable.rb +19 -0
- data/lib/polars/utils/various.rb +59 -0
- data/lib/polars/utils.rb +46 -47
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +47 -1
- metadata +25 -6
- data/ext/polars/src/allocator.rs +0 -13
- data/lib/polars/plot.rb +0 -109
|
@@ -1,19 +1,21 @@
|
|
|
1
|
-
use magnus::{
|
|
1
|
+
use magnus::{
|
|
2
|
+
IntoValue, RArray, RHash, Ruby, TryConvert, Value, r_hash::ForEach,
|
|
3
|
+
try_convert::TryConvertOwned,
|
|
4
|
+
};
|
|
2
5
|
use polars::io::RowIndex;
|
|
3
6
|
use polars::lazy::frame::LazyFrame;
|
|
4
7
|
use polars::prelude::*;
|
|
5
8
|
use polars_plan::dsl::ScanSources;
|
|
6
|
-
use
|
|
7
|
-
use std::io::BufWriter;
|
|
9
|
+
use polars_plan::plans::{HintIR, Sorted};
|
|
8
10
|
use std::num::NonZeroUsize;
|
|
9
11
|
|
|
10
|
-
use super::SinkTarget;
|
|
12
|
+
use super::{RbLazyFrame, RbOptFlags, SinkTarget};
|
|
11
13
|
use crate::conversion::*;
|
|
12
|
-
use crate::expr::
|
|
14
|
+
use crate::expr::ToExprs;
|
|
13
15
|
use crate::expr::selector::RbSelector;
|
|
14
|
-
use crate::file::get_file_like;
|
|
15
16
|
use crate::io::RbScanOptions;
|
|
16
|
-
use crate::
|
|
17
|
+
use crate::utils::EnterPolarsExt;
|
|
18
|
+
use crate::{RbDataFrame, RbExpr, RbLazyGroupBy, RbPolarsErr, RbResult, RbValueError};
|
|
17
19
|
|
|
18
20
|
fn rbobject_to_first_path_and_scan_sources(obj: Value) -> RbResult<(Option<PlPath>, ScanSources)> {
|
|
19
21
|
use crate::file::{RubyScanSourceInput, get_ruby_scan_source_input};
|
|
@@ -22,35 +24,58 @@ fn rbobject_to_first_path_and_scan_sources(obj: Value) -> RbResult<(Option<PlPat
|
|
|
22
24
|
Some(path.clone()),
|
|
23
25
|
ScanSources::Paths(FromIterator::from_iter([path])),
|
|
24
26
|
),
|
|
25
|
-
RubyScanSourceInput::File(file) => (None, ScanSources::Files([file].into())),
|
|
27
|
+
RubyScanSourceInput::File(file) => (None, ScanSources::Files([file.into()].into())),
|
|
26
28
|
RubyScanSourceInput::Buffer(buff) => (None, ScanSources::Buffers([buff].into())),
|
|
27
29
|
})
|
|
28
30
|
}
|
|
29
31
|
|
|
30
32
|
impl RbLazyFrame {
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
33
|
+
pub fn new_from_ndjson(arguments: &[Value]) -> RbResult<Self> {
|
|
34
|
+
let source = Option::<Value>::try_convert(arguments[0])?;
|
|
35
|
+
let sources = Wrap::<ScanSources>::try_convert(arguments[1])?;
|
|
36
|
+
let infer_schema_length = Option::<usize>::try_convert(arguments[2])?;
|
|
37
|
+
let schema = Option::<Wrap<Schema>>::try_convert(arguments[3])?;
|
|
38
|
+
let schema_overrides = Option::<Wrap<Schema>>::try_convert(arguments[4])?;
|
|
39
|
+
let batch_size = Option::<NonZeroUsize>::try_convert(arguments[5])?;
|
|
40
|
+
let n_rows = Option::<usize>::try_convert(arguments[6])?;
|
|
41
|
+
let low_memory = bool::try_convert(arguments[7])?;
|
|
42
|
+
let rechunk = bool::try_convert(arguments[8])?;
|
|
43
|
+
let row_index = Option::<(String, IdxSize)>::try_convert(arguments[9])?;
|
|
44
|
+
let ignore_errors = bool::try_convert(arguments[10])?;
|
|
45
|
+
let include_file_paths = Option::<String>::try_convert(arguments[11])?;
|
|
46
|
+
let cloud_options = Option::<Vec<(String, String)>>::try_convert(arguments[12])?;
|
|
47
|
+
let credential_provider = Option::<Value>::try_convert(arguments[13])?;
|
|
48
|
+
let retries = usize::try_convert(arguments[14])?;
|
|
49
|
+
let file_cache_ttl = Option::<u64>::try_convert(arguments[15])?;
|
|
50
|
+
|
|
42
51
|
let row_index = row_index.map(|(name, offset)| RowIndex {
|
|
43
52
|
name: name.into(),
|
|
44
53
|
offset,
|
|
45
54
|
});
|
|
46
55
|
|
|
47
56
|
let sources = sources.0;
|
|
48
|
-
let (
|
|
57
|
+
let (first_path, sources) = match source {
|
|
49
58
|
None => (sources.first_path().map(|p| p.into_owned()), sources),
|
|
50
59
|
Some(source) => rbobject_to_first_path_and_scan_sources(source)?,
|
|
51
60
|
};
|
|
52
61
|
|
|
53
|
-
let r = LazyJsonLineReader::new_with_sources(sources);
|
|
62
|
+
let mut r = LazyJsonLineReader::new_with_sources(sources);
|
|
63
|
+
|
|
64
|
+
if let Some(first_path) = first_path {
|
|
65
|
+
let first_path_url = first_path.to_str();
|
|
66
|
+
|
|
67
|
+
let mut cloud_options =
|
|
68
|
+
parse_cloud_options(first_path_url, cloud_options.unwrap_or_default())?;
|
|
69
|
+
cloud_options = cloud_options
|
|
70
|
+
.with_max_retries(retries)
|
|
71
|
+
.with_credential_provider(credential_provider.map(|_| todo!()));
|
|
72
|
+
|
|
73
|
+
if let Some(file_cache_ttl) = file_cache_ttl {
|
|
74
|
+
cloud_options.file_cache_ttl = file_cache_ttl;
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
r = r.with_cloud_options(Some(cloud_options));
|
|
78
|
+
};
|
|
54
79
|
|
|
55
80
|
let lf = r
|
|
56
81
|
.with_infer_schema_length(infer_schema_length.and_then(NonZeroUsize::new))
|
|
@@ -58,11 +83,11 @@ impl RbLazyFrame {
|
|
|
58
83
|
.with_n_rows(n_rows)
|
|
59
84
|
.low_memory(low_memory)
|
|
60
85
|
.with_rechunk(rechunk)
|
|
61
|
-
|
|
62
|
-
|
|
86
|
+
.with_schema(schema.map(|schema| Arc::new(schema.0)))
|
|
87
|
+
.with_schema_overwrite(schema_overrides.map(|x| Arc::new(x.0)))
|
|
63
88
|
.with_row_index(row_index)
|
|
64
|
-
|
|
65
|
-
|
|
89
|
+
.with_ignore_errors(ignore_errors)
|
|
90
|
+
.with_include_file_paths(include_file_paths.map(|x| x.into()))
|
|
66
91
|
.finish()
|
|
67
92
|
.map_err(RbPolarsErr::from)?;
|
|
68
93
|
|
|
@@ -73,27 +98,38 @@ impl RbLazyFrame {
|
|
|
73
98
|
// start arguments
|
|
74
99
|
// this pattern is needed for more than 16
|
|
75
100
|
let source = Option::<Value>::try_convert(arguments[0])?;
|
|
76
|
-
let sources = Wrap::<ScanSources>::try_convert(arguments[
|
|
77
|
-
let separator = String::try_convert(arguments[
|
|
78
|
-
let has_header = bool::try_convert(arguments[
|
|
79
|
-
let ignore_errors = bool::try_convert(arguments[
|
|
80
|
-
let skip_rows = usize::try_convert(arguments[
|
|
81
|
-
let
|
|
82
|
-
let
|
|
83
|
-
let
|
|
84
|
-
let
|
|
85
|
-
let
|
|
86
|
-
let
|
|
87
|
-
let
|
|
88
|
-
let
|
|
89
|
-
let
|
|
90
|
-
let
|
|
91
|
-
let
|
|
92
|
-
let
|
|
93
|
-
let
|
|
94
|
-
let
|
|
95
|
-
let
|
|
96
|
-
let
|
|
101
|
+
let sources = Wrap::<ScanSources>::try_convert(arguments[1])?;
|
|
102
|
+
let separator = String::try_convert(arguments[2])?;
|
|
103
|
+
let has_header = bool::try_convert(arguments[3])?;
|
|
104
|
+
let ignore_errors = bool::try_convert(arguments[4])?;
|
|
105
|
+
let skip_rows = usize::try_convert(arguments[5])?;
|
|
106
|
+
let skip_lines = usize::try_convert(arguments[6])?;
|
|
107
|
+
let n_rows = Option::<usize>::try_convert(arguments[7])?;
|
|
108
|
+
let cache = bool::try_convert(arguments[8])?;
|
|
109
|
+
let overwrite_dtype = Option::<Vec<(String, Wrap<DataType>)>>::try_convert(arguments[9])?;
|
|
110
|
+
let low_memory = bool::try_convert(arguments[10])?;
|
|
111
|
+
let comment_prefix = Option::<String>::try_convert(arguments[11])?;
|
|
112
|
+
let quote_char = Option::<String>::try_convert(arguments[12])?;
|
|
113
|
+
let null_values = Option::<Wrap<NullValues>>::try_convert(arguments[13])?;
|
|
114
|
+
let missing_utf8_is_empty_string = bool::try_convert(arguments[14])?;
|
|
115
|
+
let infer_schema_length = Option::<usize>::try_convert(arguments[15])?;
|
|
116
|
+
let with_schema_modify = Option::<Value>::try_convert(arguments[16])?;
|
|
117
|
+
let rechunk = bool::try_convert(arguments[17])?;
|
|
118
|
+
let skip_rows_after_header = usize::try_convert(arguments[18])?;
|
|
119
|
+
let encoding = Wrap::<CsvEncoding>::try_convert(arguments[19])?;
|
|
120
|
+
let row_index = Option::<(String, IdxSize)>::try_convert(arguments[20])?;
|
|
121
|
+
let try_parse_dates = bool::try_convert(arguments[21])?;
|
|
122
|
+
let eol_char = String::try_convert(arguments[22])?;
|
|
123
|
+
let raise_if_empty = bool::try_convert(arguments[23])?;
|
|
124
|
+
let truncate_ragged_lines = bool::try_convert(arguments[24])?;
|
|
125
|
+
let decimal_comma = bool::try_convert(arguments[25])?;
|
|
126
|
+
let glob = bool::try_convert(arguments[26])?;
|
|
127
|
+
let schema = Option::<Wrap<Schema>>::try_convert(arguments[27])?;
|
|
128
|
+
let cloud_options = Option::<Vec<(String, String)>>::try_convert(arguments[28])?;
|
|
129
|
+
let _credential_provider = Option::<Value>::try_convert(arguments[29])?;
|
|
130
|
+
let retries = usize::try_convert(arguments[30])?;
|
|
131
|
+
let file_cache_ttl = Option::<u64>::try_convert(arguments[31])?;
|
|
132
|
+
let include_file_paths = Option::<String>::try_convert(arguments[32])?;
|
|
97
133
|
// end arguments
|
|
98
134
|
|
|
99
135
|
let null_values = null_values.map(|w| w.0);
|
|
@@ -113,12 +149,24 @@ impl RbLazyFrame {
|
|
|
113
149
|
});
|
|
114
150
|
|
|
115
151
|
let sources = sources.0;
|
|
116
|
-
let (
|
|
152
|
+
let (first_path, sources) = match source {
|
|
117
153
|
None => (sources.first_path().map(|p| p.into_owned()), sources),
|
|
118
154
|
Some(source) => rbobject_to_first_path_and_scan_sources(source)?,
|
|
119
155
|
};
|
|
120
156
|
|
|
121
|
-
let r = LazyCsvReader::new_with_sources(sources);
|
|
157
|
+
let mut r = LazyCsvReader::new_with_sources(sources);
|
|
158
|
+
|
|
159
|
+
if let Some(first_path) = first_path {
|
|
160
|
+
let first_path_url = first_path.to_str();
|
|
161
|
+
|
|
162
|
+
let mut cloud_options =
|
|
163
|
+
parse_cloud_options(first_path_url, cloud_options.unwrap_or_default())?;
|
|
164
|
+
if let Some(file_cache_ttl) = file_cache_ttl {
|
|
165
|
+
cloud_options.file_cache_ttl = file_cache_ttl;
|
|
166
|
+
}
|
|
167
|
+
cloud_options = cloud_options.with_max_retries(retries);
|
|
168
|
+
r = r.with_cloud_options(Some(cloud_options));
|
|
169
|
+
}
|
|
122
170
|
|
|
123
171
|
let r = r
|
|
124
172
|
.with_infer_schema_length(infer_schema_length)
|
|
@@ -126,10 +174,11 @@ impl RbLazyFrame {
|
|
|
126
174
|
.with_has_header(has_header)
|
|
127
175
|
.with_ignore_errors(ignore_errors)
|
|
128
176
|
.with_skip_rows(skip_rows)
|
|
177
|
+
.with_skip_lines(skip_lines)
|
|
129
178
|
.with_n_rows(n_rows)
|
|
130
179
|
.with_cache(cache)
|
|
131
180
|
.with_dtype_overwrite(overwrite_dtype.map(Arc::new))
|
|
132
|
-
|
|
181
|
+
.with_schema(schema.map(|schema| Arc::new(schema.0)))
|
|
133
182
|
.with_low_memory(low_memory)
|
|
134
183
|
.with_comment_prefix(comment_prefix.map(|x| x.into()))
|
|
135
184
|
.with_quote_char(quote_char)
|
|
@@ -140,8 +189,12 @@ impl RbLazyFrame {
|
|
|
140
189
|
.with_row_index(row_index)
|
|
141
190
|
.with_try_parse_dates(try_parse_dates)
|
|
142
191
|
.with_null_values(null_values)
|
|
143
|
-
|
|
144
|
-
.with_truncate_ragged_lines(truncate_ragged_lines)
|
|
192
|
+
.with_missing_is_null(!missing_utf8_is_empty_string)
|
|
193
|
+
.with_truncate_ragged_lines(truncate_ragged_lines)
|
|
194
|
+
.with_decimal_comma(decimal_comma)
|
|
195
|
+
.with_glob(glob)
|
|
196
|
+
.with_raise_if_empty(raise_if_empty)
|
|
197
|
+
.with_include_file_paths(include_file_paths.map(|x| x.into()));
|
|
145
198
|
|
|
146
199
|
if let Some(_lambda) = with_schema_modify {
|
|
147
200
|
todo!();
|
|
@@ -208,57 +261,28 @@ impl RbLazyFrame {
|
|
|
208
261
|
Ok(lf.into())
|
|
209
262
|
}
|
|
210
263
|
|
|
211
|
-
pub fn
|
|
212
|
-
|
|
213
|
-
serde_json::to_writer(file, &self.ldf.borrow().logical_plan)
|
|
214
|
-
.map_err(|err| RbValueError::new_err(format!("{err:?}")))?;
|
|
215
|
-
Ok(())
|
|
264
|
+
pub fn describe_plan(rb: &Ruby, self_: &Self) -> RbResult<String> {
|
|
265
|
+
rb.enter_polars(|| self_.ldf.read().describe_plan())
|
|
216
266
|
}
|
|
217
267
|
|
|
218
|
-
pub fn
|
|
219
|
-
|
|
220
|
-
.borrow()
|
|
221
|
-
.describe_plan()
|
|
222
|
-
.map_err(RbPolarsErr::from)
|
|
223
|
-
.map_err(Into::into)
|
|
268
|
+
pub fn describe_optimized_plan(rb: &Ruby, self_: &Self) -> RbResult<String> {
|
|
269
|
+
rb.enter_polars(|| self_.ldf.read().describe_optimized_plan())
|
|
224
270
|
}
|
|
225
271
|
|
|
226
|
-
pub fn
|
|
227
|
-
|
|
228
|
-
.ldf
|
|
229
|
-
.borrow()
|
|
230
|
-
.describe_optimized_plan()
|
|
231
|
-
.map_err(RbPolarsErr::from)?;
|
|
232
|
-
Ok(result)
|
|
272
|
+
pub fn describe_plan_tree(rb: &Ruby, self_: &Self) -> RbResult<String> {
|
|
273
|
+
rb.enter_polars(|| self_.ldf.read().describe_plan_tree())
|
|
233
274
|
}
|
|
234
275
|
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
allow_streaming: bool,
|
|
246
|
-
_eager: bool,
|
|
247
|
-
) -> RbLazyFrame {
|
|
248
|
-
let ldf = self.ldf.borrow().clone();
|
|
249
|
-
let mut ldf = ldf
|
|
250
|
-
.with_type_coercion(type_coercion)
|
|
251
|
-
.with_predicate_pushdown(predicate_pushdown)
|
|
252
|
-
.with_simplify_expr(simplify_expr)
|
|
253
|
-
.with_slice_pushdown(slice_pushdown)
|
|
254
|
-
.with_new_streaming(allow_streaming)
|
|
255
|
-
._with_eager(_eager)
|
|
256
|
-
.with_projection_pushdown(projection_pushdown);
|
|
257
|
-
|
|
258
|
-
ldf = ldf.with_comm_subplan_elim(comm_subplan_elim);
|
|
259
|
-
ldf = ldf.with_comm_subexpr_elim(comm_subexpr_elim);
|
|
260
|
-
|
|
261
|
-
ldf.into()
|
|
276
|
+
pub fn describe_optimized_plan_tree(rb: &Ruby, self_: &Self) -> RbResult<String> {
|
|
277
|
+
rb.enter_polars(|| self_.ldf.read().describe_optimized_plan_tree())
|
|
278
|
+
}
|
|
279
|
+
|
|
280
|
+
pub fn to_dot(rb: &Ruby, self_: &Self, optimized: bool) -> RbResult<String> {
|
|
281
|
+
rb.enter_polars(|| self_.ldf.read().to_dot(optimized))
|
|
282
|
+
}
|
|
283
|
+
|
|
284
|
+
pub fn to_dot_streaming_phys(rb: &Ruby, self_: &Self, optimized: bool) -> RbResult<String> {
|
|
285
|
+
rb.enter_polars(|| self_.ldf.read().to_dot_streaming_phys(optimized))
|
|
262
286
|
}
|
|
263
287
|
|
|
264
288
|
pub fn sort(
|
|
@@ -269,7 +293,7 @@ impl RbLazyFrame {
|
|
|
269
293
|
maintain_order: bool,
|
|
270
294
|
multithreaded: bool,
|
|
271
295
|
) -> Self {
|
|
272
|
-
let ldf = self.ldf.
|
|
296
|
+
let ldf = self.ldf.read().clone();
|
|
273
297
|
ldf.sort(
|
|
274
298
|
[&by_column],
|
|
275
299
|
SortMultipleOptions {
|
|
@@ -291,8 +315,8 @@ impl RbLazyFrame {
|
|
|
291
315
|
maintain_order: bool,
|
|
292
316
|
multithreaded: bool,
|
|
293
317
|
) -> RbResult<Self> {
|
|
294
|
-
let ldf = self.ldf.
|
|
295
|
-
let exprs =
|
|
318
|
+
let ldf = self.ldf.read().clone();
|
|
319
|
+
let exprs = by.to_exprs()?;
|
|
296
320
|
Ok(ldf
|
|
297
321
|
.sort_by_exprs(
|
|
298
322
|
exprs,
|
|
@@ -308,8 +332,8 @@ impl RbLazyFrame {
|
|
|
308
332
|
}
|
|
309
333
|
|
|
310
334
|
pub fn top_k(&self, k: IdxSize, by: RArray, reverse: Vec<bool>) -> RbResult<Self> {
|
|
311
|
-
let ldf = self.ldf.
|
|
312
|
-
let exprs =
|
|
335
|
+
let ldf = self.ldf.read().clone();
|
|
336
|
+
let exprs = by.to_exprs()?;
|
|
313
337
|
Ok(ldf
|
|
314
338
|
.top_k(
|
|
315
339
|
k,
|
|
@@ -320,8 +344,8 @@ impl RbLazyFrame {
|
|
|
320
344
|
}
|
|
321
345
|
|
|
322
346
|
pub fn bottom_k(&self, k: IdxSize, by: RArray, reverse: Vec<bool>) -> RbResult<Self> {
|
|
323
|
-
let ldf = self.ldf.
|
|
324
|
-
let exprs =
|
|
347
|
+
let ldf = self.ldf.read().clone();
|
|
348
|
+
let exprs = by.to_exprs()?;
|
|
325
349
|
Ok(ldf
|
|
326
350
|
.bottom_k(
|
|
327
351
|
k,
|
|
@@ -332,19 +356,34 @@ impl RbLazyFrame {
|
|
|
332
356
|
}
|
|
333
357
|
|
|
334
358
|
pub fn cache(&self) -> Self {
|
|
335
|
-
let ldf = self.ldf.
|
|
359
|
+
let ldf = self.ldf.read().clone();
|
|
336
360
|
ldf.cache().into()
|
|
337
361
|
}
|
|
338
362
|
|
|
339
|
-
pub fn
|
|
340
|
-
let ldf = self.ldf.
|
|
341
|
-
|
|
342
|
-
|
|
363
|
+
pub fn with_optimizations(&self, optflags: &RbOptFlags) -> Self {
|
|
364
|
+
let ldf = self.ldf.read().clone();
|
|
365
|
+
ldf.with_optimizations(optflags.clone().inner.into_inner())
|
|
366
|
+
.into()
|
|
367
|
+
}
|
|
368
|
+
|
|
369
|
+
pub fn profile(rb: &Ruby, self_: &Self) -> RbResult<(RbDataFrame, RbDataFrame)> {
|
|
370
|
+
let (df, time_df) = rb.enter_polars(|| {
|
|
371
|
+
let ldf = self_.ldf.read().clone();
|
|
372
|
+
ldf.profile()
|
|
373
|
+
})?;
|
|
374
|
+
Ok((df.into(), time_df.into()))
|
|
375
|
+
}
|
|
376
|
+
|
|
377
|
+
pub fn collect(rb: &Ruby, self_: &Self, engine: Wrap<Engine>) -> RbResult<RbDataFrame> {
|
|
378
|
+
rb.enter_polars_df(|| {
|
|
379
|
+
let ldf = self_.ldf.read().clone();
|
|
380
|
+
ldf.collect_with_engine(engine.0)
|
|
381
|
+
})
|
|
343
382
|
}
|
|
344
383
|
|
|
345
|
-
#[allow(clippy::too_many_arguments)]
|
|
346
384
|
pub fn sink_parquet(
|
|
347
|
-
&
|
|
385
|
+
rb: &Ruby,
|
|
386
|
+
self_: &Self,
|
|
348
387
|
target: SinkTarget,
|
|
349
388
|
compression: String,
|
|
350
389
|
compression_level: Option<i32>,
|
|
@@ -352,8 +391,11 @@ impl RbLazyFrame {
|
|
|
352
391
|
row_group_size: Option<usize>,
|
|
353
392
|
data_page_size: Option<usize>,
|
|
354
393
|
cloud_options: Option<Vec<(String, String)>>,
|
|
394
|
+
credential_provider: Option<Value>,
|
|
355
395
|
retries: usize,
|
|
356
396
|
sink_options: Wrap<SinkOptions>,
|
|
397
|
+
metadata: Wrap<Option<KeyValueMetadata>>,
|
|
398
|
+
field_overwrites: Vec<Wrap<ParquetFieldOverwrites>>,
|
|
357
399
|
) -> RbResult<RbLazyFrame> {
|
|
358
400
|
let compression = parse_parquet_compression(&compression, compression_level)?;
|
|
359
401
|
|
|
@@ -362,8 +404,8 @@ impl RbLazyFrame {
|
|
|
362
404
|
statistics: statistics.0,
|
|
363
405
|
row_group_size,
|
|
364
406
|
data_page_size,
|
|
365
|
-
key_value_metadata:
|
|
366
|
-
field_overwrites:
|
|
407
|
+
key_value_metadata: metadata.0,
|
|
408
|
+
field_overwrites: field_overwrites.into_iter().map(|f| f.0).collect(),
|
|
367
409
|
};
|
|
368
410
|
|
|
369
411
|
let cloud_options = match target.base_path() {
|
|
@@ -371,31 +413,39 @@ impl RbLazyFrame {
|
|
|
371
413
|
Some(base_path) => {
|
|
372
414
|
let cloud_options =
|
|
373
415
|
parse_cloud_options(base_path.to_str(), cloud_options.unwrap_or_default())?;
|
|
374
|
-
Some(
|
|
416
|
+
Some(
|
|
417
|
+
cloud_options
|
|
418
|
+
.with_max_retries(retries)
|
|
419
|
+
.with_credential_provider(credential_provider.map(|_| todo!())),
|
|
420
|
+
)
|
|
375
421
|
}
|
|
376
422
|
};
|
|
377
423
|
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
424
|
+
rb.enter_polars(|| {
|
|
425
|
+
let ldf = self_.ldf.read().clone();
|
|
426
|
+
match target {
|
|
427
|
+
SinkTarget::File(target) => {
|
|
428
|
+
ldf.sink_parquet(target, options, cloud_options, sink_options.0)
|
|
429
|
+
}
|
|
382
430
|
}
|
|
383
|
-
}
|
|
384
|
-
.map_err(RbPolarsErr::from)
|
|
431
|
+
})
|
|
385
432
|
.map(Into::into)
|
|
386
|
-
.map_err(Into::into)
|
|
387
433
|
}
|
|
388
434
|
|
|
389
435
|
pub fn sink_ipc(
|
|
390
|
-
&
|
|
436
|
+
rb: &Ruby,
|
|
437
|
+
self_: &Self,
|
|
391
438
|
target: SinkTarget,
|
|
392
439
|
compression: Wrap<Option<IpcCompression>>,
|
|
440
|
+
compat_level: RbCompatLevel,
|
|
393
441
|
cloud_options: Option<Vec<(String, String)>>,
|
|
442
|
+
credential_provider: Option<Value>,
|
|
394
443
|
retries: usize,
|
|
395
444
|
sink_options: Wrap<SinkOptions>,
|
|
396
445
|
) -> RbResult<RbLazyFrame> {
|
|
397
446
|
let options = IpcWriterOptions {
|
|
398
447
|
compression: compression.0,
|
|
448
|
+
compat_level: compat_level.0,
|
|
399
449
|
..Default::default()
|
|
400
450
|
};
|
|
401
451
|
|
|
@@ -404,22 +454,26 @@ impl RbLazyFrame {
|
|
|
404
454
|
Some(base_path) => {
|
|
405
455
|
let cloud_options =
|
|
406
456
|
parse_cloud_options(base_path.to_str(), cloud_options.unwrap_or_default())?;
|
|
407
|
-
Some(
|
|
457
|
+
Some(
|
|
458
|
+
cloud_options
|
|
459
|
+
.with_max_retries(retries)
|
|
460
|
+
.with_credential_provider(credential_provider.map(|_| todo!())),
|
|
461
|
+
)
|
|
408
462
|
}
|
|
409
463
|
};
|
|
410
464
|
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
465
|
+
rb.enter_polars(|| {
|
|
466
|
+
let ldf = self_.ldf.read().clone();
|
|
467
|
+
match target {
|
|
468
|
+
SinkTarget::File(target) => {
|
|
469
|
+
ldf.sink_ipc(target, options, cloud_options, sink_options.0)
|
|
470
|
+
}
|
|
415
471
|
}
|
|
416
|
-
}
|
|
417
|
-
.map_err(RbPolarsErr::from)
|
|
472
|
+
})
|
|
418
473
|
.map(Into::into)
|
|
419
|
-
.map_err(Into::into)
|
|
420
474
|
}
|
|
421
475
|
|
|
422
|
-
pub fn sink_csv(&
|
|
476
|
+
pub fn sink_csv(rb: &Ruby, self_: &Self, arguments: &[Value]) -> RbResult<RbLazyFrame> {
|
|
423
477
|
let target = SinkTarget::try_convert(arguments[0])?;
|
|
424
478
|
let include_bom = bool::try_convert(arguments[1])?;
|
|
425
479
|
let include_header = bool::try_convert(arguments[2])?;
|
|
@@ -436,8 +490,9 @@ impl RbLazyFrame {
|
|
|
436
490
|
let null_value = Option::<String>::try_convert(arguments[13])?;
|
|
437
491
|
let quote_style = Option::<Wrap<QuoteStyle>>::try_convert(arguments[14])?;
|
|
438
492
|
let cloud_options = Option::<Vec<(String, String)>>::try_convert(arguments[15])?;
|
|
439
|
-
let
|
|
440
|
-
let
|
|
493
|
+
let credential_provider = Option::<Value>::try_convert(arguments[16])?;
|
|
494
|
+
let retries = usize::try_convert(arguments[17])?;
|
|
495
|
+
let sink_options = Wrap::<SinkOptions>::try_convert(arguments[18])?;
|
|
441
496
|
|
|
442
497
|
let quote_style = quote_style.map_or(QuoteStyle::default(), |wrap| wrap.0);
|
|
443
498
|
let null_value = null_value.unwrap_or(SerializeOptions::default().null);
|
|
@@ -468,25 +523,31 @@ impl RbLazyFrame {
|
|
|
468
523
|
Some(base_path) => {
|
|
469
524
|
let cloud_options =
|
|
470
525
|
parse_cloud_options(base_path.to_str(), cloud_options.unwrap_or_default())?;
|
|
471
|
-
Some(
|
|
526
|
+
Some(
|
|
527
|
+
cloud_options
|
|
528
|
+
.with_max_retries(retries)
|
|
529
|
+
.with_credential_provider(credential_provider.map(|_| todo!())),
|
|
530
|
+
)
|
|
472
531
|
}
|
|
473
532
|
};
|
|
474
533
|
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
534
|
+
rb.enter_polars(|| {
|
|
535
|
+
let ldf = self_.ldf.read().clone();
|
|
536
|
+
match target {
|
|
537
|
+
SinkTarget::File(target) => {
|
|
538
|
+
ldf.sink_csv(target, options, cloud_options, sink_options.0)
|
|
539
|
+
}
|
|
479
540
|
}
|
|
480
|
-
}
|
|
481
|
-
.map_err(RbPolarsErr::from)
|
|
541
|
+
})
|
|
482
542
|
.map(Into::into)
|
|
483
|
-
.map_err(Into::into)
|
|
484
543
|
}
|
|
485
544
|
|
|
486
545
|
pub fn sink_json(
|
|
487
|
-
&
|
|
546
|
+
rb: &Ruby,
|
|
547
|
+
self_: &Self,
|
|
488
548
|
target: SinkTarget,
|
|
489
549
|
cloud_options: Option<Vec<(String, String)>>,
|
|
550
|
+
credential_provider: Option<Value>,
|
|
490
551
|
retries: usize,
|
|
491
552
|
sink_options: Wrap<SinkOptions>,
|
|
492
553
|
) -> RbResult<RbLazyFrame> {
|
|
@@ -497,52 +558,56 @@ impl RbLazyFrame {
|
|
|
497
558
|
Some(base_path) => {
|
|
498
559
|
let cloud_options =
|
|
499
560
|
parse_cloud_options(base_path.to_str(), cloud_options.unwrap_or_default())?;
|
|
500
|
-
Some(
|
|
561
|
+
Some(
|
|
562
|
+
cloud_options
|
|
563
|
+
.with_max_retries(retries)
|
|
564
|
+
.with_credential_provider(credential_provider.map(|_| todo!())),
|
|
565
|
+
)
|
|
501
566
|
}
|
|
502
567
|
};
|
|
503
568
|
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
569
|
+
rb.enter_polars(|| {
|
|
570
|
+
let ldf = self_.ldf.read().clone();
|
|
571
|
+
match target {
|
|
572
|
+
SinkTarget::File(path) => {
|
|
573
|
+
ldf.sink_json(path, options, cloud_options, sink_options.0)
|
|
574
|
+
}
|
|
575
|
+
}
|
|
576
|
+
})
|
|
509
577
|
.map(Into::into)
|
|
510
|
-
.map_err(Into::into)
|
|
511
578
|
}
|
|
512
579
|
|
|
513
580
|
pub fn filter(&self, predicate: &RbExpr) -> Self {
|
|
514
|
-
let ldf = self.ldf.
|
|
581
|
+
let ldf = self.ldf.read().clone();
|
|
515
582
|
ldf.filter(predicate.inner.clone()).into()
|
|
516
583
|
}
|
|
517
584
|
|
|
518
585
|
pub fn remove(&self, predicate: &RbExpr) -> Self {
|
|
519
|
-
let ldf = self.ldf.
|
|
586
|
+
let ldf = self.ldf.read().clone();
|
|
520
587
|
ldf.remove(predicate.inner.clone()).into()
|
|
521
588
|
}
|
|
522
589
|
|
|
523
590
|
pub fn select(&self, exprs: RArray) -> RbResult<Self> {
|
|
524
|
-
let ldf = self.ldf.
|
|
525
|
-
let exprs =
|
|
591
|
+
let ldf = self.ldf.read().clone();
|
|
592
|
+
let exprs = exprs.to_exprs()?;
|
|
526
593
|
Ok(ldf.select(exprs).into())
|
|
527
594
|
}
|
|
528
595
|
|
|
529
596
|
pub fn select_seq(&self, exprs: RArray) -> RbResult<Self> {
|
|
530
|
-
let ldf = self.ldf.
|
|
531
|
-
let exprs =
|
|
597
|
+
let ldf = self.ldf.read().clone();
|
|
598
|
+
let exprs = exprs.to_exprs()?;
|
|
532
599
|
Ok(ldf.select_seq(exprs).into())
|
|
533
600
|
}
|
|
534
601
|
|
|
535
602
|
pub fn group_by(&self, by: RArray, maintain_order: bool) -> RbResult<RbLazyGroupBy> {
|
|
536
|
-
let ldf = self.ldf.
|
|
537
|
-
let by =
|
|
603
|
+
let ldf = self.ldf.read().clone();
|
|
604
|
+
let by = by.to_exprs()?;
|
|
538
605
|
let lazy_gb = if maintain_order {
|
|
539
606
|
ldf.group_by_stable(by)
|
|
540
607
|
} else {
|
|
541
608
|
ldf.group_by(by)
|
|
542
609
|
};
|
|
543
|
-
Ok(RbLazyGroupBy {
|
|
544
|
-
lgb: RefCell::new(Some(lazy_gb)),
|
|
545
|
-
})
|
|
610
|
+
Ok(RbLazyGroupBy { lgb: Some(lazy_gb) })
|
|
546
611
|
}
|
|
547
612
|
|
|
548
613
|
pub fn rolling(
|
|
@@ -554,8 +619,8 @@ impl RbLazyFrame {
|
|
|
554
619
|
by: RArray,
|
|
555
620
|
) -> RbResult<RbLazyGroupBy> {
|
|
556
621
|
let closed_window = closed.0;
|
|
557
|
-
let ldf = self.ldf.
|
|
558
|
-
let by =
|
|
622
|
+
let ldf = self.ldf.read().clone();
|
|
623
|
+
let by = by.to_exprs()?;
|
|
559
624
|
let lazy_gb = ldf.rolling(
|
|
560
625
|
index_column.inner.clone(),
|
|
561
626
|
by,
|
|
@@ -567,12 +632,9 @@ impl RbLazyFrame {
|
|
|
567
632
|
},
|
|
568
633
|
);
|
|
569
634
|
|
|
570
|
-
Ok(RbLazyGroupBy {
|
|
571
|
-
lgb: RefCell::new(Some(lazy_gb)),
|
|
572
|
-
})
|
|
635
|
+
Ok(RbLazyGroupBy { lgb: Some(lazy_gb) })
|
|
573
636
|
}
|
|
574
637
|
|
|
575
|
-
#[allow(clippy::too_many_arguments)]
|
|
576
638
|
pub fn group_by_dynamic(
|
|
577
639
|
&self,
|
|
578
640
|
index_column: &RbExpr,
|
|
@@ -586,8 +648,8 @@ impl RbLazyFrame {
|
|
|
586
648
|
start_by: Wrap<StartBy>,
|
|
587
649
|
) -> RbResult<RbLazyGroupBy> {
|
|
588
650
|
let closed_window = closed.0;
|
|
589
|
-
let by =
|
|
590
|
-
let ldf = self.ldf.
|
|
651
|
+
let by = by.to_exprs()?;
|
|
652
|
+
let ldf = self.ldf.read().clone();
|
|
591
653
|
let lazy_gb = ldf.group_by_dynamic(
|
|
592
654
|
index_column.inner.clone(),
|
|
593
655
|
by,
|
|
@@ -603,21 +665,9 @@ impl RbLazyFrame {
|
|
|
603
665
|
},
|
|
604
666
|
);
|
|
605
667
|
|
|
606
|
-
Ok(RbLazyGroupBy {
|
|
607
|
-
lgb: RefCell::new(Some(lazy_gb)),
|
|
608
|
-
})
|
|
609
|
-
}
|
|
610
|
-
|
|
611
|
-
pub fn with_context(&self, contexts: RArray) -> RbResult<Self> {
|
|
612
|
-
let contexts = contexts.typecheck::<Obj<RbLazyFrame>>()?;
|
|
613
|
-
let contexts = contexts
|
|
614
|
-
.into_iter()
|
|
615
|
-
.map(|ldf| ldf.ldf.borrow().clone())
|
|
616
|
-
.collect::<Vec<_>>();
|
|
617
|
-
Ok(self.ldf.borrow().clone().with_context(contexts).into())
|
|
668
|
+
Ok(RbLazyGroupBy { lgb: Some(lazy_gb) })
|
|
618
669
|
}
|
|
619
670
|
|
|
620
|
-
#[allow(clippy::too_many_arguments)]
|
|
621
671
|
pub fn join_asof(
|
|
622
672
|
&self,
|
|
623
673
|
other: &RbLazyFrame,
|
|
@@ -640,8 +690,8 @@ impl RbLazyFrame {
|
|
|
640
690
|
} else {
|
|
641
691
|
JoinCoalesce::KeepColumns
|
|
642
692
|
};
|
|
643
|
-
let ldf = self.ldf.
|
|
644
|
-
let other = other.ldf.
|
|
693
|
+
let ldf = self.ldf.read().clone();
|
|
694
|
+
let other = other.ldf.read().clone();
|
|
645
695
|
let left_on = left_on.inner.clone();
|
|
646
696
|
let right_on = right_on.inner.clone();
|
|
647
697
|
Ok(ldf
|
|
@@ -670,7 +720,6 @@ impl RbLazyFrame {
|
|
|
670
720
|
.into())
|
|
671
721
|
}
|
|
672
722
|
|
|
673
|
-
#[allow(clippy::too_many_arguments)]
|
|
674
723
|
pub fn join(
|
|
675
724
|
&self,
|
|
676
725
|
other: &RbLazyFrame,
|
|
@@ -690,10 +739,10 @@ impl RbLazyFrame {
|
|
|
690
739
|
Some(true) => JoinCoalesce::CoalesceColumns,
|
|
691
740
|
Some(false) => JoinCoalesce::KeepColumns,
|
|
692
741
|
};
|
|
693
|
-
let ldf = self.ldf.
|
|
694
|
-
let other = other.ldf.
|
|
695
|
-
let left_on =
|
|
696
|
-
let right_on =
|
|
742
|
+
let ldf = self.ldf.read().clone();
|
|
743
|
+
let other = other.ldf.read().clone();
|
|
744
|
+
let left_on = left_on.to_exprs()?;
|
|
745
|
+
let right_on = right_on.to_exprs()?;
|
|
697
746
|
|
|
698
747
|
Ok(ldf
|
|
699
748
|
.join_builder()
|
|
@@ -713,10 +762,10 @@ impl RbLazyFrame {
|
|
|
713
762
|
}
|
|
714
763
|
|
|
715
764
|
pub fn join_where(&self, other: &Self, predicates: RArray, suffix: String) -> RbResult<Self> {
|
|
716
|
-
let ldf = self.ldf.
|
|
717
|
-
let other = other.ldf.
|
|
765
|
+
let ldf = self.ldf.read().clone();
|
|
766
|
+
let other = other.ldf.read().clone();
|
|
718
767
|
|
|
719
|
-
let predicates =
|
|
768
|
+
let predicates = predicates.to_exprs()?;
|
|
720
769
|
|
|
721
770
|
Ok(ldf
|
|
722
771
|
.join_builder()
|
|
@@ -727,32 +776,32 @@ impl RbLazyFrame {
|
|
|
727
776
|
}
|
|
728
777
|
|
|
729
778
|
pub fn with_column(&self, expr: &RbExpr) -> Self {
|
|
730
|
-
let ldf = self.ldf.
|
|
779
|
+
let ldf = self.ldf.read().clone();
|
|
731
780
|
ldf.with_column(expr.inner.clone()).into()
|
|
732
781
|
}
|
|
733
782
|
|
|
734
783
|
pub fn with_columns(&self, exprs: RArray) -> RbResult<Self> {
|
|
735
|
-
let ldf = self.ldf.
|
|
736
|
-
Ok(ldf.with_columns(
|
|
784
|
+
let ldf = self.ldf.read().clone();
|
|
785
|
+
Ok(ldf.with_columns(exprs.to_exprs()?).into())
|
|
737
786
|
}
|
|
738
787
|
|
|
739
788
|
pub fn with_columns_seq(&self, exprs: RArray) -> RbResult<Self> {
|
|
740
|
-
let ldf = self.ldf.
|
|
741
|
-
Ok(ldf.with_columns_seq(
|
|
789
|
+
let ldf = self.ldf.read().clone();
|
|
790
|
+
Ok(ldf.with_columns_seq(exprs.to_exprs()?).into())
|
|
742
791
|
}
|
|
743
792
|
|
|
744
793
|
pub fn rename(&self, existing: Vec<String>, new: Vec<String>, strict: bool) -> Self {
|
|
745
|
-
let ldf = self.ldf.
|
|
794
|
+
let ldf = self.ldf.read().clone();
|
|
746
795
|
ldf.rename(existing, new, strict).into()
|
|
747
796
|
}
|
|
748
797
|
|
|
749
798
|
pub fn reverse(&self) -> Self {
|
|
750
|
-
let ldf = self.ldf.
|
|
799
|
+
let ldf = self.ldf.read().clone();
|
|
751
800
|
ldf.reverse().into()
|
|
752
801
|
}
|
|
753
802
|
|
|
754
803
|
pub fn shift(&self, n: &RbExpr, fill_value: Option<&RbExpr>) -> Self {
|
|
755
|
-
let lf = self.ldf.
|
|
804
|
+
let lf = self.ldf.read().clone();
|
|
756
805
|
let out = match fill_value {
|
|
757
806
|
Some(v) => lf.shift_and_fill(n.inner.clone(), v.inner.clone()),
|
|
758
807
|
None => lf.shift(n.inner.clone()),
|
|
@@ -761,68 +810,64 @@ impl RbLazyFrame {
|
|
|
761
810
|
}
|
|
762
811
|
|
|
763
812
|
pub fn fill_nan(&self, fill_value: &RbExpr) -> Self {
|
|
764
|
-
let ldf = self.ldf.
|
|
813
|
+
let ldf = self.ldf.read().clone();
|
|
765
814
|
ldf.fill_nan(fill_value.inner.clone()).into()
|
|
766
815
|
}
|
|
767
816
|
|
|
768
817
|
pub fn min(&self) -> Self {
|
|
769
|
-
let ldf = self.ldf.
|
|
818
|
+
let ldf = self.ldf.read().clone();
|
|
770
819
|
let out = ldf.min();
|
|
771
820
|
out.into()
|
|
772
821
|
}
|
|
773
822
|
|
|
774
823
|
pub fn max(&self) -> Self {
|
|
775
|
-
let ldf = self.ldf.
|
|
824
|
+
let ldf = self.ldf.read().clone();
|
|
776
825
|
let out = ldf.max();
|
|
777
826
|
out.into()
|
|
778
827
|
}
|
|
779
828
|
|
|
780
829
|
pub fn sum(&self) -> Self {
|
|
781
|
-
let ldf = self.ldf.
|
|
830
|
+
let ldf = self.ldf.read().clone();
|
|
782
831
|
let out = ldf.sum();
|
|
783
832
|
out.into()
|
|
784
833
|
}
|
|
785
834
|
|
|
786
835
|
pub fn mean(&self) -> Self {
|
|
787
|
-
let ldf = self.ldf.
|
|
836
|
+
let ldf = self.ldf.read().clone();
|
|
788
837
|
let out = ldf.mean();
|
|
789
838
|
out.into()
|
|
790
839
|
}
|
|
791
840
|
|
|
792
841
|
pub fn std(&self, ddof: u8) -> Self {
|
|
793
|
-
let ldf = self.ldf.
|
|
842
|
+
let ldf = self.ldf.read().clone();
|
|
794
843
|
let out = ldf.std(ddof);
|
|
795
844
|
out.into()
|
|
796
845
|
}
|
|
797
846
|
|
|
798
847
|
pub fn var(&self, ddof: u8) -> Self {
|
|
799
|
-
let ldf = self.ldf.
|
|
848
|
+
let ldf = self.ldf.read().clone();
|
|
800
849
|
let out = ldf.var(ddof);
|
|
801
850
|
out.into()
|
|
802
851
|
}
|
|
803
852
|
|
|
804
853
|
pub fn median(&self) -> Self {
|
|
805
|
-
let ldf = self.ldf.
|
|
854
|
+
let ldf = self.ldf.read().clone();
|
|
806
855
|
let out = ldf.median();
|
|
807
856
|
out.into()
|
|
808
857
|
}
|
|
809
858
|
|
|
810
859
|
pub fn quantile(&self, quantile: &RbExpr, interpolation: Wrap<QuantileMethod>) -> Self {
|
|
811
|
-
let ldf = self.ldf.
|
|
860
|
+
let ldf = self.ldf.read().clone();
|
|
812
861
|
let out = ldf.quantile(quantile.inner.clone(), interpolation.0);
|
|
813
862
|
out.into()
|
|
814
863
|
}
|
|
815
864
|
|
|
816
865
|
pub fn explode(&self, subset: &RbSelector) -> Self {
|
|
817
|
-
self.ldf
|
|
818
|
-
.borrow()
|
|
819
|
-
.clone()
|
|
820
|
-
.explode(subset.inner.clone())
|
|
821
|
-
.into()
|
|
866
|
+
self.ldf.read().clone().explode(subset.inner.clone()).into()
|
|
822
867
|
}
|
|
823
868
|
|
|
824
869
|
pub fn null_count(&self) -> Self {
|
|
825
|
-
let ldf = self.ldf.
|
|
870
|
+
let ldf = self.ldf.read().clone();
|
|
826
871
|
ldf.null_count().into()
|
|
827
872
|
}
|
|
828
873
|
|
|
@@ -832,7 +877,7 @@ impl RbLazyFrame {
|
|
|
832
877
|
subset: Option<&RbSelector>,
|
|
833
878
|
keep: Wrap<UniqueKeepStrategy>,
|
|
834
879
|
) -> RbResult<Self> {
|
|
835
|
-
let ldf = self.ldf.
|
|
880
|
+
let ldf = self.ldf.read().clone();
|
|
836
881
|
let subset = subset.map(|e| e.inner.clone());
|
|
837
882
|
Ok(match maintain_order {
|
|
838
883
|
true => ldf.unique_stable_generic(subset, keep.0),
|
|
@@ -843,7 +888,7 @@ impl RbLazyFrame {
|
|
|
843
888
|
|
|
844
889
|
pub fn drop_nans(&self, subset: Option<&RbSelector>) -> Self {
|
|
845
890
|
self.ldf
|
|
846
|
-
.
|
|
891
|
+
.read()
|
|
847
892
|
.clone()
|
|
848
893
|
.drop_nans(subset.map(|e| e.inner.clone()))
|
|
849
894
|
.into()
|
|
@@ -851,19 +896,19 @@ impl RbLazyFrame {
|
|
|
851
896
|
|
|
852
897
|
pub fn drop_nulls(&self, subset: Option<&RbSelector>) -> Self {
|
|
853
898
|
self.ldf
|
|
854
|
-
.
|
|
899
|
+
.read()
|
|
855
900
|
.clone()
|
|
856
901
|
.drop_nulls(subset.map(|e| e.inner.clone()))
|
|
857
902
|
.into()
|
|
858
903
|
}
|
|
859
904
|
|
|
860
905
|
pub fn slice(&self, offset: i64, len: Option<IdxSize>) -> Self {
|
|
861
|
-
let ldf = self.ldf.
|
|
906
|
+
let ldf = self.ldf.read().clone();
|
|
862
907
|
ldf.slice(offset, len.unwrap_or(IdxSize::MAX)).into()
|
|
863
908
|
}
|
|
864
909
|
|
|
865
910
|
pub fn tail(&self, n: IdxSize) -> Self {
|
|
866
|
-
let ldf = self.ldf.
|
|
911
|
+
let ldf = self.ldf.read().clone();
|
|
867
912
|
ldf.tail(n).into()
|
|
868
913
|
}
|
|
869
914
|
|
|
@@ -881,17 +926,17 @@ impl RbLazyFrame {
|
|
|
881
926
|
variable_name: variable_name.map(|s| s.into()),
|
|
882
927
|
};
|
|
883
928
|
|
|
884
|
-
let ldf = self.ldf.
|
|
929
|
+
let ldf = self.ldf.read().clone();
|
|
885
930
|
Ok(ldf.unpivot(args).into())
|
|
886
931
|
}
|
|
887
932
|
|
|
888
933
|
pub fn with_row_index(&self, name: String, offset: Option<IdxSize>) -> Self {
|
|
889
|
-
let ldf = self.ldf.
|
|
934
|
+
let ldf = self.ldf.read().clone();
|
|
890
935
|
ldf.with_row_index(&name, offset).into()
|
|
891
936
|
}
|
|
892
937
|
|
|
893
938
|
pub fn drop(&self, columns: &RbSelector) -> Self {
|
|
894
|
-
self.ldf.
|
|
939
|
+
self.ldf.read().clone().drop(columns.inner.clone()).into()
|
|
895
940
|
}
|
|
896
941
|
|
|
897
942
|
pub fn cast(&self, rb_dtypes: RHash, strict: bool) -> RbResult<Self> {
|
|
@@ -902,30 +947,26 @@ impl RbLazyFrame {
|
|
|
902
947
|
})?;
|
|
903
948
|
let mut cast_map = PlHashMap::with_capacity(dtypes.len());
|
|
904
949
|
cast_map.extend(dtypes.iter().map(|(k, v)| (k.as_ref(), v.clone())));
|
|
905
|
-
Ok(self.ldf.
|
|
950
|
+
Ok(self.ldf.read().clone().cast(cast_map, strict).into())
|
|
906
951
|
}
|
|
907
952
|
|
|
908
953
|
pub fn cast_all(&self, dtype: Wrap<DataType>, strict: bool) -> Self {
|
|
909
|
-
self.ldf.
|
|
954
|
+
self.ldf.read().clone().cast_all(dtype.0, strict).into()
|
|
910
955
|
}
|
|
911
956
|
|
|
912
957
|
pub fn clone(&self) -> Self {
|
|
913
|
-
self.ldf.
|
|
958
|
+
self.ldf.read().clone().into()
|
|
914
959
|
}
|
|
915
960
|
|
|
916
|
-
pub fn collect_schema(
|
|
917
|
-
let schema =
|
|
918
|
-
.ldf
|
|
919
|
-
.borrow_mut()
|
|
920
|
-
.collect_schema()
|
|
921
|
-
.map_err(RbPolarsErr::from)?;
|
|
961
|
+
pub fn collect_schema(rb: &Ruby, self_: &Self) -> RbResult<RHash> {
|
|
962
|
+
let schema = rb.enter_polars(|| self_.ldf.write().collect_schema())?;
|
|
922
963
|
|
|
923
|
-
let schema_dict =
|
|
964
|
+
let schema_dict = rb.hash_new();
|
|
924
965
|
schema.iter_fields().for_each(|fld| {
|
|
925
966
|
schema_dict
|
|
926
967
|
.aset::<String, Value>(
|
|
927
968
|
fld.name().to_string(),
|
|
928
|
-
Wrap(fld.dtype().clone()).into_value_with(
|
|
969
|
+
Wrap(fld.dtype().clone()).into_value_with(rb),
|
|
929
970
|
)
|
|
930
971
|
.unwrap();
|
|
931
972
|
});
|
|
@@ -934,7 +975,7 @@ impl RbLazyFrame {
|
|
|
934
975
|
|
|
935
976
|
pub fn unnest(&self, columns: &RbSelector, separator: Option<String>) -> Self {
|
|
936
977
|
self.ldf
|
|
937
|
-
.
|
|
978
|
+
.read()
|
|
938
979
|
.clone()
|
|
939
980
|
.unnest(
|
|
940
981
|
columns.inner.clone(),
|
|
@@ -944,17 +985,80 @@ impl RbLazyFrame {
|
|
|
944
985
|
}
|
|
945
986
|
|
|
946
987
|
pub fn count(&self) -> Self {
|
|
947
|
-
let ldf = self.ldf.
|
|
988
|
+
let ldf = self.ldf.read().clone();
|
|
948
989
|
ldf.count().into()
|
|
949
990
|
}
|
|
950
991
|
|
|
951
992
|
pub fn merge_sorted(&self, other: &Self, key: String) -> RbResult<Self> {
|
|
952
993
|
let out = self
|
|
953
994
|
.ldf
|
|
954
|
-
.
|
|
995
|
+
.read()
|
|
955
996
|
.clone()
|
|
956
|
-
.merge_sorted(other.ldf.
|
|
997
|
+
.merge_sorted(other.ldf.read().clone(), &key)
|
|
998
|
+
.map_err(RbPolarsErr::from)?;
|
|
999
|
+
Ok(out.into())
|
|
1000
|
+
}
|
|
1001
|
+
|
|
1002
|
+
pub fn hint_sorted(
|
|
1003
|
+
&self,
|
|
1004
|
+
columns: Vec<String>,
|
|
1005
|
+
descending: Vec<bool>,
|
|
1006
|
+
nulls_last: Vec<bool>,
|
|
1007
|
+
) -> RbResult<Self> {
|
|
1008
|
+
if columns.len() != descending.len() && descending.len() != 1 {
|
|
1009
|
+
return Err(RbValueError::new_err(
|
|
1010
|
+
"`set_sorted` expects the same amount of `columns` as `descending` values.",
|
|
1011
|
+
));
|
|
1012
|
+
}
|
|
1013
|
+
if columns.len() != nulls_last.len() && nulls_last.len() != 1 {
|
|
1014
|
+
return Err(RbValueError::new_err(
|
|
1015
|
+
"`set_sorted` expects the same amount of `columns` as `nulls_last` values.",
|
|
1016
|
+
));
|
|
1017
|
+
}
|
|
1018
|
+
|
|
1019
|
+
let mut sorted = columns
|
|
1020
|
+
.iter()
|
|
1021
|
+
.map(|c| Sorted {
|
|
1022
|
+
column: PlSmallStr::from_str(c.as_str()),
|
|
1023
|
+
descending: false,
|
|
1024
|
+
nulls_last: false,
|
|
1025
|
+
})
|
|
1026
|
+
.collect::<Vec<_>>();
|
|
1027
|
+
|
|
1028
|
+
if !columns.is_empty() {
|
|
1029
|
+
if descending.len() != 1 {
|
|
1030
|
+
sorted
|
|
1031
|
+
.iter_mut()
|
|
1032
|
+
.zip(descending)
|
|
1033
|
+
.for_each(|(s, d)| s.descending = d);
|
|
1034
|
+
} else if descending[0] {
|
|
1035
|
+
sorted.iter_mut().for_each(|s| s.descending = true);
|
|
1036
|
+
}
|
|
1037
|
+
|
|
1038
|
+
if nulls_last.len() != 1 {
|
|
1039
|
+
sorted
|
|
1040
|
+
.iter_mut()
|
|
1041
|
+
.zip(nulls_last)
|
|
1042
|
+
.for_each(|(s, d)| s.nulls_last = d);
|
|
1043
|
+
} else if nulls_last[0] {
|
|
1044
|
+
sorted.iter_mut().for_each(|s| s.nulls_last = true);
|
|
1045
|
+
}
|
|
1046
|
+
}
|
|
1047
|
+
|
|
1048
|
+
let out = self
|
|
1049
|
+
.ldf
|
|
1050
|
+
.read()
|
|
1051
|
+
.clone()
|
|
1052
|
+
.hint(HintIR::Sorted(sorted.into()))
|
|
957
1053
|
.map_err(RbPolarsErr::from)?;
|
|
958
1054
|
Ok(out.into())
|
|
959
1055
|
}
|
|
960
1056
|
}
|
|
1057
|
+
|
|
1058
|
+
impl TryConvert for Wrap<polars_io::parquet::write::ParquetFieldOverwrites> {
|
|
1059
|
+
fn try_convert(_ob: Value) -> RbResult<Self> {
|
|
1060
|
+
todo!();
|
|
1061
|
+
}
|
|
1062
|
+
}
|
|
1063
|
+
|
|
1064
|
+
unsafe impl TryConvertOwned for Wrap<polars_io::parquet::write::ParquetFieldOverwrites> {}
|