polars-df 0.10.0 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/Cargo.lock +90 -48
- data/README.md +6 -6
- data/ext/polars/Cargo.toml +7 -5
- data/ext/polars/src/batched_csv.rs +53 -52
- data/ext/polars/src/conversion/mod.rs +13 -60
- data/ext/polars/src/dataframe/construction.rs +186 -0
- data/ext/polars/src/dataframe/export.rs +48 -0
- data/ext/polars/src/dataframe/general.rs +607 -0
- data/ext/polars/src/dataframe/io.rs +463 -0
- data/ext/polars/src/dataframe/mod.rs +26 -0
- data/ext/polars/src/expr/datetime.rs +6 -2
- data/ext/polars/src/expr/general.rs +28 -6
- data/ext/polars/src/expr/rolling.rs +185 -69
- data/ext/polars/src/expr/string.rs +9 -30
- data/ext/polars/src/functions/lazy.rs +2 -0
- data/ext/polars/src/functions/range.rs +74 -0
- data/ext/polars/src/interop/mod.rs +1 -0
- data/ext/polars/src/interop/numo/mod.rs +2 -0
- data/ext/polars/src/interop/numo/to_numo_df.rs +23 -0
- data/ext/polars/src/interop/numo/to_numo_series.rs +60 -0
- data/ext/polars/src/lazyframe/mod.rs +54 -38
- data/ext/polars/src/lib.rs +46 -21
- data/ext/polars/src/map/lazy.rs +5 -25
- data/ext/polars/src/map/series.rs +7 -1
- data/ext/polars/src/series/aggregation.rs +47 -30
- data/ext/polars/src/series/export.rs +131 -49
- data/ext/polars/src/series/mod.rs +1 -131
- data/lib/polars/batched_csv_reader.rb +9 -3
- data/lib/polars/convert.rb +6 -1
- data/lib/polars/data_frame.rb +83 -302
- data/lib/polars/date_time_expr.rb +1 -0
- data/lib/polars/date_time_name_space.rb +5 -1
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/exceptions.rb +4 -0
- data/lib/polars/expr.rb +1134 -20
- data/lib/polars/functions/range/date_range.rb +92 -0
- data/lib/polars/functions/range/datetime_range.rb +149 -0
- data/lib/polars/functions/range/time_range.rb +141 -0
- data/lib/polars/group_by.rb +88 -23
- data/lib/polars/io/avro.rb +24 -0
- data/lib/polars/{io.rb → io/csv.rb} +296 -490
- data/lib/polars/io/database.rb +73 -0
- data/lib/polars/io/ipc.rb +247 -0
- data/lib/polars/io/json.rb +18 -0
- data/lib/polars/io/ndjson.rb +69 -0
- data/lib/polars/io/parquet.rb +226 -0
- data/lib/polars/lazy_frame.rb +23 -166
- data/lib/polars/lazy_group_by.rb +100 -3
- data/lib/polars/rolling_group_by.rb +2 -2
- data/lib/polars/series.rb +2 -2
- data/lib/polars/string_expr.rb +37 -36
- data/lib/polars/utils.rb +35 -1
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +9 -1
- metadata +21 -5
- data/ext/polars/src/dataframe.rs +0 -1208
@@ -48,7 +48,7 @@ impl RbLazyFrame {
|
|
48
48
|
// in this scope
|
49
49
|
let json = unsafe { std::mem::transmute::<&'_ str, &'static str>(json.as_str()) };
|
50
50
|
|
51
|
-
let lp = serde_json::from_str::<
|
51
|
+
let lp = serde_json::from_str::<DslPlan>(json)
|
52
52
|
.map_err(|err| RbValueError::new_err(format!("{:?}", err)))?;
|
53
53
|
Ok(LazyFrame::from(lp).into())
|
54
54
|
}
|
@@ -63,7 +63,10 @@ impl RbLazyFrame {
|
|
63
63
|
row_index: Option<(String, IdxSize)>,
|
64
64
|
) -> RbResult<Self> {
|
65
65
|
let batch_size = batch_size.map(|v| v.0);
|
66
|
-
let row_index = row_index.map(|(name, offset)| RowIndex {
|
66
|
+
let row_index = row_index.map(|(name, offset)| RowIndex {
|
67
|
+
name: Arc::from(name.as_str()),
|
68
|
+
offset,
|
69
|
+
});
|
67
70
|
|
68
71
|
let lf = LazyJsonLineReader::new(path)
|
69
72
|
.with_infer_schema_length(infer_schema_length)
|
@@ -107,7 +110,10 @@ impl RbLazyFrame {
|
|
107
110
|
let quote_char = quote_char.map(|s| s.as_bytes()[0]);
|
108
111
|
let separator = separator.as_bytes()[0];
|
109
112
|
let eol_char = eol_char.as_bytes()[0];
|
110
|
-
let row_index = row_index.map(|(name, offset)| RowIndex {
|
113
|
+
let row_index = row_index.map(|(name, offset)| RowIndex {
|
114
|
+
name: Arc::from(name.as_str()),
|
115
|
+
offset,
|
116
|
+
});
|
111
117
|
|
112
118
|
let overwrite_dtype = overwrite_dtype.map(|overwrite_dtype| {
|
113
119
|
overwrite_dtype
|
@@ -119,17 +125,17 @@ impl RbLazyFrame {
|
|
119
125
|
let r = LazyCsvReader::new(path)
|
120
126
|
.with_infer_schema_length(infer_schema_length)
|
121
127
|
.with_separator(separator)
|
122
|
-
.
|
128
|
+
.with_has_header(has_header)
|
123
129
|
.with_ignore_errors(ignore_errors)
|
124
130
|
.with_skip_rows(skip_rows)
|
125
131
|
.with_n_rows(n_rows)
|
126
132
|
.with_cache(cache)
|
127
|
-
.with_dtype_overwrite(overwrite_dtype.
|
133
|
+
.with_dtype_overwrite(overwrite_dtype.map(Arc::new))
|
128
134
|
// TODO add with_schema
|
129
|
-
.
|
135
|
+
.with_low_memory(low_memory)
|
130
136
|
.with_comment_prefix(comment_prefix.as_deref())
|
131
137
|
.with_quote_char(quote_char)
|
132
|
-
.
|
138
|
+
.with_eol_char(eol_char)
|
133
139
|
.with_rechunk(rechunk)
|
134
140
|
.with_skip_rows_after_header(skip_rows_after_header)
|
135
141
|
.with_encoding(encoding.0)
|
@@ -137,7 +143,7 @@ impl RbLazyFrame {
|
|
137
143
|
.with_try_parse_dates(try_parse_dates)
|
138
144
|
.with_null_values(null_values)
|
139
145
|
// TODO add with_missing_is_null
|
140
|
-
.
|
146
|
+
.with_truncate_ragged_lines(truncate_ragged_lines);
|
141
147
|
|
142
148
|
if let Some(_lambda) = with_schema_modify {
|
143
149
|
todo!();
|
@@ -159,6 +165,7 @@ impl RbLazyFrame {
|
|
159
165
|
use_statistics: bool,
|
160
166
|
hive_partitioning: bool,
|
161
167
|
hive_schema: Option<Wrap<Schema>>,
|
168
|
+
glob: bool,
|
162
169
|
) -> RbResult<Self> {
|
163
170
|
let parallel = parallel.0;
|
164
171
|
let hive_schema = hive_schema.map(|s| Arc::new(s.0));
|
@@ -171,7 +178,10 @@ impl RbLazyFrame {
|
|
171
178
|
.ok_or_else(|| RbValueError::new_err("expected a path argument".to_string()))?
|
172
179
|
};
|
173
180
|
|
174
|
-
let row_index = row_index.map(|(name, offset)| RowIndex {
|
181
|
+
let row_index = row_index.map(|(name, offset)| RowIndex {
|
182
|
+
name: Arc::from(name.as_str()),
|
183
|
+
offset,
|
184
|
+
});
|
175
185
|
let hive_options = HiveOptions {
|
176
186
|
enabled: hive_partitioning,
|
177
187
|
schema: hive_schema,
|
@@ -187,6 +197,7 @@ impl RbLazyFrame {
|
|
187
197
|
cloud_options: None,
|
188
198
|
use_statistics,
|
189
199
|
hive_options,
|
200
|
+
glob,
|
190
201
|
};
|
191
202
|
|
192
203
|
let lf = if path.is_some() {
|
@@ -206,7 +217,11 @@ impl RbLazyFrame {
|
|
206
217
|
row_index: Option<(String, IdxSize)>,
|
207
218
|
memory_map: bool,
|
208
219
|
) -> RbResult<Self> {
|
209
|
-
let row_index = row_index.map(|(name, offset)| RowIndex {
|
220
|
+
let row_index = row_index.map(|(name, offset)| RowIndex {
|
221
|
+
name: Arc::from(name.as_str()),
|
222
|
+
offset,
|
223
|
+
});
|
224
|
+
|
210
225
|
let args = ScanArgsIpc {
|
211
226
|
n_rows,
|
212
227
|
cache,
|
@@ -226,8 +241,11 @@ impl RbLazyFrame {
|
|
226
241
|
Ok(())
|
227
242
|
}
|
228
243
|
|
229
|
-
pub fn describe_plan(&self) -> String {
|
230
|
-
self.ldf
|
244
|
+
pub fn describe_plan(&self) -> RbResult<String> {
|
245
|
+
self.ldf
|
246
|
+
.describe_plan()
|
247
|
+
.map_err(RbPolarsErr::from)
|
248
|
+
.map_err(Into::into)
|
231
249
|
}
|
232
250
|
|
233
251
|
pub fn describe_optimized_plan(&self) -> RbResult<String> {
|
@@ -641,58 +659,56 @@ impl RbLazyFrame {
|
|
641
659
|
ldf.fill_nan(fill_value.inner.clone()).into()
|
642
660
|
}
|
643
661
|
|
644
|
-
pub fn min(&self) ->
|
662
|
+
pub fn min(&self) -> Self {
|
645
663
|
let ldf = self.ldf.clone();
|
646
|
-
let out = ldf.min()
|
647
|
-
|
664
|
+
let out = ldf.min();
|
665
|
+
out.into()
|
648
666
|
}
|
649
667
|
|
650
|
-
pub fn max(&self) ->
|
668
|
+
pub fn max(&self) -> Self {
|
651
669
|
let ldf = self.ldf.clone();
|
652
|
-
let out = ldf.max()
|
653
|
-
|
670
|
+
let out = ldf.max();
|
671
|
+
out.into()
|
654
672
|
}
|
655
673
|
|
656
|
-
pub fn sum(&self) ->
|
674
|
+
pub fn sum(&self) -> Self {
|
657
675
|
let ldf = self.ldf.clone();
|
658
|
-
let out = ldf.sum()
|
659
|
-
|
676
|
+
let out = ldf.sum();
|
677
|
+
out.into()
|
660
678
|
}
|
661
679
|
|
662
|
-
pub fn mean(&self) ->
|
680
|
+
pub fn mean(&self) -> Self {
|
663
681
|
let ldf = self.ldf.clone();
|
664
|
-
let out = ldf.mean()
|
665
|
-
|
682
|
+
let out = ldf.mean();
|
683
|
+
out.into()
|
666
684
|
}
|
667
685
|
|
668
|
-
pub fn std(&self, ddof: u8) ->
|
686
|
+
pub fn std(&self, ddof: u8) -> Self {
|
669
687
|
let ldf = self.ldf.clone();
|
670
|
-
let out = ldf.std(ddof)
|
671
|
-
|
688
|
+
let out = ldf.std(ddof);
|
689
|
+
out.into()
|
672
690
|
}
|
673
691
|
|
674
|
-
pub fn var(&self, ddof: u8) ->
|
692
|
+
pub fn var(&self, ddof: u8) -> Self {
|
675
693
|
let ldf = self.ldf.clone();
|
676
|
-
let out = ldf.var(ddof)
|
677
|
-
|
694
|
+
let out = ldf.var(ddof);
|
695
|
+
out.into()
|
678
696
|
}
|
679
697
|
|
680
|
-
pub fn median(&self) ->
|
698
|
+
pub fn median(&self) -> Self {
|
681
699
|
let ldf = self.ldf.clone();
|
682
|
-
let out = ldf.median()
|
683
|
-
|
700
|
+
let out = ldf.median();
|
701
|
+
out.into()
|
684
702
|
}
|
685
703
|
|
686
704
|
pub fn quantile(
|
687
705
|
&self,
|
688
706
|
quantile: &RbExpr,
|
689
707
|
interpolation: Wrap<QuantileInterpolOptions>,
|
690
|
-
) ->
|
708
|
+
) -> Self {
|
691
709
|
let ldf = self.ldf.clone();
|
692
|
-
let out = ldf
|
693
|
-
|
694
|
-
.map_err(RbPolarsErr::from)?;
|
695
|
-
Ok(out.into())
|
710
|
+
let out = ldf.quantile(quantile.inner.clone(), interpolation.0);
|
711
|
+
out.into()
|
696
712
|
}
|
697
713
|
|
698
714
|
pub fn explode(&self, column: RArray) -> RbResult<Self> {
|
data/ext/polars/src/lib.rs
CHANGED
@@ -5,6 +5,7 @@ mod error;
|
|
5
5
|
mod expr;
|
6
6
|
mod file;
|
7
7
|
mod functions;
|
8
|
+
mod interop;
|
8
9
|
mod lazyframe;
|
9
10
|
mod lazygroupby;
|
10
11
|
mod map;
|
@@ -59,18 +60,23 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
59
60
|
class.define_singleton_method("read_csv", function!(RbDataFrame::read_csv, -1))?;
|
60
61
|
class.define_singleton_method("read_parquet", function!(RbDataFrame::read_parquet, 9))?;
|
61
62
|
class.define_singleton_method("read_ipc", function!(RbDataFrame::read_ipc, 6))?;
|
63
|
+
class.define_singleton_method("read_ipc_stream", function!(RbDataFrame::read_ipc_stream, 6))?;
|
62
64
|
class.define_singleton_method("read_avro", function!(RbDataFrame::read_avro, 4))?;
|
63
|
-
class.define_singleton_method("
|
64
|
-
class.define_singleton_method("
|
65
|
-
class.define_singleton_method("read_hash", function!(RbDataFrame::read_hash, 1))?;
|
65
|
+
class.define_singleton_method("from_rows", function!(RbDataFrame::from_rows, 3))?;
|
66
|
+
class.define_singleton_method("from_hashes", function!(RbDataFrame::from_hashes, 5))?;
|
66
67
|
class.define_singleton_method("read_json", function!(RbDataFrame::read_json, 1))?;
|
67
68
|
class.define_singleton_method("read_ndjson", function!(RbDataFrame::read_ndjson, 1))?;
|
68
69
|
class.define_method("estimated_size", method!(RbDataFrame::estimated_size, 0))?;
|
70
|
+
class.define_method("dtype_strings", method!(RbDataFrame::dtype_strings, 0))?;
|
69
71
|
class.define_method("write_avro", method!(RbDataFrame::write_avro, 2))?;
|
70
72
|
class.define_method("write_json", method!(RbDataFrame::write_json, 3))?;
|
71
73
|
class.define_method("write_ndjson", method!(RbDataFrame::write_ndjson, 1))?;
|
72
74
|
class.define_method("write_csv", method!(RbDataFrame::write_csv, 10))?;
|
73
75
|
class.define_method("write_ipc", method!(RbDataFrame::write_ipc, 2))?;
|
76
|
+
class.define_method(
|
77
|
+
"write_ipc_stream",
|
78
|
+
method!(RbDataFrame::write_ipc_stream, 2),
|
79
|
+
)?;
|
74
80
|
class.define_method("row_tuple", method!(RbDataFrame::row_tuple, 1))?;
|
75
81
|
class.define_method("row_tuples", method!(RbDataFrame::row_tuples, 0))?;
|
76
82
|
class.define_method("to_numo", method!(RbDataFrame::to_numo, 0))?;
|
@@ -88,7 +94,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
88
94
|
class.define_method("sample_n", method!(RbDataFrame::sample_n, 4))?;
|
89
95
|
class.define_method("sample_frac", method!(RbDataFrame::sample_frac, 4))?;
|
90
96
|
class.define_method("rechunk", method!(RbDataFrame::rechunk, 0))?;
|
91
|
-
class.define_method("to_s", method!(RbDataFrame::
|
97
|
+
class.define_method("to_s", method!(RbDataFrame::as_str, 0))?;
|
92
98
|
class.define_method("get_columns", method!(RbDataFrame::get_columns, 0))?;
|
93
99
|
class.define_method("columns", method!(RbDataFrame::columns, 0))?;
|
94
100
|
class.define_method(
|
@@ -106,8 +112,6 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
106
112
|
class.define_method("vstack_mut", method!(RbDataFrame::vstack_mut, 1))?;
|
107
113
|
class.define_method("vstack", method!(RbDataFrame::vstack, 1))?;
|
108
114
|
class.define_method("drop_in_place", method!(RbDataFrame::drop_in_place, 1))?;
|
109
|
-
class.define_method("drop_nulls", method!(RbDataFrame::drop_nulls, 1))?;
|
110
|
-
class.define_method("drop", method!(RbDataFrame::drop, 1))?;
|
111
115
|
class.define_method("select_at_idx", method!(RbDataFrame::select_at_idx, 1))?;
|
112
116
|
class.define_method(
|
113
117
|
"get_column_index",
|
@@ -115,7 +119,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
115
119
|
)?;
|
116
120
|
class.define_method("get_column", method!(RbDataFrame::get_column, 1))?;
|
117
121
|
class.define_method("select", method!(RbDataFrame::select, 1))?;
|
118
|
-
class.define_method("take", method!(RbDataFrame::
|
122
|
+
class.define_method("take", method!(RbDataFrame::gather, 1))?;
|
119
123
|
class.define_method(
|
120
124
|
"take_with_series",
|
121
125
|
method!(RbDataFrame::take_with_series, 1),
|
@@ -134,7 +138,6 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
134
138
|
class.define_method("melt", method!(RbDataFrame::melt, 4))?;
|
135
139
|
class.define_method("pivot_expr", method!(RbDataFrame::pivot_expr, 7))?;
|
136
140
|
class.define_method("partition_by", method!(RbDataFrame::partition_by, 3))?;
|
137
|
-
class.define_method("shift", method!(RbDataFrame::shift, 1))?;
|
138
141
|
class.define_method("lazy", method!(RbDataFrame::lazy, 0))?;
|
139
142
|
class.define_method("mean_horizontal", method!(RbDataFrame::mean_horizontal, 1))?;
|
140
143
|
class.define_method("max_horizontal", method!(RbDataFrame::max_horizontal, 0))?;
|
@@ -142,7 +145,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
142
145
|
class.define_method("sum_horizontal", method!(RbDataFrame::sum_horizontal, 1))?;
|
143
146
|
class.define_method("to_dummies", method!(RbDataFrame::to_dummies, 3))?;
|
144
147
|
class.define_method("null_count", method!(RbDataFrame::null_count, 0))?;
|
145
|
-
class.define_method("
|
148
|
+
class.define_method("map_rows", method!(RbDataFrame::map_rows, 3))?;
|
146
149
|
class.define_method("shrink_to_fit", method!(RbDataFrame::shrink_to_fit, 0))?;
|
147
150
|
class.define_method("hash_rows", method!(RbDataFrame::hash_rows, 4))?;
|
148
151
|
class.define_method("transpose", method!(RbDataFrame::transpose, 2))?;
|
@@ -205,8 +208,8 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
205
208
|
class.define_method("cast", method!(RbExpr::cast, 2))?;
|
206
209
|
class.define_method("sort_with", method!(RbExpr::sort_with, 2))?;
|
207
210
|
class.define_method("arg_sort", method!(RbExpr::arg_sort, 2))?;
|
208
|
-
class.define_method("top_k", method!(RbExpr::top_k,
|
209
|
-
class.define_method("bottom_k", method!(RbExpr::bottom_k,
|
211
|
+
class.define_method("top_k", method!(RbExpr::top_k, 3))?;
|
212
|
+
class.define_method("bottom_k", method!(RbExpr::bottom_k, 3))?;
|
210
213
|
class.define_method("peak_min", method!(RbExpr::peak_min, 0))?;
|
211
214
|
class.define_method("peak_max", method!(RbExpr::peak_max, 0))?;
|
212
215
|
class.define_method("arg_max", method!(RbExpr::arg_max, 0))?;
|
@@ -415,20 +418,31 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
415
418
|
class.define_method("dt_dst_offset", method!(RbExpr::dt_dst_offset, 0))?;
|
416
419
|
class.define_method("dt_round", method!(RbExpr::dt_round, 2))?;
|
417
420
|
class.define_method("dt_combine", method!(RbExpr::dt_combine, 2))?;
|
418
|
-
class.define_method("
|
421
|
+
class.define_method("map_batches", method!(RbExpr::map_batches, 4))?;
|
419
422
|
class.define_method("dot", method!(RbExpr::dot, 1))?;
|
420
423
|
class.define_method("reinterpret", method!(RbExpr::reinterpret, 1))?;
|
421
424
|
class.define_method("mode", method!(RbExpr::mode, 0))?;
|
422
425
|
class.define_method("exclude", method!(RbExpr::exclude, 1))?;
|
423
426
|
class.define_method("interpolate", method!(RbExpr::interpolate, 1))?;
|
424
|
-
class.define_method("rolling_sum", method!(RbExpr::rolling_sum,
|
425
|
-
class.define_method("
|
426
|
-
class.define_method("
|
427
|
-
class.define_method("
|
428
|
-
class.define_method("
|
429
|
-
class.define_method("
|
430
|
-
class.define_method("
|
431
|
-
class.define_method("
|
427
|
+
class.define_method("rolling_sum", method!(RbExpr::rolling_sum, 4))?;
|
428
|
+
class.define_method("rolling_sum_by", method!(RbExpr::rolling_sum_by, 4))?;
|
429
|
+
class.define_method("rolling_min", method!(RbExpr::rolling_min, 4))?;
|
430
|
+
class.define_method("rolling_min_by", method!(RbExpr::rolling_min_by, 4))?;
|
431
|
+
class.define_method("rolling_max", method!(RbExpr::rolling_max, 4))?;
|
432
|
+
class.define_method("rolling_max_by", method!(RbExpr::rolling_max_by, 4))?;
|
433
|
+
class.define_method("rolling_mean", method!(RbExpr::rolling_mean, 4))?;
|
434
|
+
class.define_method("rolling_mean_by", method!(RbExpr::rolling_mean_by, 4))?;
|
435
|
+
class.define_method("rolling_std", method!(RbExpr::rolling_std, 5))?;
|
436
|
+
class.define_method("rolling_std_by", method!(RbExpr::rolling_std_by, 5))?;
|
437
|
+
class.define_method("rolling_var", method!(RbExpr::rolling_var, 5))?;
|
438
|
+
class.define_method("rolling_var_by", method!(RbExpr::rolling_var_by, 5))?;
|
439
|
+
class.define_method("rolling_median", method!(RbExpr::rolling_median, 4))?;
|
440
|
+
class.define_method("rolling_median_by", method!(RbExpr::rolling_median_by, 4))?;
|
441
|
+
class.define_method("rolling_quantile", method!(RbExpr::rolling_quantile, 6))?;
|
442
|
+
class.define_method(
|
443
|
+
"rolling_quantile_by",
|
444
|
+
method!(RbExpr::rolling_quantile_by, 6),
|
445
|
+
)?;
|
432
446
|
class.define_method("rolling_skew", method!(RbExpr::rolling_skew, 2))?;
|
433
447
|
class.define_method("lower_bound", method!(RbExpr::lower_bound, 0))?;
|
434
448
|
class.define_method("upper_bound", method!(RbExpr::upper_bound, 0))?;
|
@@ -611,6 +625,17 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
611
625
|
)?;
|
612
626
|
class.define_singleton_method("collect_all", function!(functions::lazy::collect_all, 1))?;
|
613
627
|
class.define_singleton_method("date_range", function!(functions::range::date_range, 6))?;
|
628
|
+
class.define_singleton_method("date_ranges", function!(functions::range::date_ranges, 6))?;
|
629
|
+
class.define_singleton_method(
|
630
|
+
"datetime_range",
|
631
|
+
function!(functions::range::datetime_range, 6),
|
632
|
+
)?;
|
633
|
+
class.define_singleton_method(
|
634
|
+
"datetime_ranges",
|
635
|
+
function!(functions::range::datetime_ranges, 6),
|
636
|
+
)?;
|
637
|
+
class.define_singleton_method("time_range", function!(functions::range::time_range, 4))?;
|
638
|
+
class.define_singleton_method("time_ranges", function!(functions::range::time_ranges, 4))?;
|
614
639
|
class.define_singleton_method(
|
615
640
|
"dtype_str_repr",
|
616
641
|
function!(functions::misc::dtype_str_repr, 1),
|
@@ -689,7 +714,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
689
714
|
class.define_singleton_method("new_from_csv", function!(RbLazyFrame::new_from_csv, -1))?;
|
690
715
|
class.define_singleton_method(
|
691
716
|
"new_from_parquet",
|
692
|
-
function!(RbLazyFrame::new_from_parquet,
|
717
|
+
function!(RbLazyFrame::new_from_parquet, 12),
|
693
718
|
)?;
|
694
719
|
class.define_singleton_method("new_from_ipc", function!(RbLazyFrame::new_from_ipc, 6))?;
|
695
720
|
class.define_method("write_json", method!(RbLazyFrame::write_json, 1))?;
|
data/ext/polars/src/map/lazy.rs
CHANGED
@@ -8,31 +8,11 @@ pub fn binary_lambda(_lambda: Value, _a: Series, _b: Series) -> PolarsResult<Opt
|
|
8
8
|
}
|
9
9
|
|
10
10
|
pub fn map_single(
|
11
|
-
|
11
|
+
_rbexpr: &RbExpr,
|
12
12
|
_lambda: Value,
|
13
|
-
|
14
|
-
|
13
|
+
_output_type: Option<Wrap<DataType>>,
|
14
|
+
_agg_list: bool,
|
15
|
+
_is_elementwise: bool,
|
15
16
|
) -> RbExpr {
|
16
|
-
|
17
|
-
|
18
|
-
let output_type2 = output_type.clone();
|
19
|
-
let function = move |_s: Series| {
|
20
|
-
let _output_type = output_type2.clone().unwrap_or(DataType::Unknown);
|
21
|
-
|
22
|
-
todo!();
|
23
|
-
};
|
24
|
-
|
25
|
-
let output_map = GetOutput::map_field(move |fld| match output_type {
|
26
|
-
Some(ref dt) => Field::new(fld.name(), dt.clone()),
|
27
|
-
None => {
|
28
|
-
let mut fld = fld.clone();
|
29
|
-
fld.coerce(DataType::Unknown);
|
30
|
-
fld
|
31
|
-
}
|
32
|
-
});
|
33
|
-
if agg_list {
|
34
|
-
rbexpr.clone().inner.map_list(function, output_map).into()
|
35
|
-
} else {
|
36
|
-
rbexpr.clone().inner.map(function, output_map).into()
|
37
|
-
}
|
17
|
+
todo!();
|
38
18
|
}
|
@@ -33,7 +33,12 @@ fn infer_and_finish<'a, A: ApplyLambda<'a>>(
|
|
33
33
|
.apply_lambda_with_utf8_out_type(lambda, null_count, Some(first_value.as_str()))
|
34
34
|
.map(|ca| ca.into_series().into())
|
35
35
|
} else if out.respond_to("_s", true)? {
|
36
|
-
|
36
|
+
let rb_rbseries: &RbSeries = out.funcall("_s", ()).unwrap();
|
37
|
+
let series = rb_rbseries.series.borrow();
|
38
|
+
let dt = series.dtype();
|
39
|
+
applyer
|
40
|
+
.apply_lambda_with_list_out_type(lambda, null_count, &series, dt)
|
41
|
+
.map(|ca| ca.into_series().into())
|
37
42
|
} else if out.is_kind_of(class::array()) {
|
38
43
|
todo!()
|
39
44
|
} else if out.is_kind_of(class::hash()) {
|
@@ -66,6 +71,7 @@ pub trait ApplyLambda<'a> {
|
|
66
71
|
fn apply_lambda_unknown(&'a self, _lambda: Value) -> RbResult<RbSeries>;
|
67
72
|
|
68
73
|
/// Apply a lambda that doesn't change output types
|
74
|
+
#[allow(dead_code)]
|
69
75
|
fn apply_lambda(&'a self, _lambda: Value) -> RbResult<RbSeries>;
|
70
76
|
|
71
77
|
// Used to store a struct type
|
@@ -1,6 +1,6 @@
|
|
1
1
|
use crate::error::RbPolarsErr;
|
2
2
|
use crate::prelude::*;
|
3
|
-
use crate::{RbResult, RbSeries
|
3
|
+
use crate::{RbResult, RbSeries};
|
4
4
|
use magnus::{IntoValue, Value};
|
5
5
|
|
6
6
|
impl RbSeries {
|
@@ -36,31 +36,52 @@ impl RbSeries {
|
|
36
36
|
Ok(Wrap(
|
37
37
|
self.series
|
38
38
|
.borrow()
|
39
|
-
.
|
39
|
+
.max_reduce()
|
40
40
|
.map_err(RbPolarsErr::from)?
|
41
|
-
.
|
42
|
-
.map_err(RbPolarsErr::from)?,
|
41
|
+
.as_any_value(),
|
43
42
|
)
|
44
43
|
.into_value())
|
45
44
|
}
|
46
45
|
|
47
|
-
pub fn mean(&self) ->
|
46
|
+
pub fn mean(&self) -> RbResult<Value> {
|
48
47
|
match self.series.borrow().dtype() {
|
49
|
-
DataType::Boolean =>
|
50
|
-
|
51
|
-
|
48
|
+
DataType::Boolean => Ok(Wrap(
|
49
|
+
self.series
|
50
|
+
.borrow()
|
51
|
+
.cast(&DataType::UInt8)
|
52
|
+
.unwrap()
|
53
|
+
.mean_reduce()
|
54
|
+
.as_any_value(),
|
55
|
+
)
|
56
|
+
.into_value()),
|
57
|
+
DataType::Datetime(_, _) | DataType::Duration(_) | DataType::Time => {
|
58
|
+
Ok(Wrap(self.series.borrow().mean_reduce().as_any_value()).into_value())
|
52
59
|
}
|
53
|
-
_ => self.series.borrow().mean(),
|
60
|
+
_ => Ok(self.series.borrow().mean().into_value()),
|
54
61
|
}
|
55
62
|
}
|
56
63
|
|
57
|
-
pub fn median(&self) ->
|
64
|
+
pub fn median(&self) -> RbResult<Value> {
|
58
65
|
match self.series.borrow().dtype() {
|
59
|
-
DataType::Boolean =>
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
66
|
+
DataType::Boolean => Ok(Wrap(
|
67
|
+
self.series
|
68
|
+
.borrow()
|
69
|
+
.cast(&DataType::UInt8)
|
70
|
+
.unwrap()
|
71
|
+
.median_reduce()
|
72
|
+
.map_err(RbPolarsErr::from)?
|
73
|
+
.as_any_value(),
|
74
|
+
)
|
75
|
+
.into_value()),
|
76
|
+
DataType::Datetime(_, _) | DataType::Duration(_) | DataType::Time => Ok(Wrap(
|
77
|
+
self.series
|
78
|
+
.borrow()
|
79
|
+
.median_reduce()
|
80
|
+
.map_err(RbPolarsErr::from)?
|
81
|
+
.as_any_value(),
|
82
|
+
)
|
83
|
+
.into_value()),
|
84
|
+
_ => Ok(self.series.borrow().median().into_value()),
|
64
85
|
}
|
65
86
|
}
|
66
87
|
|
@@ -68,10 +89,9 @@ impl RbSeries {
|
|
68
89
|
Ok(Wrap(
|
69
90
|
self.series
|
70
91
|
.borrow()
|
71
|
-
.
|
92
|
+
.min_reduce()
|
72
93
|
.map_err(RbPolarsErr::from)?
|
73
|
-
.
|
74
|
-
.map_err(RbPolarsErr::from)?,
|
94
|
+
.as_any_value(),
|
75
95
|
)
|
76
96
|
.into_value())
|
77
97
|
}
|
@@ -81,25 +101,22 @@ impl RbSeries {
|
|
81
101
|
quantile: f64,
|
82
102
|
interpolation: Wrap<QuantileInterpolOptions>,
|
83
103
|
) -> RbResult<Value> {
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
)
|
92
|
-
.into_value())
|
104
|
+
let bind = self
|
105
|
+
.series
|
106
|
+
.borrow()
|
107
|
+
.quantile_reduce(quantile, interpolation.0);
|
108
|
+
let sc = bind.map_err(RbPolarsErr::from)?;
|
109
|
+
|
110
|
+
Ok(Wrap(sc.as_any_value()).into_value())
|
93
111
|
}
|
94
112
|
|
95
113
|
pub fn sum(&self) -> RbResult<Value> {
|
96
114
|
Ok(Wrap(
|
97
115
|
self.series
|
98
116
|
.borrow()
|
99
|
-
.
|
117
|
+
.sum_reduce()
|
100
118
|
.map_err(RbPolarsErr::from)?
|
101
|
-
.
|
102
|
-
.map_err(RbPolarsErr::from)?,
|
119
|
+
.as_any_value(),
|
103
120
|
)
|
104
121
|
.into_value())
|
105
122
|
}
|