polars-df 0.14.0 → 0.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +25 -0
- data/Cargo.lock +1296 -283
- data/LICENSE.txt +1 -0
- data/README.md +1 -2
- data/ext/polars/Cargo.toml +15 -5
- data/ext/polars/src/batched_csv.rs +7 -10
- data/ext/polars/src/conversion/any_value.rs +31 -21
- data/ext/polars/src/conversion/mod.rs +125 -28
- data/ext/polars/src/dataframe/construction.rs +0 -3
- data/ext/polars/src/dataframe/export.rs +9 -2
- data/ext/polars/src/dataframe/general.rs +16 -11
- data/ext/polars/src/dataframe/io.rs +73 -169
- data/ext/polars/src/dataframe/mod.rs +1 -0
- data/ext/polars/src/dataframe/serde.rs +15 -0
- data/ext/polars/src/error.rs +31 -48
- data/ext/polars/src/exceptions.rs +24 -0
- data/ext/polars/src/expr/binary.rs +4 -42
- data/ext/polars/src/expr/datetime.rs +5 -4
- data/ext/polars/src/expr/general.rs +13 -22
- data/ext/polars/src/expr/list.rs +18 -11
- data/ext/polars/src/expr/rolling.rs +6 -7
- data/ext/polars/src/expr/string.rs +9 -36
- data/ext/polars/src/file.rs +59 -22
- data/ext/polars/src/functions/business.rs +15 -0
- data/ext/polars/src/functions/lazy.rs +17 -8
- data/ext/polars/src/functions/mod.rs +1 -0
- data/ext/polars/src/interop/arrow/mod.rs +1 -0
- data/ext/polars/src/interop/arrow/to_ruby.rs +83 -0
- data/ext/polars/src/interop/mod.rs +1 -0
- data/ext/polars/src/lazyframe/general.rs +877 -0
- data/ext/polars/src/lazyframe/mod.rs +3 -827
- data/ext/polars/src/lazyframe/serde.rs +31 -0
- data/ext/polars/src/lib.rs +45 -14
- data/ext/polars/src/map/dataframe.rs +10 -6
- data/ext/polars/src/map/lazy.rs +65 -4
- data/ext/polars/src/map/mod.rs +9 -8
- data/ext/polars/src/on_startup.rs +1 -1
- data/ext/polars/src/series/aggregation.rs +1 -5
- data/ext/polars/src/series/arithmetic.rs +10 -10
- data/ext/polars/src/series/construction.rs +2 -2
- data/ext/polars/src/series/export.rs +1 -1
- data/ext/polars/src/series/general.rs +643 -0
- data/ext/polars/src/series/import.rs +55 -0
- data/ext/polars/src/series/mod.rs +11 -638
- data/ext/polars/src/series/scatter.rs +2 -2
- data/ext/polars/src/utils.rs +0 -20
- data/lib/polars/batched_csv_reader.rb +0 -2
- data/lib/polars/binary_expr.rb +133 -9
- data/lib/polars/binary_name_space.rb +101 -6
- data/lib/polars/config.rb +4 -0
- data/lib/polars/data_frame.rb +275 -52
- data/lib/polars/data_type_group.rb +28 -0
- data/lib/polars/data_types.rb +2 -0
- data/lib/polars/date_time_expr.rb +244 -0
- data/lib/polars/date_time_name_space.rb +87 -0
- data/lib/polars/expr.rb +103 -2
- data/lib/polars/functions/as_datatype.rb +51 -2
- data/lib/polars/functions/col.rb +1 -1
- data/lib/polars/functions/eager.rb +1 -3
- data/lib/polars/functions/lazy.rb +88 -10
- data/lib/polars/functions/range/time_range.rb +21 -21
- data/lib/polars/io/csv.rb +14 -16
- data/lib/polars/io/database.rb +2 -2
- data/lib/polars/io/ipc.rb +14 -4
- data/lib/polars/io/ndjson.rb +10 -0
- data/lib/polars/io/parquet.rb +168 -111
- data/lib/polars/lazy_frame.rb +649 -15
- data/lib/polars/list_name_space.rb +169 -0
- data/lib/polars/selectors.rb +1144 -0
- data/lib/polars/series.rb +465 -35
- data/lib/polars/string_cache.rb +27 -1
- data/lib/polars/string_expr.rb +0 -1
- data/lib/polars/string_name_space.rb +73 -3
- data/lib/polars/struct_name_space.rb +31 -7
- data/lib/polars/utils/various.rb +5 -1
- data/lib/polars/utils.rb +45 -10
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +2 -1
- metadata +14 -4
- data/lib/polars/functions.rb +0 -57
@@ -0,0 +1,643 @@
|
|
1
|
+
use magnus::{exception, Error, IntoValue, Value};
|
2
|
+
use polars::prelude::*;
|
3
|
+
use polars::series::IsSorted;
|
4
|
+
|
5
|
+
use crate::apply_method_all_arrow_series2;
|
6
|
+
use crate::conversion::*;
|
7
|
+
use crate::map::series::{call_lambda_and_extract, ApplyLambda};
|
8
|
+
use crate::{RbDataFrame, RbPolarsErr, RbResult, RbSeries};
|
9
|
+
|
10
|
+
impl RbSeries {
|
11
|
+
pub fn struct_unnest(&self) -> RbResult<RbDataFrame> {
|
12
|
+
let binding = self.series.borrow();
|
13
|
+
let ca = binding.struct_().map_err(RbPolarsErr::from)?;
|
14
|
+
let df: DataFrame = ca.clone().unnest();
|
15
|
+
Ok(df.into())
|
16
|
+
}
|
17
|
+
|
18
|
+
// TODO add to Ruby
|
19
|
+
pub fn struct_fields(&self) -> RbResult<Vec<String>> {
|
20
|
+
let binding = self.series.borrow();
|
21
|
+
let ca = binding.struct_().map_err(RbPolarsErr::from)?;
|
22
|
+
Ok(ca
|
23
|
+
.struct_fields()
|
24
|
+
.iter()
|
25
|
+
.map(|s| s.name().to_string())
|
26
|
+
.collect())
|
27
|
+
}
|
28
|
+
|
29
|
+
pub fn is_sorted_ascending_flag(&self) -> bool {
|
30
|
+
matches!(self.series.borrow().is_sorted_flag(), IsSorted::Ascending)
|
31
|
+
}
|
32
|
+
|
33
|
+
pub fn is_sorted_descending_flag(&self) -> bool {
|
34
|
+
matches!(self.series.borrow().is_sorted_flag(), IsSorted::Descending)
|
35
|
+
}
|
36
|
+
|
37
|
+
pub fn can_fast_explode_flag(&self) -> bool {
|
38
|
+
match self.series.borrow().list() {
|
39
|
+
Err(_) => false,
|
40
|
+
Ok(list) => list._can_fast_explode(),
|
41
|
+
}
|
42
|
+
}
|
43
|
+
|
44
|
+
pub fn cat_uses_lexical_ordering(&self) -> RbResult<bool> {
|
45
|
+
let binding = self.series.borrow();
|
46
|
+
let ca = binding.categorical().map_err(RbPolarsErr::from)?;
|
47
|
+
Ok(ca.uses_lexical_ordering())
|
48
|
+
}
|
49
|
+
|
50
|
+
pub fn cat_is_local(&self) -> RbResult<bool> {
|
51
|
+
let binding = self.series.borrow();
|
52
|
+
let ca = binding.categorical().map_err(RbPolarsErr::from)?;
|
53
|
+
Ok(ca.get_rev_map().is_local())
|
54
|
+
}
|
55
|
+
|
56
|
+
pub fn cat_to_local(&self) -> RbResult<Self> {
|
57
|
+
let binding = self.series.borrow();
|
58
|
+
let ca = binding.categorical().map_err(RbPolarsErr::from)?;
|
59
|
+
Ok(ca.to_local().into_series().into())
|
60
|
+
}
|
61
|
+
|
62
|
+
pub fn estimated_size(&self) -> usize {
|
63
|
+
self.series.borrow().estimated_size()
|
64
|
+
}
|
65
|
+
|
66
|
+
pub fn get_fmt(&self, index: usize, str_lengths: usize) -> String {
|
67
|
+
let val = format!("{}", self.series.borrow().get(index).unwrap());
|
68
|
+
if let DataType::String | DataType::Categorical(_, _) = self.series.borrow().dtype() {
|
69
|
+
let v_trunc = &val[..val
|
70
|
+
.char_indices()
|
71
|
+
.take(str_lengths)
|
72
|
+
.last()
|
73
|
+
.map(|(i, c)| i + c.len_utf8())
|
74
|
+
.unwrap_or(0)];
|
75
|
+
if val == v_trunc {
|
76
|
+
val
|
77
|
+
} else {
|
78
|
+
format!("{}…", v_trunc)
|
79
|
+
}
|
80
|
+
} else {
|
81
|
+
val
|
82
|
+
}
|
83
|
+
}
|
84
|
+
|
85
|
+
/// Rechunks the series.
///
/// With `in_place` set, the stored series is replaced by the rechunked one and
/// `None` is returned; otherwise the rechunked series is returned and the
/// stored one is left untouched.
pub fn rechunk(&self, in_place: bool) -> Option<Self> {
    // `rechunk` only reads the series, so a shared borrow is sufficient; the
    // exclusive borrow is taken only for the in-place replacement below.
    let rechunked = self.series.borrow().rechunk();
    if in_place {
        *self.series.borrow_mut() = rechunked;
        None
    } else {
        Some(rechunked.into())
    }
}
|
94
|
+
|
95
|
+
pub fn get_idx(&self, idx: usize) -> RbResult<Value> {
|
96
|
+
Ok(Wrap(self.series.borrow().get(idx).map_err(RbPolarsErr::from)?).into_value())
|
97
|
+
}
|
98
|
+
|
99
|
+
pub fn bitand(&self, other: &RbSeries) -> RbResult<Self> {
|
100
|
+
let out = self
|
101
|
+
.series
|
102
|
+
.borrow()
|
103
|
+
.bitand(&other.series.borrow())
|
104
|
+
.map_err(RbPolarsErr::from)?;
|
105
|
+
Ok(out.into())
|
106
|
+
}
|
107
|
+
|
108
|
+
pub fn bitor(&self, other: &RbSeries) -> RbResult<Self> {
|
109
|
+
let out = self
|
110
|
+
.series
|
111
|
+
.borrow()
|
112
|
+
.bitor(&other.series.borrow())
|
113
|
+
.map_err(RbPolarsErr::from)?;
|
114
|
+
Ok(out.into())
|
115
|
+
}
|
116
|
+
|
117
|
+
pub fn bitxor(&self, other: &RbSeries) -> RbResult<Self> {
|
118
|
+
let out = self
|
119
|
+
.series
|
120
|
+
.borrow()
|
121
|
+
.bitxor(&other.series.borrow())
|
122
|
+
.map_err(RbPolarsErr::from)?;
|
123
|
+
Ok(out.into())
|
124
|
+
}
|
125
|
+
|
126
|
+
pub fn chunk_lengths(&self) -> Vec<usize> {
|
127
|
+
self.series.borrow().chunk_lengths().collect()
|
128
|
+
}
|
129
|
+
|
130
|
+
pub fn name(&self) -> String {
|
131
|
+
self.series.borrow().name().to_string()
|
132
|
+
}
|
133
|
+
|
134
|
+
pub fn rename(&self, name: String) {
|
135
|
+
self.series.borrow_mut().rename(name.into());
|
136
|
+
}
|
137
|
+
|
138
|
+
pub fn dtype(&self) -> Value {
|
139
|
+
Wrap(self.series.borrow().dtype().clone()).into_value()
|
140
|
+
}
|
141
|
+
|
142
|
+
pub fn inner_dtype(&self) -> Option<Value> {
|
143
|
+
self.series
|
144
|
+
.borrow()
|
145
|
+
.dtype()
|
146
|
+
.inner_dtype()
|
147
|
+
.map(|dt| Wrap(dt.clone()).into_value())
|
148
|
+
}
|
149
|
+
|
150
|
+
pub fn set_sorted_flag(&self, descending: bool) -> Self {
|
151
|
+
let mut out = self.series.borrow().clone();
|
152
|
+
if descending {
|
153
|
+
out.set_sorted_flag(IsSorted::Descending);
|
154
|
+
} else {
|
155
|
+
out.set_sorted_flag(IsSorted::Ascending)
|
156
|
+
}
|
157
|
+
out.into()
|
158
|
+
}
|
159
|
+
|
160
|
+
pub fn n_chunks(&self) -> usize {
|
161
|
+
self.series.borrow().n_chunks()
|
162
|
+
}
|
163
|
+
|
164
|
+
pub fn append(&self, other: &RbSeries) -> RbResult<()> {
|
165
|
+
let mut binding = self.series.borrow_mut();
|
166
|
+
let res = binding.append(&other.series.borrow());
|
167
|
+
if let Err(e) = res {
|
168
|
+
Err(Error::new(exception::runtime_error(), e.to_string()))
|
169
|
+
} else {
|
170
|
+
Ok(())
|
171
|
+
}
|
172
|
+
}
|
173
|
+
|
174
|
+
pub fn extend(&self, other: &RbSeries) -> RbResult<()> {
|
175
|
+
self.series
|
176
|
+
.borrow_mut()
|
177
|
+
.extend(&other.series.borrow())
|
178
|
+
.map_err(RbPolarsErr::from)?;
|
179
|
+
Ok(())
|
180
|
+
}
|
181
|
+
|
182
|
+
pub fn new_from_index(&self, index: usize, length: usize) -> RbResult<Self> {
|
183
|
+
if index >= self.series.borrow().len() {
|
184
|
+
Err(Error::new(exception::arg_error(), "index is out of bounds"))
|
185
|
+
} else {
|
186
|
+
Ok(self.series.borrow().new_from_index(index, length).into())
|
187
|
+
}
|
188
|
+
}
|
189
|
+
|
190
|
+
pub fn filter(&self, filter: &RbSeries) -> RbResult<Self> {
|
191
|
+
let filter_series = &filter.series.borrow();
|
192
|
+
if let Ok(ca) = filter_series.bool() {
|
193
|
+
let series = self.series.borrow().filter(ca).unwrap();
|
194
|
+
Ok(series.into())
|
195
|
+
} else {
|
196
|
+
Err(Error::new(
|
197
|
+
exception::runtime_error(),
|
198
|
+
"Expected a boolean mask".to_string(),
|
199
|
+
))
|
200
|
+
}
|
201
|
+
}
|
202
|
+
|
203
|
+
pub fn sort(&self, descending: bool, nulls_last: bool, multithreaded: bool) -> RbResult<Self> {
|
204
|
+
Ok(self
|
205
|
+
.series
|
206
|
+
.borrow_mut()
|
207
|
+
.sort(
|
208
|
+
SortOptions::default()
|
209
|
+
.with_order_descending(descending)
|
210
|
+
.with_nulls_last(nulls_last)
|
211
|
+
.with_multithreaded(multithreaded),
|
212
|
+
)
|
213
|
+
.map_err(RbPolarsErr::from)?
|
214
|
+
.into())
|
215
|
+
}
|
216
|
+
|
217
|
+
pub fn value_counts(
|
218
|
+
&self,
|
219
|
+
sort: bool,
|
220
|
+
parallel: bool,
|
221
|
+
name: String,
|
222
|
+
normalize: bool,
|
223
|
+
) -> RbResult<RbDataFrame> {
|
224
|
+
let out = self
|
225
|
+
.series
|
226
|
+
.borrow()
|
227
|
+
.value_counts(sort, parallel, name.into(), normalize)
|
228
|
+
.map_err(RbPolarsErr::from)?;
|
229
|
+
Ok(out.into())
|
230
|
+
}
|
231
|
+
|
232
|
+
/// Returns a slice of the series starting at `offset`.
///
/// When `length` is `None`, the full series length is used as the slice length.
pub fn slice(&self, offset: i64, length: Option<usize>) -> Self {
    let series = self.series.borrow();
    let length = match length {
        Some(len) => len,
        None => series.len(),
    };
    series.slice(offset, length).into()
}
|
236
|
+
|
237
|
+
pub fn take_with_series(&self, indices: &RbSeries) -> RbResult<Self> {
|
238
|
+
let binding = indices.series.borrow();
|
239
|
+
let idx = binding.idx().map_err(RbPolarsErr::from)?;
|
240
|
+
let take = self.series.borrow().take(idx).map_err(RbPolarsErr::from)?;
|
241
|
+
Ok(RbSeries::new(take))
|
242
|
+
}
|
243
|
+
|
244
|
+
/// Returns the value reported by `Series::null_count`; this never produces an error.
pub fn null_count(&self) -> RbResult<usize> {
    let series = self.series.borrow();
    let nulls = series.null_count();
    Ok(nulls)
}
|
247
|
+
|
248
|
+
pub fn has_nulls(&self) -> bool {
|
249
|
+
self.series.borrow().has_nulls()
|
250
|
+
}
|
251
|
+
|
252
|
+
pub fn sample_n(
|
253
|
+
&self,
|
254
|
+
n: usize,
|
255
|
+
with_replacement: bool,
|
256
|
+
shuffle: bool,
|
257
|
+
seed: Option<u64>,
|
258
|
+
) -> RbResult<Self> {
|
259
|
+
let s = self
|
260
|
+
.series
|
261
|
+
.borrow()
|
262
|
+
.sample_n(n, with_replacement, shuffle, seed)
|
263
|
+
.map_err(RbPolarsErr::from)?;
|
264
|
+
Ok(s.into())
|
265
|
+
}
|
266
|
+
|
267
|
+
pub fn sample_frac(
|
268
|
+
&self,
|
269
|
+
frac: f64,
|
270
|
+
with_replacement: bool,
|
271
|
+
shuffle: bool,
|
272
|
+
seed: Option<u64>,
|
273
|
+
) -> RbResult<Self> {
|
274
|
+
let s = self
|
275
|
+
.series
|
276
|
+
.borrow()
|
277
|
+
.sample_frac(frac, with_replacement, shuffle, seed)
|
278
|
+
.map_err(RbPolarsErr::from)?;
|
279
|
+
Ok(s.into())
|
280
|
+
}
|
281
|
+
|
282
|
+
pub fn equals(
|
283
|
+
&self,
|
284
|
+
other: &RbSeries,
|
285
|
+
check_dtypes: bool,
|
286
|
+
check_names: bool,
|
287
|
+
null_equal: bool,
|
288
|
+
) -> bool {
|
289
|
+
if check_dtypes && (self.series.borrow().dtype() != other.series.borrow().dtype()) {
|
290
|
+
return false;
|
291
|
+
}
|
292
|
+
if check_names && (self.series.borrow().name() != other.series.borrow().name()) {
|
293
|
+
return false;
|
294
|
+
}
|
295
|
+
if null_equal {
|
296
|
+
self.series.borrow().equals_missing(&other.series.borrow())
|
297
|
+
} else {
|
298
|
+
self.series.borrow().equals(&other.series.borrow())
|
299
|
+
}
|
300
|
+
}
|
301
|
+
|
302
|
+
pub fn not(&self) -> RbResult<Self> {
|
303
|
+
let binding = self.series.borrow();
|
304
|
+
let bool = binding.bool().map_err(RbPolarsErr::from)?;
|
305
|
+
Ok((!bool).into_series().into())
|
306
|
+
}
|
307
|
+
|
308
|
+
pub fn to_s(&self) -> String {
|
309
|
+
format!("{}", self.series.borrow())
|
310
|
+
}
|
311
|
+
|
312
|
+
pub fn len(&self) -> usize {
|
313
|
+
self.series.borrow().len()
|
314
|
+
}
|
315
|
+
|
316
|
+
pub fn clone(&self) -> Self {
|
317
|
+
RbSeries::new(self.series.borrow().clone())
|
318
|
+
}
|
319
|
+
|
320
|
+
pub fn apply_lambda(
|
321
|
+
&self,
|
322
|
+
lambda: Value,
|
323
|
+
output_type: Option<Wrap<DataType>>,
|
324
|
+
skip_nulls: bool,
|
325
|
+
) -> RbResult<Self> {
|
326
|
+
let series = &self.series.borrow();
|
327
|
+
|
328
|
+
let output_type = output_type.map(|dt| dt.0);
|
329
|
+
|
330
|
+
macro_rules! dispatch_apply {
|
331
|
+
($self:expr, $method:ident, $($args:expr),*) => {
|
332
|
+
if matches!($self.dtype(), DataType::Object(_, _)) {
|
333
|
+
// let ca = $self.0.unpack::<ObjectType<ObjectValue>>().unwrap();
|
334
|
+
// ca.$method($($args),*)
|
335
|
+
todo!()
|
336
|
+
} else {
|
337
|
+
apply_method_all_arrow_series2!(
|
338
|
+
$self,
|
339
|
+
$method,
|
340
|
+
$($args),*
|
341
|
+
)
|
342
|
+
}
|
343
|
+
|
344
|
+
}
|
345
|
+
|
346
|
+
}
|
347
|
+
|
348
|
+
if matches!(
|
349
|
+
series.dtype(),
|
350
|
+
DataType::Datetime(_, _)
|
351
|
+
| DataType::Date
|
352
|
+
| DataType::Duration(_)
|
353
|
+
| DataType::Categorical(_, _)
|
354
|
+
| DataType::Time
|
355
|
+
) || !skip_nulls
|
356
|
+
{
|
357
|
+
let mut avs = Vec::with_capacity(series.len());
|
358
|
+
let iter = series.iter().map(|av| {
|
359
|
+
let input = Wrap(av);
|
360
|
+
call_lambda_and_extract::<_, Wrap<AnyValue>>(lambda, input)
|
361
|
+
.unwrap()
|
362
|
+
.0
|
363
|
+
});
|
364
|
+
avs.extend(iter);
|
365
|
+
return Ok(Series::new(self.name().into(), &avs).into());
|
366
|
+
}
|
367
|
+
|
368
|
+
let out = match output_type {
|
369
|
+
Some(DataType::Int8) => {
|
370
|
+
let ca: Int8Chunked = dispatch_apply!(
|
371
|
+
series,
|
372
|
+
apply_lambda_with_primitive_out_type,
|
373
|
+
lambda,
|
374
|
+
0,
|
375
|
+
None
|
376
|
+
)?;
|
377
|
+
ca.into_series()
|
378
|
+
}
|
379
|
+
Some(DataType::Int16) => {
|
380
|
+
let ca: Int16Chunked = dispatch_apply!(
|
381
|
+
series,
|
382
|
+
apply_lambda_with_primitive_out_type,
|
383
|
+
lambda,
|
384
|
+
0,
|
385
|
+
None
|
386
|
+
)?;
|
387
|
+
ca.into_series()
|
388
|
+
}
|
389
|
+
Some(DataType::Int32) => {
|
390
|
+
let ca: Int32Chunked = dispatch_apply!(
|
391
|
+
series,
|
392
|
+
apply_lambda_with_primitive_out_type,
|
393
|
+
lambda,
|
394
|
+
0,
|
395
|
+
None
|
396
|
+
)?;
|
397
|
+
ca.into_series()
|
398
|
+
}
|
399
|
+
Some(DataType::Int64) => {
|
400
|
+
let ca: Int64Chunked = dispatch_apply!(
|
401
|
+
series,
|
402
|
+
apply_lambda_with_primitive_out_type,
|
403
|
+
lambda,
|
404
|
+
0,
|
405
|
+
None
|
406
|
+
)?;
|
407
|
+
ca.into_series()
|
408
|
+
}
|
409
|
+
Some(DataType::UInt8) => {
|
410
|
+
let ca: UInt8Chunked = dispatch_apply!(
|
411
|
+
series,
|
412
|
+
apply_lambda_with_primitive_out_type,
|
413
|
+
lambda,
|
414
|
+
0,
|
415
|
+
None
|
416
|
+
)?;
|
417
|
+
ca.into_series()
|
418
|
+
}
|
419
|
+
Some(DataType::UInt16) => {
|
420
|
+
let ca: UInt16Chunked = dispatch_apply!(
|
421
|
+
series,
|
422
|
+
apply_lambda_with_primitive_out_type,
|
423
|
+
lambda,
|
424
|
+
0,
|
425
|
+
None
|
426
|
+
)?;
|
427
|
+
ca.into_series()
|
428
|
+
}
|
429
|
+
Some(DataType::UInt32) => {
|
430
|
+
let ca: UInt32Chunked = dispatch_apply!(
|
431
|
+
series,
|
432
|
+
apply_lambda_with_primitive_out_type,
|
433
|
+
lambda,
|
434
|
+
0,
|
435
|
+
None
|
436
|
+
)?;
|
437
|
+
ca.into_series()
|
438
|
+
}
|
439
|
+
Some(DataType::UInt64) => {
|
440
|
+
let ca: UInt64Chunked = dispatch_apply!(
|
441
|
+
series,
|
442
|
+
apply_lambda_with_primitive_out_type,
|
443
|
+
lambda,
|
444
|
+
0,
|
445
|
+
None
|
446
|
+
)?;
|
447
|
+
ca.into_series()
|
448
|
+
}
|
449
|
+
Some(DataType::Float32) => {
|
450
|
+
let ca: Float32Chunked = dispatch_apply!(
|
451
|
+
series,
|
452
|
+
apply_lambda_with_primitive_out_type,
|
453
|
+
lambda,
|
454
|
+
0,
|
455
|
+
None
|
456
|
+
)?;
|
457
|
+
ca.into_series()
|
458
|
+
}
|
459
|
+
Some(DataType::Float64) => {
|
460
|
+
let ca: Float64Chunked = dispatch_apply!(
|
461
|
+
series,
|
462
|
+
apply_lambda_with_primitive_out_type,
|
463
|
+
lambda,
|
464
|
+
0,
|
465
|
+
None
|
466
|
+
)?;
|
467
|
+
ca.into_series()
|
468
|
+
}
|
469
|
+
Some(DataType::Boolean) => {
|
470
|
+
let ca: BooleanChunked =
|
471
|
+
dispatch_apply!(series, apply_lambda_with_bool_out_type, lambda, 0, None)?;
|
472
|
+
ca.into_series()
|
473
|
+
}
|
474
|
+
Some(DataType::Date) => {
|
475
|
+
let ca: Int32Chunked = dispatch_apply!(
|
476
|
+
series,
|
477
|
+
apply_lambda_with_primitive_out_type,
|
478
|
+
lambda,
|
479
|
+
0,
|
480
|
+
None
|
481
|
+
)?;
|
482
|
+
ca.into_date().into_series()
|
483
|
+
}
|
484
|
+
Some(DataType::Datetime(tu, tz)) => {
|
485
|
+
let ca: Int64Chunked = dispatch_apply!(
|
486
|
+
series,
|
487
|
+
apply_lambda_with_primitive_out_type,
|
488
|
+
lambda,
|
489
|
+
0,
|
490
|
+
None
|
491
|
+
)?;
|
492
|
+
ca.into_datetime(tu, tz).into_series()
|
493
|
+
}
|
494
|
+
Some(DataType::String) => {
|
495
|
+
let ca = dispatch_apply!(series, apply_lambda_with_utf8_out_type, lambda, 0, None)?;
|
496
|
+
|
497
|
+
ca.into_series()
|
498
|
+
}
|
499
|
+
Some(DataType::Object(_, _)) => {
|
500
|
+
let ca =
|
501
|
+
dispatch_apply!(series, apply_lambda_with_object_out_type, lambda, 0, None)?;
|
502
|
+
ca.into_series()
|
503
|
+
}
|
504
|
+
None => return dispatch_apply!(series, apply_lambda_unknown, lambda),
|
505
|
+
|
506
|
+
_ => return dispatch_apply!(series, apply_lambda_unknown, lambda),
|
507
|
+
};
|
508
|
+
|
509
|
+
Ok(RbSeries::new(out))
|
510
|
+
}
|
511
|
+
|
512
|
+
pub fn zip_with(&self, mask: &RbSeries, other: &RbSeries) -> RbResult<Self> {
|
513
|
+
let binding = mask.series.borrow();
|
514
|
+
let mask = binding.bool().map_err(RbPolarsErr::from)?;
|
515
|
+
let s = self
|
516
|
+
.series
|
517
|
+
.borrow()
|
518
|
+
.zip_with(mask, &other.series.borrow())
|
519
|
+
.map_err(RbPolarsErr::from)?;
|
520
|
+
Ok(RbSeries::new(s))
|
521
|
+
}
|
522
|
+
|
523
|
+
pub fn to_dummies(&self, sep: Option<String>, drop_first: bool) -> RbResult<RbDataFrame> {
|
524
|
+
let df = self
|
525
|
+
.series
|
526
|
+
.borrow()
|
527
|
+
.to_dummies(sep.as_deref(), drop_first)
|
528
|
+
.map_err(RbPolarsErr::from)?;
|
529
|
+
Ok(df.into())
|
530
|
+
}
|
531
|
+
|
532
|
+
pub fn n_unique(&self) -> RbResult<usize> {
|
533
|
+
let n = self.series.borrow().n_unique().map_err(RbPolarsErr::from)?;
|
534
|
+
Ok(n)
|
535
|
+
}
|
536
|
+
|
537
|
+
pub fn floor(&self) -> RbResult<Self> {
|
538
|
+
let s = self.series.borrow().floor().map_err(RbPolarsErr::from)?;
|
539
|
+
Ok(s.into())
|
540
|
+
}
|
541
|
+
|
542
|
+
pub fn shrink_to_fit(&self) {
|
543
|
+
self.series.borrow_mut().shrink_to_fit();
|
544
|
+
}
|
545
|
+
|
546
|
+
pub fn dot(&self, other: &RbSeries) -> RbResult<f64> {
|
547
|
+
let out = self
|
548
|
+
.series
|
549
|
+
.borrow()
|
550
|
+
.dot(&other.series.borrow())
|
551
|
+
.map_err(RbPolarsErr::from)?;
|
552
|
+
Ok(out)
|
553
|
+
}
|
554
|
+
|
555
|
+
pub fn skew(&self, bias: bool) -> RbResult<Option<f64>> {
|
556
|
+
let out = self.series.borrow().skew(bias).map_err(RbPolarsErr::from)?;
|
557
|
+
Ok(out)
|
558
|
+
}
|
559
|
+
|
560
|
+
pub fn kurtosis(&self, fisher: bool, bias: bool) -> RbResult<Option<f64>> {
|
561
|
+
let out = self
|
562
|
+
.series
|
563
|
+
.borrow()
|
564
|
+
.kurtosis(fisher, bias)
|
565
|
+
.map_err(RbPolarsErr::from)?;
|
566
|
+
Ok(out)
|
567
|
+
}
|
568
|
+
|
569
|
+
pub fn cast(&self, dtype: Wrap<DataType>, strict: bool) -> RbResult<Self> {
|
570
|
+
let dtype = dtype.0;
|
571
|
+
let out = if strict {
|
572
|
+
self.series.borrow().strict_cast(&dtype)
|
573
|
+
} else {
|
574
|
+
self.series.borrow().cast(&dtype)
|
575
|
+
};
|
576
|
+
let out = out.map_err(RbPolarsErr::from)?;
|
577
|
+
Ok(out.into())
|
578
|
+
}
|
579
|
+
|
580
|
+
pub fn time_unit(&self) -> Option<String> {
|
581
|
+
if let DataType::Datetime(tu, _) | DataType::Duration(tu) = self.series.borrow().dtype() {
|
582
|
+
Some(
|
583
|
+
match tu {
|
584
|
+
TimeUnit::Nanoseconds => "ns",
|
585
|
+
TimeUnit::Microseconds => "us",
|
586
|
+
TimeUnit::Milliseconds => "ms",
|
587
|
+
}
|
588
|
+
.to_string(),
|
589
|
+
)
|
590
|
+
} else {
|
591
|
+
None
|
592
|
+
}
|
593
|
+
}
|
594
|
+
}
|
595
|
+
|
596
|
+
// Generates, for one native type, a free function and an `RbSeries` method of
// the same name that set `value` at the positions selected by a boolean mask.
//
// * `$name`    - name of both the generated function and the method
// * `$native`  - Rust type of the value being written
// * `$cast`    - `Series` accessor used to obtain the typed chunked array
// * `$variant` - accepted for the call sites but not used in the expansion
macro_rules! impl_set_with_mask {
    ($name:ident, $native:ty, $cast:ident, $variant:ident) => {
        fn $name(
            series: &Series,
            filter: &RbSeries,
            value: Option<$native>,
        ) -> PolarsResult<Series> {
            let filter_series = filter.series.borrow();
            let mask = filter_series.bool()?;
            let updated = series.$cast()?.set(mask, value)?;
            Ok(updated.into_series())
        }

        impl RbSeries {
            pub fn $name(&self, filter: &RbSeries, value: Option<$native>) -> RbResult<Self> {
                let current = self.series.borrow();
                let updated = $name(&current, filter, value).map_err(RbPolarsErr::from)?;
                Ok(Self::new(updated))
            }
        }
    };
}
|
619
|
+
|
620
|
+
// impl_set_with_mask!(set_with_mask_str, &str, utf8, Utf8);
|
621
|
+
impl_set_with_mask!(set_with_mask_f64, f64, f64, Float64);
|
622
|
+
impl_set_with_mask!(set_with_mask_f32, f32, f32, Float32);
|
623
|
+
impl_set_with_mask!(set_with_mask_u8, u8, u8, UInt8);
|
624
|
+
impl_set_with_mask!(set_with_mask_u16, u16, u16, UInt16);
|
625
|
+
impl_set_with_mask!(set_with_mask_u32, u32, u32, UInt32);
|
626
|
+
impl_set_with_mask!(set_with_mask_u64, u64, u64, UInt64);
|
627
|
+
impl_set_with_mask!(set_with_mask_i8, i8, i8, Int8);
|
628
|
+
impl_set_with_mask!(set_with_mask_i16, i16, i16, Int16);
|
629
|
+
impl_set_with_mask!(set_with_mask_i32, i32, i32, Int32);
|
630
|
+
impl_set_with_mask!(set_with_mask_i64, i64, i64, Int64);
|
631
|
+
impl_set_with_mask!(set_with_mask_bool, bool, bool, Boolean);
|
632
|
+
|
633
|
+
impl RbSeries {
|
634
|
+
pub fn extend_constant(&self, value: Wrap<AnyValue>, n: usize) -> RbResult<Self> {
|
635
|
+
Ok(self
|
636
|
+
.series
|
637
|
+
.borrow()
|
638
|
+
.clone()
|
639
|
+
.extend_constant(value.0, n)
|
640
|
+
.map_err(RbPolarsErr::from)?
|
641
|
+
.into())
|
642
|
+
}
|
643
|
+
}
|
@@ -0,0 +1,55 @@
|
|
1
|
+
use magnus::prelude::*;
|
2
|
+
use magnus::Value;
|
3
|
+
use polars::export::arrow::array::Array;
|
4
|
+
use polars::export::arrow::ffi::{ArrowArrayStream, ArrowArrayStreamReader};
|
5
|
+
use polars::prelude::*;
|
6
|
+
|
7
|
+
use super::RbSeries;
|
8
|
+
|
9
|
+
use crate::exceptions::RbValueError;
|
10
|
+
use crate::RbResult;
|
11
|
+
|
12
|
+
/// Import `arrow_c_stream` across Ruby boundary.
|
13
|
+
fn call_arrow_c_stream(ob: Value) -> RbResult<Value> {
|
14
|
+
let capsule = ob.funcall("arrow_c_stream", ())?;
|
15
|
+
Ok(capsule)
|
16
|
+
}
|
17
|
+
|
18
|
+
pub(crate) fn import_stream_rbcapsule(capsule: Value) -> RbResult<RbSeries> {
|
19
|
+
let capsule_pointer: usize = capsule.funcall("to_i", ())?;
|
20
|
+
|
21
|
+
// # Safety
|
22
|
+
// capsule holds a valid C ArrowArrayStream pointer, as defined by the Arrow PyCapsule
|
23
|
+
// Interface
|
24
|
+
let mut stream = unsafe {
|
25
|
+
// Takes ownership of the pointed to ArrowArrayStream
|
26
|
+
// This acts to move the data out of the capsule pointer, setting the release callback to NULL
|
27
|
+
let stream_ptr = Box::new(std::ptr::replace(
|
28
|
+
capsule_pointer as _,
|
29
|
+
ArrowArrayStream::empty(),
|
30
|
+
));
|
31
|
+
ArrowArrayStreamReader::try_new(stream_ptr)
|
32
|
+
.map_err(|err| RbValueError::new_err(err.to_string()))?
|
33
|
+
};
|
34
|
+
|
35
|
+
let mut produced_arrays: Vec<Box<dyn Array>> = vec![];
|
36
|
+
while let Some(array) = unsafe { stream.next() } {
|
37
|
+
produced_arrays.push(array.unwrap());
|
38
|
+
}
|
39
|
+
|
40
|
+
// Series::try_from fails for an empty vec of chunks
|
41
|
+
let s = if produced_arrays.is_empty() {
|
42
|
+
let polars_dt = DataType::from_arrow(stream.field().dtype(), false);
|
43
|
+
Series::new_empty(stream.field().name.clone(), &polars_dt)
|
44
|
+
} else {
|
45
|
+
Series::try_from((stream.field(), produced_arrays)).unwrap()
|
46
|
+
};
|
47
|
+
Ok(RbSeries::new(s))
|
48
|
+
}
|
49
|
+
|
50
|
+
impl RbSeries {
|
51
|
+
pub fn from_arrow_c_stream(ob: Value) -> RbResult<Self> {
|
52
|
+
let capsule = call_arrow_c_stream(ob)?;
|
53
|
+
import_stream_rbcapsule(capsule)
|
54
|
+
}
|
55
|
+
}
|