polars-df 0.14.0 → 0.15.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +25 -0
- data/Cargo.lock +1296 -283
- data/LICENSE.txt +1 -0
- data/README.md +1 -2
- data/ext/polars/Cargo.toml +15 -5
- data/ext/polars/src/batched_csv.rs +7 -10
- data/ext/polars/src/conversion/any_value.rs +31 -21
- data/ext/polars/src/conversion/mod.rs +125 -28
- data/ext/polars/src/dataframe/construction.rs +0 -3
- data/ext/polars/src/dataframe/export.rs +9 -2
- data/ext/polars/src/dataframe/general.rs +16 -11
- data/ext/polars/src/dataframe/io.rs +73 -169
- data/ext/polars/src/dataframe/mod.rs +1 -0
- data/ext/polars/src/dataframe/serde.rs +15 -0
- data/ext/polars/src/error.rs +31 -48
- data/ext/polars/src/exceptions.rs +24 -0
- data/ext/polars/src/expr/binary.rs +4 -42
- data/ext/polars/src/expr/datetime.rs +5 -4
- data/ext/polars/src/expr/general.rs +13 -22
- data/ext/polars/src/expr/list.rs +18 -11
- data/ext/polars/src/expr/rolling.rs +6 -7
- data/ext/polars/src/expr/string.rs +9 -36
- data/ext/polars/src/file.rs +59 -22
- data/ext/polars/src/functions/business.rs +15 -0
- data/ext/polars/src/functions/lazy.rs +17 -8
- data/ext/polars/src/functions/mod.rs +1 -0
- data/ext/polars/src/interop/arrow/mod.rs +1 -0
- data/ext/polars/src/interop/arrow/to_ruby.rs +83 -0
- data/ext/polars/src/interop/mod.rs +1 -0
- data/ext/polars/src/lazyframe/general.rs +877 -0
- data/ext/polars/src/lazyframe/mod.rs +3 -827
- data/ext/polars/src/lazyframe/serde.rs +31 -0
- data/ext/polars/src/lib.rs +45 -14
- data/ext/polars/src/map/dataframe.rs +10 -6
- data/ext/polars/src/map/lazy.rs +65 -4
- data/ext/polars/src/map/mod.rs +9 -8
- data/ext/polars/src/on_startup.rs +1 -1
- data/ext/polars/src/series/aggregation.rs +1 -5
- data/ext/polars/src/series/arithmetic.rs +10 -10
- data/ext/polars/src/series/construction.rs +2 -2
- data/ext/polars/src/series/export.rs +1 -1
- data/ext/polars/src/series/general.rs +643 -0
- data/ext/polars/src/series/import.rs +55 -0
- data/ext/polars/src/series/mod.rs +11 -638
- data/ext/polars/src/series/scatter.rs +2 -2
- data/ext/polars/src/utils.rs +0 -20
- data/lib/polars/batched_csv_reader.rb +0 -2
- data/lib/polars/binary_expr.rb +133 -9
- data/lib/polars/binary_name_space.rb +101 -6
- data/lib/polars/config.rb +4 -0
- data/lib/polars/data_frame.rb +275 -52
- data/lib/polars/data_type_group.rb +28 -0
- data/lib/polars/data_types.rb +2 -0
- data/lib/polars/date_time_expr.rb +244 -0
- data/lib/polars/date_time_name_space.rb +87 -0
- data/lib/polars/expr.rb +103 -2
- data/lib/polars/functions/as_datatype.rb +51 -2
- data/lib/polars/functions/col.rb +1 -1
- data/lib/polars/functions/eager.rb +1 -3
- data/lib/polars/functions/lazy.rb +88 -10
- data/lib/polars/functions/range/time_range.rb +21 -21
- data/lib/polars/io/csv.rb +14 -16
- data/lib/polars/io/database.rb +2 -2
- data/lib/polars/io/ipc.rb +14 -4
- data/lib/polars/io/ndjson.rb +10 -0
- data/lib/polars/io/parquet.rb +168 -111
- data/lib/polars/lazy_frame.rb +649 -15
- data/lib/polars/list_name_space.rb +169 -0
- data/lib/polars/selectors.rb +1144 -0
- data/lib/polars/series.rb +465 -35
- data/lib/polars/string_cache.rb +27 -1
- data/lib/polars/string_expr.rb +0 -1
- data/lib/polars/string_name_space.rb +73 -3
- data/lib/polars/struct_name_space.rb +31 -7
- data/lib/polars/utils/various.rb +5 -1
- data/lib/polars/utils.rb +45 -10
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +2 -1
- metadata +14 -4
- data/lib/polars/functions.rb +0 -57
@@ -0,0 +1,643 @@
|
|
1
|
+
use magnus::{exception, Error, IntoValue, Value};
|
2
|
+
use polars::prelude::*;
|
3
|
+
use polars::series::IsSorted;
|
4
|
+
|
5
|
+
use crate::apply_method_all_arrow_series2;
|
6
|
+
use crate::conversion::*;
|
7
|
+
use crate::map::series::{call_lambda_and_extract, ApplyLambda};
|
8
|
+
use crate::{RbDataFrame, RbPolarsErr, RbResult, RbSeries};
|
9
|
+
|
10
|
+
impl RbSeries {
|
11
|
+
pub fn struct_unnest(&self) -> RbResult<RbDataFrame> {
|
12
|
+
let binding = self.series.borrow();
|
13
|
+
let ca = binding.struct_().map_err(RbPolarsErr::from)?;
|
14
|
+
let df: DataFrame = ca.clone().unnest();
|
15
|
+
Ok(df.into())
|
16
|
+
}
|
17
|
+
|
18
|
+
// TODO add to Ruby
|
19
|
+
pub fn struct_fields(&self) -> RbResult<Vec<String>> {
|
20
|
+
let binding = self.series.borrow();
|
21
|
+
let ca = binding.struct_().map_err(RbPolarsErr::from)?;
|
22
|
+
Ok(ca
|
23
|
+
.struct_fields()
|
24
|
+
.iter()
|
25
|
+
.map(|s| s.name().to_string())
|
26
|
+
.collect())
|
27
|
+
}
|
28
|
+
|
29
|
+
pub fn is_sorted_ascending_flag(&self) -> bool {
|
30
|
+
matches!(self.series.borrow().is_sorted_flag(), IsSorted::Ascending)
|
31
|
+
}
|
32
|
+
|
33
|
+
pub fn is_sorted_descending_flag(&self) -> bool {
|
34
|
+
matches!(self.series.borrow().is_sorted_flag(), IsSorted::Descending)
|
35
|
+
}
|
36
|
+
|
37
|
+
pub fn can_fast_explode_flag(&self) -> bool {
|
38
|
+
match self.series.borrow().list() {
|
39
|
+
Err(_) => false,
|
40
|
+
Ok(list) => list._can_fast_explode(),
|
41
|
+
}
|
42
|
+
}
|
43
|
+
|
44
|
+
pub fn cat_uses_lexical_ordering(&self) -> RbResult<bool> {
|
45
|
+
let binding = self.series.borrow();
|
46
|
+
let ca = binding.categorical().map_err(RbPolarsErr::from)?;
|
47
|
+
Ok(ca.uses_lexical_ordering())
|
48
|
+
}
|
49
|
+
|
50
|
+
pub fn cat_is_local(&self) -> RbResult<bool> {
|
51
|
+
let binding = self.series.borrow();
|
52
|
+
let ca = binding.categorical().map_err(RbPolarsErr::from)?;
|
53
|
+
Ok(ca.get_rev_map().is_local())
|
54
|
+
}
|
55
|
+
|
56
|
+
pub fn cat_to_local(&self) -> RbResult<Self> {
|
57
|
+
let binding = self.series.borrow();
|
58
|
+
let ca = binding.categorical().map_err(RbPolarsErr::from)?;
|
59
|
+
Ok(ca.to_local().into_series().into())
|
60
|
+
}
|
61
|
+
|
62
|
+
pub fn estimated_size(&self) -> usize {
|
63
|
+
self.series.borrow().estimated_size()
|
64
|
+
}
|
65
|
+
|
66
|
+
pub fn get_fmt(&self, index: usize, str_lengths: usize) -> String {
|
67
|
+
let val = format!("{}", self.series.borrow().get(index).unwrap());
|
68
|
+
if let DataType::String | DataType::Categorical(_, _) = self.series.borrow().dtype() {
|
69
|
+
let v_trunc = &val[..val
|
70
|
+
.char_indices()
|
71
|
+
.take(str_lengths)
|
72
|
+
.last()
|
73
|
+
.map(|(i, c)| i + c.len_utf8())
|
74
|
+
.unwrap_or(0)];
|
75
|
+
if val == v_trunc {
|
76
|
+
val
|
77
|
+
} else {
|
78
|
+
format!("{}…", v_trunc)
|
79
|
+
}
|
80
|
+
} else {
|
81
|
+
val
|
82
|
+
}
|
83
|
+
}
|
84
|
+
|
85
|
+
/// Rechunks the series.
///
/// * `in_place == true`: stores the rechunked series in `self` and returns `None`.
/// * `in_place == false`: leaves `self` untouched and returns the rechunked copy.
pub fn rechunk(&self, in_place: bool) -> Option<Self> {
    // `rechunk` only reads the series, so a shared borrow is enough. A mutable
    // borrow here would panic whenever another borrow of the cell is still alive.
    let rechunked = self.series.borrow().rechunk();
    if in_place {
        *self.series.borrow_mut() = rechunked;
        None
    } else {
        Some(rechunked.into())
    }
}
|
94
|
+
|
95
|
+
pub fn get_idx(&self, idx: usize) -> RbResult<Value> {
|
96
|
+
Ok(Wrap(self.series.borrow().get(idx).map_err(RbPolarsErr::from)?).into_value())
|
97
|
+
}
|
98
|
+
|
99
|
+
pub fn bitand(&self, other: &RbSeries) -> RbResult<Self> {
|
100
|
+
let out = self
|
101
|
+
.series
|
102
|
+
.borrow()
|
103
|
+
.bitand(&other.series.borrow())
|
104
|
+
.map_err(RbPolarsErr::from)?;
|
105
|
+
Ok(out.into())
|
106
|
+
}
|
107
|
+
|
108
|
+
pub fn bitor(&self, other: &RbSeries) -> RbResult<Self> {
|
109
|
+
let out = self
|
110
|
+
.series
|
111
|
+
.borrow()
|
112
|
+
.bitor(&other.series.borrow())
|
113
|
+
.map_err(RbPolarsErr::from)?;
|
114
|
+
Ok(out.into())
|
115
|
+
}
|
116
|
+
|
117
|
+
pub fn bitxor(&self, other: &RbSeries) -> RbResult<Self> {
|
118
|
+
let out = self
|
119
|
+
.series
|
120
|
+
.borrow()
|
121
|
+
.bitxor(&other.series.borrow())
|
122
|
+
.map_err(RbPolarsErr::from)?;
|
123
|
+
Ok(out.into())
|
124
|
+
}
|
125
|
+
|
126
|
+
pub fn chunk_lengths(&self) -> Vec<usize> {
|
127
|
+
self.series.borrow().chunk_lengths().collect()
|
128
|
+
}
|
129
|
+
|
130
|
+
pub fn name(&self) -> String {
|
131
|
+
self.series.borrow().name().to_string()
|
132
|
+
}
|
133
|
+
|
134
|
+
pub fn rename(&self, name: String) {
|
135
|
+
self.series.borrow_mut().rename(name.into());
|
136
|
+
}
|
137
|
+
|
138
|
+
pub fn dtype(&self) -> Value {
|
139
|
+
Wrap(self.series.borrow().dtype().clone()).into_value()
|
140
|
+
}
|
141
|
+
|
142
|
+
pub fn inner_dtype(&self) -> Option<Value> {
|
143
|
+
self.series
|
144
|
+
.borrow()
|
145
|
+
.dtype()
|
146
|
+
.inner_dtype()
|
147
|
+
.map(|dt| Wrap(dt.clone()).into_value())
|
148
|
+
}
|
149
|
+
|
150
|
+
pub fn set_sorted_flag(&self, descending: bool) -> Self {
|
151
|
+
let mut out = self.series.borrow().clone();
|
152
|
+
if descending {
|
153
|
+
out.set_sorted_flag(IsSorted::Descending);
|
154
|
+
} else {
|
155
|
+
out.set_sorted_flag(IsSorted::Ascending)
|
156
|
+
}
|
157
|
+
out.into()
|
158
|
+
}
|
159
|
+
|
160
|
+
pub fn n_chunks(&self) -> usize {
|
161
|
+
self.series.borrow().n_chunks()
|
162
|
+
}
|
163
|
+
|
164
|
+
pub fn append(&self, other: &RbSeries) -> RbResult<()> {
|
165
|
+
let mut binding = self.series.borrow_mut();
|
166
|
+
let res = binding.append(&other.series.borrow());
|
167
|
+
if let Err(e) = res {
|
168
|
+
Err(Error::new(exception::runtime_error(), e.to_string()))
|
169
|
+
} else {
|
170
|
+
Ok(())
|
171
|
+
}
|
172
|
+
}
|
173
|
+
|
174
|
+
pub fn extend(&self, other: &RbSeries) -> RbResult<()> {
|
175
|
+
self.series
|
176
|
+
.borrow_mut()
|
177
|
+
.extend(&other.series.borrow())
|
178
|
+
.map_err(RbPolarsErr::from)?;
|
179
|
+
Ok(())
|
180
|
+
}
|
181
|
+
|
182
|
+
pub fn new_from_index(&self, index: usize, length: usize) -> RbResult<Self> {
|
183
|
+
if index >= self.series.borrow().len() {
|
184
|
+
Err(Error::new(exception::arg_error(), "index is out of bounds"))
|
185
|
+
} else {
|
186
|
+
Ok(self.series.borrow().new_from_index(index, length).into())
|
187
|
+
}
|
188
|
+
}
|
189
|
+
|
190
|
+
pub fn filter(&self, filter: &RbSeries) -> RbResult<Self> {
|
191
|
+
let filter_series = &filter.series.borrow();
|
192
|
+
if let Ok(ca) = filter_series.bool() {
|
193
|
+
let series = self.series.borrow().filter(ca).unwrap();
|
194
|
+
Ok(series.into())
|
195
|
+
} else {
|
196
|
+
Err(Error::new(
|
197
|
+
exception::runtime_error(),
|
198
|
+
"Expected a boolean mask".to_string(),
|
199
|
+
))
|
200
|
+
}
|
201
|
+
}
|
202
|
+
|
203
|
+
pub fn sort(&self, descending: bool, nulls_last: bool, multithreaded: bool) -> RbResult<Self> {
|
204
|
+
Ok(self
|
205
|
+
.series
|
206
|
+
.borrow_mut()
|
207
|
+
.sort(
|
208
|
+
SortOptions::default()
|
209
|
+
.with_order_descending(descending)
|
210
|
+
.with_nulls_last(nulls_last)
|
211
|
+
.with_multithreaded(multithreaded),
|
212
|
+
)
|
213
|
+
.map_err(RbPolarsErr::from)?
|
214
|
+
.into())
|
215
|
+
}
|
216
|
+
|
217
|
+
pub fn value_counts(
|
218
|
+
&self,
|
219
|
+
sort: bool,
|
220
|
+
parallel: bool,
|
221
|
+
name: String,
|
222
|
+
normalize: bool,
|
223
|
+
) -> RbResult<RbDataFrame> {
|
224
|
+
let out = self
|
225
|
+
.series
|
226
|
+
.borrow()
|
227
|
+
.value_counts(sort, parallel, name.into(), normalize)
|
228
|
+
.map_err(RbPolarsErr::from)?;
|
229
|
+
Ok(out.into())
|
230
|
+
}
|
231
|
+
|
232
|
+
/// Returns `Series::slice(offset, length)`.
///
/// When `length` is `None`, the full series length is used.
pub fn slice(&self, offset: i64, length: Option<usize>) -> Self {
    let series = self.series.borrow();
    let length = match length {
        Some(len) => len,
        None => series.len(),
    };
    series.slice(offset, length).into()
}
|
236
|
+
|
237
|
+
pub fn take_with_series(&self, indices: &RbSeries) -> RbResult<Self> {
|
238
|
+
let binding = indices.series.borrow();
|
239
|
+
let idx = binding.idx().map_err(RbPolarsErr::from)?;
|
240
|
+
let take = self.series.borrow().take(idx).map_err(RbPolarsErr::from)?;
|
241
|
+
Ok(RbSeries::new(take))
|
242
|
+
}
|
243
|
+
|
244
|
+
/// Returns `Series::null_count()`. Always `Ok` in this implementation.
pub fn null_count(&self) -> RbResult<usize> {
    let nulls = self.series.borrow().null_count();
    Ok(nulls)
}
|
247
|
+
|
248
|
+
pub fn has_nulls(&self) -> bool {
|
249
|
+
self.series.borrow().has_nulls()
|
250
|
+
}
|
251
|
+
|
252
|
+
pub fn sample_n(
|
253
|
+
&self,
|
254
|
+
n: usize,
|
255
|
+
with_replacement: bool,
|
256
|
+
shuffle: bool,
|
257
|
+
seed: Option<u64>,
|
258
|
+
) -> RbResult<Self> {
|
259
|
+
let s = self
|
260
|
+
.series
|
261
|
+
.borrow()
|
262
|
+
.sample_n(n, with_replacement, shuffle, seed)
|
263
|
+
.map_err(RbPolarsErr::from)?;
|
264
|
+
Ok(s.into())
|
265
|
+
}
|
266
|
+
|
267
|
+
pub fn sample_frac(
|
268
|
+
&self,
|
269
|
+
frac: f64,
|
270
|
+
with_replacement: bool,
|
271
|
+
shuffle: bool,
|
272
|
+
seed: Option<u64>,
|
273
|
+
) -> RbResult<Self> {
|
274
|
+
let s = self
|
275
|
+
.series
|
276
|
+
.borrow()
|
277
|
+
.sample_frac(frac, with_replacement, shuffle, seed)
|
278
|
+
.map_err(RbPolarsErr::from)?;
|
279
|
+
Ok(s.into())
|
280
|
+
}
|
281
|
+
|
282
|
+
pub fn equals(
|
283
|
+
&self,
|
284
|
+
other: &RbSeries,
|
285
|
+
check_dtypes: bool,
|
286
|
+
check_names: bool,
|
287
|
+
null_equal: bool,
|
288
|
+
) -> bool {
|
289
|
+
if check_dtypes && (self.series.borrow().dtype() != other.series.borrow().dtype()) {
|
290
|
+
return false;
|
291
|
+
}
|
292
|
+
if check_names && (self.series.borrow().name() != other.series.borrow().name()) {
|
293
|
+
return false;
|
294
|
+
}
|
295
|
+
if null_equal {
|
296
|
+
self.series.borrow().equals_missing(&other.series.borrow())
|
297
|
+
} else {
|
298
|
+
self.series.borrow().equals(&other.series.borrow())
|
299
|
+
}
|
300
|
+
}
|
301
|
+
|
302
|
+
pub fn not(&self) -> RbResult<Self> {
|
303
|
+
let binding = self.series.borrow();
|
304
|
+
let bool = binding.bool().map_err(RbPolarsErr::from)?;
|
305
|
+
Ok((!bool).into_series().into())
|
306
|
+
}
|
307
|
+
|
308
|
+
pub fn to_s(&self) -> String {
|
309
|
+
format!("{}", self.series.borrow())
|
310
|
+
}
|
311
|
+
|
312
|
+
pub fn len(&self) -> usize {
|
313
|
+
self.series.borrow().len()
|
314
|
+
}
|
315
|
+
|
316
|
+
pub fn clone(&self) -> Self {
|
317
|
+
RbSeries::new(self.series.borrow().clone())
|
318
|
+
}
|
319
|
+
|
320
|
+
pub fn apply_lambda(
|
321
|
+
&self,
|
322
|
+
lambda: Value,
|
323
|
+
output_type: Option<Wrap<DataType>>,
|
324
|
+
skip_nulls: bool,
|
325
|
+
) -> RbResult<Self> {
|
326
|
+
let series = &self.series.borrow();
|
327
|
+
|
328
|
+
let output_type = output_type.map(|dt| dt.0);
|
329
|
+
|
330
|
+
macro_rules! dispatch_apply {
|
331
|
+
($self:expr, $method:ident, $($args:expr),*) => {
|
332
|
+
if matches!($self.dtype(), DataType::Object(_, _)) {
|
333
|
+
// let ca = $self.0.unpack::<ObjectType<ObjectValue>>().unwrap();
|
334
|
+
// ca.$method($($args),*)
|
335
|
+
todo!()
|
336
|
+
} else {
|
337
|
+
apply_method_all_arrow_series2!(
|
338
|
+
$self,
|
339
|
+
$method,
|
340
|
+
$($args),*
|
341
|
+
)
|
342
|
+
}
|
343
|
+
|
344
|
+
}
|
345
|
+
|
346
|
+
}
|
347
|
+
|
348
|
+
if matches!(
|
349
|
+
series.dtype(),
|
350
|
+
DataType::Datetime(_, _)
|
351
|
+
| DataType::Date
|
352
|
+
| DataType::Duration(_)
|
353
|
+
| DataType::Categorical(_, _)
|
354
|
+
| DataType::Time
|
355
|
+
) || !skip_nulls
|
356
|
+
{
|
357
|
+
let mut avs = Vec::with_capacity(series.len());
|
358
|
+
let iter = series.iter().map(|av| {
|
359
|
+
let input = Wrap(av);
|
360
|
+
call_lambda_and_extract::<_, Wrap<AnyValue>>(lambda, input)
|
361
|
+
.unwrap()
|
362
|
+
.0
|
363
|
+
});
|
364
|
+
avs.extend(iter);
|
365
|
+
return Ok(Series::new(self.name().into(), &avs).into());
|
366
|
+
}
|
367
|
+
|
368
|
+
let out = match output_type {
|
369
|
+
Some(DataType::Int8) => {
|
370
|
+
let ca: Int8Chunked = dispatch_apply!(
|
371
|
+
series,
|
372
|
+
apply_lambda_with_primitive_out_type,
|
373
|
+
lambda,
|
374
|
+
0,
|
375
|
+
None
|
376
|
+
)?;
|
377
|
+
ca.into_series()
|
378
|
+
}
|
379
|
+
Some(DataType::Int16) => {
|
380
|
+
let ca: Int16Chunked = dispatch_apply!(
|
381
|
+
series,
|
382
|
+
apply_lambda_with_primitive_out_type,
|
383
|
+
lambda,
|
384
|
+
0,
|
385
|
+
None
|
386
|
+
)?;
|
387
|
+
ca.into_series()
|
388
|
+
}
|
389
|
+
Some(DataType::Int32) => {
|
390
|
+
let ca: Int32Chunked = dispatch_apply!(
|
391
|
+
series,
|
392
|
+
apply_lambda_with_primitive_out_type,
|
393
|
+
lambda,
|
394
|
+
0,
|
395
|
+
None
|
396
|
+
)?;
|
397
|
+
ca.into_series()
|
398
|
+
}
|
399
|
+
Some(DataType::Int64) => {
|
400
|
+
let ca: Int64Chunked = dispatch_apply!(
|
401
|
+
series,
|
402
|
+
apply_lambda_with_primitive_out_type,
|
403
|
+
lambda,
|
404
|
+
0,
|
405
|
+
None
|
406
|
+
)?;
|
407
|
+
ca.into_series()
|
408
|
+
}
|
409
|
+
Some(DataType::UInt8) => {
|
410
|
+
let ca: UInt8Chunked = dispatch_apply!(
|
411
|
+
series,
|
412
|
+
apply_lambda_with_primitive_out_type,
|
413
|
+
lambda,
|
414
|
+
0,
|
415
|
+
None
|
416
|
+
)?;
|
417
|
+
ca.into_series()
|
418
|
+
}
|
419
|
+
Some(DataType::UInt16) => {
|
420
|
+
let ca: UInt16Chunked = dispatch_apply!(
|
421
|
+
series,
|
422
|
+
apply_lambda_with_primitive_out_type,
|
423
|
+
lambda,
|
424
|
+
0,
|
425
|
+
None
|
426
|
+
)?;
|
427
|
+
ca.into_series()
|
428
|
+
}
|
429
|
+
Some(DataType::UInt32) => {
|
430
|
+
let ca: UInt32Chunked = dispatch_apply!(
|
431
|
+
series,
|
432
|
+
apply_lambda_with_primitive_out_type,
|
433
|
+
lambda,
|
434
|
+
0,
|
435
|
+
None
|
436
|
+
)?;
|
437
|
+
ca.into_series()
|
438
|
+
}
|
439
|
+
Some(DataType::UInt64) => {
|
440
|
+
let ca: UInt64Chunked = dispatch_apply!(
|
441
|
+
series,
|
442
|
+
apply_lambda_with_primitive_out_type,
|
443
|
+
lambda,
|
444
|
+
0,
|
445
|
+
None
|
446
|
+
)?;
|
447
|
+
ca.into_series()
|
448
|
+
}
|
449
|
+
Some(DataType::Float32) => {
|
450
|
+
let ca: Float32Chunked = dispatch_apply!(
|
451
|
+
series,
|
452
|
+
apply_lambda_with_primitive_out_type,
|
453
|
+
lambda,
|
454
|
+
0,
|
455
|
+
None
|
456
|
+
)?;
|
457
|
+
ca.into_series()
|
458
|
+
}
|
459
|
+
Some(DataType::Float64) => {
|
460
|
+
let ca: Float64Chunked = dispatch_apply!(
|
461
|
+
series,
|
462
|
+
apply_lambda_with_primitive_out_type,
|
463
|
+
lambda,
|
464
|
+
0,
|
465
|
+
None
|
466
|
+
)?;
|
467
|
+
ca.into_series()
|
468
|
+
}
|
469
|
+
Some(DataType::Boolean) => {
|
470
|
+
let ca: BooleanChunked =
|
471
|
+
dispatch_apply!(series, apply_lambda_with_bool_out_type, lambda, 0, None)?;
|
472
|
+
ca.into_series()
|
473
|
+
}
|
474
|
+
Some(DataType::Date) => {
|
475
|
+
let ca: Int32Chunked = dispatch_apply!(
|
476
|
+
series,
|
477
|
+
apply_lambda_with_primitive_out_type,
|
478
|
+
lambda,
|
479
|
+
0,
|
480
|
+
None
|
481
|
+
)?;
|
482
|
+
ca.into_date().into_series()
|
483
|
+
}
|
484
|
+
Some(DataType::Datetime(tu, tz)) => {
|
485
|
+
let ca: Int64Chunked = dispatch_apply!(
|
486
|
+
series,
|
487
|
+
apply_lambda_with_primitive_out_type,
|
488
|
+
lambda,
|
489
|
+
0,
|
490
|
+
None
|
491
|
+
)?;
|
492
|
+
ca.into_datetime(tu, tz).into_series()
|
493
|
+
}
|
494
|
+
Some(DataType::String) => {
|
495
|
+
let ca = dispatch_apply!(series, apply_lambda_with_utf8_out_type, lambda, 0, None)?;
|
496
|
+
|
497
|
+
ca.into_series()
|
498
|
+
}
|
499
|
+
Some(DataType::Object(_, _)) => {
|
500
|
+
let ca =
|
501
|
+
dispatch_apply!(series, apply_lambda_with_object_out_type, lambda, 0, None)?;
|
502
|
+
ca.into_series()
|
503
|
+
}
|
504
|
+
None => return dispatch_apply!(series, apply_lambda_unknown, lambda),
|
505
|
+
|
506
|
+
_ => return dispatch_apply!(series, apply_lambda_unknown, lambda),
|
507
|
+
};
|
508
|
+
|
509
|
+
Ok(RbSeries::new(out))
|
510
|
+
}
|
511
|
+
|
512
|
+
pub fn zip_with(&self, mask: &RbSeries, other: &RbSeries) -> RbResult<Self> {
|
513
|
+
let binding = mask.series.borrow();
|
514
|
+
let mask = binding.bool().map_err(RbPolarsErr::from)?;
|
515
|
+
let s = self
|
516
|
+
.series
|
517
|
+
.borrow()
|
518
|
+
.zip_with(mask, &other.series.borrow())
|
519
|
+
.map_err(RbPolarsErr::from)?;
|
520
|
+
Ok(RbSeries::new(s))
|
521
|
+
}
|
522
|
+
|
523
|
+
pub fn to_dummies(&self, sep: Option<String>, drop_first: bool) -> RbResult<RbDataFrame> {
|
524
|
+
let df = self
|
525
|
+
.series
|
526
|
+
.borrow()
|
527
|
+
.to_dummies(sep.as_deref(), drop_first)
|
528
|
+
.map_err(RbPolarsErr::from)?;
|
529
|
+
Ok(df.into())
|
530
|
+
}
|
531
|
+
|
532
|
+
pub fn n_unique(&self) -> RbResult<usize> {
|
533
|
+
let n = self.series.borrow().n_unique().map_err(RbPolarsErr::from)?;
|
534
|
+
Ok(n)
|
535
|
+
}
|
536
|
+
|
537
|
+
pub fn floor(&self) -> RbResult<Self> {
|
538
|
+
let s = self.series.borrow().floor().map_err(RbPolarsErr::from)?;
|
539
|
+
Ok(s.into())
|
540
|
+
}
|
541
|
+
|
542
|
+
pub fn shrink_to_fit(&self) {
|
543
|
+
self.series.borrow_mut().shrink_to_fit();
|
544
|
+
}
|
545
|
+
|
546
|
+
pub fn dot(&self, other: &RbSeries) -> RbResult<f64> {
|
547
|
+
let out = self
|
548
|
+
.series
|
549
|
+
.borrow()
|
550
|
+
.dot(&other.series.borrow())
|
551
|
+
.map_err(RbPolarsErr::from)?;
|
552
|
+
Ok(out)
|
553
|
+
}
|
554
|
+
|
555
|
+
pub fn skew(&self, bias: bool) -> RbResult<Option<f64>> {
|
556
|
+
let out = self.series.borrow().skew(bias).map_err(RbPolarsErr::from)?;
|
557
|
+
Ok(out)
|
558
|
+
}
|
559
|
+
|
560
|
+
pub fn kurtosis(&self, fisher: bool, bias: bool) -> RbResult<Option<f64>> {
|
561
|
+
let out = self
|
562
|
+
.series
|
563
|
+
.borrow()
|
564
|
+
.kurtosis(fisher, bias)
|
565
|
+
.map_err(RbPolarsErr::from)?;
|
566
|
+
Ok(out)
|
567
|
+
}
|
568
|
+
|
569
|
+
pub fn cast(&self, dtype: Wrap<DataType>, strict: bool) -> RbResult<Self> {
|
570
|
+
let dtype = dtype.0;
|
571
|
+
let out = if strict {
|
572
|
+
self.series.borrow().strict_cast(&dtype)
|
573
|
+
} else {
|
574
|
+
self.series.borrow().cast(&dtype)
|
575
|
+
};
|
576
|
+
let out = out.map_err(RbPolarsErr::from)?;
|
577
|
+
Ok(out.into())
|
578
|
+
}
|
579
|
+
|
580
|
+
pub fn time_unit(&self) -> Option<String> {
|
581
|
+
if let DataType::Datetime(tu, _) | DataType::Duration(tu) = self.series.borrow().dtype() {
|
582
|
+
Some(
|
583
|
+
match tu {
|
584
|
+
TimeUnit::Nanoseconds => "ns",
|
585
|
+
TimeUnit::Microseconds => "us",
|
586
|
+
TimeUnit::Milliseconds => "ms",
|
587
|
+
}
|
588
|
+
.to_string(),
|
589
|
+
)
|
590
|
+
} else {
|
591
|
+
None
|
592
|
+
}
|
593
|
+
}
|
594
|
+
}
|
595
|
+
|
596
|
+
// Generates, for one native type, a free function and an `RbSeries` method of
// the same name that set `value` at every position selected by a boolean mask.
//
// Arguments:
//   $name    - name of both the generated function and the method
//   $native  - native Rust type of the value
//   $cast    - `Series` accessor that yields the typed chunked array
//   $variant - accepted for call-site symmetry; not used in the expansion
macro_rules! impl_set_with_mask {
    ($name:ident, $native:ty, $cast:ident, $variant:ident) => {
        fn $name(
            series: &Series,
            filter: &RbSeries,
            value: Option<$native>,
        ) -> PolarsResult<Series> {
            let filter_series = filter.series.borrow();
            // The mask is validated before the typed view, so a bad mask is
            // reported first.
            let mask = filter_series.bool()?;
            let typed = series.$cast()?;
            let updated = typed.set(mask, value)?;
            Ok(updated.into_series())
        }

        impl RbSeries {
            pub fn $name(&self, filter: &RbSeries, value: Option<$native>) -> RbResult<Self> {
                let current = self.series.borrow();
                let updated = $name(&current, filter, value).map_err(RbPolarsErr::from)?;
                Ok(Self::new(updated))
            }
        }
    };
}
|
619
|
+
|
620
|
+
// impl_set_with_mask!(set_with_mask_str, &str, utf8, Utf8);
|
621
|
+
impl_set_with_mask!(set_with_mask_f64, f64, f64, Float64);
|
622
|
+
impl_set_with_mask!(set_with_mask_f32, f32, f32, Float32);
|
623
|
+
impl_set_with_mask!(set_with_mask_u8, u8, u8, UInt8);
|
624
|
+
impl_set_with_mask!(set_with_mask_u16, u16, u16, UInt16);
|
625
|
+
impl_set_with_mask!(set_with_mask_u32, u32, u32, UInt32);
|
626
|
+
impl_set_with_mask!(set_with_mask_u64, u64, u64, UInt64);
|
627
|
+
impl_set_with_mask!(set_with_mask_i8, i8, i8, Int8);
|
628
|
+
impl_set_with_mask!(set_with_mask_i16, i16, i16, Int16);
|
629
|
+
impl_set_with_mask!(set_with_mask_i32, i32, i32, Int32);
|
630
|
+
impl_set_with_mask!(set_with_mask_i64, i64, i64, Int64);
|
631
|
+
impl_set_with_mask!(set_with_mask_bool, bool, bool, Boolean);
|
632
|
+
|
633
|
+
impl RbSeries {
|
634
|
+
pub fn extend_constant(&self, value: Wrap<AnyValue>, n: usize) -> RbResult<Self> {
|
635
|
+
Ok(self
|
636
|
+
.series
|
637
|
+
.borrow()
|
638
|
+
.clone()
|
639
|
+
.extend_constant(value.0, n)
|
640
|
+
.map_err(RbPolarsErr::from)?
|
641
|
+
.into())
|
642
|
+
}
|
643
|
+
}
|
@@ -0,0 +1,55 @@
|
|
1
|
+
use magnus::prelude::*;
|
2
|
+
use magnus::Value;
|
3
|
+
use polars::export::arrow::array::Array;
|
4
|
+
use polars::export::arrow::ffi::{ArrowArrayStream, ArrowArrayStreamReader};
|
5
|
+
use polars::prelude::*;
|
6
|
+
|
7
|
+
use super::RbSeries;
|
8
|
+
|
9
|
+
use crate::exceptions::RbValueError;
|
10
|
+
use crate::RbResult;
|
11
|
+
|
12
|
+
/// Import `arrow_c_stream` across Ruby boundary.
|
13
|
+
fn call_arrow_c_stream(ob: Value) -> RbResult<Value> {
|
14
|
+
let capsule = ob.funcall("arrow_c_stream", ())?;
|
15
|
+
Ok(capsule)
|
16
|
+
}
|
17
|
+
|
18
|
+
pub(crate) fn import_stream_rbcapsule(capsule: Value) -> RbResult<RbSeries> {
|
19
|
+
let capsule_pointer: usize = capsule.funcall("to_i", ())?;
|
20
|
+
|
21
|
+
// # Safety
|
22
|
+
// capsule holds a valid C ArrowArrayStream pointer, as defined by the Arrow PyCapsule
|
23
|
+
// Interface
|
24
|
+
let mut stream = unsafe {
|
25
|
+
// Takes ownership of the pointed to ArrowArrayStream
|
26
|
+
// This acts to move the data out of the capsule pointer, setting the release callback to NULL
|
27
|
+
let stream_ptr = Box::new(std::ptr::replace(
|
28
|
+
capsule_pointer as _,
|
29
|
+
ArrowArrayStream::empty(),
|
30
|
+
));
|
31
|
+
ArrowArrayStreamReader::try_new(stream_ptr)
|
32
|
+
.map_err(|err| RbValueError::new_err(err.to_string()))?
|
33
|
+
};
|
34
|
+
|
35
|
+
let mut produced_arrays: Vec<Box<dyn Array>> = vec![];
|
36
|
+
while let Some(array) = unsafe { stream.next() } {
|
37
|
+
produced_arrays.push(array.unwrap());
|
38
|
+
}
|
39
|
+
|
40
|
+
// Series::try_from fails for an empty vec of chunks
|
41
|
+
let s = if produced_arrays.is_empty() {
|
42
|
+
let polars_dt = DataType::from_arrow(stream.field().dtype(), false);
|
43
|
+
Series::new_empty(stream.field().name.clone(), &polars_dt)
|
44
|
+
} else {
|
45
|
+
Series::try_from((stream.field(), produced_arrays)).unwrap()
|
46
|
+
};
|
47
|
+
Ok(RbSeries::new(s))
|
48
|
+
}
|
49
|
+
|
50
|
+
impl RbSeries {
|
51
|
+
pub fn from_arrow_c_stream(ob: Value) -> RbResult<Self> {
|
52
|
+
let capsule = call_arrow_c_stream(ob)?;
|
53
|
+
import_stream_rbcapsule(capsule)
|
54
|
+
}
|
55
|
+
}
|