polars-df 0.14.0 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +25 -0
  3. data/Cargo.lock +1296 -283
  4. data/LICENSE.txt +1 -0
  5. data/README.md +1 -2
  6. data/ext/polars/Cargo.toml +15 -5
  7. data/ext/polars/src/batched_csv.rs +7 -10
  8. data/ext/polars/src/conversion/any_value.rs +31 -21
  9. data/ext/polars/src/conversion/mod.rs +125 -28
  10. data/ext/polars/src/dataframe/construction.rs +0 -3
  11. data/ext/polars/src/dataframe/export.rs +9 -2
  12. data/ext/polars/src/dataframe/general.rs +16 -11
  13. data/ext/polars/src/dataframe/io.rs +73 -169
  14. data/ext/polars/src/dataframe/mod.rs +1 -0
  15. data/ext/polars/src/dataframe/serde.rs +15 -0
  16. data/ext/polars/src/error.rs +31 -48
  17. data/ext/polars/src/exceptions.rs +24 -0
  18. data/ext/polars/src/expr/binary.rs +4 -42
  19. data/ext/polars/src/expr/datetime.rs +5 -4
  20. data/ext/polars/src/expr/general.rs +13 -22
  21. data/ext/polars/src/expr/list.rs +18 -11
  22. data/ext/polars/src/expr/rolling.rs +6 -7
  23. data/ext/polars/src/expr/string.rs +9 -36
  24. data/ext/polars/src/file.rs +59 -22
  25. data/ext/polars/src/functions/business.rs +15 -0
  26. data/ext/polars/src/functions/lazy.rs +17 -8
  27. data/ext/polars/src/functions/mod.rs +1 -0
  28. data/ext/polars/src/interop/arrow/mod.rs +1 -0
  29. data/ext/polars/src/interop/arrow/to_ruby.rs +83 -0
  30. data/ext/polars/src/interop/mod.rs +1 -0
  31. data/ext/polars/src/lazyframe/general.rs +877 -0
  32. data/ext/polars/src/lazyframe/mod.rs +3 -827
  33. data/ext/polars/src/lazyframe/serde.rs +31 -0
  34. data/ext/polars/src/lib.rs +45 -14
  35. data/ext/polars/src/map/dataframe.rs +10 -6
  36. data/ext/polars/src/map/lazy.rs +65 -4
  37. data/ext/polars/src/map/mod.rs +9 -8
  38. data/ext/polars/src/on_startup.rs +1 -1
  39. data/ext/polars/src/series/aggregation.rs +1 -5
  40. data/ext/polars/src/series/arithmetic.rs +10 -10
  41. data/ext/polars/src/series/construction.rs +2 -2
  42. data/ext/polars/src/series/export.rs +1 -1
  43. data/ext/polars/src/series/general.rs +643 -0
  44. data/ext/polars/src/series/import.rs +55 -0
  45. data/ext/polars/src/series/mod.rs +11 -638
  46. data/ext/polars/src/series/scatter.rs +2 -2
  47. data/ext/polars/src/utils.rs +0 -20
  48. data/lib/polars/batched_csv_reader.rb +0 -2
  49. data/lib/polars/binary_expr.rb +133 -9
  50. data/lib/polars/binary_name_space.rb +101 -6
  51. data/lib/polars/config.rb +4 -0
  52. data/lib/polars/data_frame.rb +275 -52
  53. data/lib/polars/data_type_group.rb +28 -0
  54. data/lib/polars/data_types.rb +2 -0
  55. data/lib/polars/date_time_expr.rb +244 -0
  56. data/lib/polars/date_time_name_space.rb +87 -0
  57. data/lib/polars/expr.rb +103 -2
  58. data/lib/polars/functions/as_datatype.rb +51 -2
  59. data/lib/polars/functions/col.rb +1 -1
  60. data/lib/polars/functions/eager.rb +1 -3
  61. data/lib/polars/functions/lazy.rb +88 -10
  62. data/lib/polars/functions/range/time_range.rb +21 -21
  63. data/lib/polars/io/csv.rb +14 -16
  64. data/lib/polars/io/database.rb +2 -2
  65. data/lib/polars/io/ipc.rb +14 -4
  66. data/lib/polars/io/ndjson.rb +10 -0
  67. data/lib/polars/io/parquet.rb +168 -111
  68. data/lib/polars/lazy_frame.rb +649 -15
  69. data/lib/polars/list_name_space.rb +169 -0
  70. data/lib/polars/selectors.rb +1144 -0
  71. data/lib/polars/series.rb +465 -35
  72. data/lib/polars/string_cache.rb +27 -1
  73. data/lib/polars/string_expr.rb +0 -1
  74. data/lib/polars/string_name_space.rb +73 -3
  75. data/lib/polars/struct_name_space.rb +31 -7
  76. data/lib/polars/utils/various.rb +5 -1
  77. data/lib/polars/utils.rb +45 -10
  78. data/lib/polars/version.rb +1 -1
  79. data/lib/polars.rb +2 -1
  80. metadata +14 -4
  81. data/lib/polars/functions.rb +0 -57
@@ -0,0 +1,643 @@
1
+ use magnus::{exception, Error, IntoValue, Value};
2
+ use polars::prelude::*;
3
+ use polars::series::IsSorted;
4
+
5
+ use crate::apply_method_all_arrow_series2;
6
+ use crate::conversion::*;
7
+ use crate::map::series::{call_lambda_and_extract, ApplyLambda};
8
+ use crate::{RbDataFrame, RbPolarsErr, RbResult, RbSeries};
9
+
10
+ impl RbSeries {
11
+ pub fn struct_unnest(&self) -> RbResult<RbDataFrame> {
12
+ let binding = self.series.borrow();
13
+ let ca = binding.struct_().map_err(RbPolarsErr::from)?;
14
+ let df: DataFrame = ca.clone().unnest();
15
+ Ok(df.into())
16
+ }
17
+
18
+ // TODO add to Ruby
19
+ pub fn struct_fields(&self) -> RbResult<Vec<String>> {
20
+ let binding = self.series.borrow();
21
+ let ca = binding.struct_().map_err(RbPolarsErr::from)?;
22
+ Ok(ca
23
+ .struct_fields()
24
+ .iter()
25
+ .map(|s| s.name().to_string())
26
+ .collect())
27
+ }
28
+
29
+ pub fn is_sorted_ascending_flag(&self) -> bool {
30
+ matches!(self.series.borrow().is_sorted_flag(), IsSorted::Ascending)
31
+ }
32
+
33
+ pub fn is_sorted_descending_flag(&self) -> bool {
34
+ matches!(self.series.borrow().is_sorted_flag(), IsSorted::Descending)
35
+ }
36
+
37
+ pub fn can_fast_explode_flag(&self) -> bool {
38
+ match self.series.borrow().list() {
39
+ Err(_) => false,
40
+ Ok(list) => list._can_fast_explode(),
41
+ }
42
+ }
43
+
44
+ pub fn cat_uses_lexical_ordering(&self) -> RbResult<bool> {
45
+ let binding = self.series.borrow();
46
+ let ca = binding.categorical().map_err(RbPolarsErr::from)?;
47
+ Ok(ca.uses_lexical_ordering())
48
+ }
49
+
50
+ pub fn cat_is_local(&self) -> RbResult<bool> {
51
+ let binding = self.series.borrow();
52
+ let ca = binding.categorical().map_err(RbPolarsErr::from)?;
53
+ Ok(ca.get_rev_map().is_local())
54
+ }
55
+
56
+ pub fn cat_to_local(&self) -> RbResult<Self> {
57
+ let binding = self.series.borrow();
58
+ let ca = binding.categorical().map_err(RbPolarsErr::from)?;
59
+ Ok(ca.to_local().into_series().into())
60
+ }
61
+
62
+ pub fn estimated_size(&self) -> usize {
63
+ self.series.borrow().estimated_size()
64
+ }
65
+
66
+ pub fn get_fmt(&self, index: usize, str_lengths: usize) -> String {
67
+ let val = format!("{}", self.series.borrow().get(index).unwrap());
68
+ if let DataType::String | DataType::Categorical(_, _) = self.series.borrow().dtype() {
69
+ let v_trunc = &val[..val
70
+ .char_indices()
71
+ .take(str_lengths)
72
+ .last()
73
+ .map(|(i, c)| i + c.len_utf8())
74
+ .unwrap_or(0)];
75
+ if val == v_trunc {
76
+ val
77
+ } else {
78
+ format!("{}…", v_trunc)
79
+ }
80
+ } else {
81
+ val
82
+ }
83
+ }
84
+
85
+ pub fn rechunk(&self, in_place: bool) -> Option<Self> {
86
+ let series = self.series.borrow_mut().rechunk();
87
+ if in_place {
88
+ *self.series.borrow_mut() = series;
89
+ None
90
+ } else {
91
+ Some(series.into())
92
+ }
93
+ }
94
+
95
+ pub fn get_idx(&self, idx: usize) -> RbResult<Value> {
96
+ Ok(Wrap(self.series.borrow().get(idx).map_err(RbPolarsErr::from)?).into_value())
97
+ }
98
+
99
+ pub fn bitand(&self, other: &RbSeries) -> RbResult<Self> {
100
+ let out = self
101
+ .series
102
+ .borrow()
103
+ .bitand(&other.series.borrow())
104
+ .map_err(RbPolarsErr::from)?;
105
+ Ok(out.into())
106
+ }
107
+
108
+ pub fn bitor(&self, other: &RbSeries) -> RbResult<Self> {
109
+ let out = self
110
+ .series
111
+ .borrow()
112
+ .bitor(&other.series.borrow())
113
+ .map_err(RbPolarsErr::from)?;
114
+ Ok(out.into())
115
+ }
116
+
117
+ pub fn bitxor(&self, other: &RbSeries) -> RbResult<Self> {
118
+ let out = self
119
+ .series
120
+ .borrow()
121
+ .bitxor(&other.series.borrow())
122
+ .map_err(RbPolarsErr::from)?;
123
+ Ok(out.into())
124
+ }
125
+
126
+ pub fn chunk_lengths(&self) -> Vec<usize> {
127
+ self.series.borrow().chunk_lengths().collect()
128
+ }
129
+
130
+ pub fn name(&self) -> String {
131
+ self.series.borrow().name().to_string()
132
+ }
133
+
134
+ pub fn rename(&self, name: String) {
135
+ self.series.borrow_mut().rename(name.into());
136
+ }
137
+
138
+ pub fn dtype(&self) -> Value {
139
+ Wrap(self.series.borrow().dtype().clone()).into_value()
140
+ }
141
+
142
+ pub fn inner_dtype(&self) -> Option<Value> {
143
+ self.series
144
+ .borrow()
145
+ .dtype()
146
+ .inner_dtype()
147
+ .map(|dt| Wrap(dt.clone()).into_value())
148
+ }
149
+
150
+ pub fn set_sorted_flag(&self, descending: bool) -> Self {
151
+ let mut out = self.series.borrow().clone();
152
+ if descending {
153
+ out.set_sorted_flag(IsSorted::Descending);
154
+ } else {
155
+ out.set_sorted_flag(IsSorted::Ascending)
156
+ }
157
+ out.into()
158
+ }
159
+
160
+ pub fn n_chunks(&self) -> usize {
161
+ self.series.borrow().n_chunks()
162
+ }
163
+
164
+ pub fn append(&self, other: &RbSeries) -> RbResult<()> {
165
+ let mut binding = self.series.borrow_mut();
166
+ let res = binding.append(&other.series.borrow());
167
+ if let Err(e) = res {
168
+ Err(Error::new(exception::runtime_error(), e.to_string()))
169
+ } else {
170
+ Ok(())
171
+ }
172
+ }
173
+
174
+ pub fn extend(&self, other: &RbSeries) -> RbResult<()> {
175
+ self.series
176
+ .borrow_mut()
177
+ .extend(&other.series.borrow())
178
+ .map_err(RbPolarsErr::from)?;
179
+ Ok(())
180
+ }
181
+
182
+ pub fn new_from_index(&self, index: usize, length: usize) -> RbResult<Self> {
183
+ if index >= self.series.borrow().len() {
184
+ Err(Error::new(exception::arg_error(), "index is out of bounds"))
185
+ } else {
186
+ Ok(self.series.borrow().new_from_index(index, length).into())
187
+ }
188
+ }
189
+
190
+ pub fn filter(&self, filter: &RbSeries) -> RbResult<Self> {
191
+ let filter_series = &filter.series.borrow();
192
+ if let Ok(ca) = filter_series.bool() {
193
+ let series = self.series.borrow().filter(ca).unwrap();
194
+ Ok(series.into())
195
+ } else {
196
+ Err(Error::new(
197
+ exception::runtime_error(),
198
+ "Expected a boolean mask".to_string(),
199
+ ))
200
+ }
201
+ }
202
+
203
+ pub fn sort(&self, descending: bool, nulls_last: bool, multithreaded: bool) -> RbResult<Self> {
204
+ Ok(self
205
+ .series
206
+ .borrow_mut()
207
+ .sort(
208
+ SortOptions::default()
209
+ .with_order_descending(descending)
210
+ .with_nulls_last(nulls_last)
211
+ .with_multithreaded(multithreaded),
212
+ )
213
+ .map_err(RbPolarsErr::from)?
214
+ .into())
215
+ }
216
+
217
+ pub fn value_counts(
218
+ &self,
219
+ sort: bool,
220
+ parallel: bool,
221
+ name: String,
222
+ normalize: bool,
223
+ ) -> RbResult<RbDataFrame> {
224
+ let out = self
225
+ .series
226
+ .borrow()
227
+ .value_counts(sort, parallel, name.into(), normalize)
228
+ .map_err(RbPolarsErr::from)?;
229
+ Ok(out.into())
230
+ }
231
+
232
+ pub fn slice(&self, offset: i64, length: Option<usize>) -> Self {
233
+ let length = length.unwrap_or_else(|| self.series.borrow().len());
234
+ self.series.borrow().slice(offset, length).into()
235
+ }
236
+
237
+ pub fn take_with_series(&self, indices: &RbSeries) -> RbResult<Self> {
238
+ let binding = indices.series.borrow();
239
+ let idx = binding.idx().map_err(RbPolarsErr::from)?;
240
+ let take = self.series.borrow().take(idx).map_err(RbPolarsErr::from)?;
241
+ Ok(RbSeries::new(take))
242
+ }
243
+
244
+ pub fn null_count(&self) -> RbResult<usize> {
245
+ Ok(self.series.borrow().null_count())
246
+ }
247
+
248
+ pub fn has_nulls(&self) -> bool {
249
+ self.series.borrow().has_nulls()
250
+ }
251
+
252
+ pub fn sample_n(
253
+ &self,
254
+ n: usize,
255
+ with_replacement: bool,
256
+ shuffle: bool,
257
+ seed: Option<u64>,
258
+ ) -> RbResult<Self> {
259
+ let s = self
260
+ .series
261
+ .borrow()
262
+ .sample_n(n, with_replacement, shuffle, seed)
263
+ .map_err(RbPolarsErr::from)?;
264
+ Ok(s.into())
265
+ }
266
+
267
+ pub fn sample_frac(
268
+ &self,
269
+ frac: f64,
270
+ with_replacement: bool,
271
+ shuffle: bool,
272
+ seed: Option<u64>,
273
+ ) -> RbResult<Self> {
274
+ let s = self
275
+ .series
276
+ .borrow()
277
+ .sample_frac(frac, with_replacement, shuffle, seed)
278
+ .map_err(RbPolarsErr::from)?;
279
+ Ok(s.into())
280
+ }
281
+
282
+ pub fn equals(
283
+ &self,
284
+ other: &RbSeries,
285
+ check_dtypes: bool,
286
+ check_names: bool,
287
+ null_equal: bool,
288
+ ) -> bool {
289
+ if check_dtypes && (self.series.borrow().dtype() != other.series.borrow().dtype()) {
290
+ return false;
291
+ }
292
+ if check_names && (self.series.borrow().name() != other.series.borrow().name()) {
293
+ return false;
294
+ }
295
+ if null_equal {
296
+ self.series.borrow().equals_missing(&other.series.borrow())
297
+ } else {
298
+ self.series.borrow().equals(&other.series.borrow())
299
+ }
300
+ }
301
+
302
+ pub fn not(&self) -> RbResult<Self> {
303
+ let binding = self.series.borrow();
304
+ let bool = binding.bool().map_err(RbPolarsErr::from)?;
305
+ Ok((!bool).into_series().into())
306
+ }
307
+
308
+ pub fn to_s(&self) -> String {
309
+ format!("{}", self.series.borrow())
310
+ }
311
+
312
+ pub fn len(&self) -> usize {
313
+ self.series.borrow().len()
314
+ }
315
+
316
+ pub fn clone(&self) -> Self {
317
+ RbSeries::new(self.series.borrow().clone())
318
+ }
319
+
320
+ pub fn apply_lambda(
321
+ &self,
322
+ lambda: Value,
323
+ output_type: Option<Wrap<DataType>>,
324
+ skip_nulls: bool,
325
+ ) -> RbResult<Self> {
326
+ let series = &self.series.borrow();
327
+
328
+ let output_type = output_type.map(|dt| dt.0);
329
+
330
+ macro_rules! dispatch_apply {
331
+ ($self:expr, $method:ident, $($args:expr),*) => {
332
+ if matches!($self.dtype(), DataType::Object(_, _)) {
333
+ // let ca = $self.0.unpack::<ObjectType<ObjectValue>>().unwrap();
334
+ // ca.$method($($args),*)
335
+ todo!()
336
+ } else {
337
+ apply_method_all_arrow_series2!(
338
+ $self,
339
+ $method,
340
+ $($args),*
341
+ )
342
+ }
343
+
344
+ }
345
+
346
+ }
347
+
348
+ if matches!(
349
+ series.dtype(),
350
+ DataType::Datetime(_, _)
351
+ | DataType::Date
352
+ | DataType::Duration(_)
353
+ | DataType::Categorical(_, _)
354
+ | DataType::Time
355
+ ) || !skip_nulls
356
+ {
357
+ let mut avs = Vec::with_capacity(series.len());
358
+ let iter = series.iter().map(|av| {
359
+ let input = Wrap(av);
360
+ call_lambda_and_extract::<_, Wrap<AnyValue>>(lambda, input)
361
+ .unwrap()
362
+ .0
363
+ });
364
+ avs.extend(iter);
365
+ return Ok(Series::new(self.name().into(), &avs).into());
366
+ }
367
+
368
+ let out = match output_type {
369
+ Some(DataType::Int8) => {
370
+ let ca: Int8Chunked = dispatch_apply!(
371
+ series,
372
+ apply_lambda_with_primitive_out_type,
373
+ lambda,
374
+ 0,
375
+ None
376
+ )?;
377
+ ca.into_series()
378
+ }
379
+ Some(DataType::Int16) => {
380
+ let ca: Int16Chunked = dispatch_apply!(
381
+ series,
382
+ apply_lambda_with_primitive_out_type,
383
+ lambda,
384
+ 0,
385
+ None
386
+ )?;
387
+ ca.into_series()
388
+ }
389
+ Some(DataType::Int32) => {
390
+ let ca: Int32Chunked = dispatch_apply!(
391
+ series,
392
+ apply_lambda_with_primitive_out_type,
393
+ lambda,
394
+ 0,
395
+ None
396
+ )?;
397
+ ca.into_series()
398
+ }
399
+ Some(DataType::Int64) => {
400
+ let ca: Int64Chunked = dispatch_apply!(
401
+ series,
402
+ apply_lambda_with_primitive_out_type,
403
+ lambda,
404
+ 0,
405
+ None
406
+ )?;
407
+ ca.into_series()
408
+ }
409
+ Some(DataType::UInt8) => {
410
+ let ca: UInt8Chunked = dispatch_apply!(
411
+ series,
412
+ apply_lambda_with_primitive_out_type,
413
+ lambda,
414
+ 0,
415
+ None
416
+ )?;
417
+ ca.into_series()
418
+ }
419
+ Some(DataType::UInt16) => {
420
+ let ca: UInt16Chunked = dispatch_apply!(
421
+ series,
422
+ apply_lambda_with_primitive_out_type,
423
+ lambda,
424
+ 0,
425
+ None
426
+ )?;
427
+ ca.into_series()
428
+ }
429
+ Some(DataType::UInt32) => {
430
+ let ca: UInt32Chunked = dispatch_apply!(
431
+ series,
432
+ apply_lambda_with_primitive_out_type,
433
+ lambda,
434
+ 0,
435
+ None
436
+ )?;
437
+ ca.into_series()
438
+ }
439
+ Some(DataType::UInt64) => {
440
+ let ca: UInt64Chunked = dispatch_apply!(
441
+ series,
442
+ apply_lambda_with_primitive_out_type,
443
+ lambda,
444
+ 0,
445
+ None
446
+ )?;
447
+ ca.into_series()
448
+ }
449
+ Some(DataType::Float32) => {
450
+ let ca: Float32Chunked = dispatch_apply!(
451
+ series,
452
+ apply_lambda_with_primitive_out_type,
453
+ lambda,
454
+ 0,
455
+ None
456
+ )?;
457
+ ca.into_series()
458
+ }
459
+ Some(DataType::Float64) => {
460
+ let ca: Float64Chunked = dispatch_apply!(
461
+ series,
462
+ apply_lambda_with_primitive_out_type,
463
+ lambda,
464
+ 0,
465
+ None
466
+ )?;
467
+ ca.into_series()
468
+ }
469
+ Some(DataType::Boolean) => {
470
+ let ca: BooleanChunked =
471
+ dispatch_apply!(series, apply_lambda_with_bool_out_type, lambda, 0, None)?;
472
+ ca.into_series()
473
+ }
474
+ Some(DataType::Date) => {
475
+ let ca: Int32Chunked = dispatch_apply!(
476
+ series,
477
+ apply_lambda_with_primitive_out_type,
478
+ lambda,
479
+ 0,
480
+ None
481
+ )?;
482
+ ca.into_date().into_series()
483
+ }
484
+ Some(DataType::Datetime(tu, tz)) => {
485
+ let ca: Int64Chunked = dispatch_apply!(
486
+ series,
487
+ apply_lambda_with_primitive_out_type,
488
+ lambda,
489
+ 0,
490
+ None
491
+ )?;
492
+ ca.into_datetime(tu, tz).into_series()
493
+ }
494
+ Some(DataType::String) => {
495
+ let ca = dispatch_apply!(series, apply_lambda_with_utf8_out_type, lambda, 0, None)?;
496
+
497
+ ca.into_series()
498
+ }
499
+ Some(DataType::Object(_, _)) => {
500
+ let ca =
501
+ dispatch_apply!(series, apply_lambda_with_object_out_type, lambda, 0, None)?;
502
+ ca.into_series()
503
+ }
504
+ None => return dispatch_apply!(series, apply_lambda_unknown, lambda),
505
+
506
+ _ => return dispatch_apply!(series, apply_lambda_unknown, lambda),
507
+ };
508
+
509
+ Ok(RbSeries::new(out))
510
+ }
511
+
512
+ pub fn zip_with(&self, mask: &RbSeries, other: &RbSeries) -> RbResult<Self> {
513
+ let binding = mask.series.borrow();
514
+ let mask = binding.bool().map_err(RbPolarsErr::from)?;
515
+ let s = self
516
+ .series
517
+ .borrow()
518
+ .zip_with(mask, &other.series.borrow())
519
+ .map_err(RbPolarsErr::from)?;
520
+ Ok(RbSeries::new(s))
521
+ }
522
+
523
+ pub fn to_dummies(&self, sep: Option<String>, drop_first: bool) -> RbResult<RbDataFrame> {
524
+ let df = self
525
+ .series
526
+ .borrow()
527
+ .to_dummies(sep.as_deref(), drop_first)
528
+ .map_err(RbPolarsErr::from)?;
529
+ Ok(df.into())
530
+ }
531
+
532
+ pub fn n_unique(&self) -> RbResult<usize> {
533
+ let n = self.series.borrow().n_unique().map_err(RbPolarsErr::from)?;
534
+ Ok(n)
535
+ }
536
+
537
+ pub fn floor(&self) -> RbResult<Self> {
538
+ let s = self.series.borrow().floor().map_err(RbPolarsErr::from)?;
539
+ Ok(s.into())
540
+ }
541
+
542
+ pub fn shrink_to_fit(&self) {
543
+ self.series.borrow_mut().shrink_to_fit();
544
+ }
545
+
546
+ pub fn dot(&self, other: &RbSeries) -> RbResult<f64> {
547
+ let out = self
548
+ .series
549
+ .borrow()
550
+ .dot(&other.series.borrow())
551
+ .map_err(RbPolarsErr::from)?;
552
+ Ok(out)
553
+ }
554
+
555
+ pub fn skew(&self, bias: bool) -> RbResult<Option<f64>> {
556
+ let out = self.series.borrow().skew(bias).map_err(RbPolarsErr::from)?;
557
+ Ok(out)
558
+ }
559
+
560
+ pub fn kurtosis(&self, fisher: bool, bias: bool) -> RbResult<Option<f64>> {
561
+ let out = self
562
+ .series
563
+ .borrow()
564
+ .kurtosis(fisher, bias)
565
+ .map_err(RbPolarsErr::from)?;
566
+ Ok(out)
567
+ }
568
+
569
+ pub fn cast(&self, dtype: Wrap<DataType>, strict: bool) -> RbResult<Self> {
570
+ let dtype = dtype.0;
571
+ let out = if strict {
572
+ self.series.borrow().strict_cast(&dtype)
573
+ } else {
574
+ self.series.borrow().cast(&dtype)
575
+ };
576
+ let out = out.map_err(RbPolarsErr::from)?;
577
+ Ok(out.into())
578
+ }
579
+
580
+ pub fn time_unit(&self) -> Option<String> {
581
+ if let DataType::Datetime(tu, _) | DataType::Duration(tu) = self.series.borrow().dtype() {
582
+ Some(
583
+ match tu {
584
+ TimeUnit::Nanoseconds => "ns",
585
+ TimeUnit::Microseconds => "us",
586
+ TimeUnit::Milliseconds => "ms",
587
+ }
588
+ .to_string(),
589
+ )
590
+ } else {
591
+ None
592
+ }
593
+ }
594
+ }
595
+
596
+ macro_rules! impl_set_with_mask {
597
+ ($name:ident, $native:ty, $cast:ident, $variant:ident) => {
598
+ fn $name(
599
+ series: &Series,
600
+ filter: &RbSeries,
601
+ value: Option<$native>,
602
+ ) -> PolarsResult<Series> {
603
+ let binding = filter.series.borrow();
604
+ let mask = binding.bool()?;
605
+ let ca = series.$cast()?;
606
+ let new = ca.set(mask, value)?;
607
+ Ok(new.into_series())
608
+ }
609
+
610
+ impl RbSeries {
611
+ pub fn $name(&self, filter: &RbSeries, value: Option<$native>) -> RbResult<Self> {
612
+ let series =
613
+ $name(&self.series.borrow(), filter, value).map_err(RbPolarsErr::from)?;
614
+ Ok(Self::new(series))
615
+ }
616
+ }
617
+ };
618
+ }
619
+
620
+ // impl_set_with_mask!(set_with_mask_str, &str, utf8, Utf8);
621
+ impl_set_with_mask!(set_with_mask_f64, f64, f64, Float64);
622
+ impl_set_with_mask!(set_with_mask_f32, f32, f32, Float32);
623
+ impl_set_with_mask!(set_with_mask_u8, u8, u8, UInt8);
624
+ impl_set_with_mask!(set_with_mask_u16, u16, u16, UInt16);
625
+ impl_set_with_mask!(set_with_mask_u32, u32, u32, UInt32);
626
+ impl_set_with_mask!(set_with_mask_u64, u64, u64, UInt64);
627
+ impl_set_with_mask!(set_with_mask_i8, i8, i8, Int8);
628
+ impl_set_with_mask!(set_with_mask_i16, i16, i16, Int16);
629
+ impl_set_with_mask!(set_with_mask_i32, i32, i32, Int32);
630
+ impl_set_with_mask!(set_with_mask_i64, i64, i64, Int64);
631
+ impl_set_with_mask!(set_with_mask_bool, bool, bool, Boolean);
632
+
633
+ impl RbSeries {
634
+ pub fn extend_constant(&self, value: Wrap<AnyValue>, n: usize) -> RbResult<Self> {
635
+ Ok(self
636
+ .series
637
+ .borrow()
638
+ .clone()
639
+ .extend_constant(value.0, n)
640
+ .map_err(RbPolarsErr::from)?
641
+ .into())
642
+ }
643
+ }
@@ -0,0 +1,55 @@
1
+ use magnus::prelude::*;
2
+ use magnus::Value;
3
+ use polars::export::arrow::array::Array;
4
+ use polars::export::arrow::ffi::{ArrowArrayStream, ArrowArrayStreamReader};
5
+ use polars::prelude::*;
6
+
7
+ use super::RbSeries;
8
+
9
+ use crate::exceptions::RbValueError;
10
+ use crate::RbResult;
11
+
12
+ /// Import `arrow_c_stream` across Ruby boundary.
13
+ fn call_arrow_c_stream(ob: Value) -> RbResult<Value> {
14
+ let capsule = ob.funcall("arrow_c_stream", ())?;
15
+ Ok(capsule)
16
+ }
17
+
18
+ pub(crate) fn import_stream_rbcapsule(capsule: Value) -> RbResult<RbSeries> {
19
+ let capsule_pointer: usize = capsule.funcall("to_i", ())?;
20
+
21
+ // # Safety
22
+ // capsule holds a valid C ArrowArrayStream pointer, as defined by the Arrow PyCapsule
23
+ // Interface
24
+ let mut stream = unsafe {
25
+ // Takes ownership of the pointed to ArrowArrayStream
26
+ // This acts to move the data out of the capsule pointer, setting the release callback to NULL
27
+ let stream_ptr = Box::new(std::ptr::replace(
28
+ capsule_pointer as _,
29
+ ArrowArrayStream::empty(),
30
+ ));
31
+ ArrowArrayStreamReader::try_new(stream_ptr)
32
+ .map_err(|err| RbValueError::new_err(err.to_string()))?
33
+ };
34
+
35
+ let mut produced_arrays: Vec<Box<dyn Array>> = vec![];
36
+ while let Some(array) = unsafe { stream.next() } {
37
+ produced_arrays.push(array.unwrap());
38
+ }
39
+
40
+ // Series::try_from fails for an empty vec of chunks
41
+ let s = if produced_arrays.is_empty() {
42
+ let polars_dt = DataType::from_arrow(stream.field().dtype(), false);
43
+ Series::new_empty(stream.field().name.clone(), &polars_dt)
44
+ } else {
45
+ Series::try_from((stream.field(), produced_arrays)).unwrap()
46
+ };
47
+ Ok(RbSeries::new(s))
48
+ }
49
+
50
+ impl RbSeries {
51
+ pub fn from_arrow_c_stream(ob: Value) -> RbResult<Self> {
52
+ let capsule = call_arrow_c_stream(ob)?;
53
+ import_stream_rbcapsule(capsule)
54
+ }
55
+ }