polars-df 0.13.0 → 0.15.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (86) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +30 -0
  3. data/Cargo.lock +1368 -319
  4. data/LICENSE.txt +1 -0
  5. data/README.md +1 -2
  6. data/ext/polars/Cargo.toml +15 -6
  7. data/ext/polars/src/batched_csv.rs +10 -13
  8. data/ext/polars/src/conversion/any_value.rs +37 -21
  9. data/ext/polars/src/conversion/chunked_array.rs +3 -3
  10. data/ext/polars/src/conversion/mod.rs +159 -46
  11. data/ext/polars/src/dataframe/construction.rs +4 -7
  12. data/ext/polars/src/dataframe/export.rs +9 -2
  13. data/ext/polars/src/dataframe/general.rs +22 -16
  14. data/ext/polars/src/dataframe/io.rs +78 -174
  15. data/ext/polars/src/dataframe/mod.rs +1 -0
  16. data/ext/polars/src/dataframe/serde.rs +15 -0
  17. data/ext/polars/src/error.rs +31 -48
  18. data/ext/polars/src/exceptions.rs +24 -0
  19. data/ext/polars/src/expr/binary.rs +4 -42
  20. data/ext/polars/src/expr/datetime.rs +16 -7
  21. data/ext/polars/src/expr/general.rs +14 -23
  22. data/ext/polars/src/expr/list.rs +18 -11
  23. data/ext/polars/src/expr/name.rs +3 -2
  24. data/ext/polars/src/expr/rolling.rs +6 -7
  25. data/ext/polars/src/expr/string.rs +17 -37
  26. data/ext/polars/src/file.rs +59 -22
  27. data/ext/polars/src/functions/business.rs +15 -0
  28. data/ext/polars/src/functions/io.rs +6 -6
  29. data/ext/polars/src/functions/lazy.rs +17 -8
  30. data/ext/polars/src/functions/mod.rs +1 -0
  31. data/ext/polars/src/functions/range.rs +4 -2
  32. data/ext/polars/src/interop/arrow/mod.rs +1 -0
  33. data/ext/polars/src/interop/arrow/to_ruby.rs +83 -0
  34. data/ext/polars/src/interop/mod.rs +1 -0
  35. data/ext/polars/src/lazyframe/general.rs +877 -0
  36. data/ext/polars/src/lazyframe/mod.rs +3 -825
  37. data/ext/polars/src/lazyframe/serde.rs +31 -0
  38. data/ext/polars/src/lib.rs +44 -13
  39. data/ext/polars/src/map/dataframe.rs +46 -14
  40. data/ext/polars/src/map/lazy.rs +65 -4
  41. data/ext/polars/src/map/mod.rs +17 -16
  42. data/ext/polars/src/map/series.rs +106 -64
  43. data/ext/polars/src/on_startup.rs +2 -2
  44. data/ext/polars/src/series/aggregation.rs +1 -5
  45. data/ext/polars/src/series/arithmetic.rs +10 -10
  46. data/ext/polars/src/series/construction.rs +52 -25
  47. data/ext/polars/src/series/export.rs +1 -1
  48. data/ext/polars/src/series/general.rs +643 -0
  49. data/ext/polars/src/series/import.rs +55 -0
  50. data/ext/polars/src/series/mod.rs +11 -638
  51. data/ext/polars/src/series/scatter.rs +2 -2
  52. data/ext/polars/src/utils.rs +0 -20
  53. data/lib/polars/batched_csv_reader.rb +0 -2
  54. data/lib/polars/binary_expr.rb +133 -9
  55. data/lib/polars/binary_name_space.rb +101 -6
  56. data/lib/polars/config.rb +4 -0
  57. data/lib/polars/data_frame.rb +285 -62
  58. data/lib/polars/data_type_group.rb +28 -0
  59. data/lib/polars/data_types.rb +2 -0
  60. data/lib/polars/date_time_expr.rb +244 -0
  61. data/lib/polars/date_time_name_space.rb +87 -0
  62. data/lib/polars/expr.rb +109 -8
  63. data/lib/polars/functions/as_datatype.rb +51 -2
  64. data/lib/polars/functions/col.rb +1 -1
  65. data/lib/polars/functions/eager.rb +1 -3
  66. data/lib/polars/functions/lazy.rb +88 -10
  67. data/lib/polars/functions/range/time_range.rb +21 -21
  68. data/lib/polars/io/csv.rb +14 -16
  69. data/lib/polars/io/database.rb +2 -2
  70. data/lib/polars/io/ipc.rb +14 -12
  71. data/lib/polars/io/ndjson.rb +10 -0
  72. data/lib/polars/io/parquet.rb +168 -111
  73. data/lib/polars/lazy_frame.rb +649 -15
  74. data/lib/polars/list_name_space.rb +169 -0
  75. data/lib/polars/selectors.rb +1144 -0
  76. data/lib/polars/series.rb +470 -40
  77. data/lib/polars/string_cache.rb +27 -1
  78. data/lib/polars/string_expr.rb +0 -1
  79. data/lib/polars/string_name_space.rb +73 -3
  80. data/lib/polars/struct_name_space.rb +31 -7
  81. data/lib/polars/utils/various.rb +5 -1
  82. data/lib/polars/utils.rb +45 -10
  83. data/lib/polars/version.rb +1 -1
  84. data/lib/polars.rb +2 -1
  85. metadata +14 -4
  86. data/lib/polars/functions.rb +0 -57
@@ -0,0 +1,643 @@
1
+ use magnus::{exception, Error, IntoValue, Value};
2
+ use polars::prelude::*;
3
+ use polars::series::IsSorted;
4
+
5
+ use crate::apply_method_all_arrow_series2;
6
+ use crate::conversion::*;
7
+ use crate::map::series::{call_lambda_and_extract, ApplyLambda};
8
+ use crate::{RbDataFrame, RbPolarsErr, RbResult, RbSeries};
9
+
10
+ impl RbSeries {
11
+ pub fn struct_unnest(&self) -> RbResult<RbDataFrame> {
12
+ let binding = self.series.borrow();
13
+ let ca = binding.struct_().map_err(RbPolarsErr::from)?;
14
+ let df: DataFrame = ca.clone().unnest();
15
+ Ok(df.into())
16
+ }
17
+
18
+ // TODO add to Ruby
19
+ pub fn struct_fields(&self) -> RbResult<Vec<String>> {
20
+ let binding = self.series.borrow();
21
+ let ca = binding.struct_().map_err(RbPolarsErr::from)?;
22
+ Ok(ca
23
+ .struct_fields()
24
+ .iter()
25
+ .map(|s| s.name().to_string())
26
+ .collect())
27
+ }
28
+
29
+ pub fn is_sorted_ascending_flag(&self) -> bool {
30
+ matches!(self.series.borrow().is_sorted_flag(), IsSorted::Ascending)
31
+ }
32
+
33
+ pub fn is_sorted_descending_flag(&self) -> bool {
34
+ matches!(self.series.borrow().is_sorted_flag(), IsSorted::Descending)
35
+ }
36
+
37
+ pub fn can_fast_explode_flag(&self) -> bool {
38
+ match self.series.borrow().list() {
39
+ Err(_) => false,
40
+ Ok(list) => list._can_fast_explode(),
41
+ }
42
+ }
43
+
44
+ pub fn cat_uses_lexical_ordering(&self) -> RbResult<bool> {
45
+ let binding = self.series.borrow();
46
+ let ca = binding.categorical().map_err(RbPolarsErr::from)?;
47
+ Ok(ca.uses_lexical_ordering())
48
+ }
49
+
50
+ pub fn cat_is_local(&self) -> RbResult<bool> {
51
+ let binding = self.series.borrow();
52
+ let ca = binding.categorical().map_err(RbPolarsErr::from)?;
53
+ Ok(ca.get_rev_map().is_local())
54
+ }
55
+
56
+ pub fn cat_to_local(&self) -> RbResult<Self> {
57
+ let binding = self.series.borrow();
58
+ let ca = binding.categorical().map_err(RbPolarsErr::from)?;
59
+ Ok(ca.to_local().into_series().into())
60
+ }
61
+
62
+ pub fn estimated_size(&self) -> usize {
63
+ self.series.borrow().estimated_size()
64
+ }
65
+
66
+ pub fn get_fmt(&self, index: usize, str_lengths: usize) -> String {
67
+ let val = format!("{}", self.series.borrow().get(index).unwrap());
68
+ if let DataType::String | DataType::Categorical(_, _) = self.series.borrow().dtype() {
69
+ let v_trunc = &val[..val
70
+ .char_indices()
71
+ .take(str_lengths)
72
+ .last()
73
+ .map(|(i, c)| i + c.len_utf8())
74
+ .unwrap_or(0)];
75
+ if val == v_trunc {
76
+ val
77
+ } else {
78
+ format!("{}…", v_trunc)
79
+ }
80
+ } else {
81
+ val
82
+ }
83
+ }
84
+
85
+ pub fn rechunk(&self, in_place: bool) -> Option<Self> {
86
+ let series = self.series.borrow_mut().rechunk();
87
+ if in_place {
88
+ *self.series.borrow_mut() = series;
89
+ None
90
+ } else {
91
+ Some(series.into())
92
+ }
93
+ }
94
+
95
+ pub fn get_idx(&self, idx: usize) -> RbResult<Value> {
96
+ Ok(Wrap(self.series.borrow().get(idx).map_err(RbPolarsErr::from)?).into_value())
97
+ }
98
+
99
+ pub fn bitand(&self, other: &RbSeries) -> RbResult<Self> {
100
+ let out = self
101
+ .series
102
+ .borrow()
103
+ .bitand(&other.series.borrow())
104
+ .map_err(RbPolarsErr::from)?;
105
+ Ok(out.into())
106
+ }
107
+
108
+ pub fn bitor(&self, other: &RbSeries) -> RbResult<Self> {
109
+ let out = self
110
+ .series
111
+ .borrow()
112
+ .bitor(&other.series.borrow())
113
+ .map_err(RbPolarsErr::from)?;
114
+ Ok(out.into())
115
+ }
116
+
117
+ pub fn bitxor(&self, other: &RbSeries) -> RbResult<Self> {
118
+ let out = self
119
+ .series
120
+ .borrow()
121
+ .bitxor(&other.series.borrow())
122
+ .map_err(RbPolarsErr::from)?;
123
+ Ok(out.into())
124
+ }
125
+
126
+ pub fn chunk_lengths(&self) -> Vec<usize> {
127
+ self.series.borrow().chunk_lengths().collect()
128
+ }
129
+
130
+ pub fn name(&self) -> String {
131
+ self.series.borrow().name().to_string()
132
+ }
133
+
134
+ pub fn rename(&self, name: String) {
135
+ self.series.borrow_mut().rename(name.into());
136
+ }
137
+
138
+ pub fn dtype(&self) -> Value {
139
+ Wrap(self.series.borrow().dtype().clone()).into_value()
140
+ }
141
+
142
+ pub fn inner_dtype(&self) -> Option<Value> {
143
+ self.series
144
+ .borrow()
145
+ .dtype()
146
+ .inner_dtype()
147
+ .map(|dt| Wrap(dt.clone()).into_value())
148
+ }
149
+
150
+ pub fn set_sorted_flag(&self, descending: bool) -> Self {
151
+ let mut out = self.series.borrow().clone();
152
+ if descending {
153
+ out.set_sorted_flag(IsSorted::Descending);
154
+ } else {
155
+ out.set_sorted_flag(IsSorted::Ascending)
156
+ }
157
+ out.into()
158
+ }
159
+
160
+ pub fn n_chunks(&self) -> usize {
161
+ self.series.borrow().n_chunks()
162
+ }
163
+
164
+ pub fn append(&self, other: &RbSeries) -> RbResult<()> {
165
+ let mut binding = self.series.borrow_mut();
166
+ let res = binding.append(&other.series.borrow());
167
+ if let Err(e) = res {
168
+ Err(Error::new(exception::runtime_error(), e.to_string()))
169
+ } else {
170
+ Ok(())
171
+ }
172
+ }
173
+
174
+ pub fn extend(&self, other: &RbSeries) -> RbResult<()> {
175
+ self.series
176
+ .borrow_mut()
177
+ .extend(&other.series.borrow())
178
+ .map_err(RbPolarsErr::from)?;
179
+ Ok(())
180
+ }
181
+
182
+ pub fn new_from_index(&self, index: usize, length: usize) -> RbResult<Self> {
183
+ if index >= self.series.borrow().len() {
184
+ Err(Error::new(exception::arg_error(), "index is out of bounds"))
185
+ } else {
186
+ Ok(self.series.borrow().new_from_index(index, length).into())
187
+ }
188
+ }
189
+
190
+ pub fn filter(&self, filter: &RbSeries) -> RbResult<Self> {
191
+ let filter_series = &filter.series.borrow();
192
+ if let Ok(ca) = filter_series.bool() {
193
+ let series = self.series.borrow().filter(ca).unwrap();
194
+ Ok(series.into())
195
+ } else {
196
+ Err(Error::new(
197
+ exception::runtime_error(),
198
+ "Expected a boolean mask".to_string(),
199
+ ))
200
+ }
201
+ }
202
+
203
+ pub fn sort(&self, descending: bool, nulls_last: bool, multithreaded: bool) -> RbResult<Self> {
204
+ Ok(self
205
+ .series
206
+ .borrow_mut()
207
+ .sort(
208
+ SortOptions::default()
209
+ .with_order_descending(descending)
210
+ .with_nulls_last(nulls_last)
211
+ .with_multithreaded(multithreaded),
212
+ )
213
+ .map_err(RbPolarsErr::from)?
214
+ .into())
215
+ }
216
+
217
+ pub fn value_counts(
218
+ &self,
219
+ sort: bool,
220
+ parallel: bool,
221
+ name: String,
222
+ normalize: bool,
223
+ ) -> RbResult<RbDataFrame> {
224
+ let out = self
225
+ .series
226
+ .borrow()
227
+ .value_counts(sort, parallel, name.into(), normalize)
228
+ .map_err(RbPolarsErr::from)?;
229
+ Ok(out.into())
230
+ }
231
+
232
+ pub fn slice(&self, offset: i64, length: Option<usize>) -> Self {
233
+ let length = length.unwrap_or_else(|| self.series.borrow().len());
234
+ self.series.borrow().slice(offset, length).into()
235
+ }
236
+
237
+ pub fn take_with_series(&self, indices: &RbSeries) -> RbResult<Self> {
238
+ let binding = indices.series.borrow();
239
+ let idx = binding.idx().map_err(RbPolarsErr::from)?;
240
+ let take = self.series.borrow().take(idx).map_err(RbPolarsErr::from)?;
241
+ Ok(RbSeries::new(take))
242
+ }
243
+
244
+ pub fn null_count(&self) -> RbResult<usize> {
245
+ Ok(self.series.borrow().null_count())
246
+ }
247
+
248
+ pub fn has_nulls(&self) -> bool {
249
+ self.series.borrow().has_nulls()
250
+ }
251
+
252
+ pub fn sample_n(
253
+ &self,
254
+ n: usize,
255
+ with_replacement: bool,
256
+ shuffle: bool,
257
+ seed: Option<u64>,
258
+ ) -> RbResult<Self> {
259
+ let s = self
260
+ .series
261
+ .borrow()
262
+ .sample_n(n, with_replacement, shuffle, seed)
263
+ .map_err(RbPolarsErr::from)?;
264
+ Ok(s.into())
265
+ }
266
+
267
+ pub fn sample_frac(
268
+ &self,
269
+ frac: f64,
270
+ with_replacement: bool,
271
+ shuffle: bool,
272
+ seed: Option<u64>,
273
+ ) -> RbResult<Self> {
274
+ let s = self
275
+ .series
276
+ .borrow()
277
+ .sample_frac(frac, with_replacement, shuffle, seed)
278
+ .map_err(RbPolarsErr::from)?;
279
+ Ok(s.into())
280
+ }
281
+
282
+ pub fn equals(
283
+ &self,
284
+ other: &RbSeries,
285
+ check_dtypes: bool,
286
+ check_names: bool,
287
+ null_equal: bool,
288
+ ) -> bool {
289
+ if check_dtypes && (self.series.borrow().dtype() != other.series.borrow().dtype()) {
290
+ return false;
291
+ }
292
+ if check_names && (self.series.borrow().name() != other.series.borrow().name()) {
293
+ return false;
294
+ }
295
+ if null_equal {
296
+ self.series.borrow().equals_missing(&other.series.borrow())
297
+ } else {
298
+ self.series.borrow().equals(&other.series.borrow())
299
+ }
300
+ }
301
+
302
+ pub fn not(&self) -> RbResult<Self> {
303
+ let binding = self.series.borrow();
304
+ let bool = binding.bool().map_err(RbPolarsErr::from)?;
305
+ Ok((!bool).into_series().into())
306
+ }
307
+
308
+ pub fn to_s(&self) -> String {
309
+ format!("{}", self.series.borrow())
310
+ }
311
+
312
+ pub fn len(&self) -> usize {
313
+ self.series.borrow().len()
314
+ }
315
+
316
+ pub fn clone(&self) -> Self {
317
+ RbSeries::new(self.series.borrow().clone())
318
+ }
319
+
320
+ pub fn apply_lambda(
321
+ &self,
322
+ lambda: Value,
323
+ output_type: Option<Wrap<DataType>>,
324
+ skip_nulls: bool,
325
+ ) -> RbResult<Self> {
326
+ let series = &self.series.borrow();
327
+
328
+ let output_type = output_type.map(|dt| dt.0);
329
+
330
+ macro_rules! dispatch_apply {
331
+ ($self:expr, $method:ident, $($args:expr),*) => {
332
+ if matches!($self.dtype(), DataType::Object(_, _)) {
333
+ // let ca = $self.0.unpack::<ObjectType<ObjectValue>>().unwrap();
334
+ // ca.$method($($args),*)
335
+ todo!()
336
+ } else {
337
+ apply_method_all_arrow_series2!(
338
+ $self,
339
+ $method,
340
+ $($args),*
341
+ )
342
+ }
343
+
344
+ }
345
+
346
+ }
347
+
348
+ if matches!(
349
+ series.dtype(),
350
+ DataType::Datetime(_, _)
351
+ | DataType::Date
352
+ | DataType::Duration(_)
353
+ | DataType::Categorical(_, _)
354
+ | DataType::Time
355
+ ) || !skip_nulls
356
+ {
357
+ let mut avs = Vec::with_capacity(series.len());
358
+ let iter = series.iter().map(|av| {
359
+ let input = Wrap(av);
360
+ call_lambda_and_extract::<_, Wrap<AnyValue>>(lambda, input)
361
+ .unwrap()
362
+ .0
363
+ });
364
+ avs.extend(iter);
365
+ return Ok(Series::new(self.name().into(), &avs).into());
366
+ }
367
+
368
+ let out = match output_type {
369
+ Some(DataType::Int8) => {
370
+ let ca: Int8Chunked = dispatch_apply!(
371
+ series,
372
+ apply_lambda_with_primitive_out_type,
373
+ lambda,
374
+ 0,
375
+ None
376
+ )?;
377
+ ca.into_series()
378
+ }
379
+ Some(DataType::Int16) => {
380
+ let ca: Int16Chunked = dispatch_apply!(
381
+ series,
382
+ apply_lambda_with_primitive_out_type,
383
+ lambda,
384
+ 0,
385
+ None
386
+ )?;
387
+ ca.into_series()
388
+ }
389
+ Some(DataType::Int32) => {
390
+ let ca: Int32Chunked = dispatch_apply!(
391
+ series,
392
+ apply_lambda_with_primitive_out_type,
393
+ lambda,
394
+ 0,
395
+ None
396
+ )?;
397
+ ca.into_series()
398
+ }
399
+ Some(DataType::Int64) => {
400
+ let ca: Int64Chunked = dispatch_apply!(
401
+ series,
402
+ apply_lambda_with_primitive_out_type,
403
+ lambda,
404
+ 0,
405
+ None
406
+ )?;
407
+ ca.into_series()
408
+ }
409
+ Some(DataType::UInt8) => {
410
+ let ca: UInt8Chunked = dispatch_apply!(
411
+ series,
412
+ apply_lambda_with_primitive_out_type,
413
+ lambda,
414
+ 0,
415
+ None
416
+ )?;
417
+ ca.into_series()
418
+ }
419
+ Some(DataType::UInt16) => {
420
+ let ca: UInt16Chunked = dispatch_apply!(
421
+ series,
422
+ apply_lambda_with_primitive_out_type,
423
+ lambda,
424
+ 0,
425
+ None
426
+ )?;
427
+ ca.into_series()
428
+ }
429
+ Some(DataType::UInt32) => {
430
+ let ca: UInt32Chunked = dispatch_apply!(
431
+ series,
432
+ apply_lambda_with_primitive_out_type,
433
+ lambda,
434
+ 0,
435
+ None
436
+ )?;
437
+ ca.into_series()
438
+ }
439
+ Some(DataType::UInt64) => {
440
+ let ca: UInt64Chunked = dispatch_apply!(
441
+ series,
442
+ apply_lambda_with_primitive_out_type,
443
+ lambda,
444
+ 0,
445
+ None
446
+ )?;
447
+ ca.into_series()
448
+ }
449
+ Some(DataType::Float32) => {
450
+ let ca: Float32Chunked = dispatch_apply!(
451
+ series,
452
+ apply_lambda_with_primitive_out_type,
453
+ lambda,
454
+ 0,
455
+ None
456
+ )?;
457
+ ca.into_series()
458
+ }
459
+ Some(DataType::Float64) => {
460
+ let ca: Float64Chunked = dispatch_apply!(
461
+ series,
462
+ apply_lambda_with_primitive_out_type,
463
+ lambda,
464
+ 0,
465
+ None
466
+ )?;
467
+ ca.into_series()
468
+ }
469
+ Some(DataType::Boolean) => {
470
+ let ca: BooleanChunked =
471
+ dispatch_apply!(series, apply_lambda_with_bool_out_type, lambda, 0, None)?;
472
+ ca.into_series()
473
+ }
474
+ Some(DataType::Date) => {
475
+ let ca: Int32Chunked = dispatch_apply!(
476
+ series,
477
+ apply_lambda_with_primitive_out_type,
478
+ lambda,
479
+ 0,
480
+ None
481
+ )?;
482
+ ca.into_date().into_series()
483
+ }
484
+ Some(DataType::Datetime(tu, tz)) => {
485
+ let ca: Int64Chunked = dispatch_apply!(
486
+ series,
487
+ apply_lambda_with_primitive_out_type,
488
+ lambda,
489
+ 0,
490
+ None
491
+ )?;
492
+ ca.into_datetime(tu, tz).into_series()
493
+ }
494
+ Some(DataType::String) => {
495
+ let ca = dispatch_apply!(series, apply_lambda_with_utf8_out_type, lambda, 0, None)?;
496
+
497
+ ca.into_series()
498
+ }
499
+ Some(DataType::Object(_, _)) => {
500
+ let ca =
501
+ dispatch_apply!(series, apply_lambda_with_object_out_type, lambda, 0, None)?;
502
+ ca.into_series()
503
+ }
504
+ None => return dispatch_apply!(series, apply_lambda_unknown, lambda),
505
+
506
+ _ => return dispatch_apply!(series, apply_lambda_unknown, lambda),
507
+ };
508
+
509
+ Ok(RbSeries::new(out))
510
+ }
511
+
512
+ pub fn zip_with(&self, mask: &RbSeries, other: &RbSeries) -> RbResult<Self> {
513
+ let binding = mask.series.borrow();
514
+ let mask = binding.bool().map_err(RbPolarsErr::from)?;
515
+ let s = self
516
+ .series
517
+ .borrow()
518
+ .zip_with(mask, &other.series.borrow())
519
+ .map_err(RbPolarsErr::from)?;
520
+ Ok(RbSeries::new(s))
521
+ }
522
+
523
+ pub fn to_dummies(&self, sep: Option<String>, drop_first: bool) -> RbResult<RbDataFrame> {
524
+ let df = self
525
+ .series
526
+ .borrow()
527
+ .to_dummies(sep.as_deref(), drop_first)
528
+ .map_err(RbPolarsErr::from)?;
529
+ Ok(df.into())
530
+ }
531
+
532
+ pub fn n_unique(&self) -> RbResult<usize> {
533
+ let n = self.series.borrow().n_unique().map_err(RbPolarsErr::from)?;
534
+ Ok(n)
535
+ }
536
+
537
+ pub fn floor(&self) -> RbResult<Self> {
538
+ let s = self.series.borrow().floor().map_err(RbPolarsErr::from)?;
539
+ Ok(s.into())
540
+ }
541
+
542
+ pub fn shrink_to_fit(&self) {
543
+ self.series.borrow_mut().shrink_to_fit();
544
+ }
545
+
546
+ pub fn dot(&self, other: &RbSeries) -> RbResult<f64> {
547
+ let out = self
548
+ .series
549
+ .borrow()
550
+ .dot(&other.series.borrow())
551
+ .map_err(RbPolarsErr::from)?;
552
+ Ok(out)
553
+ }
554
+
555
+ pub fn skew(&self, bias: bool) -> RbResult<Option<f64>> {
556
+ let out = self.series.borrow().skew(bias).map_err(RbPolarsErr::from)?;
557
+ Ok(out)
558
+ }
559
+
560
+ pub fn kurtosis(&self, fisher: bool, bias: bool) -> RbResult<Option<f64>> {
561
+ let out = self
562
+ .series
563
+ .borrow()
564
+ .kurtosis(fisher, bias)
565
+ .map_err(RbPolarsErr::from)?;
566
+ Ok(out)
567
+ }
568
+
569
+ pub fn cast(&self, dtype: Wrap<DataType>, strict: bool) -> RbResult<Self> {
570
+ let dtype = dtype.0;
571
+ let out = if strict {
572
+ self.series.borrow().strict_cast(&dtype)
573
+ } else {
574
+ self.series.borrow().cast(&dtype)
575
+ };
576
+ let out = out.map_err(RbPolarsErr::from)?;
577
+ Ok(out.into())
578
+ }
579
+
580
+ pub fn time_unit(&self) -> Option<String> {
581
+ if let DataType::Datetime(tu, _) | DataType::Duration(tu) = self.series.borrow().dtype() {
582
+ Some(
583
+ match tu {
584
+ TimeUnit::Nanoseconds => "ns",
585
+ TimeUnit::Microseconds => "us",
586
+ TimeUnit::Milliseconds => "ms",
587
+ }
588
+ .to_string(),
589
+ )
590
+ } else {
591
+ None
592
+ }
593
+ }
594
+ }
595
+
596
/// Generates a masked-set operation for one element type.
///
/// * `$name` - name of both the private free function and the public
///   `RbSeries` method.
/// * `$native` - Rust type of the replacement value.
/// * `$cast` - `Series` accessor yielding the typed chunked array.
/// * `$variant` - accepted but not used by the expansion.
///
/// The generated method returns a new series in which positions selected by
/// the boolean `filter` are set to `value`; the receiver is not modified.
macro_rules! impl_set_with_mask {
    ($name:ident, $native:ty, $cast:ident, $variant:ident) => {
        fn $name(
            series: &Series,
            filter: &RbSeries,
            value: Option<$native>,
        ) -> PolarsResult<Series> {
            let mask_series = filter.series.borrow();
            // Validate the mask before the typed view so a non-boolean mask is
            // the first error reported.
            let mask = mask_series.bool()?;
            let updated = series.$cast()?.set(mask, value)?;
            Ok(updated.into_series())
        }

        impl RbSeries {
            pub fn $name(&self, filter: &RbSeries, value: Option<$native>) -> RbResult<Self> {
                let current = self.series.borrow();
                let updated = $name(&current, filter, value).map_err(RbPolarsErr::from)?;
                Ok(Self::new(updated))
            }
        }
    };
}
619
+
620
+ // impl_set_with_mask!(set_with_mask_str, &str, utf8, Utf8);
621
+ impl_set_with_mask!(set_with_mask_f64, f64, f64, Float64);
622
+ impl_set_with_mask!(set_with_mask_f32, f32, f32, Float32);
623
+ impl_set_with_mask!(set_with_mask_u8, u8, u8, UInt8);
624
+ impl_set_with_mask!(set_with_mask_u16, u16, u16, UInt16);
625
+ impl_set_with_mask!(set_with_mask_u32, u32, u32, UInt32);
626
+ impl_set_with_mask!(set_with_mask_u64, u64, u64, UInt64);
627
+ impl_set_with_mask!(set_with_mask_i8, i8, i8, Int8);
628
+ impl_set_with_mask!(set_with_mask_i16, i16, i16, Int16);
629
+ impl_set_with_mask!(set_with_mask_i32, i32, i32, Int32);
630
+ impl_set_with_mask!(set_with_mask_i64, i64, i64, Int64);
631
+ impl_set_with_mask!(set_with_mask_bool, bool, bool, Boolean);
632
+
633
+ impl RbSeries {
634
+ pub fn extend_constant(&self, value: Wrap<AnyValue>, n: usize) -> RbResult<Self> {
635
+ Ok(self
636
+ .series
637
+ .borrow()
638
+ .clone()
639
+ .extend_constant(value.0, n)
640
+ .map_err(RbPolarsErr::from)?
641
+ .into())
642
+ }
643
+ }
@@ -0,0 +1,55 @@
1
+ use magnus::prelude::*;
2
+ use magnus::Value;
3
+ use polars::export::arrow::array::Array;
4
+ use polars::export::arrow::ffi::{ArrowArrayStream, ArrowArrayStreamReader};
5
+ use polars::prelude::*;
6
+
7
+ use super::RbSeries;
8
+
9
+ use crate::exceptions::RbValueError;
10
+ use crate::RbResult;
11
+
12
+ /// Import `arrow_c_stream` across Ruby boundary.
13
+ fn call_arrow_c_stream(ob: Value) -> RbResult<Value> {
14
+ let capsule = ob.funcall("arrow_c_stream", ())?;
15
+ Ok(capsule)
16
+ }
17
+
18
+ pub(crate) fn import_stream_rbcapsule(capsule: Value) -> RbResult<RbSeries> {
19
+ let capsule_pointer: usize = capsule.funcall("to_i", ())?;
20
+
21
+ // # Safety
22
+ // capsule holds a valid C ArrowArrayStream pointer, as defined by the Arrow PyCapsule
23
+ // Interface
24
+ let mut stream = unsafe {
25
+ // Takes ownership of the pointed to ArrowArrayStream
26
+ // This acts to move the data out of the capsule pointer, setting the release callback to NULL
27
+ let stream_ptr = Box::new(std::ptr::replace(
28
+ capsule_pointer as _,
29
+ ArrowArrayStream::empty(),
30
+ ));
31
+ ArrowArrayStreamReader::try_new(stream_ptr)
32
+ .map_err(|err| RbValueError::new_err(err.to_string()))?
33
+ };
34
+
35
+ let mut produced_arrays: Vec<Box<dyn Array>> = vec![];
36
+ while let Some(array) = unsafe { stream.next() } {
37
+ produced_arrays.push(array.unwrap());
38
+ }
39
+
40
+ // Series::try_from fails for an empty vec of chunks
41
+ let s = if produced_arrays.is_empty() {
42
+ let polars_dt = DataType::from_arrow(stream.field().dtype(), false);
43
+ Series::new_empty(stream.field().name.clone(), &polars_dt)
44
+ } else {
45
+ Series::try_from((stream.field(), produced_arrays)).unwrap()
46
+ };
47
+ Ok(RbSeries::new(s))
48
+ }
49
+
50
+ impl RbSeries {
51
+ pub fn from_arrow_c_stream(ob: Value) -> RbResult<Self> {
52
+ let capsule = call_arrow_c_stream(ob)?;
53
+ import_stream_rbcapsule(capsule)
54
+ }
55
+ }