polars-df 0.14.0 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +35 -0
  3. data/Cargo.lock +1523 -378
  4. data/LICENSE.txt +1 -0
  5. data/README.md +38 -4
  6. data/ext/polars/Cargo.toml +15 -5
  7. data/ext/polars/src/batched_csv.rs +7 -10
  8. data/ext/polars/src/conversion/any_value.rs +31 -21
  9. data/ext/polars/src/conversion/mod.rs +155 -48
  10. data/ext/polars/src/dataframe/construction.rs +0 -3
  11. data/ext/polars/src/dataframe/export.rs +9 -2
  12. data/ext/polars/src/dataframe/general.rs +15 -57
  13. data/ext/polars/src/dataframe/io.rs +77 -169
  14. data/ext/polars/src/dataframe/mod.rs +1 -0
  15. data/ext/polars/src/dataframe/serde.rs +15 -0
  16. data/ext/polars/src/error.rs +31 -48
  17. data/ext/polars/src/exceptions.rs +24 -0
  18. data/ext/polars/src/expr/binary.rs +4 -42
  19. data/ext/polars/src/expr/datetime.rs +5 -4
  20. data/ext/polars/src/expr/general.rs +16 -22
  21. data/ext/polars/src/expr/list.rs +18 -11
  22. data/ext/polars/src/expr/meta.rs +6 -2
  23. data/ext/polars/src/expr/rolling.rs +6 -7
  24. data/ext/polars/src/expr/string.rs +9 -36
  25. data/ext/polars/src/file.rs +78 -23
  26. data/ext/polars/src/functions/aggregation.rs +4 -4
  27. data/ext/polars/src/functions/business.rs +15 -0
  28. data/ext/polars/src/functions/io.rs +34 -13
  29. data/ext/polars/src/functions/lazy.rs +22 -12
  30. data/ext/polars/src/functions/meta.rs +1 -1
  31. data/ext/polars/src/functions/mod.rs +1 -0
  32. data/ext/polars/src/interop/arrow/mod.rs +1 -0
  33. data/ext/polars/src/interop/arrow/to_ruby.rs +83 -0
  34. data/ext/polars/src/interop/mod.rs +1 -0
  35. data/ext/polars/src/lazyframe/general.rs +920 -0
  36. data/ext/polars/src/lazyframe/mod.rs +3 -827
  37. data/ext/polars/src/lazyframe/serde.rs +31 -0
  38. data/ext/polars/src/lib.rs +54 -27
  39. data/ext/polars/src/map/dataframe.rs +10 -6
  40. data/ext/polars/src/map/lazy.rs +65 -4
  41. data/ext/polars/src/map/mod.rs +9 -8
  42. data/ext/polars/src/on_startup.rs +1 -1
  43. data/ext/polars/src/series/aggregation.rs +1 -5
  44. data/ext/polars/src/series/arithmetic.rs +10 -10
  45. data/ext/polars/src/series/construction.rs +2 -2
  46. data/ext/polars/src/series/export.rs +1 -1
  47. data/ext/polars/src/series/general.rs +631 -0
  48. data/ext/polars/src/series/import.rs +55 -0
  49. data/ext/polars/src/series/mod.rs +11 -638
  50. data/ext/polars/src/series/scatter.rs +2 -2
  51. data/ext/polars/src/utils.rs +0 -20
  52. data/lib/polars/batched_csv_reader.rb +0 -2
  53. data/lib/polars/binary_expr.rb +133 -9
  54. data/lib/polars/binary_name_space.rb +101 -6
  55. data/lib/polars/config.rb +4 -0
  56. data/lib/polars/data_frame.rb +452 -101
  57. data/lib/polars/data_type_group.rb +28 -0
  58. data/lib/polars/data_types.rb +3 -1
  59. data/lib/polars/date_time_expr.rb +244 -0
  60. data/lib/polars/date_time_name_space.rb +87 -0
  61. data/lib/polars/expr.rb +103 -2
  62. data/lib/polars/functions/aggregation/horizontal.rb +10 -4
  63. data/lib/polars/functions/as_datatype.rb +51 -2
  64. data/lib/polars/functions/col.rb +1 -1
  65. data/lib/polars/functions/eager.rb +1 -3
  66. data/lib/polars/functions/lazy.rb +95 -13
  67. data/lib/polars/functions/range/time_range.rb +21 -21
  68. data/lib/polars/io/csv.rb +14 -16
  69. data/lib/polars/io/database.rb +2 -2
  70. data/lib/polars/io/delta.rb +126 -0
  71. data/lib/polars/io/ipc.rb +14 -4
  72. data/lib/polars/io/ndjson.rb +10 -0
  73. data/lib/polars/io/parquet.rb +168 -111
  74. data/lib/polars/lazy_frame.rb +684 -20
  75. data/lib/polars/list_name_space.rb +169 -0
  76. data/lib/polars/selectors.rb +1226 -0
  77. data/lib/polars/series.rb +465 -35
  78. data/lib/polars/string_cache.rb +27 -1
  79. data/lib/polars/string_expr.rb +0 -1
  80. data/lib/polars/string_name_space.rb +73 -3
  81. data/lib/polars/struct_name_space.rb +31 -7
  82. data/lib/polars/utils/various.rb +5 -1
  83. data/lib/polars/utils.rb +45 -10
  84. data/lib/polars/version.rb +1 -1
  85. data/lib/polars.rb +17 -1
  86. metadata +16 -9
  87. data/lib/polars/functions.rb +0 -57
@@ -0,0 +1,631 @@
1
+ use magnus::{exception, Error, IntoValue, Value};
2
+ use polars::prelude::*;
3
+ use polars::series::IsSorted;
4
+
5
+ use crate::apply_method_all_arrow_series2;
6
+ use crate::conversion::*;
7
+ use crate::map::series::{call_lambda_and_extract, ApplyLambda};
8
+ use crate::{RbDataFrame, RbPolarsErr, RbResult, RbSeries};
9
+
10
+ impl RbSeries {
11
+ pub fn struct_unnest(&self) -> RbResult<RbDataFrame> {
12
+ let binding = self.series.borrow();
13
+ let ca = binding.struct_().map_err(RbPolarsErr::from)?;
14
+ let df: DataFrame = ca.clone().unnest();
15
+ Ok(df.into())
16
+ }
17
+
18
+ // TODO add to Ruby
19
+ pub fn struct_fields(&self) -> RbResult<Vec<String>> {
20
+ let binding = self.series.borrow();
21
+ let ca = binding.struct_().map_err(RbPolarsErr::from)?;
22
+ Ok(ca
23
+ .struct_fields()
24
+ .iter()
25
+ .map(|s| s.name().to_string())
26
+ .collect())
27
+ }
28
+
29
+ pub fn is_sorted_ascending_flag(&self) -> bool {
30
+ matches!(self.series.borrow().is_sorted_flag(), IsSorted::Ascending)
31
+ }
32
+
33
+ pub fn is_sorted_descending_flag(&self) -> bool {
34
+ matches!(self.series.borrow().is_sorted_flag(), IsSorted::Descending)
35
+ }
36
+
37
+ pub fn can_fast_explode_flag(&self) -> bool {
38
+ match self.series.borrow().list() {
39
+ Err(_) => false,
40
+ Ok(list) => list._can_fast_explode(),
41
+ }
42
+ }
43
+
44
+ pub fn cat_uses_lexical_ordering(&self) -> RbResult<bool> {
45
+ let binding = self.series.borrow();
46
+ let ca = binding.categorical().map_err(RbPolarsErr::from)?;
47
+ Ok(ca.uses_lexical_ordering())
48
+ }
49
+
50
+ pub fn cat_is_local(&self) -> RbResult<bool> {
51
+ let binding = self.series.borrow();
52
+ let ca = binding.categorical().map_err(RbPolarsErr::from)?;
53
+ Ok(ca.get_rev_map().is_local())
54
+ }
55
+
56
+ pub fn cat_to_local(&self) -> RbResult<Self> {
57
+ let binding = self.series.borrow();
58
+ let ca = binding.categorical().map_err(RbPolarsErr::from)?;
59
+ Ok(ca.to_local().into_series().into())
60
+ }
61
+
62
+ pub fn estimated_size(&self) -> usize {
63
+ self.series.borrow().estimated_size()
64
+ }
65
+
66
+ pub fn get_fmt(&self, index: usize, str_lengths: usize) -> String {
67
+ let val = format!("{}", self.series.borrow().get(index).unwrap());
68
+ if let DataType::String | DataType::Categorical(_, _) = self.series.borrow().dtype() {
69
+ let v_trunc = &val[..val
70
+ .char_indices()
71
+ .take(str_lengths)
72
+ .last()
73
+ .map(|(i, c)| i + c.len_utf8())
74
+ .unwrap_or(0)];
75
+ if val == v_trunc {
76
+ val
77
+ } else {
78
+ format!("{}…", v_trunc)
79
+ }
80
+ } else {
81
+ val
82
+ }
83
+ }
84
+
85
+ pub fn rechunk(&self, in_place: bool) -> Option<Self> {
86
+ let series = self.series.borrow_mut().rechunk();
87
+ if in_place {
88
+ *self.series.borrow_mut() = series;
89
+ None
90
+ } else {
91
+ Some(series.into())
92
+ }
93
+ }
94
+
95
+ pub fn get_idx(&self, idx: usize) -> RbResult<Value> {
96
+ Ok(Wrap(self.series.borrow().get(idx).map_err(RbPolarsErr::from)?).into_value())
97
+ }
98
+
99
+ pub fn bitand(&self, other: &RbSeries) -> RbResult<Self> {
100
+ let out = (&*self.series.borrow() & &*other.series.borrow()).map_err(RbPolarsErr::from)?;
101
+ Ok(out.into())
102
+ }
103
+
104
+ pub fn bitor(&self, other: &RbSeries) -> RbResult<Self> {
105
+ let out = (&*self.series.borrow() | &*other.series.borrow()).map_err(RbPolarsErr::from)?;
106
+ Ok(out.into())
107
+ }
108
+
109
+ pub fn bitxor(&self, other: &RbSeries) -> RbResult<Self> {
110
+ let out = (&*self.series.borrow() ^ &*other.series.borrow()).map_err(RbPolarsErr::from)?;
111
+ Ok(out.into())
112
+ }
113
+
114
+ pub fn chunk_lengths(&self) -> Vec<usize> {
115
+ self.series.borrow().chunk_lengths().collect()
116
+ }
117
+
118
+ pub fn name(&self) -> String {
119
+ self.series.borrow().name().to_string()
120
+ }
121
+
122
+ pub fn rename(&self, name: String) {
123
+ self.series.borrow_mut().rename(name.into());
124
+ }
125
+
126
+ pub fn dtype(&self) -> Value {
127
+ Wrap(self.series.borrow().dtype().clone()).into_value()
128
+ }
129
+
130
+ pub fn inner_dtype(&self) -> Option<Value> {
131
+ self.series
132
+ .borrow()
133
+ .dtype()
134
+ .inner_dtype()
135
+ .map(|dt| Wrap(dt.clone()).into_value())
136
+ }
137
+
138
+ pub fn set_sorted_flag(&self, descending: bool) -> Self {
139
+ let mut out = self.series.borrow().clone();
140
+ if descending {
141
+ out.set_sorted_flag(IsSorted::Descending);
142
+ } else {
143
+ out.set_sorted_flag(IsSorted::Ascending)
144
+ }
145
+ out.into()
146
+ }
147
+
148
+ pub fn n_chunks(&self) -> usize {
149
+ self.series.borrow().n_chunks()
150
+ }
151
+
152
+ pub fn append(&self, other: &RbSeries) -> RbResult<()> {
153
+ let mut binding = self.series.borrow_mut();
154
+ let res = binding.append(&other.series.borrow());
155
+ if let Err(e) = res {
156
+ Err(Error::new(exception::runtime_error(), e.to_string()))
157
+ } else {
158
+ Ok(())
159
+ }
160
+ }
161
+
162
+ pub fn extend(&self, other: &RbSeries) -> RbResult<()> {
163
+ self.series
164
+ .borrow_mut()
165
+ .extend(&other.series.borrow())
166
+ .map_err(RbPolarsErr::from)?;
167
+ Ok(())
168
+ }
169
+
170
+ pub fn new_from_index(&self, index: usize, length: usize) -> RbResult<Self> {
171
+ if index >= self.series.borrow().len() {
172
+ Err(Error::new(exception::arg_error(), "index is out of bounds"))
173
+ } else {
174
+ Ok(self.series.borrow().new_from_index(index, length).into())
175
+ }
176
+ }
177
+
178
+ pub fn filter(&self, filter: &RbSeries) -> RbResult<Self> {
179
+ let filter_series = &filter.series.borrow();
180
+ if let Ok(ca) = filter_series.bool() {
181
+ let series = self.series.borrow().filter(ca).unwrap();
182
+ Ok(series.into())
183
+ } else {
184
+ Err(Error::new(
185
+ exception::runtime_error(),
186
+ "Expected a boolean mask".to_string(),
187
+ ))
188
+ }
189
+ }
190
+
191
+ pub fn sort(&self, descending: bool, nulls_last: bool, multithreaded: bool) -> RbResult<Self> {
192
+ Ok(self
193
+ .series
194
+ .borrow_mut()
195
+ .sort(
196
+ SortOptions::default()
197
+ .with_order_descending(descending)
198
+ .with_nulls_last(nulls_last)
199
+ .with_multithreaded(multithreaded),
200
+ )
201
+ .map_err(RbPolarsErr::from)?
202
+ .into())
203
+ }
204
+
205
+ pub fn value_counts(
206
+ &self,
207
+ sort: bool,
208
+ parallel: bool,
209
+ name: String,
210
+ normalize: bool,
211
+ ) -> RbResult<RbDataFrame> {
212
+ let out = self
213
+ .series
214
+ .borrow()
215
+ .value_counts(sort, parallel, name.into(), normalize)
216
+ .map_err(RbPolarsErr::from)?;
217
+ Ok(out.into())
218
+ }
219
+
220
+ pub fn slice(&self, offset: i64, length: Option<usize>) -> Self {
221
+ let length = length.unwrap_or_else(|| self.series.borrow().len());
222
+ self.series.borrow().slice(offset, length).into()
223
+ }
224
+
225
+ pub fn take_with_series(&self, indices: &RbSeries) -> RbResult<Self> {
226
+ let binding = indices.series.borrow();
227
+ let idx = binding.idx().map_err(RbPolarsErr::from)?;
228
+ let take = self.series.borrow().take(idx).map_err(RbPolarsErr::from)?;
229
+ Ok(RbSeries::new(take))
230
+ }
231
+
232
+ pub fn null_count(&self) -> RbResult<usize> {
233
+ Ok(self.series.borrow().null_count())
234
+ }
235
+
236
+ pub fn has_nulls(&self) -> bool {
237
+ self.series.borrow().has_nulls()
238
+ }
239
+
240
+ pub fn sample_n(
241
+ &self,
242
+ n: usize,
243
+ with_replacement: bool,
244
+ shuffle: bool,
245
+ seed: Option<u64>,
246
+ ) -> RbResult<Self> {
247
+ let s = self
248
+ .series
249
+ .borrow()
250
+ .sample_n(n, with_replacement, shuffle, seed)
251
+ .map_err(RbPolarsErr::from)?;
252
+ Ok(s.into())
253
+ }
254
+
255
+ pub fn sample_frac(
256
+ &self,
257
+ frac: f64,
258
+ with_replacement: bool,
259
+ shuffle: bool,
260
+ seed: Option<u64>,
261
+ ) -> RbResult<Self> {
262
+ let s = self
263
+ .series
264
+ .borrow()
265
+ .sample_frac(frac, with_replacement, shuffle, seed)
266
+ .map_err(RbPolarsErr::from)?;
267
+ Ok(s.into())
268
+ }
269
+
270
+ pub fn equals(
271
+ &self,
272
+ other: &RbSeries,
273
+ check_dtypes: bool,
274
+ check_names: bool,
275
+ null_equal: bool,
276
+ ) -> bool {
277
+ if check_dtypes && (self.series.borrow().dtype() != other.series.borrow().dtype()) {
278
+ return false;
279
+ }
280
+ if check_names && (self.series.borrow().name() != other.series.borrow().name()) {
281
+ return false;
282
+ }
283
+ if null_equal {
284
+ self.series.borrow().equals_missing(&other.series.borrow())
285
+ } else {
286
+ self.series.borrow().equals(&other.series.borrow())
287
+ }
288
+ }
289
+
290
+ pub fn not(&self) -> RbResult<Self> {
291
+ let binding = self.series.borrow();
292
+ let bool = binding.bool().map_err(RbPolarsErr::from)?;
293
+ Ok((!bool).into_series().into())
294
+ }
295
+
296
+ pub fn to_s(&self) -> String {
297
+ format!("{}", self.series.borrow())
298
+ }
299
+
300
+ pub fn len(&self) -> usize {
301
+ self.series.borrow().len()
302
+ }
303
+
304
+ pub fn clone(&self) -> Self {
305
+ RbSeries::new(self.series.borrow().clone())
306
+ }
307
+
308
+ pub fn apply_lambda(
309
+ &self,
310
+ lambda: Value,
311
+ output_type: Option<Wrap<DataType>>,
312
+ skip_nulls: bool,
313
+ ) -> RbResult<Self> {
314
+ let series = &self.series.borrow();
315
+
316
+ let output_type = output_type.map(|dt| dt.0);
317
+
318
+ macro_rules! dispatch_apply {
319
+ ($self:expr, $method:ident, $($args:expr),*) => {
320
+ if matches!($self.dtype(), DataType::Object(_, _)) {
321
+ // let ca = $self.0.unpack::<ObjectType<ObjectValue>>().unwrap();
322
+ // ca.$method($($args),*)
323
+ todo!()
324
+ } else {
325
+ apply_method_all_arrow_series2!(
326
+ $self,
327
+ $method,
328
+ $($args),*
329
+ )
330
+ }
331
+
332
+ }
333
+
334
+ }
335
+
336
+ if matches!(
337
+ series.dtype(),
338
+ DataType::Datetime(_, _)
339
+ | DataType::Date
340
+ | DataType::Duration(_)
341
+ | DataType::Categorical(_, _)
342
+ | DataType::Time
343
+ ) || !skip_nulls
344
+ {
345
+ let mut avs = Vec::with_capacity(series.len());
346
+ let iter = series.iter().map(|av| {
347
+ let input = Wrap(av);
348
+ call_lambda_and_extract::<_, Wrap<AnyValue>>(lambda, input)
349
+ .unwrap()
350
+ .0
351
+ });
352
+ avs.extend(iter);
353
+ return Ok(Series::new(self.name().into(), &avs).into());
354
+ }
355
+
356
+ let out = match output_type {
357
+ Some(DataType::Int8) => {
358
+ let ca: Int8Chunked = dispatch_apply!(
359
+ series,
360
+ apply_lambda_with_primitive_out_type,
361
+ lambda,
362
+ 0,
363
+ None
364
+ )?;
365
+ ca.into_series()
366
+ }
367
+ Some(DataType::Int16) => {
368
+ let ca: Int16Chunked = dispatch_apply!(
369
+ series,
370
+ apply_lambda_with_primitive_out_type,
371
+ lambda,
372
+ 0,
373
+ None
374
+ )?;
375
+ ca.into_series()
376
+ }
377
+ Some(DataType::Int32) => {
378
+ let ca: Int32Chunked = dispatch_apply!(
379
+ series,
380
+ apply_lambda_with_primitive_out_type,
381
+ lambda,
382
+ 0,
383
+ None
384
+ )?;
385
+ ca.into_series()
386
+ }
387
+ Some(DataType::Int64) => {
388
+ let ca: Int64Chunked = dispatch_apply!(
389
+ series,
390
+ apply_lambda_with_primitive_out_type,
391
+ lambda,
392
+ 0,
393
+ None
394
+ )?;
395
+ ca.into_series()
396
+ }
397
+ Some(DataType::UInt8) => {
398
+ let ca: UInt8Chunked = dispatch_apply!(
399
+ series,
400
+ apply_lambda_with_primitive_out_type,
401
+ lambda,
402
+ 0,
403
+ None
404
+ )?;
405
+ ca.into_series()
406
+ }
407
+ Some(DataType::UInt16) => {
408
+ let ca: UInt16Chunked = dispatch_apply!(
409
+ series,
410
+ apply_lambda_with_primitive_out_type,
411
+ lambda,
412
+ 0,
413
+ None
414
+ )?;
415
+ ca.into_series()
416
+ }
417
+ Some(DataType::UInt32) => {
418
+ let ca: UInt32Chunked = dispatch_apply!(
419
+ series,
420
+ apply_lambda_with_primitive_out_type,
421
+ lambda,
422
+ 0,
423
+ None
424
+ )?;
425
+ ca.into_series()
426
+ }
427
+ Some(DataType::UInt64) => {
428
+ let ca: UInt64Chunked = dispatch_apply!(
429
+ series,
430
+ apply_lambda_with_primitive_out_type,
431
+ lambda,
432
+ 0,
433
+ None
434
+ )?;
435
+ ca.into_series()
436
+ }
437
+ Some(DataType::Float32) => {
438
+ let ca: Float32Chunked = dispatch_apply!(
439
+ series,
440
+ apply_lambda_with_primitive_out_type,
441
+ lambda,
442
+ 0,
443
+ None
444
+ )?;
445
+ ca.into_series()
446
+ }
447
+ Some(DataType::Float64) => {
448
+ let ca: Float64Chunked = dispatch_apply!(
449
+ series,
450
+ apply_lambda_with_primitive_out_type,
451
+ lambda,
452
+ 0,
453
+ None
454
+ )?;
455
+ ca.into_series()
456
+ }
457
+ Some(DataType::Boolean) => {
458
+ let ca: BooleanChunked =
459
+ dispatch_apply!(series, apply_lambda_with_bool_out_type, lambda, 0, None)?;
460
+ ca.into_series()
461
+ }
462
+ Some(DataType::Date) => {
463
+ let ca: Int32Chunked = dispatch_apply!(
464
+ series,
465
+ apply_lambda_with_primitive_out_type,
466
+ lambda,
467
+ 0,
468
+ None
469
+ )?;
470
+ ca.into_date().into_series()
471
+ }
472
+ Some(DataType::Datetime(tu, tz)) => {
473
+ let ca: Int64Chunked = dispatch_apply!(
474
+ series,
475
+ apply_lambda_with_primitive_out_type,
476
+ lambda,
477
+ 0,
478
+ None
479
+ )?;
480
+ ca.into_datetime(tu, tz).into_series()
481
+ }
482
+ Some(DataType::String) => {
483
+ let ca = dispatch_apply!(series, apply_lambda_with_utf8_out_type, lambda, 0, None)?;
484
+
485
+ ca.into_series()
486
+ }
487
+ Some(DataType::Object(_, _)) => {
488
+ let ca =
489
+ dispatch_apply!(series, apply_lambda_with_object_out_type, lambda, 0, None)?;
490
+ ca.into_series()
491
+ }
492
+ None => return dispatch_apply!(series, apply_lambda_unknown, lambda),
493
+
494
+ _ => return dispatch_apply!(series, apply_lambda_unknown, lambda),
495
+ };
496
+
497
+ Ok(RbSeries::new(out))
498
+ }
499
+
500
+ pub fn zip_with(&self, mask: &RbSeries, other: &RbSeries) -> RbResult<Self> {
501
+ let binding = mask.series.borrow();
502
+ let mask = binding.bool().map_err(RbPolarsErr::from)?;
503
+ let s = self
504
+ .series
505
+ .borrow()
506
+ .zip_with(mask, &other.series.borrow())
507
+ .map_err(RbPolarsErr::from)?;
508
+ Ok(RbSeries::new(s))
509
+ }
510
+
511
+ pub fn to_dummies(&self, sep: Option<String>, drop_first: bool) -> RbResult<RbDataFrame> {
512
+ let df = self
513
+ .series
514
+ .borrow()
515
+ .to_dummies(sep.as_deref(), drop_first)
516
+ .map_err(RbPolarsErr::from)?;
517
+ Ok(df.into())
518
+ }
519
+
520
+ pub fn n_unique(&self) -> RbResult<usize> {
521
+ let n = self.series.borrow().n_unique().map_err(RbPolarsErr::from)?;
522
+ Ok(n)
523
+ }
524
+
525
+ pub fn floor(&self) -> RbResult<Self> {
526
+ let s = self.series.borrow().floor().map_err(RbPolarsErr::from)?;
527
+ Ok(s.into())
528
+ }
529
+
530
+ pub fn shrink_to_fit(&self) {
531
+ self.series.borrow_mut().shrink_to_fit();
532
+ }
533
+
534
+ pub fn dot(&self, other: &RbSeries) -> RbResult<f64> {
535
+ let out = self
536
+ .series
537
+ .borrow()
538
+ .dot(&other.series.borrow())
539
+ .map_err(RbPolarsErr::from)?;
540
+ Ok(out)
541
+ }
542
+
543
+ pub fn skew(&self, bias: bool) -> RbResult<Option<f64>> {
544
+ let out = self.series.borrow().skew(bias).map_err(RbPolarsErr::from)?;
545
+ Ok(out)
546
+ }
547
+
548
+ pub fn kurtosis(&self, fisher: bool, bias: bool) -> RbResult<Option<f64>> {
549
+ let out = self
550
+ .series
551
+ .borrow()
552
+ .kurtosis(fisher, bias)
553
+ .map_err(RbPolarsErr::from)?;
554
+ Ok(out)
555
+ }
556
+
557
+ pub fn cast(&self, dtype: Wrap<DataType>, strict: bool) -> RbResult<Self> {
558
+ let dtype = dtype.0;
559
+ let out = if strict {
560
+ self.series.borrow().strict_cast(&dtype)
561
+ } else {
562
+ self.series.borrow().cast(&dtype)
563
+ };
564
+ let out = out.map_err(RbPolarsErr::from)?;
565
+ Ok(out.into())
566
+ }
567
+
568
+ pub fn time_unit(&self) -> Option<String> {
569
+ if let DataType::Datetime(tu, _) | DataType::Duration(tu) = self.series.borrow().dtype() {
570
+ Some(
571
+ match tu {
572
+ TimeUnit::Nanoseconds => "ns",
573
+ TimeUnit::Microseconds => "us",
574
+ TimeUnit::Milliseconds => "ms",
575
+ }
576
+ .to_string(),
577
+ )
578
+ } else {
579
+ None
580
+ }
581
+ }
582
+ }
583
+
584
// Generates a free function and an `RbSeries` method, both named `$name`,
// that set `value` at the positions selected by a boolean mask.
//
// `$native` is the Rust value type and `$cast` the `Series` accessor for the
// matching chunked array. `$variant` is accepted but not used by the
// expansion.
macro_rules! impl_set_with_mask {
    ($name:ident, $native:ty, $cast:ident, $variant:ident) => {
        fn $name(
            series: &Series,
            filter: &RbSeries,
            value: Option<$native>,
        ) -> PolarsResult<Series> {
            let filter_series = filter.series.borrow();
            let mask = filter_series.bool()?;
            let typed = series.$cast()?;
            let updated = typed.set(mask, value)?;
            Ok(updated.into_series())
        }

        impl RbSeries {
            pub fn $name(&self, filter: &RbSeries, value: Option<$native>) -> RbResult<Self> {
                let current = self.series.borrow();
                let updated = $name(&current, filter, value).map_err(RbPolarsErr::from)?;
                Ok(Self::new(updated))
            }
        }
    };
}
607
+
608
+ // impl_set_with_mask!(set_with_mask_str, &str, utf8, Utf8);
609
+ impl_set_with_mask!(set_with_mask_f64, f64, f64, Float64);
610
+ impl_set_with_mask!(set_with_mask_f32, f32, f32, Float32);
611
+ impl_set_with_mask!(set_with_mask_u8, u8, u8, UInt8);
612
+ impl_set_with_mask!(set_with_mask_u16, u16, u16, UInt16);
613
+ impl_set_with_mask!(set_with_mask_u32, u32, u32, UInt32);
614
+ impl_set_with_mask!(set_with_mask_u64, u64, u64, UInt64);
615
+ impl_set_with_mask!(set_with_mask_i8, i8, i8, Int8);
616
+ impl_set_with_mask!(set_with_mask_i16, i16, i16, Int16);
617
+ impl_set_with_mask!(set_with_mask_i32, i32, i32, Int32);
618
+ impl_set_with_mask!(set_with_mask_i64, i64, i64, Int64);
619
+ impl_set_with_mask!(set_with_mask_bool, bool, bool, Boolean);
620
+
621
+ impl RbSeries {
622
+ pub fn extend_constant(&self, value: Wrap<AnyValue>, n: usize) -> RbResult<Self> {
623
+ Ok(self
624
+ .series
625
+ .borrow()
626
+ .clone()
627
+ .extend_constant(value.0, n)
628
+ .map_err(RbPolarsErr::from)?
629
+ .into())
630
+ }
631
+ }
@@ -0,0 +1,55 @@
1
+ use magnus::prelude::*;
2
+ use magnus::Value;
3
+ use polars::export::arrow::array::Array;
4
+ use polars::export::arrow::ffi::{ArrowArrayStream, ArrowArrayStreamReader};
5
+ use polars::prelude::*;
6
+
7
+ use super::RbSeries;
8
+
9
+ use crate::exceptions::RbValueError;
10
+ use crate::RbResult;
11
+
12
+ /// Import `arrow_c_stream` across Ruby boundary.
13
+ fn call_arrow_c_stream(ob: Value) -> RbResult<Value> {
14
+ let capsule = ob.funcall("arrow_c_stream", ())?;
15
+ Ok(capsule)
16
+ }
17
+
18
+ pub(crate) fn import_stream_rbcapsule(capsule: Value) -> RbResult<RbSeries> {
19
+ let capsule_pointer: usize = capsule.funcall("to_i", ())?;
20
+
21
+ // # Safety
22
+ // capsule holds a valid C ArrowArrayStream pointer, as defined by the Arrow PyCapsule
23
+ // Interface
24
+ let mut stream = unsafe {
25
+ // Takes ownership of the pointed to ArrowArrayStream
26
+ // This acts to move the data out of the capsule pointer, setting the release callback to NULL
27
+ let stream_ptr = Box::new(std::ptr::replace(
28
+ capsule_pointer as _,
29
+ ArrowArrayStream::empty(),
30
+ ));
31
+ ArrowArrayStreamReader::try_new(stream_ptr)
32
+ .map_err(|err| RbValueError::new_err(err.to_string()))?
33
+ };
34
+
35
+ let mut produced_arrays: Vec<Box<dyn Array>> = vec![];
36
+ while let Some(array) = unsafe { stream.next() } {
37
+ produced_arrays.push(array.unwrap());
38
+ }
39
+
40
+ // Series::try_from fails for an empty vec of chunks
41
+ let s = if produced_arrays.is_empty() {
42
+ let polars_dt = DataType::from_arrow_field(stream.field());
43
+ Series::new_empty(stream.field().name.clone(), &polars_dt)
44
+ } else {
45
+ Series::try_from((stream.field(), produced_arrays)).unwrap()
46
+ };
47
+ Ok(RbSeries::new(s))
48
+ }
49
+
50
+ impl RbSeries {
51
+ pub fn from_arrow_c_stream(ob: Value) -> RbResult<Self> {
52
+ let capsule = call_arrow_c_stream(ob)?;
53
+ import_stream_rbcapsule(capsule)
54
+ }
55
+ }