polars-df 0.14.0 → 0.16.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (87) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +35 -0
  3. data/Cargo.lock +1523 -378
  4. data/LICENSE.txt +1 -0
  5. data/README.md +38 -4
  6. data/ext/polars/Cargo.toml +15 -5
  7. data/ext/polars/src/batched_csv.rs +7 -10
  8. data/ext/polars/src/conversion/any_value.rs +31 -21
  9. data/ext/polars/src/conversion/mod.rs +155 -48
  10. data/ext/polars/src/dataframe/construction.rs +0 -3
  11. data/ext/polars/src/dataframe/export.rs +9 -2
  12. data/ext/polars/src/dataframe/general.rs +15 -57
  13. data/ext/polars/src/dataframe/io.rs +77 -169
  14. data/ext/polars/src/dataframe/mod.rs +1 -0
  15. data/ext/polars/src/dataframe/serde.rs +15 -0
  16. data/ext/polars/src/error.rs +31 -48
  17. data/ext/polars/src/exceptions.rs +24 -0
  18. data/ext/polars/src/expr/binary.rs +4 -42
  19. data/ext/polars/src/expr/datetime.rs +5 -4
  20. data/ext/polars/src/expr/general.rs +16 -22
  21. data/ext/polars/src/expr/list.rs +18 -11
  22. data/ext/polars/src/expr/meta.rs +6 -2
  23. data/ext/polars/src/expr/rolling.rs +6 -7
  24. data/ext/polars/src/expr/string.rs +9 -36
  25. data/ext/polars/src/file.rs +78 -23
  26. data/ext/polars/src/functions/aggregation.rs +4 -4
  27. data/ext/polars/src/functions/business.rs +15 -0
  28. data/ext/polars/src/functions/io.rs +34 -13
  29. data/ext/polars/src/functions/lazy.rs +22 -12
  30. data/ext/polars/src/functions/meta.rs +1 -1
  31. data/ext/polars/src/functions/mod.rs +1 -0
  32. data/ext/polars/src/interop/arrow/mod.rs +1 -0
  33. data/ext/polars/src/interop/arrow/to_ruby.rs +83 -0
  34. data/ext/polars/src/interop/mod.rs +1 -0
  35. data/ext/polars/src/lazyframe/general.rs +920 -0
  36. data/ext/polars/src/lazyframe/mod.rs +3 -827
  37. data/ext/polars/src/lazyframe/serde.rs +31 -0
  38. data/ext/polars/src/lib.rs +54 -27
  39. data/ext/polars/src/map/dataframe.rs +10 -6
  40. data/ext/polars/src/map/lazy.rs +65 -4
  41. data/ext/polars/src/map/mod.rs +9 -8
  42. data/ext/polars/src/on_startup.rs +1 -1
  43. data/ext/polars/src/series/aggregation.rs +1 -5
  44. data/ext/polars/src/series/arithmetic.rs +10 -10
  45. data/ext/polars/src/series/construction.rs +2 -2
  46. data/ext/polars/src/series/export.rs +1 -1
  47. data/ext/polars/src/series/general.rs +631 -0
  48. data/ext/polars/src/series/import.rs +55 -0
  49. data/ext/polars/src/series/mod.rs +11 -638
  50. data/ext/polars/src/series/scatter.rs +2 -2
  51. data/ext/polars/src/utils.rs +0 -20
  52. data/lib/polars/batched_csv_reader.rb +0 -2
  53. data/lib/polars/binary_expr.rb +133 -9
  54. data/lib/polars/binary_name_space.rb +101 -6
  55. data/lib/polars/config.rb +4 -0
  56. data/lib/polars/data_frame.rb +452 -101
  57. data/lib/polars/data_type_group.rb +28 -0
  58. data/lib/polars/data_types.rb +3 -1
  59. data/lib/polars/date_time_expr.rb +244 -0
  60. data/lib/polars/date_time_name_space.rb +87 -0
  61. data/lib/polars/expr.rb +103 -2
  62. data/lib/polars/functions/aggregation/horizontal.rb +10 -4
  63. data/lib/polars/functions/as_datatype.rb +51 -2
  64. data/lib/polars/functions/col.rb +1 -1
  65. data/lib/polars/functions/eager.rb +1 -3
  66. data/lib/polars/functions/lazy.rb +95 -13
  67. data/lib/polars/functions/range/time_range.rb +21 -21
  68. data/lib/polars/io/csv.rb +14 -16
  69. data/lib/polars/io/database.rb +2 -2
  70. data/lib/polars/io/delta.rb +126 -0
  71. data/lib/polars/io/ipc.rb +14 -4
  72. data/lib/polars/io/ndjson.rb +10 -0
  73. data/lib/polars/io/parquet.rb +168 -111
  74. data/lib/polars/lazy_frame.rb +684 -20
  75. data/lib/polars/list_name_space.rb +169 -0
  76. data/lib/polars/selectors.rb +1226 -0
  77. data/lib/polars/series.rb +465 -35
  78. data/lib/polars/string_cache.rb +27 -1
  79. data/lib/polars/string_expr.rb +0 -1
  80. data/lib/polars/string_name_space.rb +73 -3
  81. data/lib/polars/struct_name_space.rb +31 -7
  82. data/lib/polars/utils/various.rb +5 -1
  83. data/lib/polars/utils.rb +45 -10
  84. data/lib/polars/version.rb +1 -1
  85. data/lib/polars.rb +17 -1
  86. metadata +16 -9
  87. data/lib/polars/functions.rb +0 -57
@@ -0,0 +1,631 @@
1
+ use magnus::{exception, Error, IntoValue, Value};
2
+ use polars::prelude::*;
3
+ use polars::series::IsSorted;
4
+
5
+ use crate::apply_method_all_arrow_series2;
6
+ use crate::conversion::*;
7
+ use crate::map::series::{call_lambda_and_extract, ApplyLambda};
8
+ use crate::{RbDataFrame, RbPolarsErr, RbResult, RbSeries};
9
+
10
+ impl RbSeries {
11
+ pub fn struct_unnest(&self) -> RbResult<RbDataFrame> {
12
+ let binding = self.series.borrow();
13
+ let ca = binding.struct_().map_err(RbPolarsErr::from)?;
14
+ let df: DataFrame = ca.clone().unnest();
15
+ Ok(df.into())
16
+ }
17
+
18
+ // TODO add to Ruby
19
+ pub fn struct_fields(&self) -> RbResult<Vec<String>> {
20
+ let binding = self.series.borrow();
21
+ let ca = binding.struct_().map_err(RbPolarsErr::from)?;
22
+ Ok(ca
23
+ .struct_fields()
24
+ .iter()
25
+ .map(|s| s.name().to_string())
26
+ .collect())
27
+ }
28
+
29
+ pub fn is_sorted_ascending_flag(&self) -> bool {
30
+ matches!(self.series.borrow().is_sorted_flag(), IsSorted::Ascending)
31
+ }
32
+
33
+ pub fn is_sorted_descending_flag(&self) -> bool {
34
+ matches!(self.series.borrow().is_sorted_flag(), IsSorted::Descending)
35
+ }
36
+
37
+ pub fn can_fast_explode_flag(&self) -> bool {
38
+ match self.series.borrow().list() {
39
+ Err(_) => false,
40
+ Ok(list) => list._can_fast_explode(),
41
+ }
42
+ }
43
+
44
+ pub fn cat_uses_lexical_ordering(&self) -> RbResult<bool> {
45
+ let binding = self.series.borrow();
46
+ let ca = binding.categorical().map_err(RbPolarsErr::from)?;
47
+ Ok(ca.uses_lexical_ordering())
48
+ }
49
+
50
+ pub fn cat_is_local(&self) -> RbResult<bool> {
51
+ let binding = self.series.borrow();
52
+ let ca = binding.categorical().map_err(RbPolarsErr::from)?;
53
+ Ok(ca.get_rev_map().is_local())
54
+ }
55
+
56
+ pub fn cat_to_local(&self) -> RbResult<Self> {
57
+ let binding = self.series.borrow();
58
+ let ca = binding.categorical().map_err(RbPolarsErr::from)?;
59
+ Ok(ca.to_local().into_series().into())
60
+ }
61
+
62
+ pub fn estimated_size(&self) -> usize {
63
+ self.series.borrow().estimated_size()
64
+ }
65
+
66
+ pub fn get_fmt(&self, index: usize, str_lengths: usize) -> String {
67
+ let val = format!("{}", self.series.borrow().get(index).unwrap());
68
+ if let DataType::String | DataType::Categorical(_, _) = self.series.borrow().dtype() {
69
+ let v_trunc = &val[..val
70
+ .char_indices()
71
+ .take(str_lengths)
72
+ .last()
73
+ .map(|(i, c)| i + c.len_utf8())
74
+ .unwrap_or(0)];
75
+ if val == v_trunc {
76
+ val
77
+ } else {
78
+ format!("{}…", v_trunc)
79
+ }
80
+ } else {
81
+ val
82
+ }
83
+ }
84
+
85
+ pub fn rechunk(&self, in_place: bool) -> Option<Self> {
86
+ let series = self.series.borrow_mut().rechunk();
87
+ if in_place {
88
+ *self.series.borrow_mut() = series;
89
+ None
90
+ } else {
91
+ Some(series.into())
92
+ }
93
+ }
94
+
95
+ pub fn get_idx(&self, idx: usize) -> RbResult<Value> {
96
+ Ok(Wrap(self.series.borrow().get(idx).map_err(RbPolarsErr::from)?).into_value())
97
+ }
98
+
99
+ pub fn bitand(&self, other: &RbSeries) -> RbResult<Self> {
100
+ let out = (&*self.series.borrow() & &*other.series.borrow()).map_err(RbPolarsErr::from)?;
101
+ Ok(out.into())
102
+ }
103
+
104
+ pub fn bitor(&self, other: &RbSeries) -> RbResult<Self> {
105
+ let out = (&*self.series.borrow() | &*other.series.borrow()).map_err(RbPolarsErr::from)?;
106
+ Ok(out.into())
107
+ }
108
+
109
+ pub fn bitxor(&self, other: &RbSeries) -> RbResult<Self> {
110
+ let out = (&*self.series.borrow() ^ &*other.series.borrow()).map_err(RbPolarsErr::from)?;
111
+ Ok(out.into())
112
+ }
113
+
114
+ pub fn chunk_lengths(&self) -> Vec<usize> {
115
+ self.series.borrow().chunk_lengths().collect()
116
+ }
117
+
118
+ pub fn name(&self) -> String {
119
+ self.series.borrow().name().to_string()
120
+ }
121
+
122
+ pub fn rename(&self, name: String) {
123
+ self.series.borrow_mut().rename(name.into());
124
+ }
125
+
126
+ pub fn dtype(&self) -> Value {
127
+ Wrap(self.series.borrow().dtype().clone()).into_value()
128
+ }
129
+
130
+ pub fn inner_dtype(&self) -> Option<Value> {
131
+ self.series
132
+ .borrow()
133
+ .dtype()
134
+ .inner_dtype()
135
+ .map(|dt| Wrap(dt.clone()).into_value())
136
+ }
137
+
138
+ pub fn set_sorted_flag(&self, descending: bool) -> Self {
139
+ let mut out = self.series.borrow().clone();
140
+ if descending {
141
+ out.set_sorted_flag(IsSorted::Descending);
142
+ } else {
143
+ out.set_sorted_flag(IsSorted::Ascending)
144
+ }
145
+ out.into()
146
+ }
147
+
148
+ pub fn n_chunks(&self) -> usize {
149
+ self.series.borrow().n_chunks()
150
+ }
151
+
152
+ pub fn append(&self, other: &RbSeries) -> RbResult<()> {
153
+ let mut binding = self.series.borrow_mut();
154
+ let res = binding.append(&other.series.borrow());
155
+ if let Err(e) = res {
156
+ Err(Error::new(exception::runtime_error(), e.to_string()))
157
+ } else {
158
+ Ok(())
159
+ }
160
+ }
161
+
162
+ pub fn extend(&self, other: &RbSeries) -> RbResult<()> {
163
+ self.series
164
+ .borrow_mut()
165
+ .extend(&other.series.borrow())
166
+ .map_err(RbPolarsErr::from)?;
167
+ Ok(())
168
+ }
169
+
170
+ pub fn new_from_index(&self, index: usize, length: usize) -> RbResult<Self> {
171
+ if index >= self.series.borrow().len() {
172
+ Err(Error::new(exception::arg_error(), "index is out of bounds"))
173
+ } else {
174
+ Ok(self.series.borrow().new_from_index(index, length).into())
175
+ }
176
+ }
177
+
178
+ pub fn filter(&self, filter: &RbSeries) -> RbResult<Self> {
179
+ let filter_series = &filter.series.borrow();
180
+ if let Ok(ca) = filter_series.bool() {
181
+ let series = self.series.borrow().filter(ca).unwrap();
182
+ Ok(series.into())
183
+ } else {
184
+ Err(Error::new(
185
+ exception::runtime_error(),
186
+ "Expected a boolean mask".to_string(),
187
+ ))
188
+ }
189
+ }
190
+
191
+ pub fn sort(&self, descending: bool, nulls_last: bool, multithreaded: bool) -> RbResult<Self> {
192
+ Ok(self
193
+ .series
194
+ .borrow_mut()
195
+ .sort(
196
+ SortOptions::default()
197
+ .with_order_descending(descending)
198
+ .with_nulls_last(nulls_last)
199
+ .with_multithreaded(multithreaded),
200
+ )
201
+ .map_err(RbPolarsErr::from)?
202
+ .into())
203
+ }
204
+
205
+ pub fn value_counts(
206
+ &self,
207
+ sort: bool,
208
+ parallel: bool,
209
+ name: String,
210
+ normalize: bool,
211
+ ) -> RbResult<RbDataFrame> {
212
+ let out = self
213
+ .series
214
+ .borrow()
215
+ .value_counts(sort, parallel, name.into(), normalize)
216
+ .map_err(RbPolarsErr::from)?;
217
+ Ok(out.into())
218
+ }
219
+
220
+ pub fn slice(&self, offset: i64, length: Option<usize>) -> Self {
221
+ let length = length.unwrap_or_else(|| self.series.borrow().len());
222
+ self.series.borrow().slice(offset, length).into()
223
+ }
224
+
225
+ pub fn take_with_series(&self, indices: &RbSeries) -> RbResult<Self> {
226
+ let binding = indices.series.borrow();
227
+ let idx = binding.idx().map_err(RbPolarsErr::from)?;
228
+ let take = self.series.borrow().take(idx).map_err(RbPolarsErr::from)?;
229
+ Ok(RbSeries::new(take))
230
+ }
231
+
232
+ pub fn null_count(&self) -> RbResult<usize> {
233
+ Ok(self.series.borrow().null_count())
234
+ }
235
+
236
+ pub fn has_nulls(&self) -> bool {
237
+ self.series.borrow().has_nulls()
238
+ }
239
+
240
+ pub fn sample_n(
241
+ &self,
242
+ n: usize,
243
+ with_replacement: bool,
244
+ shuffle: bool,
245
+ seed: Option<u64>,
246
+ ) -> RbResult<Self> {
247
+ let s = self
248
+ .series
249
+ .borrow()
250
+ .sample_n(n, with_replacement, shuffle, seed)
251
+ .map_err(RbPolarsErr::from)?;
252
+ Ok(s.into())
253
+ }
254
+
255
+ pub fn sample_frac(
256
+ &self,
257
+ frac: f64,
258
+ with_replacement: bool,
259
+ shuffle: bool,
260
+ seed: Option<u64>,
261
+ ) -> RbResult<Self> {
262
+ let s = self
263
+ .series
264
+ .borrow()
265
+ .sample_frac(frac, with_replacement, shuffle, seed)
266
+ .map_err(RbPolarsErr::from)?;
267
+ Ok(s.into())
268
+ }
269
+
270
+ pub fn equals(
271
+ &self,
272
+ other: &RbSeries,
273
+ check_dtypes: bool,
274
+ check_names: bool,
275
+ null_equal: bool,
276
+ ) -> bool {
277
+ if check_dtypes && (self.series.borrow().dtype() != other.series.borrow().dtype()) {
278
+ return false;
279
+ }
280
+ if check_names && (self.series.borrow().name() != other.series.borrow().name()) {
281
+ return false;
282
+ }
283
+ if null_equal {
284
+ self.series.borrow().equals_missing(&other.series.borrow())
285
+ } else {
286
+ self.series.borrow().equals(&other.series.borrow())
287
+ }
288
+ }
289
+
290
+ pub fn not(&self) -> RbResult<Self> {
291
+ let binding = self.series.borrow();
292
+ let bool = binding.bool().map_err(RbPolarsErr::from)?;
293
+ Ok((!bool).into_series().into())
294
+ }
295
+
296
+ pub fn to_s(&self) -> String {
297
+ format!("{}", self.series.borrow())
298
+ }
299
+
300
+ pub fn len(&self) -> usize {
301
+ self.series.borrow().len()
302
+ }
303
+
304
+ pub fn clone(&self) -> Self {
305
+ RbSeries::new(self.series.borrow().clone())
306
+ }
307
+
308
+ pub fn apply_lambda(
309
+ &self,
310
+ lambda: Value,
311
+ output_type: Option<Wrap<DataType>>,
312
+ skip_nulls: bool,
313
+ ) -> RbResult<Self> {
314
+ let series = &self.series.borrow();
315
+
316
+ let output_type = output_type.map(|dt| dt.0);
317
+
318
+ macro_rules! dispatch_apply {
319
+ ($self:expr, $method:ident, $($args:expr),*) => {
320
+ if matches!($self.dtype(), DataType::Object(_, _)) {
321
+ // let ca = $self.0.unpack::<ObjectType<ObjectValue>>().unwrap();
322
+ // ca.$method($($args),*)
323
+ todo!()
324
+ } else {
325
+ apply_method_all_arrow_series2!(
326
+ $self,
327
+ $method,
328
+ $($args),*
329
+ )
330
+ }
331
+
332
+ }
333
+
334
+ }
335
+
336
+ if matches!(
337
+ series.dtype(),
338
+ DataType::Datetime(_, _)
339
+ | DataType::Date
340
+ | DataType::Duration(_)
341
+ | DataType::Categorical(_, _)
342
+ | DataType::Time
343
+ ) || !skip_nulls
344
+ {
345
+ let mut avs = Vec::with_capacity(series.len());
346
+ let iter = series.iter().map(|av| {
347
+ let input = Wrap(av);
348
+ call_lambda_and_extract::<_, Wrap<AnyValue>>(lambda, input)
349
+ .unwrap()
350
+ .0
351
+ });
352
+ avs.extend(iter);
353
+ return Ok(Series::new(self.name().into(), &avs).into());
354
+ }
355
+
356
+ let out = match output_type {
357
+ Some(DataType::Int8) => {
358
+ let ca: Int8Chunked = dispatch_apply!(
359
+ series,
360
+ apply_lambda_with_primitive_out_type,
361
+ lambda,
362
+ 0,
363
+ None
364
+ )?;
365
+ ca.into_series()
366
+ }
367
+ Some(DataType::Int16) => {
368
+ let ca: Int16Chunked = dispatch_apply!(
369
+ series,
370
+ apply_lambda_with_primitive_out_type,
371
+ lambda,
372
+ 0,
373
+ None
374
+ )?;
375
+ ca.into_series()
376
+ }
377
+ Some(DataType::Int32) => {
378
+ let ca: Int32Chunked = dispatch_apply!(
379
+ series,
380
+ apply_lambda_with_primitive_out_type,
381
+ lambda,
382
+ 0,
383
+ None
384
+ )?;
385
+ ca.into_series()
386
+ }
387
+ Some(DataType::Int64) => {
388
+ let ca: Int64Chunked = dispatch_apply!(
389
+ series,
390
+ apply_lambda_with_primitive_out_type,
391
+ lambda,
392
+ 0,
393
+ None
394
+ )?;
395
+ ca.into_series()
396
+ }
397
+ Some(DataType::UInt8) => {
398
+ let ca: UInt8Chunked = dispatch_apply!(
399
+ series,
400
+ apply_lambda_with_primitive_out_type,
401
+ lambda,
402
+ 0,
403
+ None
404
+ )?;
405
+ ca.into_series()
406
+ }
407
+ Some(DataType::UInt16) => {
408
+ let ca: UInt16Chunked = dispatch_apply!(
409
+ series,
410
+ apply_lambda_with_primitive_out_type,
411
+ lambda,
412
+ 0,
413
+ None
414
+ )?;
415
+ ca.into_series()
416
+ }
417
+ Some(DataType::UInt32) => {
418
+ let ca: UInt32Chunked = dispatch_apply!(
419
+ series,
420
+ apply_lambda_with_primitive_out_type,
421
+ lambda,
422
+ 0,
423
+ None
424
+ )?;
425
+ ca.into_series()
426
+ }
427
+ Some(DataType::UInt64) => {
428
+ let ca: UInt64Chunked = dispatch_apply!(
429
+ series,
430
+ apply_lambda_with_primitive_out_type,
431
+ lambda,
432
+ 0,
433
+ None
434
+ )?;
435
+ ca.into_series()
436
+ }
437
+ Some(DataType::Float32) => {
438
+ let ca: Float32Chunked = dispatch_apply!(
439
+ series,
440
+ apply_lambda_with_primitive_out_type,
441
+ lambda,
442
+ 0,
443
+ None
444
+ )?;
445
+ ca.into_series()
446
+ }
447
+ Some(DataType::Float64) => {
448
+ let ca: Float64Chunked = dispatch_apply!(
449
+ series,
450
+ apply_lambda_with_primitive_out_type,
451
+ lambda,
452
+ 0,
453
+ None
454
+ )?;
455
+ ca.into_series()
456
+ }
457
+ Some(DataType::Boolean) => {
458
+ let ca: BooleanChunked =
459
+ dispatch_apply!(series, apply_lambda_with_bool_out_type, lambda, 0, None)?;
460
+ ca.into_series()
461
+ }
462
+ Some(DataType::Date) => {
463
+ let ca: Int32Chunked = dispatch_apply!(
464
+ series,
465
+ apply_lambda_with_primitive_out_type,
466
+ lambda,
467
+ 0,
468
+ None
469
+ )?;
470
+ ca.into_date().into_series()
471
+ }
472
+ Some(DataType::Datetime(tu, tz)) => {
473
+ let ca: Int64Chunked = dispatch_apply!(
474
+ series,
475
+ apply_lambda_with_primitive_out_type,
476
+ lambda,
477
+ 0,
478
+ None
479
+ )?;
480
+ ca.into_datetime(tu, tz).into_series()
481
+ }
482
+ Some(DataType::String) => {
483
+ let ca = dispatch_apply!(series, apply_lambda_with_utf8_out_type, lambda, 0, None)?;
484
+
485
+ ca.into_series()
486
+ }
487
+ Some(DataType::Object(_, _)) => {
488
+ let ca =
489
+ dispatch_apply!(series, apply_lambda_with_object_out_type, lambda, 0, None)?;
490
+ ca.into_series()
491
+ }
492
+ None => return dispatch_apply!(series, apply_lambda_unknown, lambda),
493
+
494
+ _ => return dispatch_apply!(series, apply_lambda_unknown, lambda),
495
+ };
496
+
497
+ Ok(RbSeries::new(out))
498
+ }
499
+
500
+ pub fn zip_with(&self, mask: &RbSeries, other: &RbSeries) -> RbResult<Self> {
501
+ let binding = mask.series.borrow();
502
+ let mask = binding.bool().map_err(RbPolarsErr::from)?;
503
+ let s = self
504
+ .series
505
+ .borrow()
506
+ .zip_with(mask, &other.series.borrow())
507
+ .map_err(RbPolarsErr::from)?;
508
+ Ok(RbSeries::new(s))
509
+ }
510
+
511
+ pub fn to_dummies(&self, sep: Option<String>, drop_first: bool) -> RbResult<RbDataFrame> {
512
+ let df = self
513
+ .series
514
+ .borrow()
515
+ .to_dummies(sep.as_deref(), drop_first)
516
+ .map_err(RbPolarsErr::from)?;
517
+ Ok(df.into())
518
+ }
519
+
520
+ pub fn n_unique(&self) -> RbResult<usize> {
521
+ let n = self.series.borrow().n_unique().map_err(RbPolarsErr::from)?;
522
+ Ok(n)
523
+ }
524
+
525
+ pub fn floor(&self) -> RbResult<Self> {
526
+ let s = self.series.borrow().floor().map_err(RbPolarsErr::from)?;
527
+ Ok(s.into())
528
+ }
529
+
530
+ pub fn shrink_to_fit(&self) {
531
+ self.series.borrow_mut().shrink_to_fit();
532
+ }
533
+
534
+ pub fn dot(&self, other: &RbSeries) -> RbResult<f64> {
535
+ let out = self
536
+ .series
537
+ .borrow()
538
+ .dot(&other.series.borrow())
539
+ .map_err(RbPolarsErr::from)?;
540
+ Ok(out)
541
+ }
542
+
543
+ pub fn skew(&self, bias: bool) -> RbResult<Option<f64>> {
544
+ let out = self.series.borrow().skew(bias).map_err(RbPolarsErr::from)?;
545
+ Ok(out)
546
+ }
547
+
548
+ pub fn kurtosis(&self, fisher: bool, bias: bool) -> RbResult<Option<f64>> {
549
+ let out = self
550
+ .series
551
+ .borrow()
552
+ .kurtosis(fisher, bias)
553
+ .map_err(RbPolarsErr::from)?;
554
+ Ok(out)
555
+ }
556
+
557
+ pub fn cast(&self, dtype: Wrap<DataType>, strict: bool) -> RbResult<Self> {
558
+ let dtype = dtype.0;
559
+ let out = if strict {
560
+ self.series.borrow().strict_cast(&dtype)
561
+ } else {
562
+ self.series.borrow().cast(&dtype)
563
+ };
564
+ let out = out.map_err(RbPolarsErr::from)?;
565
+ Ok(out.into())
566
+ }
567
+
568
+ pub fn time_unit(&self) -> Option<String> {
569
+ if let DataType::Datetime(tu, _) | DataType::Duration(tu) = self.series.borrow().dtype() {
570
+ Some(
571
+ match tu {
572
+ TimeUnit::Nanoseconds => "ns",
573
+ TimeUnit::Microseconds => "us",
574
+ TimeUnit::Milliseconds => "ms",
575
+ }
576
+ .to_string(),
577
+ )
578
+ } else {
579
+ None
580
+ }
581
+ }
582
+ }
583
+
584
// Generates, for one native type, a free helper and an `RbSeries` method of
// the same name. Both overwrite the positions selected by a boolean mask with
// `value` and return the result as a new series.
//
// Arguments: generated name, native value type, the `Series` accessor used to
// reach the typed array, and the dtype variant name (accepted but not used in
// the expansion).
macro_rules! impl_set_with_mask {
    ($name:ident, $native:ty, $cast:ident, $variant:ident) => {
        fn $name(
            series: &Series,
            filter: &RbSeries,
            value: Option<$native>,
        ) -> PolarsResult<Series> {
            let mask_series = filter.series.borrow();
            // Resolve the mask first, then the typed view of the target.
            let mask = mask_series.bool()?;
            let typed = series.$cast()?;
            let updated = typed.set(mask, value)?;
            Ok(updated.into_series())
        }

        impl RbSeries {
            pub fn $name(&self, filter: &RbSeries, value: Option<$native>) -> RbResult<Self> {
                let source = self.series.borrow();
                let updated = $name(&source, filter, value).map_err(RbPolarsErr::from)?;
                Ok(Self::new(updated))
            }
        }
    };
}
607
+
608
+ // impl_set_with_mask!(set_with_mask_str, &str, utf8, Utf8);
609
+ impl_set_with_mask!(set_with_mask_f64, f64, f64, Float64);
610
+ impl_set_with_mask!(set_with_mask_f32, f32, f32, Float32);
611
+ impl_set_with_mask!(set_with_mask_u8, u8, u8, UInt8);
612
+ impl_set_with_mask!(set_with_mask_u16, u16, u16, UInt16);
613
+ impl_set_with_mask!(set_with_mask_u32, u32, u32, UInt32);
614
+ impl_set_with_mask!(set_with_mask_u64, u64, u64, UInt64);
615
+ impl_set_with_mask!(set_with_mask_i8, i8, i8, Int8);
616
+ impl_set_with_mask!(set_with_mask_i16, i16, i16, Int16);
617
+ impl_set_with_mask!(set_with_mask_i32, i32, i32, Int32);
618
+ impl_set_with_mask!(set_with_mask_i64, i64, i64, Int64);
619
+ impl_set_with_mask!(set_with_mask_bool, bool, bool, Boolean);
620
+
621
+ impl RbSeries {
622
+ pub fn extend_constant(&self, value: Wrap<AnyValue>, n: usize) -> RbResult<Self> {
623
+ Ok(self
624
+ .series
625
+ .borrow()
626
+ .clone()
627
+ .extend_constant(value.0, n)
628
+ .map_err(RbPolarsErr::from)?
629
+ .into())
630
+ }
631
+ }
@@ -0,0 +1,55 @@
1
+ use magnus::prelude::*;
2
+ use magnus::Value;
3
+ use polars::export::arrow::array::Array;
4
+ use polars::export::arrow::ffi::{ArrowArrayStream, ArrowArrayStreamReader};
5
+ use polars::prelude::*;
6
+
7
+ use super::RbSeries;
8
+
9
+ use crate::exceptions::RbValueError;
10
+ use crate::RbResult;
11
+
12
+ /// Import `arrow_c_stream` across Ruby boundary.
13
+ fn call_arrow_c_stream(ob: Value) -> RbResult<Value> {
14
+ let capsule = ob.funcall("arrow_c_stream", ())?;
15
+ Ok(capsule)
16
+ }
17
+
18
+ pub(crate) fn import_stream_rbcapsule(capsule: Value) -> RbResult<RbSeries> {
19
+ let capsule_pointer: usize = capsule.funcall("to_i", ())?;
20
+
21
+ // # Safety
22
+ // capsule holds a valid C ArrowArrayStream pointer, as defined by the Arrow PyCapsule
23
+ // Interface
24
+ let mut stream = unsafe {
25
+ // Takes ownership of the pointed to ArrowArrayStream
26
+ // This acts to move the data out of the capsule pointer, setting the release callback to NULL
27
+ let stream_ptr = Box::new(std::ptr::replace(
28
+ capsule_pointer as _,
29
+ ArrowArrayStream::empty(),
30
+ ));
31
+ ArrowArrayStreamReader::try_new(stream_ptr)
32
+ .map_err(|err| RbValueError::new_err(err.to_string()))?
33
+ };
34
+
35
+ let mut produced_arrays: Vec<Box<dyn Array>> = vec![];
36
+ while let Some(array) = unsafe { stream.next() } {
37
+ produced_arrays.push(array.unwrap());
38
+ }
39
+
40
+ // Series::try_from fails for an empty vec of chunks
41
+ let s = if produced_arrays.is_empty() {
42
+ let polars_dt = DataType::from_arrow_field(stream.field());
43
+ Series::new_empty(stream.field().name.clone(), &polars_dt)
44
+ } else {
45
+ Series::try_from((stream.field(), produced_arrays)).unwrap()
46
+ };
47
+ Ok(RbSeries::new(s))
48
+ }
49
+
50
+ impl RbSeries {
51
+ pub fn from_arrow_c_stream(ob: Value) -> RbResult<Self> {
52
+ let capsule = call_arrow_c_stream(ob)?;
53
+ import_stream_rbcapsule(capsule)
54
+ }
55
+ }