polars-df 0.1.4 → 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,1173 @@
1
+ use magnus::{class, RHash, TryConvert, Value};
2
+ use polars::prelude::*;
3
+
4
+ use super::*;
5
+ use crate::conversion::slice_to_wrapped;
6
+ use crate::series::RbSeries;
7
+ use crate::{ObjectValue, RbResult};
8
+
9
+ /// Find the output type and dispatch to that implementation.
10
+ fn infer_and_finish<'a, A: ApplyLambda<'a>>(
11
+ applyer: &'a A,
12
+ lambda: Value,
13
+ out: Value,
14
+ null_count: usize,
15
+ ) -> RbResult<RbSeries> {
16
+ if out.is_kind_of(class::true_class()) || out.is_kind_of(class::false_class()) {
17
+ let first_value = out.try_convert::<bool>().unwrap();
18
+ applyer
19
+ .apply_lambda_with_bool_out_type(lambda, null_count, Some(first_value))
20
+ .map(|ca| ca.into_series().into())
21
+ } else if out.is_kind_of(class::float()) {
22
+ let first_value = out.try_convert::<f64>().unwrap();
23
+ applyer
24
+ .apply_lambda_with_primitive_out_type::<Float64Type>(
25
+ lambda,
26
+ null_count,
27
+ Some(first_value),
28
+ )
29
+ .map(|ca| ca.into_series().into())
30
+ } else if out.is_kind_of(class::string()) {
31
+ let first_value = out.try_convert::<String>().unwrap();
32
+ applyer
33
+ .apply_lambda_with_utf8_out_type(lambda, null_count, Some(first_value.as_str()))
34
+ .map(|ca| ca.into_series().into())
35
+ } else if out.respond_to("_s", true)? {
36
+ todo!()
37
+ } else if out.is_kind_of(class::array()) {
38
+ todo!()
39
+ } else if out.is_kind_of(class::hash()) {
40
+ let first = out.try_convert::<Wrap<AnyValue<'_>>>()?;
41
+ applyer.apply_to_struct(lambda, null_count, first.0)
42
+ }
43
+ // this succeeds for numpy ints as well, where checking if it is pyint fails
44
+ // we do this later in the chain so that we don't extract integers from string chars.
45
+ else if out.try_convert::<i64>().is_ok() {
46
+ let first_value = out.try_convert::<i64>().unwrap();
47
+ applyer
48
+ .apply_lambda_with_primitive_out_type::<Int64Type>(
49
+ lambda,
50
+ null_count,
51
+ Some(first_value),
52
+ )
53
+ .map(|ca| ca.into_series().into())
54
+ } else if let Ok(av) = out.try_convert::<Wrap<AnyValue>>() {
55
+ applyer
56
+ .apply_extract_any_values(lambda, null_count, av.0)
57
+ .map(|s| s.into())
58
+ } else {
59
+ applyer
60
+ .apply_lambda_with_object_out_type(lambda, null_count, Some(out.into()))
61
+ .map(|ca| ca.into_series().into())
62
+ }
63
+ }
64
+
65
+ pub trait ApplyLambda<'a> {
66
+ fn apply_lambda_unknown(&'a self, _lambda: Value) -> RbResult<RbSeries>;
67
+
68
+ /// Apply a lambda that doesn't change output types
69
+ fn apply_lambda(&'a self, _lambda: Value) -> RbResult<RbSeries>;
70
+
71
+ // Used to store a struct type
72
+ fn apply_to_struct(
73
+ &'a self,
74
+ lambda: Value,
75
+ init_null_count: usize,
76
+ first_value: AnyValue<'a>,
77
+ ) -> RbResult<RbSeries>;
78
+
79
+ /// Apply a lambda with a primitive output type
80
+ fn apply_lambda_with_primitive_out_type<D>(
81
+ &'a self,
82
+ lambda: Value,
83
+ init_null_count: usize,
84
+ first_value: Option<D::Native>,
85
+ ) -> RbResult<ChunkedArray<D>>
86
+ where
87
+ D: RbArrowPrimitiveType,
88
+ D::Native: Into<Value> + TryConvert;
89
+
90
+ /// Apply a lambda with a boolean output type
91
+ fn apply_lambda_with_bool_out_type(
92
+ &'a self,
93
+ lambda: Value,
94
+ init_null_count: usize,
95
+ first_value: Option<bool>,
96
+ ) -> RbResult<ChunkedArray<BooleanType>>;
97
+
98
+ /// Apply a lambda with utf8 output type
99
+ fn apply_lambda_with_utf8_out_type(
100
+ &'a self,
101
+ lambda: Value,
102
+ init_null_count: usize,
103
+ first_value: Option<&str>,
104
+ ) -> RbResult<Utf8Chunked>;
105
+
106
+ /// Apply a lambda with list output type
107
+ fn apply_lambda_with_list_out_type(
108
+ &'a self,
109
+ lambda: Value,
110
+ init_null_count: usize,
111
+ first_value: &Series,
112
+ dt: &DataType,
113
+ ) -> RbResult<ListChunked>;
114
+
115
+ fn apply_extract_any_values(
116
+ &'a self,
117
+ lambda: Value,
118
+ init_null_count: usize,
119
+ first_value: AnyValue<'a>,
120
+ ) -> RbResult<Series>;
121
+
122
+ /// Apply a lambda with list output type
123
+ fn apply_lambda_with_object_out_type(
124
+ &'a self,
125
+ lambda: Value,
126
+ init_null_count: usize,
127
+ first_value: Option<ObjectValue>,
128
+ ) -> RbResult<ObjectChunked<ObjectValue>>;
129
+ }
130
+
131
+ pub fn call_lambda<T>(lambda: Value, in_val: T) -> RbResult<Value>
132
+ where
133
+ T: Into<Value>,
134
+ {
135
+ lambda.funcall("call", (in_val,))
136
+ }
137
+
138
+ pub(crate) fn call_lambda_and_extract<T, S>(lambda: Value, in_val: T) -> RbResult<S>
139
+ where
140
+ T: Into<Value>,
141
+ S: TryConvert,
142
+ {
143
+ match call_lambda(lambda, in_val) {
144
+ Ok(out) => out.try_convert::<S>(),
145
+ Err(e) => panic!("ruby function failed {}", e),
146
+ }
147
+ }
148
+
149
+ fn call_lambda_series_out<T>(lambda: Value, in_val: T) -> RbResult<Series>
150
+ where
151
+ T: Into<Value>,
152
+ {
153
+ let out: Value = lambda.funcall("call", (in_val,))?;
154
+ let py_series: Value = out.funcall("_s", ())?;
155
+ Ok(py_series
156
+ .try_convert::<&RbSeries>()
157
+ .unwrap()
158
+ .series
159
+ .borrow()
160
+ .clone())
161
+ }
162
+
163
+ impl<'a> ApplyLambda<'a> for BooleanChunked {
164
+ fn apply_lambda_unknown(&'a self, lambda: Value) -> RbResult<RbSeries> {
165
+ let mut null_count = 0;
166
+ for opt_v in self.into_iter() {
167
+ if let Some(v) = opt_v {
168
+ let arg = (v,);
169
+ let out: Value = lambda.funcall("call", arg)?;
170
+ if out.is_nil() {
171
+ null_count += 1;
172
+ continue;
173
+ }
174
+ return infer_and_finish(self, lambda, out, null_count);
175
+ } else {
176
+ null_count += 1
177
+ }
178
+ }
179
+ Ok(Self::full_null(self.name(), self.len())
180
+ .into_series()
181
+ .into())
182
+ }
183
+
184
+ fn apply_lambda(&'a self, lambda: Value) -> RbResult<RbSeries> {
185
+ self.apply_lambda_with_bool_out_type(lambda, 0, None)
186
+ .map(|ca| RbSeries::new(ca.into_series()))
187
+ }
188
+
189
+ fn apply_to_struct(
190
+ &'a self,
191
+ lambda: Value,
192
+ init_null_count: usize,
193
+ first_value: AnyValue<'a>,
194
+ ) -> RbResult<RbSeries> {
195
+ let skip = 1;
196
+ if !self.has_validity() {
197
+ let it = self
198
+ .into_no_null_iter()
199
+ .skip(init_null_count + skip)
200
+ .map(|val| call_lambda(lambda, val).ok());
201
+ iterator_to_struct(it, init_null_count, first_value, self.name(), self.len())
202
+ } else {
203
+ let it = self
204
+ .into_iter()
205
+ .skip(init_null_count + skip)
206
+ .map(|opt_val| opt_val.and_then(|val| call_lambda(lambda, val).ok()));
207
+ iterator_to_struct(it, init_null_count, first_value, self.name(), self.len())
208
+ }
209
+ }
210
+
211
+ fn apply_lambda_with_primitive_out_type<D>(
212
+ &'a self,
213
+ lambda: Value,
214
+ init_null_count: usize,
215
+ first_value: Option<D::Native>,
216
+ ) -> RbResult<ChunkedArray<D>>
217
+ where
218
+ D: RbArrowPrimitiveType,
219
+ D::Native: Into<Value> + TryConvert,
220
+ {
221
+ let skip = usize::from(first_value.is_some());
222
+ if init_null_count == self.len() {
223
+ Ok(ChunkedArray::full_null(self.name(), self.len()))
224
+ } else if !self.has_validity() {
225
+ let it = self
226
+ .into_no_null_iter()
227
+ .skip(init_null_count + skip)
228
+ .map(|val| call_lambda_and_extract(lambda, val).ok());
229
+ Ok(iterator_to_primitive(
230
+ it,
231
+ init_null_count,
232
+ first_value,
233
+ self.name(),
234
+ self.len(),
235
+ ))
236
+ } else {
237
+ let it = self
238
+ .into_iter()
239
+ .skip(init_null_count + skip)
240
+ .map(|opt_val| opt_val.and_then(|val| call_lambda_and_extract(lambda, val).ok()));
241
+ Ok(iterator_to_primitive(
242
+ it,
243
+ init_null_count,
244
+ first_value,
245
+ self.name(),
246
+ self.len(),
247
+ ))
248
+ }
249
+ }
250
+
251
+ fn apply_lambda_with_bool_out_type(
252
+ &'a self,
253
+ lambda: Value,
254
+ init_null_count: usize,
255
+ first_value: Option<bool>,
256
+ ) -> RbResult<BooleanChunked> {
257
+ let skip = usize::from(first_value.is_some());
258
+ if init_null_count == self.len() {
259
+ Ok(ChunkedArray::full_null(self.name(), self.len()))
260
+ } else if !self.has_validity() {
261
+ let it = self
262
+ .into_no_null_iter()
263
+ .skip(init_null_count + skip)
264
+ .map(|val| call_lambda_and_extract(lambda, val).ok());
265
+ Ok(iterator_to_bool(
266
+ it,
267
+ init_null_count,
268
+ first_value,
269
+ self.name(),
270
+ self.len(),
271
+ ))
272
+ } else {
273
+ let it = self
274
+ .into_iter()
275
+ .skip(init_null_count + skip)
276
+ .map(|opt_val| opt_val.and_then(|val| call_lambda_and_extract(lambda, val).ok()));
277
+ Ok(iterator_to_bool(
278
+ it,
279
+ init_null_count,
280
+ first_value,
281
+ self.name(),
282
+ self.len(),
283
+ ))
284
+ }
285
+ }
286
+
287
+ fn apply_lambda_with_utf8_out_type(
288
+ &'a self,
289
+ lambda: Value,
290
+ init_null_count: usize,
291
+ first_value: Option<&str>,
292
+ ) -> RbResult<Utf8Chunked> {
293
+ let skip = usize::from(first_value.is_some());
294
+ if init_null_count == self.len() {
295
+ Ok(ChunkedArray::full_null(self.name(), self.len()))
296
+ } else if !self.has_validity() {
297
+ let it = self
298
+ .into_no_null_iter()
299
+ .skip(init_null_count + skip)
300
+ .map(|val| call_lambda_and_extract(lambda, val).ok());
301
+
302
+ Ok(iterator_to_utf8(
303
+ it,
304
+ init_null_count,
305
+ first_value,
306
+ self.name(),
307
+ self.len(),
308
+ ))
309
+ } else {
310
+ let it = self
311
+ .into_iter()
312
+ .skip(init_null_count + skip)
313
+ .map(|opt_val| opt_val.and_then(|val| call_lambda_and_extract(lambda, val).ok()));
314
+ Ok(iterator_to_utf8(
315
+ it,
316
+ init_null_count,
317
+ first_value,
318
+ self.name(),
319
+ self.len(),
320
+ ))
321
+ }
322
+ }
323
+
324
+ fn apply_lambda_with_list_out_type(
325
+ &'a self,
326
+ lambda: Value,
327
+ init_null_count: usize,
328
+ first_value: &Series,
329
+ dt: &DataType,
330
+ ) -> RbResult<ListChunked> {
331
+ let skip = 1;
332
+ if init_null_count == self.len() {
333
+ Ok(ChunkedArray::full_null(self.name(), self.len()))
334
+ } else if !self.has_validity() {
335
+ let it = self
336
+ .into_no_null_iter()
337
+ .skip(init_null_count + skip)
338
+ .map(|val| call_lambda_series_out(lambda, val).ok());
339
+
340
+ iterator_to_list(
341
+ dt,
342
+ it,
343
+ init_null_count,
344
+ Some(first_value),
345
+ self.name(),
346
+ self.len(),
347
+ )
348
+ } else {
349
+ let it = self
350
+ .into_iter()
351
+ .skip(init_null_count + skip)
352
+ .map(|opt_val| opt_val.and_then(|val| call_lambda_series_out(lambda, val).ok()));
353
+ iterator_to_list(
354
+ dt,
355
+ it,
356
+ init_null_count,
357
+ Some(first_value),
358
+ self.name(),
359
+ self.len(),
360
+ )
361
+ }
362
+ }
363
+
364
+ fn apply_extract_any_values(
365
+ &'a self,
366
+ lambda: Value,
367
+ init_null_count: usize,
368
+ first_value: AnyValue<'a>,
369
+ ) -> RbResult<Series> {
370
+ let mut avs = Vec::with_capacity(self.len());
371
+ avs.extend(std::iter::repeat(AnyValue::Null).take(init_null_count));
372
+ avs.push(first_value);
373
+
374
+ if self.null_count() > 0 {
375
+ let iter = self.into_iter().skip(init_null_count + 1).map(|opt_val| {
376
+ let out_wrapped = match opt_val {
377
+ None => Wrap(AnyValue::Null),
378
+ Some(val) => call_lambda_and_extract(lambda, val).unwrap(),
379
+ };
380
+ out_wrapped.0
381
+ });
382
+ avs.extend(iter);
383
+ } else {
384
+ let iter = self
385
+ .into_no_null_iter()
386
+ .skip(init_null_count + 1)
387
+ .map(|val| {
388
+ call_lambda_and_extract::<_, Wrap<AnyValue>>(lambda, val)
389
+ .unwrap()
390
+ .0
391
+ });
392
+ avs.extend(iter);
393
+ }
394
+ Ok(Series::new(self.name(), &avs))
395
+ }
396
+
397
+ fn apply_lambda_with_object_out_type(
398
+ &'a self,
399
+ lambda: Value,
400
+ init_null_count: usize,
401
+ first_value: Option<ObjectValue>,
402
+ ) -> RbResult<ObjectChunked<ObjectValue>> {
403
+ let skip = usize::from(first_value.is_some());
404
+ if init_null_count == self.len() {
405
+ Ok(ChunkedArray::full_null(self.name(), self.len()))
406
+ } else if !self.has_validity() {
407
+ let it = self
408
+ .into_no_null_iter()
409
+ .skip(init_null_count + skip)
410
+ .map(|val| call_lambda_and_extract(lambda, val).ok());
411
+
412
+ Ok(iterator_to_object(
413
+ it,
414
+ init_null_count,
415
+ first_value,
416
+ self.name(),
417
+ self.len(),
418
+ ))
419
+ } else {
420
+ let it = self
421
+ .into_iter()
422
+ .skip(init_null_count + skip)
423
+ .map(|opt_val| opt_val.and_then(|val| call_lambda_and_extract(lambda, val).ok()));
424
+ Ok(iterator_to_object(
425
+ it,
426
+ init_null_count,
427
+ first_value,
428
+ self.name(),
429
+ self.len(),
430
+ ))
431
+ }
432
+ }
433
+ }
434
+
435
+ impl<'a, T> ApplyLambda<'a> for ChunkedArray<T>
436
+ where
437
+ T: RbArrowPrimitiveType + PolarsNumericType,
438
+ T::Native: Into<Value> + TryConvert,
439
+ ChunkedArray<T>: IntoSeries,
440
+ {
441
+ fn apply_lambda_unknown(&'a self, lambda: Value) -> RbResult<RbSeries> {
442
+ let mut null_count = 0;
443
+ for opt_v in self.into_iter() {
444
+ if let Some(v) = opt_v {
445
+ let arg = (v,);
446
+ let out: Value = lambda.funcall("call", arg)?;
447
+ if out.is_nil() {
448
+ null_count += 1;
449
+ continue;
450
+ }
451
+ return infer_and_finish(self, lambda, out, null_count);
452
+ } else {
453
+ null_count += 1
454
+ }
455
+ }
456
+ Ok(Self::full_null(self.name(), self.len())
457
+ .into_series()
458
+ .into())
459
+ }
460
+
461
+ fn apply_lambda(&'a self, lambda: Value) -> RbResult<RbSeries> {
462
+ self.apply_lambda_with_primitive_out_type::<T>(lambda, 0, None)
463
+ .map(|ca| RbSeries::new(ca.into_series()))
464
+ }
465
+
466
+ fn apply_to_struct(
467
+ &'a self,
468
+ lambda: Value,
469
+ init_null_count: usize,
470
+ first_value: AnyValue<'a>,
471
+ ) -> RbResult<RbSeries> {
472
+ let skip = 1;
473
+ if !self.has_validity() {
474
+ let it = self
475
+ .into_no_null_iter()
476
+ .skip(init_null_count + skip)
477
+ .map(|val| call_lambda(lambda, val).ok());
478
+ iterator_to_struct(it, init_null_count, first_value, self.name(), self.len())
479
+ } else {
480
+ let it = self
481
+ .into_iter()
482
+ .skip(init_null_count + skip)
483
+ .map(|opt_val| opt_val.and_then(|val| call_lambda(lambda, val).ok()));
484
+ iterator_to_struct(it, init_null_count, first_value, self.name(), self.len())
485
+ }
486
+ }
487
+
488
+ fn apply_lambda_with_primitive_out_type<D>(
489
+ &'a self,
490
+ lambda: Value,
491
+ init_null_count: usize,
492
+ first_value: Option<D::Native>,
493
+ ) -> RbResult<ChunkedArray<D>>
494
+ where
495
+ D: RbArrowPrimitiveType,
496
+ D::Native: Into<Value> + TryConvert,
497
+ {
498
+ let skip = usize::from(first_value.is_some());
499
+ if init_null_count == self.len() {
500
+ Ok(ChunkedArray::full_null(self.name(), self.len()))
501
+ } else if !self.has_validity() {
502
+ let it = self
503
+ .into_no_null_iter()
504
+ .skip(init_null_count + skip)
505
+ .map(|val| call_lambda_and_extract(lambda, val).ok());
506
+ Ok(iterator_to_primitive(
507
+ it,
508
+ init_null_count,
509
+ first_value,
510
+ self.name(),
511
+ self.len(),
512
+ ))
513
+ } else {
514
+ let it = self
515
+ .into_iter()
516
+ .skip(init_null_count + skip)
517
+ .map(|opt_val| opt_val.and_then(|val| call_lambda_and_extract(lambda, val).ok()));
518
+ Ok(iterator_to_primitive(
519
+ it,
520
+ init_null_count,
521
+ first_value,
522
+ self.name(),
523
+ self.len(),
524
+ ))
525
+ }
526
+ }
527
+
528
+ fn apply_lambda_with_bool_out_type(
529
+ &'a self,
530
+ lambda: Value,
531
+ init_null_count: usize,
532
+ first_value: Option<bool>,
533
+ ) -> RbResult<BooleanChunked> {
534
+ let skip = usize::from(first_value.is_some());
535
+ if init_null_count == self.len() {
536
+ Ok(ChunkedArray::full_null(self.name(), self.len()))
537
+ } else if !self.has_validity() {
538
+ let it = self
539
+ .into_no_null_iter()
540
+ .skip(init_null_count + skip)
541
+ .map(|val| call_lambda_and_extract(lambda, val).ok());
542
+ Ok(iterator_to_bool(
543
+ it,
544
+ init_null_count,
545
+ first_value,
546
+ self.name(),
547
+ self.len(),
548
+ ))
549
+ } else {
550
+ let it = self
551
+ .into_iter()
552
+ .skip(init_null_count + skip)
553
+ .map(|opt_val| opt_val.and_then(|val| call_lambda_and_extract(lambda, val).ok()));
554
+ Ok(iterator_to_bool(
555
+ it,
556
+ init_null_count,
557
+ first_value,
558
+ self.name(),
559
+ self.len(),
560
+ ))
561
+ }
562
+ }
563
+
564
+ fn apply_lambda_with_utf8_out_type(
565
+ &'a self,
566
+ lambda: Value,
567
+ init_null_count: usize,
568
+ first_value: Option<&str>,
569
+ ) -> RbResult<Utf8Chunked> {
570
+ let skip = usize::from(first_value.is_some());
571
+ if init_null_count == self.len() {
572
+ Ok(ChunkedArray::full_null(self.name(), self.len()))
573
+ } else if !self.has_validity() {
574
+ let it = self
575
+ .into_no_null_iter()
576
+ .skip(init_null_count + skip)
577
+ .map(|val| call_lambda_and_extract(lambda, val).ok());
578
+
579
+ Ok(iterator_to_utf8(
580
+ it,
581
+ init_null_count,
582
+ first_value,
583
+ self.name(),
584
+ self.len(),
585
+ ))
586
+ } else {
587
+ let it = self
588
+ .into_iter()
589
+ .skip(init_null_count + skip)
590
+ .map(|opt_val| opt_val.and_then(|val| call_lambda_and_extract(lambda, val).ok()));
591
+ Ok(iterator_to_utf8(
592
+ it,
593
+ init_null_count,
594
+ first_value,
595
+ self.name(),
596
+ self.len(),
597
+ ))
598
+ }
599
+ }
600
+
601
+ fn apply_lambda_with_list_out_type(
602
+ &'a self,
603
+ lambda: Value,
604
+ init_null_count: usize,
605
+ first_value: &Series,
606
+ dt: &DataType,
607
+ ) -> RbResult<ListChunked> {
608
+ let skip = 1;
609
+ if init_null_count == self.len() {
610
+ Ok(ChunkedArray::full_null(self.name(), self.len()))
611
+ } else if !self.has_validity() {
612
+ let it = self
613
+ .into_no_null_iter()
614
+ .skip(init_null_count + skip)
615
+ .map(|val| call_lambda_series_out(lambda, val).ok());
616
+
617
+ iterator_to_list(
618
+ dt,
619
+ it,
620
+ init_null_count,
621
+ Some(first_value),
622
+ self.name(),
623
+ self.len(),
624
+ )
625
+ } else {
626
+ let it = self
627
+ .into_iter()
628
+ .skip(init_null_count + skip)
629
+ .map(|opt_val| opt_val.and_then(|val| call_lambda_series_out(lambda, val).ok()));
630
+ iterator_to_list(
631
+ dt,
632
+ it,
633
+ init_null_count,
634
+ Some(first_value),
635
+ self.name(),
636
+ self.len(),
637
+ )
638
+ }
639
+ }
640
+
641
+ fn apply_extract_any_values(
642
+ &'a self,
643
+ lambda: Value,
644
+ init_null_count: usize,
645
+ first_value: AnyValue<'a>,
646
+ ) -> RbResult<Series> {
647
+ let mut avs = Vec::with_capacity(self.len());
648
+ avs.extend(std::iter::repeat(AnyValue::Null).take(init_null_count));
649
+ avs.push(first_value);
650
+
651
+ if self.null_count() > 0 {
652
+ let iter = self.into_iter().skip(init_null_count + 1).map(|opt_val| {
653
+ let out_wrapped = match opt_val {
654
+ None => Wrap(AnyValue::Null),
655
+ Some(val) => call_lambda_and_extract(lambda, val).unwrap(),
656
+ };
657
+ out_wrapped.0
658
+ });
659
+ avs.extend(iter);
660
+ } else {
661
+ let iter = self
662
+ .into_no_null_iter()
663
+ .skip(init_null_count + 1)
664
+ .map(|val| {
665
+ call_lambda_and_extract::<_, Wrap<AnyValue>>(lambda, val)
666
+ .unwrap()
667
+ .0
668
+ });
669
+ avs.extend(iter);
670
+ }
671
+ Ok(Series::new(self.name(), &avs))
672
+ }
673
+
674
+ fn apply_lambda_with_object_out_type(
675
+ &'a self,
676
+ lambda: Value,
677
+ init_null_count: usize,
678
+ first_value: Option<ObjectValue>,
679
+ ) -> RbResult<ObjectChunked<ObjectValue>> {
680
+ let skip = usize::from(first_value.is_some());
681
+ if init_null_count == self.len() {
682
+ Ok(ChunkedArray::full_null(self.name(), self.len()))
683
+ } else if !self.has_validity() {
684
+ let it = self
685
+ .into_no_null_iter()
686
+ .skip(init_null_count + skip)
687
+ .map(|val| call_lambda_and_extract(lambda, val).ok());
688
+
689
+ Ok(iterator_to_object(
690
+ it,
691
+ init_null_count,
692
+ first_value,
693
+ self.name(),
694
+ self.len(),
695
+ ))
696
+ } else {
697
+ let it = self
698
+ .into_iter()
699
+ .skip(init_null_count + skip)
700
+ .map(|opt_val| opt_val.and_then(|val| call_lambda_and_extract(lambda, val).ok()));
701
+ Ok(iterator_to_object(
702
+ it,
703
+ init_null_count,
704
+ first_value,
705
+ self.name(),
706
+ self.len(),
707
+ ))
708
+ }
709
+ }
710
+ }
711
+
712
+ impl<'a> ApplyLambda<'a> for Utf8Chunked {
713
+ fn apply_lambda_unknown(&'a self, lambda: Value) -> RbResult<RbSeries> {
714
+ let mut null_count = 0;
715
+ for opt_v in self.into_iter() {
716
+ if let Some(v) = opt_v {
717
+ let arg = (v,);
718
+ let out: Value = lambda.funcall("call", arg)?;
719
+ if out.is_nil() {
720
+ null_count += 1;
721
+ continue;
722
+ }
723
+ return infer_and_finish(self, lambda, out, null_count);
724
+ } else {
725
+ null_count += 1
726
+ }
727
+ }
728
+ Ok(Self::full_null(self.name(), self.len())
729
+ .into_series()
730
+ .into())
731
+ }
732
+
733
+ fn apply_lambda(&'a self, lambda: Value) -> RbResult<RbSeries> {
734
+ let ca = self.apply_lambda_with_utf8_out_type(lambda, 0, None)?;
735
+ Ok(ca.into_series().into())
736
+ }
737
+
738
+ fn apply_to_struct(
739
+ &'a self,
740
+ lambda: Value,
741
+ init_null_count: usize,
742
+ first_value: AnyValue<'a>,
743
+ ) -> RbResult<RbSeries> {
744
+ let skip = 1;
745
+ if !self.has_validity() {
746
+ let it = self
747
+ .into_no_null_iter()
748
+ .skip(init_null_count + skip)
749
+ .map(|val| call_lambda(lambda, val).ok());
750
+ iterator_to_struct(it, init_null_count, first_value, self.name(), self.len())
751
+ } else {
752
+ let it = self
753
+ .into_iter()
754
+ .skip(init_null_count + skip)
755
+ .map(|opt_val| opt_val.and_then(|val| call_lambda(lambda, val).ok()));
756
+ iterator_to_struct(it, init_null_count, first_value, self.name(), self.len())
757
+ }
758
+ }
759
+
760
+ fn apply_lambda_with_primitive_out_type<D>(
761
+ &'a self,
762
+ lambda: Value,
763
+ init_null_count: usize,
764
+ first_value: Option<D::Native>,
765
+ ) -> RbResult<ChunkedArray<D>>
766
+ where
767
+ D: RbArrowPrimitiveType,
768
+ D::Native: Into<Value> + TryConvert,
769
+ {
770
+ let skip = usize::from(first_value.is_some());
771
+ if init_null_count == self.len() {
772
+ Ok(ChunkedArray::full_null(self.name(), self.len()))
773
+ } else if !self.has_validity() {
774
+ let it = self
775
+ .into_no_null_iter()
776
+ .skip(init_null_count + skip)
777
+ .map(|val| call_lambda_and_extract(lambda, val).ok());
778
+ Ok(iterator_to_primitive(
779
+ it,
780
+ init_null_count,
781
+ first_value,
782
+ self.name(),
783
+ self.len(),
784
+ ))
785
+ } else {
786
+ let it = self
787
+ .into_iter()
788
+ .skip(init_null_count + skip)
789
+ .map(|opt_val| opt_val.and_then(|val| call_lambda_and_extract(lambda, val).ok()));
790
+ Ok(iterator_to_primitive(
791
+ it,
792
+ init_null_count,
793
+ first_value,
794
+ self.name(),
795
+ self.len(),
796
+ ))
797
+ }
798
+ }
799
+
800
+ fn apply_lambda_with_bool_out_type(
801
+ &'a self,
802
+ lambda: Value,
803
+ init_null_count: usize,
804
+ first_value: Option<bool>,
805
+ ) -> RbResult<BooleanChunked> {
806
+ let skip = usize::from(first_value.is_some());
807
+ if init_null_count == self.len() {
808
+ Ok(ChunkedArray::full_null(self.name(), self.len()))
809
+ } else if !self.has_validity() {
810
+ let it = self
811
+ .into_no_null_iter()
812
+ .skip(init_null_count + skip)
813
+ .map(|val| call_lambda_and_extract(lambda, val).ok());
814
+ Ok(iterator_to_bool(
815
+ it,
816
+ init_null_count,
817
+ first_value,
818
+ self.name(),
819
+ self.len(),
820
+ ))
821
+ } else {
822
+ let it = self
823
+ .into_iter()
824
+ .skip(init_null_count + skip)
825
+ .map(|opt_val| opt_val.and_then(|val| call_lambda_and_extract(lambda, val).ok()));
826
+ Ok(iterator_to_bool(
827
+ it,
828
+ init_null_count,
829
+ first_value,
830
+ self.name(),
831
+ self.len(),
832
+ ))
833
+ }
834
+ }
835
+
836
+ fn apply_lambda_with_utf8_out_type(
837
+ &self,
838
+ lambda: Value,
839
+ init_null_count: usize,
840
+ first_value: Option<&str>,
841
+ ) -> RbResult<Utf8Chunked> {
842
+ let skip = usize::from(first_value.is_some());
843
+ if init_null_count == self.len() {
844
+ Ok(ChunkedArray::full_null(self.name(), self.len()))
845
+ } else if !self.has_validity() {
846
+ let it = self
847
+ .into_no_null_iter()
848
+ .skip(init_null_count + skip)
849
+ .map(|val| call_lambda_and_extract(lambda, val).ok());
850
+
851
+ Ok(iterator_to_utf8(
852
+ it,
853
+ init_null_count,
854
+ first_value,
855
+ self.name(),
856
+ self.len(),
857
+ ))
858
+ } else {
859
+ let it = self
860
+ .into_iter()
861
+ .skip(init_null_count + skip)
862
+ .map(|opt_val| opt_val.and_then(|val| call_lambda_and_extract(lambda, val).ok()));
863
+ Ok(iterator_to_utf8(
864
+ it,
865
+ init_null_count,
866
+ first_value,
867
+ self.name(),
868
+ self.len(),
869
+ ))
870
+ }
871
+ }
872
+
873
+ fn apply_lambda_with_list_out_type(
874
+ &'a self,
875
+ lambda: Value,
876
+ init_null_count: usize,
877
+ first_value: &Series,
878
+ dt: &DataType,
879
+ ) -> RbResult<ListChunked> {
880
+ let skip = 1;
881
+ if init_null_count == self.len() {
882
+ Ok(ChunkedArray::full_null(self.name(), self.len()))
883
+ } else if !self.has_validity() {
884
+ let it = self
885
+ .into_no_null_iter()
886
+ .skip(init_null_count + skip)
887
+ .map(|val| call_lambda_series_out(lambda, val).ok());
888
+
889
+ iterator_to_list(
890
+ dt,
891
+ it,
892
+ init_null_count,
893
+ Some(first_value),
894
+ self.name(),
895
+ self.len(),
896
+ )
897
+ } else {
898
+ let it = self
899
+ .into_iter()
900
+ .skip(init_null_count + skip)
901
+ .map(|opt_val| opt_val.and_then(|val| call_lambda_series_out(lambda, val).ok()));
902
+ iterator_to_list(
903
+ dt,
904
+ it,
905
+ init_null_count,
906
+ Some(first_value),
907
+ self.name(),
908
+ self.len(),
909
+ )
910
+ }
911
+ }
912
+
913
+ fn apply_extract_any_values(
914
+ &'a self,
915
+ lambda: Value,
916
+ init_null_count: usize,
917
+ first_value: AnyValue<'a>,
918
+ ) -> RbResult<Series> {
919
+ let mut avs = Vec::with_capacity(self.len());
920
+ avs.extend(std::iter::repeat(AnyValue::Null).take(init_null_count));
921
+ avs.push(first_value);
922
+
923
+ if self.null_count() > 0 {
924
+ let iter = self.into_iter().skip(init_null_count + 1).map(|opt_val| {
925
+ let out_wrapped = match opt_val {
926
+ None => Wrap(AnyValue::Null),
927
+ Some(val) => call_lambda_and_extract(lambda, val).unwrap(),
928
+ };
929
+ out_wrapped.0
930
+ });
931
+ avs.extend(iter);
932
+ } else {
933
+ let iter = self
934
+ .into_no_null_iter()
935
+ .skip(init_null_count + 1)
936
+ .map(|val| {
937
+ call_lambda_and_extract::<_, Wrap<AnyValue>>(lambda, val)
938
+ .unwrap()
939
+ .0
940
+ });
941
+ avs.extend(iter);
942
+ }
943
+ Ok(Series::new(self.name(), &avs))
944
+ }
945
+
946
+ fn apply_lambda_with_object_out_type(
947
+ &'a self,
948
+ lambda: Value,
949
+ init_null_count: usize,
950
+ first_value: Option<ObjectValue>,
951
+ ) -> RbResult<ObjectChunked<ObjectValue>> {
952
+ let skip = usize::from(first_value.is_some());
953
+ if init_null_count == self.len() {
954
+ Ok(ChunkedArray::full_null(self.name(), self.len()))
955
+ } else if !self.has_validity() {
956
+ let it = self
957
+ .into_no_null_iter()
958
+ .skip(init_null_count + skip)
959
+ .map(|val| call_lambda_and_extract(lambda, val).ok());
960
+
961
+ Ok(iterator_to_object(
962
+ it,
963
+ init_null_count,
964
+ first_value,
965
+ self.name(),
966
+ self.len(),
967
+ ))
968
+ } else {
969
+ let it = self
970
+ .into_iter()
971
+ .skip(init_null_count + skip)
972
+ .map(|opt_val| opt_val.and_then(|val| call_lambda_and_extract(lambda, val).ok()));
973
+ Ok(iterator_to_object(
974
+ it,
975
+ init_null_count,
976
+ first_value,
977
+ self.name(),
978
+ self.len(),
979
+ ))
980
+ }
981
+ }
982
+ }
983
+
984
+ fn make_dict_arg(names: &[&str], vals: &[AnyValue]) -> RHash {
985
+ let dict = RHash::new();
986
+ for (name, val) in names.iter().zip(slice_to_wrapped(vals)) {
987
+ dict.aset(name.to_string(), (*val).clone()).unwrap()
988
+ }
989
+ dict
990
+ }
991
+
992
+ impl<'a> ApplyLambda<'a> for StructChunked {
993
+ fn apply_lambda_unknown(&'a self, lambda: Value) -> RbResult<RbSeries> {
994
+ let names = self.fields().iter().map(|s| s.name()).collect::<Vec<_>>();
995
+ let mut null_count = 0;
996
+ for val in self.into_iter() {
997
+ let arg = make_dict_arg(&names, val);
998
+ let out: Value = lambda.funcall("call", (arg,))?;
999
+ if out.is_nil() {
1000
+ null_count += 1;
1001
+ continue;
1002
+ }
1003
+ return infer_and_finish(self, lambda, out, null_count);
1004
+ }
1005
+
1006
+ // todo! full null
1007
+ Ok(self.clone().into_series().into())
1008
+ }
1009
+
1010
+ fn apply_lambda(&'a self, lambda: Value) -> RbResult<RbSeries> {
1011
+ self.apply_lambda_unknown(lambda)
1012
+ }
1013
+
1014
+ fn apply_to_struct(
1015
+ &'a self,
1016
+ lambda: Value,
1017
+ init_null_count: usize,
1018
+ first_value: AnyValue<'a>,
1019
+ ) -> RbResult<RbSeries> {
1020
+ let names = self.fields().iter().map(|s| s.name()).collect::<Vec<_>>();
1021
+
1022
+ let skip = 1;
1023
+ let it = self.into_iter().skip(init_null_count + skip).map(|val| {
1024
+ let arg = make_dict_arg(&names, val);
1025
+ let out = lambda.funcall("call", (arg,)).unwrap();
1026
+ Some(out)
1027
+ });
1028
+ iterator_to_struct(it, init_null_count, first_value, self.name(), self.len())
1029
+ }
1030
+
1031
+ fn apply_lambda_with_primitive_out_type<D>(
1032
+ &'a self,
1033
+ lambda: Value,
1034
+ init_null_count: usize,
1035
+ first_value: Option<D::Native>,
1036
+ ) -> RbResult<ChunkedArray<D>>
1037
+ where
1038
+ D: RbArrowPrimitiveType,
1039
+ D::Native: Into<Value> + TryConvert,
1040
+ {
1041
+ let names = self.fields().iter().map(|s| s.name()).collect::<Vec<_>>();
1042
+
1043
+ let skip = usize::from(first_value.is_some());
1044
+ let it = self.into_iter().skip(init_null_count + skip).map(|val| {
1045
+ let arg = make_dict_arg(&names, val);
1046
+ call_lambda_and_extract(lambda, arg).ok()
1047
+ });
1048
+
1049
+ Ok(iterator_to_primitive(
1050
+ it,
1051
+ init_null_count,
1052
+ first_value,
1053
+ self.name(),
1054
+ self.len(),
1055
+ ))
1056
+ }
1057
+
1058
+ fn apply_lambda_with_bool_out_type(
1059
+ &'a self,
1060
+ lambda: Value,
1061
+ init_null_count: usize,
1062
+ first_value: Option<bool>,
1063
+ ) -> RbResult<BooleanChunked> {
1064
+ let names = self.fields().iter().map(|s| s.name()).collect::<Vec<_>>();
1065
+
1066
+ let skip = usize::from(first_value.is_some());
1067
+ let it = self.into_iter().skip(init_null_count + skip).map(|val| {
1068
+ let arg = make_dict_arg(&names, val);
1069
+ call_lambda_and_extract(lambda, arg).ok()
1070
+ });
1071
+
1072
+ Ok(iterator_to_bool(
1073
+ it,
1074
+ init_null_count,
1075
+ first_value,
1076
+ self.name(),
1077
+ self.len(),
1078
+ ))
1079
+ }
1080
+
1081
+ fn apply_lambda_with_utf8_out_type(
1082
+ &'a self,
1083
+ lambda: Value,
1084
+ init_null_count: usize,
1085
+ first_value: Option<&str>,
1086
+ ) -> RbResult<Utf8Chunked> {
1087
+ let names = self.fields().iter().map(|s| s.name()).collect::<Vec<_>>();
1088
+
1089
+ let skip = usize::from(first_value.is_some());
1090
+ let it = self.into_iter().skip(init_null_count + skip).map(|val| {
1091
+ let arg = make_dict_arg(&names, val);
1092
+ call_lambda_and_extract(lambda, arg).ok()
1093
+ });
1094
+
1095
+ Ok(iterator_to_utf8(
1096
+ it,
1097
+ init_null_count,
1098
+ first_value,
1099
+ self.name(),
1100
+ self.len(),
1101
+ ))
1102
+ }
1103
+
1104
+ fn apply_lambda_with_list_out_type(
1105
+ &'a self,
1106
+ lambda: Value,
1107
+ init_null_count: usize,
1108
+ first_value: &Series,
1109
+ dt: &DataType,
1110
+ ) -> RbResult<ListChunked> {
1111
+ let skip = 1;
1112
+
1113
+ let names = self.fields().iter().map(|s| s.name()).collect::<Vec<_>>();
1114
+
1115
+ let it = self.into_iter().skip(init_null_count + skip).map(|val| {
1116
+ let arg = make_dict_arg(&names, val);
1117
+ call_lambda_series_out(lambda, arg).ok()
1118
+ });
1119
+ iterator_to_list(
1120
+ dt,
1121
+ it,
1122
+ init_null_count,
1123
+ Some(first_value),
1124
+ self.name(),
1125
+ self.len(),
1126
+ )
1127
+ }
1128
+
1129
+ fn apply_extract_any_values(
1130
+ &'a self,
1131
+ lambda: Value,
1132
+ init_null_count: usize,
1133
+ first_value: AnyValue<'a>,
1134
+ ) -> RbResult<Series> {
1135
+ let names = self.fields().iter().map(|s| s.name()).collect::<Vec<_>>();
1136
+ let mut avs = Vec::with_capacity(self.len());
1137
+ avs.extend(std::iter::repeat(AnyValue::Null).take(init_null_count));
1138
+ avs.push(first_value);
1139
+
1140
+ let iter = self.into_iter().skip(init_null_count + 1).map(|val| {
1141
+ let arg = make_dict_arg(&names, val);
1142
+ call_lambda_and_extract::<_, Wrap<AnyValue>>(lambda, arg)
1143
+ .unwrap()
1144
+ .0
1145
+ });
1146
+ avs.extend(iter);
1147
+
1148
+ Ok(Series::new(self.name(), &avs))
1149
+ }
1150
+
1151
+ fn apply_lambda_with_object_out_type(
1152
+ &'a self,
1153
+ lambda: Value,
1154
+ init_null_count: usize,
1155
+ first_value: Option<ObjectValue>,
1156
+ ) -> RbResult<ObjectChunked<ObjectValue>> {
1157
+ let names = self.fields().iter().map(|s| s.name()).collect::<Vec<_>>();
1158
+
1159
+ let skip = usize::from(first_value.is_some());
1160
+ let it = self.into_iter().skip(init_null_count + skip).map(|val| {
1161
+ let arg = make_dict_arg(&names, val);
1162
+ call_lambda_and_extract(lambda, arg).ok()
1163
+ });
1164
+
1165
+ Ok(iterator_to_object(
1166
+ it,
1167
+ init_null_count,
1168
+ first_value,
1169
+ self.name(),
1170
+ self.len(),
1171
+ ))
1172
+ }
1173
+ }