polars-df 0.17.1 → 0.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,12 +2,13 @@ use magnus::{r_hash::ForEach, typed_data::Obj, IntoValue, RArray, RHash, TryConv
2
2
  use polars::io::{HiveOptions, RowIndex};
3
3
  use polars::lazy::frame::LazyFrame;
4
4
  use polars::prelude::*;
5
- use polars_plan::plans::ScanSources;
5
+ use polars_plan::dsl::ScanSources;
6
6
  use std::cell::RefCell;
7
7
  use std::io::BufWriter;
8
8
  use std::num::NonZeroUsize;
9
9
  use std::path::PathBuf;
10
10
 
11
+ use super::SinkTarget;
11
12
  use crate::conversion::*;
12
13
  use crate::expr::rb_exprs_to_exprs;
13
14
  use crate::file::get_file_like;
@@ -374,16 +375,16 @@ impl RbLazyFrame {
374
375
  #[allow(clippy::too_many_arguments)]
375
376
  pub fn sink_parquet(
376
377
  &self,
377
- path: PathBuf,
378
+ target: SinkTarget,
378
379
  compression: String,
379
380
  compression_level: Option<i32>,
380
381
  statistics: Wrap<StatisticsOptions>,
381
382
  row_group_size: Option<usize>,
382
383
  data_page_size: Option<usize>,
383
- maintain_order: bool,
384
384
  cloud_options: Option<Vec<(String, String)>>,
385
385
  retries: usize,
386
- ) -> RbResult<()> {
386
+ sink_options: Wrap<SinkOptions>,
387
+ ) -> RbResult<RbLazyFrame> {
387
388
  let compression = parse_parquet_compression(&compression, compression_level)?;
388
389
 
389
390
  let options = ParquetWriteOptions {
@@ -391,48 +392,69 @@ impl RbLazyFrame {
391
392
  statistics: statistics.0,
392
393
  row_group_size,
393
394
  data_page_size,
394
- maintain_order,
395
+ key_value_metadata: None,
396
+ field_overwrites: Vec::new(),
395
397
  };
396
398
 
397
- let cloud_options = {
398
- let cloud_options =
399
- parse_cloud_options(path.to_str().unwrap(), cloud_options.unwrap_or_default())?;
400
- Some(cloud_options.with_max_retries(retries))
399
+ let cloud_options = match target.base_path() {
400
+ None => None,
401
+ Some(base_path) => {
402
+ let cloud_options = parse_cloud_options(
403
+ base_path.to_str().unwrap(),
404
+ cloud_options.unwrap_or_default(),
405
+ )?;
406
+ Some(cloud_options.with_max_retries(retries))
407
+ }
401
408
  };
402
409
 
403
410
  let ldf = self.ldf.borrow().clone();
404
- ldf.sink_parquet(&path, options, cloud_options)
405
- .map_err(RbPolarsErr::from)?;
406
- Ok(())
411
+ match target {
412
+ SinkTarget::File(target) => {
413
+ ldf.sink_parquet(target, options, cloud_options, sink_options.0)
414
+ }
415
+ }
416
+ .map_err(RbPolarsErr::from)
417
+ .map(Into::into)
418
+ .map_err(Into::into)
407
419
  }
408
420
 
409
421
  pub fn sink_ipc(
410
422
  &self,
411
- path: PathBuf,
423
+ target: SinkTarget,
412
424
  compression: Option<Wrap<IpcCompression>>,
413
- maintain_order: bool,
414
425
  cloud_options: Option<Vec<(String, String)>>,
415
426
  retries: usize,
416
- ) -> RbResult<()> {
427
+ sink_options: Wrap<SinkOptions>,
428
+ ) -> RbResult<RbLazyFrame> {
417
429
  let options = IpcWriterOptions {
418
430
  compression: compression.map(|c| c.0),
419
- maintain_order,
431
+ ..Default::default()
420
432
  };
421
433
 
422
- let cloud_options = {
423
- let cloud_options =
424
- parse_cloud_options(path.to_str().unwrap(), cloud_options.unwrap_or_default())?;
425
- Some(cloud_options.with_max_retries(retries))
434
+ let cloud_options = match target.base_path() {
435
+ None => None,
436
+ Some(base_path) => {
437
+ let cloud_options = parse_cloud_options(
438
+ base_path.to_str().unwrap(),
439
+ cloud_options.unwrap_or_default(),
440
+ )?;
441
+ Some(cloud_options.with_max_retries(retries))
442
+ }
426
443
  };
427
444
 
428
445
  let ldf = self.ldf.borrow().clone();
429
- ldf.sink_ipc(&path, options, cloud_options)
430
- .map_err(RbPolarsErr::from)?;
431
- Ok(())
446
+ match target {
447
+ SinkTarget::File(target) => {
448
+ ldf.sink_ipc(target, options, cloud_options, sink_options.0)
449
+ }
450
+ }
451
+ .map_err(RbPolarsErr::from)
452
+ .map(Into::into)
453
+ .map_err(Into::into)
432
454
  }
433
455
 
434
- pub fn sink_csv(&self, arguments: &[Value]) -> RbResult<()> {
435
- let path = PathBuf::try_convert(arguments[0])?;
456
+ pub fn sink_csv(&self, arguments: &[Value]) -> RbResult<RbLazyFrame> {
457
+ let target = SinkTarget::try_convert(arguments[0])?;
436
458
  let include_bom = bool::try_convert(arguments[1])?;
437
459
  let include_header = bool::try_convert(arguments[2])?;
438
460
  let separator = u8::try_convert(arguments[3])?;
@@ -446,9 +468,9 @@ impl RbLazyFrame {
446
468
  let float_precision = Option::<usize>::try_convert(arguments[11])?;
447
469
  let null_value = Option::<String>::try_convert(arguments[12])?;
448
470
  let quote_style = Option::<Wrap<QuoteStyle>>::try_convert(arguments[13])?;
449
- let maintain_order = bool::try_convert(arguments[14])?;
450
- let cloud_options = Option::<Vec<(String, String)>>::try_convert(arguments[15])?;
451
- let retries = usize::try_convert(arguments[16])?;
471
+ let cloud_options = Option::<Vec<(String, String)>>::try_convert(arguments[14])?;
472
+ let retries = usize::try_convert(arguments[15])?;
473
+ let sink_options = Wrap::<SinkOptions>::try_convert(arguments[16])?;
452
474
 
453
475
  let quote_style = quote_style.map_or(QuoteStyle::default(), |wrap| wrap.0);
454
476
  let null_value = null_value.unwrap_or(SerializeOptions::default().null);
@@ -469,42 +491,59 @@ impl RbLazyFrame {
469
491
  let options = CsvWriterOptions {
470
492
  include_bom,
471
493
  include_header,
472
- maintain_order,
473
494
  batch_size: batch_size.0,
474
495
  serialize_options,
475
496
  };
476
497
 
477
- let cloud_options = {
478
- let cloud_options =
479
- parse_cloud_options(path.to_str().unwrap(), cloud_options.unwrap_or_default())?;
480
- Some(cloud_options.with_max_retries(retries))
498
+ let cloud_options = match target.base_path() {
499
+ None => None,
500
+ Some(base_path) => {
501
+ let cloud_options = parse_cloud_options(
502
+ base_path.to_str().unwrap(),
503
+ cloud_options.unwrap_or_default(),
504
+ )?;
505
+ Some(cloud_options.with_max_retries(retries))
506
+ }
481
507
  };
482
508
 
483
509
  let ldf = self.ldf.borrow().clone();
484
- ldf.sink_csv(&path, options, cloud_options)
485
- .map_err(RbPolarsErr::from)?;
486
- Ok(())
510
+ match target {
511
+ SinkTarget::File(target) => {
512
+ ldf.sink_csv(target, options, cloud_options, sink_options.0)
513
+ }
514
+ }
515
+ .map_err(RbPolarsErr::from)
516
+ .map(Into::into)
517
+ .map_err(Into::into)
487
518
  }
488
519
 
489
520
  pub fn sink_json(
490
521
  &self,
491
- path: PathBuf,
492
- maintain_order: bool,
522
+ target: SinkTarget,
493
523
  cloud_options: Option<Vec<(String, String)>>,
494
524
  retries: usize,
495
- ) -> RbResult<()> {
496
- let options = JsonWriterOptions { maintain_order };
497
-
498
- let cloud_options = {
499
- let cloud_options =
500
- parse_cloud_options(path.to_str().unwrap(), cloud_options.unwrap_or_default())?;
501
- Some(cloud_options.with_max_retries(retries))
525
+ sink_options: Wrap<SinkOptions>,
526
+ ) -> RbResult<RbLazyFrame> {
527
+ let options = JsonWriterOptions {};
528
+
529
+ let cloud_options = match target.base_path() {
530
+ None => None,
531
+ Some(base_path) => {
532
+ let cloud_options = parse_cloud_options(
533
+ base_path.to_str().unwrap(),
534
+ cloud_options.unwrap_or_default(),
535
+ )?;
536
+ Some(cloud_options.with_max_retries(retries))
537
+ }
502
538
  };
503
539
 
504
540
  let ldf = self.ldf.borrow().clone();
505
- ldf.sink_json(&path, options, cloud_options)
506
- .map_err(RbPolarsErr::from)?;
507
- Ok(())
541
+ match target {
542
+ SinkTarget::File(path) => ldf.sink_json(path, options, cloud_options, sink_options.0),
543
+ }
544
+ .map_err(RbPolarsErr::from)
545
+ .map(Into::into)
546
+ .map_err(Into::into)
508
547
  }
509
548
 
510
549
  pub fn fetch(&self, n_rows: usize) -> RbResult<RbDataFrame> {
@@ -1,7 +1,9 @@
1
1
  mod general;
2
2
  mod serde;
3
+ mod sink;
3
4
 
4
5
  use polars::lazy::frame::LazyFrame;
6
+ pub use sink::SinkTarget;
5
7
  use std::cell::RefCell;
6
8
 
7
9
  #[magnus::wrap(class = "Polars::RbLazyFrame")]
@@ -0,0 +1,99 @@
1
+ use std::path::{Path, PathBuf};
2
+ use std::sync::{Arc, Mutex};
3
+
4
+ use magnus::{RHash, TryConvert, Value};
5
+ use polars::prelude::sync_on_close::SyncOnCloseType;
6
+ use polars::prelude::{SinkOptions, SpecialEq};
7
+
8
+ use crate::prelude::Wrap;
9
+ use crate::{RbResult, RbValueError};
10
+
11
+ #[derive(Clone)]
12
+ pub enum SinkTarget {
13
+ File(polars_plan::dsl::SinkTarget),
14
+ }
15
+
16
+ impl TryConvert for Wrap<polars_plan::dsl::SinkTarget> {
17
+ fn try_convert(ob: Value) -> RbResult<Self> {
18
+ if let Ok(v) = PathBuf::try_convert(ob) {
19
+ Ok(Wrap(polars::prelude::SinkTarget::Path(Arc::new(v))))
20
+ } else {
21
+ let writer = {
22
+ let rb_f = ob;
23
+ RbResult::Ok(crate::file::try_get_rbfile(rb_f, true)?.0.into_writeable())
24
+ }?;
25
+
26
+ Ok(Wrap(polars_plan::prelude::SinkTarget::Dyn(SpecialEq::new(
27
+ Arc::new(Mutex::new(Some(writer))),
28
+ ))))
29
+ }
30
+ }
31
+ }
32
+
33
+ impl TryConvert for SinkTarget {
34
+ fn try_convert(ob: Value) -> RbResult<Self> {
35
+ Ok(Self::File(
36
+ <Wrap<polars_plan::dsl::SinkTarget>>::try_convert(ob)?.0,
37
+ ))
38
+ }
39
+ }
40
+
41
+ impl SinkTarget {
42
+ pub fn base_path(&self) -> Option<&Path> {
43
+ match self {
44
+ Self::File(t) => match t {
45
+ polars::prelude::SinkTarget::Path(p) => Some(p.as_path()),
46
+ polars::prelude::SinkTarget::Dyn(_) => None,
47
+ },
48
+ }
49
+ }
50
+ }
51
+
52
+ impl TryConvert for Wrap<SyncOnCloseType> {
53
+ fn try_convert(ob: Value) -> RbResult<Self> {
54
+ let parsed = match String::try_convert(ob)?.as_str() {
55
+ "none" => SyncOnCloseType::None,
56
+ "data" => SyncOnCloseType::Data,
57
+ "all" => SyncOnCloseType::All,
58
+ v => {
59
+ return Err(RbValueError::new_err(format!(
60
+ "`sync_on_close` must be one of {{'none', 'data', 'all'}}, got {v}",
61
+ )));
62
+ }
63
+ };
64
+ Ok(Wrap(parsed))
65
+ }
66
+ }
67
+
68
+ impl TryConvert for Wrap<SinkOptions> {
69
+ fn try_convert(ob: Value) -> RbResult<Self> {
70
+ let parsed = RHash::try_convert(ob)?;
71
+
72
+ if parsed.len() != 3 {
73
+ return Err(RbValueError::new_err(
74
+ "`sink_options` must be a dictionary with the exactly 3 field.",
75
+ ));
76
+ }
77
+
78
+ let sync_on_close = parsed.get("sync_on_close").ok_or_else(|| {
79
+ RbValueError::new_err("`sink_options` must contain `sync_on_close` field")
80
+ })?;
81
+ let sync_on_close = Wrap::<SyncOnCloseType>::try_convert(sync_on_close)?.0;
82
+
83
+ let maintain_order = parsed.get("maintain_order").ok_or_else(|| {
84
+ RbValueError::new_err("`sink_options` must contain `maintain_order` field")
85
+ })?;
86
+ let maintain_order = bool::try_convert(maintain_order)?;
87
+
88
+ let mkdir = parsed
89
+ .get("mkdir")
90
+ .ok_or_else(|| RbValueError::new_err("`sink_options` must contain `mkdir` field"))?;
91
+ let mkdir = bool::try_convert(mkdir)?;
92
+
93
+ Ok(Wrap(SinkOptions {
94
+ sync_on_close,
95
+ maintain_order,
96
+ mkdir,
97
+ }))
98
+ }
99
+ }
@@ -221,8 +221,6 @@ fn init(ruby: &Ruby) -> RbResult<()> {
221
221
  class.define_method("gather", method!(RbExpr::gather, 1))?;
222
222
  class.define_method("get", method!(RbExpr::get, 1))?;
223
223
  class.define_method("sort_by", method!(RbExpr::sort_by, 5))?;
224
- class.define_method("backward_fill", method!(RbExpr::backward_fill, 1))?;
225
- class.define_method("forward_fill", method!(RbExpr::forward_fill, 1))?;
226
224
  class.define_method("shift", method!(RbExpr::shift, 2))?;
227
225
  class.define_method("fill_null", method!(RbExpr::fill_null, 1))?;
228
226
  class.define_method(
@@ -248,7 +246,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
248
246
  class.define_method("slice", method!(RbExpr::slice, 2))?;
249
247
  class.define_method("append", method!(RbExpr::append, 2))?;
250
248
  class.define_method("rechunk", method!(RbExpr::rechunk, 0))?;
251
- class.define_method("round", method!(RbExpr::round, 1))?;
249
+ class.define_method("round", method!(RbExpr::round, 2))?;
252
250
  class.define_method("floor", method!(RbExpr::floor, 0))?;
253
251
  class.define_method("ceil", method!(RbExpr::ceil, 0))?;
254
252
  class.define_method("clip", method!(RbExpr::clip, 2))?;
@@ -271,7 +269,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
271
269
  class.define_method("_and", method!(RbExpr::_and, 1))?;
272
270
  class.define_method("_xor", method!(RbExpr::_xor, 1))?;
273
271
  class.define_method("_or", method!(RbExpr::_or, 1))?;
274
- class.define_method("is_in", method!(RbExpr::is_in, 1))?;
272
+ class.define_method("is_in", method!(RbExpr::is_in, 2))?;
275
273
  class.define_method("repeat_by", method!(RbExpr::repeat_by, 1))?;
276
274
  class.define_method("pow", method!(RbExpr::pow, 1))?;
277
275
  class.define_method("cum_sum", method!(RbExpr::cum_sum, 1))?;
@@ -322,7 +320,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
322
320
  class.define_method("arr_arg_max", method!(RbExpr::arr_arg_max, 0))?;
323
321
  class.define_method("arr_get", method!(RbExpr::arr_get, 2))?;
324
322
  class.define_method("arr_join", method!(RbExpr::arr_join, 2))?;
325
- class.define_method("arr_contains", method!(RbExpr::arr_contains, 1))?;
323
+ class.define_method("arr_contains", method!(RbExpr::arr_contains, 2))?;
326
324
  class.define_method("arr_count_matches", method!(RbExpr::arr_count_matches, 1))?;
327
325
  class.define_method("binary_contains", method!(RbExpr::bin_contains, 1))?;
328
326
  class.define_method("binary_ends_with", method!(RbExpr::bin_ends_with, 1))?;
@@ -367,7 +365,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
367
365
  class.define_method("str_contains_any", method!(RbExpr::str_contains_any, 2))?;
368
366
  class.define_method("str_replace_many", method!(RbExpr::str_replace_many, 3))?;
369
367
  class.define_method("list_len", method!(RbExpr::list_len, 0))?;
370
- class.define_method("list_contains", method!(RbExpr::list_contains, 1))?;
368
+ class.define_method("list_contains", method!(RbExpr::list_contains, 2))?;
371
369
  class.define_method("list_count_matches", method!(RbExpr::list_count_matches, 1))?;
372
370
  class.define_method("dt_year", method!(RbExpr::dt_year, 0))?;
373
371
  class.define_method("dt_is_leap_year", method!(RbExpr::dt_is_leap_year, 0))?;
@@ -450,7 +448,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
450
448
  "rolling_quantile_by",
451
449
  method!(RbExpr::rolling_quantile_by, 6),
452
450
  )?;
453
- class.define_method("rolling_skew", method!(RbExpr::rolling_skew, 2))?;
451
+ class.define_method("rolling_skew", method!(RbExpr::rolling_skew, 4))?;
454
452
  class.define_method("lower_bound", method!(RbExpr::lower_bound, 0))?;
455
453
  class.define_method("upper_bound", method!(RbExpr::upper_bound, 0))?;
456
454
  class.define_method("list_max", method!(RbExpr::list_max, 0))?;
@@ -559,9 +557,9 @@ fn init(ruby: &Ruby) -> RbResult<()> {
559
557
  class.define_singleton_method("first", function!(functions::lazy::first, 0))?;
560
558
  class.define_singleton_method("last", function!(functions::lazy::last, 0))?;
561
559
  class.define_singleton_method("cols", function!(functions::lazy::cols, 1))?;
562
- class.define_singleton_method("fold", function!(functions::lazy::fold, 3))?;
560
+ class.define_singleton_method("fold", function!(functions::lazy::fold, 5))?;
563
561
  class.define_singleton_method("cum_fold", function!(functions::lazy::cum_fold, 4))?;
564
- class.define_singleton_method("lit", function!(functions::lazy::lit, 2))?;
562
+ class.define_singleton_method("lit", function!(functions::lazy::lit, 3))?;
565
563
  class.define_singleton_method("int_range", function!(functions::range::int_range, 4))?;
566
564
  class.define_singleton_method("int_ranges", function!(functions::range::int_ranges, 4))?;
567
565
  class.define_singleton_method("repeat", function!(functions::lazy::repeat, 3))?;
@@ -245,7 +245,7 @@ fn iterator_to_list(
245
245
  match opt_val {
246
246
  None => builder.append_null(),
247
247
  Some(s) => {
248
- if s.len() == 0 && s.dtype() != dt {
248
+ if s.is_empty() && s.dtype() != dt {
249
249
  builder
250
250
  .append_series(&Series::full_null(PlSmallStr::EMPTY, 0, dt))
251
251
  .unwrap()
@@ -372,7 +372,7 @@ impl<'a> ApplyLambda<'a> for BooleanChunked {
372
372
  first_value: AnyValue<'a>,
373
373
  ) -> RbResult<Series> {
374
374
  let mut avs = Vec::with_capacity(self.len());
375
- avs.extend(std::iter::repeat(AnyValue::Null).take(init_null_count));
375
+ avs.extend(std::iter::repeat_n(AnyValue::Null, init_null_count));
376
376
  avs.push(first_value);
377
377
 
378
378
  if self.null_count() > 0 {
@@ -656,7 +656,7 @@ where
656
656
  first_value: AnyValue<'a>,
657
657
  ) -> RbResult<Series> {
658
658
  let mut avs = Vec::with_capacity(self.len());
659
- avs.extend(std::iter::repeat(AnyValue::Null).take(init_null_count));
659
+ avs.extend(std::iter::repeat_n(AnyValue::Null, init_null_count));
660
660
  avs.push(first_value);
661
661
 
662
662
  if self.null_count() > 0 {
@@ -935,7 +935,7 @@ impl<'a> ApplyLambda<'a> for StringChunked {
935
935
  first_value: AnyValue<'a>,
936
936
  ) -> RbResult<Series> {
937
937
  let mut avs = Vec::with_capacity(self.len());
938
- avs.extend(std::iter::repeat(AnyValue::Null).take(init_null_count));
938
+ avs.extend(std::iter::repeat_n(AnyValue::Null, init_null_count));
939
939
  avs.push(first_value);
940
940
 
941
941
  if self.null_count() > 0 {
@@ -1132,7 +1132,7 @@ impl<'a> ApplyLambda<'a> for StructChunked {
1132
1132
  first_value: AnyValue<'a>,
1133
1133
  ) -> RbResult<Series> {
1134
1134
  let mut avs = Vec::with_capacity(self.len());
1135
- avs.extend(std::iter::repeat(AnyValue::Null).take(init_null_count));
1135
+ avs.extend(std::iter::repeat_n(AnyValue::Null, init_null_count));
1136
1136
  avs.push(first_value);
1137
1137
 
1138
1138
  let iter = iter_struct(self).skip(init_null_count + 1).map(|val| {
@@ -1,5 +1,6 @@
1
1
  use std::any::Any;
2
2
  use std::sync::Arc;
3
+ use std::sync::OnceLock;
3
4
 
4
5
  use magnus::IntoValue;
5
6
  use polars::prelude::*;
@@ -11,8 +12,10 @@ use polars_core::prelude::AnyValue;
11
12
  use crate::prelude::ObjectValue;
12
13
  use crate::Wrap;
13
14
 
15
+ static POLARS_REGISTRY_INIT_LOCK: OnceLock<()> = OnceLock::new();
16
+
14
17
  pub(crate) fn register_startup_deps() {
15
- if !registry::is_object_builder_registered() {
18
+ POLARS_REGISTRY_INIT_LOCK.get_or_init(|| {
16
19
  let object_builder = Box::new(|name: PlSmallStr, capacity: usize| {
17
20
  Box::new(ObjectChunkedBuilder::<ObjectValue>::new(name, capacity))
18
21
  as Box<dyn AnonymousObjectBuilder>
@@ -24,9 +27,18 @@ pub(crate) fn register_startup_deps() {
24
27
  };
25
28
  Box::new(object) as Box<dyn Any>
26
29
  });
30
+ let rbobject_converter = Arc::new(|av: AnyValue| {
31
+ let object = Wrap(av).into_value();
32
+ Box::new(object) as Box<dyn Any>
33
+ });
27
34
 
28
35
  let object_size = std::mem::size_of::<ObjectValue>();
29
36
  let physical_dtype = ArrowDataType::FixedSizeBinary(object_size);
30
- registry::register_object_builder(object_builder, object_converter, physical_dtype)
31
- }
37
+ registry::register_object_builder(
38
+ object_builder,
39
+ object_converter,
40
+ rbobject_converter,
41
+ physical_dtype,
42
+ )
43
+ });
32
44
  }
@@ -27,7 +27,7 @@ impl RbSeries {
27
27
  DataType::Categorical(_, _) | DataType::Enum(_, _) => {
28
28
  RArray::from_iter(series.categorical().unwrap().iter_str()).into_value()
29
29
  }
30
- DataType::Object(_, _) => {
30
+ DataType::Object(_) => {
31
31
  let v = RArray::with_capacity(series.len());
32
32
  for i in 0..series.len() {
33
33
  let obj: Option<&ObjectValue> = series.get_object(i).map(|any| any.into());
@@ -105,10 +105,10 @@ impl RbSeries {
105
105
  DataType::Null => {
106
106
  let null: Option<u8> = None;
107
107
  let n = series.len();
108
- let iter = std::iter::repeat(null).take(n);
109
- use std::iter::{Repeat, Take};
108
+ let iter = std::iter::repeat_n(null, n);
109
+ use std::iter::RepeatN;
110
110
  struct NullIter {
111
- iter: Take<Repeat<Option<u8>>>,
111
+ iter: RepeatN<Option<u8>>,
112
112
  n: usize,
113
113
  }
114
114
  impl Iterator for NullIter {
@@ -317,7 +317,7 @@ impl RbSeries {
317
317
 
318
318
  macro_rules! dispatch_apply {
319
319
  ($self:expr, $method:ident, $($args:expr),*) => {
320
- if matches!($self.dtype(), DataType::Object(_, _)) {
320
+ if matches!($self.dtype(), DataType::Object(_)) {
321
321
  // let ca = $self.0.unpack::<ObjectType<ObjectValue>>().unwrap();
322
322
  // ca.$method($($args),*)
323
323
  todo!()
@@ -484,7 +484,7 @@ impl RbSeries {
484
484
 
485
485
  ca.into_series()
486
486
  }
487
- Some(DataType::Object(_, _)) => {
487
+ Some(DataType::Object(_)) => {
488
488
  let ca =
489
489
  dispatch_apply!(series, apply_lambda_with_object_out_type, lambda, 0, None)?;
490
490
  ca.into_series()
@@ -481,6 +481,8 @@ module Polars
481
481
  #
482
482
  # @param item [Object]
483
483
  # Item that will be checked for membership
484
+ # @param nulls_equal [Boolean]
485
+ # If true, treat null as a distinct value. Null values will not propagate.
484
486
  #
485
487
  # @return [Expr]
486
488
  #
@@ -501,9 +503,9 @@ module Polars
501
503
  # # │ ["x", "y"] ┆ false │
502
504
  # # │ ["a", "c"] ┆ true │
503
505
  # # └───────────────┴──────────┘
504
- def contains(item)
506
+ def contains(item, nulls_equal: true)
505
507
  item = Utils.parse_into_expression(item, str_as_lit: true)
506
- Utils.wrap_expr(_rbexpr.arr_contains(item))
508
+ Utils.wrap_expr(_rbexpr.arr_contains(item, nulls_equal))
507
509
  end
508
510
 
509
511
  # Count how often the value produced by `element` occurs.
data/lib/polars/expr.rb CHANGED
@@ -1176,8 +1176,8 @@ module Polars
1176
1176
  # # │ 1.0 │
1177
1177
  # # │ 1.2 │
1178
1178
  # # └─────┘
1179
- def round(decimals = 0)
1180
- _from_rbexpr(_rbexpr.round(decimals))
1179
+ def round(decimals = 0, mode: "half_to_even")
1180
+ _from_rbexpr(_rbexpr.round(decimals, mode))
1181
1181
  end
1182
1182
 
1183
1183
  # Compute the dot/inner product between two Expressions.
@@ -1867,7 +1867,7 @@ module Polars
1867
1867
  # # │ 2 ┆ 6 │
1868
1868
  # # └─────┴─────┘
1869
1869
  def forward_fill(limit: nil)
1870
- _from_rbexpr(_rbexpr.forward_fill(limit))
1870
+ fill_null(strategy: "forward", limit: limit)
1871
1871
  end
1872
1872
 
1873
1873
  # Fill missing values with the next to be seen values.
@@ -1897,7 +1897,7 @@ module Polars
1897
1897
  # # │ null ┆ 6 │
1898
1898
  # # └──────┴─────┘
1899
1899
  def backward_fill(limit: nil)
1900
- _from_rbexpr(_rbexpr.backward_fill(limit))
1900
+ fill_null(strategy: "backward", limit: limit)
1901
1901
  end
1902
1902
 
1903
1903
  # Reverse the selection.
@@ -3712,6 +3712,8 @@ module Polars
3712
3712
  #
3713
3713
  # @param other [Object]
3714
3714
  # Series or sequence of primitive type.
3715
+ # @param nulls_equal [Boolean]
3716
+ # If true, treat null as a distinct value. Null values will not propagate.
3715
3717
  #
3716
3718
  # @return [Expr]
3717
3719
  #
@@ -3719,29 +3721,21 @@ module Polars
3719
3721
  # df = Polars::DataFrame.new(
3720
3722
  # {"sets" => [[1, 2, 3], [1, 2], [9, 10]], "optional_members" => [1, 2, 3]}
3721
3723
  # )
3722
- # df.select([Polars.col("optional_members").is_in("sets").alias("contains")])
3724
+ # df.with_columns(contains: Polars.col("optional_members").is_in("sets"))
3723
3725
  # # =>
3724
- # # shape: (3, 1)
3725
- # # ┌──────────┐
3726
- # # │ contains │
3727
- # # │ --- │
3728
- # # │ bool │
3729
- # # ╞══════════╡
3730
- # # │ true │
3731
- # # │ true │
3732
- # # │ false │
3733
- # # └──────────┘
3734
- def is_in(other)
3735
- if other.is_a?(::Array)
3736
- if other.length == 0
3737
- other = Polars.lit(nil)._rbexpr
3738
- else
3739
- other = Polars.lit(Series.new(other))._rbexpr
3740
- end
3741
- else
3742
- other = Utils.parse_into_expression(other, str_as_lit: false)
3743
- end
3744
- _from_rbexpr(_rbexpr.is_in(other))
3726
+ # # shape: (3, 3)
3727
+ # # ┌───────────┬──────────────────┬──────────┐
3728
+ # # │ sets ┆ optional_members ┆ contains │
3729
+ # # │ --- ┆ --- ┆ --- │
3730
+ # # │ list[i64] ┆ i64 ┆ bool │
3731
+ # # ╞═══════════╪══════════════════╪══════════╡
3732
+ # # │ [1, 2, 3] ┆ 1 ┆ true │
3733
+ # # │ [1, 2] ┆ 2 ┆ true │
3734
+ # # │ [9, 10] ┆ 3 ┆ false │
3735
+ # # └───────────┴──────────────────┴──────────┘
3736
+ def is_in(other, nulls_equal: false)
3737
+ other = Utils.parse_into_expression(other)
3738
+ _from_rbexpr(_rbexpr.is_in(other, nulls_equal))
3745
3739
  end
3746
3740
  alias_method :in?, :is_in
3747
3741
 
@@ -5715,6 +5709,11 @@ module Polars
5715
5709
  # Integer size of the rolling window.
5716
5710
  # @param bias [Boolean]
5717
5711
  # If false, the calculations are corrected for statistical bias.
5712
+ # @param min_samples [Integer]
5713
+ # The number of values in the window that should be non-null before computing
5714
+ # a result. If set to `nil` (default), it will be set equal to `window_size`.
5715
+ # @param center [Boolean]
5716
+ # Set the labels at the center of the window.
5718
5717
  #
5719
5718
  # @return [Expr]
5720
5719
  #
@@ -5733,8 +5732,8 @@ module Polars
5733
5732
  # # │ 0.381802 │
5734
5733
  # # │ 0.47033 │
5735
5734
  # # └──────────┘
5736
- def rolling_skew(window_size, bias: true)
5737
- _from_rbexpr(_rbexpr.rolling_skew(window_size, bias))
5735
+ def rolling_skew(window_size, bias: true, min_samples: nil, center: false)
5736
+ _from_rbexpr(_rbexpr.rolling_skew(window_size, bias, min_samples, center))
5738
5737
  end
5739
5738
 
5740
5739
  # Compute absolute values.
@@ -5889,6 +5888,7 @@ module Polars
5889
5888
  # # │ 20 │
5890
5889
  # # └──────┘
5891
5890
  def diff(n: 1, null_behavior: "ignore")
5891
+ n = Utils.parse_into_expression(n)
5892
5892
  _from_rbexpr(_rbexpr.diff(n, null_behavior))
5893
5893
  end
5894
5894