polars-df 0.17.0 → 0.18.0

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the public registry.
@@ -1,61 +1,82 @@
1
- use magnus::{class, prelude::*, Module, RArray, RClass, RModule, Value};
2
- use polars::series::BitRepr;
1
+ use magnus::Value;
2
+ use num_traits::{Float, NumCast};
3
3
  use polars_core::prelude::*;
4
4
 
5
+ use super::numo_rs::{Element, RbArray1};
5
6
  use crate::error::RbPolarsErr;
6
7
  use crate::raise_err;
7
8
  use crate::series::RbSeries;
8
9
  use crate::RbResult;
9
10
 
10
11
  impl RbSeries {
11
- /// For numeric types, this should only be called for Series with null types.
12
- /// This will cast to floats so that `nil = NAN`
12
+ /// Convert this Series to a Numo array.
13
13
  pub fn to_numo(&self) -> RbResult<Value> {
14
- let s = &self.series.borrow();
15
- match s.dtype() {
16
- DataType::String => {
17
- let ca = s.str().unwrap();
14
+ series_to_numo(&self.series.borrow())
15
+ }
16
+ }
17
+
18
+ /// Convert a Series to a Numo array.
19
+ fn series_to_numo(s: &Series) -> RbResult<Value> {
20
+ series_to_numo_with_copy(s)
21
+ }
18
22
 
19
- // TODO make more efficient
20
- let np_arr = RArray::from_iter(ca);
21
- class::object()
22
- .const_get::<_, RModule>("Numo")?
23
- .const_get::<_, RClass>("RObject")?
24
- .funcall("cast", (np_arr,))
25
- }
26
- dt if dt.is_primitive_numeric() => {
27
- if let Some(BitRepr::Large(_)) = s.bit_repr() {
28
- let s = s.cast(&DataType::Float64).unwrap();
29
- let ca = s.f64().unwrap();
30
- // TODO make more efficient
31
- let np_arr = RArray::from_iter(ca.into_iter().map(|opt_v| match opt_v {
32
- Some(v) => v,
33
- None => f64::NAN,
34
- }));
35
- class::object()
36
- .const_get::<_, RModule>("Numo")?
37
- .const_get::<_, RClass>("DFloat")?
38
- .funcall("cast", (np_arr,))
39
- } else {
40
- let s = s.cast(&DataType::Float32).unwrap();
41
- let ca = s.f32().unwrap();
42
- // TODO make more efficient
43
- let np_arr = RArray::from_iter(ca.into_iter().map(|opt_v| match opt_v {
44
- Some(v) => v,
45
- None => f32::NAN,
46
- }));
47
- class::object()
48
- .const_get::<_, RModule>("Numo")?
49
- .const_get::<_, RClass>("SFloat")?
50
- .funcall("cast", (np_arr,))
51
- }
52
- }
53
- dt => {
54
- raise_err!(
55
- format!("'to_numo' not supported for dtype: {dt:?}"),
56
- ComputeError
57
- );
58
- }
23
+ /// Convert a Series to a Numo array, copying data in the process.
24
+ fn series_to_numo_with_copy(s: &Series) -> RbResult<Value> {
25
+ use DataType::*;
26
+ match s.dtype() {
27
+ Int8 => numeric_series_to_numpy::<Int8Type, f32>(s),
28
+ Int16 => numeric_series_to_numpy::<Int16Type, f32>(s),
29
+ Int32 => numeric_series_to_numpy::<Int32Type, f64>(s),
30
+ Int64 => numeric_series_to_numpy::<Int64Type, f64>(s),
31
+ UInt8 => numeric_series_to_numpy::<UInt8Type, f32>(s),
32
+ UInt16 => numeric_series_to_numpy::<UInt16Type, f32>(s),
33
+ UInt32 => numeric_series_to_numpy::<UInt32Type, f64>(s),
34
+ UInt64 => numeric_series_to_numpy::<UInt64Type, f64>(s),
35
+ Float32 => numeric_series_to_numpy::<Float32Type, f32>(s),
36
+ Float64 => numeric_series_to_numpy::<Float64Type, f64>(s),
37
+ Boolean => boolean_series_to_numo(s),
38
+ String => {
39
+ let ca = s.str().unwrap();
40
+ RbArray1::from_iter(ca)
41
+ }
42
+ dt => {
43
+ raise_err!(
44
+ format!("'to_numo' not supported for dtype: {dt:?}"),
45
+ ComputeError
46
+ );
59
47
  }
60
48
  }
61
49
  }
50
+
51
+ /// Convert numeric types to f32 or f64 with NaN representing a null value.
52
+ fn numeric_series_to_numpy<T, U>(s: &Series) -> RbResult<Value>
53
+ where
54
+ T: PolarsNumericType,
55
+ T::Native: Element,
56
+ U: Float + Element,
57
+ {
58
+ let ca: &ChunkedArray<T> = s.as_ref().as_ref();
59
+ if s.null_count() == 0 {
60
+ let values = ca.into_no_null_iter();
61
+ RbArray1::<T::Native>::from_iter(values)
62
+ } else {
63
+ let mapper = |opt_v: Option<T::Native>| match opt_v {
64
+ Some(v) => NumCast::from(v).unwrap(),
65
+ None => U::nan(),
66
+ };
67
+ let values = ca.iter().map(mapper);
68
+ RbArray1::from_iter(values)
69
+ }
70
+ }
71
+
72
+ /// Convert booleans to bit if no nulls are present, otherwise convert to objects.
73
+ fn boolean_series_to_numo(s: &Series) -> RbResult<Value> {
74
+ let ca = s.bool().unwrap();
75
+ if s.null_count() == 0 {
76
+ let values = ca.into_no_null_iter();
77
+ RbArray1::<bool>::from_iter(values)
78
+ } else {
79
+ let values = ca.iter();
80
+ RbArray1::from_iter(values)
81
+ }
82
+ }
@@ -2,12 +2,13 @@ use magnus::{r_hash::ForEach, typed_data::Obj, IntoValue, RArray, RHash, TryConv
2
2
  use polars::io::{HiveOptions, RowIndex};
3
3
  use polars::lazy::frame::LazyFrame;
4
4
  use polars::prelude::*;
5
- use polars_plan::plans::ScanSources;
5
+ use polars_plan::dsl::ScanSources;
6
6
  use std::cell::RefCell;
7
7
  use std::io::BufWriter;
8
8
  use std::num::NonZeroUsize;
9
9
  use std::path::PathBuf;
10
10
 
11
+ use super::SinkTarget;
11
12
  use crate::conversion::*;
12
13
  use crate::expr::rb_exprs_to_exprs;
13
14
  use crate::file::get_file_like;
@@ -374,16 +375,16 @@ impl RbLazyFrame {
374
375
  #[allow(clippy::too_many_arguments)]
375
376
  pub fn sink_parquet(
376
377
  &self,
377
- path: PathBuf,
378
+ target: SinkTarget,
378
379
  compression: String,
379
380
  compression_level: Option<i32>,
380
381
  statistics: Wrap<StatisticsOptions>,
381
382
  row_group_size: Option<usize>,
382
383
  data_page_size: Option<usize>,
383
- maintain_order: bool,
384
384
  cloud_options: Option<Vec<(String, String)>>,
385
385
  retries: usize,
386
- ) -> RbResult<()> {
386
+ sink_options: Wrap<SinkOptions>,
387
+ ) -> RbResult<RbLazyFrame> {
387
388
  let compression = parse_parquet_compression(&compression, compression_level)?;
388
389
 
389
390
  let options = ParquetWriteOptions {
@@ -391,67 +392,83 @@ impl RbLazyFrame {
391
392
  statistics: statistics.0,
392
393
  row_group_size,
393
394
  data_page_size,
394
- maintain_order,
395
395
  };
396
396
 
397
- let cloud_options = {
398
- let cloud_options =
399
- parse_cloud_options(path.to_str().unwrap(), cloud_options.unwrap_or_default())?;
400
- Some(cloud_options.with_max_retries(retries))
397
+ let cloud_options = match target.base_path() {
398
+ None => None,
399
+ Some(base_path) => {
400
+ let cloud_options = parse_cloud_options(
401
+ base_path.to_str().unwrap(),
402
+ cloud_options.unwrap_or_default(),
403
+ )?;
404
+ Some(cloud_options.with_max_retries(retries))
405
+ }
401
406
  };
402
407
 
403
408
  let ldf = self.ldf.borrow().clone();
404
- ldf.sink_parquet(&path, options, cloud_options)
405
- .map_err(RbPolarsErr::from)?;
406
- Ok(())
409
+ match target {
410
+ SinkTarget::File(target) => {
411
+ ldf.sink_parquet(target, options, cloud_options, sink_options.0)
412
+ }
413
+ }
414
+ .map_err(RbPolarsErr::from)
415
+ .map(Into::into)
416
+ .map_err(Into::into)
407
417
  }
408
418
 
409
419
  pub fn sink_ipc(
410
420
  &self,
411
- path: PathBuf,
421
+ target: SinkTarget,
412
422
  compression: Option<Wrap<IpcCompression>>,
413
- maintain_order: bool,
414
423
  cloud_options: Option<Vec<(String, String)>>,
415
424
  retries: usize,
416
- ) -> RbResult<()> {
425
+ sink_options: Wrap<SinkOptions>,
426
+ ) -> RbResult<RbLazyFrame> {
417
427
  let options = IpcWriterOptions {
418
428
  compression: compression.map(|c| c.0),
419
- maintain_order,
429
+ ..Default::default()
420
430
  };
421
431
 
422
- let cloud_options = {
423
- let cloud_options =
424
- parse_cloud_options(path.to_str().unwrap(), cloud_options.unwrap_or_default())?;
425
- Some(cloud_options.with_max_retries(retries))
432
+ let cloud_options = match target.base_path() {
433
+ None => None,
434
+ Some(base_path) => {
435
+ let cloud_options = parse_cloud_options(
436
+ base_path.to_str().unwrap(),
437
+ cloud_options.unwrap_or_default(),
438
+ )?;
439
+ Some(cloud_options.with_max_retries(retries))
440
+ }
426
441
  };
427
442
 
428
443
  let ldf = self.ldf.borrow().clone();
429
- ldf.sink_ipc(&path, options, cloud_options)
430
- .map_err(RbPolarsErr::from)?;
431
- Ok(())
432
- }
433
-
434
- #[allow(clippy::too_many_arguments)]
435
- pub fn sink_csv(
436
- &self,
437
- path: PathBuf,
438
- include_bom: bool,
439
- include_header: bool,
440
- separator: u8,
441
- line_terminator: String,
442
- quote_char: u8,
443
- batch_size: Wrap<NonZeroUsize>,
444
- datetime_format: Option<String>,
445
- date_format: Option<String>,
446
- time_format: Option<String>,
447
- float_scientific: Option<bool>,
448
- float_precision: Option<usize>,
449
- null_value: Option<String>,
450
- quote_style: Option<Wrap<QuoteStyle>>,
451
- maintain_order: bool,
452
- ) -> RbResult<()> {
453
- // TODO
454
- let cloud_options = None;
444
+ match target {
445
+ SinkTarget::File(target) => {
446
+ ldf.sink_ipc(target, options, cloud_options, sink_options.0)
447
+ }
448
+ }
449
+ .map_err(RbPolarsErr::from)
450
+ .map(Into::into)
451
+ .map_err(Into::into)
452
+ }
453
+
454
+ pub fn sink_csv(&self, arguments: &[Value]) -> RbResult<RbLazyFrame> {
455
+ let target = SinkTarget::try_convert(arguments[0])?;
456
+ let include_bom = bool::try_convert(arguments[1])?;
457
+ let include_header = bool::try_convert(arguments[2])?;
458
+ let separator = u8::try_convert(arguments[3])?;
459
+ let line_terminator = String::try_convert(arguments[4])?;
460
+ let quote_char = u8::try_convert(arguments[5])?;
461
+ let batch_size = Wrap::<NonZeroUsize>::try_convert(arguments[6])?;
462
+ let datetime_format = Option::<String>::try_convert(arguments[7])?;
463
+ let date_format = Option::<String>::try_convert(arguments[8])?;
464
+ let time_format = Option::<String>::try_convert(arguments[9])?;
465
+ let float_scientific = Option::<bool>::try_convert(arguments[10])?;
466
+ let float_precision = Option::<usize>::try_convert(arguments[11])?;
467
+ let null_value = Option::<String>::try_convert(arguments[12])?;
468
+ let quote_style = Option::<Wrap<QuoteStyle>>::try_convert(arguments[13])?;
469
+ let cloud_options = Option::<Vec<(String, String)>>::try_convert(arguments[14])?;
470
+ let retries = usize::try_convert(arguments[15])?;
471
+ let sink_options = Wrap::<SinkOptions>::try_convert(arguments[16])?;
455
472
 
456
473
  let quote_style = quote_style.map_or(QuoteStyle::default(), |wrap| wrap.0);
457
474
  let null_value = null_value.unwrap_or(SerializeOptions::default().null);
@@ -472,42 +489,59 @@ impl RbLazyFrame {
472
489
  let options = CsvWriterOptions {
473
490
  include_bom,
474
491
  include_header,
475
- maintain_order,
476
492
  batch_size: batch_size.0,
477
493
  serialize_options,
478
494
  };
479
495
 
480
- let cloud_options = {
481
- let cloud_options =
482
- parse_cloud_options(path.to_str().unwrap(), cloud_options.unwrap_or_default())?;
483
- Some(cloud_options)
496
+ let cloud_options = match target.base_path() {
497
+ None => None,
498
+ Some(base_path) => {
499
+ let cloud_options = parse_cloud_options(
500
+ base_path.to_str().unwrap(),
501
+ cloud_options.unwrap_or_default(),
502
+ )?;
503
+ Some(cloud_options.with_max_retries(retries))
504
+ }
484
505
  };
485
506
 
486
507
  let ldf = self.ldf.borrow().clone();
487
- ldf.sink_csv(&path, options, cloud_options)
488
- .map_err(RbPolarsErr::from)?;
489
- Ok(())
508
+ match target {
509
+ SinkTarget::File(target) => {
510
+ ldf.sink_csv(target, options, cloud_options, sink_options.0)
511
+ }
512
+ }
513
+ .map_err(RbPolarsErr::from)
514
+ .map(Into::into)
515
+ .map_err(Into::into)
490
516
  }
491
517
 
492
518
  pub fn sink_json(
493
519
  &self,
494
- path: PathBuf,
495
- maintain_order: bool,
520
+ target: SinkTarget,
496
521
  cloud_options: Option<Vec<(String, String)>>,
497
522
  retries: usize,
498
- ) -> RbResult<()> {
499
- let options = JsonWriterOptions { maintain_order };
500
-
501
- let cloud_options = {
502
- let cloud_options =
503
- parse_cloud_options(path.to_str().unwrap(), cloud_options.unwrap_or_default())?;
504
- Some(cloud_options.with_max_retries(retries))
523
+ sink_options: Wrap<SinkOptions>,
524
+ ) -> RbResult<RbLazyFrame> {
525
+ let options = JsonWriterOptions {};
526
+
527
+ let cloud_options = match target.base_path() {
528
+ None => None,
529
+ Some(base_path) => {
530
+ let cloud_options = parse_cloud_options(
531
+ base_path.to_str().unwrap(),
532
+ cloud_options.unwrap_or_default(),
533
+ )?;
534
+ Some(cloud_options.with_max_retries(retries))
535
+ }
505
536
  };
506
537
 
507
538
  let ldf = self.ldf.borrow().clone();
508
- ldf.sink_json(&path, options, cloud_options)
509
- .map_err(RbPolarsErr::from)?;
510
- Ok(())
539
+ match target {
540
+ SinkTarget::File(path) => ldf.sink_json(path, options, cloud_options, sink_options.0),
541
+ }
542
+ .map_err(RbPolarsErr::from)
543
+ .map(Into::into)
544
+ .map_err(Into::into)
511
545
  }
512
546
 
513
547
  pub fn fetch(&self, n_rows: usize) -> RbResult<RbDataFrame> {
@@ -679,6 +713,7 @@ impl RbLazyFrame {
679
713
  how: Wrap<JoinType>,
680
714
  suffix: String,
681
715
  validate: Wrap<JoinValidation>,
716
+ maintain_order: Wrap<MaintainOrderJoin>,
682
717
  coalesce: Option<bool>,
683
718
  ) -> RbResult<Self> {
684
719
  let coalesce = match coalesce {
@@ -702,6 +737,7 @@ impl RbLazyFrame {
702
737
  .how(how.0)
703
738
  .validate(validate.0)
704
739
  .coalesce(coalesce)
740
+ .maintain_order(maintain_order.0)
705
741
  .suffix(suffix)
706
742
  .finish()
707
743
  .into())
@@ -1,7 +1,9 @@
1
1
  mod general;
2
2
  mod serde;
3
+ mod sink;
3
4
 
4
5
  use polars::lazy::frame::LazyFrame;
6
+ pub use sink::SinkTarget;
5
7
  use std::cell::RefCell;
6
8
 
7
9
  #[magnus::wrap(class = "Polars::RbLazyFrame")]
@@ -0,0 +1,99 @@
1
+ use std::path::{Path, PathBuf};
2
+ use std::sync::{Arc, Mutex};
3
+
4
+ use magnus::{RHash, TryConvert, Value};
5
+ use polars::prelude::sync_on_close::SyncOnCloseType;
6
+ use polars::prelude::{SinkOptions, SpecialEq};
7
+
8
+ use crate::prelude::Wrap;
9
+ use crate::{RbResult, RbValueError};
10
+
11
+ #[derive(Clone)]
12
+ pub enum SinkTarget {
13
+ File(polars_plan::dsl::SinkTarget),
14
+ }
15
+
16
+ impl TryConvert for Wrap<polars_plan::dsl::SinkTarget> {
17
+ fn try_convert(ob: Value) -> RbResult<Self> {
18
+ if let Ok(v) = PathBuf::try_convert(ob) {
19
+ Ok(Wrap(polars::prelude::SinkTarget::Path(Arc::new(v))))
20
+ } else {
21
+ let writer = {
22
+ let rb_f = ob;
23
+ RbResult::Ok(crate::file::try_get_rbfile(rb_f, true)?.0.into_writeable())
24
+ }?;
25
+
26
+ Ok(Wrap(polars_plan::prelude::SinkTarget::Dyn(SpecialEq::new(
27
+ Arc::new(Mutex::new(Some(writer))),
28
+ ))))
29
+ }
30
+ }
31
+ }
32
+
33
+ impl TryConvert for SinkTarget {
34
+ fn try_convert(ob: Value) -> RbResult<Self> {
35
+ Ok(Self::File(
36
+ <Wrap<polars_plan::dsl::SinkTarget>>::try_convert(ob)?.0,
37
+ ))
38
+ }
39
+ }
40
+
41
+ impl SinkTarget {
42
+ pub fn base_path(&self) -> Option<&Path> {
43
+ match self {
44
+ Self::File(t) => match t {
45
+ polars::prelude::SinkTarget::Path(p) => Some(p.as_path()),
46
+ polars::prelude::SinkTarget::Dyn(_) => None,
47
+ },
48
+ }
49
+ }
50
+ }
51
+
52
+ impl TryConvert for Wrap<SyncOnCloseType> {
53
+ fn try_convert(ob: Value) -> RbResult<Self> {
54
+ let parsed = match String::try_convert(ob)?.as_str() {
55
+ "none" => SyncOnCloseType::None,
56
+ "data" => SyncOnCloseType::Data,
57
+ "all" => SyncOnCloseType::All,
58
+ v => {
59
+ return Err(RbValueError::new_err(format!(
60
+ "`sync_on_close` must be one of {{'none', 'data', 'all'}}, got {v}",
61
+ )));
62
+ }
63
+ };
64
+ Ok(Wrap(parsed))
65
+ }
66
+ }
67
+
68
+ impl TryConvert for Wrap<SinkOptions> {
69
+ fn try_convert(ob: Value) -> RbResult<Self> {
70
+ let parsed = RHash::try_convert(ob)?;
71
+
72
+ if parsed.len() != 3 {
73
+ return Err(RbValueError::new_err(
74
+ "`sink_options` must be a dictionary with the exactly 3 field.",
75
+ ));
76
+ }
77
+
78
+ let sync_on_close = parsed.get("sync_on_close").ok_or_else(|| {
79
+ RbValueError::new_err("`sink_options` must contain `sync_on_close` field")
80
+ })?;
81
+ let sync_on_close = Wrap::<SyncOnCloseType>::try_convert(sync_on_close)?.0;
82
+
83
+ let maintain_order = parsed.get("maintain_order").ok_or_else(|| {
84
+ RbValueError::new_err("`sink_options` must contain `maintain_order` field")
85
+ })?;
86
+ let maintain_order = bool::try_convert(maintain_order)?;
87
+
88
+ let mkdir = parsed
89
+ .get("mkdir")
90
+ .ok_or_else(|| RbValueError::new_err("`sink_options` must contain `mkdir` field"))?;
91
+ let mkdir = bool::try_convert(mkdir)?;
92
+
93
+ Ok(Wrap(SinkOptions {
94
+ sync_on_close,
95
+ maintain_order,
96
+ mkdir,
97
+ }))
98
+ }
99
+ }
@@ -221,8 +221,6 @@ fn init(ruby: &Ruby) -> RbResult<()> {
221
221
  class.define_method("gather", method!(RbExpr::gather, 1))?;
222
222
  class.define_method("get", method!(RbExpr::get, 1))?;
223
223
  class.define_method("sort_by", method!(RbExpr::sort_by, 5))?;
224
- class.define_method("backward_fill", method!(RbExpr::backward_fill, 1))?;
225
- class.define_method("forward_fill", method!(RbExpr::forward_fill, 1))?;
226
224
  class.define_method("shift", method!(RbExpr::shift, 2))?;
227
225
  class.define_method("fill_null", method!(RbExpr::fill_null, 1))?;
228
226
  class.define_method(
@@ -248,7 +246,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
248
246
  class.define_method("slice", method!(RbExpr::slice, 2))?;
249
247
  class.define_method("append", method!(RbExpr::append, 2))?;
250
248
  class.define_method("rechunk", method!(RbExpr::rechunk, 0))?;
251
- class.define_method("round", method!(RbExpr::round, 1))?;
249
+ class.define_method("round", method!(RbExpr::round, 2))?;
252
250
  class.define_method("floor", method!(RbExpr::floor, 0))?;
253
251
  class.define_method("ceil", method!(RbExpr::ceil, 0))?;
254
252
  class.define_method("clip", method!(RbExpr::clip, 2))?;
@@ -271,7 +269,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
271
269
  class.define_method("_and", method!(RbExpr::_and, 1))?;
272
270
  class.define_method("_xor", method!(RbExpr::_xor, 1))?;
273
271
  class.define_method("_or", method!(RbExpr::_or, 1))?;
274
- class.define_method("is_in", method!(RbExpr::is_in, 1))?;
272
+ class.define_method("is_in", method!(RbExpr::is_in, 2))?;
275
273
  class.define_method("repeat_by", method!(RbExpr::repeat_by, 1))?;
276
274
  class.define_method("pow", method!(RbExpr::pow, 1))?;
277
275
  class.define_method("cum_sum", method!(RbExpr::cum_sum, 1))?;
@@ -430,6 +428,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
430
428
  class.define_method("mode", method!(RbExpr::mode, 0))?;
431
429
  class.define_method("exclude", method!(RbExpr::exclude, 1))?;
432
430
  class.define_method("interpolate", method!(RbExpr::interpolate, 1))?;
431
+ class.define_method("interpolate_by", method!(RbExpr::interpolate_by, 1))?;
433
432
  class.define_method("rolling_sum", method!(RbExpr::rolling_sum, 4))?;
434
433
  class.define_method("rolling_sum_by", method!(RbExpr::rolling_sum_by, 4))?;
435
434
  class.define_method("rolling_min", method!(RbExpr::rolling_min, 4))?;
@@ -449,7 +448,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
449
448
  "rolling_quantile_by",
450
449
  method!(RbExpr::rolling_quantile_by, 6),
451
450
  )?;
452
- class.define_method("rolling_skew", method!(RbExpr::rolling_skew, 2))?;
451
+ class.define_method("rolling_skew", method!(RbExpr::rolling_skew, 4))?;
453
452
  class.define_method("lower_bound", method!(RbExpr::lower_bound, 0))?;
454
453
  class.define_method("upper_bound", method!(RbExpr::upper_bound, 0))?;
455
454
  class.define_method("list_max", method!(RbExpr::list_max, 0))?;
@@ -558,7 +557,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
558
557
  class.define_singleton_method("first", function!(functions::lazy::first, 0))?;
559
558
  class.define_singleton_method("last", function!(functions::lazy::last, 0))?;
560
559
  class.define_singleton_method("cols", function!(functions::lazy::cols, 1))?;
561
- class.define_singleton_method("fold", function!(functions::lazy::fold, 3))?;
560
+ class.define_singleton_method("fold", function!(functions::lazy::fold, 5))?;
562
561
  class.define_singleton_method("cum_fold", function!(functions::lazy::cum_fold, 4))?;
563
562
  class.define_singleton_method("lit", function!(functions::lazy::lit, 2))?;
564
563
  class.define_singleton_method("int_range", function!(functions::range::int_range, 4))?;
@@ -614,6 +613,10 @@ fn init(ruby: &Ruby) -> RbResult<()> {
614
613
  "concat_lf_diagonal",
615
614
  function!(functions::lazy::concat_lf_diagonal, 4),
616
615
  )?;
616
+ class.define_singleton_method(
617
+ "concat_lf_horizontal",
618
+ function!(functions::lazy::concat_lf_horizontal, 2),
619
+ )?;
617
620
  class.define_singleton_method("concat_df", function!(functions::eager::concat_df, 1))?;
618
621
  class.define_singleton_method("concat_lf", function!(functions::lazy::concat_lf, 4))?;
619
622
  class.define_singleton_method(
@@ -745,7 +748,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
745
748
  class.define_method("collect", method!(RbLazyFrame::collect, 0))?;
746
749
  class.define_method("sink_parquet", method!(RbLazyFrame::sink_parquet, 9))?;
747
750
  class.define_method("sink_ipc", method!(RbLazyFrame::sink_ipc, 5))?;
748
- class.define_method("sink_csv", method!(RbLazyFrame::sink_csv, 15))?;
751
+ class.define_method("sink_csv", method!(RbLazyFrame::sink_csv, -1))?;
749
752
  class.define_method("sink_json", method!(RbLazyFrame::sink_json, 4))?;
750
753
  class.define_method("fetch", method!(RbLazyFrame::fetch, 1))?;
751
754
  class.define_method("filter", method!(RbLazyFrame::filter, 1))?;
@@ -759,7 +762,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
759
762
  )?;
760
763
  class.define_method("with_context", method!(RbLazyFrame::with_context, 1))?;
761
764
  class.define_method("join_asof", method!(RbLazyFrame::join_asof, 14))?;
762
- class.define_method("join", method!(RbLazyFrame::join, 10))?;
765
+ class.define_method("join", method!(RbLazyFrame::join, 11))?;
763
766
  class.define_method("with_column", method!(RbLazyFrame::with_column, 1))?;
764
767
  class.define_method("with_columns", method!(RbLazyFrame::with_columns, 1))?;
765
768
  class.define_method(
@@ -245,7 +245,7 @@ fn iterator_to_list(
245
245
  match opt_val {
246
246
  None => builder.append_null(),
247
247
  Some(s) => {
248
- if s.len() == 0 && s.dtype() != dt {
248
+ if s.is_empty() && s.dtype() != dt {
249
249
  builder
250
250
  .append_series(&Series::full_null(PlSmallStr::EMPTY, 0, dt))
251
251
  .unwrap()
@@ -372,7 +372,7 @@ impl<'a> ApplyLambda<'a> for BooleanChunked {
372
372
  first_value: AnyValue<'a>,
373
373
  ) -> RbResult<Series> {
374
374
  let mut avs = Vec::with_capacity(self.len());
375
- avs.extend(std::iter::repeat(AnyValue::Null).take(init_null_count));
375
+ avs.extend(std::iter::repeat_n(AnyValue::Null, init_null_count));
376
376
  avs.push(first_value);
377
377
 
378
378
  if self.null_count() > 0 {
@@ -656,7 +656,7 @@ where
656
656
  first_value: AnyValue<'a>,
657
657
  ) -> RbResult<Series> {
658
658
  let mut avs = Vec::with_capacity(self.len());
659
- avs.extend(std::iter::repeat(AnyValue::Null).take(init_null_count));
659
+ avs.extend(std::iter::repeat_n(AnyValue::Null, init_null_count));
660
660
  avs.push(first_value);
661
661
 
662
662
  if self.null_count() > 0 {
@@ -935,7 +935,7 @@ impl<'a> ApplyLambda<'a> for StringChunked {
935
935
  first_value: AnyValue<'a>,
936
936
  ) -> RbResult<Series> {
937
937
  let mut avs = Vec::with_capacity(self.len());
938
- avs.extend(std::iter::repeat(AnyValue::Null).take(init_null_count));
938
+ avs.extend(std::iter::repeat_n(AnyValue::Null, init_null_count));
939
939
  avs.push(first_value);
940
940
 
941
941
  if self.null_count() > 0 {
@@ -1132,7 +1132,7 @@ impl<'a> ApplyLambda<'a> for StructChunked {
1132
1132
  first_value: AnyValue<'a>,
1133
1133
  ) -> RbResult<Series> {
1134
1134
  let mut avs = Vec::with_capacity(self.len());
1135
- avs.extend(std::iter::repeat(AnyValue::Null).take(init_null_count));
1135
+ avs.extend(std::iter::repeat_n(AnyValue::Null, init_null_count));
1136
1136
  avs.push(first_value);
1137
1137
 
1138
1138
  let iter = iter_struct(self).skip(init_null_count + 1).map(|val| {
@@ -1,5 +1,6 @@
1
1
  use std::any::Any;
2
2
  use std::sync::Arc;
3
+ use std::sync::OnceLock;
3
4
 
4
5
  use magnus::IntoValue;
5
6
  use polars::prelude::*;
@@ -11,8 +12,10 @@ use polars_core::prelude::AnyValue;
11
12
  use crate::prelude::ObjectValue;
12
13
  use crate::Wrap;
13
14
 
15
+ static POLARS_REGISTRY_INIT_LOCK: OnceLock<()> = OnceLock::new();
16
+
14
17
  pub(crate) fn register_startup_deps() {
15
- if !registry::is_object_builder_registered() {
18
+ POLARS_REGISTRY_INIT_LOCK.get_or_init(|| {
16
19
  let object_builder = Box::new(|name: PlSmallStr, capacity: usize| {
17
20
  Box::new(ObjectChunkedBuilder::<ObjectValue>::new(name, capacity))
18
21
  as Box<dyn AnonymousObjectBuilder>
@@ -24,9 +27,18 @@ pub(crate) fn register_startup_deps() {
24
27
  };
25
28
  Box::new(object) as Box<dyn Any>
26
29
  });
30
+ let rbobject_converter = Arc::new(|av: AnyValue| {
31
+ let object = Wrap(av).into_value();
32
+ Box::new(object) as Box<dyn Any>
33
+ });
27
34
 
28
35
  let object_size = std::mem::size_of::<ObjectValue>();
29
36
  let physical_dtype = ArrowDataType::FixedSizeBinary(object_size);
30
- registry::register_object_builder(object_builder, object_converter, physical_dtype)
31
- }
37
+ registry::register_object_builder(
38
+ object_builder,
39
+ object_converter,
40
+ rbobject_converter,
41
+ physical_dtype,
42
+ )
43
+ });
32
44
  }