polars-df 0.17.0 → 0.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/Cargo.lock +668 -377
- data/README.md +2 -2
- data/ext/polars/Cargo.toml +10 -8
- data/ext/polars/src/conversion/any_value.rs +1 -1
- data/ext/polars/src/conversion/mod.rs +38 -5
- data/ext/polars/src/dataframe/export.rs +2 -2
- data/ext/polars/src/dataframe/general.rs +9 -6
- data/ext/polars/src/expr/general.rs +16 -14
- data/ext/polars/src/expr/rolling.rs +17 -2
- data/ext/polars/src/file.rs +56 -14
- data/ext/polars/src/functions/lazy.rs +30 -2
- data/ext/polars/src/interop/numo/mod.rs +1 -0
- data/ext/polars/src/interop/numo/numo_rs.rs +52 -0
- data/ext/polars/src/interop/numo/to_numo_series.rs +69 -48
- data/ext/polars/src/lazyframe/general.rs +102 -66
- data/ext/polars/src/lazyframe/mod.rs +2 -0
- data/ext/polars/src/lazyframe/sink.rs +99 -0
- data/ext/polars/src/lib.rs +11 -8
- data/ext/polars/src/map/mod.rs +1 -1
- data/ext/polars/src/map/series.rs +4 -4
- data/ext/polars/src/on_startup.rs +15 -3
- data/ext/polars/src/series/export.rs +4 -4
- data/ext/polars/src/series/general.rs +2 -2
- data/lib/polars/data_frame.rb +304 -6
- data/lib/polars/expr.rb +58 -19
- data/lib/polars/functions/eager.rb +145 -16
- data/lib/polars/io/database.rb +17 -0
- data/lib/polars/lazy_frame.rb +135 -18
- data/lib/polars/list_expr.rb +4 -7
- data/lib/polars/schema.rb +29 -0
- data/lib/polars/series.rb +36 -32
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +1 -0
- metadata +6 -3
@@ -1,61 +1,82 @@
|
|
1
|
-
use magnus::
|
2
|
-
use
|
1
|
+
use magnus::Value;
|
2
|
+
use num_traits::{Float, NumCast};
|
3
3
|
use polars_core::prelude::*;
|
4
4
|
|
5
|
+
use super::numo_rs::{Element, RbArray1};
|
5
6
|
use crate::error::RbPolarsErr;
|
6
7
|
use crate::raise_err;
|
7
8
|
use crate::series::RbSeries;
|
8
9
|
use crate::RbResult;
|
9
10
|
|
10
11
|
impl RbSeries {
|
11
|
-
///
|
12
|
-
/// This will cast to floats so that `nil = NAN`
|
12
|
+
/// Convert this Series to a Numo array.
|
13
13
|
pub fn to_numo(&self) -> RbResult<Value> {
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
14
|
+
series_to_numo(&self.series.borrow())
|
15
|
+
}
|
16
|
+
}
|
17
|
+
|
18
|
+
/// Convert a Series to a Numo array.
|
19
|
+
fn series_to_numo(s: &Series) -> RbResult<Value> {
|
20
|
+
series_to_numo_with_copy(s)
|
21
|
+
}
|
18
22
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
let np_arr = RArray::from_iter(ca.into_iter().map(|opt_v| match opt_v {
|
44
|
-
Some(v) => v,
|
45
|
-
None => f32::NAN,
|
46
|
-
}));
|
47
|
-
class::object()
|
48
|
-
.const_get::<_, RModule>("Numo")?
|
49
|
-
.const_get::<_, RClass>("SFloat")?
|
50
|
-
.funcall("cast", (np_arr,))
|
51
|
-
}
|
52
|
-
}
|
53
|
-
dt => {
|
54
|
-
raise_err!(
|
55
|
-
format!("'to_numo' not supported for dtype: {dt:?}"),
|
56
|
-
ComputeError
|
57
|
-
);
|
58
|
-
}
|
23
|
+
/// Convert a Series to a Numo array, copying data in the process.
|
24
|
+
fn series_to_numo_with_copy(s: &Series) -> RbResult<Value> {
|
25
|
+
use DataType::*;
|
26
|
+
match s.dtype() {
|
27
|
+
Int8 => numeric_series_to_numpy::<Int8Type, f32>(s),
|
28
|
+
Int16 => numeric_series_to_numpy::<Int16Type, f32>(s),
|
29
|
+
Int32 => numeric_series_to_numpy::<Int32Type, f64>(s),
|
30
|
+
Int64 => numeric_series_to_numpy::<Int64Type, f64>(s),
|
31
|
+
UInt8 => numeric_series_to_numpy::<UInt8Type, f32>(s),
|
32
|
+
UInt16 => numeric_series_to_numpy::<UInt16Type, f32>(s),
|
33
|
+
UInt32 => numeric_series_to_numpy::<UInt32Type, f64>(s),
|
34
|
+
UInt64 => numeric_series_to_numpy::<UInt64Type, f64>(s),
|
35
|
+
Float32 => numeric_series_to_numpy::<Float32Type, f32>(s),
|
36
|
+
Float64 => numeric_series_to_numpy::<Float64Type, f64>(s),
|
37
|
+
Boolean => boolean_series_to_numo(s),
|
38
|
+
String => {
|
39
|
+
let ca = s.str().unwrap();
|
40
|
+
RbArray1::from_iter(ca)
|
41
|
+
}
|
42
|
+
dt => {
|
43
|
+
raise_err!(
|
44
|
+
format!("'to_numo' not supported for dtype: {dt:?}"),
|
45
|
+
ComputeError
|
46
|
+
);
|
59
47
|
}
|
60
48
|
}
|
61
49
|
}
|
50
|
+
|
51
|
+
/// Convert numeric types to f32 or f64 with NaN representing a null value.
|
52
|
+
fn numeric_series_to_numpy<T, U>(s: &Series) -> RbResult<Value>
|
53
|
+
where
|
54
|
+
T: PolarsNumericType,
|
55
|
+
T::Native: Element,
|
56
|
+
U: Float + Element,
|
57
|
+
{
|
58
|
+
let ca: &ChunkedArray<T> = s.as_ref().as_ref();
|
59
|
+
if s.null_count() == 0 {
|
60
|
+
let values = ca.into_no_null_iter();
|
61
|
+
RbArray1::<T::Native>::from_iter(values)
|
62
|
+
} else {
|
63
|
+
let mapper = |opt_v: Option<T::Native>| match opt_v {
|
64
|
+
Some(v) => NumCast::from(v).unwrap(),
|
65
|
+
None => U::nan(),
|
66
|
+
};
|
67
|
+
let values = ca.iter().map(mapper);
|
68
|
+
RbArray1::from_iter(values)
|
69
|
+
}
|
70
|
+
}
|
71
|
+
|
72
|
+
/// Convert booleans to bit if no nulls are present, otherwise convert to objects.
|
73
|
+
fn boolean_series_to_numo(s: &Series) -> RbResult<Value> {
|
74
|
+
let ca = s.bool().unwrap();
|
75
|
+
if s.null_count() == 0 {
|
76
|
+
let values = ca.into_no_null_iter();
|
77
|
+
RbArray1::<bool>::from_iter(values)
|
78
|
+
} else {
|
79
|
+
let values = ca.iter();
|
80
|
+
RbArray1::from_iter(values)
|
81
|
+
}
|
82
|
+
}
|
@@ -2,12 +2,13 @@ use magnus::{r_hash::ForEach, typed_data::Obj, IntoValue, RArray, RHash, TryConv
|
|
2
2
|
use polars::io::{HiveOptions, RowIndex};
|
3
3
|
use polars::lazy::frame::LazyFrame;
|
4
4
|
use polars::prelude::*;
|
5
|
-
use polars_plan::
|
5
|
+
use polars_plan::dsl::ScanSources;
|
6
6
|
use std::cell::RefCell;
|
7
7
|
use std::io::BufWriter;
|
8
8
|
use std::num::NonZeroUsize;
|
9
9
|
use std::path::PathBuf;
|
10
10
|
|
11
|
+
use super::SinkTarget;
|
11
12
|
use crate::conversion::*;
|
12
13
|
use crate::expr::rb_exprs_to_exprs;
|
13
14
|
use crate::file::get_file_like;
|
@@ -374,16 +375,16 @@ impl RbLazyFrame {
|
|
374
375
|
#[allow(clippy::too_many_arguments)]
|
375
376
|
pub fn sink_parquet(
|
376
377
|
&self,
|
377
|
-
|
378
|
+
target: SinkTarget,
|
378
379
|
compression: String,
|
379
380
|
compression_level: Option<i32>,
|
380
381
|
statistics: Wrap<StatisticsOptions>,
|
381
382
|
row_group_size: Option<usize>,
|
382
383
|
data_page_size: Option<usize>,
|
383
|
-
maintain_order: bool,
|
384
384
|
cloud_options: Option<Vec<(String, String)>>,
|
385
385
|
retries: usize,
|
386
|
-
|
386
|
+
sink_options: Wrap<SinkOptions>,
|
387
|
+
) -> RbResult<RbLazyFrame> {
|
387
388
|
let compression = parse_parquet_compression(&compression, compression_level)?;
|
388
389
|
|
389
390
|
let options = ParquetWriteOptions {
|
@@ -391,67 +392,83 @@ impl RbLazyFrame {
|
|
391
392
|
statistics: statistics.0,
|
392
393
|
row_group_size,
|
393
394
|
data_page_size,
|
394
|
-
maintain_order,
|
395
395
|
};
|
396
396
|
|
397
|
-
let cloud_options = {
|
398
|
-
|
399
|
-
|
400
|
-
|
397
|
+
let cloud_options = match target.base_path() {
|
398
|
+
None => None,
|
399
|
+
Some(base_path) => {
|
400
|
+
let cloud_options = parse_cloud_options(
|
401
|
+
base_path.to_str().unwrap(),
|
402
|
+
cloud_options.unwrap_or_default(),
|
403
|
+
)?;
|
404
|
+
Some(cloud_options.with_max_retries(retries))
|
405
|
+
}
|
401
406
|
};
|
402
407
|
|
403
408
|
let ldf = self.ldf.borrow().clone();
|
404
|
-
|
405
|
-
|
406
|
-
|
409
|
+
match target {
|
410
|
+
SinkTarget::File(target) => {
|
411
|
+
ldf.sink_parquet(target, options, cloud_options, sink_options.0)
|
412
|
+
}
|
413
|
+
}
|
414
|
+
.map_err(RbPolarsErr::from)
|
415
|
+
.map(Into::into)
|
416
|
+
.map_err(Into::into)
|
407
417
|
}
|
408
418
|
|
409
419
|
pub fn sink_ipc(
|
410
420
|
&self,
|
411
|
-
|
421
|
+
target: SinkTarget,
|
412
422
|
compression: Option<Wrap<IpcCompression>>,
|
413
|
-
maintain_order: bool,
|
414
423
|
cloud_options: Option<Vec<(String, String)>>,
|
415
424
|
retries: usize,
|
416
|
-
|
425
|
+
sink_options: Wrap<SinkOptions>,
|
426
|
+
) -> RbResult<RbLazyFrame> {
|
417
427
|
let options = IpcWriterOptions {
|
418
428
|
compression: compression.map(|c| c.0),
|
419
|
-
|
429
|
+
..Default::default()
|
420
430
|
};
|
421
431
|
|
422
|
-
let cloud_options = {
|
423
|
-
|
424
|
-
|
425
|
-
|
432
|
+
let cloud_options = match target.base_path() {
|
433
|
+
None => None,
|
434
|
+
Some(base_path) => {
|
435
|
+
let cloud_options = parse_cloud_options(
|
436
|
+
base_path.to_str().unwrap(),
|
437
|
+
cloud_options.unwrap_or_default(),
|
438
|
+
)?;
|
439
|
+
Some(cloud_options.with_max_retries(retries))
|
440
|
+
}
|
426
441
|
};
|
427
442
|
|
428
443
|
let ldf = self.ldf.borrow().clone();
|
429
|
-
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
439
|
-
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
let cloud_options =
|
444
|
+
match target {
|
445
|
+
SinkTarget::File(target) => {
|
446
|
+
ldf.sink_ipc(target, options, cloud_options, sink_options.0)
|
447
|
+
}
|
448
|
+
}
|
449
|
+
.map_err(RbPolarsErr::from)
|
450
|
+
.map(Into::into)
|
451
|
+
.map_err(Into::into)
|
452
|
+
}
|
453
|
+
|
454
|
+
pub fn sink_csv(&self, arguments: &[Value]) -> RbResult<RbLazyFrame> {
|
455
|
+
let target = SinkTarget::try_convert(arguments[0])?;
|
456
|
+
let include_bom = bool::try_convert(arguments[1])?;
|
457
|
+
let include_header = bool::try_convert(arguments[2])?;
|
458
|
+
let separator = u8::try_convert(arguments[3])?;
|
459
|
+
let line_terminator = String::try_convert(arguments[4])?;
|
460
|
+
let quote_char = u8::try_convert(arguments[5])?;
|
461
|
+
let batch_size = Wrap::<NonZeroUsize>::try_convert(arguments[6])?;
|
462
|
+
let datetime_format = Option::<String>::try_convert(arguments[7])?;
|
463
|
+
let date_format = Option::<String>::try_convert(arguments[8])?;
|
464
|
+
let time_format = Option::<String>::try_convert(arguments[9])?;
|
465
|
+
let float_scientific = Option::<bool>::try_convert(arguments[10])?;
|
466
|
+
let float_precision = Option::<usize>::try_convert(arguments[11])?;
|
467
|
+
let null_value = Option::<String>::try_convert(arguments[12])?;
|
468
|
+
let quote_style = Option::<Wrap<QuoteStyle>>::try_convert(arguments[13])?;
|
469
|
+
let cloud_options = Option::<Vec<(String, String)>>::try_convert(arguments[14])?;
|
470
|
+
let retries = usize::try_convert(arguments[15])?;
|
471
|
+
let sink_options = Wrap::<SinkOptions>::try_convert(arguments[16])?;
|
455
472
|
|
456
473
|
let quote_style = quote_style.map_or(QuoteStyle::default(), |wrap| wrap.0);
|
457
474
|
let null_value = null_value.unwrap_or(SerializeOptions::default().null);
|
@@ -472,42 +489,59 @@ impl RbLazyFrame {
|
|
472
489
|
let options = CsvWriterOptions {
|
473
490
|
include_bom,
|
474
491
|
include_header,
|
475
|
-
maintain_order,
|
476
492
|
batch_size: batch_size.0,
|
477
493
|
serialize_options,
|
478
494
|
};
|
479
495
|
|
480
|
-
let cloud_options = {
|
481
|
-
|
482
|
-
|
483
|
-
|
496
|
+
let cloud_options = match target.base_path() {
|
497
|
+
None => None,
|
498
|
+
Some(base_path) => {
|
499
|
+
let cloud_options = parse_cloud_options(
|
500
|
+
base_path.to_str().unwrap(),
|
501
|
+
cloud_options.unwrap_or_default(),
|
502
|
+
)?;
|
503
|
+
Some(cloud_options.with_max_retries(retries))
|
504
|
+
}
|
484
505
|
};
|
485
506
|
|
486
507
|
let ldf = self.ldf.borrow().clone();
|
487
|
-
|
488
|
-
|
489
|
-
|
508
|
+
match target {
|
509
|
+
SinkTarget::File(target) => {
|
510
|
+
ldf.sink_csv(target, options, cloud_options, sink_options.0)
|
511
|
+
}
|
512
|
+
}
|
513
|
+
.map_err(RbPolarsErr::from)
|
514
|
+
.map(Into::into)
|
515
|
+
.map_err(Into::into)
|
490
516
|
}
|
491
517
|
|
492
518
|
pub fn sink_json(
|
493
519
|
&self,
|
494
|
-
|
495
|
-
maintain_order: bool,
|
520
|
+
target: SinkTarget,
|
496
521
|
cloud_options: Option<Vec<(String, String)>>,
|
497
522
|
retries: usize,
|
498
|
-
|
499
|
-
|
500
|
-
|
501
|
-
|
502
|
-
|
503
|
-
|
504
|
-
Some(
|
523
|
+
sink_options: Wrap<SinkOptions>,
|
524
|
+
) -> RbResult<RbLazyFrame> {
|
525
|
+
let options = JsonWriterOptions {};
|
526
|
+
|
527
|
+
let cloud_options = match target.base_path() {
|
528
|
+
None => None,
|
529
|
+
Some(base_path) => {
|
530
|
+
let cloud_options = parse_cloud_options(
|
531
|
+
base_path.to_str().unwrap(),
|
532
|
+
cloud_options.unwrap_or_default(),
|
533
|
+
)?;
|
534
|
+
Some(cloud_options.with_max_retries(retries))
|
535
|
+
}
|
505
536
|
};
|
506
537
|
|
507
538
|
let ldf = self.ldf.borrow().clone();
|
508
|
-
|
509
|
-
.
|
510
|
-
|
539
|
+
match target {
|
540
|
+
SinkTarget::File(path) => ldf.sink_json(path, options, cloud_options, sink_options.0),
|
541
|
+
}
|
542
|
+
.map_err(RbPolarsErr::from)
|
543
|
+
.map(Into::into)
|
544
|
+
.map_err(Into::into)
|
511
545
|
}
|
512
546
|
|
513
547
|
pub fn fetch(&self, n_rows: usize) -> RbResult<RbDataFrame> {
|
@@ -679,6 +713,7 @@ impl RbLazyFrame {
|
|
679
713
|
how: Wrap<JoinType>,
|
680
714
|
suffix: String,
|
681
715
|
validate: Wrap<JoinValidation>,
|
716
|
+
maintain_order: Wrap<MaintainOrderJoin>,
|
682
717
|
coalesce: Option<bool>,
|
683
718
|
) -> RbResult<Self> {
|
684
719
|
let coalesce = match coalesce {
|
@@ -702,6 +737,7 @@ impl RbLazyFrame {
|
|
702
737
|
.how(how.0)
|
703
738
|
.validate(validate.0)
|
704
739
|
.coalesce(coalesce)
|
740
|
+
.maintain_order(maintain_order.0)
|
705
741
|
.suffix(suffix)
|
706
742
|
.finish()
|
707
743
|
.into())
|
@@ -0,0 +1,99 @@
|
|
1
|
+
use std::path::{Path, PathBuf};
|
2
|
+
use std::sync::{Arc, Mutex};
|
3
|
+
|
4
|
+
use magnus::{RHash, TryConvert, Value};
|
5
|
+
use polars::prelude::sync_on_close::SyncOnCloseType;
|
6
|
+
use polars::prelude::{SinkOptions, SpecialEq};
|
7
|
+
|
8
|
+
use crate::prelude::Wrap;
|
9
|
+
use crate::{RbResult, RbValueError};
|
10
|
+
|
11
|
+
#[derive(Clone)]
|
12
|
+
pub enum SinkTarget {
|
13
|
+
File(polars_plan::dsl::SinkTarget),
|
14
|
+
}
|
15
|
+
|
16
|
+
impl TryConvert for Wrap<polars_plan::dsl::SinkTarget> {
|
17
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
18
|
+
if let Ok(v) = PathBuf::try_convert(ob) {
|
19
|
+
Ok(Wrap(polars::prelude::SinkTarget::Path(Arc::new(v))))
|
20
|
+
} else {
|
21
|
+
let writer = {
|
22
|
+
let rb_f = ob;
|
23
|
+
RbResult::Ok(crate::file::try_get_rbfile(rb_f, true)?.0.into_writeable())
|
24
|
+
}?;
|
25
|
+
|
26
|
+
Ok(Wrap(polars_plan::prelude::SinkTarget::Dyn(SpecialEq::new(
|
27
|
+
Arc::new(Mutex::new(Some(writer))),
|
28
|
+
))))
|
29
|
+
}
|
30
|
+
}
|
31
|
+
}
|
32
|
+
|
33
|
+
impl TryConvert for SinkTarget {
|
34
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
35
|
+
Ok(Self::File(
|
36
|
+
<Wrap<polars_plan::dsl::SinkTarget>>::try_convert(ob)?.0,
|
37
|
+
))
|
38
|
+
}
|
39
|
+
}
|
40
|
+
|
41
|
+
impl SinkTarget {
|
42
|
+
pub fn base_path(&self) -> Option<&Path> {
|
43
|
+
match self {
|
44
|
+
Self::File(t) => match t {
|
45
|
+
polars::prelude::SinkTarget::Path(p) => Some(p.as_path()),
|
46
|
+
polars::prelude::SinkTarget::Dyn(_) => None,
|
47
|
+
},
|
48
|
+
}
|
49
|
+
}
|
50
|
+
}
|
51
|
+
|
52
|
+
impl TryConvert for Wrap<SyncOnCloseType> {
|
53
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
54
|
+
let parsed = match String::try_convert(ob)?.as_str() {
|
55
|
+
"none" => SyncOnCloseType::None,
|
56
|
+
"data" => SyncOnCloseType::Data,
|
57
|
+
"all" => SyncOnCloseType::All,
|
58
|
+
v => {
|
59
|
+
return Err(RbValueError::new_err(format!(
|
60
|
+
"`sync_on_close` must be one of {{'none', 'data', 'all'}}, got {v}",
|
61
|
+
)));
|
62
|
+
}
|
63
|
+
};
|
64
|
+
Ok(Wrap(parsed))
|
65
|
+
}
|
66
|
+
}
|
67
|
+
|
68
|
+
impl TryConvert for Wrap<SinkOptions> {
|
69
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
70
|
+
let parsed = RHash::try_convert(ob)?;
|
71
|
+
|
72
|
+
if parsed.len() != 3 {
|
73
|
+
return Err(RbValueError::new_err(
|
74
|
+
"`sink_options` must be a dictionary with the exactly 3 field.",
|
75
|
+
));
|
76
|
+
}
|
77
|
+
|
78
|
+
let sync_on_close = parsed.get("sync_on_close").ok_or_else(|| {
|
79
|
+
RbValueError::new_err("`sink_options` must contain `sync_on_close` field")
|
80
|
+
})?;
|
81
|
+
let sync_on_close = Wrap::<SyncOnCloseType>::try_convert(sync_on_close)?.0;
|
82
|
+
|
83
|
+
let maintain_order = parsed.get("maintain_order").ok_or_else(|| {
|
84
|
+
RbValueError::new_err("`sink_options` must contain `maintain_order` field")
|
85
|
+
})?;
|
86
|
+
let maintain_order = bool::try_convert(maintain_order)?;
|
87
|
+
|
88
|
+
let mkdir = parsed
|
89
|
+
.get("mkdir")
|
90
|
+
.ok_or_else(|| RbValueError::new_err("`sink_options` must contain `mkdir` field"))?;
|
91
|
+
let mkdir = bool::try_convert(mkdir)?;
|
92
|
+
|
93
|
+
Ok(Wrap(SinkOptions {
|
94
|
+
sync_on_close,
|
95
|
+
maintain_order,
|
96
|
+
mkdir,
|
97
|
+
}))
|
98
|
+
}
|
99
|
+
}
|
data/ext/polars/src/lib.rs
CHANGED
@@ -221,8 +221,6 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
221
221
|
class.define_method("gather", method!(RbExpr::gather, 1))?;
|
222
222
|
class.define_method("get", method!(RbExpr::get, 1))?;
|
223
223
|
class.define_method("sort_by", method!(RbExpr::sort_by, 5))?;
|
224
|
-
class.define_method("backward_fill", method!(RbExpr::backward_fill, 1))?;
|
225
|
-
class.define_method("forward_fill", method!(RbExpr::forward_fill, 1))?;
|
226
224
|
class.define_method("shift", method!(RbExpr::shift, 2))?;
|
227
225
|
class.define_method("fill_null", method!(RbExpr::fill_null, 1))?;
|
228
226
|
class.define_method(
|
@@ -248,7 +246,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
248
246
|
class.define_method("slice", method!(RbExpr::slice, 2))?;
|
249
247
|
class.define_method("append", method!(RbExpr::append, 2))?;
|
250
248
|
class.define_method("rechunk", method!(RbExpr::rechunk, 0))?;
|
251
|
-
class.define_method("round", method!(RbExpr::round,
|
249
|
+
class.define_method("round", method!(RbExpr::round, 2))?;
|
252
250
|
class.define_method("floor", method!(RbExpr::floor, 0))?;
|
253
251
|
class.define_method("ceil", method!(RbExpr::ceil, 0))?;
|
254
252
|
class.define_method("clip", method!(RbExpr::clip, 2))?;
|
@@ -271,7 +269,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
271
269
|
class.define_method("_and", method!(RbExpr::_and, 1))?;
|
272
270
|
class.define_method("_xor", method!(RbExpr::_xor, 1))?;
|
273
271
|
class.define_method("_or", method!(RbExpr::_or, 1))?;
|
274
|
-
class.define_method("is_in", method!(RbExpr::is_in,
|
272
|
+
class.define_method("is_in", method!(RbExpr::is_in, 2))?;
|
275
273
|
class.define_method("repeat_by", method!(RbExpr::repeat_by, 1))?;
|
276
274
|
class.define_method("pow", method!(RbExpr::pow, 1))?;
|
277
275
|
class.define_method("cum_sum", method!(RbExpr::cum_sum, 1))?;
|
@@ -430,6 +428,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
430
428
|
class.define_method("mode", method!(RbExpr::mode, 0))?;
|
431
429
|
class.define_method("exclude", method!(RbExpr::exclude, 1))?;
|
432
430
|
class.define_method("interpolate", method!(RbExpr::interpolate, 1))?;
|
431
|
+
class.define_method("interpolate_by", method!(RbExpr::interpolate_by, 1))?;
|
433
432
|
class.define_method("rolling_sum", method!(RbExpr::rolling_sum, 4))?;
|
434
433
|
class.define_method("rolling_sum_by", method!(RbExpr::rolling_sum_by, 4))?;
|
435
434
|
class.define_method("rolling_min", method!(RbExpr::rolling_min, 4))?;
|
@@ -449,7 +448,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
449
448
|
"rolling_quantile_by",
|
450
449
|
method!(RbExpr::rolling_quantile_by, 6),
|
451
450
|
)?;
|
452
|
-
class.define_method("rolling_skew", method!(RbExpr::rolling_skew,
|
451
|
+
class.define_method("rolling_skew", method!(RbExpr::rolling_skew, 4))?;
|
453
452
|
class.define_method("lower_bound", method!(RbExpr::lower_bound, 0))?;
|
454
453
|
class.define_method("upper_bound", method!(RbExpr::upper_bound, 0))?;
|
455
454
|
class.define_method("list_max", method!(RbExpr::list_max, 0))?;
|
@@ -558,7 +557,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
558
557
|
class.define_singleton_method("first", function!(functions::lazy::first, 0))?;
|
559
558
|
class.define_singleton_method("last", function!(functions::lazy::last, 0))?;
|
560
559
|
class.define_singleton_method("cols", function!(functions::lazy::cols, 1))?;
|
561
|
-
class.define_singleton_method("fold", function!(functions::lazy::fold,
|
560
|
+
class.define_singleton_method("fold", function!(functions::lazy::fold, 5))?;
|
562
561
|
class.define_singleton_method("cum_fold", function!(functions::lazy::cum_fold, 4))?;
|
563
562
|
class.define_singleton_method("lit", function!(functions::lazy::lit, 2))?;
|
564
563
|
class.define_singleton_method("int_range", function!(functions::range::int_range, 4))?;
|
@@ -614,6 +613,10 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
614
613
|
"concat_lf_diagonal",
|
615
614
|
function!(functions::lazy::concat_lf_diagonal, 4),
|
616
615
|
)?;
|
616
|
+
class.define_singleton_method(
|
617
|
+
"concat_lf_horizontal",
|
618
|
+
function!(functions::lazy::concat_lf_horizontal, 2),
|
619
|
+
)?;
|
617
620
|
class.define_singleton_method("concat_df", function!(functions::eager::concat_df, 1))?;
|
618
621
|
class.define_singleton_method("concat_lf", function!(functions::lazy::concat_lf, 4))?;
|
619
622
|
class.define_singleton_method(
|
@@ -745,7 +748,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
745
748
|
class.define_method("collect", method!(RbLazyFrame::collect, 0))?;
|
746
749
|
class.define_method("sink_parquet", method!(RbLazyFrame::sink_parquet, 9))?;
|
747
750
|
class.define_method("sink_ipc", method!(RbLazyFrame::sink_ipc, 5))?;
|
748
|
-
class.define_method("sink_csv", method!(RbLazyFrame::sink_csv,
|
751
|
+
class.define_method("sink_csv", method!(RbLazyFrame::sink_csv, -1))?;
|
749
752
|
class.define_method("sink_json", method!(RbLazyFrame::sink_json, 4))?;
|
750
753
|
class.define_method("fetch", method!(RbLazyFrame::fetch, 1))?;
|
751
754
|
class.define_method("filter", method!(RbLazyFrame::filter, 1))?;
|
@@ -759,7 +762,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
759
762
|
)?;
|
760
763
|
class.define_method("with_context", method!(RbLazyFrame::with_context, 1))?;
|
761
764
|
class.define_method("join_asof", method!(RbLazyFrame::join_asof, 14))?;
|
762
|
-
class.define_method("join", method!(RbLazyFrame::join,
|
765
|
+
class.define_method("join", method!(RbLazyFrame::join, 11))?;
|
763
766
|
class.define_method("with_column", method!(RbLazyFrame::with_column, 1))?;
|
764
767
|
class.define_method("with_columns", method!(RbLazyFrame::with_columns, 1))?;
|
765
768
|
class.define_method(
|
data/ext/polars/src/map/mod.rs
CHANGED
@@ -245,7 +245,7 @@ fn iterator_to_list(
|
|
245
245
|
match opt_val {
|
246
246
|
None => builder.append_null(),
|
247
247
|
Some(s) => {
|
248
|
-
if s.
|
248
|
+
if s.is_empty() && s.dtype() != dt {
|
249
249
|
builder
|
250
250
|
.append_series(&Series::full_null(PlSmallStr::EMPTY, 0, dt))
|
251
251
|
.unwrap()
|
@@ -372,7 +372,7 @@ impl<'a> ApplyLambda<'a> for BooleanChunked {
|
|
372
372
|
first_value: AnyValue<'a>,
|
373
373
|
) -> RbResult<Series> {
|
374
374
|
let mut avs = Vec::with_capacity(self.len());
|
375
|
-
avs.extend(std::iter::
|
375
|
+
avs.extend(std::iter::repeat_n(AnyValue::Null, init_null_count));
|
376
376
|
avs.push(first_value);
|
377
377
|
|
378
378
|
if self.null_count() > 0 {
|
@@ -656,7 +656,7 @@ where
|
|
656
656
|
first_value: AnyValue<'a>,
|
657
657
|
) -> RbResult<Series> {
|
658
658
|
let mut avs = Vec::with_capacity(self.len());
|
659
|
-
avs.extend(std::iter::
|
659
|
+
avs.extend(std::iter::repeat_n(AnyValue::Null, init_null_count));
|
660
660
|
avs.push(first_value);
|
661
661
|
|
662
662
|
if self.null_count() > 0 {
|
@@ -935,7 +935,7 @@ impl<'a> ApplyLambda<'a> for StringChunked {
|
|
935
935
|
first_value: AnyValue<'a>,
|
936
936
|
) -> RbResult<Series> {
|
937
937
|
let mut avs = Vec::with_capacity(self.len());
|
938
|
-
avs.extend(std::iter::
|
938
|
+
avs.extend(std::iter::repeat_n(AnyValue::Null, init_null_count));
|
939
939
|
avs.push(first_value);
|
940
940
|
|
941
941
|
if self.null_count() > 0 {
|
@@ -1132,7 +1132,7 @@ impl<'a> ApplyLambda<'a> for StructChunked {
|
|
1132
1132
|
first_value: AnyValue<'a>,
|
1133
1133
|
) -> RbResult<Series> {
|
1134
1134
|
let mut avs = Vec::with_capacity(self.len());
|
1135
|
-
avs.extend(std::iter::
|
1135
|
+
avs.extend(std::iter::repeat_n(AnyValue::Null, init_null_count));
|
1136
1136
|
avs.push(first_value);
|
1137
1137
|
|
1138
1138
|
let iter = iter_struct(self).skip(init_null_count + 1).map(|val| {
|
@@ -1,5 +1,6 @@
|
|
1
1
|
use std::any::Any;
|
2
2
|
use std::sync::Arc;
|
3
|
+
use std::sync::OnceLock;
|
3
4
|
|
4
5
|
use magnus::IntoValue;
|
5
6
|
use polars::prelude::*;
|
@@ -11,8 +12,10 @@ use polars_core::prelude::AnyValue;
|
|
11
12
|
use crate::prelude::ObjectValue;
|
12
13
|
use crate::Wrap;
|
13
14
|
|
15
|
+
static POLARS_REGISTRY_INIT_LOCK: OnceLock<()> = OnceLock::new();
|
16
|
+
|
14
17
|
pub(crate) fn register_startup_deps() {
|
15
|
-
|
18
|
+
POLARS_REGISTRY_INIT_LOCK.get_or_init(|| {
|
16
19
|
let object_builder = Box::new(|name: PlSmallStr, capacity: usize| {
|
17
20
|
Box::new(ObjectChunkedBuilder::<ObjectValue>::new(name, capacity))
|
18
21
|
as Box<dyn AnonymousObjectBuilder>
|
@@ -24,9 +27,18 @@ pub(crate) fn register_startup_deps() {
|
|
24
27
|
};
|
25
28
|
Box::new(object) as Box<dyn Any>
|
26
29
|
});
|
30
|
+
let rbobject_converter = Arc::new(|av: AnyValue| {
|
31
|
+
let object = Wrap(av).into_value();
|
32
|
+
Box::new(object) as Box<dyn Any>
|
33
|
+
});
|
27
34
|
|
28
35
|
let object_size = std::mem::size_of::<ObjectValue>();
|
29
36
|
let physical_dtype = ArrowDataType::FixedSizeBinary(object_size);
|
30
|
-
registry::register_object_builder(
|
31
|
-
|
37
|
+
registry::register_object_builder(
|
38
|
+
object_builder,
|
39
|
+
object_converter,
|
40
|
+
rbobject_converter,
|
41
|
+
physical_dtype,
|
42
|
+
)
|
43
|
+
});
|
32
44
|
}
|