polars-df 0.17.1 → 0.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/Cargo.lock +725 -453
- data/ext/polars/Cargo.toml +8 -8
- data/ext/polars/src/conversion/any_value.rs +1 -1
- data/ext/polars/src/conversion/mod.rs +38 -7
- data/ext/polars/src/dataframe/export.rs +2 -2
- data/ext/polars/src/dataframe/general.rs +4 -1
- data/ext/polars/src/expr/array.rs +2 -2
- data/ext/polars/src/expr/datetime.rs +16 -9
- data/ext/polars/src/expr/general.rs +12 -14
- data/ext/polars/src/expr/list.rs +3 -3
- data/ext/polars/src/expr/rolling.rs +17 -2
- data/ext/polars/src/expr/string.rs +2 -2
- data/ext/polars/src/file.rs +56 -14
- data/ext/polars/src/functions/lazy.rs +26 -4
- data/ext/polars/src/functions/range.rs +4 -4
- data/ext/polars/src/lazyframe/general.rs +87 -48
- data/ext/polars/src/lazyframe/mod.rs +2 -0
- data/ext/polars/src/lazyframe/sink.rs +99 -0
- data/ext/polars/src/lib.rs +7 -9
- data/ext/polars/src/map/mod.rs +1 -1
- data/ext/polars/src/map/series.rs +4 -4
- data/ext/polars/src/on_startup.rs +15 -3
- data/ext/polars/src/series/export.rs +4 -4
- data/ext/polars/src/series/general.rs +2 -2
- data/lib/polars/array_expr.rb +4 -2
- data/lib/polars/expr.rb +28 -28
- data/lib/polars/functions/lit.rb +4 -9
- data/lib/polars/io/database.rb +2 -2
- data/lib/polars/lazy_frame.rb +78 -14
- data/lib/polars/list_expr.rb +10 -11
- data/lib/polars/series.rb +29 -12
- data/lib/polars/string_expr.rb +3 -3
- data/lib/polars/version.rb +1 -1
- metadata +4 -3
@@ -2,12 +2,13 @@ use magnus::{r_hash::ForEach, typed_data::Obj, IntoValue, RArray, RHash, TryConv
|
|
2
2
|
use polars::io::{HiveOptions, RowIndex};
|
3
3
|
use polars::lazy::frame::LazyFrame;
|
4
4
|
use polars::prelude::*;
|
5
|
-
use polars_plan::
|
5
|
+
use polars_plan::dsl::ScanSources;
|
6
6
|
use std::cell::RefCell;
|
7
7
|
use std::io::BufWriter;
|
8
8
|
use std::num::NonZeroUsize;
|
9
9
|
use std::path::PathBuf;
|
10
10
|
|
11
|
+
use super::SinkTarget;
|
11
12
|
use crate::conversion::*;
|
12
13
|
use crate::expr::rb_exprs_to_exprs;
|
13
14
|
use crate::file::get_file_like;
|
@@ -374,16 +375,16 @@ impl RbLazyFrame {
|
|
374
375
|
#[allow(clippy::too_many_arguments)]
|
375
376
|
pub fn sink_parquet(
|
376
377
|
&self,
|
377
|
-
|
378
|
+
target: SinkTarget,
|
378
379
|
compression: String,
|
379
380
|
compression_level: Option<i32>,
|
380
381
|
statistics: Wrap<StatisticsOptions>,
|
381
382
|
row_group_size: Option<usize>,
|
382
383
|
data_page_size: Option<usize>,
|
383
|
-
maintain_order: bool,
|
384
384
|
cloud_options: Option<Vec<(String, String)>>,
|
385
385
|
retries: usize,
|
386
|
-
|
386
|
+
sink_options: Wrap<SinkOptions>,
|
387
|
+
) -> RbResult<RbLazyFrame> {
|
387
388
|
let compression = parse_parquet_compression(&compression, compression_level)?;
|
388
389
|
|
389
390
|
let options = ParquetWriteOptions {
|
@@ -391,48 +392,69 @@ impl RbLazyFrame {
|
|
391
392
|
statistics: statistics.0,
|
392
393
|
row_group_size,
|
393
394
|
data_page_size,
|
394
|
-
|
395
|
+
key_value_metadata: None,
|
396
|
+
field_overwrites: Vec::new(),
|
395
397
|
};
|
396
398
|
|
397
|
-
let cloud_options = {
|
398
|
-
|
399
|
-
|
400
|
-
|
399
|
+
let cloud_options = match target.base_path() {
|
400
|
+
None => None,
|
401
|
+
Some(base_path) => {
|
402
|
+
let cloud_options = parse_cloud_options(
|
403
|
+
base_path.to_str().unwrap(),
|
404
|
+
cloud_options.unwrap_or_default(),
|
405
|
+
)?;
|
406
|
+
Some(cloud_options.with_max_retries(retries))
|
407
|
+
}
|
401
408
|
};
|
402
409
|
|
403
410
|
let ldf = self.ldf.borrow().clone();
|
404
|
-
|
405
|
-
|
406
|
-
|
411
|
+
match target {
|
412
|
+
SinkTarget::File(target) => {
|
413
|
+
ldf.sink_parquet(target, options, cloud_options, sink_options.0)
|
414
|
+
}
|
415
|
+
}
|
416
|
+
.map_err(RbPolarsErr::from)
|
417
|
+
.map(Into::into)
|
418
|
+
.map_err(Into::into)
|
407
419
|
}
|
408
420
|
|
409
421
|
pub fn sink_ipc(
|
410
422
|
&self,
|
411
|
-
|
423
|
+
target: SinkTarget,
|
412
424
|
compression: Option<Wrap<IpcCompression>>,
|
413
|
-
maintain_order: bool,
|
414
425
|
cloud_options: Option<Vec<(String, String)>>,
|
415
426
|
retries: usize,
|
416
|
-
|
427
|
+
sink_options: Wrap<SinkOptions>,
|
428
|
+
) -> RbResult<RbLazyFrame> {
|
417
429
|
let options = IpcWriterOptions {
|
418
430
|
compression: compression.map(|c| c.0),
|
419
|
-
|
431
|
+
..Default::default()
|
420
432
|
};
|
421
433
|
|
422
|
-
let cloud_options = {
|
423
|
-
|
424
|
-
|
425
|
-
|
434
|
+
let cloud_options = match target.base_path() {
|
435
|
+
None => None,
|
436
|
+
Some(base_path) => {
|
437
|
+
let cloud_options = parse_cloud_options(
|
438
|
+
base_path.to_str().unwrap(),
|
439
|
+
cloud_options.unwrap_or_default(),
|
440
|
+
)?;
|
441
|
+
Some(cloud_options.with_max_retries(retries))
|
442
|
+
}
|
426
443
|
};
|
427
444
|
|
428
445
|
let ldf = self.ldf.borrow().clone();
|
429
|
-
|
430
|
-
|
431
|
-
|
446
|
+
match target {
|
447
|
+
SinkTarget::File(target) => {
|
448
|
+
ldf.sink_ipc(target, options, cloud_options, sink_options.0)
|
449
|
+
}
|
450
|
+
}
|
451
|
+
.map_err(RbPolarsErr::from)
|
452
|
+
.map(Into::into)
|
453
|
+
.map_err(Into::into)
|
432
454
|
}
|
433
455
|
|
434
|
-
pub fn sink_csv(&self, arguments: &[Value]) -> RbResult<
|
435
|
-
let
|
456
|
+
pub fn sink_csv(&self, arguments: &[Value]) -> RbResult<RbLazyFrame> {
|
457
|
+
let target = SinkTarget::try_convert(arguments[0])?;
|
436
458
|
let include_bom = bool::try_convert(arguments[1])?;
|
437
459
|
let include_header = bool::try_convert(arguments[2])?;
|
438
460
|
let separator = u8::try_convert(arguments[3])?;
|
@@ -446,9 +468,9 @@ impl RbLazyFrame {
|
|
446
468
|
let float_precision = Option::<usize>::try_convert(arguments[11])?;
|
447
469
|
let null_value = Option::<String>::try_convert(arguments[12])?;
|
448
470
|
let quote_style = Option::<Wrap<QuoteStyle>>::try_convert(arguments[13])?;
|
449
|
-
let
|
450
|
-
let
|
451
|
-
let
|
471
|
+
let cloud_options = Option::<Vec<(String, String)>>::try_convert(arguments[14])?;
|
472
|
+
let retries = usize::try_convert(arguments[15])?;
|
473
|
+
let sink_options = Wrap::<SinkOptions>::try_convert(arguments[16])?;
|
452
474
|
|
453
475
|
let quote_style = quote_style.map_or(QuoteStyle::default(), |wrap| wrap.0);
|
454
476
|
let null_value = null_value.unwrap_or(SerializeOptions::default().null);
|
@@ -469,42 +491,59 @@ impl RbLazyFrame {
|
|
469
491
|
let options = CsvWriterOptions {
|
470
492
|
include_bom,
|
471
493
|
include_header,
|
472
|
-
maintain_order,
|
473
494
|
batch_size: batch_size.0,
|
474
495
|
serialize_options,
|
475
496
|
};
|
476
497
|
|
477
|
-
let cloud_options = {
|
478
|
-
|
479
|
-
|
480
|
-
|
498
|
+
let cloud_options = match target.base_path() {
|
499
|
+
None => None,
|
500
|
+
Some(base_path) => {
|
501
|
+
let cloud_options = parse_cloud_options(
|
502
|
+
base_path.to_str().unwrap(),
|
503
|
+
cloud_options.unwrap_or_default(),
|
504
|
+
)?;
|
505
|
+
Some(cloud_options.with_max_retries(retries))
|
506
|
+
}
|
481
507
|
};
|
482
508
|
|
483
509
|
let ldf = self.ldf.borrow().clone();
|
484
|
-
|
485
|
-
|
486
|
-
|
510
|
+
match target {
|
511
|
+
SinkTarget::File(target) => {
|
512
|
+
ldf.sink_csv(target, options, cloud_options, sink_options.0)
|
513
|
+
}
|
514
|
+
}
|
515
|
+
.map_err(RbPolarsErr::from)
|
516
|
+
.map(Into::into)
|
517
|
+
.map_err(Into::into)
|
487
518
|
}
|
488
519
|
|
489
520
|
pub fn sink_json(
|
490
521
|
&self,
|
491
|
-
|
492
|
-
maintain_order: bool,
|
522
|
+
target: SinkTarget,
|
493
523
|
cloud_options: Option<Vec<(String, String)>>,
|
494
524
|
retries: usize,
|
495
|
-
|
496
|
-
|
497
|
-
|
498
|
-
|
499
|
-
|
500
|
-
|
501
|
-
Some(
|
525
|
+
sink_options: Wrap<SinkOptions>,
|
526
|
+
) -> RbResult<RbLazyFrame> {
|
527
|
+
let options = JsonWriterOptions {};
|
528
|
+
|
529
|
+
let cloud_options = match target.base_path() {
|
530
|
+
None => None,
|
531
|
+
Some(base_path) => {
|
532
|
+
let cloud_options = parse_cloud_options(
|
533
|
+
base_path.to_str().unwrap(),
|
534
|
+
cloud_options.unwrap_or_default(),
|
535
|
+
)?;
|
536
|
+
Some(cloud_options.with_max_retries(retries))
|
537
|
+
}
|
502
538
|
};
|
503
539
|
|
504
540
|
let ldf = self.ldf.borrow().clone();
|
505
|
-
|
506
|
-
.
|
507
|
-
|
541
|
+
match target {
|
542
|
+
SinkTarget::File(path) => ldf.sink_json(path, options, cloud_options, sink_options.0),
|
543
|
+
}
|
544
|
+
.map_err(RbPolarsErr::from)
|
545
|
+
.map(Into::into)
|
546
|
+
.map_err(Into::into)
|
508
547
|
}
|
509
548
|
|
510
549
|
pub fn fetch(&self, n_rows: usize) -> RbResult<RbDataFrame> {
|
@@ -0,0 +1,99 @@
|
|
1
|
+
use std::path::{Path, PathBuf};
|
2
|
+
use std::sync::{Arc, Mutex};
|
3
|
+
|
4
|
+
use magnus::{RHash, TryConvert, Value};
|
5
|
+
use polars::prelude::sync_on_close::SyncOnCloseType;
|
6
|
+
use polars::prelude::{SinkOptions, SpecialEq};
|
7
|
+
|
8
|
+
use crate::prelude::Wrap;
|
9
|
+
use crate::{RbResult, RbValueError};
|
10
|
+
|
11
|
+
#[derive(Clone)]
|
12
|
+
pub enum SinkTarget {
|
13
|
+
File(polars_plan::dsl::SinkTarget),
|
14
|
+
}
|
15
|
+
|
16
|
+
impl TryConvert for Wrap<polars_plan::dsl::SinkTarget> {
|
17
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
18
|
+
if let Ok(v) = PathBuf::try_convert(ob) {
|
19
|
+
Ok(Wrap(polars::prelude::SinkTarget::Path(Arc::new(v))))
|
20
|
+
} else {
|
21
|
+
let writer = {
|
22
|
+
let rb_f = ob;
|
23
|
+
RbResult::Ok(crate::file::try_get_rbfile(rb_f, true)?.0.into_writeable())
|
24
|
+
}?;
|
25
|
+
|
26
|
+
Ok(Wrap(polars_plan::prelude::SinkTarget::Dyn(SpecialEq::new(
|
27
|
+
Arc::new(Mutex::new(Some(writer))),
|
28
|
+
))))
|
29
|
+
}
|
30
|
+
}
|
31
|
+
}
|
32
|
+
|
33
|
+
impl TryConvert for SinkTarget {
|
34
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
35
|
+
Ok(Self::File(
|
36
|
+
<Wrap<polars_plan::dsl::SinkTarget>>::try_convert(ob)?.0,
|
37
|
+
))
|
38
|
+
}
|
39
|
+
}
|
40
|
+
|
41
|
+
impl SinkTarget {
|
42
|
+
pub fn base_path(&self) -> Option<&Path> {
|
43
|
+
match self {
|
44
|
+
Self::File(t) => match t {
|
45
|
+
polars::prelude::SinkTarget::Path(p) => Some(p.as_path()),
|
46
|
+
polars::prelude::SinkTarget::Dyn(_) => None,
|
47
|
+
},
|
48
|
+
}
|
49
|
+
}
|
50
|
+
}
|
51
|
+
|
52
|
+
impl TryConvert for Wrap<SyncOnCloseType> {
|
53
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
54
|
+
let parsed = match String::try_convert(ob)?.as_str() {
|
55
|
+
"none" => SyncOnCloseType::None,
|
56
|
+
"data" => SyncOnCloseType::Data,
|
57
|
+
"all" => SyncOnCloseType::All,
|
58
|
+
v => {
|
59
|
+
return Err(RbValueError::new_err(format!(
|
60
|
+
"`sync_on_close` must be one of {{'none', 'data', 'all'}}, got {v}",
|
61
|
+
)));
|
62
|
+
}
|
63
|
+
};
|
64
|
+
Ok(Wrap(parsed))
|
65
|
+
}
|
66
|
+
}
|
67
|
+
|
68
|
+
impl TryConvert for Wrap<SinkOptions> {
|
69
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
70
|
+
let parsed = RHash::try_convert(ob)?;
|
71
|
+
|
72
|
+
if parsed.len() != 3 {
|
73
|
+
return Err(RbValueError::new_err(
|
74
|
+
"`sink_options` must be a dictionary with the exactly 3 field.",
|
75
|
+
));
|
76
|
+
}
|
77
|
+
|
78
|
+
let sync_on_close = parsed.get("sync_on_close").ok_or_else(|| {
|
79
|
+
RbValueError::new_err("`sink_options` must contain `sync_on_close` field")
|
80
|
+
})?;
|
81
|
+
let sync_on_close = Wrap::<SyncOnCloseType>::try_convert(sync_on_close)?.0;
|
82
|
+
|
83
|
+
let maintain_order = parsed.get("maintain_order").ok_or_else(|| {
|
84
|
+
RbValueError::new_err("`sink_options` must contain `maintain_order` field")
|
85
|
+
})?;
|
86
|
+
let maintain_order = bool::try_convert(maintain_order)?;
|
87
|
+
|
88
|
+
let mkdir = parsed
|
89
|
+
.get("mkdir")
|
90
|
+
.ok_or_else(|| RbValueError::new_err("`sink_options` must contain `mkdir` field"))?;
|
91
|
+
let mkdir = bool::try_convert(mkdir)?;
|
92
|
+
|
93
|
+
Ok(Wrap(SinkOptions {
|
94
|
+
sync_on_close,
|
95
|
+
maintain_order,
|
96
|
+
mkdir,
|
97
|
+
}))
|
98
|
+
}
|
99
|
+
}
|
data/ext/polars/src/lib.rs
CHANGED
@@ -221,8 +221,6 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
221
221
|
class.define_method("gather", method!(RbExpr::gather, 1))?;
|
222
222
|
class.define_method("get", method!(RbExpr::get, 1))?;
|
223
223
|
class.define_method("sort_by", method!(RbExpr::sort_by, 5))?;
|
224
|
-
class.define_method("backward_fill", method!(RbExpr::backward_fill, 1))?;
|
225
|
-
class.define_method("forward_fill", method!(RbExpr::forward_fill, 1))?;
|
226
224
|
class.define_method("shift", method!(RbExpr::shift, 2))?;
|
227
225
|
class.define_method("fill_null", method!(RbExpr::fill_null, 1))?;
|
228
226
|
class.define_method(
|
@@ -248,7 +246,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
248
246
|
class.define_method("slice", method!(RbExpr::slice, 2))?;
|
249
247
|
class.define_method("append", method!(RbExpr::append, 2))?;
|
250
248
|
class.define_method("rechunk", method!(RbExpr::rechunk, 0))?;
|
251
|
-
class.define_method("round", method!(RbExpr::round,
|
249
|
+
class.define_method("round", method!(RbExpr::round, 2))?;
|
252
250
|
class.define_method("floor", method!(RbExpr::floor, 0))?;
|
253
251
|
class.define_method("ceil", method!(RbExpr::ceil, 0))?;
|
254
252
|
class.define_method("clip", method!(RbExpr::clip, 2))?;
|
@@ -271,7 +269,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
271
269
|
class.define_method("_and", method!(RbExpr::_and, 1))?;
|
272
270
|
class.define_method("_xor", method!(RbExpr::_xor, 1))?;
|
273
271
|
class.define_method("_or", method!(RbExpr::_or, 1))?;
|
274
|
-
class.define_method("is_in", method!(RbExpr::is_in,
|
272
|
+
class.define_method("is_in", method!(RbExpr::is_in, 2))?;
|
275
273
|
class.define_method("repeat_by", method!(RbExpr::repeat_by, 1))?;
|
276
274
|
class.define_method("pow", method!(RbExpr::pow, 1))?;
|
277
275
|
class.define_method("cum_sum", method!(RbExpr::cum_sum, 1))?;
|
@@ -322,7 +320,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
322
320
|
class.define_method("arr_arg_max", method!(RbExpr::arr_arg_max, 0))?;
|
323
321
|
class.define_method("arr_get", method!(RbExpr::arr_get, 2))?;
|
324
322
|
class.define_method("arr_join", method!(RbExpr::arr_join, 2))?;
|
325
|
-
class.define_method("arr_contains", method!(RbExpr::arr_contains,
|
323
|
+
class.define_method("arr_contains", method!(RbExpr::arr_contains, 2))?;
|
326
324
|
class.define_method("arr_count_matches", method!(RbExpr::arr_count_matches, 1))?;
|
327
325
|
class.define_method("binary_contains", method!(RbExpr::bin_contains, 1))?;
|
328
326
|
class.define_method("binary_ends_with", method!(RbExpr::bin_ends_with, 1))?;
|
@@ -367,7 +365,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
367
365
|
class.define_method("str_contains_any", method!(RbExpr::str_contains_any, 2))?;
|
368
366
|
class.define_method("str_replace_many", method!(RbExpr::str_replace_many, 3))?;
|
369
367
|
class.define_method("list_len", method!(RbExpr::list_len, 0))?;
|
370
|
-
class.define_method("list_contains", method!(RbExpr::list_contains,
|
368
|
+
class.define_method("list_contains", method!(RbExpr::list_contains, 2))?;
|
371
369
|
class.define_method("list_count_matches", method!(RbExpr::list_count_matches, 1))?;
|
372
370
|
class.define_method("dt_year", method!(RbExpr::dt_year, 0))?;
|
373
371
|
class.define_method("dt_is_leap_year", method!(RbExpr::dt_is_leap_year, 0))?;
|
@@ -450,7 +448,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
450
448
|
"rolling_quantile_by",
|
451
449
|
method!(RbExpr::rolling_quantile_by, 6),
|
452
450
|
)?;
|
453
|
-
class.define_method("rolling_skew", method!(RbExpr::rolling_skew,
|
451
|
+
class.define_method("rolling_skew", method!(RbExpr::rolling_skew, 4))?;
|
454
452
|
class.define_method("lower_bound", method!(RbExpr::lower_bound, 0))?;
|
455
453
|
class.define_method("upper_bound", method!(RbExpr::upper_bound, 0))?;
|
456
454
|
class.define_method("list_max", method!(RbExpr::list_max, 0))?;
|
@@ -559,9 +557,9 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
559
557
|
class.define_singleton_method("first", function!(functions::lazy::first, 0))?;
|
560
558
|
class.define_singleton_method("last", function!(functions::lazy::last, 0))?;
|
561
559
|
class.define_singleton_method("cols", function!(functions::lazy::cols, 1))?;
|
562
|
-
class.define_singleton_method("fold", function!(functions::lazy::fold,
|
560
|
+
class.define_singleton_method("fold", function!(functions::lazy::fold, 5))?;
|
563
561
|
class.define_singleton_method("cum_fold", function!(functions::lazy::cum_fold, 4))?;
|
564
|
-
class.define_singleton_method("lit", function!(functions::lazy::lit,
|
562
|
+
class.define_singleton_method("lit", function!(functions::lazy::lit, 3))?;
|
565
563
|
class.define_singleton_method("int_range", function!(functions::range::int_range, 4))?;
|
566
564
|
class.define_singleton_method("int_ranges", function!(functions::range::int_ranges, 4))?;
|
567
565
|
class.define_singleton_method("repeat", function!(functions::lazy::repeat, 3))?;
|
data/ext/polars/src/map/mod.rs
CHANGED
@@ -245,7 +245,7 @@ fn iterator_to_list(
|
|
245
245
|
match opt_val {
|
246
246
|
None => builder.append_null(),
|
247
247
|
Some(s) => {
|
248
|
-
if s.
|
248
|
+
if s.is_empty() && s.dtype() != dt {
|
249
249
|
builder
|
250
250
|
.append_series(&Series::full_null(PlSmallStr::EMPTY, 0, dt))
|
251
251
|
.unwrap()
|
@@ -372,7 +372,7 @@ impl<'a> ApplyLambda<'a> for BooleanChunked {
|
|
372
372
|
first_value: AnyValue<'a>,
|
373
373
|
) -> RbResult<Series> {
|
374
374
|
let mut avs = Vec::with_capacity(self.len());
|
375
|
-
avs.extend(std::iter::
|
375
|
+
avs.extend(std::iter::repeat_n(AnyValue::Null, init_null_count));
|
376
376
|
avs.push(first_value);
|
377
377
|
|
378
378
|
if self.null_count() > 0 {
|
@@ -656,7 +656,7 @@ where
|
|
656
656
|
first_value: AnyValue<'a>,
|
657
657
|
) -> RbResult<Series> {
|
658
658
|
let mut avs = Vec::with_capacity(self.len());
|
659
|
-
avs.extend(std::iter::
|
659
|
+
avs.extend(std::iter::repeat_n(AnyValue::Null, init_null_count));
|
660
660
|
avs.push(first_value);
|
661
661
|
|
662
662
|
if self.null_count() > 0 {
|
@@ -935,7 +935,7 @@ impl<'a> ApplyLambda<'a> for StringChunked {
|
|
935
935
|
first_value: AnyValue<'a>,
|
936
936
|
) -> RbResult<Series> {
|
937
937
|
let mut avs = Vec::with_capacity(self.len());
|
938
|
-
avs.extend(std::iter::
|
938
|
+
avs.extend(std::iter::repeat_n(AnyValue::Null, init_null_count));
|
939
939
|
avs.push(first_value);
|
940
940
|
|
941
941
|
if self.null_count() > 0 {
|
@@ -1132,7 +1132,7 @@ impl<'a> ApplyLambda<'a> for StructChunked {
|
|
1132
1132
|
first_value: AnyValue<'a>,
|
1133
1133
|
) -> RbResult<Series> {
|
1134
1134
|
let mut avs = Vec::with_capacity(self.len());
|
1135
|
-
avs.extend(std::iter::
|
1135
|
+
avs.extend(std::iter::repeat_n(AnyValue::Null, init_null_count));
|
1136
1136
|
avs.push(first_value);
|
1137
1137
|
|
1138
1138
|
let iter = iter_struct(self).skip(init_null_count + 1).map(|val| {
|
@@ -1,5 +1,6 @@
|
|
1
1
|
use std::any::Any;
|
2
2
|
use std::sync::Arc;
|
3
|
+
use std::sync::OnceLock;
|
3
4
|
|
4
5
|
use magnus::IntoValue;
|
5
6
|
use polars::prelude::*;
|
@@ -11,8 +12,10 @@ use polars_core::prelude::AnyValue;
|
|
11
12
|
use crate::prelude::ObjectValue;
|
12
13
|
use crate::Wrap;
|
13
14
|
|
15
|
+
static POLARS_REGISTRY_INIT_LOCK: OnceLock<()> = OnceLock::new();
|
16
|
+
|
14
17
|
pub(crate) fn register_startup_deps() {
|
15
|
-
|
18
|
+
POLARS_REGISTRY_INIT_LOCK.get_or_init(|| {
|
16
19
|
let object_builder = Box::new(|name: PlSmallStr, capacity: usize| {
|
17
20
|
Box::new(ObjectChunkedBuilder::<ObjectValue>::new(name, capacity))
|
18
21
|
as Box<dyn AnonymousObjectBuilder>
|
@@ -24,9 +27,18 @@ pub(crate) fn register_startup_deps() {
|
|
24
27
|
};
|
25
28
|
Box::new(object) as Box<dyn Any>
|
26
29
|
});
|
30
|
+
let rbobject_converter = Arc::new(|av: AnyValue| {
|
31
|
+
let object = Wrap(av).into_value();
|
32
|
+
Box::new(object) as Box<dyn Any>
|
33
|
+
});
|
27
34
|
|
28
35
|
let object_size = std::mem::size_of::<ObjectValue>();
|
29
36
|
let physical_dtype = ArrowDataType::FixedSizeBinary(object_size);
|
30
|
-
registry::register_object_builder(
|
31
|
-
|
37
|
+
registry::register_object_builder(
|
38
|
+
object_builder,
|
39
|
+
object_converter,
|
40
|
+
rbobject_converter,
|
41
|
+
physical_dtype,
|
42
|
+
)
|
43
|
+
});
|
32
44
|
}
|
@@ -27,7 +27,7 @@ impl RbSeries {
|
|
27
27
|
DataType::Categorical(_, _) | DataType::Enum(_, _) => {
|
28
28
|
RArray::from_iter(series.categorical().unwrap().iter_str()).into_value()
|
29
29
|
}
|
30
|
-
DataType::Object(_
|
30
|
+
DataType::Object(_) => {
|
31
31
|
let v = RArray::with_capacity(series.len());
|
32
32
|
for i in 0..series.len() {
|
33
33
|
let obj: Option<&ObjectValue> = series.get_object(i).map(|any| any.into());
|
@@ -105,10 +105,10 @@ impl RbSeries {
|
|
105
105
|
DataType::Null => {
|
106
106
|
let null: Option<u8> = None;
|
107
107
|
let n = series.len();
|
108
|
-
let iter = std::iter::
|
109
|
-
use std::iter::
|
108
|
+
let iter = std::iter::repeat_n(null, n);
|
109
|
+
use std::iter::RepeatN;
|
110
110
|
struct NullIter {
|
111
|
-
iter:
|
111
|
+
iter: RepeatN<Option<u8>>,
|
112
112
|
n: usize,
|
113
113
|
}
|
114
114
|
impl Iterator for NullIter {
|
@@ -317,7 +317,7 @@ impl RbSeries {
|
|
317
317
|
|
318
318
|
macro_rules! dispatch_apply {
|
319
319
|
($self:expr, $method:ident, $($args:expr),*) => {
|
320
|
-
if matches!($self.dtype(), DataType::Object(_
|
320
|
+
if matches!($self.dtype(), DataType::Object(_)) {
|
321
321
|
// let ca = $self.0.unpack::<ObjectType<ObjectValue>>().unwrap();
|
322
322
|
// ca.$method($($args),*)
|
323
323
|
todo!()
|
@@ -484,7 +484,7 @@ impl RbSeries {
|
|
484
484
|
|
485
485
|
ca.into_series()
|
486
486
|
}
|
487
|
-
Some(DataType::Object(_
|
487
|
+
Some(DataType::Object(_)) => {
|
488
488
|
let ca =
|
489
489
|
dispatch_apply!(series, apply_lambda_with_object_out_type, lambda, 0, None)?;
|
490
490
|
ca.into_series()
|
data/lib/polars/array_expr.rb
CHANGED
@@ -481,6 +481,8 @@ module Polars
|
|
481
481
|
#
|
482
482
|
# @param item [Object]
|
483
483
|
# Item that will be checked for membership
|
484
|
+
# @param nulls_equal [Boolean]
|
485
|
+
# If true, treat null as a distinct value. Null values will not propagate.
|
484
486
|
#
|
485
487
|
# @return [Expr]
|
486
488
|
#
|
@@ -501,9 +503,9 @@ module Polars
|
|
501
503
|
# # │ ["x", "y"] ┆ false │
|
502
504
|
# # │ ["a", "c"] ┆ true │
|
503
505
|
# # └───────────────┴──────────┘
|
504
|
-
def contains(item)
|
506
|
+
def contains(item, nulls_equal: true)
|
505
507
|
item = Utils.parse_into_expression(item, str_as_lit: true)
|
506
|
-
Utils.wrap_expr(_rbexpr.arr_contains(item))
|
508
|
+
Utils.wrap_expr(_rbexpr.arr_contains(item, nulls_equal))
|
507
509
|
end
|
508
510
|
|
509
511
|
# Count how often the value produced by `element` occurs.
|
data/lib/polars/expr.rb
CHANGED
@@ -1176,8 +1176,8 @@ module Polars
|
|
1176
1176
|
# # │ 1.0 │
|
1177
1177
|
# # │ 1.2 │
|
1178
1178
|
# # └─────┘
|
1179
|
-
def round(decimals = 0)
|
1180
|
-
_from_rbexpr(_rbexpr.round(decimals))
|
1179
|
+
def round(decimals = 0, mode: "half_to_even")
|
1180
|
+
_from_rbexpr(_rbexpr.round(decimals, mode))
|
1181
1181
|
end
|
1182
1182
|
|
1183
1183
|
# Compute the dot/inner product between two Expressions.
|
@@ -1867,7 +1867,7 @@ module Polars
|
|
1867
1867
|
# # │ 2 ┆ 6 │
|
1868
1868
|
# # └─────┴─────┘
|
1869
1869
|
def forward_fill(limit: nil)
|
1870
|
-
|
1870
|
+
fill_null(strategy: "forward", limit: limit)
|
1871
1871
|
end
|
1872
1872
|
|
1873
1873
|
# Fill missing values with the next to be seen values.
|
@@ -1897,7 +1897,7 @@ module Polars
|
|
1897
1897
|
# # │ null ┆ 6 │
|
1898
1898
|
# # └──────┴─────┘
|
1899
1899
|
def backward_fill(limit: nil)
|
1900
|
-
|
1900
|
+
fill_null(strategy: "backward", limit: limit)
|
1901
1901
|
end
|
1902
1902
|
|
1903
1903
|
# Reverse the selection.
|
@@ -3712,6 +3712,8 @@ module Polars
|
|
3712
3712
|
#
|
3713
3713
|
# @param other [Object]
|
3714
3714
|
# Series or sequence of primitive type.
|
3715
|
+
# @param nulls_equal [Boolean]
|
3716
|
+
# If true, treat null as a distinct value. Null values will not propagate.
|
3715
3717
|
#
|
3716
3718
|
# @return [Expr]
|
3717
3719
|
#
|
@@ -3719,29 +3721,21 @@ module Polars
|
|
3719
3721
|
# df = Polars::DataFrame.new(
|
3720
3722
|
# {"sets" => [[1, 2, 3], [1, 2], [9, 10]], "optional_members" => [1, 2, 3]}
|
3721
3723
|
# )
|
3722
|
-
# df.
|
3724
|
+
# df.with_columns(contains: Polars.col("optional_members").is_in("sets"))
|
3723
3725
|
# # =>
|
3724
|
-
# # shape: (3,
|
3725
|
-
# #
|
3726
|
-
# # │ contains │
|
3727
|
-
# # │ --- │
|
3728
|
-
# # │ bool │
|
3729
|
-
# #
|
3730
|
-
# # │ true │
|
3731
|
-
# # │ true │
|
3732
|
-
# # │ false │
|
3733
|
-
# #
|
3734
|
-
def is_in(other)
|
3735
|
-
|
3736
|
-
|
3737
|
-
other = Polars.lit(nil)._rbexpr
|
3738
|
-
else
|
3739
|
-
other = Polars.lit(Series.new(other))._rbexpr
|
3740
|
-
end
|
3741
|
-
else
|
3742
|
-
other = Utils.parse_into_expression(other, str_as_lit: false)
|
3743
|
-
end
|
3744
|
-
_from_rbexpr(_rbexpr.is_in(other))
|
3726
|
+
# # shape: (3, 3)
|
3727
|
+
# # ┌───────────┬──────────────────┬──────────┐
|
3728
|
+
# # │ sets ┆ optional_members ┆ contains │
|
3729
|
+
# # │ --- ┆ --- ┆ --- │
|
3730
|
+
# # │ list[i64] ┆ i64 ┆ bool │
|
3731
|
+
# # ╞═══════════╪══════════════════╪══════════╡
|
3732
|
+
# # │ [1, 2, 3] ┆ 1 ┆ true │
|
3733
|
+
# # │ [1, 2] ┆ 2 ┆ true │
|
3734
|
+
# # │ [9, 10] ┆ 3 ┆ false │
|
3735
|
+
# # └───────────┴──────────────────┴──────────┘
|
3736
|
+
def is_in(other, nulls_equal: false)
|
3737
|
+
other = Utils.parse_into_expression(other)
|
3738
|
+
_from_rbexpr(_rbexpr.is_in(other, nulls_equal))
|
3745
3739
|
end
|
3746
3740
|
alias_method :in?, :is_in
|
3747
3741
|
|
@@ -5715,6 +5709,11 @@ module Polars
|
|
5715
5709
|
# Integer size of the rolling window.
|
5716
5710
|
# @param bias [Boolean]
|
5717
5711
|
# If false, the calculations are corrected for statistical bias.
|
5712
|
+
# @param min_samples [Integer]
|
5713
|
+
# The number of values in the window that should be non-null before computing
|
5714
|
+
# a result. If set to `nil` (default), it will be set equal to `window_size`.
|
5715
|
+
# @param center [Boolean]
|
5716
|
+
# Set the labels at the center of the window.
|
5718
5717
|
#
|
5719
5718
|
# @return [Expr]
|
5720
5719
|
#
|
@@ -5733,8 +5732,8 @@ module Polars
|
|
5733
5732
|
# # │ 0.381802 │
|
5734
5733
|
# # │ 0.47033 │
|
5735
5734
|
# # └──────────┘
|
5736
|
-
def rolling_skew(window_size, bias: true)
|
5737
|
-
_from_rbexpr(_rbexpr.rolling_skew(window_size, bias))
|
5735
|
+
def rolling_skew(window_size, bias: true, min_samples: nil, center: false)
|
5736
|
+
_from_rbexpr(_rbexpr.rolling_skew(window_size, bias, min_samples, center))
|
5738
5737
|
end
|
5739
5738
|
|
5740
5739
|
# Compute absolute values.
|
@@ -5889,6 +5888,7 @@ module Polars
|
|
5889
5888
|
# # │ 20 │
|
5890
5889
|
# # └──────┘
|
5891
5890
|
def diff(n: 1, null_behavior: "ignore")
|
5891
|
+
n = Utils.parse_into_expression(n)
|
5892
5892
|
_from_rbexpr(_rbexpr.diff(n, null_behavior))
|
5893
5893
|
end
|
5894
5894
|
|