polars-df 0.17.1 → 0.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/Cargo.lock +667 -370
- data/ext/polars/Cargo.toml +8 -8
- data/ext/polars/src/conversion/mod.rs +20 -5
- data/ext/polars/src/dataframe/export.rs +2 -2
- data/ext/polars/src/dataframe/general.rs +4 -1
- data/ext/polars/src/expr/general.rs +12 -14
- data/ext/polars/src/expr/rolling.rs +17 -2
- data/ext/polars/src/file.rs +56 -14
- data/ext/polars/src/functions/lazy.rs +15 -2
- data/ext/polars/src/lazyframe/general.rs +85 -48
- data/ext/polars/src/lazyframe/mod.rs +2 -0
- data/ext/polars/src/lazyframe/sink.rs +99 -0
- data/ext/polars/src/lib.rs +4 -6
- data/ext/polars/src/map/mod.rs +1 -1
- data/ext/polars/src/map/series.rs +4 -4
- data/ext/polars/src/on_startup.rs +15 -3
- data/ext/polars/src/series/export.rs +4 -4
- data/ext/polars/src/series/general.rs +2 -2
- data/lib/polars/expr.rb +27 -19
- data/lib/polars/io/database.rb +2 -2
- data/lib/polars/lazy_frame.rb +78 -14
- data/lib/polars/list_expr.rb +4 -7
- data/lib/polars/series.rb +11 -9
- data/lib/polars/version.rb +1 -1
- metadata +4 -3
@@ -0,0 +1,99 @@
|
|
1
|
+
use std::path::{Path, PathBuf};
|
2
|
+
use std::sync::{Arc, Mutex};
|
3
|
+
|
4
|
+
use magnus::{RHash, TryConvert, Value};
|
5
|
+
use polars::prelude::sync_on_close::SyncOnCloseType;
|
6
|
+
use polars::prelude::{SinkOptions, SpecialEq};
|
7
|
+
|
8
|
+
use crate::prelude::Wrap;
|
9
|
+
use crate::{RbResult, RbValueError};
|
10
|
+
|
11
|
+
#[derive(Clone)]
|
12
|
+
pub enum SinkTarget {
|
13
|
+
File(polars_plan::dsl::SinkTarget),
|
14
|
+
}
|
15
|
+
|
16
|
+
impl TryConvert for Wrap<polars_plan::dsl::SinkTarget> {
|
17
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
18
|
+
if let Ok(v) = PathBuf::try_convert(ob) {
|
19
|
+
Ok(Wrap(polars::prelude::SinkTarget::Path(Arc::new(v))))
|
20
|
+
} else {
|
21
|
+
let writer = {
|
22
|
+
let rb_f = ob;
|
23
|
+
RbResult::Ok(crate::file::try_get_rbfile(rb_f, true)?.0.into_writeable())
|
24
|
+
}?;
|
25
|
+
|
26
|
+
Ok(Wrap(polars_plan::prelude::SinkTarget::Dyn(SpecialEq::new(
|
27
|
+
Arc::new(Mutex::new(Some(writer))),
|
28
|
+
))))
|
29
|
+
}
|
30
|
+
}
|
31
|
+
}
|
32
|
+
|
33
|
+
impl TryConvert for SinkTarget {
|
34
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
35
|
+
Ok(Self::File(
|
36
|
+
<Wrap<polars_plan::dsl::SinkTarget>>::try_convert(ob)?.0,
|
37
|
+
))
|
38
|
+
}
|
39
|
+
}
|
40
|
+
|
41
|
+
impl SinkTarget {
|
42
|
+
pub fn base_path(&self) -> Option<&Path> {
|
43
|
+
match self {
|
44
|
+
Self::File(t) => match t {
|
45
|
+
polars::prelude::SinkTarget::Path(p) => Some(p.as_path()),
|
46
|
+
polars::prelude::SinkTarget::Dyn(_) => None,
|
47
|
+
},
|
48
|
+
}
|
49
|
+
}
|
50
|
+
}
|
51
|
+
|
52
|
+
impl TryConvert for Wrap<SyncOnCloseType> {
|
53
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
54
|
+
let parsed = match String::try_convert(ob)?.as_str() {
|
55
|
+
"none" => SyncOnCloseType::None,
|
56
|
+
"data" => SyncOnCloseType::Data,
|
57
|
+
"all" => SyncOnCloseType::All,
|
58
|
+
v => {
|
59
|
+
return Err(RbValueError::new_err(format!(
|
60
|
+
"`sync_on_close` must be one of {{'none', 'data', 'all'}}, got {v}",
|
61
|
+
)));
|
62
|
+
}
|
63
|
+
};
|
64
|
+
Ok(Wrap(parsed))
|
65
|
+
}
|
66
|
+
}
|
67
|
+
|
68
|
+
impl TryConvert for Wrap<SinkOptions> {
|
69
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
70
|
+
let parsed = RHash::try_convert(ob)?;
|
71
|
+
|
72
|
+
if parsed.len() != 3 {
|
73
|
+
return Err(RbValueError::new_err(
|
74
|
+
"`sink_options` must be a dictionary with the exactly 3 field.",
|
75
|
+
));
|
76
|
+
}
|
77
|
+
|
78
|
+
let sync_on_close = parsed.get("sync_on_close").ok_or_else(|| {
|
79
|
+
RbValueError::new_err("`sink_options` must contain `sync_on_close` field")
|
80
|
+
})?;
|
81
|
+
let sync_on_close = Wrap::<SyncOnCloseType>::try_convert(sync_on_close)?.0;
|
82
|
+
|
83
|
+
let maintain_order = parsed.get("maintain_order").ok_or_else(|| {
|
84
|
+
RbValueError::new_err("`sink_options` must contain `maintain_order` field")
|
85
|
+
})?;
|
86
|
+
let maintain_order = bool::try_convert(maintain_order)?;
|
87
|
+
|
88
|
+
let mkdir = parsed
|
89
|
+
.get("mkdir")
|
90
|
+
.ok_or_else(|| RbValueError::new_err("`sink_options` must contain `mkdir` field"))?;
|
91
|
+
let mkdir = bool::try_convert(mkdir)?;
|
92
|
+
|
93
|
+
Ok(Wrap(SinkOptions {
|
94
|
+
sync_on_close,
|
95
|
+
maintain_order,
|
96
|
+
mkdir,
|
97
|
+
}))
|
98
|
+
}
|
99
|
+
}
|
data/ext/polars/src/lib.rs
CHANGED
@@ -221,8 +221,6 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
221
221
|
class.define_method("gather", method!(RbExpr::gather, 1))?;
|
222
222
|
class.define_method("get", method!(RbExpr::get, 1))?;
|
223
223
|
class.define_method("sort_by", method!(RbExpr::sort_by, 5))?;
|
224
|
-
class.define_method("backward_fill", method!(RbExpr::backward_fill, 1))?;
|
225
|
-
class.define_method("forward_fill", method!(RbExpr::forward_fill, 1))?;
|
226
224
|
class.define_method("shift", method!(RbExpr::shift, 2))?;
|
227
225
|
class.define_method("fill_null", method!(RbExpr::fill_null, 1))?;
|
228
226
|
class.define_method(
|
@@ -248,7 +246,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
248
246
|
class.define_method("slice", method!(RbExpr::slice, 2))?;
|
249
247
|
class.define_method("append", method!(RbExpr::append, 2))?;
|
250
248
|
class.define_method("rechunk", method!(RbExpr::rechunk, 0))?;
|
251
|
-
class.define_method("round", method!(RbExpr::round,
|
249
|
+
class.define_method("round", method!(RbExpr::round, 2))?;
|
252
250
|
class.define_method("floor", method!(RbExpr::floor, 0))?;
|
253
251
|
class.define_method("ceil", method!(RbExpr::ceil, 0))?;
|
254
252
|
class.define_method("clip", method!(RbExpr::clip, 2))?;
|
@@ -271,7 +269,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
271
269
|
class.define_method("_and", method!(RbExpr::_and, 1))?;
|
272
270
|
class.define_method("_xor", method!(RbExpr::_xor, 1))?;
|
273
271
|
class.define_method("_or", method!(RbExpr::_or, 1))?;
|
274
|
-
class.define_method("is_in", method!(RbExpr::is_in,
|
272
|
+
class.define_method("is_in", method!(RbExpr::is_in, 2))?;
|
275
273
|
class.define_method("repeat_by", method!(RbExpr::repeat_by, 1))?;
|
276
274
|
class.define_method("pow", method!(RbExpr::pow, 1))?;
|
277
275
|
class.define_method("cum_sum", method!(RbExpr::cum_sum, 1))?;
|
@@ -450,7 +448,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
450
448
|
"rolling_quantile_by",
|
451
449
|
method!(RbExpr::rolling_quantile_by, 6),
|
452
450
|
)?;
|
453
|
-
class.define_method("rolling_skew", method!(RbExpr::rolling_skew,
|
451
|
+
class.define_method("rolling_skew", method!(RbExpr::rolling_skew, 4))?;
|
454
452
|
class.define_method("lower_bound", method!(RbExpr::lower_bound, 0))?;
|
455
453
|
class.define_method("upper_bound", method!(RbExpr::upper_bound, 0))?;
|
456
454
|
class.define_method("list_max", method!(RbExpr::list_max, 0))?;
|
@@ -559,7 +557,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
559
557
|
class.define_singleton_method("first", function!(functions::lazy::first, 0))?;
|
560
558
|
class.define_singleton_method("last", function!(functions::lazy::last, 0))?;
|
561
559
|
class.define_singleton_method("cols", function!(functions::lazy::cols, 1))?;
|
562
|
-
class.define_singleton_method("fold", function!(functions::lazy::fold,
|
560
|
+
class.define_singleton_method("fold", function!(functions::lazy::fold, 5))?;
|
563
561
|
class.define_singleton_method("cum_fold", function!(functions::lazy::cum_fold, 4))?;
|
564
562
|
class.define_singleton_method("lit", function!(functions::lazy::lit, 2))?;
|
565
563
|
class.define_singleton_method("int_range", function!(functions::range::int_range, 4))?;
|
data/ext/polars/src/map/mod.rs
CHANGED
@@ -245,7 +245,7 @@ fn iterator_to_list(
|
|
245
245
|
match opt_val {
|
246
246
|
None => builder.append_null(),
|
247
247
|
Some(s) => {
|
248
|
-
if s.
|
248
|
+
if s.is_empty() && s.dtype() != dt {
|
249
249
|
builder
|
250
250
|
.append_series(&Series::full_null(PlSmallStr::EMPTY, 0, dt))
|
251
251
|
.unwrap()
|
@@ -372,7 +372,7 @@ impl<'a> ApplyLambda<'a> for BooleanChunked {
|
|
372
372
|
first_value: AnyValue<'a>,
|
373
373
|
) -> RbResult<Series> {
|
374
374
|
let mut avs = Vec::with_capacity(self.len());
|
375
|
-
avs.extend(std::iter::
|
375
|
+
avs.extend(std::iter::repeat_n(AnyValue::Null, init_null_count));
|
376
376
|
avs.push(first_value);
|
377
377
|
|
378
378
|
if self.null_count() > 0 {
|
@@ -656,7 +656,7 @@ where
|
|
656
656
|
first_value: AnyValue<'a>,
|
657
657
|
) -> RbResult<Series> {
|
658
658
|
let mut avs = Vec::with_capacity(self.len());
|
659
|
-
avs.extend(std::iter::
|
659
|
+
avs.extend(std::iter::repeat_n(AnyValue::Null, init_null_count));
|
660
660
|
avs.push(first_value);
|
661
661
|
|
662
662
|
if self.null_count() > 0 {
|
@@ -935,7 +935,7 @@ impl<'a> ApplyLambda<'a> for StringChunked {
|
|
935
935
|
first_value: AnyValue<'a>,
|
936
936
|
) -> RbResult<Series> {
|
937
937
|
let mut avs = Vec::with_capacity(self.len());
|
938
|
-
avs.extend(std::iter::
|
938
|
+
avs.extend(std::iter::repeat_n(AnyValue::Null, init_null_count));
|
939
939
|
avs.push(first_value);
|
940
940
|
|
941
941
|
if self.null_count() > 0 {
|
@@ -1132,7 +1132,7 @@ impl<'a> ApplyLambda<'a> for StructChunked {
|
|
1132
1132
|
first_value: AnyValue<'a>,
|
1133
1133
|
) -> RbResult<Series> {
|
1134
1134
|
let mut avs = Vec::with_capacity(self.len());
|
1135
|
-
avs.extend(std::iter::
|
1135
|
+
avs.extend(std::iter::repeat_n(AnyValue::Null, init_null_count));
|
1136
1136
|
avs.push(first_value);
|
1137
1137
|
|
1138
1138
|
let iter = iter_struct(self).skip(init_null_count + 1).map(|val| {
|
@@ -1,5 +1,6 @@
|
|
1
1
|
use std::any::Any;
|
2
2
|
use std::sync::Arc;
|
3
|
+
use std::sync::OnceLock;
|
3
4
|
|
4
5
|
use magnus::IntoValue;
|
5
6
|
use polars::prelude::*;
|
@@ -11,8 +12,10 @@ use polars_core::prelude::AnyValue;
|
|
11
12
|
use crate::prelude::ObjectValue;
|
12
13
|
use crate::Wrap;
|
13
14
|
|
15
|
+
static POLARS_REGISTRY_INIT_LOCK: OnceLock<()> = OnceLock::new();
|
16
|
+
|
14
17
|
pub(crate) fn register_startup_deps() {
|
15
|
-
|
18
|
+
POLARS_REGISTRY_INIT_LOCK.get_or_init(|| {
|
16
19
|
let object_builder = Box::new(|name: PlSmallStr, capacity: usize| {
|
17
20
|
Box::new(ObjectChunkedBuilder::<ObjectValue>::new(name, capacity))
|
18
21
|
as Box<dyn AnonymousObjectBuilder>
|
@@ -24,9 +27,18 @@ pub(crate) fn register_startup_deps() {
|
|
24
27
|
};
|
25
28
|
Box::new(object) as Box<dyn Any>
|
26
29
|
});
|
30
|
+
let rbobject_converter = Arc::new(|av: AnyValue| {
|
31
|
+
let object = Wrap(av).into_value();
|
32
|
+
Box::new(object) as Box<dyn Any>
|
33
|
+
});
|
27
34
|
|
28
35
|
let object_size = std::mem::size_of::<ObjectValue>();
|
29
36
|
let physical_dtype = ArrowDataType::FixedSizeBinary(object_size);
|
30
|
-
registry::register_object_builder(
|
31
|
-
|
37
|
+
registry::register_object_builder(
|
38
|
+
object_builder,
|
39
|
+
object_converter,
|
40
|
+
rbobject_converter,
|
41
|
+
physical_dtype,
|
42
|
+
)
|
43
|
+
});
|
32
44
|
}
|
@@ -27,7 +27,7 @@ impl RbSeries {
|
|
27
27
|
DataType::Categorical(_, _) | DataType::Enum(_, _) => {
|
28
28
|
RArray::from_iter(series.categorical().unwrap().iter_str()).into_value()
|
29
29
|
}
|
30
|
-
DataType::Object(_
|
30
|
+
DataType::Object(_) => {
|
31
31
|
let v = RArray::with_capacity(series.len());
|
32
32
|
for i in 0..series.len() {
|
33
33
|
let obj: Option<&ObjectValue> = series.get_object(i).map(|any| any.into());
|
@@ -105,10 +105,10 @@ impl RbSeries {
|
|
105
105
|
DataType::Null => {
|
106
106
|
let null: Option<u8> = None;
|
107
107
|
let n = series.len();
|
108
|
-
let iter = std::iter::
|
109
|
-
use std::iter::
|
108
|
+
let iter = std::iter::repeat_n(null, n);
|
109
|
+
use std::iter::RepeatN;
|
110
110
|
struct NullIter {
|
111
|
-
iter:
|
111
|
+
iter: RepeatN<Option<u8>>,
|
112
112
|
n: usize,
|
113
113
|
}
|
114
114
|
impl Iterator for NullIter {
|
@@ -317,7 +317,7 @@ impl RbSeries {
|
|
317
317
|
|
318
318
|
macro_rules! dispatch_apply {
|
319
319
|
($self:expr, $method:ident, $($args:expr),*) => {
|
320
|
-
if matches!($self.dtype(), DataType::Object(_
|
320
|
+
if matches!($self.dtype(), DataType::Object(_)) {
|
321
321
|
// let ca = $self.0.unpack::<ObjectType<ObjectValue>>().unwrap();
|
322
322
|
// ca.$method($($args),*)
|
323
323
|
todo!()
|
@@ -484,7 +484,7 @@ impl RbSeries {
|
|
484
484
|
|
485
485
|
ca.into_series()
|
486
486
|
}
|
487
|
-
Some(DataType::Object(_
|
487
|
+
Some(DataType::Object(_)) => {
|
488
488
|
let ca =
|
489
489
|
dispatch_apply!(series, apply_lambda_with_object_out_type, lambda, 0, None)?;
|
490
490
|
ca.into_series()
|
data/lib/polars/expr.rb
CHANGED
@@ -1176,8 +1176,8 @@ module Polars
|
|
1176
1176
|
# # │ 1.0 │
|
1177
1177
|
# # │ 1.2 │
|
1178
1178
|
# # └─────┘
|
1179
|
-
def round(decimals = 0)
|
1180
|
-
_from_rbexpr(_rbexpr.round(decimals))
|
1179
|
+
def round(decimals = 0, mode: "half_to_even")
|
1180
|
+
_from_rbexpr(_rbexpr.round(decimals, mode))
|
1181
1181
|
end
|
1182
1182
|
|
1183
1183
|
# Compute the dot/inner product between two Expressions.
|
@@ -1867,7 +1867,7 @@ module Polars
|
|
1867
1867
|
# # │ 2 ┆ 6 │
|
1868
1868
|
# # └─────┴─────┘
|
1869
1869
|
def forward_fill(limit: nil)
|
1870
|
-
|
1870
|
+
fill_null(strategy: "forward", limit: limit)
|
1871
1871
|
end
|
1872
1872
|
|
1873
1873
|
# Fill missing values with the next to be seen values.
|
@@ -1897,7 +1897,7 @@ module Polars
|
|
1897
1897
|
# # │ null ┆ 6 │
|
1898
1898
|
# # └──────┴─────┘
|
1899
1899
|
def backward_fill(limit: nil)
|
1900
|
-
|
1900
|
+
fill_null(strategy: "backward", limit: limit)
|
1901
1901
|
end
|
1902
1902
|
|
1903
1903
|
# Reverse the selection.
|
@@ -3712,6 +3712,8 @@ module Polars
|
|
3712
3712
|
#
|
3713
3713
|
# @param other [Object]
|
3714
3714
|
# Series or sequence of primitive type.
|
3715
|
+
# @param nulls_equal [Boolean]
|
3716
|
+
# If true, treat null as a distinct value. Null values will not propagate.
|
3715
3717
|
#
|
3716
3718
|
# @return [Expr]
|
3717
3719
|
#
|
@@ -3719,19 +3721,19 @@ module Polars
|
|
3719
3721
|
# df = Polars::DataFrame.new(
|
3720
3722
|
# {"sets" => [[1, 2, 3], [1, 2], [9, 10]], "optional_members" => [1, 2, 3]}
|
3721
3723
|
# )
|
3722
|
-
# df.
|
3724
|
+
# df.with_columns(contains: Polars.col("optional_members").is_in("sets"))
|
3723
3725
|
# # =>
|
3724
|
-
# # shape: (3,
|
3725
|
-
# #
|
3726
|
-
# # │ contains │
|
3727
|
-
# # │ --- │
|
3728
|
-
# # │ bool │
|
3729
|
-
# #
|
3730
|
-
# # │ true │
|
3731
|
-
# # │ true │
|
3732
|
-
# # │ false │
|
3733
|
-
# #
|
3734
|
-
def is_in(other)
|
3726
|
+
# # shape: (3, 3)
|
3727
|
+
# # ┌───────────┬──────────────────┬──────────┐
|
3728
|
+
# # │ sets ┆ optional_members ┆ contains │
|
3729
|
+
# # │ --- ┆ --- ┆ --- │
|
3730
|
+
# # │ list[i64] ┆ i64 ┆ bool │
|
3731
|
+
# # ╞═══════════╪══════════════════╪══════════╡
|
3732
|
+
# # │ [1, 2, 3] ┆ 1 ┆ true │
|
3733
|
+
# # │ [1, 2] ┆ 2 ┆ true │
|
3734
|
+
# # │ [9, 10] ┆ 3 ┆ false │
|
3735
|
+
# # └───────────┴──────────────────┴──────────┘
|
3736
|
+
def is_in(other, nulls_equal: false)
|
3735
3737
|
if other.is_a?(::Array)
|
3736
3738
|
if other.length == 0
|
3737
3739
|
other = Polars.lit(nil)._rbexpr
|
@@ -3741,7 +3743,7 @@ module Polars
|
|
3741
3743
|
else
|
3742
3744
|
other = Utils.parse_into_expression(other, str_as_lit: false)
|
3743
3745
|
end
|
3744
|
-
_from_rbexpr(_rbexpr.is_in(other))
|
3746
|
+
_from_rbexpr(_rbexpr.is_in(other, nulls_equal))
|
3745
3747
|
end
|
3746
3748
|
alias_method :in?, :is_in
|
3747
3749
|
|
@@ -5715,6 +5717,11 @@ module Polars
|
|
5715
5717
|
# Integer size of the rolling window.
|
5716
5718
|
# @param bias [Boolean]
|
5717
5719
|
# If false, the calculations are corrected for statistical bias.
|
5720
|
+
# @param min_samples [Integer]
|
5721
|
+
# The number of values in the window that should be non-null before computing
|
5722
|
+
# a result. If set to `nil` (default), it will be set equal to `window_size`.
|
5723
|
+
# @param center [Boolean]
|
5724
|
+
# Set the labels at the center of the window.
|
5718
5725
|
#
|
5719
5726
|
# @return [Expr]
|
5720
5727
|
#
|
@@ -5733,8 +5740,8 @@ module Polars
|
|
5733
5740
|
# # │ 0.381802 │
|
5734
5741
|
# # │ 0.47033 │
|
5735
5742
|
# # └──────────┘
|
5736
|
-
def rolling_skew(window_size, bias: true)
|
5737
|
-
_from_rbexpr(_rbexpr.rolling_skew(window_size, bias))
|
5743
|
+
def rolling_skew(window_size, bias: true, min_samples: nil, center: false)
|
5744
|
+
_from_rbexpr(_rbexpr.rolling_skew(window_size, bias, min_samples, center))
|
5738
5745
|
end
|
5739
5746
|
|
5740
5747
|
# Compute absolute values.
|
@@ -5889,6 +5896,7 @@ module Polars
|
|
5889
5896
|
# # │ 20 │
|
5890
5897
|
# # └──────┘
|
5891
5898
|
def diff(n: 1, null_behavior: "ignore")
|
5899
|
+
n = Utils.parse_into_expression(n)
|
5892
5900
|
_from_rbexpr(_rbexpr.diff(n, null_behavior))
|
5893
5901
|
end
|
5894
5902
|
|
data/lib/polars/io/database.rb
CHANGED
@@ -51,7 +51,7 @@ module Polars
|
|
51
51
|
when :decimal
|
52
52
|
Decimal
|
53
53
|
when :float
|
54
|
-
# TODO uncomment in
|
54
|
+
# TODO uncomment in future release
|
55
55
|
# if column_type.limit && column_type.limit <= 24
|
56
56
|
# Float32
|
57
57
|
# else
|
@@ -59,7 +59,7 @@ module Polars
|
|
59
59
|
# end
|
60
60
|
Float64
|
61
61
|
when :integer
|
62
|
-
# TODO uncomment in
|
62
|
+
# TODO uncomment in future release
|
63
63
|
# case column_type.limit
|
64
64
|
# when 1
|
65
65
|
# Int8
|
data/lib/polars/lazy_frame.rb
CHANGED
@@ -433,7 +433,10 @@ module Polars
|
|
433
433
|
no_optimization: false,
|
434
434
|
slice_pushdown: true,
|
435
435
|
storage_options: nil,
|
436
|
-
retries: 2
|
436
|
+
retries: 2,
|
437
|
+
sync_on_close: nil,
|
438
|
+
mkdir: false,
|
439
|
+
lazy: false
|
437
440
|
)
|
438
441
|
lf = _set_sink_optimizations(
|
439
442
|
type_coercion: type_coercion,
|
@@ -468,17 +471,30 @@ module Polars
|
|
468
471
|
storage_options = nil
|
469
472
|
end
|
470
473
|
|
471
|
-
|
474
|
+
sink_options = {
|
475
|
+
"sync_on_close" => sync_on_close || "none",
|
476
|
+
"maintain_order" => maintain_order,
|
477
|
+
"mkdir" => mkdir
|
478
|
+
}
|
479
|
+
|
480
|
+
lf = lf.sink_parquet(
|
472
481
|
path,
|
473
482
|
compression,
|
474
483
|
compression_level,
|
475
484
|
statistics,
|
476
485
|
row_group_size,
|
477
486
|
data_pagesize_limit,
|
478
|
-
maintain_order,
|
479
487
|
storage_options,
|
480
|
-
retries
|
488
|
+
retries,
|
489
|
+
sink_options
|
481
490
|
)
|
491
|
+
lf = LazyFrame._from_rbldf(lf)
|
492
|
+
|
493
|
+
if !lazy
|
494
|
+
lf.collect
|
495
|
+
return nil
|
496
|
+
end
|
497
|
+
lf
|
482
498
|
end
|
483
499
|
|
484
500
|
# Evaluate the query in streaming mode and write to an IPC file.
|
@@ -520,7 +536,10 @@ module Polars
|
|
520
536
|
projection_pushdown: true,
|
521
537
|
simplify_expression: true,
|
522
538
|
slice_pushdown: true,
|
523
|
-
no_optimization: false
|
539
|
+
no_optimization: false,
|
540
|
+
sync_on_close: nil,
|
541
|
+
mkdir: false,
|
542
|
+
lazy: false
|
524
543
|
)
|
525
544
|
# TODO support storage options in Rust
|
526
545
|
storage_options = nil
|
@@ -541,13 +560,26 @@ module Polars
|
|
541
560
|
storage_options = nil
|
542
561
|
end
|
543
562
|
|
544
|
-
|
563
|
+
sink_options = {
|
564
|
+
"sync_on_close" => sync_on_close || "none",
|
565
|
+
"maintain_order" => maintain_order,
|
566
|
+
"mkdir" => mkdir
|
567
|
+
}
|
568
|
+
|
569
|
+
lf = lf.sink_ipc(
|
545
570
|
path,
|
546
571
|
compression,
|
547
|
-
maintain_order,
|
548
572
|
storage_options,
|
549
|
-
retries
|
573
|
+
retries,
|
574
|
+
sink_options
|
550
575
|
)
|
576
|
+
lf = LazyFrame._from_rbldf(lf)
|
577
|
+
|
578
|
+
if !lazy
|
579
|
+
lf.collect
|
580
|
+
return nil
|
581
|
+
end
|
582
|
+
lf
|
551
583
|
end
|
552
584
|
|
553
585
|
# Evaluate the query in streaming mode and write to a CSV file.
|
@@ -652,7 +684,10 @@ module Polars
|
|
652
684
|
slice_pushdown: true,
|
653
685
|
no_optimization: false,
|
654
686
|
storage_options: nil,
|
655
|
-
retries: 2
|
687
|
+
retries: 2,
|
688
|
+
sync_on_close: nil,
|
689
|
+
mkdir: false,
|
690
|
+
lazy: false
|
656
691
|
)
|
657
692
|
Utils._check_arg_is_1byte("separator", separator, false)
|
658
693
|
Utils._check_arg_is_1byte("quote_char", quote_char, false)
|
@@ -672,7 +707,13 @@ module Polars
|
|
672
707
|
storage_options = nil
|
673
708
|
end
|
674
709
|
|
675
|
-
|
710
|
+
sink_options = {
|
711
|
+
"sync_on_close" => sync_on_close || "none",
|
712
|
+
"maintain_order" => maintain_order,
|
713
|
+
"mkdir" => mkdir
|
714
|
+
}
|
715
|
+
|
716
|
+
lf = lf.sink_csv(
|
676
717
|
path,
|
677
718
|
include_bom,
|
678
719
|
include_header,
|
@@ -687,10 +728,17 @@ module Polars
|
|
687
728
|
float_precision,
|
688
729
|
null_value,
|
689
730
|
quote_style,
|
690
|
-
maintain_order,
|
691
731
|
storage_options,
|
692
|
-
retries
|
732
|
+
retries,
|
733
|
+
sink_options
|
693
734
|
)
|
735
|
+
lf = LazyFrame._from_rbldf(lf)
|
736
|
+
|
737
|
+
if !lazy
|
738
|
+
lf.collect
|
739
|
+
return nil
|
740
|
+
end
|
741
|
+
lf
|
694
742
|
end
|
695
743
|
|
696
744
|
# Evaluate the query in streaming mode and write to an NDJSON file.
|
@@ -730,7 +778,10 @@ module Polars
|
|
730
778
|
slice_pushdown: true,
|
731
779
|
no_optimization: false,
|
732
780
|
storage_options: nil,
|
733
|
-
retries: 2
|
781
|
+
retries: 2,
|
782
|
+
sync_on_close: nil,
|
783
|
+
mkdir: false,
|
784
|
+
lazy: false
|
734
785
|
)
|
735
786
|
lf = _set_sink_optimizations(
|
736
787
|
type_coercion: type_coercion,
|
@@ -747,7 +798,20 @@ module Polars
|
|
747
798
|
storage_options = nil
|
748
799
|
end
|
749
800
|
|
750
|
-
|
801
|
+
sink_options = {
|
802
|
+
"sync_on_close" => sync_on_close || "none",
|
803
|
+
"maintain_order" => maintain_order,
|
804
|
+
"mkdir" => mkdir
|
805
|
+
}
|
806
|
+
|
807
|
+
lf = lf.sink_json(path, storage_options, retries, sink_options)
|
808
|
+
lf = LazyFrame._from_rbldf(lf)
|
809
|
+
|
810
|
+
if !lazy
|
811
|
+
lf.collect
|
812
|
+
return nil
|
813
|
+
end
|
814
|
+
lf
|
751
815
|
end
|
752
816
|
|
753
817
|
# @private
|
data/lib/polars/list_expr.rb
CHANGED
@@ -403,7 +403,7 @@ module Polars
|
|
403
403
|
# The indices may be defined in a single column, or by sublists in another
|
404
404
|
# column of dtype `List`.
|
405
405
|
#
|
406
|
-
# @param
|
406
|
+
# @param indices [Object]
|
407
407
|
# Indices to return per sublist
|
408
408
|
# @param null_on_oob [Boolean]
|
409
409
|
# Behavior if an index is out of bounds:
|
@@ -427,12 +427,9 @@ module Polars
|
|
427
427
|
# # │ [] ┆ [null, null] │
|
428
428
|
# # │ [1, 2, … 5] ┆ [1, 5] │
|
429
429
|
# # └─────────────┴──────────────┘
|
430
|
-
def gather(
|
431
|
-
|
432
|
-
|
433
|
-
end
|
434
|
-
index = Utils.parse_into_expression(index, str_as_lit: false)
|
435
|
-
Utils.wrap_expr(_rbexpr.list_gather(index, null_on_oob))
|
430
|
+
def gather(indices, null_on_oob: false)
|
431
|
+
indices = Utils.parse_into_expression(indices)
|
432
|
+
Utils.wrap_expr(_rbexpr.list_gather(indices, null_on_oob))
|
436
433
|
end
|
437
434
|
alias_method :take, :gather
|
438
435
|
|
data/lib/polars/series.rb
CHANGED
@@ -3577,24 +3577,26 @@ module Polars
|
|
3577
3577
|
# Integer size of the rolling window.
|
3578
3578
|
# @param bias [Boolean]
|
3579
3579
|
# If false, the calculations are corrected for statistical bias.
|
3580
|
+
# @param min_samples [Integer]
|
3581
|
+
# The number of values in the window that should be non-null before computing
|
3582
|
+
# a result. If set to `nil` (default), it will be set equal to `window_size`.
|
3583
|
+
# @param center [Boolean]
|
3584
|
+
# Set the labels at the center of the window.
|
3580
3585
|
#
|
3581
3586
|
# @return [Series]
|
3582
3587
|
#
|
3583
3588
|
# @example
|
3584
|
-
#
|
3585
|
-
# s.rolling_skew(3)
|
3589
|
+
# Polars::Series.new([1, 4, 2, 9]).rolling_skew(3)
|
3586
3590
|
# # =>
|
3587
|
-
# # shape: (
|
3588
|
-
# # Series: '
|
3591
|
+
# # shape: (4,)
|
3592
|
+
# # Series: '' [f64]
|
3589
3593
|
# # [
|
3590
3594
|
# # null
|
3591
3595
|
# # null
|
3592
|
-
# # 0.0
|
3593
|
-
# # 0.0
|
3594
3596
|
# # 0.381802
|
3595
|
-
# # 0.
|
3597
|
+
# # 0.47033
|
3596
3598
|
# # ]
|
3597
|
-
def rolling_skew(window_size, bias: true)
|
3599
|
+
def rolling_skew(window_size, bias: true, min_samples: nil, center: false)
|
3598
3600
|
super
|
3599
3601
|
end
|
3600
3602
|
|
@@ -4043,7 +4045,7 @@ module Polars
|
|
4043
4045
|
#
|
4044
4046
|
# @example
|
4045
4047
|
# s.kurtosis(fisher: false, bias: false)
|
4046
|
-
# # => 2.
|
4048
|
+
# # => 2.1040361802642717
|
4047
4049
|
def kurtosis(fisher: true, bias: true)
|
4048
4050
|
_s.kurtosis(fisher, bias)
|
4049
4051
|
end
|
data/lib/polars/version.rb
CHANGED
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: polars-df
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.18.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
bindir: bin
|
9
9
|
cert_chain: []
|
10
|
-
date:
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
11
11
|
dependencies:
|
12
12
|
- !ruby/object:Gem::Dependency
|
13
13
|
name: bigdecimal
|
@@ -99,6 +99,7 @@ files:
|
|
99
99
|
- ext/polars/src/lazyframe/general.rs
|
100
100
|
- ext/polars/src/lazyframe/mod.rs
|
101
101
|
- ext/polars/src/lazyframe/serde.rs
|
102
|
+
- ext/polars/src/lazyframe/sink.rs
|
102
103
|
- ext/polars/src/lazygroupby.rs
|
103
104
|
- ext/polars/src/lib.rs
|
104
105
|
- ext/polars/src/map/dataframe.rs
|
@@ -209,7 +210,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
209
210
|
- !ruby/object:Gem::Version
|
210
211
|
version: '0'
|
211
212
|
requirements: []
|
212
|
-
rubygems_version: 3.6.
|
213
|
+
rubygems_version: 3.6.7
|
213
214
|
specification_version: 4
|
214
215
|
summary: Blazingly fast DataFrames for Ruby
|
215
216
|
test_files: []
|