polars-df 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/Cargo.lock +73 -3
- data/Cargo.toml +3 -0
- data/ext/polars/Cargo.toml +12 -1
- data/ext/polars/src/conversion.rs +80 -0
- data/ext/polars/src/error.rs +4 -0
- data/ext/polars/src/lazy/dataframe.rs +2 -2
- data/ext/polars/src/lazy/dsl.rs +98 -0
- data/ext/polars/src/lib.rs +34 -0
- data/ext/polars/src/list_construction.rs +100 -0
- data/ext/polars/src/series.rs +35 -0
- data/lib/polars/cat_name_space.rb +54 -0
- data/lib/polars/data_frame.rb +101 -4
- data/lib/polars/date_time_expr.rb +2 -2
- data/lib/polars/date_time_name_space.rb +1484 -0
- data/lib/polars/expr.rb +3774 -58
- data/lib/polars/expr_dispatch.rb +8 -0
- data/lib/polars/group_by.rb +1 -0
- data/lib/polars/io.rb +1 -1
- data/lib/polars/lazy_frame.rb +8 -4
- data/lib/polars/lazy_functions.rb +126 -16
- data/lib/polars/lazy_group_by.rb +1 -0
- data/lib/polars/list_expr.rb +502 -5
- data/lib/polars/list_name_space.rb +346 -0
- data/lib/polars/series.rb +802 -52
- data/lib/polars/string_expr.rb +189 -13
- data/lib/polars/string_name_space.rb +690 -0
- data/lib/polars/struct_name_space.rb +64 -0
- data/lib/polars/utils.rb +28 -0
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +5 -0
- metadata +8 -2
@@ -0,0 +1,100 @@
|
|
1
|
+
use magnus::Value;
|
2
|
+
use polars::prelude::*;
|
3
|
+
use polars_core::utils::CustomIterTools;
|
4
|
+
|
5
|
+
use crate::conversion::get_rbseq;
|
6
|
+
use crate::{RbPolarsErr, RbResult};
|
7
|
+
|
8
|
+
pub fn rb_seq_to_list(name: &str, seq: Value, dtype: &DataType) -> RbResult<Series> {
|
9
|
+
let (seq, len) = get_rbseq(seq)?;
|
10
|
+
|
11
|
+
let s = match dtype {
|
12
|
+
DataType::Int64 => {
|
13
|
+
let mut builder =
|
14
|
+
ListPrimitiveChunkedBuilder::<Int64Type>::new(name, len, len * 5, DataType::Int64);
|
15
|
+
for sub_seq in seq.each() {
|
16
|
+
let sub_seq = sub_seq?;
|
17
|
+
let (sub_seq, len) = get_rbseq(sub_seq)?;
|
18
|
+
|
19
|
+
// safety: we know the iterators len
|
20
|
+
let iter = unsafe {
|
21
|
+
sub_seq
|
22
|
+
.each()
|
23
|
+
.map(|v| {
|
24
|
+
let v = v.unwrap();
|
25
|
+
if v.is_nil() {
|
26
|
+
None
|
27
|
+
} else {
|
28
|
+
Some(v.try_convert::<i64>().unwrap())
|
29
|
+
}
|
30
|
+
})
|
31
|
+
.trust_my_length(len)
|
32
|
+
};
|
33
|
+
builder.append_iter(iter)
|
34
|
+
}
|
35
|
+
builder.finish().into_series()
|
36
|
+
}
|
37
|
+
DataType::Float64 => {
|
38
|
+
let mut builder = ListPrimitiveChunkedBuilder::<Float64Type>::new(
|
39
|
+
name,
|
40
|
+
len,
|
41
|
+
len * 5,
|
42
|
+
DataType::Float64,
|
43
|
+
);
|
44
|
+
for sub_seq in seq.each() {
|
45
|
+
let sub_seq = sub_seq?;
|
46
|
+
let (sub_seq, len) = get_rbseq(sub_seq)?;
|
47
|
+
// safety: we know the iterators len
|
48
|
+
let iter = unsafe {
|
49
|
+
sub_seq
|
50
|
+
.each()
|
51
|
+
.map(|v| {
|
52
|
+
let v = v.unwrap();
|
53
|
+
if v.is_nil() {
|
54
|
+
None
|
55
|
+
} else {
|
56
|
+
Some(v.try_convert::<f64>().unwrap())
|
57
|
+
}
|
58
|
+
})
|
59
|
+
.trust_my_length(len)
|
60
|
+
};
|
61
|
+
builder.append_iter(iter)
|
62
|
+
}
|
63
|
+
builder.finish().into_series()
|
64
|
+
}
|
65
|
+
DataType::Boolean => {
|
66
|
+
let mut builder = ListBooleanChunkedBuilder::new(name, len, len * 5);
|
67
|
+
for sub_seq in seq.each() {
|
68
|
+
let sub_seq = sub_seq?;
|
69
|
+
let (sub_seq, len) = get_rbseq(sub_seq)?;
|
70
|
+
// safety: we know the iterators len
|
71
|
+
let iter = unsafe {
|
72
|
+
sub_seq
|
73
|
+
.each()
|
74
|
+
.map(|v| {
|
75
|
+
let v = v.unwrap();
|
76
|
+
if v.is_nil() {
|
77
|
+
None
|
78
|
+
} else {
|
79
|
+
Some(v.try_convert::<bool>().unwrap())
|
80
|
+
}
|
81
|
+
})
|
82
|
+
.trust_my_length(len)
|
83
|
+
};
|
84
|
+
builder.append_iter(iter)
|
85
|
+
}
|
86
|
+
builder.finish().into_series()
|
87
|
+
}
|
88
|
+
DataType::Utf8 => {
|
89
|
+
return Err(RbPolarsErr::todo());
|
90
|
+
}
|
91
|
+
dt => {
|
92
|
+
return Err(RbPolarsErr::other(format!(
|
93
|
+
"cannot create list array from {:?}",
|
94
|
+
dt
|
95
|
+
)));
|
96
|
+
}
|
97
|
+
};
|
98
|
+
|
99
|
+
Ok(s)
|
100
|
+
}
|
data/ext/polars/src/series.rs
CHANGED
@@ -5,6 +5,7 @@ use polars::series::IsSorted;
|
|
5
5
|
use std::cell::RefCell;
|
6
6
|
|
7
7
|
use crate::conversion::*;
|
8
|
+
use crate::list_construction::rb_seq_to_list;
|
8
9
|
use crate::set::set_at_idx;
|
9
10
|
use crate::{RbDataFrame, RbPolarsErr, RbResult, RbValueError};
|
10
11
|
|
@@ -123,6 +124,19 @@ impl RbSeries {
|
|
123
124
|
RbSeries::new(s)
|
124
125
|
}
|
125
126
|
|
127
|
+
pub fn new_object(name: String, val: RArray, _strict: bool) -> RbResult<Self> {
|
128
|
+
let val = val
|
129
|
+
.each()
|
130
|
+
.map(|v| v.map(ObjectValue::from))
|
131
|
+
.collect::<RbResult<Vec<ObjectValue>>>()?;
|
132
|
+
let s = ObjectChunked::<ObjectValue>::new_from_vec(&name, val).into_series();
|
133
|
+
Ok(s.into())
|
134
|
+
}
|
135
|
+
|
136
|
+
pub fn new_list(name: String, seq: Value, dtype: Wrap<DataType>) -> RbResult<Self> {
|
137
|
+
rb_seq_to_list(&name, seq, &dtype.0).map(|s| s.into())
|
138
|
+
}
|
139
|
+
|
126
140
|
pub fn estimated_size(&self) -> usize {
|
127
141
|
self.series.borrow().estimated_size()
|
128
142
|
}
|
@@ -787,4 +801,25 @@ impl RbSeries {
|
|
787
801
|
let ca: ChunkedArray<Int32Type> = builder.finish();
|
788
802
|
Ok(ca.into_date().into_series().into())
|
789
803
|
}
|
804
|
+
|
805
|
+
pub fn new_opt_datetime(name: String, values: RArray, _strict: Option<bool>) -> RbResult<Self> {
|
806
|
+
let len = values.len();
|
807
|
+
let mut builder = PrimitiveChunkedBuilder::<Int64Type>::new(&name, len);
|
808
|
+
for item in values.each() {
|
809
|
+
let v = item?;
|
810
|
+
if v.is_nil() {
|
811
|
+
builder.append_null();
|
812
|
+
} else {
|
813
|
+
let sec: i64 = v.funcall("to_i", ())?;
|
814
|
+
let nsec: i64 = v.funcall("nsec", ())?;
|
815
|
+
// TODO use strict
|
816
|
+
builder.append_value(sec * 1_000_000_000 + nsec);
|
817
|
+
}
|
818
|
+
}
|
819
|
+
let ca: ChunkedArray<Int64Type> = builder.finish();
|
820
|
+
Ok(ca
|
821
|
+
.into_datetime(TimeUnit::Nanoseconds, None)
|
822
|
+
.into_series()
|
823
|
+
.into())
|
824
|
+
}
|
790
825
|
}
|
@@ -0,0 +1,54 @@
|
|
1
|
+
module Polars
  # Namespace for the categorical operations exposed as `Series#cat`.
  class CatNameSpace
    include ExprDispatch

    self._accessor = "cat"

    # @private
    def initialize(series)
      self._s = series._s
    end

    # Choose the ordering used when this categorical series is sorted.
    #
    # @param ordering ["physical", "lexical"]
    #   Ordering type:
    #
    #   - 'physical' -> Order by the physical representation of the
    #     categories (default).
    #   - 'lexical' -> Order by the string values.
    #
    # @return [Series]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {"cats" => ["z", "z", "k", "a", "b"], "vals" => [3, 1, 2, 2, 3]}
    #   ).with_columns(
    #     [
    #       Polars.col("cats").cast(:cat).cat.set_ordering("lexical")
    #     ]
    #   )
    #   df.sort(["cats", "vals"])
    #   # =>
    #   # shape: (5, 2)
    #   # ┌──────┬──────┐
    #   # │ cats ┆ vals │
    #   # │ ---  ┆ ---  │
    #   # │ cat  ┆ i64  │
    #   # ╞══════╪══════╡
    #   # │ a    ┆ 2    │
    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
    #   # │ b    ┆ 3    │
    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
    #   # │ k    ┆ 2    │
    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
    #   # │ z    ┆ 1    │
    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
    #   # │ z    ┆ 3    │
    #   # └──────┴──────┘
    def set_ordering(ordering)
      super(ordering)
    end
  end
end
|
data/lib/polars/data_frame.rb
CHANGED
@@ -880,7 +880,7 @@ module Polars
|
|
880
880
|
# "val" => [1, 2, 3]
|
881
881
|
# }
|
882
882
|
# )
|
883
|
-
# df.reverse
|
883
|
+
# df.reverse
|
884
884
|
# # =>
|
885
885
|
# # shape: (3, 2)
|
886
886
|
# # ┌─────┬─────┐
|
@@ -1998,8 +1998,105 @@ module Polars
|
|
1998
1998
|
self[name]
|
1999
1999
|
end
|
2000
2000
|
|
2001
|
-
#
|
2002
|
-
#
|
2001
|
+
# Replace null values with a fixed value or according to a fill strategy.
#
# The work is delegated to the lazy frame's `fill_null` and collected with
# optimizations disabled.
#
# @param value [Numeric]
#   Value that replaces null values.
# @param strategy [nil, "forward", "backward", "min", "max", "mean", "zero", "one"]
#   Strategy that determines the replacement for null values.
# @param limit [Integer]
#   Maximum number of consecutive null values to fill; applies to the
#   'forward' and 'backward' strategies.
# @param matches_supertype [Boolean]
#   Fill all matching supertype of the fill `value`.
#
# @return [DataFrame]
#
# @example
#   df = Polars::DataFrame.new(
#     {
#       "a" => [1, 2, nil, 4],
#       "b" => [0.5, 4, nil, 13]
#     }
#   )
#   df.fill_null(99)
#   # =>
#   # shape: (4, 2)
#   # ┌─────┬──────┐
#   # │ a   ┆ b    │
#   # │ --- ┆ ---  │
#   # │ i64 ┆ f64  │
#   # ╞═════╪══════╡
#   # │ 1   ┆ 0.5  │
#   # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
#   # │ 2   ┆ 4.0  │
#   # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
#   # │ 99  ┆ 99.0 │
#   # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
#   # │ 4   ┆ 13.0 │
#   # └─────┴──────┘
#
# @example
#   df.fill_null(strategy: "forward")
#   # =>
#   # shape: (4, 2)
#   # ┌─────┬──────┐
#   # │ a   ┆ b    │
#   # │ --- ┆ ---  │
#   # │ i64 ┆ f64  │
#   # ╞═════╪══════╡
#   # │ 1   ┆ 0.5  │
#   # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
#   # │ 2   ┆ 4.0  │
#   # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
#   # │ 2   ┆ 4.0  │
#   # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
#   # │ 4   ┆ 13.0 │
#   # └─────┴──────┘
#
# @example
#   df.fill_null(strategy: "max")
#   # =>
#   # shape: (4, 2)
#   # ┌─────┬──────┐
#   # │ a   ┆ b    │
#   # │ --- ┆ ---  │
#   # │ i64 ┆ f64  │
#   # ╞═════╪══════╡
#   # │ 1   ┆ 0.5  │
#   # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
#   # │ 2   ┆ 4.0  │
#   # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
#   # │ 4   ┆ 13.0 │
#   # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
#   # │ 4   ┆ 13.0 │
#   # └─────┴──────┘
#
# @example
#   df.fill_null(strategy: "zero")
#   # =>
#   # shape: (4, 2)
#   # ┌─────┬──────┐
#   # │ a   ┆ b    │
#   # │ --- ┆ ---  │
#   # │ i64 ┆ f64  │
#   # ╞═════╪══════╡
#   # │ 1   ┆ 0.5  │
#   # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
#   # │ 2   ┆ 4.0  │
#   # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
#   # │ 0   ┆ 0.0  │
#   # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
#   # │ 4   ┆ 13.0 │
#   # └─────┴──────┘
def fill_null(value = nil, strategy: nil, limit: nil, matches_supertype: true)
  filled = lazy.fill_null(
    value,
    strategy: strategy,
    limit: limit,
    matches_supertype: matches_supertype
  )
  collected = filled.collect(no_optimization: true)
  _from_rbdf(collected._df)
end
|
2003
2100
|
|
2004
2101
|
# Fill floating point NaN values by an Expression evaluation.
|
2005
2102
|
#
|
@@ -2357,7 +2454,7 @@ module Polars
|
|
2357
2454
|
# [
|
2358
2455
|
# (Polars.col("a") ** 2).alias("a^2"),
|
2359
2456
|
# (Polars.col("b") / 2).alias("b/2"),
|
2360
|
-
# (Polars.col("c").is_not
|
2457
|
+
# (Polars.col("c").is_not).alias("not c")
|
2361
2458
|
# ]
|
2362
2459
|
# )
|
2363
2460
|
# # =>
|
@@ -1357,9 +1357,9 @@ module Polars
|
|
1357
1357
|
|
1358
1358
|
# Offset this date by a relative time offset.
|
1359
1359
|
#
|
1360
|
-
# This differs from
|
1360
|
+
# This differs from `Polars.col("foo") + timedelta` in that it can
|
1361
1361
|
# take months and leap years into account. Note that only a single minus
|
1362
|
-
# sign is allowed in the
|
1362
|
+
# sign is allowed in the `by` string, as the first character.
|
1363
1363
|
#
|
1364
1364
|
# @param by [String]
|
1365
1365
|
# The offset is dictated by the following string language:
|