polars-df 0.1.3 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/Cargo.lock +73 -3
- data/Cargo.toml +3 -0
- data/ext/polars/Cargo.toml +12 -1
- data/ext/polars/src/conversion.rs +80 -0
- data/ext/polars/src/error.rs +4 -0
- data/ext/polars/src/lazy/dataframe.rs +2 -2
- data/ext/polars/src/lazy/dsl.rs +98 -0
- data/ext/polars/src/lib.rs +34 -0
- data/ext/polars/src/list_construction.rs +100 -0
- data/ext/polars/src/series.rs +35 -0
- data/lib/polars/cat_name_space.rb +54 -0
- data/lib/polars/data_frame.rb +101 -4
- data/lib/polars/date_time_expr.rb +2 -2
- data/lib/polars/date_time_name_space.rb +1484 -0
- data/lib/polars/expr.rb +3774 -58
- data/lib/polars/expr_dispatch.rb +8 -0
- data/lib/polars/group_by.rb +1 -0
- data/lib/polars/io.rb +1 -1
- data/lib/polars/lazy_frame.rb +8 -4
- data/lib/polars/lazy_functions.rb +126 -16
- data/lib/polars/lazy_group_by.rb +1 -0
- data/lib/polars/list_expr.rb +502 -5
- data/lib/polars/list_name_space.rb +346 -0
- data/lib/polars/series.rb +802 -52
- data/lib/polars/string_expr.rb +189 -13
- data/lib/polars/string_name_space.rb +690 -0
- data/lib/polars/struct_name_space.rb +64 -0
- data/lib/polars/utils.rb +28 -0
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +5 -0
- metadata +8 -2
@@ -0,0 +1,100 @@
|
|
1
|
+
use magnus::Value;
|
2
|
+
use polars::prelude::*;
|
3
|
+
use polars_core::utils::CustomIterTools;
|
4
|
+
|
5
|
+
use crate::conversion::get_rbseq;
|
6
|
+
use crate::{RbPolarsErr, RbResult};
|
7
|
+
|
8
|
+
pub fn rb_seq_to_list(name: &str, seq: Value, dtype: &DataType) -> RbResult<Series> {
|
9
|
+
let (seq, len) = get_rbseq(seq)?;
|
10
|
+
|
11
|
+
let s = match dtype {
|
12
|
+
DataType::Int64 => {
|
13
|
+
let mut builder =
|
14
|
+
ListPrimitiveChunkedBuilder::<Int64Type>::new(name, len, len * 5, DataType::Int64);
|
15
|
+
for sub_seq in seq.each() {
|
16
|
+
let sub_seq = sub_seq?;
|
17
|
+
let (sub_seq, len) = get_rbseq(sub_seq)?;
|
18
|
+
|
19
|
+
// safety: we know the iterators len
|
20
|
+
let iter = unsafe {
|
21
|
+
sub_seq
|
22
|
+
.each()
|
23
|
+
.map(|v| {
|
24
|
+
let v = v.unwrap();
|
25
|
+
if v.is_nil() {
|
26
|
+
None
|
27
|
+
} else {
|
28
|
+
Some(v.try_convert::<i64>().unwrap())
|
29
|
+
}
|
30
|
+
})
|
31
|
+
.trust_my_length(len)
|
32
|
+
};
|
33
|
+
builder.append_iter(iter)
|
34
|
+
}
|
35
|
+
builder.finish().into_series()
|
36
|
+
}
|
37
|
+
DataType::Float64 => {
|
38
|
+
let mut builder = ListPrimitiveChunkedBuilder::<Float64Type>::new(
|
39
|
+
name,
|
40
|
+
len,
|
41
|
+
len * 5,
|
42
|
+
DataType::Float64,
|
43
|
+
);
|
44
|
+
for sub_seq in seq.each() {
|
45
|
+
let sub_seq = sub_seq?;
|
46
|
+
let (sub_seq, len) = get_rbseq(sub_seq)?;
|
47
|
+
// safety: we know the iterators len
|
48
|
+
let iter = unsafe {
|
49
|
+
sub_seq
|
50
|
+
.each()
|
51
|
+
.map(|v| {
|
52
|
+
let v = v.unwrap();
|
53
|
+
if v.is_nil() {
|
54
|
+
None
|
55
|
+
} else {
|
56
|
+
Some(v.try_convert::<f64>().unwrap())
|
57
|
+
}
|
58
|
+
})
|
59
|
+
.trust_my_length(len)
|
60
|
+
};
|
61
|
+
builder.append_iter(iter)
|
62
|
+
}
|
63
|
+
builder.finish().into_series()
|
64
|
+
}
|
65
|
+
DataType::Boolean => {
|
66
|
+
let mut builder = ListBooleanChunkedBuilder::new(name, len, len * 5);
|
67
|
+
for sub_seq in seq.each() {
|
68
|
+
let sub_seq = sub_seq?;
|
69
|
+
let (sub_seq, len) = get_rbseq(sub_seq)?;
|
70
|
+
// safety: we know the iterators len
|
71
|
+
let iter = unsafe {
|
72
|
+
sub_seq
|
73
|
+
.each()
|
74
|
+
.map(|v| {
|
75
|
+
let v = v.unwrap();
|
76
|
+
if v.is_nil() {
|
77
|
+
None
|
78
|
+
} else {
|
79
|
+
Some(v.try_convert::<bool>().unwrap())
|
80
|
+
}
|
81
|
+
})
|
82
|
+
.trust_my_length(len)
|
83
|
+
};
|
84
|
+
builder.append_iter(iter)
|
85
|
+
}
|
86
|
+
builder.finish().into_series()
|
87
|
+
}
|
88
|
+
DataType::Utf8 => {
|
89
|
+
return Err(RbPolarsErr::todo());
|
90
|
+
}
|
91
|
+
dt => {
|
92
|
+
return Err(RbPolarsErr::other(format!(
|
93
|
+
"cannot create list array from {:?}",
|
94
|
+
dt
|
95
|
+
)));
|
96
|
+
}
|
97
|
+
};
|
98
|
+
|
99
|
+
Ok(s)
|
100
|
+
}
|
data/ext/polars/src/series.rs
CHANGED
@@ -5,6 +5,7 @@ use polars::series::IsSorted;
|
|
5
5
|
use std::cell::RefCell;
|
6
6
|
|
7
7
|
use crate::conversion::*;
|
8
|
+
use crate::list_construction::rb_seq_to_list;
|
8
9
|
use crate::set::set_at_idx;
|
9
10
|
use crate::{RbDataFrame, RbPolarsErr, RbResult, RbValueError};
|
10
11
|
|
@@ -123,6 +124,19 @@ impl RbSeries {
|
|
123
124
|
RbSeries::new(s)
|
124
125
|
}
|
125
126
|
|
127
|
+
pub fn new_object(name: String, val: RArray, _strict: bool) -> RbResult<Self> {
|
128
|
+
let val = val
|
129
|
+
.each()
|
130
|
+
.map(|v| v.map(ObjectValue::from))
|
131
|
+
.collect::<RbResult<Vec<ObjectValue>>>()?;
|
132
|
+
let s = ObjectChunked::<ObjectValue>::new_from_vec(&name, val).into_series();
|
133
|
+
Ok(s.into())
|
134
|
+
}
|
135
|
+
|
136
|
+
pub fn new_list(name: String, seq: Value, dtype: Wrap<DataType>) -> RbResult<Self> {
|
137
|
+
rb_seq_to_list(&name, seq, &dtype.0).map(|s| s.into())
|
138
|
+
}
|
139
|
+
|
126
140
|
pub fn estimated_size(&self) -> usize {
|
127
141
|
self.series.borrow().estimated_size()
|
128
142
|
}
|
@@ -787,4 +801,25 @@ impl RbSeries {
|
|
787
801
|
let ca: ChunkedArray<Int32Type> = builder.finish();
|
788
802
|
Ok(ca.into_date().into_series().into())
|
789
803
|
}
|
804
|
+
|
805
|
+
pub fn new_opt_datetime(name: String, values: RArray, _strict: Option<bool>) -> RbResult<Self> {
|
806
|
+
let len = values.len();
|
807
|
+
let mut builder = PrimitiveChunkedBuilder::<Int64Type>::new(&name, len);
|
808
|
+
for item in values.each() {
|
809
|
+
let v = item?;
|
810
|
+
if v.is_nil() {
|
811
|
+
builder.append_null();
|
812
|
+
} else {
|
813
|
+
let sec: i64 = v.funcall("to_i", ())?;
|
814
|
+
let nsec: i64 = v.funcall("nsec", ())?;
|
815
|
+
// TODO use strict
|
816
|
+
builder.append_value(sec * 1_000_000_000 + nsec);
|
817
|
+
}
|
818
|
+
}
|
819
|
+
let ca: ChunkedArray<Int64Type> = builder.finish();
|
820
|
+
Ok(ca
|
821
|
+
.into_datetime(TimeUnit::Nanoseconds, None)
|
822
|
+
.into_series()
|
823
|
+
.into())
|
824
|
+
}
|
790
825
|
}
|
@@ -0,0 +1,54 @@
|
|
1
|
+
module Polars
  # Namespace for categorical operations, reached through `Series#cat`.
  class CatNameSpace
    include ExprDispatch

    self._accessor = "cat"

    # @private
    def initialize(series)
      self._s = series._s
    end

    # Choose how a categorical series is ordered when sorting.
    #
    # @param ordering ["physical", "lexical"]
    #   Ordering type:
    #
    #   - 'physical' -> Order by the physical representation of the
    #     categories (default).
    #   - 'lexical' -> Order by the string values.
    #
    # @return [Series]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {"cats" => ["z", "z", "k", "a", "b"], "vals" => [3, 1, 2, 2, 3]}
    #   ).with_columns(
    #     [
    #       Polars.col("cats").cast(:cat).cat.set_ordering("lexical")
    #     ]
    #   )
    #   df.sort(["cats", "vals"])
    #   # =>
    #   # shape: (5, 2)
    #   # ┌──────┬──────┐
    #   # │ cats ┆ vals │
    #   # │ ---  ┆ ---  │
    #   # │ cat  ┆ i64  │
    #   # ╞══════╪══════╡
    #   # │ a    ┆ 2    │
    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
    #   # │ b    ┆ 3    │
    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
    #   # │ k    ┆ 2    │
    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
    #   # │ z    ┆ 1    │
    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
    #   # │ z    ┆ 3    │
    #   # └──────┴──────┘
    def set_ordering(ordering)
      # Hand the call, with its argument, to the next implementation in the
      # ancestor chain.
      super(ordering)
    end
  end
end
|
data/lib/polars/data_frame.rb
CHANGED
@@ -880,7 +880,7 @@ module Polars
|
|
880
880
|
# "val" => [1, 2, 3]
|
881
881
|
# }
|
882
882
|
# )
|
883
|
-
# df.reverse
|
883
|
+
# df.reverse
|
884
884
|
# # =>
|
885
885
|
# # shape: (3, 2)
|
886
886
|
# # ┌─────┬─────┐
|
@@ -1998,8 +1998,105 @@ module Polars
|
|
1998
1998
|
self[name]
|
1999
1999
|
end
|
2000
2000
|
|
2001
|
-
#
|
2002
|
-
#
|
2001
|
+
# Fill null values using the specified value or strategy.
|
2002
|
+
#
|
2003
|
+
# @param value [Numeric]
|
2004
|
+
# Value used to fill null values.
|
2005
|
+
# @param strategy [nil, "forward", "backward", "min", "max", "mean", "zero", "one"]
|
2006
|
+
# Strategy used to fill null values.
|
2007
|
+
# @param limit [Integer]
|
2008
|
+
# Number of consecutive null values to fill when using the 'forward' or
|
2009
|
+
# 'backward' strategy.
|
2010
|
+
# @param matches_supertype [Boolean]
|
2011
|
+
# Fill all matching supertypes of the fill `value`.
|
2012
|
+
#
|
2013
|
+
# @return [DataFrame]
|
2014
|
+
#
|
2015
|
+
# @example
|
2016
|
+
# df = Polars::DataFrame.new(
|
2017
|
+
# {
|
2018
|
+
# "a" => [1, 2, nil, 4],
|
2019
|
+
# "b" => [0.5, 4, nil, 13]
|
2020
|
+
# }
|
2021
|
+
# )
|
2022
|
+
# df.fill_null(99)
|
2023
|
+
# # =>
|
2024
|
+
# # shape: (4, 2)
|
2025
|
+
# # ┌─────┬──────┐
|
2026
|
+
# # │ a ┆ b │
|
2027
|
+
# # │ --- ┆ --- │
|
2028
|
+
# # │ i64 ┆ f64 │
|
2029
|
+
# # ╞═════╪══════╡
|
2030
|
+
# # │ 1 ┆ 0.5 │
|
2031
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
|
2032
|
+
# # │ 2 ┆ 4.0 │
|
2033
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
|
2034
|
+
# # │ 99 ┆ 99.0 │
|
2035
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
|
2036
|
+
# # │ 4 ┆ 13.0 │
|
2037
|
+
# # └─────┴──────┘
|
2038
|
+
#
|
2039
|
+
# @example
|
2040
|
+
# df.fill_null(strategy: "forward")
|
2041
|
+
# # =>
|
2042
|
+
# # shape: (4, 2)
|
2043
|
+
# # ┌─────┬──────┐
|
2044
|
+
# # │ a ┆ b │
|
2045
|
+
# # │ --- ┆ --- │
|
2046
|
+
# # │ i64 ┆ f64 │
|
2047
|
+
# # ╞═════╪══════╡
|
2048
|
+
# # │ 1 ┆ 0.5 │
|
2049
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
|
2050
|
+
# # │ 2 ┆ 4.0 │
|
2051
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
|
2052
|
+
# # │ 2 ┆ 4.0 │
|
2053
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
|
2054
|
+
# # │ 4 ┆ 13.0 │
|
2055
|
+
# # └─────┴──────┘
|
2056
|
+
#
|
2057
|
+
# @example
|
2058
|
+
# df.fill_null(strategy: "max")
|
2059
|
+
# # =>
|
2060
|
+
# # shape: (4, 2)
|
2061
|
+
# # ┌─────┬──────┐
|
2062
|
+
# # │ a ┆ b │
|
2063
|
+
# # │ --- ┆ --- │
|
2064
|
+
# # │ i64 ┆ f64 │
|
2065
|
+
# # ╞═════╪══════╡
|
2066
|
+
# # │ 1 ┆ 0.5 │
|
2067
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
|
2068
|
+
# # │ 2 ┆ 4.0 │
|
2069
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
|
2070
|
+
# # │ 4 ┆ 13.0 │
|
2071
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
|
2072
|
+
# # │ 4 ┆ 13.0 │
|
2073
|
+
# # └─────┴──────┘
|
2074
|
+
#
|
2075
|
+
# @example
|
2076
|
+
# df.fill_null(strategy: "zero")
|
2077
|
+
# # =>
|
2078
|
+
# # shape: (4, 2)
|
2079
|
+
# # ┌─────┬──────┐
|
2080
|
+
# # │ a ┆ b │
|
2081
|
+
# # │ --- ┆ --- │
|
2082
|
+
# # │ i64 ┆ f64 │
|
2083
|
+
# # ╞═════╪══════╡
|
2084
|
+
# # │ 1 ┆ 0.5 │
|
2085
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
|
2086
|
+
# # │ 2 ┆ 4.0 │
|
2087
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
|
2088
|
+
# # │ 0 ┆ 0.0 │
|
2089
|
+
# # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
|
2090
|
+
# # │ 4 ┆ 13.0 │
|
2091
|
+
# # └─────┴──────┘
|
2092
|
+
def fill_null(value = nil, strategy: nil, limit: nil, matches_supertype: true)
  # Run the fill on the lazy frame, then collect it without optimizations.
  filled = lazy.fill_null(
    value,
    strategy: strategy,
    limit: limit,
    matches_supertype: matches_supertype
  )
  collected = filled.collect(no_optimization: true)
  _from_rbdf(collected._df)
end
|
2003
2100
|
|
2004
2101
|
# Fill floating point NaN values by an Expression evaluation.
|
2005
2102
|
#
|
@@ -2357,7 +2454,7 @@ module Polars
|
|
2357
2454
|
# [
|
2358
2455
|
# (Polars.col("a") ** 2).alias("a^2"),
|
2359
2456
|
# (Polars.col("b") / 2).alias("b/2"),
|
2360
|
-
# (Polars.col("c").is_not
|
2457
|
+
# (Polars.col("c").is_not).alias("not c")
|
2361
2458
|
# ]
|
2362
2459
|
# )
|
2363
2460
|
# # =>
|
@@ -1357,9 +1357,9 @@ module Polars
|
|
1357
1357
|
|
1358
1358
|
# Offset this date by a relative time offset.
|
1359
1359
|
#
|
1360
|
-
# This differs from
|
1360
|
+
# This differs from `Polars.col("foo") + timedelta` in that it can
|
1361
1361
|
# take months and leap years into account. Note that only a single minus
|
1362
|
-
# sign is allowed in the
|
1362
|
+
# sign is allowed in the `by` string, as the first character.
|
1363
1363
|
#
|
1364
1364
|
# @param by [String]
|
1365
1365
|
# The offset is dictated by the following string language:
|