polars-df 0.8.0 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +30 -1
- data/Cargo.lock +107 -59
- data/Cargo.toml +0 -3
- data/LICENSE.txt +1 -1
- data/README.md +2 -2
- data/ext/polars/Cargo.toml +15 -7
- data/ext/polars/src/batched_csv.rs +4 -4
- data/ext/polars/src/conversion/anyvalue.rs +185 -0
- data/ext/polars/src/conversion/chunked_array.rs +140 -0
- data/ext/polars/src/{conversion.rs → conversion/mod.rs} +260 -340
- data/ext/polars/src/dataframe.rs +69 -53
- data/ext/polars/src/expr/array.rs +74 -0
- data/ext/polars/src/expr/datetime.rs +22 -56
- data/ext/polars/src/expr/general.rs +61 -33
- data/ext/polars/src/expr/list.rs +52 -4
- data/ext/polars/src/expr/meta.rs +48 -0
- data/ext/polars/src/expr/rolling.rs +1 -0
- data/ext/polars/src/expr/string.rs +59 -8
- data/ext/polars/src/expr/struct.rs +8 -4
- data/ext/polars/src/functions/aggregation.rs +6 -0
- data/ext/polars/src/functions/lazy.rs +103 -48
- data/ext/polars/src/functions/meta.rs +45 -1
- data/ext/polars/src/functions/string_cache.rs +14 -0
- data/ext/polars/src/{lazyframe.rs → lazyframe/mod.rs} +138 -22
- data/ext/polars/src/lib.rs +226 -168
- data/ext/polars/src/series/aggregation.rs +20 -0
- data/ext/polars/src/series/mod.rs +25 -4
- data/lib/polars/array_expr.rb +449 -0
- data/lib/polars/array_name_space.rb +346 -0
- data/lib/polars/cat_expr.rb +24 -0
- data/lib/polars/cat_name_space.rb +75 -0
- data/lib/polars/config.rb +2 -2
- data/lib/polars/data_frame.rb +179 -43
- data/lib/polars/data_types.rb +191 -28
- data/lib/polars/date_time_expr.rb +31 -14
- data/lib/polars/exceptions.rb +12 -1
- data/lib/polars/expr.rb +866 -186
- data/lib/polars/functions/aggregation/horizontal.rb +246 -0
- data/lib/polars/functions/aggregation/vertical.rb +282 -0
- data/lib/polars/functions/as_datatype.rb +248 -0
- data/lib/polars/functions/col.rb +47 -0
- data/lib/polars/functions/eager.rb +182 -0
- data/lib/polars/functions/lazy.rb +1280 -0
- data/lib/polars/functions/len.rb +49 -0
- data/lib/polars/functions/lit.rb +35 -0
- data/lib/polars/functions/random.rb +16 -0
- data/lib/polars/functions/range/date_range.rb +103 -0
- data/lib/polars/functions/range/int_range.rb +51 -0
- data/lib/polars/functions/repeat.rb +144 -0
- data/lib/polars/functions/whenthen.rb +27 -0
- data/lib/polars/functions.rb +29 -416
- data/lib/polars/group_by.rb +2 -2
- data/lib/polars/io.rb +18 -25
- data/lib/polars/lazy_frame.rb +367 -53
- data/lib/polars/list_expr.rb +152 -6
- data/lib/polars/list_name_space.rb +102 -0
- data/lib/polars/meta_expr.rb +175 -7
- data/lib/polars/series.rb +273 -34
- data/lib/polars/string_cache.rb +75 -0
- data/lib/polars/string_expr.rb +412 -96
- data/lib/polars/string_name_space.rb +4 -4
- data/lib/polars/testing.rb +507 -0
- data/lib/polars/utils.rb +52 -8
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +15 -2
- metadata +35 -5
- data/lib/polars/lazy_functions.rb +0 -1181
@@ -0,0 +1,185 @@
|
|
1
|
+
use magnus::encoding::{EncodingCapable, Index};
|
2
|
+
use magnus::{
|
3
|
+
class, prelude::*, r_hash::ForEach, Float, Integer, IntoValue, RArray, RHash, RString, Ruby,
|
4
|
+
TryConvert, Value,
|
5
|
+
};
|
6
|
+
use polars::frame::row::any_values_to_dtype;
|
7
|
+
use polars::prelude::*;
|
8
|
+
|
9
|
+
use super::{struct_dict, ObjectValue, Wrap};
|
10
|
+
|
11
|
+
use crate::rb_modules::utils;
|
12
|
+
use crate::{RbPolarsErr, RbResult, RbSeries};
|
13
|
+
|
14
|
+
impl IntoValue for Wrap<AnyValue<'_>> {
|
15
|
+
fn into_value_with(self, ruby: &Ruby) -> Value {
|
16
|
+
match self.0 {
|
17
|
+
AnyValue::UInt8(v) => ruby.into_value(v),
|
18
|
+
AnyValue::UInt16(v) => ruby.into_value(v),
|
19
|
+
AnyValue::UInt32(v) => ruby.into_value(v),
|
20
|
+
AnyValue::UInt64(v) => ruby.into_value(v),
|
21
|
+
AnyValue::Int8(v) => ruby.into_value(v),
|
22
|
+
AnyValue::Int16(v) => ruby.into_value(v),
|
23
|
+
AnyValue::Int32(v) => ruby.into_value(v),
|
24
|
+
AnyValue::Int64(v) => ruby.into_value(v),
|
25
|
+
AnyValue::Float32(v) => ruby.into_value(v),
|
26
|
+
AnyValue::Float64(v) => ruby.into_value(v),
|
27
|
+
AnyValue::Null => ruby.qnil().as_value(),
|
28
|
+
AnyValue::Boolean(v) => ruby.into_value(v),
|
29
|
+
AnyValue::String(v) => ruby.into_value(v),
|
30
|
+
AnyValue::StringOwned(v) => ruby.into_value(v.as_str()),
|
31
|
+
AnyValue::Categorical(idx, rev, arr) | AnyValue::Enum(idx, rev, arr) => {
|
32
|
+
let s = if arr.is_null() {
|
33
|
+
rev.get(idx)
|
34
|
+
} else {
|
35
|
+
unsafe { arr.deref_unchecked().value(idx as usize) }
|
36
|
+
};
|
37
|
+
s.into_value()
|
38
|
+
}
|
39
|
+
AnyValue::Date(v) => utils().funcall("_to_ruby_date", (v,)).unwrap(),
|
40
|
+
AnyValue::Datetime(v, time_unit, time_zone) => {
|
41
|
+
let time_unit = time_unit.to_ascii();
|
42
|
+
utils()
|
43
|
+
.funcall("_to_ruby_datetime", (v, time_unit, time_zone.clone()))
|
44
|
+
.unwrap()
|
45
|
+
}
|
46
|
+
AnyValue::Duration(v, time_unit) => {
|
47
|
+
let time_unit = time_unit.to_ascii();
|
48
|
+
utils()
|
49
|
+
.funcall("_to_ruby_duration", (v, time_unit))
|
50
|
+
.unwrap()
|
51
|
+
}
|
52
|
+
AnyValue::Time(v) => utils().funcall("_to_ruby_time", (v,)).unwrap(),
|
53
|
+
AnyValue::Array(v, _) | AnyValue::List(v) => RbSeries::new(v).to_a().into_value(),
|
54
|
+
ref av @ AnyValue::Struct(_, _, flds) => struct_dict(av._iter_struct_av(), flds),
|
55
|
+
AnyValue::StructOwned(payload) => struct_dict(payload.0.into_iter(), &payload.1),
|
56
|
+
AnyValue::Object(v) => {
|
57
|
+
let object = v.as_any().downcast_ref::<ObjectValue>().unwrap();
|
58
|
+
object.to_object()
|
59
|
+
}
|
60
|
+
AnyValue::ObjectOwned(v) => {
|
61
|
+
let object = v.0.as_any().downcast_ref::<ObjectValue>().unwrap();
|
62
|
+
object.to_object()
|
63
|
+
}
|
64
|
+
AnyValue::Binary(v) => RString::from_slice(v).into_value(),
|
65
|
+
AnyValue::BinaryOwned(v) => RString::from_slice(&v).into_value(),
|
66
|
+
AnyValue::Decimal(v, scale) => utils()
|
67
|
+
.funcall("_to_ruby_decimal", (v.to_string(), -(scale as i32)))
|
68
|
+
.unwrap(),
|
69
|
+
}
|
70
|
+
}
|
71
|
+
}
|
72
|
+
|
73
|
+
impl<'s> TryConvert for Wrap<AnyValue<'s>> {
|
74
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
75
|
+
if ob.is_kind_of(class::true_class()) || ob.is_kind_of(class::false_class()) {
|
76
|
+
Ok(AnyValue::Boolean(bool::try_convert(ob)?).into())
|
77
|
+
} else if let Some(v) = Integer::from_value(ob) {
|
78
|
+
Ok(AnyValue::Int64(v.to_i64()?).into())
|
79
|
+
} else if let Some(v) = Float::from_value(ob) {
|
80
|
+
Ok(AnyValue::Float64(v.to_f64()).into())
|
81
|
+
} else if let Some(v) = RString::from_value(ob) {
|
82
|
+
if v.enc_get() == Index::utf8() {
|
83
|
+
Ok(AnyValue::StringOwned(v.to_string()?.into()).into())
|
84
|
+
} else {
|
85
|
+
Ok(AnyValue::BinaryOwned(unsafe { v.as_slice() }.to_vec()).into())
|
86
|
+
}
|
87
|
+
// call is_a? for ActiveSupport::TimeWithZone
|
88
|
+
} else if ob.funcall::<_, _, bool>("is_a?", (class::time(),))? {
|
89
|
+
let sec = ob.funcall::<_, _, i64>("to_i", ())?;
|
90
|
+
let nsec = ob.funcall::<_, _, i64>("nsec", ())?;
|
91
|
+
let v = sec * 1_000_000_000 + nsec;
|
92
|
+
// TODO support time zone when possible
|
93
|
+
// https://github.com/pola-rs/polars/issues/9103
|
94
|
+
Ok(AnyValue::Datetime(v, TimeUnit::Nanoseconds, &None).into())
|
95
|
+
} else if ob.is_nil() {
|
96
|
+
Ok(AnyValue::Null.into())
|
97
|
+
} else if let Some(dict) = RHash::from_value(ob) {
|
98
|
+
let len = dict.len();
|
99
|
+
let mut keys = Vec::with_capacity(len);
|
100
|
+
let mut vals = Vec::with_capacity(len);
|
101
|
+
dict.foreach(|k: Value, v: Value| {
|
102
|
+
let key = String::try_convert(k)?;
|
103
|
+
let val = Wrap::<AnyValue>::try_convert(v)?.0;
|
104
|
+
let dtype = DataType::from(&val);
|
105
|
+
keys.push(Field::new(&key, dtype));
|
106
|
+
vals.push(val);
|
107
|
+
Ok(ForEach::Continue)
|
108
|
+
})?;
|
109
|
+
Ok(Wrap(AnyValue::StructOwned(Box::new((vals, keys)))))
|
110
|
+
} else if let Some(v) = RArray::from_value(ob) {
|
111
|
+
if v.is_empty() {
|
112
|
+
Ok(Wrap(AnyValue::List(Series::new_empty("", &DataType::Null))))
|
113
|
+
} else {
|
114
|
+
let list = v;
|
115
|
+
|
116
|
+
let mut avs = Vec::with_capacity(25);
|
117
|
+
let mut iter = list.each();
|
118
|
+
|
119
|
+
for item in (&mut iter).take(25) {
|
120
|
+
avs.push(Wrap::<AnyValue>::try_convert(item?)?.0)
|
121
|
+
}
|
122
|
+
|
123
|
+
let (dtype, _n_types) = any_values_to_dtype(&avs).map_err(RbPolarsErr::from)?;
|
124
|
+
|
125
|
+
// push the rest
|
126
|
+
avs.reserve(list.len());
|
127
|
+
for item in iter {
|
128
|
+
avs.push(Wrap::<AnyValue>::try_convert(item?)?.0)
|
129
|
+
}
|
130
|
+
|
131
|
+
let s = Series::from_any_values_and_dtype("", &avs, &dtype, true)
|
132
|
+
.map_err(RbPolarsErr::from)?;
|
133
|
+
Ok(Wrap(AnyValue::List(s)))
|
134
|
+
}
|
135
|
+
} else if ob.is_kind_of(crate::rb_modules::datetime()) {
|
136
|
+
let sec: i64 = ob.funcall("to_i", ())?;
|
137
|
+
let nsec: i64 = ob.funcall("nsec", ())?;
|
138
|
+
Ok(Wrap(AnyValue::Datetime(
|
139
|
+
sec * 1_000_000_000 + nsec,
|
140
|
+
TimeUnit::Nanoseconds,
|
141
|
+
&None,
|
142
|
+
)))
|
143
|
+
} else if ob.is_kind_of(crate::rb_modules::date()) {
|
144
|
+
// convert to DateTime for UTC
|
145
|
+
let v = ob
|
146
|
+
.funcall::<_, _, Value>("to_datetime", ())?
|
147
|
+
.funcall::<_, _, Value>("to_time", ())?
|
148
|
+
.funcall::<_, _, i64>("to_i", ())?;
|
149
|
+
Ok(Wrap(AnyValue::Date((v / 86400) as i32)))
|
150
|
+
} else if ob.is_kind_of(crate::rb_modules::bigdecimal()) {
|
151
|
+
let (sign, digits, _, exp): (i8, String, i32, i32) = ob.funcall("split", ()).unwrap();
|
152
|
+
let (mut v, scale) = abs_decimal_from_digits(digits, exp).ok_or_else(|| {
|
153
|
+
RbPolarsErr::other("BigDecimal is too large to fit in Decimal128".into())
|
154
|
+
})?;
|
155
|
+
if sign < 0 {
|
156
|
+
// TODO better error
|
157
|
+
v = v.checked_neg().unwrap();
|
158
|
+
}
|
159
|
+
Ok(Wrap(AnyValue::Decimal(v, scale)))
|
160
|
+
} else {
|
161
|
+
Err(RbPolarsErr::other(format!(
|
162
|
+
"object type not supported {:?}",
|
163
|
+
ob
|
164
|
+
)))
|
165
|
+
}
|
166
|
+
}
|
167
|
+
}
|
168
|
+
|
169
|
+
/// Builds the absolute unscaled value and scale of a decimal from a string
/// of decimal digits and an exponent.
///
/// `exp` is first reduced by the number of digits. If the result is positive,
/// the value is multiplied by that power of ten and the scale is `0`.
/// Otherwise the value is returned unchanged and the scale is the magnitude
/// of the reduced exponent.
///
/// Returns `None` when `digits` does not parse as an `i128`, when the scaled
/// value does not fit in an `i128`, or when the exponent arithmetic would
/// overflow an `i32`.
fn abs_decimal_from_digits(digits: String, exp: i32) -> Option<(i128, usize)> {
    let magnitude = digits.parse::<i128>().ok()?;

    // Guard the narrowing cast below; the checked subtraction then rejects
    // exponents that would overflow instead of panicking or wrapping.
    if digits.len() > i32::MAX as usize {
        return None;
    }
    let exp = exp.checked_sub(digits.len() as i32)?;

    if exp > 0 {
        let factor = 10_i128.checked_pow(exp.unsigned_abs())?;
        Some((magnitude.checked_mul(factor)?, 0))
    } else {
        // `unsigned_abs` cannot overflow, unlike negating `i32::MIN`.
        Some((magnitude, exp.unsigned_abs() as usize))
    }
}
|
@@ -0,0 +1,140 @@
|
|
1
|
+
use magnus::{prelude::*, IntoValue, RArray, RString, Ruby, TryConvert, Value};
|
2
|
+
use polars::prelude::*;
|
3
|
+
|
4
|
+
use super::{get_rbseq, struct_dict, Wrap};
|
5
|
+
|
6
|
+
use crate::rb_modules::utils;
|
7
|
+
use crate::RbResult;
|
8
|
+
|
9
|
+
impl TryConvert for Wrap<StringChunked> {
|
10
|
+
fn try_convert(obj: Value) -> RbResult<Self> {
|
11
|
+
let (seq, len) = get_rbseq(obj)?;
|
12
|
+
let mut builder = StringChunkedBuilder::new("", len);
|
13
|
+
|
14
|
+
for res in seq.each() {
|
15
|
+
let item = res?;
|
16
|
+
match String::try_convert(item) {
|
17
|
+
Ok(val) => builder.append_value(&val),
|
18
|
+
Err(_) => builder.append_null(),
|
19
|
+
}
|
20
|
+
}
|
21
|
+
Ok(Wrap(builder.finish()))
|
22
|
+
}
|
23
|
+
}
|
24
|
+
|
25
|
+
impl TryConvert for Wrap<BinaryChunked> {
|
26
|
+
fn try_convert(obj: Value) -> RbResult<Self> {
|
27
|
+
let (seq, len) = get_rbseq(obj)?;
|
28
|
+
let mut builder = BinaryChunkedBuilder::new("", len);
|
29
|
+
|
30
|
+
for res in seq.each() {
|
31
|
+
let item = res?;
|
32
|
+
match RString::try_convert(item) {
|
33
|
+
Ok(val) => builder.append_value(unsafe { val.as_slice() }),
|
34
|
+
Err(_) => builder.append_null(),
|
35
|
+
}
|
36
|
+
}
|
37
|
+
Ok(Wrap(builder.finish()))
|
38
|
+
}
|
39
|
+
}
|
40
|
+
|
41
|
+
impl IntoValue for Wrap<&StringChunked> {
|
42
|
+
fn into_value_with(self, _: &Ruby) -> Value {
|
43
|
+
let iter = self.0.into_iter();
|
44
|
+
RArray::from_iter(iter).into_value()
|
45
|
+
}
|
46
|
+
}
|
47
|
+
|
48
|
+
impl IntoValue for Wrap<&BinaryChunked> {
|
49
|
+
fn into_value_with(self, _: &Ruby) -> Value {
|
50
|
+
let iter = self
|
51
|
+
.0
|
52
|
+
.into_iter()
|
53
|
+
.map(|opt_bytes| opt_bytes.map(RString::from_slice));
|
54
|
+
RArray::from_iter(iter).into_value()
|
55
|
+
}
|
56
|
+
}
|
57
|
+
|
58
|
+
impl IntoValue for Wrap<&StructChunked> {
|
59
|
+
fn into_value_with(self, _: &Ruby) -> Value {
|
60
|
+
let s = self.0.clone().into_series();
|
61
|
+
// todo! iterate its chunks and flatten.
|
62
|
+
// make series::iter() accept a chunk index.
|
63
|
+
let s = s.rechunk();
|
64
|
+
let iter = s.iter().map(|av| {
|
65
|
+
if let AnyValue::Struct(_, _, flds) = av {
|
66
|
+
struct_dict(av._iter_struct_av(), flds)
|
67
|
+
} else {
|
68
|
+
unreachable!()
|
69
|
+
}
|
70
|
+
});
|
71
|
+
|
72
|
+
RArray::from_iter(iter).into_value()
|
73
|
+
}
|
74
|
+
}
|
75
|
+
|
76
|
+
impl IntoValue for Wrap<&DurationChunked> {
|
77
|
+
fn into_value_with(self, _: &Ruby) -> Value {
|
78
|
+
let utils = utils();
|
79
|
+
let time_unit = Wrap(self.0.time_unit()).into_value();
|
80
|
+
let iter = self.0.into_iter().map(|opt_v| {
|
81
|
+
opt_v.map(|v| {
|
82
|
+
utils
|
83
|
+
.funcall::<_, _, Value>("_to_ruby_duration", (v, time_unit))
|
84
|
+
.unwrap()
|
85
|
+
})
|
86
|
+
});
|
87
|
+
RArray::from_iter(iter).into_value()
|
88
|
+
}
|
89
|
+
}
|
90
|
+
|
91
|
+
impl IntoValue for Wrap<&DatetimeChunked> {
|
92
|
+
fn into_value_with(self, _: &Ruby) -> Value {
|
93
|
+
let utils = utils();
|
94
|
+
let time_unit = Wrap(self.0.time_unit()).into_value();
|
95
|
+
let time_zone = self.0.time_zone().clone().into_value();
|
96
|
+
let iter = self.0.into_iter().map(|opt_v| {
|
97
|
+
opt_v.map(|v| {
|
98
|
+
utils
|
99
|
+
.funcall::<_, _, Value>("_to_ruby_datetime", (v, time_unit, time_zone))
|
100
|
+
.unwrap()
|
101
|
+
})
|
102
|
+
});
|
103
|
+
RArray::from_iter(iter).into_value()
|
104
|
+
}
|
105
|
+
}
|
106
|
+
|
107
|
+
impl IntoValue for Wrap<&TimeChunked> {
|
108
|
+
fn into_value_with(self, _: &Ruby) -> Value {
|
109
|
+
let utils = utils();
|
110
|
+
let iter = self.0.into_iter().map(|opt_v| {
|
111
|
+
opt_v.map(|v| utils.funcall::<_, _, Value>("_to_ruby_time", (v,)).unwrap())
|
112
|
+
});
|
113
|
+
RArray::from_iter(iter).into_value()
|
114
|
+
}
|
115
|
+
}
|
116
|
+
|
117
|
+
impl IntoValue for Wrap<&DateChunked> {
|
118
|
+
fn into_value_with(self, _: &Ruby) -> Value {
|
119
|
+
let utils = utils();
|
120
|
+
let iter = self.0.into_iter().map(|opt_v| {
|
121
|
+
opt_v.map(|v| utils.funcall::<_, _, Value>("_to_ruby_date", (v,)).unwrap())
|
122
|
+
});
|
123
|
+
RArray::from_iter(iter).into_value()
|
124
|
+
}
|
125
|
+
}
|
126
|
+
|
127
|
+
impl IntoValue for Wrap<&DecimalChunked> {
|
128
|
+
fn into_value_with(self, _: &Ruby) -> Value {
|
129
|
+
let utils = utils();
|
130
|
+
let rb_scale = (-(self.0.scale() as i32)).into_value();
|
131
|
+
let iter = self.0.into_iter().map(|opt_v| {
|
132
|
+
opt_v.map(|v| {
|
133
|
+
utils
|
134
|
+
.funcall::<_, _, Value>("_to_ruby_decimal", (v.to_string(), rb_scale))
|
135
|
+
.unwrap()
|
136
|
+
})
|
137
|
+
});
|
138
|
+
RArray::from_iter(iter).into_value()
|
139
|
+
}
|
140
|
+
}
|