polars-df 0.8.0 → 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +30 -1
- data/Cargo.lock +107 -59
- data/Cargo.toml +0 -3
- data/LICENSE.txt +1 -1
- data/README.md +2 -2
- data/ext/polars/Cargo.toml +15 -7
- data/ext/polars/src/batched_csv.rs +4 -4
- data/ext/polars/src/conversion/anyvalue.rs +185 -0
- data/ext/polars/src/conversion/chunked_array.rs +140 -0
- data/ext/polars/src/{conversion.rs → conversion/mod.rs} +260 -340
- data/ext/polars/src/dataframe.rs +69 -53
- data/ext/polars/src/expr/array.rs +74 -0
- data/ext/polars/src/expr/datetime.rs +22 -56
- data/ext/polars/src/expr/general.rs +61 -33
- data/ext/polars/src/expr/list.rs +52 -4
- data/ext/polars/src/expr/meta.rs +48 -0
- data/ext/polars/src/expr/rolling.rs +1 -0
- data/ext/polars/src/expr/string.rs +59 -8
- data/ext/polars/src/expr/struct.rs +8 -4
- data/ext/polars/src/functions/aggregation.rs +6 -0
- data/ext/polars/src/functions/lazy.rs +103 -48
- data/ext/polars/src/functions/meta.rs +45 -1
- data/ext/polars/src/functions/string_cache.rs +14 -0
- data/ext/polars/src/{lazyframe.rs → lazyframe/mod.rs} +138 -22
- data/ext/polars/src/lib.rs +226 -168
- data/ext/polars/src/series/aggregation.rs +20 -0
- data/ext/polars/src/series/mod.rs +25 -4
- data/lib/polars/array_expr.rb +449 -0
- data/lib/polars/array_name_space.rb +346 -0
- data/lib/polars/cat_expr.rb +24 -0
- data/lib/polars/cat_name_space.rb +75 -0
- data/lib/polars/config.rb +2 -2
- data/lib/polars/data_frame.rb +179 -43
- data/lib/polars/data_types.rb +191 -28
- data/lib/polars/date_time_expr.rb +31 -14
- data/lib/polars/exceptions.rb +12 -1
- data/lib/polars/expr.rb +866 -186
- data/lib/polars/functions/aggregation/horizontal.rb +246 -0
- data/lib/polars/functions/aggregation/vertical.rb +282 -0
- data/lib/polars/functions/as_datatype.rb +248 -0
- data/lib/polars/functions/col.rb +47 -0
- data/lib/polars/functions/eager.rb +182 -0
- data/lib/polars/functions/lazy.rb +1280 -0
- data/lib/polars/functions/len.rb +49 -0
- data/lib/polars/functions/lit.rb +35 -0
- data/lib/polars/functions/random.rb +16 -0
- data/lib/polars/functions/range/date_range.rb +103 -0
- data/lib/polars/functions/range/int_range.rb +51 -0
- data/lib/polars/functions/repeat.rb +144 -0
- data/lib/polars/functions/whenthen.rb +27 -0
- data/lib/polars/functions.rb +29 -416
- data/lib/polars/group_by.rb +2 -2
- data/lib/polars/io.rb +18 -25
- data/lib/polars/lazy_frame.rb +367 -53
- data/lib/polars/list_expr.rb +152 -6
- data/lib/polars/list_name_space.rb +102 -0
- data/lib/polars/meta_expr.rb +175 -7
- data/lib/polars/series.rb +273 -34
- data/lib/polars/string_cache.rb +75 -0
- data/lib/polars/string_expr.rb +412 -96
- data/lib/polars/string_name_space.rb +4 -4
- data/lib/polars/testing.rb +507 -0
- data/lib/polars/utils.rb +52 -8
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +15 -2
- metadata +35 -5
- data/lib/polars/lazy_functions.rb +0 -1181
@@ -0,0 +1,185 @@
|
|
1
|
+
use magnus::encoding::{EncodingCapable, Index};
|
2
|
+
use magnus::{
|
3
|
+
class, prelude::*, r_hash::ForEach, Float, Integer, IntoValue, RArray, RHash, RString, Ruby,
|
4
|
+
TryConvert, Value,
|
5
|
+
};
|
6
|
+
use polars::frame::row::any_values_to_dtype;
|
7
|
+
use polars::prelude::*;
|
8
|
+
|
9
|
+
use super::{struct_dict, ObjectValue, Wrap};
|
10
|
+
|
11
|
+
use crate::rb_modules::utils;
|
12
|
+
use crate::{RbPolarsErr, RbResult, RbSeries};
|
13
|
+
|
14
|
+
impl IntoValue for Wrap<AnyValue<'_>> {
|
15
|
+
fn into_value_with(self, ruby: &Ruby) -> Value {
|
16
|
+
match self.0 {
|
17
|
+
AnyValue::UInt8(v) => ruby.into_value(v),
|
18
|
+
AnyValue::UInt16(v) => ruby.into_value(v),
|
19
|
+
AnyValue::UInt32(v) => ruby.into_value(v),
|
20
|
+
AnyValue::UInt64(v) => ruby.into_value(v),
|
21
|
+
AnyValue::Int8(v) => ruby.into_value(v),
|
22
|
+
AnyValue::Int16(v) => ruby.into_value(v),
|
23
|
+
AnyValue::Int32(v) => ruby.into_value(v),
|
24
|
+
AnyValue::Int64(v) => ruby.into_value(v),
|
25
|
+
AnyValue::Float32(v) => ruby.into_value(v),
|
26
|
+
AnyValue::Float64(v) => ruby.into_value(v),
|
27
|
+
AnyValue::Null => ruby.qnil().as_value(),
|
28
|
+
AnyValue::Boolean(v) => ruby.into_value(v),
|
29
|
+
AnyValue::String(v) => ruby.into_value(v),
|
30
|
+
AnyValue::StringOwned(v) => ruby.into_value(v.as_str()),
|
31
|
+
AnyValue::Categorical(idx, rev, arr) | AnyValue::Enum(idx, rev, arr) => {
|
32
|
+
let s = if arr.is_null() {
|
33
|
+
rev.get(idx)
|
34
|
+
} else {
|
35
|
+
unsafe { arr.deref_unchecked().value(idx as usize) }
|
36
|
+
};
|
37
|
+
s.into_value()
|
38
|
+
}
|
39
|
+
AnyValue::Date(v) => utils().funcall("_to_ruby_date", (v,)).unwrap(),
|
40
|
+
AnyValue::Datetime(v, time_unit, time_zone) => {
|
41
|
+
let time_unit = time_unit.to_ascii();
|
42
|
+
utils()
|
43
|
+
.funcall("_to_ruby_datetime", (v, time_unit, time_zone.clone()))
|
44
|
+
.unwrap()
|
45
|
+
}
|
46
|
+
AnyValue::Duration(v, time_unit) => {
|
47
|
+
let time_unit = time_unit.to_ascii();
|
48
|
+
utils()
|
49
|
+
.funcall("_to_ruby_duration", (v, time_unit))
|
50
|
+
.unwrap()
|
51
|
+
}
|
52
|
+
AnyValue::Time(v) => utils().funcall("_to_ruby_time", (v,)).unwrap(),
|
53
|
+
AnyValue::Array(v, _) | AnyValue::List(v) => RbSeries::new(v).to_a().into_value(),
|
54
|
+
ref av @ AnyValue::Struct(_, _, flds) => struct_dict(av._iter_struct_av(), flds),
|
55
|
+
AnyValue::StructOwned(payload) => struct_dict(payload.0.into_iter(), &payload.1),
|
56
|
+
AnyValue::Object(v) => {
|
57
|
+
let object = v.as_any().downcast_ref::<ObjectValue>().unwrap();
|
58
|
+
object.to_object()
|
59
|
+
}
|
60
|
+
AnyValue::ObjectOwned(v) => {
|
61
|
+
let object = v.0.as_any().downcast_ref::<ObjectValue>().unwrap();
|
62
|
+
object.to_object()
|
63
|
+
}
|
64
|
+
AnyValue::Binary(v) => RString::from_slice(v).into_value(),
|
65
|
+
AnyValue::BinaryOwned(v) => RString::from_slice(&v).into_value(),
|
66
|
+
AnyValue::Decimal(v, scale) => utils()
|
67
|
+
.funcall("_to_ruby_decimal", (v.to_string(), -(scale as i32)))
|
68
|
+
.unwrap(),
|
69
|
+
}
|
70
|
+
}
|
71
|
+
}
|
72
|
+
|
73
|
+
impl<'s> TryConvert for Wrap<AnyValue<'s>> {
|
74
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
75
|
+
if ob.is_kind_of(class::true_class()) || ob.is_kind_of(class::false_class()) {
|
76
|
+
Ok(AnyValue::Boolean(bool::try_convert(ob)?).into())
|
77
|
+
} else if let Some(v) = Integer::from_value(ob) {
|
78
|
+
Ok(AnyValue::Int64(v.to_i64()?).into())
|
79
|
+
} else if let Some(v) = Float::from_value(ob) {
|
80
|
+
Ok(AnyValue::Float64(v.to_f64()).into())
|
81
|
+
} else if let Some(v) = RString::from_value(ob) {
|
82
|
+
if v.enc_get() == Index::utf8() {
|
83
|
+
Ok(AnyValue::StringOwned(v.to_string()?.into()).into())
|
84
|
+
} else {
|
85
|
+
Ok(AnyValue::BinaryOwned(unsafe { v.as_slice() }.to_vec()).into())
|
86
|
+
}
|
87
|
+
// call is_a? for ActiveSupport::TimeWithZone
|
88
|
+
} else if ob.funcall::<_, _, bool>("is_a?", (class::time(),))? {
|
89
|
+
let sec = ob.funcall::<_, _, i64>("to_i", ())?;
|
90
|
+
let nsec = ob.funcall::<_, _, i64>("nsec", ())?;
|
91
|
+
let v = sec * 1_000_000_000 + nsec;
|
92
|
+
// TODO support time zone when possible
|
93
|
+
// https://github.com/pola-rs/polars/issues/9103
|
94
|
+
Ok(AnyValue::Datetime(v, TimeUnit::Nanoseconds, &None).into())
|
95
|
+
} else if ob.is_nil() {
|
96
|
+
Ok(AnyValue::Null.into())
|
97
|
+
} else if let Some(dict) = RHash::from_value(ob) {
|
98
|
+
let len = dict.len();
|
99
|
+
let mut keys = Vec::with_capacity(len);
|
100
|
+
let mut vals = Vec::with_capacity(len);
|
101
|
+
dict.foreach(|k: Value, v: Value| {
|
102
|
+
let key = String::try_convert(k)?;
|
103
|
+
let val = Wrap::<AnyValue>::try_convert(v)?.0;
|
104
|
+
let dtype = DataType::from(&val);
|
105
|
+
keys.push(Field::new(&key, dtype));
|
106
|
+
vals.push(val);
|
107
|
+
Ok(ForEach::Continue)
|
108
|
+
})?;
|
109
|
+
Ok(Wrap(AnyValue::StructOwned(Box::new((vals, keys)))))
|
110
|
+
} else if let Some(v) = RArray::from_value(ob) {
|
111
|
+
if v.is_empty() {
|
112
|
+
Ok(Wrap(AnyValue::List(Series::new_empty("", &DataType::Null))))
|
113
|
+
} else {
|
114
|
+
let list = v;
|
115
|
+
|
116
|
+
let mut avs = Vec::with_capacity(25);
|
117
|
+
let mut iter = list.each();
|
118
|
+
|
119
|
+
for item in (&mut iter).take(25) {
|
120
|
+
avs.push(Wrap::<AnyValue>::try_convert(item?)?.0)
|
121
|
+
}
|
122
|
+
|
123
|
+
let (dtype, _n_types) = any_values_to_dtype(&avs).map_err(RbPolarsErr::from)?;
|
124
|
+
|
125
|
+
// push the rest
|
126
|
+
avs.reserve(list.len());
|
127
|
+
for item in iter {
|
128
|
+
avs.push(Wrap::<AnyValue>::try_convert(item?)?.0)
|
129
|
+
}
|
130
|
+
|
131
|
+
let s = Series::from_any_values_and_dtype("", &avs, &dtype, true)
|
132
|
+
.map_err(RbPolarsErr::from)?;
|
133
|
+
Ok(Wrap(AnyValue::List(s)))
|
134
|
+
}
|
135
|
+
} else if ob.is_kind_of(crate::rb_modules::datetime()) {
|
136
|
+
let sec: i64 = ob.funcall("to_i", ())?;
|
137
|
+
let nsec: i64 = ob.funcall("nsec", ())?;
|
138
|
+
Ok(Wrap(AnyValue::Datetime(
|
139
|
+
sec * 1_000_000_000 + nsec,
|
140
|
+
TimeUnit::Nanoseconds,
|
141
|
+
&None,
|
142
|
+
)))
|
143
|
+
} else if ob.is_kind_of(crate::rb_modules::date()) {
|
144
|
+
// convert to DateTime for UTC
|
145
|
+
let v = ob
|
146
|
+
.funcall::<_, _, Value>("to_datetime", ())?
|
147
|
+
.funcall::<_, _, Value>("to_time", ())?
|
148
|
+
.funcall::<_, _, i64>("to_i", ())?;
|
149
|
+
Ok(Wrap(AnyValue::Date((v / 86400) as i32)))
|
150
|
+
} else if ob.is_kind_of(crate::rb_modules::bigdecimal()) {
|
151
|
+
let (sign, digits, _, exp): (i8, String, i32, i32) = ob.funcall("split", ()).unwrap();
|
152
|
+
let (mut v, scale) = abs_decimal_from_digits(digits, exp).ok_or_else(|| {
|
153
|
+
RbPolarsErr::other("BigDecimal is too large to fit in Decimal128".into())
|
154
|
+
})?;
|
155
|
+
if sign < 0 {
|
156
|
+
// TODO better error
|
157
|
+
v = v.checked_neg().unwrap();
|
158
|
+
}
|
159
|
+
Ok(Wrap(AnyValue::Decimal(v, scale)))
|
160
|
+
} else {
|
161
|
+
Err(RbPolarsErr::other(format!(
|
162
|
+
"object type not supported {:?}",
|
163
|
+
ob
|
164
|
+
)))
|
165
|
+
}
|
166
|
+
}
|
167
|
+
}
|
168
|
+
|
169
|
+
/// Turns the digit string and exponent of a decimal number into an unscaled integer
/// and a scale.
///
/// `digits` is parsed as an `i128`. The effective exponent is `exp` minus the number
/// of digits. A positive effective exponent is multiplied into the value and the scale
/// is `0`; otherwise the value is kept and the scale is the negated exponent.
///
/// Returns `None` when `digits` does not parse as an `i128`, when multiplying out a
/// positive exponent overflows `i128`, or when the scale does not fit in `usize`.
fn abs_decimal_from_digits(digits: String, exp: i32) -> Option<(i128, usize)> {
    // Work in i64 with checked conversions so that neither the subtraction nor the
    // negation can overflow, whatever exponent the caller passes.
    let digit_count = i64::try_from(digits.len()).ok()?;
    let exp = i64::from(exp).checked_sub(digit_count)?;
    let v = digits.parse::<i128>().ok()?;

    if exp > 0 {
        let factor = 10_i128.checked_pow(u32::try_from(exp).ok()?)?;
        Some((v.checked_mul(factor)?, 0))
    } else {
        Some((v, usize::try_from(exp.unsigned_abs()).ok()?))
    }
}
|
@@ -0,0 +1,140 @@
|
|
1
|
+
use magnus::{prelude::*, IntoValue, RArray, RString, Ruby, TryConvert, Value};
|
2
|
+
use polars::prelude::*;
|
3
|
+
|
4
|
+
use super::{get_rbseq, struct_dict, Wrap};
|
5
|
+
|
6
|
+
use crate::rb_modules::utils;
|
7
|
+
use crate::RbResult;
|
8
|
+
|
9
|
+
impl TryConvert for Wrap<StringChunked> {
|
10
|
+
fn try_convert(obj: Value) -> RbResult<Self> {
|
11
|
+
let (seq, len) = get_rbseq(obj)?;
|
12
|
+
let mut builder = StringChunkedBuilder::new("", len);
|
13
|
+
|
14
|
+
for res in seq.each() {
|
15
|
+
let item = res?;
|
16
|
+
match String::try_convert(item) {
|
17
|
+
Ok(val) => builder.append_value(&val),
|
18
|
+
Err(_) => builder.append_null(),
|
19
|
+
}
|
20
|
+
}
|
21
|
+
Ok(Wrap(builder.finish()))
|
22
|
+
}
|
23
|
+
}
|
24
|
+
|
25
|
+
impl TryConvert for Wrap<BinaryChunked> {
|
26
|
+
fn try_convert(obj: Value) -> RbResult<Self> {
|
27
|
+
let (seq, len) = get_rbseq(obj)?;
|
28
|
+
let mut builder = BinaryChunkedBuilder::new("", len);
|
29
|
+
|
30
|
+
for res in seq.each() {
|
31
|
+
let item = res?;
|
32
|
+
match RString::try_convert(item) {
|
33
|
+
Ok(val) => builder.append_value(unsafe { val.as_slice() }),
|
34
|
+
Err(_) => builder.append_null(),
|
35
|
+
}
|
36
|
+
}
|
37
|
+
Ok(Wrap(builder.finish()))
|
38
|
+
}
|
39
|
+
}
|
40
|
+
|
41
|
+
impl IntoValue for Wrap<&StringChunked> {
|
42
|
+
fn into_value_with(self, _: &Ruby) -> Value {
|
43
|
+
let iter = self.0.into_iter();
|
44
|
+
RArray::from_iter(iter).into_value()
|
45
|
+
}
|
46
|
+
}
|
47
|
+
|
48
|
+
impl IntoValue for Wrap<&BinaryChunked> {
|
49
|
+
fn into_value_with(self, _: &Ruby) -> Value {
|
50
|
+
let iter = self
|
51
|
+
.0
|
52
|
+
.into_iter()
|
53
|
+
.map(|opt_bytes| opt_bytes.map(RString::from_slice));
|
54
|
+
RArray::from_iter(iter).into_value()
|
55
|
+
}
|
56
|
+
}
|
57
|
+
|
58
|
+
impl IntoValue for Wrap<&StructChunked> {
|
59
|
+
fn into_value_with(self, _: &Ruby) -> Value {
|
60
|
+
let s = self.0.clone().into_series();
|
61
|
+
// todo! iterate its chunks and flatten.
|
62
|
+
// make series::iter() accept a chunk index.
|
63
|
+
let s = s.rechunk();
|
64
|
+
let iter = s.iter().map(|av| {
|
65
|
+
if let AnyValue::Struct(_, _, flds) = av {
|
66
|
+
struct_dict(av._iter_struct_av(), flds)
|
67
|
+
} else {
|
68
|
+
unreachable!()
|
69
|
+
}
|
70
|
+
});
|
71
|
+
|
72
|
+
RArray::from_iter(iter).into_value()
|
73
|
+
}
|
74
|
+
}
|
75
|
+
|
76
|
+
impl IntoValue for Wrap<&DurationChunked> {
|
77
|
+
fn into_value_with(self, _: &Ruby) -> Value {
|
78
|
+
let utils = utils();
|
79
|
+
let time_unit = Wrap(self.0.time_unit()).into_value();
|
80
|
+
let iter = self.0.into_iter().map(|opt_v| {
|
81
|
+
opt_v.map(|v| {
|
82
|
+
utils
|
83
|
+
.funcall::<_, _, Value>("_to_ruby_duration", (v, time_unit))
|
84
|
+
.unwrap()
|
85
|
+
})
|
86
|
+
});
|
87
|
+
RArray::from_iter(iter).into_value()
|
88
|
+
}
|
89
|
+
}
|
90
|
+
|
91
|
+
impl IntoValue for Wrap<&DatetimeChunked> {
|
92
|
+
fn into_value_with(self, _: &Ruby) -> Value {
|
93
|
+
let utils = utils();
|
94
|
+
let time_unit = Wrap(self.0.time_unit()).into_value();
|
95
|
+
let time_zone = self.0.time_zone().clone().into_value();
|
96
|
+
let iter = self.0.into_iter().map(|opt_v| {
|
97
|
+
opt_v.map(|v| {
|
98
|
+
utils
|
99
|
+
.funcall::<_, _, Value>("_to_ruby_datetime", (v, time_unit, time_zone))
|
100
|
+
.unwrap()
|
101
|
+
})
|
102
|
+
});
|
103
|
+
RArray::from_iter(iter).into_value()
|
104
|
+
}
|
105
|
+
}
|
106
|
+
|
107
|
+
impl IntoValue for Wrap<&TimeChunked> {
|
108
|
+
fn into_value_with(self, _: &Ruby) -> Value {
|
109
|
+
let utils = utils();
|
110
|
+
let iter = self.0.into_iter().map(|opt_v| {
|
111
|
+
opt_v.map(|v| utils.funcall::<_, _, Value>("_to_ruby_time", (v,)).unwrap())
|
112
|
+
});
|
113
|
+
RArray::from_iter(iter).into_value()
|
114
|
+
}
|
115
|
+
}
|
116
|
+
|
117
|
+
impl IntoValue for Wrap<&DateChunked> {
|
118
|
+
fn into_value_with(self, _: &Ruby) -> Value {
|
119
|
+
let utils = utils();
|
120
|
+
let iter = self.0.into_iter().map(|opt_v| {
|
121
|
+
opt_v.map(|v| utils.funcall::<_, _, Value>("_to_ruby_date", (v,)).unwrap())
|
122
|
+
});
|
123
|
+
RArray::from_iter(iter).into_value()
|
124
|
+
}
|
125
|
+
}
|
126
|
+
|
127
|
+
impl IntoValue for Wrap<&DecimalChunked> {
|
128
|
+
fn into_value_with(self, _: &Ruby) -> Value {
|
129
|
+
let utils = utils();
|
130
|
+
let rb_scale = (-(self.0.scale() as i32)).into_value();
|
131
|
+
let iter = self.0.into_iter().map(|opt_v| {
|
132
|
+
opt_v.map(|v| {
|
133
|
+
utils
|
134
|
+
.funcall::<_, _, Value>("_to_ruby_decimal", (v.to_string(), rb_scale))
|
135
|
+
.unwrap()
|
136
|
+
})
|
137
|
+
});
|
138
|
+
RArray::from_iter(iter).into_value()
|
139
|
+
}
|
140
|
+
}
|