polars-df 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/Cargo.lock +70 -9
- data/Cargo.toml +2 -0
- data/ext/polars/Cargo.toml +6 -1
- data/ext/polars/src/apply/dataframe.rs +292 -0
- data/ext/polars/src/apply/mod.rs +254 -0
- data/ext/polars/src/apply/series.rs +1173 -0
- data/ext/polars/src/conversion.rs +100 -5
- data/ext/polars/src/dataframe.rs +146 -1
- data/ext/polars/src/error.rs +8 -0
- data/ext/polars/src/lazy/apply.rs +34 -2
- data/ext/polars/src/lazy/dataframe.rs +72 -1
- data/ext/polars/src/lazy/dsl.rs +38 -0
- data/ext/polars/src/lib.rs +165 -1
- data/ext/polars/src/series.rs +296 -0
- data/ext/polars/src/utils.rs +25 -0
- data/lib/polars/convert.rb +100 -0
- data/lib/polars/data_frame.rb +1457 -56
- data/lib/polars/dynamic_group_by.rb +49 -0
- data/lib/polars/expr.rb +258 -9
- data/lib/polars/functions.rb +192 -3
- data/lib/polars/group_by.rb +43 -3
- data/lib/polars/io.rb +19 -3
- data/lib/polars/lazy_frame.rb +792 -22
- data/lib/polars/lazy_functions.rb +561 -27
- data/lib/polars/rolling_group_by.rb +35 -0
- data/lib/polars/series.rb +132 -10
- data/lib/polars/utils.rb +16 -0
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +9 -1
- metadata +9 -3
@@ -0,0 +1,254 @@
|
|
1
|
+
pub mod dataframe;
|
2
|
+
pub mod series;
|
3
|
+
|
4
|
+
use magnus::{RHash, Value};
|
5
|
+
use polars::chunked_array::builder::get_list_builder;
|
6
|
+
use polars::prelude::*;
|
7
|
+
use polars_core::export::rayon::prelude::*;
|
8
|
+
use polars_core::utils::CustomIterTools;
|
9
|
+
use polars_core::POOL;
|
10
|
+
|
11
|
+
use crate::{ObjectValue, RbPolarsErr, RbResult, RbSeries, Wrap};
|
12
|
+
|
13
|
+
pub trait RbArrowPrimitiveType: PolarsNumericType {}
|
14
|
+
|
15
|
+
impl RbArrowPrimitiveType for UInt8Type {}
|
16
|
+
impl RbArrowPrimitiveType for UInt16Type {}
|
17
|
+
impl RbArrowPrimitiveType for UInt32Type {}
|
18
|
+
impl RbArrowPrimitiveType for UInt64Type {}
|
19
|
+
impl RbArrowPrimitiveType for Int8Type {}
|
20
|
+
impl RbArrowPrimitiveType for Int16Type {}
|
21
|
+
impl RbArrowPrimitiveType for Int32Type {}
|
22
|
+
impl RbArrowPrimitiveType for Int64Type {}
|
23
|
+
impl RbArrowPrimitiveType for Float32Type {}
|
24
|
+
impl RbArrowPrimitiveType for Float64Type {}
|
25
|
+
|
26
|
+
fn iterator_to_struct(
|
27
|
+
it: impl Iterator<Item = Option<Value>>,
|
28
|
+
init_null_count: usize,
|
29
|
+
first_value: AnyValue,
|
30
|
+
name: &str,
|
31
|
+
capacity: usize,
|
32
|
+
) -> RbResult<RbSeries> {
|
33
|
+
let (vals, flds) = match &first_value {
|
34
|
+
AnyValue::Struct(vals, flds) => (&**vals, *flds),
|
35
|
+
AnyValue::StructOwned(payload) => (&*payload.0, &*payload.1),
|
36
|
+
_ => {
|
37
|
+
return Err(crate::error::ComputeError::new_err(format!(
|
38
|
+
"expected struct got {:?}",
|
39
|
+
first_value
|
40
|
+
)))
|
41
|
+
}
|
42
|
+
};
|
43
|
+
|
44
|
+
let struct_width = vals.len();
|
45
|
+
|
46
|
+
// every item in the struct is kept as its own buffer of anyvalues
|
47
|
+
// so as struct with 2 items: {a, b}
|
48
|
+
// will have
|
49
|
+
// [
|
50
|
+
// [ a values ]
|
51
|
+
// [ b values ]
|
52
|
+
// ]
|
53
|
+
let mut items = Vec::with_capacity(vals.len());
|
54
|
+
for item in vals {
|
55
|
+
let mut buf = Vec::with_capacity(capacity);
|
56
|
+
for _ in 0..init_null_count {
|
57
|
+
buf.push(AnyValue::Null);
|
58
|
+
}
|
59
|
+
buf.push(item.clone());
|
60
|
+
items.push(buf);
|
61
|
+
}
|
62
|
+
|
63
|
+
for dict in it {
|
64
|
+
match dict {
|
65
|
+
None => {
|
66
|
+
for field_items in &mut items {
|
67
|
+
field_items.push(AnyValue::Null);
|
68
|
+
}
|
69
|
+
}
|
70
|
+
Some(dict) => {
|
71
|
+
let dict = dict.try_convert::<RHash>()?;
|
72
|
+
if dict.len() != struct_width {
|
73
|
+
return Err(crate::error::ComputeError::new_err(
|
74
|
+
format!("Cannot create struct type.\n> The struct dtype expects {} fields, but it got a dict with {} fields.", struct_width, dict.len())
|
75
|
+
));
|
76
|
+
}
|
77
|
+
// we ignore the keys of the rest of the dicts
|
78
|
+
// the first item determines the output name
|
79
|
+
todo!()
|
80
|
+
// for ((_, val), field_items) in dict.iter().zip(&mut items) {
|
81
|
+
// let item = val.try_convert::<Wrap<AnyValue>>()?;
|
82
|
+
// field_items.push(item.0)
|
83
|
+
// }
|
84
|
+
}
|
85
|
+
}
|
86
|
+
}
|
87
|
+
|
88
|
+
let fields = POOL.install(|| {
|
89
|
+
items
|
90
|
+
.par_iter()
|
91
|
+
.zip(flds)
|
92
|
+
.map(|(av, fld)| Series::new(fld.name(), av))
|
93
|
+
.collect::<Vec<_>>()
|
94
|
+
});
|
95
|
+
|
96
|
+
Ok(StructChunked::new(name, &fields)
|
97
|
+
.unwrap()
|
98
|
+
.into_series()
|
99
|
+
.into())
|
100
|
+
}
|
101
|
+
|
102
|
+
fn iterator_to_primitive<T>(
|
103
|
+
it: impl Iterator<Item = Option<T::Native>>,
|
104
|
+
init_null_count: usize,
|
105
|
+
first_value: Option<T::Native>,
|
106
|
+
name: &str,
|
107
|
+
capacity: usize,
|
108
|
+
) -> ChunkedArray<T>
|
109
|
+
where
|
110
|
+
T: RbArrowPrimitiveType,
|
111
|
+
{
|
112
|
+
// safety: we know the iterators len
|
113
|
+
let mut ca: ChunkedArray<T> = unsafe {
|
114
|
+
if init_null_count > 0 {
|
115
|
+
(0..init_null_count)
|
116
|
+
.map(|_| None)
|
117
|
+
.chain(std::iter::once(first_value))
|
118
|
+
.chain(it)
|
119
|
+
.trust_my_length(capacity)
|
120
|
+
.collect_trusted()
|
121
|
+
} else if first_value.is_some() {
|
122
|
+
std::iter::once(first_value)
|
123
|
+
.chain(it)
|
124
|
+
.trust_my_length(capacity)
|
125
|
+
.collect_trusted()
|
126
|
+
} else {
|
127
|
+
it.collect()
|
128
|
+
}
|
129
|
+
};
|
130
|
+
debug_assert_eq!(ca.len(), capacity);
|
131
|
+
ca.rename(name);
|
132
|
+
ca
|
133
|
+
}
|
134
|
+
|
135
|
+
fn iterator_to_bool(
|
136
|
+
it: impl Iterator<Item = Option<bool>>,
|
137
|
+
init_null_count: usize,
|
138
|
+
first_value: Option<bool>,
|
139
|
+
name: &str,
|
140
|
+
capacity: usize,
|
141
|
+
) -> ChunkedArray<BooleanType> {
|
142
|
+
// safety: we know the iterators len
|
143
|
+
let mut ca: BooleanChunked = unsafe {
|
144
|
+
if init_null_count > 0 {
|
145
|
+
(0..init_null_count)
|
146
|
+
.map(|_| None)
|
147
|
+
.chain(std::iter::once(first_value))
|
148
|
+
.chain(it)
|
149
|
+
.trust_my_length(capacity)
|
150
|
+
.collect_trusted()
|
151
|
+
} else if first_value.is_some() {
|
152
|
+
std::iter::once(first_value)
|
153
|
+
.chain(it)
|
154
|
+
.trust_my_length(capacity)
|
155
|
+
.collect_trusted()
|
156
|
+
} else {
|
157
|
+
it.collect()
|
158
|
+
}
|
159
|
+
};
|
160
|
+
debug_assert_eq!(ca.len(), capacity);
|
161
|
+
ca.rename(name);
|
162
|
+
ca
|
163
|
+
}
|
164
|
+
|
165
|
+
/// Assembles an object `ChunkedArray` of `ObjectValue`s called `name` from
/// leading nulls, an optional pre-computed first value and the remaining
/// values in `it`.
///
/// Row layout of the result:
/// * `init_null_count > 0`: that many nulls, then `first_value` (a null when it
///   is `None`), then the items of `it`;
/// * no leading nulls and `first_value` is `Some`: `first_value`, then `it`;
/// * otherwise: only the items of `it`.
///
/// In the first two cases `capacity` is trusted as the total row count; the
/// result length is compared against it in debug builds only.
fn iterator_to_object(
    it: impl Iterator<Item = Option<ObjectValue>>,
    init_null_count: usize,
    first_value: Option<ObjectValue>,
    name: &str,
    capacity: usize,
) -> ObjectChunked<ObjectValue> {
    let mut out: ObjectChunked<ObjectValue> = match (init_null_count, first_value) {
        // nothing to prepend: `it` is collected on its own
        (0, None) => it.collect(),
        // only a pre-computed head value goes in front of `it`
        (0, head) => {
            let rows = std::iter::once(head).chain(it);
            // SAFETY: relies on the caller passing a `capacity` equal to the
            // number of rows this chain yields.
            unsafe { rows.trust_my_length(capacity).collect_trusted() }
        }
        // leading nulls, then the head value (possibly a null), then `it`
        (null_rows, head) => {
            let rows = std::iter::repeat_with(|| None)
                .take(null_rows)
                .chain(std::iter::once(head))
                .chain(it);
            // SAFETY: relies on the caller passing a `capacity` equal to the
            // number of rows this chain yields.
            unsafe { rows.trust_my_length(capacity).collect_trusted() }
        }
    };
    debug_assert_eq!(out.len(), capacity);
    out.rename(name);
    out
}
|
194
|
+
|
195
|
+
fn iterator_to_utf8(
|
196
|
+
it: impl Iterator<Item = Option<String>>,
|
197
|
+
init_null_count: usize,
|
198
|
+
first_value: Option<&str>,
|
199
|
+
name: &str,
|
200
|
+
capacity: usize,
|
201
|
+
) -> Utf8Chunked {
|
202
|
+
let first_value = first_value.map(|v| v.to_string());
|
203
|
+
|
204
|
+
// safety: we know the iterators len
|
205
|
+
let mut ca: Utf8Chunked = unsafe {
|
206
|
+
if init_null_count > 0 {
|
207
|
+
(0..init_null_count)
|
208
|
+
.map(|_| None)
|
209
|
+
.chain(std::iter::once(first_value))
|
210
|
+
.chain(it)
|
211
|
+
.trust_my_length(capacity)
|
212
|
+
.collect_trusted()
|
213
|
+
} else if first_value.is_some() {
|
214
|
+
std::iter::once(first_value)
|
215
|
+
.chain(it)
|
216
|
+
.trust_my_length(capacity)
|
217
|
+
.collect_trusted()
|
218
|
+
} else {
|
219
|
+
it.collect()
|
220
|
+
}
|
221
|
+
};
|
222
|
+
debug_assert_eq!(ca.len(), capacity);
|
223
|
+
ca.rename(name);
|
224
|
+
ca
|
225
|
+
}
|
226
|
+
|
227
|
+
fn iterator_to_list(
|
228
|
+
dt: &DataType,
|
229
|
+
it: impl Iterator<Item = Option<Series>>,
|
230
|
+
init_null_count: usize,
|
231
|
+
first_value: Option<&Series>,
|
232
|
+
name: &str,
|
233
|
+
capacity: usize,
|
234
|
+
) -> RbResult<ListChunked> {
|
235
|
+
let mut builder =
|
236
|
+
get_list_builder(dt, capacity * 5, capacity, name).map_err(RbPolarsErr::from)?;
|
237
|
+
for _ in 0..init_null_count {
|
238
|
+
builder.append_null()
|
239
|
+
}
|
240
|
+
builder.append_opt_series(first_value);
|
241
|
+
for opt_val in it {
|
242
|
+
match opt_val {
|
243
|
+
None => builder.append_null(),
|
244
|
+
Some(s) => {
|
245
|
+
if s.len() == 0 && s.dtype() != dt {
|
246
|
+
builder.append_series(&Series::full_null("", 0, dt))
|
247
|
+
} else {
|
248
|
+
builder.append_series(&s)
|
249
|
+
}
|
250
|
+
}
|
251
|
+
}
|
252
|
+
}
|
253
|
+
Ok(builder.finish())
|
254
|
+
}
|