polars-df 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/LICENSE.txt +20 -0
- data/README.md +93 -0
- data/ext/polars/Cargo.toml +35 -0
- data/ext/polars/extconf.rb +4 -0
- data/ext/polars/src/conversion.rs +115 -0
- data/ext/polars/src/dataframe.rs +304 -0
- data/ext/polars/src/error.rs +24 -0
- data/ext/polars/src/file.rs +28 -0
- data/ext/polars/src/lazy/dataframe.rs +123 -0
- data/ext/polars/src/lazy/dsl.rs +298 -0
- data/ext/polars/src/lazy/mod.rs +3 -0
- data/ext/polars/src/lazy/utils.rs +13 -0
- data/ext/polars/src/lib.rs +256 -0
- data/ext/polars/src/series.rs +475 -0
- data/lib/polars/data_frame.rb +315 -0
- data/lib/polars/expr.rb +233 -0
- data/lib/polars/functions.rb +45 -0
- data/lib/polars/io.rb +39 -0
- data/lib/polars/lazy_frame.rb +139 -0
- data/lib/polars/lazy_functions.rb +121 -0
- data/lib/polars/lazy_group_by.rb +13 -0
- data/lib/polars/series.rb +261 -0
- data/lib/polars/string_expr.rb +17 -0
- data/lib/polars/utils.rb +47 -0
- data/lib/polars/version.rb +3 -0
- data/lib/polars/when.rb +15 -0
- data/lib/polars/when_then.rb +18 -0
- data/lib/polars-df.rb +1 -0
- data/lib/polars.rb +25 -0
- metadata +87 -0
@@ -0,0 +1,475 @@
|
|
1
|
+
use crate::conversion::wrap;
|
2
|
+
use crate::{RbDataFrame, RbPolarsErr, RbResult};
|
3
|
+
use magnus::exception::arg_error;
|
4
|
+
use magnus::{Error, RArray, Value};
|
5
|
+
use polars::prelude::*;
|
6
|
+
use polars::series::IsSorted;
|
7
|
+
use std::cell::RefCell;
|
8
|
+
|
9
|
+
#[magnus::wrap(class = "Polars::RbSeries")]
|
10
|
+
pub struct RbSeries {
|
11
|
+
pub series: RefCell<Series>,
|
12
|
+
}
|
13
|
+
|
14
|
+
/// Lets a Polars [`Series`] be turned into an [`RbSeries`] with `.into()`.
impl From<Series> for RbSeries {
    fn from(series: Series) -> Self {
        // Delegate to the canonical constructor so there is one wrapping path.
        Self::new(series)
    }
}
|
19
|
+
|
20
|
+
impl RbSeries {
|
21
|
+
pub fn new(series: Series) -> Self {
|
22
|
+
RbSeries {
|
23
|
+
series: RefCell::new(series),
|
24
|
+
}
|
25
|
+
}
|
26
|
+
|
27
|
+
pub fn new_opt_bool(name: String, obj: RArray, strict: bool) -> RbResult<RbSeries> {
|
28
|
+
let len = obj.len();
|
29
|
+
let mut builder = BooleanChunkedBuilder::new(&name, len);
|
30
|
+
|
31
|
+
unsafe {
|
32
|
+
for item in obj.as_slice().iter() {
|
33
|
+
if item.is_nil() {
|
34
|
+
builder.append_null()
|
35
|
+
} else {
|
36
|
+
match item.try_convert::<bool>() {
|
37
|
+
Ok(val) => builder.append_value(val),
|
38
|
+
Err(e) => {
|
39
|
+
if strict {
|
40
|
+
return Err(e);
|
41
|
+
}
|
42
|
+
builder.append_null()
|
43
|
+
}
|
44
|
+
}
|
45
|
+
}
|
46
|
+
}
|
47
|
+
}
|
48
|
+
let ca = builder.finish();
|
49
|
+
|
50
|
+
let s = ca.into_series();
|
51
|
+
Ok(RbSeries::new(s))
|
52
|
+
}
|
53
|
+
}
|
54
|
+
|
55
|
+
fn new_primitive<T>(name: &str, obj: RArray, strict: bool) -> RbResult<RbSeries>
|
56
|
+
where
|
57
|
+
T: PolarsNumericType,
|
58
|
+
ChunkedArray<T>: IntoSeries,
|
59
|
+
T::Native: magnus::TryConvert,
|
60
|
+
{
|
61
|
+
let len = obj.len();
|
62
|
+
let mut builder = PrimitiveChunkedBuilder::<T>::new(name, len);
|
63
|
+
|
64
|
+
unsafe {
|
65
|
+
for item in obj.as_slice().iter() {
|
66
|
+
if item.is_nil() {
|
67
|
+
builder.append_null()
|
68
|
+
} else {
|
69
|
+
match item.try_convert::<T::Native>() {
|
70
|
+
Ok(val) => builder.append_value(val),
|
71
|
+
Err(e) => {
|
72
|
+
if strict {
|
73
|
+
return Err(e);
|
74
|
+
}
|
75
|
+
builder.append_null()
|
76
|
+
}
|
77
|
+
}
|
78
|
+
}
|
79
|
+
}
|
80
|
+
}
|
81
|
+
let ca = builder.finish();
|
82
|
+
|
83
|
+
let s = ca.into_series();
|
84
|
+
Ok(RbSeries::new(s))
|
85
|
+
}
|
86
|
+
|
87
|
+
// Init with lists that can contain Nones
|
88
|
+
macro_rules! init_method_opt {
|
89
|
+
($name:ident, $type:ty, $native: ty) => {
|
90
|
+
impl RbSeries {
|
91
|
+
pub fn $name(name: String, obj: RArray, strict: bool) -> RbResult<Self> {
|
92
|
+
new_primitive::<$type>(&name, obj, strict)
|
93
|
+
}
|
94
|
+
}
|
95
|
+
};
|
96
|
+
}
|
97
|
+
|
98
|
+
init_method_opt!(new_opt_u8, UInt8Type, u8);
|
99
|
+
init_method_opt!(new_opt_u16, UInt16Type, u16);
|
100
|
+
init_method_opt!(new_opt_u32, UInt32Type, u32);
|
101
|
+
init_method_opt!(new_opt_u64, UInt64Type, u64);
|
102
|
+
init_method_opt!(new_opt_i8, Int8Type, i8);
|
103
|
+
init_method_opt!(new_opt_i16, Int16Type, i16);
|
104
|
+
init_method_opt!(new_opt_i32, Int32Type, i32);
|
105
|
+
init_method_opt!(new_opt_i64, Int64Type, i64);
|
106
|
+
init_method_opt!(new_opt_f32, Float32Type, f32);
|
107
|
+
init_method_opt!(new_opt_f64, Float64Type, f64);
|
108
|
+
|
109
|
+
impl RbSeries {
|
110
|
+
pub fn new_str(name: String, val: RArray, _strict: bool) -> RbResult<Self> {
|
111
|
+
let v = val.try_convert::<Vec<Option<String>>>()?;
|
112
|
+
let mut s = Utf8Chunked::new(&name, v).into_series();
|
113
|
+
s.rename(&name);
|
114
|
+
Ok(RbSeries::new(s))
|
115
|
+
}
|
116
|
+
|
117
|
+
/// Rechunks the wrapped series.
///
/// With `in_place` set, the wrapped series is replaced by the rechunked one
/// and `None` is returned; otherwise the rechunked series is returned as a new
/// wrapper and `self` is left untouched.
pub fn rechunk(&self, in_place: bool) -> Option<Self> {
    // Rechunking only reads the current series, so a shared borrow is enough.
    // The borrow ends with this statement, before the exclusive borrow below.
    let rechunked = self.series.borrow().rechunk();
    if in_place {
        *self.series.borrow_mut() = rechunked;
        None
    } else {
        Some(rechunked.into())
    }
}
|
126
|
+
|
127
|
+
pub fn bitand(&self, other: &RbSeries) -> RbResult<Self> {
|
128
|
+
let out = self
|
129
|
+
.series
|
130
|
+
.borrow()
|
131
|
+
.bitand(&other.series.borrow())
|
132
|
+
.map_err(RbPolarsErr::from)?;
|
133
|
+
Ok(out.into())
|
134
|
+
}
|
135
|
+
|
136
|
+
pub fn bitor(&self, other: &RbSeries) -> RbResult<Self> {
|
137
|
+
let out = self
|
138
|
+
.series
|
139
|
+
.borrow()
|
140
|
+
.bitor(&other.series.borrow())
|
141
|
+
.map_err(RbPolarsErr::from)?;
|
142
|
+
Ok(out.into())
|
143
|
+
}
|
144
|
+
|
145
|
+
pub fn bitxor(&self, other: &RbSeries) -> RbResult<Self> {
|
146
|
+
let out = self
|
147
|
+
.series
|
148
|
+
.borrow()
|
149
|
+
.bitxor(&other.series.borrow())
|
150
|
+
.map_err(RbPolarsErr::from)?;
|
151
|
+
Ok(out.into())
|
152
|
+
}
|
153
|
+
|
154
|
+
pub fn chunk_lengths(&self) -> Vec<usize> {
|
155
|
+
self.series.borrow().chunk_lengths().collect()
|
156
|
+
}
|
157
|
+
|
158
|
+
pub fn name(&self) -> String {
|
159
|
+
self.series.borrow().name().into()
|
160
|
+
}
|
161
|
+
|
162
|
+
pub fn rename(&self, name: String) {
|
163
|
+
self.series.borrow_mut().rename(&name);
|
164
|
+
}
|
165
|
+
|
166
|
+
pub fn dtype(&self) -> String {
|
167
|
+
self.series.borrow().dtype().to_string()
|
168
|
+
}
|
169
|
+
|
170
|
+
pub fn inner_dtype(&self) -> Option<String> {
|
171
|
+
self.series
|
172
|
+
.borrow()
|
173
|
+
.dtype()
|
174
|
+
.inner_dtype()
|
175
|
+
.map(|dt| dt.to_string())
|
176
|
+
}
|
177
|
+
|
178
|
+
pub fn set_sorted(&self, reverse: bool) -> Self {
|
179
|
+
let mut out = self.series.borrow().clone();
|
180
|
+
if reverse {
|
181
|
+
out.set_sorted(IsSorted::Descending);
|
182
|
+
} else {
|
183
|
+
out.set_sorted(IsSorted::Ascending)
|
184
|
+
}
|
185
|
+
out.into()
|
186
|
+
}
|
187
|
+
|
188
|
+
pub fn mean(&self) -> Option<f64> {
|
189
|
+
match self.series.borrow().dtype() {
|
190
|
+
DataType::Boolean => {
|
191
|
+
let s = self.series.borrow().cast(&DataType::UInt8).unwrap();
|
192
|
+
s.mean()
|
193
|
+
}
|
194
|
+
_ => self.series.borrow().mean(),
|
195
|
+
}
|
196
|
+
}
|
197
|
+
|
198
|
+
pub fn max(&self) -> Value {
|
199
|
+
wrap(self.series.borrow().max_as_series().get(0))
|
200
|
+
}
|
201
|
+
|
202
|
+
pub fn min(&self) -> Value {
|
203
|
+
wrap(self.series.borrow().min_as_series().get(0))
|
204
|
+
}
|
205
|
+
|
206
|
+
pub fn sum(&self) -> Value {
|
207
|
+
wrap(self.series.borrow().sum_as_series().get(0))
|
208
|
+
}
|
209
|
+
|
210
|
+
pub fn n_chunks(&self) -> usize {
|
211
|
+
self.series.borrow().n_chunks()
|
212
|
+
}
|
213
|
+
|
214
|
+
pub fn append(&self, other: &RbSeries) -> RbResult<()> {
|
215
|
+
let mut binding = self.series.borrow_mut();
|
216
|
+
let res = binding.append(&other.series.borrow());
|
217
|
+
if let Err(e) = res {
|
218
|
+
Err(Error::runtime_error(e.to_string()))
|
219
|
+
} else {
|
220
|
+
Ok(())
|
221
|
+
}
|
222
|
+
}
|
223
|
+
|
224
|
+
pub fn extend(&self, other: &RbSeries) -> RbResult<()> {
|
225
|
+
self.series
|
226
|
+
.borrow_mut()
|
227
|
+
.extend(&other.series.borrow())
|
228
|
+
.map_err(RbPolarsErr::from)?;
|
229
|
+
Ok(())
|
230
|
+
}
|
231
|
+
|
232
|
+
pub fn new_from_index(&self, index: usize, length: usize) -> RbResult<Self> {
|
233
|
+
if index >= self.series.borrow().len() {
|
234
|
+
Err(Error::new(arg_error(), "index is out of bounds"))
|
235
|
+
} else {
|
236
|
+
Ok(self.series.borrow().new_from_index(index, length).into())
|
237
|
+
}
|
238
|
+
}
|
239
|
+
|
240
|
+
pub fn filter(&self, filter: &RbSeries) -> RbResult<Self> {
|
241
|
+
let filter_series = &filter.series.borrow();
|
242
|
+
if let Ok(ca) = filter_series.bool() {
|
243
|
+
let series = self.series.borrow().filter(ca).unwrap();
|
244
|
+
Ok(series.into())
|
245
|
+
} else {
|
246
|
+
Err(Error::runtime_error("Expected a boolean mask".to_string()))
|
247
|
+
}
|
248
|
+
}
|
249
|
+
|
250
|
+
/// Adds this series and `other` with the `+` operator on series references.
pub fn add(&self, other: &RbSeries) -> Self {
    let lhs = self.series.borrow();
    let rhs = other.series.borrow();
    let result = &*lhs + &*rhs;
    result.into()
}
|
253
|
+
|
254
|
+
/// Subtracts `other` from this series with the `-` operator on series
/// references.
pub fn sub(&self, other: &RbSeries) -> Self {
    let lhs = self.series.borrow();
    let rhs = other.series.borrow();
    let result = &*lhs - &*rhs;
    result.into()
}
|
257
|
+
|
258
|
+
/// Multiplies this series by `other` with the `*` operator on series
/// references.
pub fn mul(&self, other: &RbSeries) -> Self {
    let lhs = self.series.borrow();
    let rhs = other.series.borrow();
    let result = &*lhs * &*rhs;
    result.into()
}
|
261
|
+
|
262
|
+
/// Divides this series by `other` with the `/` operator on series references.
pub fn div(&self, other: &RbSeries) -> Self {
    let lhs = self.series.borrow();
    let rhs = other.series.borrow();
    let result = &*lhs / &*rhs;
    result.into()
}
|
265
|
+
|
266
|
+
/// Computes the remainder of this series by `other` with the `%` operator on
/// series references.
pub fn rem(&self, other: &RbSeries) -> Self {
    let lhs = self.series.borrow();
    let rhs = other.series.borrow();
    let result = &*lhs % &*rhs;
    result.into()
}
|
269
|
+
|
270
|
+
/// Returns a sorted copy of the series; `reverse` is forwarded to
/// `Series::sort`.
pub fn sort(&self, reverse: bool) -> Self {
    // Sorting produces a new series and leaves the wrapped one untouched, so
    // a shared borrow is enough.
    self.series.borrow().sort(reverse).into()
}
|
273
|
+
|
274
|
+
pub fn value_counts(&self, sorted: bool) -> RbResult<RbDataFrame> {
|
275
|
+
let df = self
|
276
|
+
.series
|
277
|
+
.borrow()
|
278
|
+
.value_counts(true, sorted)
|
279
|
+
.map_err(RbPolarsErr::from)?;
|
280
|
+
Ok(df.into())
|
281
|
+
}
|
282
|
+
|
283
|
+
pub fn arg_min(&self) -> Option<usize> {
|
284
|
+
self.series.borrow().arg_min()
|
285
|
+
}
|
286
|
+
|
287
|
+
pub fn arg_max(&self) -> Option<usize> {
|
288
|
+
self.series.borrow().arg_max()
|
289
|
+
}
|
290
|
+
|
291
|
+
pub fn take_with_series(&self, indices: &RbSeries) -> RbResult<Self> {
|
292
|
+
let binding = indices.series.borrow();
|
293
|
+
let idx = binding.idx().map_err(RbPolarsErr::from)?;
|
294
|
+
let take = self.series.borrow().take(idx).map_err(RbPolarsErr::from)?;
|
295
|
+
Ok(RbSeries::new(take))
|
296
|
+
}
|
297
|
+
|
298
|
+
/// Returns the number of nulls in the series. The result is always `Ok`.
pub fn null_count(&self) -> RbResult<usize> {
    let series = self.series.borrow();
    let nulls = series.null_count();
    Ok(nulls)
}
|
301
|
+
|
302
|
+
pub fn has_validity(&self) -> bool {
|
303
|
+
self.series.borrow().has_validity()
|
304
|
+
}
|
305
|
+
|
306
|
+
pub fn sample_n(
|
307
|
+
&self,
|
308
|
+
n: usize,
|
309
|
+
with_replacement: bool,
|
310
|
+
shuffle: bool,
|
311
|
+
seed: Option<u64>,
|
312
|
+
) -> RbResult<Self> {
|
313
|
+
let s = self
|
314
|
+
.series
|
315
|
+
.borrow()
|
316
|
+
.sample_n(n, with_replacement, shuffle, seed)
|
317
|
+
.map_err(RbPolarsErr::from)?;
|
318
|
+
Ok(s.into())
|
319
|
+
}
|
320
|
+
|
321
|
+
pub fn sample_frac(
|
322
|
+
&self,
|
323
|
+
frac: f64,
|
324
|
+
with_replacement: bool,
|
325
|
+
shuffle: bool,
|
326
|
+
seed: Option<u64>,
|
327
|
+
) -> RbResult<Self> {
|
328
|
+
let s = self
|
329
|
+
.series
|
330
|
+
.borrow()
|
331
|
+
.sample_frac(frac, with_replacement, shuffle, seed)
|
332
|
+
.map_err(RbPolarsErr::from)?;
|
333
|
+
Ok(s.into())
|
334
|
+
}
|
335
|
+
|
336
|
+
/// Compares this series with `other` as a whole and returns a single `bool`.
///
/// `strict` takes precedence and uses `eq`; otherwise `null_equal` selects
/// `series_equal_missing` over `series_equal`.
pub fn series_equal(&self, other: &RbSeries, null_equal: bool, strict: bool) -> bool {
    let lhs = self.series.borrow();
    let rhs = other.series.borrow();
    match (strict, null_equal) {
        (true, _) => lhs.eq(&rhs),
        (false, true) => lhs.series_equal_missing(&rhs),
        (false, false) => lhs.series_equal(&rhs),
    }
}
|
347
|
+
|
348
|
+
pub fn eq(&self, rhs: &RbSeries) -> RbResult<Self> {
|
349
|
+
let s = self
|
350
|
+
.series
|
351
|
+
.borrow()
|
352
|
+
.equal(&*rhs.series.borrow())
|
353
|
+
.map_err(RbPolarsErr::from)?;
|
354
|
+
Ok(Self::new(s.into_series()))
|
355
|
+
}
|
356
|
+
|
357
|
+
pub fn neq(&self, rhs: &RbSeries) -> RbResult<Self> {
|
358
|
+
let s = self
|
359
|
+
.series
|
360
|
+
.borrow()
|
361
|
+
.not_equal(&*rhs.series.borrow())
|
362
|
+
.map_err(RbPolarsErr::from)?;
|
363
|
+
Ok(Self::new(s.into_series()))
|
364
|
+
}
|
365
|
+
|
366
|
+
pub fn gt(&self, rhs: &RbSeries) -> RbResult<Self> {
|
367
|
+
let s = self
|
368
|
+
.series
|
369
|
+
.borrow()
|
370
|
+
.gt(&*rhs.series.borrow())
|
371
|
+
.map_err(RbPolarsErr::from)?;
|
372
|
+
Ok(Self::new(s.into_series()))
|
373
|
+
}
|
374
|
+
|
375
|
+
pub fn gt_eq(&self, rhs: &RbSeries) -> RbResult<Self> {
|
376
|
+
let s = self
|
377
|
+
.series
|
378
|
+
.borrow()
|
379
|
+
.gt_eq(&*rhs.series.borrow())
|
380
|
+
.map_err(RbPolarsErr::from)?;
|
381
|
+
Ok(Self::new(s.into_series()))
|
382
|
+
}
|
383
|
+
|
384
|
+
pub fn lt(&self, rhs: &RbSeries) -> RbResult<Self> {
|
385
|
+
let s = self
|
386
|
+
.series
|
387
|
+
.borrow()
|
388
|
+
.lt(&*rhs.series.borrow())
|
389
|
+
.map_err(RbPolarsErr::from)?;
|
390
|
+
Ok(Self::new(s.into_series()))
|
391
|
+
}
|
392
|
+
|
393
|
+
pub fn lt_eq(&self, rhs: &RbSeries) -> RbResult<Self> {
|
394
|
+
let s = self
|
395
|
+
.series
|
396
|
+
.borrow()
|
397
|
+
.lt_eq(&*rhs.series.borrow())
|
398
|
+
.map_err(RbPolarsErr::from)?;
|
399
|
+
Ok(Self::new(s.into_series()))
|
400
|
+
}
|
401
|
+
|
402
|
+
pub fn not(&self) -> RbResult<Self> {
|
403
|
+
let binding = self.series.borrow();
|
404
|
+
let bool = binding.bool().map_err(RbPolarsErr::from)?;
|
405
|
+
Ok((!bool).into_series().into())
|
406
|
+
}
|
407
|
+
|
408
|
+
pub fn to_s(&self) -> String {
|
409
|
+
format!("{}", self.series.borrow())
|
410
|
+
}
|
411
|
+
|
412
|
+
pub fn len(&self) -> usize {
|
413
|
+
self.series.borrow().len()
|
414
|
+
}
|
415
|
+
|
416
|
+
/// Converts the series into a Ruby array.
///
/// Supported element types are `f32`, `f64`, the signed and unsigned integers
/// from 8 to 64 bits, booleans and UTF-8 strings.
///
/// # Panics
///
/// Panics via `unimplemented!` for any other data type.
pub fn to_a(&self) -> RArray {
    let series = self.series.borrow();

    // Try one typed accessor; when it matches the series data type, collect
    // the values and return immediately.
    macro_rules! collect_if {
        ($accessor:ident) => {
            if let Ok(ca) = series.$accessor() {
                return ca.into_iter().collect();
            }
        };
    }

    collect_if!(f32);
    collect_if!(f64);
    collect_if!(i8);
    collect_if!(i16);
    collect_if!(i32);
    collect_if!(i64);
    collect_if!(u8);
    collect_if!(u16);
    collect_if!(u32);
    collect_if!(u64);
    collect_if!(bool);
    collect_if!(utf8);

    unimplemented!()
}
|
446
|
+
|
447
|
+
pub fn median(&self) -> Option<f64> {
|
448
|
+
match self.series.borrow().dtype() {
|
449
|
+
DataType::Boolean => {
|
450
|
+
let s = self.series.borrow().cast(&DataType::UInt8).unwrap();
|
451
|
+
s.median()
|
452
|
+
}
|
453
|
+
_ => self.series.borrow().median(),
|
454
|
+
}
|
455
|
+
}
|
456
|
+
|
457
|
+
// dispatch dynamically in future?
|
458
|
+
|
459
|
+
/// Returns the cumulative sum as a new series; `reverse` is forwarded to
/// Polars.
pub fn cumsum(&self, reverse: bool) -> Self {
    let series = self.series.borrow();
    let running = series.cumsum(reverse);
    running.into()
}
|
462
|
+
|
463
|
+
/// Returns the cumulative maximum as a new series; `reverse` is forwarded to
/// Polars.
pub fn cummax(&self, reverse: bool) -> Self {
    let series = self.series.borrow();
    let running = series.cummax(reverse);
    running.into()
}
|
466
|
+
|
467
|
+
/// Returns the cumulative minimum as a new series; `reverse` is forwarded to
/// Polars.
pub fn cummin(&self, reverse: bool) -> Self {
    let series = self.series.borrow();
    let running = series.cummin(reverse);
    running.into()
}
|
470
|
+
|
471
|
+
/// Returns a slice of the series as a new wrapper; `offset` and `length` are
/// forwarded unchanged to `Series::slice`.
pub fn slice(&self, offset: i64, length: usize) -> Self {
    let source = self.series.borrow();
    source.slice(offset, length).into()
}
|
475
|
+
}
|