polars-df 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/LICENSE.txt +20 -0
- data/README.md +93 -0
- data/ext/polars/Cargo.toml +35 -0
- data/ext/polars/extconf.rb +4 -0
- data/ext/polars/src/conversion.rs +115 -0
- data/ext/polars/src/dataframe.rs +304 -0
- data/ext/polars/src/error.rs +24 -0
- data/ext/polars/src/file.rs +28 -0
- data/ext/polars/src/lazy/dataframe.rs +123 -0
- data/ext/polars/src/lazy/dsl.rs +298 -0
- data/ext/polars/src/lazy/mod.rs +3 -0
- data/ext/polars/src/lazy/utils.rs +13 -0
- data/ext/polars/src/lib.rs +256 -0
- data/ext/polars/src/series.rs +475 -0
- data/lib/polars/data_frame.rb +315 -0
- data/lib/polars/expr.rb +233 -0
- data/lib/polars/functions.rb +45 -0
- data/lib/polars/io.rb +39 -0
- data/lib/polars/lazy_frame.rb +139 -0
- data/lib/polars/lazy_functions.rb +121 -0
- data/lib/polars/lazy_group_by.rb +13 -0
- data/lib/polars/series.rb +261 -0
- data/lib/polars/string_expr.rb +17 -0
- data/lib/polars/utils.rb +47 -0
- data/lib/polars/version.rb +3 -0
- data/lib/polars/when.rb +15 -0
- data/lib/polars/when_then.rb +18 -0
- data/lib/polars-df.rb +1 -0
- data/lib/polars.rb +25 -0
- metadata +87 -0
@@ -0,0 +1,475 @@
|
|
1
|
+
use crate::conversion::wrap;
|
2
|
+
use crate::{RbDataFrame, RbPolarsErr, RbResult};
|
3
|
+
use magnus::exception::arg_error;
|
4
|
+
use magnus::{Error, RArray, Value};
|
5
|
+
use polars::prelude::*;
|
6
|
+
use polars::series::IsSorted;
|
7
|
+
use std::cell::RefCell;
|
8
|
+
|
9
|
+
#[magnus::wrap(class = "Polars::RbSeries")]
|
10
|
+
pub struct RbSeries {
|
11
|
+
pub series: RefCell<Series>,
|
12
|
+
}
|
13
|
+
|
14
|
+
/// Converts a polars `Series` into an `RbSeries` by delegating to
/// `RbSeries::new`.
impl From<Series> for RbSeries {
    fn from(series: Series) -> Self {
        Self::new(series)
    }
}
|
19
|
+
|
20
|
+
impl RbSeries {
|
21
|
+
pub fn new(series: Series) -> Self {
|
22
|
+
RbSeries {
|
23
|
+
series: RefCell::new(series),
|
24
|
+
}
|
25
|
+
}
|
26
|
+
|
27
|
+
pub fn new_opt_bool(name: String, obj: RArray, strict: bool) -> RbResult<RbSeries> {
|
28
|
+
let len = obj.len();
|
29
|
+
let mut builder = BooleanChunkedBuilder::new(&name, len);
|
30
|
+
|
31
|
+
unsafe {
|
32
|
+
for item in obj.as_slice().iter() {
|
33
|
+
if item.is_nil() {
|
34
|
+
builder.append_null()
|
35
|
+
} else {
|
36
|
+
match item.try_convert::<bool>() {
|
37
|
+
Ok(val) => builder.append_value(val),
|
38
|
+
Err(e) => {
|
39
|
+
if strict {
|
40
|
+
return Err(e);
|
41
|
+
}
|
42
|
+
builder.append_null()
|
43
|
+
}
|
44
|
+
}
|
45
|
+
}
|
46
|
+
}
|
47
|
+
}
|
48
|
+
let ca = builder.finish();
|
49
|
+
|
50
|
+
let s = ca.into_series();
|
51
|
+
Ok(RbSeries::new(s))
|
52
|
+
}
|
53
|
+
}
|
54
|
+
|
55
|
+
fn new_primitive<T>(name: &str, obj: RArray, strict: bool) -> RbResult<RbSeries>
|
56
|
+
where
|
57
|
+
T: PolarsNumericType,
|
58
|
+
ChunkedArray<T>: IntoSeries,
|
59
|
+
T::Native: magnus::TryConvert,
|
60
|
+
{
|
61
|
+
let len = obj.len();
|
62
|
+
let mut builder = PrimitiveChunkedBuilder::<T>::new(name, len);
|
63
|
+
|
64
|
+
unsafe {
|
65
|
+
for item in obj.as_slice().iter() {
|
66
|
+
if item.is_nil() {
|
67
|
+
builder.append_null()
|
68
|
+
} else {
|
69
|
+
match item.try_convert::<T::Native>() {
|
70
|
+
Ok(val) => builder.append_value(val),
|
71
|
+
Err(e) => {
|
72
|
+
if strict {
|
73
|
+
return Err(e);
|
74
|
+
}
|
75
|
+
builder.append_null()
|
76
|
+
}
|
77
|
+
}
|
78
|
+
}
|
79
|
+
}
|
80
|
+
}
|
81
|
+
let ca = builder.finish();
|
82
|
+
|
83
|
+
let s = ca.into_series();
|
84
|
+
Ok(RbSeries::new(s))
|
85
|
+
}
|
86
|
+
|
87
|
+
// Generates a public `RbSeries` constructor named `$method` that forwards to
// `new_primitive` for the polars type `$polars_type`. The input list may
// contain nils. `$native` is accepted but not used by the expansion.
macro_rules! init_method_opt {
    ($method:ident, $polars_type:ty, $native:ty) => {
        impl RbSeries {
            pub fn $method(name: String, obj: RArray, strict: bool) -> RbResult<Self> {
                new_primitive::<$polars_type>(&name, obj, strict)
            }
        }
    };
}
|
97
|
+
|
98
|
+
init_method_opt!(new_opt_u8, UInt8Type, u8);
|
99
|
+
init_method_opt!(new_opt_u16, UInt16Type, u16);
|
100
|
+
init_method_opt!(new_opt_u32, UInt32Type, u32);
|
101
|
+
init_method_opt!(new_opt_u64, UInt64Type, u64);
|
102
|
+
init_method_opt!(new_opt_i8, Int8Type, i8);
|
103
|
+
init_method_opt!(new_opt_i16, Int16Type, i16);
|
104
|
+
init_method_opt!(new_opt_i32, Int32Type, i32);
|
105
|
+
init_method_opt!(new_opt_i64, Int64Type, i64);
|
106
|
+
init_method_opt!(new_opt_f32, Float32Type, f32);
|
107
|
+
init_method_opt!(new_opt_f64, Float64Type, f64);
|
108
|
+
|
109
|
+
impl RbSeries {
|
110
|
+
pub fn new_str(name: String, val: RArray, _strict: bool) -> RbResult<Self> {
|
111
|
+
let v = val.try_convert::<Vec<Option<String>>>()?;
|
112
|
+
let mut s = Utf8Chunked::new(&name, v).into_series();
|
113
|
+
s.rename(&name);
|
114
|
+
Ok(RbSeries::new(s))
|
115
|
+
}
|
116
|
+
|
117
|
+
/// Rechunks the wrapped series.
///
/// When `in_place` is true the wrapped series is replaced by the rechunked
/// one and `None` is returned; otherwise the rechunked series is returned as
/// a new `RbSeries` and `self` is left untouched.
pub fn rechunk(&self, in_place: bool) -> Option<Self> {
    // Computing the rechunked series only reads, so a shared borrow is
    // enough; the exclusive borrow is taken only for the replacement below.
    let rechunked = self.series.borrow().rechunk();
    if in_place {
        *self.series.borrow_mut() = rechunked;
        None
    } else {
        Some(rechunked.into())
    }
}
|
126
|
+
|
127
|
+
pub fn bitand(&self, other: &RbSeries) -> RbResult<Self> {
|
128
|
+
let out = self
|
129
|
+
.series
|
130
|
+
.borrow()
|
131
|
+
.bitand(&other.series.borrow())
|
132
|
+
.map_err(RbPolarsErr::from)?;
|
133
|
+
Ok(out.into())
|
134
|
+
}
|
135
|
+
|
136
|
+
pub fn bitor(&self, other: &RbSeries) -> RbResult<Self> {
|
137
|
+
let out = self
|
138
|
+
.series
|
139
|
+
.borrow()
|
140
|
+
.bitor(&other.series.borrow())
|
141
|
+
.map_err(RbPolarsErr::from)?;
|
142
|
+
Ok(out.into())
|
143
|
+
}
|
144
|
+
|
145
|
+
pub fn bitxor(&self, other: &RbSeries) -> RbResult<Self> {
|
146
|
+
let out = self
|
147
|
+
.series
|
148
|
+
.borrow()
|
149
|
+
.bitxor(&other.series.borrow())
|
150
|
+
.map_err(RbPolarsErr::from)?;
|
151
|
+
Ok(out.into())
|
152
|
+
}
|
153
|
+
|
154
|
+
pub fn chunk_lengths(&self) -> Vec<usize> {
|
155
|
+
self.series.borrow().chunk_lengths().collect()
|
156
|
+
}
|
157
|
+
|
158
|
+
pub fn name(&self) -> String {
|
159
|
+
self.series.borrow().name().into()
|
160
|
+
}
|
161
|
+
|
162
|
+
pub fn rename(&self, name: String) {
|
163
|
+
self.series.borrow_mut().rename(&name);
|
164
|
+
}
|
165
|
+
|
166
|
+
pub fn dtype(&self) -> String {
|
167
|
+
self.series.borrow().dtype().to_string()
|
168
|
+
}
|
169
|
+
|
170
|
+
pub fn inner_dtype(&self) -> Option<String> {
|
171
|
+
self.series
|
172
|
+
.borrow()
|
173
|
+
.dtype()
|
174
|
+
.inner_dtype()
|
175
|
+
.map(|dt| dt.to_string())
|
176
|
+
}
|
177
|
+
|
178
|
+
pub fn set_sorted(&self, reverse: bool) -> Self {
|
179
|
+
let mut out = self.series.borrow().clone();
|
180
|
+
if reverse {
|
181
|
+
out.set_sorted(IsSorted::Descending);
|
182
|
+
} else {
|
183
|
+
out.set_sorted(IsSorted::Ascending)
|
184
|
+
}
|
185
|
+
out.into()
|
186
|
+
}
|
187
|
+
|
188
|
+
pub fn mean(&self) -> Option<f64> {
|
189
|
+
match self.series.borrow().dtype() {
|
190
|
+
DataType::Boolean => {
|
191
|
+
let s = self.series.borrow().cast(&DataType::UInt8).unwrap();
|
192
|
+
s.mean()
|
193
|
+
}
|
194
|
+
_ => self.series.borrow().mean(),
|
195
|
+
}
|
196
|
+
}
|
197
|
+
|
198
|
+
pub fn max(&self) -> Value {
|
199
|
+
wrap(self.series.borrow().max_as_series().get(0))
|
200
|
+
}
|
201
|
+
|
202
|
+
pub fn min(&self) -> Value {
|
203
|
+
wrap(self.series.borrow().min_as_series().get(0))
|
204
|
+
}
|
205
|
+
|
206
|
+
pub fn sum(&self) -> Value {
|
207
|
+
wrap(self.series.borrow().sum_as_series().get(0))
|
208
|
+
}
|
209
|
+
|
210
|
+
pub fn n_chunks(&self) -> usize {
|
211
|
+
self.series.borrow().n_chunks()
|
212
|
+
}
|
213
|
+
|
214
|
+
pub fn append(&self, other: &RbSeries) -> RbResult<()> {
|
215
|
+
let mut binding = self.series.borrow_mut();
|
216
|
+
let res = binding.append(&other.series.borrow());
|
217
|
+
if let Err(e) = res {
|
218
|
+
Err(Error::runtime_error(e.to_string()))
|
219
|
+
} else {
|
220
|
+
Ok(())
|
221
|
+
}
|
222
|
+
}
|
223
|
+
|
224
|
+
pub fn extend(&self, other: &RbSeries) -> RbResult<()> {
|
225
|
+
self.series
|
226
|
+
.borrow_mut()
|
227
|
+
.extend(&other.series.borrow())
|
228
|
+
.map_err(RbPolarsErr::from)?;
|
229
|
+
Ok(())
|
230
|
+
}
|
231
|
+
|
232
|
+
pub fn new_from_index(&self, index: usize, length: usize) -> RbResult<Self> {
|
233
|
+
if index >= self.series.borrow().len() {
|
234
|
+
Err(Error::new(arg_error(), "index is out of bounds"))
|
235
|
+
} else {
|
236
|
+
Ok(self.series.borrow().new_from_index(index, length).into())
|
237
|
+
}
|
238
|
+
}
|
239
|
+
|
240
|
+
pub fn filter(&self, filter: &RbSeries) -> RbResult<Self> {
|
241
|
+
let filter_series = &filter.series.borrow();
|
242
|
+
if let Ok(ca) = filter_series.bool() {
|
243
|
+
let series = self.series.borrow().filter(ca).unwrap();
|
244
|
+
Ok(series.into())
|
245
|
+
} else {
|
246
|
+
Err(Error::runtime_error("Expected a boolean mask".to_string()))
|
247
|
+
}
|
248
|
+
}
|
249
|
+
|
250
|
+
/// Returns `self + other` as a new series.
pub fn add(&self, other: &RbSeries) -> Self {
    let lhs = self.series.borrow();
    let rhs = other.series.borrow();
    let result = &*lhs + &*rhs;
    result.into()
}
|
253
|
+
|
254
|
+
/// Returns `self - other` as a new series.
pub fn sub(&self, other: &RbSeries) -> Self {
    let lhs = self.series.borrow();
    let rhs = other.series.borrow();
    let result = &*lhs - &*rhs;
    result.into()
}
|
257
|
+
|
258
|
+
/// Returns `self * other` as a new series.
pub fn mul(&self, other: &RbSeries) -> Self {
    let lhs = self.series.borrow();
    let rhs = other.series.borrow();
    let result = &*lhs * &*rhs;
    result.into()
}
|
261
|
+
|
262
|
+
/// Returns `self / other` as a new series.
pub fn div(&self, other: &RbSeries) -> Self {
    let lhs = self.series.borrow();
    let rhs = other.series.borrow();
    let result = &*lhs / &*rhs;
    result.into()
}
|
265
|
+
|
266
|
+
/// Returns `self % other` as a new series.
pub fn rem(&self, other: &RbSeries) -> Self {
    let lhs = self.series.borrow();
    let rhs = other.series.borrow();
    let result = &*lhs % &*rhs;
    result.into()
}
|
269
|
+
|
270
|
+
/// Returns a sorted copy of the series, in descending order when `reverse`
/// is true. The wrapped series is not replaced.
pub fn sort(&self, reverse: bool) -> Self {
    // The result is a new series, so a shared borrow is sufficient here.
    let sorted = self.series.borrow().sort(reverse);
    sorted.into()
}
|
273
|
+
|
274
|
+
pub fn value_counts(&self, sorted: bool) -> RbResult<RbDataFrame> {
|
275
|
+
let df = self
|
276
|
+
.series
|
277
|
+
.borrow()
|
278
|
+
.value_counts(true, sorted)
|
279
|
+
.map_err(RbPolarsErr::from)?;
|
280
|
+
Ok(df.into())
|
281
|
+
}
|
282
|
+
|
283
|
+
pub fn arg_min(&self) -> Option<usize> {
|
284
|
+
self.series.borrow().arg_min()
|
285
|
+
}
|
286
|
+
|
287
|
+
pub fn arg_max(&self) -> Option<usize> {
|
288
|
+
self.series.borrow().arg_max()
|
289
|
+
}
|
290
|
+
|
291
|
+
pub fn take_with_series(&self, indices: &RbSeries) -> RbResult<Self> {
|
292
|
+
let binding = indices.series.borrow();
|
293
|
+
let idx = binding.idx().map_err(RbPolarsErr::from)?;
|
294
|
+
let take = self.series.borrow().take(idx).map_err(RbPolarsErr::from)?;
|
295
|
+
Ok(RbSeries::new(take))
|
296
|
+
}
|
297
|
+
|
298
|
+
/// Returns the series' null count. The value is always wrapped in `Ok`.
pub fn null_count(&self) -> RbResult<usize> {
    let series = self.series.borrow();
    Ok(series.null_count())
}
|
301
|
+
|
302
|
+
pub fn has_validity(&self) -> bool {
|
303
|
+
self.series.borrow().has_validity()
|
304
|
+
}
|
305
|
+
|
306
|
+
pub fn sample_n(
|
307
|
+
&self,
|
308
|
+
n: usize,
|
309
|
+
with_replacement: bool,
|
310
|
+
shuffle: bool,
|
311
|
+
seed: Option<u64>,
|
312
|
+
) -> RbResult<Self> {
|
313
|
+
let s = self
|
314
|
+
.series
|
315
|
+
.borrow()
|
316
|
+
.sample_n(n, with_replacement, shuffle, seed)
|
317
|
+
.map_err(RbPolarsErr::from)?;
|
318
|
+
Ok(s.into())
|
319
|
+
}
|
320
|
+
|
321
|
+
pub fn sample_frac(
|
322
|
+
&self,
|
323
|
+
frac: f64,
|
324
|
+
with_replacement: bool,
|
325
|
+
shuffle: bool,
|
326
|
+
seed: Option<u64>,
|
327
|
+
) -> RbResult<Self> {
|
328
|
+
let s = self
|
329
|
+
.series
|
330
|
+
.borrow()
|
331
|
+
.sample_frac(frac, with_replacement, shuffle, seed)
|
332
|
+
.map_err(RbPolarsErr::from)?;
|
333
|
+
Ok(s.into())
|
334
|
+
}
|
335
|
+
|
336
|
+
/// Compares two series as a whole and returns a single boolean.
///
/// With `strict` the comparison uses `eq`; otherwise `null_equal` selects
/// between `series_equal_missing` (true) and `series_equal` (false).
pub fn series_equal(&self, other: &RbSeries, null_equal: bool, strict: bool) -> bool {
    let lhs = self.series.borrow();
    let rhs = other.series.borrow();
    match (strict, null_equal) {
        (true, _) => lhs.eq(&rhs),
        (false, true) => lhs.series_equal_missing(&rhs),
        (false, false) => lhs.series_equal(&rhs),
    }
}
|
347
|
+
|
348
|
+
pub fn eq(&self, rhs: &RbSeries) -> RbResult<Self> {
|
349
|
+
let s = self
|
350
|
+
.series
|
351
|
+
.borrow()
|
352
|
+
.equal(&*rhs.series.borrow())
|
353
|
+
.map_err(RbPolarsErr::from)?;
|
354
|
+
Ok(Self::new(s.into_series()))
|
355
|
+
}
|
356
|
+
|
357
|
+
pub fn neq(&self, rhs: &RbSeries) -> RbResult<Self> {
|
358
|
+
let s = self
|
359
|
+
.series
|
360
|
+
.borrow()
|
361
|
+
.not_equal(&*rhs.series.borrow())
|
362
|
+
.map_err(RbPolarsErr::from)?;
|
363
|
+
Ok(Self::new(s.into_series()))
|
364
|
+
}
|
365
|
+
|
366
|
+
pub fn gt(&self, rhs: &RbSeries) -> RbResult<Self> {
|
367
|
+
let s = self
|
368
|
+
.series
|
369
|
+
.borrow()
|
370
|
+
.gt(&*rhs.series.borrow())
|
371
|
+
.map_err(RbPolarsErr::from)?;
|
372
|
+
Ok(Self::new(s.into_series()))
|
373
|
+
}
|
374
|
+
|
375
|
+
pub fn gt_eq(&self, rhs: &RbSeries) -> RbResult<Self> {
|
376
|
+
let s = self
|
377
|
+
.series
|
378
|
+
.borrow()
|
379
|
+
.gt_eq(&*rhs.series.borrow())
|
380
|
+
.map_err(RbPolarsErr::from)?;
|
381
|
+
Ok(Self::new(s.into_series()))
|
382
|
+
}
|
383
|
+
|
384
|
+
pub fn lt(&self, rhs: &RbSeries) -> RbResult<Self> {
|
385
|
+
let s = self
|
386
|
+
.series
|
387
|
+
.borrow()
|
388
|
+
.lt(&*rhs.series.borrow())
|
389
|
+
.map_err(RbPolarsErr::from)?;
|
390
|
+
Ok(Self::new(s.into_series()))
|
391
|
+
}
|
392
|
+
|
393
|
+
pub fn lt_eq(&self, rhs: &RbSeries) -> RbResult<Self> {
|
394
|
+
let s = self
|
395
|
+
.series
|
396
|
+
.borrow()
|
397
|
+
.lt_eq(&*rhs.series.borrow())
|
398
|
+
.map_err(RbPolarsErr::from)?;
|
399
|
+
Ok(Self::new(s.into_series()))
|
400
|
+
}
|
401
|
+
|
402
|
+
pub fn not(&self) -> RbResult<Self> {
|
403
|
+
let binding = self.series.borrow();
|
404
|
+
let bool = binding.bool().map_err(RbPolarsErr::from)?;
|
405
|
+
Ok((!bool).into_series().into())
|
406
|
+
}
|
407
|
+
|
408
|
+
pub fn to_s(&self) -> String {
|
409
|
+
format!("{}", self.series.borrow())
|
410
|
+
}
|
411
|
+
|
412
|
+
pub fn len(&self) -> usize {
|
413
|
+
self.series.borrow().len()
|
414
|
+
}
|
415
|
+
|
416
|
+
/// Converts the series into a Ruby array.
///
/// The typed accessors are tried in turn (floats, signed and unsigned
/// integers, booleans, UTF-8 strings) and the first one that succeeds has
/// its elements collected into the array.
///
/// # Panics
///
/// Panics when none of the accessors applies; the panic message names the
/// unsupported data type.
pub fn to_a(&self) -> RArray {
    let series = self.series.borrow();
    if let Ok(s) = series.f32() {
        s.into_iter().collect()
    } else if let Ok(s) = series.f64() {
        s.into_iter().collect()
    } else if let Ok(s) = series.i8() {
        s.into_iter().collect()
    } else if let Ok(s) = series.i16() {
        s.into_iter().collect()
    } else if let Ok(s) = series.i32() {
        s.into_iter().collect()
    } else if let Ok(s) = series.i64() {
        s.into_iter().collect()
    } else if let Ok(s) = series.u8() {
        s.into_iter().collect()
    } else if let Ok(s) = series.u16() {
        s.into_iter().collect()
    } else if let Ok(s) = series.u32() {
        s.into_iter().collect()
    } else if let Ok(s) = series.u64() {
        s.into_iter().collect()
    } else if let Ok(s) = series.bool() {
        s.into_iter().collect()
    } else if let Ok(s) = series.utf8() {
        s.into_iter().collect()
    } else {
        // Name the offending dtype so the failure can be diagnosed.
        unimplemented!("to_a is not supported for dtype {}", series.dtype())
    }
}
|
446
|
+
|
447
|
+
pub fn median(&self) -> Option<f64> {
|
448
|
+
match self.series.borrow().dtype() {
|
449
|
+
DataType::Boolean => {
|
450
|
+
let s = self.series.borrow().cast(&DataType::UInt8).unwrap();
|
451
|
+
s.median()
|
452
|
+
}
|
453
|
+
_ => self.series.borrow().median(),
|
454
|
+
}
|
455
|
+
}
|
456
|
+
|
457
|
+
// dispatch dynamically in future?
|
458
|
+
|
459
|
+
/// Returns the result of the series' `cumsum(reverse)` as a new series.
pub fn cumsum(&self, reverse: bool) -> Self {
    let series = self.series.borrow();
    let accumulated = series.cumsum(reverse);
    accumulated.into()
}
|
462
|
+
|
463
|
+
/// Returns the result of the series' `cummax(reverse)` as a new series.
pub fn cummax(&self, reverse: bool) -> Self {
    let series = self.series.borrow();
    let accumulated = series.cummax(reverse);
    accumulated.into()
}
|
466
|
+
|
467
|
+
/// Returns the result of the series' `cummin(reverse)` as a new series.
pub fn cummin(&self, reverse: bool) -> Self {
    let series = self.series.borrow();
    let accumulated = series.cummin(reverse);
    accumulated.into()
}
|
470
|
+
|
471
|
+
/// Returns the result of the series' `slice(offset, length)` as a new series.
pub fn slice(&self, offset: i64, length: usize) -> Self {
    self.series.borrow().slice(offset, length).into()
}
|
475
|
+
}
|