polars-df 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,475 @@
1
+ use crate::conversion::wrap;
2
+ use crate::{RbDataFrame, RbPolarsErr, RbResult};
3
+ use magnus::exception::arg_error;
4
+ use magnus::{Error, RArray, Value};
5
+ use polars::prelude::*;
6
+ use polars::series::IsSorted;
7
+ use std::cell::RefCell;
8
+
9
+ #[magnus::wrap(class = "Polars::RbSeries")]
10
+ pub struct RbSeries {
11
+ pub series: RefCell<Series>,
12
+ }
13
+
14
+ impl From<Series> for RbSeries {
15
+ fn from(series: Series) -> Self {
16
+ RbSeries::new(series)
17
+ }
18
+ }
19
+
20
+ impl RbSeries {
21
+ pub fn new(series: Series) -> Self {
22
+ RbSeries {
23
+ series: RefCell::new(series),
24
+ }
25
+ }
26
+
27
+ pub fn new_opt_bool(name: String, obj: RArray, strict: bool) -> RbResult<RbSeries> {
28
+ let len = obj.len();
29
+ let mut builder = BooleanChunkedBuilder::new(&name, len);
30
+
31
+ unsafe {
32
+ for item in obj.as_slice().iter() {
33
+ if item.is_nil() {
34
+ builder.append_null()
35
+ } else {
36
+ match item.try_convert::<bool>() {
37
+ Ok(val) => builder.append_value(val),
38
+ Err(e) => {
39
+ if strict {
40
+ return Err(e);
41
+ }
42
+ builder.append_null()
43
+ }
44
+ }
45
+ }
46
+ }
47
+ }
48
+ let ca = builder.finish();
49
+
50
+ let s = ca.into_series();
51
+ Ok(RbSeries::new(s))
52
+ }
53
+ }
54
+
55
+ fn new_primitive<T>(name: &str, obj: RArray, strict: bool) -> RbResult<RbSeries>
56
+ where
57
+ T: PolarsNumericType,
58
+ ChunkedArray<T>: IntoSeries,
59
+ T::Native: magnus::TryConvert,
60
+ {
61
+ let len = obj.len();
62
+ let mut builder = PrimitiveChunkedBuilder::<T>::new(name, len);
63
+
64
+ unsafe {
65
+ for item in obj.as_slice().iter() {
66
+ if item.is_nil() {
67
+ builder.append_null()
68
+ } else {
69
+ match item.try_convert::<T::Native>() {
70
+ Ok(val) => builder.append_value(val),
71
+ Err(e) => {
72
+ if strict {
73
+ return Err(e);
74
+ }
75
+ builder.append_null()
76
+ }
77
+ }
78
+ }
79
+ }
80
+ }
81
+ let ca = builder.finish();
82
+
83
+ let s = ca.into_series();
84
+ Ok(RbSeries::new(s))
85
+ }
86
+
87
+ // Init with lists that can contain Nones
88
+ macro_rules! init_method_opt {
89
+ ($name:ident, $type:ty, $native: ty) => {
90
+ impl RbSeries {
91
+ pub fn $name(name: String, obj: RArray, strict: bool) -> RbResult<Self> {
92
+ new_primitive::<$type>(&name, obj, strict)
93
+ }
94
+ }
95
+ };
96
+ }
97
+
98
+ init_method_opt!(new_opt_u8, UInt8Type, u8);
99
+ init_method_opt!(new_opt_u16, UInt16Type, u16);
100
+ init_method_opt!(new_opt_u32, UInt32Type, u32);
101
+ init_method_opt!(new_opt_u64, UInt64Type, u64);
102
+ init_method_opt!(new_opt_i8, Int8Type, i8);
103
+ init_method_opt!(new_opt_i16, Int16Type, i16);
104
+ init_method_opt!(new_opt_i32, Int32Type, i32);
105
+ init_method_opt!(new_opt_i64, Int64Type, i64);
106
+ init_method_opt!(new_opt_f32, Float32Type, f32);
107
+ init_method_opt!(new_opt_f64, Float64Type, f64);
108
+
109
+ impl RbSeries {
110
+ pub fn new_str(name: String, val: RArray, _strict: bool) -> RbResult<Self> {
111
+ let v = val.try_convert::<Vec<Option<String>>>()?;
112
+ let mut s = Utf8Chunked::new(&name, v).into_series();
113
+ s.rename(&name);
114
+ Ok(RbSeries::new(s))
115
+ }
116
+
117
+ pub fn rechunk(&self, in_place: bool) -> Option<Self> {
118
+ let series = self.series.borrow_mut().rechunk();
119
+ if in_place {
120
+ *self.series.borrow_mut() = series;
121
+ None
122
+ } else {
123
+ Some(series.into())
124
+ }
125
+ }
126
+
127
+ pub fn bitand(&self, other: &RbSeries) -> RbResult<Self> {
128
+ let out = self
129
+ .series
130
+ .borrow()
131
+ .bitand(&other.series.borrow())
132
+ .map_err(RbPolarsErr::from)?;
133
+ Ok(out.into())
134
+ }
135
+
136
+ pub fn bitor(&self, other: &RbSeries) -> RbResult<Self> {
137
+ let out = self
138
+ .series
139
+ .borrow()
140
+ .bitor(&other.series.borrow())
141
+ .map_err(RbPolarsErr::from)?;
142
+ Ok(out.into())
143
+ }
144
+
145
+ pub fn bitxor(&self, other: &RbSeries) -> RbResult<Self> {
146
+ let out = self
147
+ .series
148
+ .borrow()
149
+ .bitxor(&other.series.borrow())
150
+ .map_err(RbPolarsErr::from)?;
151
+ Ok(out.into())
152
+ }
153
+
154
+ pub fn chunk_lengths(&self) -> Vec<usize> {
155
+ self.series.borrow().chunk_lengths().collect()
156
+ }
157
+
158
+ pub fn name(&self) -> String {
159
+ self.series.borrow().name().into()
160
+ }
161
+
162
+ pub fn rename(&self, name: String) {
163
+ self.series.borrow_mut().rename(&name);
164
+ }
165
+
166
+ pub fn dtype(&self) -> String {
167
+ self.series.borrow().dtype().to_string()
168
+ }
169
+
170
+ pub fn inner_dtype(&self) -> Option<String> {
171
+ self.series
172
+ .borrow()
173
+ .dtype()
174
+ .inner_dtype()
175
+ .map(|dt| dt.to_string())
176
+ }
177
+
178
+ pub fn set_sorted(&self, reverse: bool) -> Self {
179
+ let mut out = self.series.borrow().clone();
180
+ if reverse {
181
+ out.set_sorted(IsSorted::Descending);
182
+ } else {
183
+ out.set_sorted(IsSorted::Ascending)
184
+ }
185
+ out.into()
186
+ }
187
+
188
+ pub fn mean(&self) -> Option<f64> {
189
+ match self.series.borrow().dtype() {
190
+ DataType::Boolean => {
191
+ let s = self.series.borrow().cast(&DataType::UInt8).unwrap();
192
+ s.mean()
193
+ }
194
+ _ => self.series.borrow().mean(),
195
+ }
196
+ }
197
+
198
+ pub fn max(&self) -> Value {
199
+ wrap(self.series.borrow().max_as_series().get(0))
200
+ }
201
+
202
+ pub fn min(&self) -> Value {
203
+ wrap(self.series.borrow().min_as_series().get(0))
204
+ }
205
+
206
+ pub fn sum(&self) -> Value {
207
+ wrap(self.series.borrow().sum_as_series().get(0))
208
+ }
209
+
210
+ pub fn n_chunks(&self) -> usize {
211
+ self.series.borrow().n_chunks()
212
+ }
213
+
214
+ pub fn append(&self, other: &RbSeries) -> RbResult<()> {
215
+ let mut binding = self.series.borrow_mut();
216
+ let res = binding.append(&other.series.borrow());
217
+ if let Err(e) = res {
218
+ Err(Error::runtime_error(e.to_string()))
219
+ } else {
220
+ Ok(())
221
+ }
222
+ }
223
+
224
+ pub fn extend(&self, other: &RbSeries) -> RbResult<()> {
225
+ self.series
226
+ .borrow_mut()
227
+ .extend(&other.series.borrow())
228
+ .map_err(RbPolarsErr::from)?;
229
+ Ok(())
230
+ }
231
+
232
+ pub fn new_from_index(&self, index: usize, length: usize) -> RbResult<Self> {
233
+ if index >= self.series.borrow().len() {
234
+ Err(Error::new(arg_error(), "index is out of bounds"))
235
+ } else {
236
+ Ok(self.series.borrow().new_from_index(index, length).into())
237
+ }
238
+ }
239
+
240
+ pub fn filter(&self, filter: &RbSeries) -> RbResult<Self> {
241
+ let filter_series = &filter.series.borrow();
242
+ if let Ok(ca) = filter_series.bool() {
243
+ let series = self.series.borrow().filter(ca).unwrap();
244
+ Ok(series.into())
245
+ } else {
246
+ Err(Error::runtime_error("Expected a boolean mask".to_string()))
247
+ }
248
+ }
249
+
250
+ pub fn add(&self, other: &RbSeries) -> Self {
251
+ (&*self.series.borrow() + &*other.series.borrow()).into()
252
+ }
253
+
254
+ pub fn sub(&self, other: &RbSeries) -> Self {
255
+ (&*self.series.borrow() - &*other.series.borrow()).into()
256
+ }
257
+
258
+ pub fn mul(&self, other: &RbSeries) -> Self {
259
+ (&*self.series.borrow() * &*other.series.borrow()).into()
260
+ }
261
+
262
+ pub fn div(&self, other: &RbSeries) -> Self {
263
+ (&*self.series.borrow() / &*other.series.borrow()).into()
264
+ }
265
+
266
+ pub fn rem(&self, other: &RbSeries) -> Self {
267
+ (&*self.series.borrow() % &*other.series.borrow()).into()
268
+ }
269
+
270
+ pub fn sort(&self, reverse: bool) -> Self {
271
+ (self.series.borrow_mut().sort(reverse)).into()
272
+ }
273
+
274
+ pub fn value_counts(&self, sorted: bool) -> RbResult<RbDataFrame> {
275
+ let df = self
276
+ .series
277
+ .borrow()
278
+ .value_counts(true, sorted)
279
+ .map_err(RbPolarsErr::from)?;
280
+ Ok(df.into())
281
+ }
282
+
283
+ pub fn arg_min(&self) -> Option<usize> {
284
+ self.series.borrow().arg_min()
285
+ }
286
+
287
+ pub fn arg_max(&self) -> Option<usize> {
288
+ self.series.borrow().arg_max()
289
+ }
290
+
291
+ pub fn take_with_series(&self, indices: &RbSeries) -> RbResult<Self> {
292
+ let binding = indices.series.borrow();
293
+ let idx = binding.idx().map_err(RbPolarsErr::from)?;
294
+ let take = self.series.borrow().take(idx).map_err(RbPolarsErr::from)?;
295
+ Ok(RbSeries::new(take))
296
+ }
297
+
298
+ pub fn null_count(&self) -> RbResult<usize> {
299
+ Ok(self.series.borrow().null_count())
300
+ }
301
+
302
+ pub fn has_validity(&self) -> bool {
303
+ self.series.borrow().has_validity()
304
+ }
305
+
306
+ pub fn sample_n(
307
+ &self,
308
+ n: usize,
309
+ with_replacement: bool,
310
+ shuffle: bool,
311
+ seed: Option<u64>,
312
+ ) -> RbResult<Self> {
313
+ let s = self
314
+ .series
315
+ .borrow()
316
+ .sample_n(n, with_replacement, shuffle, seed)
317
+ .map_err(RbPolarsErr::from)?;
318
+ Ok(s.into())
319
+ }
320
+
321
+ pub fn sample_frac(
322
+ &self,
323
+ frac: f64,
324
+ with_replacement: bool,
325
+ shuffle: bool,
326
+ seed: Option<u64>,
327
+ ) -> RbResult<Self> {
328
+ let s = self
329
+ .series
330
+ .borrow()
331
+ .sample_frac(frac, with_replacement, shuffle, seed)
332
+ .map_err(RbPolarsErr::from)?;
333
+ Ok(s.into())
334
+ }
335
+
336
+ pub fn series_equal(&self, other: &RbSeries, null_equal: bool, strict: bool) -> bool {
337
+ if strict {
338
+ self.series.borrow().eq(&other.series.borrow())
339
+ } else if null_equal {
340
+ self.series
341
+ .borrow()
342
+ .series_equal_missing(&other.series.borrow())
343
+ } else {
344
+ self.series.borrow().series_equal(&other.series.borrow())
345
+ }
346
+ }
347
+
348
+ pub fn eq(&self, rhs: &RbSeries) -> RbResult<Self> {
349
+ let s = self
350
+ .series
351
+ .borrow()
352
+ .equal(&*rhs.series.borrow())
353
+ .map_err(RbPolarsErr::from)?;
354
+ Ok(Self::new(s.into_series()))
355
+ }
356
+
357
+ pub fn neq(&self, rhs: &RbSeries) -> RbResult<Self> {
358
+ let s = self
359
+ .series
360
+ .borrow()
361
+ .not_equal(&*rhs.series.borrow())
362
+ .map_err(RbPolarsErr::from)?;
363
+ Ok(Self::new(s.into_series()))
364
+ }
365
+
366
+ pub fn gt(&self, rhs: &RbSeries) -> RbResult<Self> {
367
+ let s = self
368
+ .series
369
+ .borrow()
370
+ .gt(&*rhs.series.borrow())
371
+ .map_err(RbPolarsErr::from)?;
372
+ Ok(Self::new(s.into_series()))
373
+ }
374
+
375
+ pub fn gt_eq(&self, rhs: &RbSeries) -> RbResult<Self> {
376
+ let s = self
377
+ .series
378
+ .borrow()
379
+ .gt_eq(&*rhs.series.borrow())
380
+ .map_err(RbPolarsErr::from)?;
381
+ Ok(Self::new(s.into_series()))
382
+ }
383
+
384
+ pub fn lt(&self, rhs: &RbSeries) -> RbResult<Self> {
385
+ let s = self
386
+ .series
387
+ .borrow()
388
+ .lt(&*rhs.series.borrow())
389
+ .map_err(RbPolarsErr::from)?;
390
+ Ok(Self::new(s.into_series()))
391
+ }
392
+
393
+ pub fn lt_eq(&self, rhs: &RbSeries) -> RbResult<Self> {
394
+ let s = self
395
+ .series
396
+ .borrow()
397
+ .lt_eq(&*rhs.series.borrow())
398
+ .map_err(RbPolarsErr::from)?;
399
+ Ok(Self::new(s.into_series()))
400
+ }
401
+
402
+ pub fn not(&self) -> RbResult<Self> {
403
+ let binding = self.series.borrow();
404
+ let bool = binding.bool().map_err(RbPolarsErr::from)?;
405
+ Ok((!bool).into_series().into())
406
+ }
407
+
408
+ pub fn to_s(&self) -> String {
409
+ format!("{}", self.series.borrow())
410
+ }
411
+
412
+ pub fn len(&self) -> usize {
413
+ self.series.borrow().len()
414
+ }
415
+
416
+ pub fn to_a(&self) -> RArray {
417
+ let series = self.series.borrow();
418
+ if let Ok(s) = series.f32() {
419
+ s.into_iter().collect()
420
+ } else if let Ok(s) = series.f64() {
421
+ s.into_iter().collect()
422
+ } else if let Ok(s) = series.i8() {
423
+ s.into_iter().collect()
424
+ } else if let Ok(s) = series.i16() {
425
+ s.into_iter().collect()
426
+ } else if let Ok(s) = series.i32() {
427
+ s.into_iter().collect()
428
+ } else if let Ok(s) = series.i64() {
429
+ s.into_iter().collect()
430
+ } else if let Ok(s) = series.u8() {
431
+ s.into_iter().collect()
432
+ } else if let Ok(s) = series.u16() {
433
+ s.into_iter().collect()
434
+ } else if let Ok(s) = series.u32() {
435
+ s.into_iter().collect()
436
+ } else if let Ok(s) = series.u64() {
437
+ s.into_iter().collect()
438
+ } else if let Ok(s) = series.bool() {
439
+ s.into_iter().collect()
440
+ } else if let Ok(s) = series.utf8() {
441
+ s.into_iter().collect()
442
+ } else {
443
+ unimplemented!();
444
+ }
445
+ }
446
+
447
+ pub fn median(&self) -> Option<f64> {
448
+ match self.series.borrow().dtype() {
449
+ DataType::Boolean => {
450
+ let s = self.series.borrow().cast(&DataType::UInt8).unwrap();
451
+ s.median()
452
+ }
453
+ _ => self.series.borrow().median(),
454
+ }
455
+ }
456
+
457
+ // dispatch dynamically in future?
458
+
459
+ pub fn cumsum(&self, reverse: bool) -> Self {
460
+ self.series.borrow().cumsum(reverse).into()
461
+ }
462
+
463
+ pub fn cummax(&self, reverse: bool) -> Self {
464
+ self.series.borrow().cummax(reverse).into()
465
+ }
466
+
467
+ pub fn cummin(&self, reverse: bool) -> Self {
468
+ self.series.borrow().cummin(reverse).into()
469
+ }
470
+
471
+ pub fn slice(&self, offset: i64, length: usize) -> Self {
472
+ let series = self.series.borrow().slice(offset, length);
473
+ series.into()
474
+ }
475
+ }