polars-df 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,475 @@
1
+ use crate::conversion::wrap;
2
+ use crate::{RbDataFrame, RbPolarsErr, RbResult};
3
+ use magnus::exception::arg_error;
4
+ use magnus::{Error, RArray, Value};
5
+ use polars::prelude::*;
6
+ use polars::series::IsSorted;
7
+ use std::cell::RefCell;
8
+
9
+ #[magnus::wrap(class = "Polars::RbSeries")]
10
+ pub struct RbSeries {
11
+ pub series: RefCell<Series>,
12
+ }
13
+
14
+ impl From<Series> for RbSeries {
15
+ fn from(series: Series) -> Self {
16
+ RbSeries::new(series)
17
+ }
18
+ }
19
+
20
+ impl RbSeries {
21
+ pub fn new(series: Series) -> Self {
22
+ RbSeries {
23
+ series: RefCell::new(series),
24
+ }
25
+ }
26
+
27
+ pub fn new_opt_bool(name: String, obj: RArray, strict: bool) -> RbResult<RbSeries> {
28
+ let len = obj.len();
29
+ let mut builder = BooleanChunkedBuilder::new(&name, len);
30
+
31
+ unsafe {
32
+ for item in obj.as_slice().iter() {
33
+ if item.is_nil() {
34
+ builder.append_null()
35
+ } else {
36
+ match item.try_convert::<bool>() {
37
+ Ok(val) => builder.append_value(val),
38
+ Err(e) => {
39
+ if strict {
40
+ return Err(e);
41
+ }
42
+ builder.append_null()
43
+ }
44
+ }
45
+ }
46
+ }
47
+ }
48
+ let ca = builder.finish();
49
+
50
+ let s = ca.into_series();
51
+ Ok(RbSeries::new(s))
52
+ }
53
+ }
54
+
55
+ fn new_primitive<T>(name: &str, obj: RArray, strict: bool) -> RbResult<RbSeries>
56
+ where
57
+ T: PolarsNumericType,
58
+ ChunkedArray<T>: IntoSeries,
59
+ T::Native: magnus::TryConvert,
60
+ {
61
+ let len = obj.len();
62
+ let mut builder = PrimitiveChunkedBuilder::<T>::new(name, len);
63
+
64
+ unsafe {
65
+ for item in obj.as_slice().iter() {
66
+ if item.is_nil() {
67
+ builder.append_null()
68
+ } else {
69
+ match item.try_convert::<T::Native>() {
70
+ Ok(val) => builder.append_value(val),
71
+ Err(e) => {
72
+ if strict {
73
+ return Err(e);
74
+ }
75
+ builder.append_null()
76
+ }
77
+ }
78
+ }
79
+ }
80
+ }
81
+ let ca = builder.finish();
82
+
83
+ let s = ca.into_series();
84
+ Ok(RbSeries::new(s))
85
+ }
86
+
87
+ // Init with lists that can contain Nones
88
+ macro_rules! init_method_opt {
89
+ ($name:ident, $type:ty, $native: ty) => {
90
+ impl RbSeries {
91
+ pub fn $name(name: String, obj: RArray, strict: bool) -> RbResult<Self> {
92
+ new_primitive::<$type>(&name, obj, strict)
93
+ }
94
+ }
95
+ };
96
+ }
97
+
98
+ init_method_opt!(new_opt_u8, UInt8Type, u8);
99
+ init_method_opt!(new_opt_u16, UInt16Type, u16);
100
+ init_method_opt!(new_opt_u32, UInt32Type, u32);
101
+ init_method_opt!(new_opt_u64, UInt64Type, u64);
102
+ init_method_opt!(new_opt_i8, Int8Type, i8);
103
+ init_method_opt!(new_opt_i16, Int16Type, i16);
104
+ init_method_opt!(new_opt_i32, Int32Type, i32);
105
+ init_method_opt!(new_opt_i64, Int64Type, i64);
106
+ init_method_opt!(new_opt_f32, Float32Type, f32);
107
+ init_method_opt!(new_opt_f64, Float64Type, f64);
108
+
109
+ impl RbSeries {
110
+ pub fn new_str(name: String, val: RArray, _strict: bool) -> RbResult<Self> {
111
+ let v = val.try_convert::<Vec<Option<String>>>()?;
112
+ let mut s = Utf8Chunked::new(&name, v).into_series();
113
+ s.rename(&name);
114
+ Ok(RbSeries::new(s))
115
+ }
116
+
117
+ pub fn rechunk(&self, in_place: bool) -> Option<Self> {
118
+ let series = self.series.borrow_mut().rechunk();
119
+ if in_place {
120
+ *self.series.borrow_mut() = series;
121
+ None
122
+ } else {
123
+ Some(series.into())
124
+ }
125
+ }
126
+
127
+ pub fn bitand(&self, other: &RbSeries) -> RbResult<Self> {
128
+ let out = self
129
+ .series
130
+ .borrow()
131
+ .bitand(&other.series.borrow())
132
+ .map_err(RbPolarsErr::from)?;
133
+ Ok(out.into())
134
+ }
135
+
136
+ pub fn bitor(&self, other: &RbSeries) -> RbResult<Self> {
137
+ let out = self
138
+ .series
139
+ .borrow()
140
+ .bitor(&other.series.borrow())
141
+ .map_err(RbPolarsErr::from)?;
142
+ Ok(out.into())
143
+ }
144
+
145
+ pub fn bitxor(&self, other: &RbSeries) -> RbResult<Self> {
146
+ let out = self
147
+ .series
148
+ .borrow()
149
+ .bitxor(&other.series.borrow())
150
+ .map_err(RbPolarsErr::from)?;
151
+ Ok(out.into())
152
+ }
153
+
154
+ pub fn chunk_lengths(&self) -> Vec<usize> {
155
+ self.series.borrow().chunk_lengths().collect()
156
+ }
157
+
158
+ pub fn name(&self) -> String {
159
+ self.series.borrow().name().into()
160
+ }
161
+
162
+ pub fn rename(&self, name: String) {
163
+ self.series.borrow_mut().rename(&name);
164
+ }
165
+
166
+ pub fn dtype(&self) -> String {
167
+ self.series.borrow().dtype().to_string()
168
+ }
169
+
170
+ pub fn inner_dtype(&self) -> Option<String> {
171
+ self.series
172
+ .borrow()
173
+ .dtype()
174
+ .inner_dtype()
175
+ .map(|dt| dt.to_string())
176
+ }
177
+
178
+ pub fn set_sorted(&self, reverse: bool) -> Self {
179
+ let mut out = self.series.borrow().clone();
180
+ if reverse {
181
+ out.set_sorted(IsSorted::Descending);
182
+ } else {
183
+ out.set_sorted(IsSorted::Ascending)
184
+ }
185
+ out.into()
186
+ }
187
+
188
+ pub fn mean(&self) -> Option<f64> {
189
+ match self.series.borrow().dtype() {
190
+ DataType::Boolean => {
191
+ let s = self.series.borrow().cast(&DataType::UInt8).unwrap();
192
+ s.mean()
193
+ }
194
+ _ => self.series.borrow().mean(),
195
+ }
196
+ }
197
+
198
+ pub fn max(&self) -> Value {
199
+ wrap(self.series.borrow().max_as_series().get(0))
200
+ }
201
+
202
+ pub fn min(&self) -> Value {
203
+ wrap(self.series.borrow().min_as_series().get(0))
204
+ }
205
+
206
+ pub fn sum(&self) -> Value {
207
+ wrap(self.series.borrow().sum_as_series().get(0))
208
+ }
209
+
210
+ pub fn n_chunks(&self) -> usize {
211
+ self.series.borrow().n_chunks()
212
+ }
213
+
214
+ pub fn append(&self, other: &RbSeries) -> RbResult<()> {
215
+ let mut binding = self.series.borrow_mut();
216
+ let res = binding.append(&other.series.borrow());
217
+ if let Err(e) = res {
218
+ Err(Error::runtime_error(e.to_string()))
219
+ } else {
220
+ Ok(())
221
+ }
222
+ }
223
+
224
+ pub fn extend(&self, other: &RbSeries) -> RbResult<()> {
225
+ self.series
226
+ .borrow_mut()
227
+ .extend(&other.series.borrow())
228
+ .map_err(RbPolarsErr::from)?;
229
+ Ok(())
230
+ }
231
+
232
+ pub fn new_from_index(&self, index: usize, length: usize) -> RbResult<Self> {
233
+ if index >= self.series.borrow().len() {
234
+ Err(Error::new(arg_error(), "index is out of bounds"))
235
+ } else {
236
+ Ok(self.series.borrow().new_from_index(index, length).into())
237
+ }
238
+ }
239
+
240
+ pub fn filter(&self, filter: &RbSeries) -> RbResult<Self> {
241
+ let filter_series = &filter.series.borrow();
242
+ if let Ok(ca) = filter_series.bool() {
243
+ let series = self.series.borrow().filter(ca).unwrap();
244
+ Ok(series.into())
245
+ } else {
246
+ Err(Error::runtime_error("Expected a boolean mask".to_string()))
247
+ }
248
+ }
249
+
250
+ pub fn add(&self, other: &RbSeries) -> Self {
251
+ (&*self.series.borrow() + &*other.series.borrow()).into()
252
+ }
253
+
254
+ pub fn sub(&self, other: &RbSeries) -> Self {
255
+ (&*self.series.borrow() - &*other.series.borrow()).into()
256
+ }
257
+
258
+ pub fn mul(&self, other: &RbSeries) -> Self {
259
+ (&*self.series.borrow() * &*other.series.borrow()).into()
260
+ }
261
+
262
+ pub fn div(&self, other: &RbSeries) -> Self {
263
+ (&*self.series.borrow() / &*other.series.borrow()).into()
264
+ }
265
+
266
+ pub fn rem(&self, other: &RbSeries) -> Self {
267
+ (&*self.series.borrow() % &*other.series.borrow()).into()
268
+ }
269
+
270
+ pub fn sort(&self, reverse: bool) -> Self {
271
+ (self.series.borrow_mut().sort(reverse)).into()
272
+ }
273
+
274
+ pub fn value_counts(&self, sorted: bool) -> RbResult<RbDataFrame> {
275
+ let df = self
276
+ .series
277
+ .borrow()
278
+ .value_counts(true, sorted)
279
+ .map_err(RbPolarsErr::from)?;
280
+ Ok(df.into())
281
+ }
282
+
283
+ pub fn arg_min(&self) -> Option<usize> {
284
+ self.series.borrow().arg_min()
285
+ }
286
+
287
+ pub fn arg_max(&self) -> Option<usize> {
288
+ self.series.borrow().arg_max()
289
+ }
290
+
291
+ pub fn take_with_series(&self, indices: &RbSeries) -> RbResult<Self> {
292
+ let binding = indices.series.borrow();
293
+ let idx = binding.idx().map_err(RbPolarsErr::from)?;
294
+ let take = self.series.borrow().take(idx).map_err(RbPolarsErr::from)?;
295
+ Ok(RbSeries::new(take))
296
+ }
297
+
298
+ pub fn null_count(&self) -> RbResult<usize> {
299
+ Ok(self.series.borrow().null_count())
300
+ }
301
+
302
+ pub fn has_validity(&self) -> bool {
303
+ self.series.borrow().has_validity()
304
+ }
305
+
306
+ pub fn sample_n(
307
+ &self,
308
+ n: usize,
309
+ with_replacement: bool,
310
+ shuffle: bool,
311
+ seed: Option<u64>,
312
+ ) -> RbResult<Self> {
313
+ let s = self
314
+ .series
315
+ .borrow()
316
+ .sample_n(n, with_replacement, shuffle, seed)
317
+ .map_err(RbPolarsErr::from)?;
318
+ Ok(s.into())
319
+ }
320
+
321
+ pub fn sample_frac(
322
+ &self,
323
+ frac: f64,
324
+ with_replacement: bool,
325
+ shuffle: bool,
326
+ seed: Option<u64>,
327
+ ) -> RbResult<Self> {
328
+ let s = self
329
+ .series
330
+ .borrow()
331
+ .sample_frac(frac, with_replacement, shuffle, seed)
332
+ .map_err(RbPolarsErr::from)?;
333
+ Ok(s.into())
334
+ }
335
+
336
+ pub fn series_equal(&self, other: &RbSeries, null_equal: bool, strict: bool) -> bool {
337
+ if strict {
338
+ self.series.borrow().eq(&other.series.borrow())
339
+ } else if null_equal {
340
+ self.series
341
+ .borrow()
342
+ .series_equal_missing(&other.series.borrow())
343
+ } else {
344
+ self.series.borrow().series_equal(&other.series.borrow())
345
+ }
346
+ }
347
+
348
+ pub fn eq(&self, rhs: &RbSeries) -> RbResult<Self> {
349
+ let s = self
350
+ .series
351
+ .borrow()
352
+ .equal(&*rhs.series.borrow())
353
+ .map_err(RbPolarsErr::from)?;
354
+ Ok(Self::new(s.into_series()))
355
+ }
356
+
357
+ pub fn neq(&self, rhs: &RbSeries) -> RbResult<Self> {
358
+ let s = self
359
+ .series
360
+ .borrow()
361
+ .not_equal(&*rhs.series.borrow())
362
+ .map_err(RbPolarsErr::from)?;
363
+ Ok(Self::new(s.into_series()))
364
+ }
365
+
366
+ pub fn gt(&self, rhs: &RbSeries) -> RbResult<Self> {
367
+ let s = self
368
+ .series
369
+ .borrow()
370
+ .gt(&*rhs.series.borrow())
371
+ .map_err(RbPolarsErr::from)?;
372
+ Ok(Self::new(s.into_series()))
373
+ }
374
+
375
+ pub fn gt_eq(&self, rhs: &RbSeries) -> RbResult<Self> {
376
+ let s = self
377
+ .series
378
+ .borrow()
379
+ .gt_eq(&*rhs.series.borrow())
380
+ .map_err(RbPolarsErr::from)?;
381
+ Ok(Self::new(s.into_series()))
382
+ }
383
+
384
+ pub fn lt(&self, rhs: &RbSeries) -> RbResult<Self> {
385
+ let s = self
386
+ .series
387
+ .borrow()
388
+ .lt(&*rhs.series.borrow())
389
+ .map_err(RbPolarsErr::from)?;
390
+ Ok(Self::new(s.into_series()))
391
+ }
392
+
393
+ pub fn lt_eq(&self, rhs: &RbSeries) -> RbResult<Self> {
394
+ let s = self
395
+ .series
396
+ .borrow()
397
+ .lt_eq(&*rhs.series.borrow())
398
+ .map_err(RbPolarsErr::from)?;
399
+ Ok(Self::new(s.into_series()))
400
+ }
401
+
402
+ pub fn not(&self) -> RbResult<Self> {
403
+ let binding = self.series.borrow();
404
+ let bool = binding.bool().map_err(RbPolarsErr::from)?;
405
+ Ok((!bool).into_series().into())
406
+ }
407
+
408
+ pub fn to_s(&self) -> String {
409
+ format!("{}", self.series.borrow())
410
+ }
411
+
412
+ pub fn len(&self) -> usize {
413
+ self.series.borrow().len()
414
+ }
415
+
416
+ pub fn to_a(&self) -> RArray {
417
+ let series = self.series.borrow();
418
+ if let Ok(s) = series.f32() {
419
+ s.into_iter().collect()
420
+ } else if let Ok(s) = series.f64() {
421
+ s.into_iter().collect()
422
+ } else if let Ok(s) = series.i8() {
423
+ s.into_iter().collect()
424
+ } else if let Ok(s) = series.i16() {
425
+ s.into_iter().collect()
426
+ } else if let Ok(s) = series.i32() {
427
+ s.into_iter().collect()
428
+ } else if let Ok(s) = series.i64() {
429
+ s.into_iter().collect()
430
+ } else if let Ok(s) = series.u8() {
431
+ s.into_iter().collect()
432
+ } else if let Ok(s) = series.u16() {
433
+ s.into_iter().collect()
434
+ } else if let Ok(s) = series.u32() {
435
+ s.into_iter().collect()
436
+ } else if let Ok(s) = series.u64() {
437
+ s.into_iter().collect()
438
+ } else if let Ok(s) = series.bool() {
439
+ s.into_iter().collect()
440
+ } else if let Ok(s) = series.utf8() {
441
+ s.into_iter().collect()
442
+ } else {
443
+ unimplemented!();
444
+ }
445
+ }
446
+
447
+ pub fn median(&self) -> Option<f64> {
448
+ match self.series.borrow().dtype() {
449
+ DataType::Boolean => {
450
+ let s = self.series.borrow().cast(&DataType::UInt8).unwrap();
451
+ s.median()
452
+ }
453
+ _ => self.series.borrow().median(),
454
+ }
455
+ }
456
+
457
+ // dispatch dynamically in future?
458
+
459
+ pub fn cumsum(&self, reverse: bool) -> Self {
460
+ self.series.borrow().cumsum(reverse).into()
461
+ }
462
+
463
+ pub fn cummax(&self, reverse: bool) -> Self {
464
+ self.series.borrow().cummax(reverse).into()
465
+ }
466
+
467
+ pub fn cummin(&self, reverse: bool) -> Self {
468
+ self.series.borrow().cummin(reverse).into()
469
+ }
470
+
471
+ pub fn slice(&self, offset: i64, length: usize) -> Self {
472
+ let series = self.series.borrow().slice(offset, length);
473
+ series.into()
474
+ }
475
+ }