polars-df 0.9.0 → 0.11.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +23 -0
- data/Cargo.lock +144 -57
- data/README.md +7 -6
- data/ext/polars/Cargo.toml +10 -6
- data/ext/polars/src/batched_csv.rs +53 -50
- data/ext/polars/src/conversion/anyvalue.rs +3 -2
- data/ext/polars/src/conversion/mod.rs +31 -67
- data/ext/polars/src/dataframe/construction.rs +186 -0
- data/ext/polars/src/dataframe/export.rs +48 -0
- data/ext/polars/src/dataframe/general.rs +607 -0
- data/ext/polars/src/dataframe/io.rs +463 -0
- data/ext/polars/src/dataframe/mod.rs +26 -0
- data/ext/polars/src/expr/array.rs +6 -2
- data/ext/polars/src/expr/datetime.rs +13 -4
- data/ext/polars/src/expr/general.rs +50 -9
- data/ext/polars/src/expr/list.rs +6 -2
- data/ext/polars/src/expr/rolling.rs +185 -69
- data/ext/polars/src/expr/string.rs +12 -33
- data/ext/polars/src/file.rs +158 -11
- data/ext/polars/src/functions/lazy.rs +20 -3
- data/ext/polars/src/functions/range.rs +74 -0
- data/ext/polars/src/functions/whenthen.rs +47 -17
- data/ext/polars/src/interop/mod.rs +1 -0
- data/ext/polars/src/interop/numo/mod.rs +2 -0
- data/ext/polars/src/interop/numo/to_numo_df.rs +23 -0
- data/ext/polars/src/interop/numo/to_numo_series.rs +60 -0
- data/ext/polars/src/lazyframe/mod.rs +111 -56
- data/ext/polars/src/lib.rs +68 -34
- data/ext/polars/src/map/dataframe.rs +17 -9
- data/ext/polars/src/map/lazy.rs +5 -25
- data/ext/polars/src/map/series.rs +7 -1
- data/ext/polars/src/series/aggregation.rs +47 -30
- data/ext/polars/src/series/export.rs +131 -49
- data/ext/polars/src/series/mod.rs +13 -133
- data/lib/polars/array_expr.rb +6 -2
- data/lib/polars/batched_csv_reader.rb +11 -3
- data/lib/polars/convert.rb +6 -1
- data/lib/polars/data_frame.rb +225 -370
- data/lib/polars/date_time_expr.rb +11 -4
- data/lib/polars/date_time_name_space.rb +14 -4
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/exceptions.rb +4 -0
- data/lib/polars/expr.rb +1171 -54
- data/lib/polars/functions/lazy.rb +3 -3
- data/lib/polars/functions/range/date_range.rb +92 -0
- data/lib/polars/functions/range/datetime_range.rb +149 -0
- data/lib/polars/functions/range/time_range.rb +141 -0
- data/lib/polars/functions/whenthen.rb +74 -5
- data/lib/polars/group_by.rb +88 -23
- data/lib/polars/io/avro.rb +24 -0
- data/lib/polars/{io.rb → io/csv.rb} +307 -489
- data/lib/polars/io/database.rb +73 -0
- data/lib/polars/io/ipc.rb +247 -0
- data/lib/polars/io/json.rb +18 -0
- data/lib/polars/io/ndjson.rb +69 -0
- data/lib/polars/io/parquet.rb +226 -0
- data/lib/polars/lazy_frame.rb +55 -195
- data/lib/polars/lazy_group_by.rb +100 -3
- data/lib/polars/list_expr.rb +6 -2
- data/lib/polars/rolling_group_by.rb +2 -2
- data/lib/polars/series.rb +14 -12
- data/lib/polars/string_expr.rb +38 -36
- data/lib/polars/utils.rb +89 -1
- data/lib/polars/version.rb +1 -1
- data/lib/polars/whenthen.rb +83 -0
- data/lib/polars.rb +10 -3
- metadata +23 -8
- data/ext/polars/src/dataframe.rs +0 -1182
- data/lib/polars/when.rb +0 -16
- data/lib/polars/when_then.rb +0 -19
@@ -1,57 +1,139 @@
|
|
1
|
-
use magnus::{
|
1
|
+
use magnus::{value::qnil, IntoValue, RArray, Value};
|
2
2
|
use polars_core::prelude::*;
|
3
3
|
|
4
|
-
use crate::
|
4
|
+
use crate::prelude::*;
|
5
|
+
use crate::RbSeries;
|
5
6
|
|
6
7
|
impl RbSeries {
|
7
|
-
///
|
8
|
-
/// This
|
9
|
-
pub fn
|
10
|
-
let
|
11
|
-
match s.dtype() {
|
12
|
-
DataType::String => {
|
13
|
-
let ca = s.str().unwrap();
|
8
|
+
/// Convert this Series to a Ruby array.
|
9
|
+
/// This operation copies data.
|
10
|
+
pub fn to_a(&self) -> Value {
|
11
|
+
let series = &self.series.borrow();
|
14
12
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
.
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
13
|
+
fn to_a_recursive(series: &Series) -> Value {
|
14
|
+
let rblist = match series.dtype() {
|
15
|
+
DataType::Boolean => RArray::from_iter(series.bool().unwrap()).into_value(),
|
16
|
+
DataType::UInt8 => RArray::from_iter(series.u8().unwrap()).into_value(),
|
17
|
+
DataType::UInt16 => RArray::from_iter(series.u16().unwrap()).into_value(),
|
18
|
+
DataType::UInt32 => RArray::from_iter(series.u32().unwrap()).into_value(),
|
19
|
+
DataType::UInt64 => RArray::from_iter(series.u64().unwrap()).into_value(),
|
20
|
+
DataType::Int8 => RArray::from_iter(series.i8().unwrap()).into_value(),
|
21
|
+
DataType::Int16 => RArray::from_iter(series.i16().unwrap()).into_value(),
|
22
|
+
DataType::Int32 => RArray::from_iter(series.i32().unwrap()).into_value(),
|
23
|
+
DataType::Int64 => RArray::from_iter(series.i64().unwrap()).into_value(),
|
24
|
+
DataType::Float32 => RArray::from_iter(series.f32().unwrap()).into_value(),
|
25
|
+
DataType::Float64 => RArray::from_iter(series.f64().unwrap()).into_value(),
|
26
|
+
DataType::Categorical(_, _) | DataType::Enum(_, _) => {
|
27
|
+
RArray::from_iter(series.categorical().unwrap().iter_str()).into_value()
|
28
|
+
}
|
29
|
+
DataType::Object(_, _) => {
|
30
|
+
let v = RArray::with_capacity(series.len());
|
31
|
+
for i in 0..series.len() {
|
32
|
+
let obj: Option<&ObjectValue> = series.get_object(i).map(|any| any.into());
|
33
|
+
match obj {
|
34
|
+
Some(val) => v.push(val.to_object()).unwrap(),
|
35
|
+
None => v.push(qnil()).unwrap(),
|
36
|
+
};
|
37
|
+
}
|
38
|
+
v.into_value()
|
39
|
+
}
|
40
|
+
DataType::List(_) => {
|
41
|
+
let v = RArray::new();
|
42
|
+
let ca = series.list().unwrap();
|
43
|
+
for opt_s in unsafe { ca.amortized_iter() } {
|
44
|
+
match opt_s {
|
45
|
+
None => {
|
46
|
+
v.push(qnil()).unwrap();
|
47
|
+
}
|
48
|
+
Some(s) => {
|
49
|
+
let rblst = to_a_recursive(s.as_ref());
|
50
|
+
v.push(rblst).unwrap();
|
51
|
+
}
|
52
|
+
}
|
53
|
+
}
|
54
|
+
v.into_value()
|
55
|
+
}
|
56
|
+
DataType::Array(_, _) => {
|
57
|
+
let v = RArray::new();
|
58
|
+
let ca = series.array().unwrap();
|
59
|
+
for opt_s in ca.amortized_iter() {
|
60
|
+
match opt_s {
|
61
|
+
None => {
|
62
|
+
v.push(qnil()).unwrap();
|
63
|
+
}
|
64
|
+
Some(s) => {
|
65
|
+
let rblst = to_a_recursive(s.as_ref());
|
66
|
+
v.push(rblst).unwrap();
|
67
|
+
}
|
68
|
+
}
|
69
|
+
}
|
70
|
+
v.into_value()
|
71
|
+
}
|
72
|
+
DataType::Date => {
|
73
|
+
let ca = series.date().unwrap();
|
74
|
+
return Wrap(ca).into_value();
|
75
|
+
}
|
76
|
+
DataType::Time => {
|
77
|
+
let ca = series.time().unwrap();
|
78
|
+
return Wrap(ca).into_value();
|
79
|
+
}
|
80
|
+
DataType::Datetime(_, _) => {
|
81
|
+
let ca = series.datetime().unwrap();
|
82
|
+
return Wrap(ca).into_value();
|
83
|
+
}
|
84
|
+
DataType::Decimal(_, _) => {
|
85
|
+
let ca = series.decimal().unwrap();
|
86
|
+
return Wrap(ca).into_value();
|
87
|
+
}
|
88
|
+
DataType::String => {
|
89
|
+
let ca = series.str().unwrap();
|
90
|
+
return Wrap(ca).into_value();
|
91
|
+
}
|
92
|
+
DataType::Struct(_) => {
|
93
|
+
let ca = series.struct_().unwrap();
|
94
|
+
return Wrap(ca).into_value();
|
95
|
+
}
|
96
|
+
DataType::Duration(_) => {
|
97
|
+
let ca = series.duration().unwrap();
|
98
|
+
return Wrap(ca).into_value();
|
99
|
+
}
|
100
|
+
DataType::Binary => {
|
101
|
+
let ca = series.binary().unwrap();
|
102
|
+
return Wrap(ca).into_value();
|
103
|
+
}
|
104
|
+
DataType::Null => {
|
105
|
+
let null: Option<u8> = None;
|
106
|
+
let n = series.len();
|
107
|
+
let iter = std::iter::repeat(null).take(n);
|
108
|
+
use std::iter::{Repeat, Take};
|
109
|
+
struct NullIter {
|
110
|
+
iter: Take<Repeat<Option<u8>>>,
|
111
|
+
n: usize,
|
112
|
+
}
|
113
|
+
impl Iterator for NullIter {
|
114
|
+
type Item = Option<u8>;
|
115
|
+
|
116
|
+
fn next(&mut self) -> Option<Self::Item> {
|
117
|
+
self.iter.next()
|
118
|
+
}
|
119
|
+
fn size_hint(&self) -> (usize, Option<usize>) {
|
120
|
+
(self.n, Some(self.n))
|
121
|
+
}
|
122
|
+
}
|
123
|
+
impl ExactSizeIterator for NullIter {}
|
124
|
+
|
125
|
+
RArray::from_iter(NullIter { iter, n }).into_value()
|
126
|
+
}
|
127
|
+
DataType::Unknown(_) => {
|
128
|
+
panic!("to_a not implemented for unknown")
|
129
|
+
}
|
130
|
+
DataType::BinaryOffset => {
|
131
|
+
unreachable!()
|
132
|
+
}
|
133
|
+
};
|
134
|
+
rblist
|
55
135
|
}
|
136
|
+
|
137
|
+
to_a_recursive(series)
|
56
138
|
}
|
57
139
|
}
|
@@ -5,7 +5,7 @@ mod construction;
|
|
5
5
|
mod export;
|
6
6
|
mod scatter;
|
7
7
|
|
8
|
-
use magnus::{exception, prelude::*,
|
8
|
+
use magnus::{exception, prelude::*, Error, IntoValue, RArray, Value};
|
9
9
|
use polars::prelude::*;
|
10
10
|
use polars::series::IsSorted;
|
11
11
|
use std::cell::RefCell;
|
@@ -233,8 +233,18 @@ impl RbSeries {
|
|
233
233
|
}
|
234
234
|
}
|
235
235
|
|
236
|
-
pub fn sort(&self, descending: bool, nulls_last: bool) -> Self {
|
237
|
-
(self
|
236
|
+
pub fn sort(&self, descending: bool, nulls_last: bool, multithreaded: bool) -> RbResult<Self> {
|
237
|
+
Ok(self
|
238
|
+
.series
|
239
|
+
.borrow_mut()
|
240
|
+
.sort(
|
241
|
+
SortOptions::default()
|
242
|
+
.with_order_descending(descending)
|
243
|
+
.with_nulls_last(nulls_last)
|
244
|
+
.with_multithreaded(multithreaded),
|
245
|
+
)
|
246
|
+
.map_err(RbPolarsErr::from)?
|
247
|
+
.into())
|
238
248
|
}
|
239
249
|
|
240
250
|
pub fn value_counts(&self, sorted: bool) -> RbResult<RbDataFrame> {
|
@@ -315,136 +325,6 @@ impl RbSeries {
|
|
315
325
|
self.series.borrow().len()
|
316
326
|
}
|
317
327
|
|
318
|
-
pub fn to_a(&self) -> Value {
|
319
|
-
let series = &self.series.borrow();
|
320
|
-
|
321
|
-
fn to_a_recursive(series: &Series) -> Value {
|
322
|
-
let rblist = match series.dtype() {
|
323
|
-
DataType::Boolean => RArray::from_iter(series.bool().unwrap()).into_value(),
|
324
|
-
DataType::UInt8 => RArray::from_iter(series.u8().unwrap()).into_value(),
|
325
|
-
DataType::UInt16 => RArray::from_iter(series.u16().unwrap()).into_value(),
|
326
|
-
DataType::UInt32 => RArray::from_iter(series.u32().unwrap()).into_value(),
|
327
|
-
DataType::UInt64 => RArray::from_iter(series.u64().unwrap()).into_value(),
|
328
|
-
DataType::Int8 => RArray::from_iter(series.i8().unwrap()).into_value(),
|
329
|
-
DataType::Int16 => RArray::from_iter(series.i16().unwrap()).into_value(),
|
330
|
-
DataType::Int32 => RArray::from_iter(series.i32().unwrap()).into_value(),
|
331
|
-
DataType::Int64 => RArray::from_iter(series.i64().unwrap()).into_value(),
|
332
|
-
DataType::Float32 => RArray::from_iter(series.f32().unwrap()).into_value(),
|
333
|
-
DataType::Float64 => RArray::from_iter(series.f64().unwrap()).into_value(),
|
334
|
-
DataType::Categorical(_, _) | DataType::Enum(_, _) => {
|
335
|
-
RArray::from_iter(series.categorical().unwrap().iter_str()).into_value()
|
336
|
-
}
|
337
|
-
DataType::Object(_, _) => {
|
338
|
-
let v = RArray::with_capacity(series.len());
|
339
|
-
for i in 0..series.len() {
|
340
|
-
let obj: Option<&ObjectValue> = series.get_object(i).map(|any| any.into());
|
341
|
-
match obj {
|
342
|
-
Some(val) => v.push(val.to_object()).unwrap(),
|
343
|
-
None => v.push(qnil()).unwrap(),
|
344
|
-
};
|
345
|
-
}
|
346
|
-
v.into_value()
|
347
|
-
}
|
348
|
-
DataType::List(_) => {
|
349
|
-
let v = RArray::new();
|
350
|
-
let ca = series.list().unwrap();
|
351
|
-
for opt_s in unsafe { ca.amortized_iter() } {
|
352
|
-
match opt_s {
|
353
|
-
None => {
|
354
|
-
v.push(qnil()).unwrap();
|
355
|
-
}
|
356
|
-
Some(s) => {
|
357
|
-
let rblst = to_a_recursive(s.as_ref());
|
358
|
-
v.push(rblst).unwrap();
|
359
|
-
}
|
360
|
-
}
|
361
|
-
}
|
362
|
-
v.into_value()
|
363
|
-
}
|
364
|
-
DataType::Array(_, _) => {
|
365
|
-
let v = RArray::new();
|
366
|
-
let ca = series.array().unwrap();
|
367
|
-
for opt_s in ca.amortized_iter() {
|
368
|
-
match opt_s {
|
369
|
-
None => {
|
370
|
-
v.push(qnil()).unwrap();
|
371
|
-
}
|
372
|
-
Some(s) => {
|
373
|
-
let rblst = to_a_recursive(s.as_ref());
|
374
|
-
v.push(rblst).unwrap();
|
375
|
-
}
|
376
|
-
}
|
377
|
-
}
|
378
|
-
v.into_value()
|
379
|
-
}
|
380
|
-
DataType::Date => {
|
381
|
-
let ca = series.date().unwrap();
|
382
|
-
return Wrap(ca).into_value();
|
383
|
-
}
|
384
|
-
DataType::Time => {
|
385
|
-
let ca = series.time().unwrap();
|
386
|
-
return Wrap(ca).into_value();
|
387
|
-
}
|
388
|
-
DataType::Datetime(_, _) => {
|
389
|
-
let ca = series.datetime().unwrap();
|
390
|
-
return Wrap(ca).into_value();
|
391
|
-
}
|
392
|
-
DataType::Decimal(_, _) => {
|
393
|
-
let ca = series.decimal().unwrap();
|
394
|
-
return Wrap(ca).into_value();
|
395
|
-
}
|
396
|
-
DataType::String => {
|
397
|
-
let ca = series.str().unwrap();
|
398
|
-
return Wrap(ca).into_value();
|
399
|
-
}
|
400
|
-
DataType::Struct(_) => {
|
401
|
-
let ca = series.struct_().unwrap();
|
402
|
-
return Wrap(ca).into_value();
|
403
|
-
}
|
404
|
-
DataType::Duration(_) => {
|
405
|
-
let ca = series.duration().unwrap();
|
406
|
-
return Wrap(ca).into_value();
|
407
|
-
}
|
408
|
-
DataType::Binary => {
|
409
|
-
let ca = series.binary().unwrap();
|
410
|
-
return Wrap(ca).into_value();
|
411
|
-
}
|
412
|
-
DataType::Null => {
|
413
|
-
let null: Option<u8> = None;
|
414
|
-
let n = series.len();
|
415
|
-
let iter = std::iter::repeat(null).take(n);
|
416
|
-
use std::iter::{Repeat, Take};
|
417
|
-
struct NullIter {
|
418
|
-
iter: Take<Repeat<Option<u8>>>,
|
419
|
-
n: usize,
|
420
|
-
}
|
421
|
-
impl Iterator for NullIter {
|
422
|
-
type Item = Option<u8>;
|
423
|
-
|
424
|
-
fn next(&mut self) -> Option<Self::Item> {
|
425
|
-
self.iter.next()
|
426
|
-
}
|
427
|
-
fn size_hint(&self) -> (usize, Option<usize>) {
|
428
|
-
(self.n, Some(self.n))
|
429
|
-
}
|
430
|
-
}
|
431
|
-
impl ExactSizeIterator for NullIter {}
|
432
|
-
|
433
|
-
RArray::from_iter(NullIter { iter, n }).into_value()
|
434
|
-
}
|
435
|
-
DataType::Unknown => {
|
436
|
-
panic!("to_a not implemented for unknown")
|
437
|
-
}
|
438
|
-
DataType::BinaryOffset => {
|
439
|
-
unreachable!()
|
440
|
-
}
|
441
|
-
};
|
442
|
-
rblist
|
443
|
-
}
|
444
|
-
|
445
|
-
to_a_recursive(series)
|
446
|
-
}
|
447
|
-
|
448
328
|
pub fn clone(&self) -> Self {
|
449
329
|
RbSeries::new(self.series.borrow().clone())
|
450
330
|
}
|
data/lib/polars/array_expr.rb
CHANGED
@@ -333,6 +333,10 @@ module Polars
|
|
333
333
|
#
|
334
334
|
# @param index [Integer]
|
335
335
|
# Index to return per sub-array
|
336
|
+
# @param null_on_oob [Boolean]
|
337
|
+
# Behavior if an index is out of bounds:
|
338
|
+
# true -> set as null
|
339
|
+
# false -> raise an error
|
336
340
|
#
|
337
341
|
# @return [Expr]
|
338
342
|
#
|
@@ -353,9 +357,9 @@ module Polars
|
|
353
357
|
# # │ [4, 5, 6] ┆ -2 ┆ 5 │
|
354
358
|
# # │ [7, 8, 9] ┆ 4 ┆ null │
|
355
359
|
# # └───────────────┴─────┴──────┘
|
356
|
-
def get(index)
|
360
|
+
def get(index, null_on_oob: true)
|
357
361
|
index = Utils.parse_as_expression(index)
|
358
|
-
Utils.wrap_expr(_rbexpr.arr_get(index))
|
362
|
+
Utils.wrap_expr(_rbexpr.arr_get(index, null_on_oob))
|
359
363
|
end
|
360
364
|
|
361
365
|
# Get the first value of the sub-arrays.
|
@@ -13,6 +13,7 @@ module Polars
|
|
13
13
|
skip_rows: 0,
|
14
14
|
dtypes: nil,
|
15
15
|
null_values: nil,
|
16
|
+
missing_utf8_is_empty_string: false,
|
16
17
|
ignore_errors: false,
|
17
18
|
parse_dates: false,
|
18
19
|
n_threads: nil,
|
@@ -27,10 +28,13 @@ module Polars
|
|
27
28
|
row_count_offset: 0,
|
28
29
|
sample_size: 1024,
|
29
30
|
eol_char: "\n",
|
30
|
-
new_columns: nil
|
31
|
+
new_columns: nil,
|
32
|
+
raise_if_empty: true,
|
33
|
+
truncate_ragged_lines: false,
|
34
|
+
decimal_comma: false
|
31
35
|
)
|
32
36
|
if Utils.pathlike?(file)
|
33
|
-
path = Utils.
|
37
|
+
path = Utils.normalize_filepath(file)
|
34
38
|
end
|
35
39
|
|
36
40
|
dtype_list = nil
|
@@ -71,11 +75,15 @@ module Polars
|
|
71
75
|
comment_char,
|
72
76
|
quote_char,
|
73
77
|
processed_null_values,
|
78
|
+
missing_utf8_is_empty_string,
|
74
79
|
parse_dates,
|
75
80
|
skip_rows_after_header,
|
76
81
|
Utils._prepare_row_count_args(row_count_name, row_count_offset),
|
77
82
|
sample_size,
|
78
|
-
eol_char
|
83
|
+
eol_char,
|
84
|
+
raise_if_empty,
|
85
|
+
truncate_ragged_lines,
|
86
|
+
decimal_comma
|
79
87
|
)
|
80
88
|
self.new_columns = new_columns
|
81
89
|
end
|
data/lib/polars/convert.rb
CHANGED
@@ -27,7 +27,12 @@ module Polars
|
|
27
27
|
# # │ 2 ┆ 4 │
|
28
28
|
# # └─────┴─────┘
|
29
29
|
def from_hash(data, schema: nil, columns: nil)
|
30
|
-
|
30
|
+
Utils.wrap_df(
|
31
|
+
DataFrame.hash_to_rbdf(
|
32
|
+
data,
|
33
|
+
schema: schema || columns
|
34
|
+
)
|
35
|
+
)
|
31
36
|
end
|
32
37
|
|
33
38
|
# Construct a DataFrame from a sequence of dictionaries. This operation clones data.
|