polars-df 0.10.0 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/Cargo.lock +90 -48
- data/README.md +6 -6
- data/ext/polars/Cargo.toml +7 -5
- data/ext/polars/src/batched_csv.rs +53 -52
- data/ext/polars/src/conversion/mod.rs +13 -60
- data/ext/polars/src/dataframe/construction.rs +186 -0
- data/ext/polars/src/dataframe/export.rs +48 -0
- data/ext/polars/src/dataframe/general.rs +607 -0
- data/ext/polars/src/dataframe/io.rs +463 -0
- data/ext/polars/src/dataframe/mod.rs +26 -0
- data/ext/polars/src/expr/datetime.rs +6 -2
- data/ext/polars/src/expr/general.rs +28 -6
- data/ext/polars/src/expr/rolling.rs +185 -69
- data/ext/polars/src/expr/string.rs +9 -30
- data/ext/polars/src/functions/lazy.rs +2 -0
- data/ext/polars/src/functions/range.rs +74 -0
- data/ext/polars/src/interop/mod.rs +1 -0
- data/ext/polars/src/interop/numo/mod.rs +2 -0
- data/ext/polars/src/interop/numo/to_numo_df.rs +23 -0
- data/ext/polars/src/interop/numo/to_numo_series.rs +60 -0
- data/ext/polars/src/lazyframe/mod.rs +54 -38
- data/ext/polars/src/lib.rs +46 -21
- data/ext/polars/src/map/lazy.rs +5 -25
- data/ext/polars/src/map/series.rs +7 -1
- data/ext/polars/src/series/aggregation.rs +47 -30
- data/ext/polars/src/series/export.rs +131 -49
- data/ext/polars/src/series/mod.rs +1 -131
- data/lib/polars/batched_csv_reader.rb +9 -3
- data/lib/polars/convert.rb +6 -1
- data/lib/polars/data_frame.rb +83 -302
- data/lib/polars/date_time_expr.rb +1 -0
- data/lib/polars/date_time_name_space.rb +5 -1
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/exceptions.rb +4 -0
- data/lib/polars/expr.rb +1134 -20
- data/lib/polars/functions/range/date_range.rb +92 -0
- data/lib/polars/functions/range/datetime_range.rb +149 -0
- data/lib/polars/functions/range/time_range.rb +141 -0
- data/lib/polars/group_by.rb +88 -23
- data/lib/polars/io/avro.rb +24 -0
- data/lib/polars/{io.rb → io/csv.rb} +296 -490
- data/lib/polars/io/database.rb +73 -0
- data/lib/polars/io/ipc.rb +247 -0
- data/lib/polars/io/json.rb +18 -0
- data/lib/polars/io/ndjson.rb +69 -0
- data/lib/polars/io/parquet.rb +226 -0
- data/lib/polars/lazy_frame.rb +23 -166
- data/lib/polars/lazy_group_by.rb +100 -3
- data/lib/polars/rolling_group_by.rb +2 -2
- data/lib/polars/series.rb +2 -2
- data/lib/polars/string_expr.rb +37 -36
- data/lib/polars/utils.rb +35 -1
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +9 -1
- metadata +21 -5
- data/ext/polars/src/dataframe.rs +0 -1208
@@ -1,57 +1,139 @@
|
|
1
|
-
use magnus::{
|
1
|
+
use magnus::{value::qnil, IntoValue, RArray, Value};
|
2
2
|
use polars_core::prelude::*;
|
3
3
|
|
4
|
-
use crate::
|
4
|
+
use crate::prelude::*;
|
5
|
+
use crate::RbSeries;
|
5
6
|
|
6
7
|
impl RbSeries {
|
7
|
-
///
|
8
|
-
/// This
|
9
|
-
pub fn
|
10
|
-
let
|
11
|
-
match s.dtype() {
|
12
|
-
DataType::String => {
|
13
|
-
let ca = s.str().unwrap();
|
8
|
+
/// Convert this Series to a Ruby array.
|
9
|
+
/// This operation copies data.
|
10
|
+
pub fn to_a(&self) -> Value {
|
11
|
+
let series = &self.series.borrow();
|
14
12
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
.
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
13
|
+
fn to_a_recursive(series: &Series) -> Value {
|
14
|
+
let rblist = match series.dtype() {
|
15
|
+
DataType::Boolean => RArray::from_iter(series.bool().unwrap()).into_value(),
|
16
|
+
DataType::UInt8 => RArray::from_iter(series.u8().unwrap()).into_value(),
|
17
|
+
DataType::UInt16 => RArray::from_iter(series.u16().unwrap()).into_value(),
|
18
|
+
DataType::UInt32 => RArray::from_iter(series.u32().unwrap()).into_value(),
|
19
|
+
DataType::UInt64 => RArray::from_iter(series.u64().unwrap()).into_value(),
|
20
|
+
DataType::Int8 => RArray::from_iter(series.i8().unwrap()).into_value(),
|
21
|
+
DataType::Int16 => RArray::from_iter(series.i16().unwrap()).into_value(),
|
22
|
+
DataType::Int32 => RArray::from_iter(series.i32().unwrap()).into_value(),
|
23
|
+
DataType::Int64 => RArray::from_iter(series.i64().unwrap()).into_value(),
|
24
|
+
DataType::Float32 => RArray::from_iter(series.f32().unwrap()).into_value(),
|
25
|
+
DataType::Float64 => RArray::from_iter(series.f64().unwrap()).into_value(),
|
26
|
+
DataType::Categorical(_, _) | DataType::Enum(_, _) => {
|
27
|
+
RArray::from_iter(series.categorical().unwrap().iter_str()).into_value()
|
28
|
+
}
|
29
|
+
DataType::Object(_, _) => {
|
30
|
+
let v = RArray::with_capacity(series.len());
|
31
|
+
for i in 0..series.len() {
|
32
|
+
let obj: Option<&ObjectValue> = series.get_object(i).map(|any| any.into());
|
33
|
+
match obj {
|
34
|
+
Some(val) => v.push(val.to_object()).unwrap(),
|
35
|
+
None => v.push(qnil()).unwrap(),
|
36
|
+
};
|
37
|
+
}
|
38
|
+
v.into_value()
|
39
|
+
}
|
40
|
+
DataType::List(_) => {
|
41
|
+
let v = RArray::new();
|
42
|
+
let ca = series.list().unwrap();
|
43
|
+
for opt_s in unsafe { ca.amortized_iter() } {
|
44
|
+
match opt_s {
|
45
|
+
None => {
|
46
|
+
v.push(qnil()).unwrap();
|
47
|
+
}
|
48
|
+
Some(s) => {
|
49
|
+
let rblst = to_a_recursive(s.as_ref());
|
50
|
+
v.push(rblst).unwrap();
|
51
|
+
}
|
52
|
+
}
|
53
|
+
}
|
54
|
+
v.into_value()
|
55
|
+
}
|
56
|
+
DataType::Array(_, _) => {
|
57
|
+
let v = RArray::new();
|
58
|
+
let ca = series.array().unwrap();
|
59
|
+
for opt_s in ca.amortized_iter() {
|
60
|
+
match opt_s {
|
61
|
+
None => {
|
62
|
+
v.push(qnil()).unwrap();
|
63
|
+
}
|
64
|
+
Some(s) => {
|
65
|
+
let rblst = to_a_recursive(s.as_ref());
|
66
|
+
v.push(rblst).unwrap();
|
67
|
+
}
|
68
|
+
}
|
69
|
+
}
|
70
|
+
v.into_value()
|
71
|
+
}
|
72
|
+
DataType::Date => {
|
73
|
+
let ca = series.date().unwrap();
|
74
|
+
return Wrap(ca).into_value();
|
75
|
+
}
|
76
|
+
DataType::Time => {
|
77
|
+
let ca = series.time().unwrap();
|
78
|
+
return Wrap(ca).into_value();
|
79
|
+
}
|
80
|
+
DataType::Datetime(_, _) => {
|
81
|
+
let ca = series.datetime().unwrap();
|
82
|
+
return Wrap(ca).into_value();
|
83
|
+
}
|
84
|
+
DataType::Decimal(_, _) => {
|
85
|
+
let ca = series.decimal().unwrap();
|
86
|
+
return Wrap(ca).into_value();
|
87
|
+
}
|
88
|
+
DataType::String => {
|
89
|
+
let ca = series.str().unwrap();
|
90
|
+
return Wrap(ca).into_value();
|
91
|
+
}
|
92
|
+
DataType::Struct(_) => {
|
93
|
+
let ca = series.struct_().unwrap();
|
94
|
+
return Wrap(ca).into_value();
|
95
|
+
}
|
96
|
+
DataType::Duration(_) => {
|
97
|
+
let ca = series.duration().unwrap();
|
98
|
+
return Wrap(ca).into_value();
|
99
|
+
}
|
100
|
+
DataType::Binary => {
|
101
|
+
let ca = series.binary().unwrap();
|
102
|
+
return Wrap(ca).into_value();
|
103
|
+
}
|
104
|
+
DataType::Null => {
|
105
|
+
let null: Option<u8> = None;
|
106
|
+
let n = series.len();
|
107
|
+
let iter = std::iter::repeat(null).take(n);
|
108
|
+
use std::iter::{Repeat, Take};
|
109
|
+
struct NullIter {
|
110
|
+
iter: Take<Repeat<Option<u8>>>,
|
111
|
+
n: usize,
|
112
|
+
}
|
113
|
+
impl Iterator for NullIter {
|
114
|
+
type Item = Option<u8>;
|
115
|
+
|
116
|
+
fn next(&mut self) -> Option<Self::Item> {
|
117
|
+
self.iter.next()
|
118
|
+
}
|
119
|
+
fn size_hint(&self) -> (usize, Option<usize>) {
|
120
|
+
(self.n, Some(self.n))
|
121
|
+
}
|
122
|
+
}
|
123
|
+
impl ExactSizeIterator for NullIter {}
|
124
|
+
|
125
|
+
RArray::from_iter(NullIter { iter, n }).into_value()
|
126
|
+
}
|
127
|
+
DataType::Unknown(_) => {
|
128
|
+
panic!("to_a not implemented for unknown")
|
129
|
+
}
|
130
|
+
DataType::BinaryOffset => {
|
131
|
+
unreachable!()
|
132
|
+
}
|
133
|
+
};
|
134
|
+
rblist
|
55
135
|
}
|
136
|
+
|
137
|
+
to_a_recursive(series)
|
56
138
|
}
|
57
139
|
}
|
@@ -5,7 +5,7 @@ mod construction;
|
|
5
5
|
mod export;
|
6
6
|
mod scatter;
|
7
7
|
|
8
|
-
use magnus::{exception, prelude::*,
|
8
|
+
use magnus::{exception, prelude::*, Error, IntoValue, RArray, Value};
|
9
9
|
use polars::prelude::*;
|
10
10
|
use polars::series::IsSorted;
|
11
11
|
use std::cell::RefCell;
|
@@ -325,136 +325,6 @@ impl RbSeries {
|
|
325
325
|
self.series.borrow().len()
|
326
326
|
}
|
327
327
|
|
328
|
-
pub fn to_a(&self) -> Value {
|
329
|
-
let series = &self.series.borrow();
|
330
|
-
|
331
|
-
fn to_a_recursive(series: &Series) -> Value {
|
332
|
-
let rblist = match series.dtype() {
|
333
|
-
DataType::Boolean => RArray::from_iter(series.bool().unwrap()).into_value(),
|
334
|
-
DataType::UInt8 => RArray::from_iter(series.u8().unwrap()).into_value(),
|
335
|
-
DataType::UInt16 => RArray::from_iter(series.u16().unwrap()).into_value(),
|
336
|
-
DataType::UInt32 => RArray::from_iter(series.u32().unwrap()).into_value(),
|
337
|
-
DataType::UInt64 => RArray::from_iter(series.u64().unwrap()).into_value(),
|
338
|
-
DataType::Int8 => RArray::from_iter(series.i8().unwrap()).into_value(),
|
339
|
-
DataType::Int16 => RArray::from_iter(series.i16().unwrap()).into_value(),
|
340
|
-
DataType::Int32 => RArray::from_iter(series.i32().unwrap()).into_value(),
|
341
|
-
DataType::Int64 => RArray::from_iter(series.i64().unwrap()).into_value(),
|
342
|
-
DataType::Float32 => RArray::from_iter(series.f32().unwrap()).into_value(),
|
343
|
-
DataType::Float64 => RArray::from_iter(series.f64().unwrap()).into_value(),
|
344
|
-
DataType::Categorical(_, _) | DataType::Enum(_, _) => {
|
345
|
-
RArray::from_iter(series.categorical().unwrap().iter_str()).into_value()
|
346
|
-
}
|
347
|
-
DataType::Object(_, _) => {
|
348
|
-
let v = RArray::with_capacity(series.len());
|
349
|
-
for i in 0..series.len() {
|
350
|
-
let obj: Option<&ObjectValue> = series.get_object(i).map(|any| any.into());
|
351
|
-
match obj {
|
352
|
-
Some(val) => v.push(val.to_object()).unwrap(),
|
353
|
-
None => v.push(qnil()).unwrap(),
|
354
|
-
};
|
355
|
-
}
|
356
|
-
v.into_value()
|
357
|
-
}
|
358
|
-
DataType::List(_) => {
|
359
|
-
let v = RArray::new();
|
360
|
-
let ca = series.list().unwrap();
|
361
|
-
for opt_s in unsafe { ca.amortized_iter() } {
|
362
|
-
match opt_s {
|
363
|
-
None => {
|
364
|
-
v.push(qnil()).unwrap();
|
365
|
-
}
|
366
|
-
Some(s) => {
|
367
|
-
let rblst = to_a_recursive(s.as_ref());
|
368
|
-
v.push(rblst).unwrap();
|
369
|
-
}
|
370
|
-
}
|
371
|
-
}
|
372
|
-
v.into_value()
|
373
|
-
}
|
374
|
-
DataType::Array(_, _) => {
|
375
|
-
let v = RArray::new();
|
376
|
-
let ca = series.array().unwrap();
|
377
|
-
for opt_s in ca.amortized_iter() {
|
378
|
-
match opt_s {
|
379
|
-
None => {
|
380
|
-
v.push(qnil()).unwrap();
|
381
|
-
}
|
382
|
-
Some(s) => {
|
383
|
-
let rblst = to_a_recursive(s.as_ref());
|
384
|
-
v.push(rblst).unwrap();
|
385
|
-
}
|
386
|
-
}
|
387
|
-
}
|
388
|
-
v.into_value()
|
389
|
-
}
|
390
|
-
DataType::Date => {
|
391
|
-
let ca = series.date().unwrap();
|
392
|
-
return Wrap(ca).into_value();
|
393
|
-
}
|
394
|
-
DataType::Time => {
|
395
|
-
let ca = series.time().unwrap();
|
396
|
-
return Wrap(ca).into_value();
|
397
|
-
}
|
398
|
-
DataType::Datetime(_, _) => {
|
399
|
-
let ca = series.datetime().unwrap();
|
400
|
-
return Wrap(ca).into_value();
|
401
|
-
}
|
402
|
-
DataType::Decimal(_, _) => {
|
403
|
-
let ca = series.decimal().unwrap();
|
404
|
-
return Wrap(ca).into_value();
|
405
|
-
}
|
406
|
-
DataType::String => {
|
407
|
-
let ca = series.str().unwrap();
|
408
|
-
return Wrap(ca).into_value();
|
409
|
-
}
|
410
|
-
DataType::Struct(_) => {
|
411
|
-
let ca = series.struct_().unwrap();
|
412
|
-
return Wrap(ca).into_value();
|
413
|
-
}
|
414
|
-
DataType::Duration(_) => {
|
415
|
-
let ca = series.duration().unwrap();
|
416
|
-
return Wrap(ca).into_value();
|
417
|
-
}
|
418
|
-
DataType::Binary => {
|
419
|
-
let ca = series.binary().unwrap();
|
420
|
-
return Wrap(ca).into_value();
|
421
|
-
}
|
422
|
-
DataType::Null => {
|
423
|
-
let null: Option<u8> = None;
|
424
|
-
let n = series.len();
|
425
|
-
let iter = std::iter::repeat(null).take(n);
|
426
|
-
use std::iter::{Repeat, Take};
|
427
|
-
struct NullIter {
|
428
|
-
iter: Take<Repeat<Option<u8>>>,
|
429
|
-
n: usize,
|
430
|
-
}
|
431
|
-
impl Iterator for NullIter {
|
432
|
-
type Item = Option<u8>;
|
433
|
-
|
434
|
-
fn next(&mut self) -> Option<Self::Item> {
|
435
|
-
self.iter.next()
|
436
|
-
}
|
437
|
-
fn size_hint(&self) -> (usize, Option<usize>) {
|
438
|
-
(self.n, Some(self.n))
|
439
|
-
}
|
440
|
-
}
|
441
|
-
impl ExactSizeIterator for NullIter {}
|
442
|
-
|
443
|
-
RArray::from_iter(NullIter { iter, n }).into_value()
|
444
|
-
}
|
445
|
-
DataType::Unknown => {
|
446
|
-
panic!("to_a not implemented for unknown")
|
447
|
-
}
|
448
|
-
DataType::BinaryOffset => {
|
449
|
-
unreachable!()
|
450
|
-
}
|
451
|
-
};
|
452
|
-
rblist
|
453
|
-
}
|
454
|
-
|
455
|
-
to_a_recursive(series)
|
456
|
-
}
|
457
|
-
|
458
328
|
pub fn clone(&self) -> Self {
|
459
329
|
RbSeries::new(self.series.borrow().clone())
|
460
330
|
}
|
@@ -13,6 +13,7 @@ module Polars
|
|
13
13
|
skip_rows: 0,
|
14
14
|
dtypes: nil,
|
15
15
|
null_values: nil,
|
16
|
+
missing_utf8_is_empty_string: false,
|
16
17
|
ignore_errors: false,
|
17
18
|
parse_dates: false,
|
18
19
|
n_threads: nil,
|
@@ -28,10 +29,12 @@ module Polars
|
|
28
29
|
sample_size: 1024,
|
29
30
|
eol_char: "\n",
|
30
31
|
new_columns: nil,
|
31
|
-
|
32
|
+
raise_if_empty: true,
|
33
|
+
truncate_ragged_lines: false,
|
34
|
+
decimal_comma: false
|
32
35
|
)
|
33
36
|
if Utils.pathlike?(file)
|
34
|
-
path = Utils.
|
37
|
+
path = Utils.normalize_filepath(file)
|
35
38
|
end
|
36
39
|
|
37
40
|
dtype_list = nil
|
@@ -72,12 +75,15 @@ module Polars
|
|
72
75
|
comment_char,
|
73
76
|
quote_char,
|
74
77
|
processed_null_values,
|
78
|
+
missing_utf8_is_empty_string,
|
75
79
|
parse_dates,
|
76
80
|
skip_rows_after_header,
|
77
81
|
Utils._prepare_row_count_args(row_count_name, row_count_offset),
|
78
82
|
sample_size,
|
79
83
|
eol_char,
|
80
|
-
|
84
|
+
raise_if_empty,
|
85
|
+
truncate_ragged_lines,
|
86
|
+
decimal_comma
|
81
87
|
)
|
82
88
|
self.new_columns = new_columns
|
83
89
|
end
|
data/lib/polars/convert.rb
CHANGED
@@ -27,7 +27,12 @@ module Polars
|
|
27
27
|
# # │ 2 ┆ 4 │
|
28
28
|
# # └─────┴─────┘
|
29
29
|
def from_hash(data, schema: nil, columns: nil)
|
30
|
-
|
30
|
+
Utils.wrap_df(
|
31
|
+
DataFrame.hash_to_rbdf(
|
32
|
+
data,
|
33
|
+
schema: schema || columns
|
34
|
+
)
|
35
|
+
)
|
31
36
|
end
|
32
37
|
|
33
38
|
# Construct a DataFrame from a sequence of dictionaries. This operation clones data.
|