polars-df 0.10.0 → 0.11.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/Cargo.lock +90 -48
- data/README.md +6 -6
- data/ext/polars/Cargo.toml +7 -5
- data/ext/polars/src/batched_csv.rs +53 -52
- data/ext/polars/src/conversion/mod.rs +13 -60
- data/ext/polars/src/dataframe/construction.rs +186 -0
- data/ext/polars/src/dataframe/export.rs +48 -0
- data/ext/polars/src/dataframe/general.rs +607 -0
- data/ext/polars/src/dataframe/io.rs +463 -0
- data/ext/polars/src/dataframe/mod.rs +26 -0
- data/ext/polars/src/expr/datetime.rs +6 -2
- data/ext/polars/src/expr/general.rs +28 -6
- data/ext/polars/src/expr/rolling.rs +185 -69
- data/ext/polars/src/expr/string.rs +9 -30
- data/ext/polars/src/functions/lazy.rs +2 -0
- data/ext/polars/src/functions/range.rs +74 -0
- data/ext/polars/src/interop/mod.rs +1 -0
- data/ext/polars/src/interop/numo/mod.rs +2 -0
- data/ext/polars/src/interop/numo/to_numo_df.rs +23 -0
- data/ext/polars/src/interop/numo/to_numo_series.rs +60 -0
- data/ext/polars/src/lazyframe/mod.rs +54 -38
- data/ext/polars/src/lib.rs +46 -21
- data/ext/polars/src/map/lazy.rs +5 -25
- data/ext/polars/src/map/series.rs +7 -1
- data/ext/polars/src/series/aggregation.rs +47 -30
- data/ext/polars/src/series/export.rs +131 -49
- data/ext/polars/src/series/mod.rs +1 -131
- data/lib/polars/batched_csv_reader.rb +9 -3
- data/lib/polars/convert.rb +6 -1
- data/lib/polars/data_frame.rb +83 -302
- data/lib/polars/date_time_expr.rb +1 -0
- data/lib/polars/date_time_name_space.rb +5 -1
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/exceptions.rb +4 -0
- data/lib/polars/expr.rb +1134 -20
- data/lib/polars/functions/range/date_range.rb +92 -0
- data/lib/polars/functions/range/datetime_range.rb +149 -0
- data/lib/polars/functions/range/time_range.rb +141 -0
- data/lib/polars/group_by.rb +88 -23
- data/lib/polars/io/avro.rb +24 -0
- data/lib/polars/{io.rb → io/csv.rb} +296 -490
- data/lib/polars/io/database.rb +73 -0
- data/lib/polars/io/ipc.rb +247 -0
- data/lib/polars/io/json.rb +18 -0
- data/lib/polars/io/ndjson.rb +69 -0
- data/lib/polars/io/parquet.rb +226 -0
- data/lib/polars/lazy_frame.rb +23 -166
- data/lib/polars/lazy_group_by.rb +100 -3
- data/lib/polars/rolling_group_by.rb +2 -2
- data/lib/polars/series.rb +2 -2
- data/lib/polars/string_expr.rb +37 -36
- data/lib/polars/utils.rb +35 -1
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +9 -1
- metadata +21 -5
- data/ext/polars/src/dataframe.rs +0 -1208
@@ -1,57 +1,139 @@
|
|
1
|
-
use magnus::{
|
1
|
+
use magnus::{value::qnil, IntoValue, RArray, Value};
|
2
2
|
use polars_core::prelude::*;
|
3
3
|
|
4
|
-
use crate::
|
4
|
+
use crate::prelude::*;
|
5
|
+
use crate::RbSeries;
|
5
6
|
|
6
7
|
impl RbSeries {
|
7
|
-
///
|
8
|
-
/// This
|
9
|
-
pub fn
|
10
|
-
let
|
11
|
-
match s.dtype() {
|
12
|
-
DataType::String => {
|
13
|
-
let ca = s.str().unwrap();
|
8
|
+
/// Convert this Series to a Ruby array.
|
9
|
+
/// This operation copies data.
|
10
|
+
pub fn to_a(&self) -> Value {
|
11
|
+
let series = &self.series.borrow();
|
14
12
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
.
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
13
|
+
fn to_a_recursive(series: &Series) -> Value {
|
14
|
+
let rblist = match series.dtype() {
|
15
|
+
DataType::Boolean => RArray::from_iter(series.bool().unwrap()).into_value(),
|
16
|
+
DataType::UInt8 => RArray::from_iter(series.u8().unwrap()).into_value(),
|
17
|
+
DataType::UInt16 => RArray::from_iter(series.u16().unwrap()).into_value(),
|
18
|
+
DataType::UInt32 => RArray::from_iter(series.u32().unwrap()).into_value(),
|
19
|
+
DataType::UInt64 => RArray::from_iter(series.u64().unwrap()).into_value(),
|
20
|
+
DataType::Int8 => RArray::from_iter(series.i8().unwrap()).into_value(),
|
21
|
+
DataType::Int16 => RArray::from_iter(series.i16().unwrap()).into_value(),
|
22
|
+
DataType::Int32 => RArray::from_iter(series.i32().unwrap()).into_value(),
|
23
|
+
DataType::Int64 => RArray::from_iter(series.i64().unwrap()).into_value(),
|
24
|
+
DataType::Float32 => RArray::from_iter(series.f32().unwrap()).into_value(),
|
25
|
+
DataType::Float64 => RArray::from_iter(series.f64().unwrap()).into_value(),
|
26
|
+
DataType::Categorical(_, _) | DataType::Enum(_, _) => {
|
27
|
+
RArray::from_iter(series.categorical().unwrap().iter_str()).into_value()
|
28
|
+
}
|
29
|
+
DataType::Object(_, _) => {
|
30
|
+
let v = RArray::with_capacity(series.len());
|
31
|
+
for i in 0..series.len() {
|
32
|
+
let obj: Option<&ObjectValue> = series.get_object(i).map(|any| any.into());
|
33
|
+
match obj {
|
34
|
+
Some(val) => v.push(val.to_object()).unwrap(),
|
35
|
+
None => v.push(qnil()).unwrap(),
|
36
|
+
};
|
37
|
+
}
|
38
|
+
v.into_value()
|
39
|
+
}
|
40
|
+
DataType::List(_) => {
|
41
|
+
let v = RArray::new();
|
42
|
+
let ca = series.list().unwrap();
|
43
|
+
for opt_s in unsafe { ca.amortized_iter() } {
|
44
|
+
match opt_s {
|
45
|
+
None => {
|
46
|
+
v.push(qnil()).unwrap();
|
47
|
+
}
|
48
|
+
Some(s) => {
|
49
|
+
let rblst = to_a_recursive(s.as_ref());
|
50
|
+
v.push(rblst).unwrap();
|
51
|
+
}
|
52
|
+
}
|
53
|
+
}
|
54
|
+
v.into_value()
|
55
|
+
}
|
56
|
+
DataType::Array(_, _) => {
|
57
|
+
let v = RArray::new();
|
58
|
+
let ca = series.array().unwrap();
|
59
|
+
for opt_s in ca.amortized_iter() {
|
60
|
+
match opt_s {
|
61
|
+
None => {
|
62
|
+
v.push(qnil()).unwrap();
|
63
|
+
}
|
64
|
+
Some(s) => {
|
65
|
+
let rblst = to_a_recursive(s.as_ref());
|
66
|
+
v.push(rblst).unwrap();
|
67
|
+
}
|
68
|
+
}
|
69
|
+
}
|
70
|
+
v.into_value()
|
71
|
+
}
|
72
|
+
DataType::Date => {
|
73
|
+
let ca = series.date().unwrap();
|
74
|
+
return Wrap(ca).into_value();
|
75
|
+
}
|
76
|
+
DataType::Time => {
|
77
|
+
let ca = series.time().unwrap();
|
78
|
+
return Wrap(ca).into_value();
|
79
|
+
}
|
80
|
+
DataType::Datetime(_, _) => {
|
81
|
+
let ca = series.datetime().unwrap();
|
82
|
+
return Wrap(ca).into_value();
|
83
|
+
}
|
84
|
+
DataType::Decimal(_, _) => {
|
85
|
+
let ca = series.decimal().unwrap();
|
86
|
+
return Wrap(ca).into_value();
|
87
|
+
}
|
88
|
+
DataType::String => {
|
89
|
+
let ca = series.str().unwrap();
|
90
|
+
return Wrap(ca).into_value();
|
91
|
+
}
|
92
|
+
DataType::Struct(_) => {
|
93
|
+
let ca = series.struct_().unwrap();
|
94
|
+
return Wrap(ca).into_value();
|
95
|
+
}
|
96
|
+
DataType::Duration(_) => {
|
97
|
+
let ca = series.duration().unwrap();
|
98
|
+
return Wrap(ca).into_value();
|
99
|
+
}
|
100
|
+
DataType::Binary => {
|
101
|
+
let ca = series.binary().unwrap();
|
102
|
+
return Wrap(ca).into_value();
|
103
|
+
}
|
104
|
+
DataType::Null => {
|
105
|
+
let null: Option<u8> = None;
|
106
|
+
let n = series.len();
|
107
|
+
let iter = std::iter::repeat(null).take(n);
|
108
|
+
use std::iter::{Repeat, Take};
|
109
|
+
struct NullIter {
|
110
|
+
iter: Take<Repeat<Option<u8>>>,
|
111
|
+
n: usize,
|
112
|
+
}
|
113
|
+
impl Iterator for NullIter {
|
114
|
+
type Item = Option<u8>;
|
115
|
+
|
116
|
+
fn next(&mut self) -> Option<Self::Item> {
|
117
|
+
self.iter.next()
|
118
|
+
}
|
119
|
+
fn size_hint(&self) -> (usize, Option<usize>) {
|
120
|
+
(self.n, Some(self.n))
|
121
|
+
}
|
122
|
+
}
|
123
|
+
impl ExactSizeIterator for NullIter {}
|
124
|
+
|
125
|
+
RArray::from_iter(NullIter { iter, n }).into_value()
|
126
|
+
}
|
127
|
+
DataType::Unknown(_) => {
|
128
|
+
panic!("to_a not implemented for unknown")
|
129
|
+
}
|
130
|
+
DataType::BinaryOffset => {
|
131
|
+
unreachable!()
|
132
|
+
}
|
133
|
+
};
|
134
|
+
rblist
|
55
135
|
}
|
136
|
+
|
137
|
+
to_a_recursive(series)
|
56
138
|
}
|
57
139
|
}
|
@@ -5,7 +5,7 @@ mod construction;
|
|
5
5
|
mod export;
|
6
6
|
mod scatter;
|
7
7
|
|
8
|
-
use magnus::{exception, prelude::*,
|
8
|
+
use magnus::{exception, prelude::*, Error, IntoValue, RArray, Value};
|
9
9
|
use polars::prelude::*;
|
10
10
|
use polars::series::IsSorted;
|
11
11
|
use std::cell::RefCell;
|
@@ -325,136 +325,6 @@ impl RbSeries {
|
|
325
325
|
self.series.borrow().len()
|
326
326
|
}
|
327
327
|
|
328
|
-
pub fn to_a(&self) -> Value {
|
329
|
-
let series = &self.series.borrow();
|
330
|
-
|
331
|
-
fn to_a_recursive(series: &Series) -> Value {
|
332
|
-
let rblist = match series.dtype() {
|
333
|
-
DataType::Boolean => RArray::from_iter(series.bool().unwrap()).into_value(),
|
334
|
-
DataType::UInt8 => RArray::from_iter(series.u8().unwrap()).into_value(),
|
335
|
-
DataType::UInt16 => RArray::from_iter(series.u16().unwrap()).into_value(),
|
336
|
-
DataType::UInt32 => RArray::from_iter(series.u32().unwrap()).into_value(),
|
337
|
-
DataType::UInt64 => RArray::from_iter(series.u64().unwrap()).into_value(),
|
338
|
-
DataType::Int8 => RArray::from_iter(series.i8().unwrap()).into_value(),
|
339
|
-
DataType::Int16 => RArray::from_iter(series.i16().unwrap()).into_value(),
|
340
|
-
DataType::Int32 => RArray::from_iter(series.i32().unwrap()).into_value(),
|
341
|
-
DataType::Int64 => RArray::from_iter(series.i64().unwrap()).into_value(),
|
342
|
-
DataType::Float32 => RArray::from_iter(series.f32().unwrap()).into_value(),
|
343
|
-
DataType::Float64 => RArray::from_iter(series.f64().unwrap()).into_value(),
|
344
|
-
DataType::Categorical(_, _) | DataType::Enum(_, _) => {
|
345
|
-
RArray::from_iter(series.categorical().unwrap().iter_str()).into_value()
|
346
|
-
}
|
347
|
-
DataType::Object(_, _) => {
|
348
|
-
let v = RArray::with_capacity(series.len());
|
349
|
-
for i in 0..series.len() {
|
350
|
-
let obj: Option<&ObjectValue> = series.get_object(i).map(|any| any.into());
|
351
|
-
match obj {
|
352
|
-
Some(val) => v.push(val.to_object()).unwrap(),
|
353
|
-
None => v.push(qnil()).unwrap(),
|
354
|
-
};
|
355
|
-
}
|
356
|
-
v.into_value()
|
357
|
-
}
|
358
|
-
DataType::List(_) => {
|
359
|
-
let v = RArray::new();
|
360
|
-
let ca = series.list().unwrap();
|
361
|
-
for opt_s in unsafe { ca.amortized_iter() } {
|
362
|
-
match opt_s {
|
363
|
-
None => {
|
364
|
-
v.push(qnil()).unwrap();
|
365
|
-
}
|
366
|
-
Some(s) => {
|
367
|
-
let rblst = to_a_recursive(s.as_ref());
|
368
|
-
v.push(rblst).unwrap();
|
369
|
-
}
|
370
|
-
}
|
371
|
-
}
|
372
|
-
v.into_value()
|
373
|
-
}
|
374
|
-
DataType::Array(_, _) => {
|
375
|
-
let v = RArray::new();
|
376
|
-
let ca = series.array().unwrap();
|
377
|
-
for opt_s in ca.amortized_iter() {
|
378
|
-
match opt_s {
|
379
|
-
None => {
|
380
|
-
v.push(qnil()).unwrap();
|
381
|
-
}
|
382
|
-
Some(s) => {
|
383
|
-
let rblst = to_a_recursive(s.as_ref());
|
384
|
-
v.push(rblst).unwrap();
|
385
|
-
}
|
386
|
-
}
|
387
|
-
}
|
388
|
-
v.into_value()
|
389
|
-
}
|
390
|
-
DataType::Date => {
|
391
|
-
let ca = series.date().unwrap();
|
392
|
-
return Wrap(ca).into_value();
|
393
|
-
}
|
394
|
-
DataType::Time => {
|
395
|
-
let ca = series.time().unwrap();
|
396
|
-
return Wrap(ca).into_value();
|
397
|
-
}
|
398
|
-
DataType::Datetime(_, _) => {
|
399
|
-
let ca = series.datetime().unwrap();
|
400
|
-
return Wrap(ca).into_value();
|
401
|
-
}
|
402
|
-
DataType::Decimal(_, _) => {
|
403
|
-
let ca = series.decimal().unwrap();
|
404
|
-
return Wrap(ca).into_value();
|
405
|
-
}
|
406
|
-
DataType::String => {
|
407
|
-
let ca = series.str().unwrap();
|
408
|
-
return Wrap(ca).into_value();
|
409
|
-
}
|
410
|
-
DataType::Struct(_) => {
|
411
|
-
let ca = series.struct_().unwrap();
|
412
|
-
return Wrap(ca).into_value();
|
413
|
-
}
|
414
|
-
DataType::Duration(_) => {
|
415
|
-
let ca = series.duration().unwrap();
|
416
|
-
return Wrap(ca).into_value();
|
417
|
-
}
|
418
|
-
DataType::Binary => {
|
419
|
-
let ca = series.binary().unwrap();
|
420
|
-
return Wrap(ca).into_value();
|
421
|
-
}
|
422
|
-
DataType::Null => {
|
423
|
-
let null: Option<u8> = None;
|
424
|
-
let n = series.len();
|
425
|
-
let iter = std::iter::repeat(null).take(n);
|
426
|
-
use std::iter::{Repeat, Take};
|
427
|
-
struct NullIter {
|
428
|
-
iter: Take<Repeat<Option<u8>>>,
|
429
|
-
n: usize,
|
430
|
-
}
|
431
|
-
impl Iterator for NullIter {
|
432
|
-
type Item = Option<u8>;
|
433
|
-
|
434
|
-
fn next(&mut self) -> Option<Self::Item> {
|
435
|
-
self.iter.next()
|
436
|
-
}
|
437
|
-
fn size_hint(&self) -> (usize, Option<usize>) {
|
438
|
-
(self.n, Some(self.n))
|
439
|
-
}
|
440
|
-
}
|
441
|
-
impl ExactSizeIterator for NullIter {}
|
442
|
-
|
443
|
-
RArray::from_iter(NullIter { iter, n }).into_value()
|
444
|
-
}
|
445
|
-
DataType::Unknown => {
|
446
|
-
panic!("to_a not implemented for unknown")
|
447
|
-
}
|
448
|
-
DataType::BinaryOffset => {
|
449
|
-
unreachable!()
|
450
|
-
}
|
451
|
-
};
|
452
|
-
rblist
|
453
|
-
}
|
454
|
-
|
455
|
-
to_a_recursive(series)
|
456
|
-
}
|
457
|
-
|
458
328
|
pub fn clone(&self) -> Self {
|
459
329
|
RbSeries::new(self.series.borrow().clone())
|
460
330
|
}
|
@@ -13,6 +13,7 @@ module Polars
|
|
13
13
|
skip_rows: 0,
|
14
14
|
dtypes: nil,
|
15
15
|
null_values: nil,
|
16
|
+
missing_utf8_is_empty_string: false,
|
16
17
|
ignore_errors: false,
|
17
18
|
parse_dates: false,
|
18
19
|
n_threads: nil,
|
@@ -28,10 +29,12 @@ module Polars
|
|
28
29
|
sample_size: 1024,
|
29
30
|
eol_char: "\n",
|
30
31
|
new_columns: nil,
|
31
|
-
|
32
|
+
raise_if_empty: true,
|
33
|
+
truncate_ragged_lines: false,
|
34
|
+
decimal_comma: false
|
32
35
|
)
|
33
36
|
if Utils.pathlike?(file)
|
34
|
-
path = Utils.
|
37
|
+
path = Utils.normalize_filepath(file)
|
35
38
|
end
|
36
39
|
|
37
40
|
dtype_list = nil
|
@@ -72,12 +75,15 @@ module Polars
|
|
72
75
|
comment_char,
|
73
76
|
quote_char,
|
74
77
|
processed_null_values,
|
78
|
+
missing_utf8_is_empty_string,
|
75
79
|
parse_dates,
|
76
80
|
skip_rows_after_header,
|
77
81
|
Utils._prepare_row_count_args(row_count_name, row_count_offset),
|
78
82
|
sample_size,
|
79
83
|
eol_char,
|
80
|
-
|
84
|
+
raise_if_empty,
|
85
|
+
truncate_ragged_lines,
|
86
|
+
decimal_comma
|
81
87
|
)
|
82
88
|
self.new_columns = new_columns
|
83
89
|
end
|
data/lib/polars/convert.rb
CHANGED
@@ -27,7 +27,12 @@ module Polars
|
|
27
27
|
# # │ 2 ┆ 4 │
|
28
28
|
# # └─────┴─────┘
|
29
29
|
def from_hash(data, schema: nil, columns: nil)
|
30
|
-
|
30
|
+
Utils.wrap_df(
|
31
|
+
DataFrame.hash_to_rbdf(
|
32
|
+
data,
|
33
|
+
schema: schema || columns
|
34
|
+
)
|
35
|
+
)
|
31
36
|
end
|
32
37
|
|
33
38
|
# Construct a DataFrame from a sequence of dictionaries. This operation clones data.
|