polars-df 0.6.0 → 0.8.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +24 -0
- data/Cargo.lock +597 -599
- data/Cargo.toml +1 -0
- data/README.md +8 -7
- data/ext/polars/Cargo.toml +20 -10
- data/ext/polars/src/batched_csv.rs +27 -28
- data/ext/polars/src/conversion.rs +135 -106
- data/ext/polars/src/dataframe.rs +140 -131
- data/ext/polars/src/error.rs +0 -5
- data/ext/polars/src/expr/binary.rs +18 -6
- data/ext/polars/src/expr/categorical.rs +8 -1
- data/ext/polars/src/expr/datetime.rs +10 -12
- data/ext/polars/src/expr/general.rs +129 -286
- data/ext/polars/src/expr/list.rs +17 -9
- data/ext/polars/src/{expr.rs → expr/mod.rs} +4 -2
- data/ext/polars/src/expr/name.rs +44 -0
- data/ext/polars/src/expr/rolling.rs +201 -0
- data/ext/polars/src/expr/string.rs +94 -67
- data/ext/polars/src/file.rs +3 -3
- data/ext/polars/src/functions/aggregation.rs +35 -0
- data/ext/polars/src/functions/eager.rs +7 -31
- data/ext/polars/src/functions/io.rs +10 -10
- data/ext/polars/src/functions/lazy.rs +66 -41
- data/ext/polars/src/functions/meta.rs +30 -0
- data/ext/polars/src/functions/misc.rs +8 -0
- data/ext/polars/src/functions/mod.rs +5 -0
- data/ext/polars/src/functions/random.rs +6 -0
- data/ext/polars/src/functions/range.rs +41 -0
- data/ext/polars/src/functions/string_cache.rs +11 -0
- data/ext/polars/src/functions/whenthen.rs +7 -7
- data/ext/polars/src/lazyframe.rs +74 -60
- data/ext/polars/src/lib.rs +175 -91
- data/ext/polars/src/{apply → map}/dataframe.rs +29 -34
- data/ext/polars/src/{apply → map}/mod.rs +5 -5
- data/ext/polars/src/{apply → map}/series.rs +18 -22
- data/ext/polars/src/object.rs +0 -30
- data/ext/polars/src/on_startup.rs +32 -0
- data/ext/polars/src/rb_modules.rs +22 -7
- data/ext/polars/src/series/aggregation.rs +3 -0
- data/ext/polars/src/series/construction.rs +5 -5
- data/ext/polars/src/series/export.rs +4 -4
- data/ext/polars/src/{series.rs → series/mod.rs} +28 -45
- data/ext/polars/src/series/{set_at_idx.rs → scatter.rs} +38 -22
- data/ext/polars/src/sql.rs +46 -0
- data/ext/polars/src/utils.rs +1 -1
- data/lib/polars/config.rb +530 -0
- data/lib/polars/data_frame.rb +182 -145
- data/lib/polars/data_types.rb +4 -1
- data/lib/polars/date_time_expr.rb +23 -28
- data/lib/polars/date_time_name_space.rb +17 -37
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/expr.rb +398 -110
- data/lib/polars/functions.rb +29 -37
- data/lib/polars/group_by.rb +38 -55
- data/lib/polars/io.rb +40 -5
- data/lib/polars/lazy_frame.rb +116 -89
- data/lib/polars/lazy_functions.rb +40 -68
- data/lib/polars/lazy_group_by.rb +7 -8
- data/lib/polars/list_expr.rb +12 -8
- data/lib/polars/list_name_space.rb +2 -2
- data/lib/polars/name_expr.rb +198 -0
- data/lib/polars/rolling_group_by.rb +2 -2
- data/lib/polars/series.rb +315 -43
- data/lib/polars/sql_context.rb +194 -0
- data/lib/polars/string_expr.rb +114 -60
- data/lib/polars/string_name_space.rb +19 -4
- data/lib/polars/struct_expr.rb +1 -1
- data/lib/polars/struct_name_space.rb +1 -1
- data/lib/polars/utils.rb +25 -13
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +3 -0
- metadata +23 -11
- /data/ext/polars/src/{apply → map}/lazy.rs +0 -0
@@ -3,17 +3,16 @@ mod arithmetic;
|
|
3
3
|
mod comparison;
|
4
4
|
mod construction;
|
5
5
|
mod export;
|
6
|
-
mod
|
6
|
+
mod scatter;
|
7
7
|
|
8
|
-
use magnus::{exception, Error, IntoValue, RArray, Value
|
8
|
+
use magnus::{exception, prelude::*, value::qnil, Error, IntoValue, RArray, Value};
|
9
9
|
use polars::prelude::*;
|
10
10
|
use polars::series::IsSorted;
|
11
11
|
use std::cell::RefCell;
|
12
12
|
|
13
|
-
use crate::apply::series::{call_lambda_and_extract, ApplyLambda};
|
14
13
|
use crate::apply_method_all_arrow_series2;
|
15
14
|
use crate::conversion::*;
|
16
|
-
use crate::series::
|
15
|
+
use crate::map::series::{call_lambda_and_extract, ApplyLambda};
|
17
16
|
use crate::{RbDataFrame, RbPolarsErr, RbResult};
|
18
17
|
|
19
18
|
#[magnus::wrap(class = "Polars::RbSeries")]
|
@@ -38,7 +37,7 @@ impl RbSeries {
|
|
38
37
|
pub fn to_series_collection(rs: RArray) -> RbResult<Vec<Series>> {
|
39
38
|
let mut series = Vec::new();
|
40
39
|
for item in rs.each() {
|
41
|
-
series.push(
|
40
|
+
series.push(<&RbSeries>::try_convert(item?)?.series.borrow().clone());
|
42
41
|
}
|
43
42
|
Ok(series)
|
44
43
|
}
|
@@ -81,7 +80,7 @@ impl RbSeries {
|
|
81
80
|
|
82
81
|
pub fn get_fmt(&self, index: usize, str_lengths: usize) -> String {
|
83
82
|
let val = format!("{}", self.series.borrow().get(index).unwrap());
|
84
|
-
if let DataType::
|
83
|
+
if let DataType::String | DataType::Categorical(_, _) = self.series.borrow().dtype() {
|
85
84
|
let v_trunc = &val[..val
|
86
85
|
.char_indices()
|
87
86
|
.take(str_lengths)
|
@@ -91,7 +90,7 @@ impl RbSeries {
|
|
91
90
|
if val == v_trunc {
|
92
91
|
val
|
93
92
|
} else {
|
94
|
-
format!("{}
|
93
|
+
format!("{}…", v_trunc)
|
95
94
|
}
|
96
95
|
} else {
|
97
96
|
val
|
@@ -274,15 +273,13 @@ impl RbSeries {
|
|
274
273
|
Ok(s.into())
|
275
274
|
}
|
276
275
|
|
277
|
-
pub fn
|
276
|
+
pub fn equals(&self, other: &RbSeries, null_equal: bool, strict: bool) -> bool {
|
278
277
|
if strict {
|
279
278
|
self.series.borrow().eq(&other.series.borrow())
|
280
279
|
} else if null_equal {
|
281
|
-
self.series
|
282
|
-
.borrow()
|
283
|
-
.series_equal_missing(&other.series.borrow())
|
280
|
+
self.series.borrow().equals_missing(&other.series.borrow())
|
284
281
|
} else {
|
285
|
-
self.series.borrow().
|
282
|
+
self.series.borrow().equals(&other.series.borrow())
|
286
283
|
}
|
287
284
|
}
|
288
285
|
|
@@ -316,16 +313,16 @@ impl RbSeries {
|
|
316
313
|
DataType::Int64 => RArray::from_iter(series.i64().unwrap()).into_value(),
|
317
314
|
DataType::Float32 => RArray::from_iter(series.f32().unwrap()).into_value(),
|
318
315
|
DataType::Float64 => RArray::from_iter(series.f64().unwrap()).into_value(),
|
319
|
-
DataType::Categorical(_) => {
|
316
|
+
DataType::Categorical(_, _) => {
|
320
317
|
RArray::from_iter(series.categorical().unwrap().iter_str()).into_value()
|
321
318
|
}
|
322
|
-
DataType::Object(_) => {
|
319
|
+
DataType::Object(_, _) => {
|
323
320
|
let v = RArray::with_capacity(series.len());
|
324
321
|
for i in 0..series.len() {
|
325
322
|
let obj: Option<&ObjectValue> = series.get_object(i).map(|any| any.into());
|
326
323
|
match obj {
|
327
324
|
Some(val) => v.push(val.to_object()).unwrap(),
|
328
|
-
None => v.push(
|
325
|
+
None => v.push(qnil()).unwrap(),
|
329
326
|
};
|
330
327
|
}
|
331
328
|
v.into_value()
|
@@ -333,10 +330,10 @@ impl RbSeries {
|
|
333
330
|
DataType::List(_) => {
|
334
331
|
let v = RArray::new();
|
335
332
|
let ca = series.list().unwrap();
|
336
|
-
for opt_s in ca.amortized_iter() {
|
333
|
+
for opt_s in unsafe { ca.amortized_iter() } {
|
337
334
|
match opt_s {
|
338
335
|
None => {
|
339
|
-
v.push(
|
336
|
+
v.push(qnil()).unwrap();
|
340
337
|
}
|
341
338
|
Some(s) => {
|
342
339
|
let rblst = to_a_recursive(s.as_ref());
|
@@ -352,7 +349,7 @@ impl RbSeries {
|
|
352
349
|
for opt_s in ca.amortized_iter() {
|
353
350
|
match opt_s {
|
354
351
|
None => {
|
355
|
-
v.push(
|
352
|
+
v.push(qnil()).unwrap();
|
356
353
|
}
|
357
354
|
Some(s) => {
|
358
355
|
let rblst = to_a_recursive(s.as_ref());
|
@@ -378,8 +375,8 @@ impl RbSeries {
|
|
378
375
|
let ca = series.decimal().unwrap();
|
379
376
|
return Wrap(ca).into_value();
|
380
377
|
}
|
381
|
-
DataType::
|
382
|
-
let ca = series.
|
378
|
+
DataType::String => {
|
379
|
+
let ca = series.str().unwrap();
|
383
380
|
return Wrap(ca).into_value();
|
384
381
|
}
|
385
382
|
DataType::Struct(_) => {
|
@@ -443,7 +440,7 @@ impl RbSeries {
|
|
443
440
|
|
444
441
|
macro_rules! dispatch_apply {
|
445
442
|
($self:expr, $method:ident, $($args:expr),*) => {
|
446
|
-
if matches!($self.dtype(), DataType::Object(_)) {
|
443
|
+
if matches!($self.dtype(), DataType::Object(_, _)) {
|
447
444
|
// let ca = $self.0.unpack::<ObjectType<ObjectValue>>().unwrap();
|
448
445
|
// ca.$method($($args),*)
|
449
446
|
todo!()
|
@@ -464,7 +461,7 @@ impl RbSeries {
|
|
464
461
|
DataType::Datetime(_, _)
|
465
462
|
| DataType::Date
|
466
463
|
| DataType::Duration(_)
|
467
|
-
| DataType::Categorical(_)
|
464
|
+
| DataType::Categorical(_, _)
|
468
465
|
| DataType::Time
|
469
466
|
) || !skip_nulls
|
470
467
|
{
|
@@ -605,12 +602,12 @@ impl RbSeries {
|
|
605
602
|
)?;
|
606
603
|
ca.into_datetime(tu, tz).into_series()
|
607
604
|
}
|
608
|
-
Some(DataType::
|
605
|
+
Some(DataType::String) => {
|
609
606
|
let ca = dispatch_apply!(series, apply_lambda_with_utf8_out_type, lambda, 0, None)?;
|
610
607
|
|
611
608
|
ca.into_series()
|
612
609
|
}
|
613
|
-
Some(DataType::Object(_)) => {
|
610
|
+
Some(DataType::Object(_, _)) => {
|
614
611
|
let ca =
|
615
612
|
dispatch_apply!(series, apply_lambda_with_object_out_type, lambda, 0, None)?;
|
616
613
|
ca.into_series()
|
@@ -643,14 +640,6 @@ impl RbSeries {
|
|
643
640
|
Ok(df.into())
|
644
641
|
}
|
645
642
|
|
646
|
-
pub fn peak_max(&self) -> Self {
|
647
|
-
self.series.borrow().peak_max().into_series().into()
|
648
|
-
}
|
649
|
-
|
650
|
-
pub fn peak_min(&self) -> Self {
|
651
|
-
self.series.borrow().peak_min().into_series().into()
|
652
|
-
}
|
653
|
-
|
654
643
|
pub fn n_unique(&self) -> RbResult<usize> {
|
655
644
|
let n = self.series.borrow().n_unique().map_err(RbPolarsErr::from)?;
|
656
645
|
Ok(n)
|
@@ -665,8 +654,13 @@ impl RbSeries {
|
|
665
654
|
self.series.borrow_mut().shrink_to_fit();
|
666
655
|
}
|
667
656
|
|
668
|
-
pub fn dot(&self, other: &RbSeries) ->
|
669
|
-
self
|
657
|
+
pub fn dot(&self, other: &RbSeries) -> RbResult<f64> {
|
658
|
+
let out = self
|
659
|
+
.series
|
660
|
+
.borrow()
|
661
|
+
.dot(&other.series.borrow())
|
662
|
+
.map_err(RbPolarsErr::from)?;
|
663
|
+
Ok(out)
|
670
664
|
}
|
671
665
|
|
672
666
|
pub fn skew(&self, bias: bool) -> RbResult<Option<f64>> {
|
@@ -708,17 +702,6 @@ impl RbSeries {
|
|
708
702
|
None
|
709
703
|
}
|
710
704
|
}
|
711
|
-
|
712
|
-
pub fn set_at_idx(&self, idx: &RbSeries, values: &RbSeries) -> RbResult<()> {
|
713
|
-
let mut s = self.series.borrow_mut();
|
714
|
-
match set_at_idx(s.clone(), &idx.series.borrow(), &values.series.borrow()) {
|
715
|
-
Ok(out) => {
|
716
|
-
*s = out;
|
717
|
-
Ok(())
|
718
|
-
}
|
719
|
-
Err(e) => Err(RbPolarsErr::from(e)),
|
720
|
-
}
|
721
|
-
}
|
722
705
|
}
|
723
706
|
|
724
707
|
macro_rules! impl_set_with_mask {
|
@@ -1,18 +1,34 @@
|
|
1
|
-
|
1
|
+
use polars::export::arrow::array::Array;
|
2
2
|
use polars::prelude::*;
|
3
3
|
|
4
|
-
|
4
|
+
use crate::error::RbPolarsErr;
|
5
|
+
use crate::{RbResult, RbSeries};
|
6
|
+
|
7
|
+
impl RbSeries {
|
8
|
+
pub fn scatter(&self, idx: &RbSeries, values: &RbSeries) -> RbResult<()> {
|
9
|
+
let mut s = self.series.borrow_mut();
|
10
|
+
match scatter(s.clone(), &idx.series.borrow(), &values.series.borrow()) {
|
11
|
+
Ok(out) => {
|
12
|
+
*s = out;
|
13
|
+
Ok(())
|
14
|
+
}
|
15
|
+
Err(e) => Err(RbPolarsErr::from(e)),
|
16
|
+
}
|
17
|
+
}
|
18
|
+
}
|
19
|
+
|
20
|
+
fn scatter(mut s: Series, idx: &Series, values: &Series) -> PolarsResult<Series> {
|
5
21
|
let logical_dtype = s.dtype().clone();
|
6
22
|
let idx = idx.cast(&IDX_DTYPE)?;
|
7
23
|
let idx = idx.rechunk();
|
8
24
|
let idx = idx.idx().unwrap();
|
9
25
|
let idx = idx.downcast_iter().next().unwrap();
|
10
26
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
27
|
+
if idx.null_count() > 0 {
|
28
|
+
return Err(PolarsError::ComputeError(
|
29
|
+
"index values should not be null".into(),
|
30
|
+
));
|
31
|
+
}
|
16
32
|
|
17
33
|
let idx = idx.values().as_slice();
|
18
34
|
|
@@ -27,62 +43,62 @@ pub fn set_at_idx(mut s: Series, idx: &Series, values: &Series) -> PolarsResult<
|
|
27
43
|
DataType::Int8 => {
|
28
44
|
let ca: &mut ChunkedArray<Int8Type> = mutable_s.as_mut();
|
29
45
|
let values = values.i8()?;
|
30
|
-
std::mem::take(ca).
|
46
|
+
std::mem::take(ca).scatter(idx, values)
|
31
47
|
}
|
32
48
|
DataType::Int16 => {
|
33
49
|
let ca: &mut ChunkedArray<Int16Type> = mutable_s.as_mut();
|
34
50
|
let values = values.i16()?;
|
35
|
-
std::mem::take(ca).
|
51
|
+
std::mem::take(ca).scatter(idx, values)
|
36
52
|
}
|
37
53
|
DataType::Int32 => {
|
38
54
|
let ca: &mut ChunkedArray<Int32Type> = mutable_s.as_mut();
|
39
55
|
let values = values.i32()?;
|
40
|
-
std::mem::take(ca).
|
56
|
+
std::mem::take(ca).scatter(idx, values)
|
41
57
|
}
|
42
58
|
DataType::Int64 => {
|
43
59
|
let ca: &mut ChunkedArray<Int64Type> = mutable_s.as_mut();
|
44
60
|
let values = values.i64()?;
|
45
|
-
std::mem::take(ca).
|
61
|
+
std::mem::take(ca).scatter(idx, values)
|
46
62
|
}
|
47
63
|
DataType::UInt8 => {
|
48
64
|
let ca: &mut ChunkedArray<UInt8Type> = mutable_s.as_mut();
|
49
65
|
let values = values.u8()?;
|
50
|
-
std::mem::take(ca).
|
66
|
+
std::mem::take(ca).scatter(idx, values)
|
51
67
|
}
|
52
68
|
DataType::UInt16 => {
|
53
69
|
let ca: &mut ChunkedArray<UInt16Type> = mutable_s.as_mut();
|
54
70
|
let values = values.u16()?;
|
55
|
-
std::mem::take(ca).
|
71
|
+
std::mem::take(ca).scatter(idx, values)
|
56
72
|
}
|
57
73
|
DataType::UInt32 => {
|
58
74
|
let ca: &mut ChunkedArray<UInt32Type> = mutable_s.as_mut();
|
59
75
|
let values = values.u32()?;
|
60
|
-
std::mem::take(ca).
|
76
|
+
std::mem::take(ca).scatter(idx, values)
|
61
77
|
}
|
62
78
|
DataType::UInt64 => {
|
63
79
|
let ca: &mut ChunkedArray<UInt64Type> = mutable_s.as_mut();
|
64
80
|
let values = values.u64()?;
|
65
|
-
std::mem::take(ca).
|
81
|
+
std::mem::take(ca).scatter(idx, values)
|
66
82
|
}
|
67
83
|
DataType::Float32 => {
|
68
84
|
let ca: &mut ChunkedArray<Float32Type> = mutable_s.as_mut();
|
69
85
|
let values = values.f32()?;
|
70
|
-
std::mem::take(ca).
|
86
|
+
std::mem::take(ca).scatter(idx, values)
|
71
87
|
}
|
72
88
|
DataType::Float64 => {
|
73
89
|
let ca: &mut ChunkedArray<Float64Type> = mutable_s.as_mut();
|
74
90
|
let values = values.f64()?;
|
75
|
-
std::mem::take(ca).
|
91
|
+
std::mem::take(ca).scatter(idx, values)
|
76
92
|
}
|
77
93
|
DataType::Boolean => {
|
78
94
|
let ca = s.bool()?;
|
79
95
|
let values = values.bool()?;
|
80
|
-
ca.
|
96
|
+
ca.scatter(idx, values)
|
81
97
|
}
|
82
|
-
DataType::
|
83
|
-
let ca = s.
|
84
|
-
let values = values.
|
85
|
-
ca.
|
98
|
+
DataType::String => {
|
99
|
+
let ca = s.str()?;
|
100
|
+
let values = values.str()?;
|
101
|
+
ca.scatter(idx, values)
|
86
102
|
}
|
87
103
|
_ => panic!("not yet implemented for dtype: {}", logical_dtype),
|
88
104
|
};
|
@@ -0,0 +1,46 @@
|
|
1
|
+
use polars::sql::SQLContext;
|
2
|
+
use std::cell::RefCell;
|
3
|
+
|
4
|
+
use crate::{RbLazyFrame, RbPolarsErr, RbResult};
|
5
|
+
|
6
|
+
#[magnus::wrap(class = "Polars::RbSQLContext")]
|
7
|
+
#[repr(transparent)]
|
8
|
+
#[derive(Clone)]
|
9
|
+
pub struct RbSQLContext {
|
10
|
+
pub context: RefCell<SQLContext>,
|
11
|
+
}
|
12
|
+
|
13
|
+
#[allow(
|
14
|
+
clippy::wrong_self_convention,
|
15
|
+
clippy::should_implement_trait,
|
16
|
+
clippy::len_without_is_empty
|
17
|
+
)]
|
18
|
+
impl RbSQLContext {
|
19
|
+
#[allow(clippy::new_without_default)]
|
20
|
+
pub fn new() -> RbSQLContext {
|
21
|
+
RbSQLContext {
|
22
|
+
context: SQLContext::new().into(),
|
23
|
+
}
|
24
|
+
}
|
25
|
+
|
26
|
+
pub fn execute(&self, query: String) -> RbResult<RbLazyFrame> {
|
27
|
+
Ok(self
|
28
|
+
.context
|
29
|
+
.borrow_mut()
|
30
|
+
.execute(&query)
|
31
|
+
.map_err(RbPolarsErr::from)?
|
32
|
+
.into())
|
33
|
+
}
|
34
|
+
|
35
|
+
pub fn get_tables(&self) -> RbResult<Vec<String>> {
|
36
|
+
Ok(self.context.borrow().get_tables())
|
37
|
+
}
|
38
|
+
|
39
|
+
pub fn register(&self, name: String, lf: &RbLazyFrame) {
|
40
|
+
self.context.borrow_mut().register(&name, lf.ldf.clone())
|
41
|
+
}
|
42
|
+
|
43
|
+
pub fn unregister(&self, name: String) {
|
44
|
+
self.context.borrow_mut().unregister(&name)
|
45
|
+
}
|
46
|
+
}
|
data/ext/polars/src/utils.rs
CHANGED
@@ -23,7 +23,7 @@ macro_rules! apply_method_all_arrow_series2 {
|
|
23
23
|
($self:expr, $method:ident, $($args:expr),*) => {
|
24
24
|
match $self.dtype() {
|
25
25
|
DataType::Boolean => $self.bool().unwrap().$method($($args),*),
|
26
|
-
DataType::
|
26
|
+
DataType::String => $self.str().unwrap().$method($($args),*),
|
27
27
|
DataType::UInt8 => $self.u8().unwrap().$method($($args),*),
|
28
28
|
DataType::UInt16 => $self.u16().unwrap().$method($($args),*),
|
29
29
|
DataType::UInt32 => $self.u32().unwrap().$method($($args),*),
|