polars-df 0.6.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +24 -0
- data/Cargo.lock +597 -599
- data/Cargo.toml +1 -0
- data/README.md +8 -7
- data/ext/polars/Cargo.toml +20 -10
- data/ext/polars/src/batched_csv.rs +27 -28
- data/ext/polars/src/conversion.rs +135 -106
- data/ext/polars/src/dataframe.rs +140 -131
- data/ext/polars/src/error.rs +0 -5
- data/ext/polars/src/expr/binary.rs +18 -6
- data/ext/polars/src/expr/categorical.rs +8 -1
- data/ext/polars/src/expr/datetime.rs +10 -12
- data/ext/polars/src/expr/general.rs +129 -286
- data/ext/polars/src/expr/list.rs +17 -9
- data/ext/polars/src/{expr.rs → expr/mod.rs} +4 -2
- data/ext/polars/src/expr/name.rs +44 -0
- data/ext/polars/src/expr/rolling.rs +201 -0
- data/ext/polars/src/expr/string.rs +94 -67
- data/ext/polars/src/file.rs +3 -3
- data/ext/polars/src/functions/aggregation.rs +35 -0
- data/ext/polars/src/functions/eager.rs +7 -31
- data/ext/polars/src/functions/io.rs +10 -10
- data/ext/polars/src/functions/lazy.rs +66 -41
- data/ext/polars/src/functions/meta.rs +30 -0
- data/ext/polars/src/functions/misc.rs +8 -0
- data/ext/polars/src/functions/mod.rs +5 -0
- data/ext/polars/src/functions/random.rs +6 -0
- data/ext/polars/src/functions/range.rs +41 -0
- data/ext/polars/src/functions/string_cache.rs +11 -0
- data/ext/polars/src/functions/whenthen.rs +7 -7
- data/ext/polars/src/lazyframe.rs +74 -60
- data/ext/polars/src/lib.rs +175 -91
- data/ext/polars/src/{apply → map}/dataframe.rs +29 -34
- data/ext/polars/src/{apply → map}/mod.rs +5 -5
- data/ext/polars/src/{apply → map}/series.rs +18 -22
- data/ext/polars/src/object.rs +0 -30
- data/ext/polars/src/on_startup.rs +32 -0
- data/ext/polars/src/rb_modules.rs +22 -7
- data/ext/polars/src/series/aggregation.rs +3 -0
- data/ext/polars/src/series/construction.rs +5 -5
- data/ext/polars/src/series/export.rs +4 -4
- data/ext/polars/src/{series.rs → series/mod.rs} +28 -45
- data/ext/polars/src/series/{set_at_idx.rs → scatter.rs} +38 -22
- data/ext/polars/src/sql.rs +46 -0
- data/ext/polars/src/utils.rs +1 -1
- data/lib/polars/config.rb +530 -0
- data/lib/polars/data_frame.rb +182 -145
- data/lib/polars/data_types.rb +4 -1
- data/lib/polars/date_time_expr.rb +23 -28
- data/lib/polars/date_time_name_space.rb +17 -37
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/expr.rb +398 -110
- data/lib/polars/functions.rb +29 -37
- data/lib/polars/group_by.rb +38 -55
- data/lib/polars/io.rb +40 -5
- data/lib/polars/lazy_frame.rb +116 -89
- data/lib/polars/lazy_functions.rb +40 -68
- data/lib/polars/lazy_group_by.rb +7 -8
- data/lib/polars/list_expr.rb +12 -8
- data/lib/polars/list_name_space.rb +2 -2
- data/lib/polars/name_expr.rb +198 -0
- data/lib/polars/rolling_group_by.rb +2 -2
- data/lib/polars/series.rb +315 -43
- data/lib/polars/sql_context.rb +194 -0
- data/lib/polars/string_expr.rb +114 -60
- data/lib/polars/string_name_space.rb +19 -4
- data/lib/polars/struct_expr.rb +1 -1
- data/lib/polars/struct_name_space.rb +1 -1
- data/lib/polars/utils.rb +25 -13
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +3 -0
- metadata +23 -11
- /data/ext/polars/src/{apply → map}/lazy.rs +0 -0
@@ -3,17 +3,16 @@ mod arithmetic;
|
|
3
3
|
mod comparison;
|
4
4
|
mod construction;
|
5
5
|
mod export;
|
6
|
-
mod
|
6
|
+
mod scatter;
|
7
7
|
|
8
|
-
use magnus::{exception, Error, IntoValue, RArray, Value
|
8
|
+
use magnus::{exception, prelude::*, value::qnil, Error, IntoValue, RArray, Value};
|
9
9
|
use polars::prelude::*;
|
10
10
|
use polars::series::IsSorted;
|
11
11
|
use std::cell::RefCell;
|
12
12
|
|
13
|
-
use crate::apply::series::{call_lambda_and_extract, ApplyLambda};
|
14
13
|
use crate::apply_method_all_arrow_series2;
|
15
14
|
use crate::conversion::*;
|
16
|
-
use crate::series::
|
15
|
+
use crate::map::series::{call_lambda_and_extract, ApplyLambda};
|
17
16
|
use crate::{RbDataFrame, RbPolarsErr, RbResult};
|
18
17
|
|
19
18
|
#[magnus::wrap(class = "Polars::RbSeries")]
|
@@ -38,7 +37,7 @@ impl RbSeries {
|
|
38
37
|
pub fn to_series_collection(rs: RArray) -> RbResult<Vec<Series>> {
|
39
38
|
let mut series = Vec::new();
|
40
39
|
for item in rs.each() {
|
41
|
-
series.push(
|
40
|
+
series.push(<&RbSeries>::try_convert(item?)?.series.borrow().clone());
|
42
41
|
}
|
43
42
|
Ok(series)
|
44
43
|
}
|
@@ -81,7 +80,7 @@ impl RbSeries {
|
|
81
80
|
|
82
81
|
pub fn get_fmt(&self, index: usize, str_lengths: usize) -> String {
|
83
82
|
let val = format!("{}", self.series.borrow().get(index).unwrap());
|
84
|
-
if let DataType::
|
83
|
+
if let DataType::String | DataType::Categorical(_, _) = self.series.borrow().dtype() {
|
85
84
|
let v_trunc = &val[..val
|
86
85
|
.char_indices()
|
87
86
|
.take(str_lengths)
|
@@ -91,7 +90,7 @@ impl RbSeries {
|
|
91
90
|
if val == v_trunc {
|
92
91
|
val
|
93
92
|
} else {
|
94
|
-
format!("{}
|
93
|
+
format!("{}…", v_trunc)
|
95
94
|
}
|
96
95
|
} else {
|
97
96
|
val
|
@@ -274,15 +273,13 @@ impl RbSeries {
|
|
274
273
|
Ok(s.into())
|
275
274
|
}
|
276
275
|
|
277
|
-
pub fn
|
276
|
+
pub fn equals(&self, other: &RbSeries, null_equal: bool, strict: bool) -> bool {
|
278
277
|
if strict {
|
279
278
|
self.series.borrow().eq(&other.series.borrow())
|
280
279
|
} else if null_equal {
|
281
|
-
self.series
|
282
|
-
.borrow()
|
283
|
-
.series_equal_missing(&other.series.borrow())
|
280
|
+
self.series.borrow().equals_missing(&other.series.borrow())
|
284
281
|
} else {
|
285
|
-
self.series.borrow().
|
282
|
+
self.series.borrow().equals(&other.series.borrow())
|
286
283
|
}
|
287
284
|
}
|
288
285
|
|
@@ -316,16 +313,16 @@ impl RbSeries {
|
|
316
313
|
DataType::Int64 => RArray::from_iter(series.i64().unwrap()).into_value(),
|
317
314
|
DataType::Float32 => RArray::from_iter(series.f32().unwrap()).into_value(),
|
318
315
|
DataType::Float64 => RArray::from_iter(series.f64().unwrap()).into_value(),
|
319
|
-
DataType::Categorical(_) => {
|
316
|
+
DataType::Categorical(_, _) => {
|
320
317
|
RArray::from_iter(series.categorical().unwrap().iter_str()).into_value()
|
321
318
|
}
|
322
|
-
DataType::Object(_) => {
|
319
|
+
DataType::Object(_, _) => {
|
323
320
|
let v = RArray::with_capacity(series.len());
|
324
321
|
for i in 0..series.len() {
|
325
322
|
let obj: Option<&ObjectValue> = series.get_object(i).map(|any| any.into());
|
326
323
|
match obj {
|
327
324
|
Some(val) => v.push(val.to_object()).unwrap(),
|
328
|
-
None => v.push(
|
325
|
+
None => v.push(qnil()).unwrap(),
|
329
326
|
};
|
330
327
|
}
|
331
328
|
v.into_value()
|
@@ -333,10 +330,10 @@ impl RbSeries {
|
|
333
330
|
DataType::List(_) => {
|
334
331
|
let v = RArray::new();
|
335
332
|
let ca = series.list().unwrap();
|
336
|
-
for opt_s in ca.amortized_iter() {
|
333
|
+
for opt_s in unsafe { ca.amortized_iter() } {
|
337
334
|
match opt_s {
|
338
335
|
None => {
|
339
|
-
v.push(
|
336
|
+
v.push(qnil()).unwrap();
|
340
337
|
}
|
341
338
|
Some(s) => {
|
342
339
|
let rblst = to_a_recursive(s.as_ref());
|
@@ -352,7 +349,7 @@ impl RbSeries {
|
|
352
349
|
for opt_s in ca.amortized_iter() {
|
353
350
|
match opt_s {
|
354
351
|
None => {
|
355
|
-
v.push(
|
352
|
+
v.push(qnil()).unwrap();
|
356
353
|
}
|
357
354
|
Some(s) => {
|
358
355
|
let rblst = to_a_recursive(s.as_ref());
|
@@ -378,8 +375,8 @@ impl RbSeries {
|
|
378
375
|
let ca = series.decimal().unwrap();
|
379
376
|
return Wrap(ca).into_value();
|
380
377
|
}
|
381
|
-
DataType::
|
382
|
-
let ca = series.
|
378
|
+
DataType::String => {
|
379
|
+
let ca = series.str().unwrap();
|
383
380
|
return Wrap(ca).into_value();
|
384
381
|
}
|
385
382
|
DataType::Struct(_) => {
|
@@ -443,7 +440,7 @@ impl RbSeries {
|
|
443
440
|
|
444
441
|
macro_rules! dispatch_apply {
|
445
442
|
($self:expr, $method:ident, $($args:expr),*) => {
|
446
|
-
if matches!($self.dtype(), DataType::Object(_)) {
|
443
|
+
if matches!($self.dtype(), DataType::Object(_, _)) {
|
447
444
|
// let ca = $self.0.unpack::<ObjectType<ObjectValue>>().unwrap();
|
448
445
|
// ca.$method($($args),*)
|
449
446
|
todo!()
|
@@ -464,7 +461,7 @@ impl RbSeries {
|
|
464
461
|
DataType::Datetime(_, _)
|
465
462
|
| DataType::Date
|
466
463
|
| DataType::Duration(_)
|
467
|
-
| DataType::Categorical(_)
|
464
|
+
| DataType::Categorical(_, _)
|
468
465
|
| DataType::Time
|
469
466
|
) || !skip_nulls
|
470
467
|
{
|
@@ -605,12 +602,12 @@ impl RbSeries {
|
|
605
602
|
)?;
|
606
603
|
ca.into_datetime(tu, tz).into_series()
|
607
604
|
}
|
608
|
-
Some(DataType::
|
605
|
+
Some(DataType::String) => {
|
609
606
|
let ca = dispatch_apply!(series, apply_lambda_with_utf8_out_type, lambda, 0, None)?;
|
610
607
|
|
611
608
|
ca.into_series()
|
612
609
|
}
|
613
|
-
Some(DataType::Object(_)) => {
|
610
|
+
Some(DataType::Object(_, _)) => {
|
614
611
|
let ca =
|
615
612
|
dispatch_apply!(series, apply_lambda_with_object_out_type, lambda, 0, None)?;
|
616
613
|
ca.into_series()
|
@@ -643,14 +640,6 @@ impl RbSeries {
|
|
643
640
|
Ok(df.into())
|
644
641
|
}
|
645
642
|
|
646
|
-
pub fn peak_max(&self) -> Self {
|
647
|
-
self.series.borrow().peak_max().into_series().into()
|
648
|
-
}
|
649
|
-
|
650
|
-
pub fn peak_min(&self) -> Self {
|
651
|
-
self.series.borrow().peak_min().into_series().into()
|
652
|
-
}
|
653
|
-
|
654
643
|
pub fn n_unique(&self) -> RbResult<usize> {
|
655
644
|
let n = self.series.borrow().n_unique().map_err(RbPolarsErr::from)?;
|
656
645
|
Ok(n)
|
@@ -665,8 +654,13 @@ impl RbSeries {
|
|
665
654
|
self.series.borrow_mut().shrink_to_fit();
|
666
655
|
}
|
667
656
|
|
668
|
-
pub fn dot(&self, other: &RbSeries) ->
|
669
|
-
self
|
657
|
+
pub fn dot(&self, other: &RbSeries) -> RbResult<f64> {
|
658
|
+
let out = self
|
659
|
+
.series
|
660
|
+
.borrow()
|
661
|
+
.dot(&other.series.borrow())
|
662
|
+
.map_err(RbPolarsErr::from)?;
|
663
|
+
Ok(out)
|
670
664
|
}
|
671
665
|
|
672
666
|
pub fn skew(&self, bias: bool) -> RbResult<Option<f64>> {
|
@@ -708,17 +702,6 @@ impl RbSeries {
|
|
708
702
|
None
|
709
703
|
}
|
710
704
|
}
|
711
|
-
|
712
|
-
pub fn set_at_idx(&self, idx: &RbSeries, values: &RbSeries) -> RbResult<()> {
|
713
|
-
let mut s = self.series.borrow_mut();
|
714
|
-
match set_at_idx(s.clone(), &idx.series.borrow(), &values.series.borrow()) {
|
715
|
-
Ok(out) => {
|
716
|
-
*s = out;
|
717
|
-
Ok(())
|
718
|
-
}
|
719
|
-
Err(e) => Err(RbPolarsErr::from(e)),
|
720
|
-
}
|
721
|
-
}
|
722
705
|
}
|
723
706
|
|
724
707
|
macro_rules! impl_set_with_mask {
|
@@ -1,18 +1,34 @@
|
|
1
|
-
|
1
|
+
use polars::export::arrow::array::Array;
|
2
2
|
use polars::prelude::*;
|
3
3
|
|
4
|
-
|
4
|
+
use crate::error::RbPolarsErr;
|
5
|
+
use crate::{RbResult, RbSeries};
|
6
|
+
|
7
|
+
impl RbSeries {
|
8
|
+
pub fn scatter(&self, idx: &RbSeries, values: &RbSeries) -> RbResult<()> {
|
9
|
+
let mut s = self.series.borrow_mut();
|
10
|
+
match scatter(s.clone(), &idx.series.borrow(), &values.series.borrow()) {
|
11
|
+
Ok(out) => {
|
12
|
+
*s = out;
|
13
|
+
Ok(())
|
14
|
+
}
|
15
|
+
Err(e) => Err(RbPolarsErr::from(e)),
|
16
|
+
}
|
17
|
+
}
|
18
|
+
}
|
19
|
+
|
20
|
+
fn scatter(mut s: Series, idx: &Series, values: &Series) -> PolarsResult<Series> {
|
5
21
|
let logical_dtype = s.dtype().clone();
|
6
22
|
let idx = idx.cast(&IDX_DTYPE)?;
|
7
23
|
let idx = idx.rechunk();
|
8
24
|
let idx = idx.idx().unwrap();
|
9
25
|
let idx = idx.downcast_iter().next().unwrap();
|
10
26
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
27
|
+
if idx.null_count() > 0 {
|
28
|
+
return Err(PolarsError::ComputeError(
|
29
|
+
"index values should not be null".into(),
|
30
|
+
));
|
31
|
+
}
|
16
32
|
|
17
33
|
let idx = idx.values().as_slice();
|
18
34
|
|
@@ -27,62 +43,62 @@ pub fn set_at_idx(mut s: Series, idx: &Series, values: &Series) -> PolarsResult<
|
|
27
43
|
DataType::Int8 => {
|
28
44
|
let ca: &mut ChunkedArray<Int8Type> = mutable_s.as_mut();
|
29
45
|
let values = values.i8()?;
|
30
|
-
std::mem::take(ca).
|
46
|
+
std::mem::take(ca).scatter(idx, values)
|
31
47
|
}
|
32
48
|
DataType::Int16 => {
|
33
49
|
let ca: &mut ChunkedArray<Int16Type> = mutable_s.as_mut();
|
34
50
|
let values = values.i16()?;
|
35
|
-
std::mem::take(ca).
|
51
|
+
std::mem::take(ca).scatter(idx, values)
|
36
52
|
}
|
37
53
|
DataType::Int32 => {
|
38
54
|
let ca: &mut ChunkedArray<Int32Type> = mutable_s.as_mut();
|
39
55
|
let values = values.i32()?;
|
40
|
-
std::mem::take(ca).
|
56
|
+
std::mem::take(ca).scatter(idx, values)
|
41
57
|
}
|
42
58
|
DataType::Int64 => {
|
43
59
|
let ca: &mut ChunkedArray<Int64Type> = mutable_s.as_mut();
|
44
60
|
let values = values.i64()?;
|
45
|
-
std::mem::take(ca).
|
61
|
+
std::mem::take(ca).scatter(idx, values)
|
46
62
|
}
|
47
63
|
DataType::UInt8 => {
|
48
64
|
let ca: &mut ChunkedArray<UInt8Type> = mutable_s.as_mut();
|
49
65
|
let values = values.u8()?;
|
50
|
-
std::mem::take(ca).
|
66
|
+
std::mem::take(ca).scatter(idx, values)
|
51
67
|
}
|
52
68
|
DataType::UInt16 => {
|
53
69
|
let ca: &mut ChunkedArray<UInt16Type> = mutable_s.as_mut();
|
54
70
|
let values = values.u16()?;
|
55
|
-
std::mem::take(ca).
|
71
|
+
std::mem::take(ca).scatter(idx, values)
|
56
72
|
}
|
57
73
|
DataType::UInt32 => {
|
58
74
|
let ca: &mut ChunkedArray<UInt32Type> = mutable_s.as_mut();
|
59
75
|
let values = values.u32()?;
|
60
|
-
std::mem::take(ca).
|
76
|
+
std::mem::take(ca).scatter(idx, values)
|
61
77
|
}
|
62
78
|
DataType::UInt64 => {
|
63
79
|
let ca: &mut ChunkedArray<UInt64Type> = mutable_s.as_mut();
|
64
80
|
let values = values.u64()?;
|
65
|
-
std::mem::take(ca).
|
81
|
+
std::mem::take(ca).scatter(idx, values)
|
66
82
|
}
|
67
83
|
DataType::Float32 => {
|
68
84
|
let ca: &mut ChunkedArray<Float32Type> = mutable_s.as_mut();
|
69
85
|
let values = values.f32()?;
|
70
|
-
std::mem::take(ca).
|
86
|
+
std::mem::take(ca).scatter(idx, values)
|
71
87
|
}
|
72
88
|
DataType::Float64 => {
|
73
89
|
let ca: &mut ChunkedArray<Float64Type> = mutable_s.as_mut();
|
74
90
|
let values = values.f64()?;
|
75
|
-
std::mem::take(ca).
|
91
|
+
std::mem::take(ca).scatter(idx, values)
|
76
92
|
}
|
77
93
|
DataType::Boolean => {
|
78
94
|
let ca = s.bool()?;
|
79
95
|
let values = values.bool()?;
|
80
|
-
ca.
|
96
|
+
ca.scatter(idx, values)
|
81
97
|
}
|
82
|
-
DataType::
|
83
|
-
let ca = s.
|
84
|
-
let values = values.
|
85
|
-
ca.
|
98
|
+
DataType::String => {
|
99
|
+
let ca = s.str()?;
|
100
|
+
let values = values.str()?;
|
101
|
+
ca.scatter(idx, values)
|
86
102
|
}
|
87
103
|
_ => panic!("not yet implemented for dtype: {}", logical_dtype),
|
88
104
|
};
|
@@ -0,0 +1,46 @@
|
|
1
|
+
use polars::sql::SQLContext;
|
2
|
+
use std::cell::RefCell;
|
3
|
+
|
4
|
+
use crate::{RbLazyFrame, RbPolarsErr, RbResult};
|
5
|
+
|
6
|
+
#[magnus::wrap(class = "Polars::RbSQLContext")]
|
7
|
+
#[repr(transparent)]
|
8
|
+
#[derive(Clone)]
|
9
|
+
pub struct RbSQLContext {
|
10
|
+
pub context: RefCell<SQLContext>,
|
11
|
+
}
|
12
|
+
|
13
|
+
#[allow(
|
14
|
+
clippy::wrong_self_convention,
|
15
|
+
clippy::should_implement_trait,
|
16
|
+
clippy::len_without_is_empty
|
17
|
+
)]
|
18
|
+
impl RbSQLContext {
|
19
|
+
#[allow(clippy::new_without_default)]
|
20
|
+
pub fn new() -> RbSQLContext {
|
21
|
+
RbSQLContext {
|
22
|
+
context: SQLContext::new().into(),
|
23
|
+
}
|
24
|
+
}
|
25
|
+
|
26
|
+
pub fn execute(&self, query: String) -> RbResult<RbLazyFrame> {
|
27
|
+
Ok(self
|
28
|
+
.context
|
29
|
+
.borrow_mut()
|
30
|
+
.execute(&query)
|
31
|
+
.map_err(RbPolarsErr::from)?
|
32
|
+
.into())
|
33
|
+
}
|
34
|
+
|
35
|
+
pub fn get_tables(&self) -> RbResult<Vec<String>> {
|
36
|
+
Ok(self.context.borrow().get_tables())
|
37
|
+
}
|
38
|
+
|
39
|
+
pub fn register(&self, name: String, lf: &RbLazyFrame) {
|
40
|
+
self.context.borrow_mut().register(&name, lf.ldf.clone())
|
41
|
+
}
|
42
|
+
|
43
|
+
pub fn unregister(&self, name: String) {
|
44
|
+
self.context.borrow_mut().unregister(&name)
|
45
|
+
}
|
46
|
+
}
|
data/ext/polars/src/utils.rs
CHANGED
@@ -23,7 +23,7 @@ macro_rules! apply_method_all_arrow_series2 {
|
|
23
23
|
($self:expr, $method:ident, $($args:expr),*) => {
|
24
24
|
match $self.dtype() {
|
25
25
|
DataType::Boolean => $self.bool().unwrap().$method($($args),*),
|
26
|
-
DataType::
|
26
|
+
DataType::String => $self.str().unwrap().$method($($args),*),
|
27
27
|
DataType::UInt8 => $self.u8().unwrap().$method($($args),*),
|
28
28
|
DataType::UInt16 => $self.u16().unwrap().$method($($args),*),
|
29
29
|
DataType::UInt32 => $self.u32().unwrap().$method($($args),*),
|