polars-df 0.5.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +26 -0
- data/Cargo.lock +595 -709
- data/Cargo.toml +1 -0
- data/README.md +11 -9
- data/ext/polars/Cargo.toml +18 -10
- data/ext/polars/src/batched_csv.rs +26 -26
- data/ext/polars/src/conversion.rs +272 -136
- data/ext/polars/src/dataframe.rs +135 -94
- data/ext/polars/src/error.rs +8 -5
- data/ext/polars/src/expr/array.rs +15 -0
- data/ext/polars/src/expr/binary.rs +18 -6
- data/ext/polars/src/expr/datetime.rs +10 -12
- data/ext/polars/src/expr/general.rs +78 -264
- data/ext/polars/src/expr/list.rs +41 -28
- data/ext/polars/src/{expr.rs → expr/mod.rs} +5 -2
- data/ext/polars/src/expr/name.rs +44 -0
- data/ext/polars/src/expr/rolling.rs +196 -0
- data/ext/polars/src/expr/string.rs +94 -66
- data/ext/polars/src/file.rs +3 -3
- data/ext/polars/src/functions/aggregation.rs +35 -0
- data/ext/polars/src/functions/eager.rs +7 -31
- data/ext/polars/src/functions/io.rs +10 -10
- data/ext/polars/src/functions/lazy.rs +119 -54
- data/ext/polars/src/functions/meta.rs +30 -0
- data/ext/polars/src/functions/misc.rs +8 -0
- data/ext/polars/src/functions/mod.rs +5 -0
- data/ext/polars/src/functions/random.rs +6 -0
- data/ext/polars/src/functions/range.rs +46 -0
- data/ext/polars/src/functions/string_cache.rs +11 -0
- data/ext/polars/src/functions/whenthen.rs +7 -7
- data/ext/polars/src/lazyframe.rs +61 -44
- data/ext/polars/src/lib.rs +173 -84
- data/ext/polars/src/{apply → map}/dataframe.rs +28 -33
- data/ext/polars/src/{apply → map}/mod.rs +10 -6
- data/ext/polars/src/{apply → map}/series.rs +12 -16
- data/ext/polars/src/object.rs +2 -2
- data/ext/polars/src/rb_modules.rs +25 -6
- data/ext/polars/src/series/construction.rs +32 -6
- data/ext/polars/src/series/export.rs +2 -2
- data/ext/polars/src/series/set_at_idx.rs +33 -17
- data/ext/polars/src/series.rs +62 -42
- data/ext/polars/src/sql.rs +46 -0
- data/lib/polars/array_expr.rb +84 -0
- data/lib/polars/array_name_space.rb +77 -0
- data/lib/polars/batched_csv_reader.rb +1 -1
- data/lib/polars/config.rb +530 -0
- data/lib/polars/data_frame.rb +206 -131
- data/lib/polars/data_types.rb +163 -29
- data/lib/polars/date_time_expr.rb +13 -18
- data/lib/polars/date_time_name_space.rb +22 -28
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/expr.rb +241 -151
- data/lib/polars/functions.rb +29 -38
- data/lib/polars/group_by.rb +38 -76
- data/lib/polars/io.rb +37 -2
- data/lib/polars/lazy_frame.rb +174 -95
- data/lib/polars/lazy_functions.rb +87 -63
- data/lib/polars/lazy_group_by.rb +7 -8
- data/lib/polars/list_expr.rb +40 -36
- data/lib/polars/list_name_space.rb +15 -15
- data/lib/polars/name_expr.rb +198 -0
- data/lib/polars/rolling_group_by.rb +6 -4
- data/lib/polars/series.rb +95 -28
- data/lib/polars/sql_context.rb +194 -0
- data/lib/polars/string_expr.rb +249 -69
- data/lib/polars/string_name_space.rb +155 -25
- data/lib/polars/utils.rb +119 -57
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +6 -0
- metadata +21 -7
- /data/ext/polars/src/{apply → map}/lazy.rs +0 -0
data/ext/polars/src/series.rs
CHANGED
@@ -5,15 +5,14 @@ mod construction;
|
|
5
5
|
mod export;
|
6
6
|
mod set_at_idx;
|
7
7
|
|
8
|
-
use magnus::{exception, Error, IntoValue, RArray, Value
|
8
|
+
use magnus::{exception, prelude::*, value::qnil, Error, IntoValue, RArray, Value};
|
9
9
|
use polars::prelude::*;
|
10
10
|
use polars::series::IsSorted;
|
11
11
|
use std::cell::RefCell;
|
12
12
|
|
13
|
-
use crate::apply::series::{call_lambda_and_extract, ApplyLambda};
|
14
13
|
use crate::apply_method_all_arrow_series2;
|
15
14
|
use crate::conversion::*;
|
16
|
-
use crate::series::
|
15
|
+
use crate::map::series::{call_lambda_and_extract, ApplyLambda};
|
17
16
|
use crate::{RbDataFrame, RbPolarsErr, RbResult};
|
18
17
|
|
19
18
|
#[magnus::wrap(class = "Polars::RbSeries")]
|
@@ -38,7 +37,7 @@ impl RbSeries {
|
|
38
37
|
pub fn to_series_collection(rs: RArray) -> RbResult<Vec<Series>> {
|
39
38
|
let mut series = Vec::new();
|
40
39
|
for item in rs.each() {
|
41
|
-
series.push(
|
40
|
+
series.push(<&RbSeries>::try_convert(item?)?.series.borrow().clone());
|
42
41
|
}
|
43
42
|
Ok(series)
|
44
43
|
}
|
@@ -303,7 +302,7 @@ impl RbSeries {
|
|
303
302
|
pub fn to_a(&self) -> Value {
|
304
303
|
let series = &self.series.borrow();
|
305
304
|
|
306
|
-
fn
|
305
|
+
fn to_a_recursive(series: &Series) -> Value {
|
307
306
|
let rblist = match series.dtype() {
|
308
307
|
DataType::Boolean => RArray::from_iter(series.bool().unwrap()).into_value(),
|
309
308
|
DataType::UInt8 => RArray::from_iter(series.u8().unwrap()).into_value(),
|
@@ -325,7 +324,7 @@ impl RbSeries {
|
|
325
324
|
let obj: Option<&ObjectValue> = series.get_object(i).map(|any| any.into());
|
326
325
|
match obj {
|
327
326
|
Some(val) => v.push(val.to_object()).unwrap(),
|
328
|
-
None => v.push(
|
327
|
+
None => v.push(qnil()).unwrap(),
|
329
328
|
};
|
330
329
|
}
|
331
330
|
v.into_value()
|
@@ -333,13 +332,29 @@ impl RbSeries {
|
|
333
332
|
DataType::List(_) => {
|
334
333
|
let v = RArray::new();
|
335
334
|
let ca = series.list().unwrap();
|
335
|
+
for opt_s in unsafe { ca.amortized_iter() } {
|
336
|
+
match opt_s {
|
337
|
+
None => {
|
338
|
+
v.push(qnil()).unwrap();
|
339
|
+
}
|
340
|
+
Some(s) => {
|
341
|
+
let rblst = to_a_recursive(s.as_ref());
|
342
|
+
v.push(rblst).unwrap();
|
343
|
+
}
|
344
|
+
}
|
345
|
+
}
|
346
|
+
v.into_value()
|
347
|
+
}
|
348
|
+
DataType::Array(_, _) => {
|
349
|
+
let v = RArray::new();
|
350
|
+
let ca = series.array().unwrap();
|
336
351
|
for opt_s in ca.amortized_iter() {
|
337
352
|
match opt_s {
|
338
353
|
None => {
|
339
|
-
v.push(
|
354
|
+
v.push(qnil()).unwrap();
|
340
355
|
}
|
341
356
|
Some(s) => {
|
342
|
-
let rblst =
|
357
|
+
let rblst = to_a_recursive(s.as_ref());
|
343
358
|
v.push(rblst).unwrap();
|
344
359
|
}
|
345
360
|
}
|
@@ -347,18 +362,20 @@ impl RbSeries {
|
|
347
362
|
v.into_value()
|
348
363
|
}
|
349
364
|
DataType::Date => {
|
350
|
-
let
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
365
|
+
let ca = series.date().unwrap();
|
366
|
+
return Wrap(ca).into_value();
|
367
|
+
}
|
368
|
+
DataType::Time => {
|
369
|
+
let ca = series.time().unwrap();
|
370
|
+
return Wrap(ca).into_value();
|
355
371
|
}
|
356
372
|
DataType::Datetime(_, _) => {
|
357
|
-
let
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
373
|
+
let ca = series.datetime().unwrap();
|
374
|
+
return Wrap(ca).into_value();
|
375
|
+
}
|
376
|
+
DataType::Decimal(_, _) => {
|
377
|
+
let ca = series.decimal().unwrap();
|
378
|
+
return Wrap(ca).into_value();
|
362
379
|
}
|
363
380
|
DataType::Utf8 => {
|
364
381
|
let ca = series.utf8().unwrap();
|
@@ -376,15 +393,37 @@ impl RbSeries {
|
|
376
393
|
let ca = series.binary().unwrap();
|
377
394
|
return Wrap(ca).into_value();
|
378
395
|
}
|
379
|
-
DataType::Null
|
396
|
+
DataType::Null => {
|
397
|
+
let null: Option<u8> = None;
|
398
|
+
let n = series.len();
|
399
|
+
let iter = std::iter::repeat(null).take(n);
|
400
|
+
use std::iter::{Repeat, Take};
|
401
|
+
struct NullIter {
|
402
|
+
iter: Take<Repeat<Option<u8>>>,
|
403
|
+
n: usize,
|
404
|
+
}
|
405
|
+
impl Iterator for NullIter {
|
406
|
+
type Item = Option<u8>;
|
407
|
+
|
408
|
+
fn next(&mut self) -> Option<Self::Item> {
|
409
|
+
self.iter.next()
|
410
|
+
}
|
411
|
+
fn size_hint(&self) -> (usize, Option<usize>) {
|
412
|
+
(self.n, Some(self.n))
|
413
|
+
}
|
414
|
+
}
|
415
|
+
impl ExactSizeIterator for NullIter {}
|
416
|
+
|
417
|
+
RArray::from_iter(NullIter { iter, n }).into_value()
|
418
|
+
}
|
419
|
+
DataType::Unknown => {
|
380
420
|
panic!("to_a not implemented for null/unknown")
|
381
421
|
}
|
382
|
-
_ => todo!(),
|
383
422
|
};
|
384
423
|
rblist
|
385
424
|
}
|
386
425
|
|
387
|
-
|
426
|
+
to_a_recursive(series)
|
388
427
|
}
|
389
428
|
|
390
429
|
pub fn clone(&self) -> Self {
|
@@ -594,23 +633,15 @@ impl RbSeries {
|
|
594
633
|
Ok(RbSeries::new(s))
|
595
634
|
}
|
596
635
|
|
597
|
-
pub fn to_dummies(&self, sep: Option<String
|
636
|
+
pub fn to_dummies(&self, sep: Option<String>, drop_first: bool) -> RbResult<RbDataFrame> {
|
598
637
|
let df = self
|
599
638
|
.series
|
600
639
|
.borrow()
|
601
|
-
.to_dummies(sep.as_deref())
|
640
|
+
.to_dummies(sep.as_deref(), drop_first)
|
602
641
|
.map_err(RbPolarsErr::from)?;
|
603
642
|
Ok(df.into())
|
604
643
|
}
|
605
644
|
|
606
|
-
pub fn peak_max(&self) -> Self {
|
607
|
-
self.series.borrow().peak_max().into_series().into()
|
608
|
-
}
|
609
|
-
|
610
|
-
pub fn peak_min(&self) -> Self {
|
611
|
-
self.series.borrow().peak_min().into_series().into()
|
612
|
-
}
|
613
|
-
|
614
645
|
pub fn n_unique(&self) -> RbResult<usize> {
|
615
646
|
let n = self.series.borrow().n_unique().map_err(RbPolarsErr::from)?;
|
616
647
|
Ok(n)
|
@@ -668,17 +699,6 @@ impl RbSeries {
|
|
668
699
|
None
|
669
700
|
}
|
670
701
|
}
|
671
|
-
|
672
|
-
pub fn set_at_idx(&self, idx: &RbSeries, values: &RbSeries) -> RbResult<()> {
|
673
|
-
let mut s = self.series.borrow_mut();
|
674
|
-
match set_at_idx(s.clone(), &idx.series.borrow(), &values.series.borrow()) {
|
675
|
-
Ok(out) => {
|
676
|
-
*s = out;
|
677
|
-
Ok(())
|
678
|
-
}
|
679
|
-
Err(e) => Err(RbPolarsErr::from(e)),
|
680
|
-
}
|
681
|
-
}
|
682
702
|
}
|
683
703
|
|
684
704
|
macro_rules! impl_set_with_mask {
|
@@ -0,0 +1,46 @@
|
|
1
|
+
use polars::sql::SQLContext;
|
2
|
+
use std::cell::RefCell;
|
3
|
+
|
4
|
+
use crate::{RbLazyFrame, RbPolarsErr, RbResult};
|
5
|
+
|
6
|
+
#[magnus::wrap(class = "Polars::RbSQLContext")]
|
7
|
+
#[repr(transparent)]
|
8
|
+
#[derive(Clone)]
|
9
|
+
pub struct RbSQLContext {
|
10
|
+
pub context: RefCell<SQLContext>,
|
11
|
+
}
|
12
|
+
|
13
|
+
#[allow(
|
14
|
+
clippy::wrong_self_convention,
|
15
|
+
clippy::should_implement_trait,
|
16
|
+
clippy::len_without_is_empty
|
17
|
+
)]
|
18
|
+
impl RbSQLContext {
|
19
|
+
#[allow(clippy::new_without_default)]
|
20
|
+
pub fn new() -> RbSQLContext {
|
21
|
+
RbSQLContext {
|
22
|
+
context: SQLContext::new().into(),
|
23
|
+
}
|
24
|
+
}
|
25
|
+
|
26
|
+
pub fn execute(&self, query: String) -> RbResult<RbLazyFrame> {
|
27
|
+
Ok(self
|
28
|
+
.context
|
29
|
+
.borrow_mut()
|
30
|
+
.execute(&query)
|
31
|
+
.map_err(RbPolarsErr::from)?
|
32
|
+
.into())
|
33
|
+
}
|
34
|
+
|
35
|
+
pub fn get_tables(&self) -> RbResult<Vec<String>> {
|
36
|
+
Ok(self.context.borrow().get_tables())
|
37
|
+
}
|
38
|
+
|
39
|
+
pub fn register(&self, name: String, lf: &RbLazyFrame) {
|
40
|
+
self.context.borrow_mut().register(&name, lf.ldf.clone())
|
41
|
+
}
|
42
|
+
|
43
|
+
pub fn unregister(&self, name: String) {
|
44
|
+
self.context.borrow_mut().unregister(&name)
|
45
|
+
}
|
46
|
+
}
|
@@ -0,0 +1,84 @@
|
|
1
|
+
module Polars
  # Expression methods for array columns, reached through `Expr#arr`.
  class ArrayExpr
    # @private
    attr_accessor :_rbexpr

    # @private
    def initialize(expr)
      @_rbexpr = expr._rbexpr
    end

    # Get the minimum value of each sub-array.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {"a" => [[1, 2], [4, 3]]},
    #     schema: {"a" => Polars::Array.new(2, Polars::Int64)}
    #   )
    #   df.select(Polars.col("a").arr.min)
    #   # =>
    #   # shape: (2, 1)
    #   # ┌─────┐
    #   # │ a   │
    #   # │ --- │
    #   # │ i64 │
    #   # ╞═════╡
    #   # │ 1   │
    #   # │ 3   │
    #   # └─────┘
    def min
      reduced = _rbexpr.array_min
      Utils.wrap_expr(reduced)
    end

    # Get the maximum value of each sub-array.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {"a" => [[1, 2], [4, 3]]},
    #     schema: {"a" => Polars::Array.new(2, Polars::Int64)}
    #   )
    #   df.select(Polars.col("a").arr.max)
    #   # =>
    #   # shape: (2, 1)
    #   # ┌─────┐
    #   # │ a   │
    #   # │ --- │
    #   # │ i64 │
    #   # ╞═════╡
    #   # │ 2   │
    #   # │ 4   │
    #   # └─────┘
    def max
      reduced = _rbexpr.array_max
      Utils.wrap_expr(reduced)
    end

    # Get the sum of the values in each sub-array.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {"a" => [[1, 2], [4, 3]]},
    #     schema: {"a" => Polars::Array.new(2, Polars::Int64)}
    #   )
    #   df.select(Polars.col("a").arr.sum)
    #   # =>
    #   # shape: (2, 1)
    #   # ┌─────┐
    #   # │ a   │
    #   # │ --- │
    #   # │ i64 │
    #   # ╞═════╡
    #   # │ 3   │
    #   # │ 7   │
    #   # └─────┘
    def sum
      reduced = _rbexpr.array_sum
      Utils.wrap_expr(reduced)
    end
  end
end
|
@@ -0,0 +1,77 @@
|
|
1
|
+
module Polars
  # Series.arr namespace.
  #
  # Every public method here simply calls `super`; the actual work is
  # delegated to whatever `ExprDispatch` provides for the "arr" accessor.
  class ArrayNameSpace
    include ExprDispatch

    self._accessor = "arr"

    # @private
    def initialize(series)
      self._s = series._s
    end

    # Compute the min values of the sub-arrays.
    #
    # @return [Series]
    #
    # @example
    #   s = Polars::Series.new(
    #     "a", [[1, 2], [4, 3]], dtype: Polars::Array.new(2, Polars::Int64)
    #   )
    #   s.arr.min
    #   # =>
    #   # shape: (2,)
    #   # Series: 'a' [i64]
    #   # [
    #   #         1
    #   #         3
    #   # ]
    def min
      super
    end

    # Compute the max values of the sub-arrays.
    #
    # @return [Series]
    #
    # @example
    #   s = Polars::Series.new(
    #     "a", [[1, 2], [4, 3]], dtype: Polars::Array.new(2, Polars::Int64)
    #   )
    #   s.arr.max
    #   # =>
    #   # shape: (2,)
    #   # Series: 'a' [i64]
    #   # [
    #   #         2
    #   #         4
    #   # ]
    def max
      super
    end

    # Compute the sum values of the sub-arrays.
    #
    # @return [Series]
    #
    # @example
    #   s = Polars::Series.new(
    #     "a", [[1, 2], [4, 3]], dtype: Polars::Array.new(2, Polars::Int64)
    #   )
    #   s.arr.sum
    #   # =>
    #   # shape: (2,)
    #   # Series: 'a' [i64]
    #   # [
    #   #         3
    #   #         7
    #   # ]
    def sum
      super
    end
  end
end
|