polars-df 0.5.0 → 0.7.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +26 -0
- data/Cargo.lock +595 -709
- data/Cargo.toml +1 -0
- data/README.md +11 -9
- data/ext/polars/Cargo.toml +18 -10
- data/ext/polars/src/batched_csv.rs +26 -26
- data/ext/polars/src/conversion.rs +272 -136
- data/ext/polars/src/dataframe.rs +135 -94
- data/ext/polars/src/error.rs +8 -5
- data/ext/polars/src/expr/array.rs +15 -0
- data/ext/polars/src/expr/binary.rs +18 -6
- data/ext/polars/src/expr/datetime.rs +10 -12
- data/ext/polars/src/expr/general.rs +78 -264
- data/ext/polars/src/expr/list.rs +41 -28
- data/ext/polars/src/{expr.rs → expr/mod.rs} +5 -2
- data/ext/polars/src/expr/name.rs +44 -0
- data/ext/polars/src/expr/rolling.rs +196 -0
- data/ext/polars/src/expr/string.rs +94 -66
- data/ext/polars/src/file.rs +3 -3
- data/ext/polars/src/functions/aggregation.rs +35 -0
- data/ext/polars/src/functions/eager.rs +7 -31
- data/ext/polars/src/functions/io.rs +10 -10
- data/ext/polars/src/functions/lazy.rs +119 -54
- data/ext/polars/src/functions/meta.rs +30 -0
- data/ext/polars/src/functions/misc.rs +8 -0
- data/ext/polars/src/functions/mod.rs +5 -0
- data/ext/polars/src/functions/random.rs +6 -0
- data/ext/polars/src/functions/range.rs +46 -0
- data/ext/polars/src/functions/string_cache.rs +11 -0
- data/ext/polars/src/functions/whenthen.rs +7 -7
- data/ext/polars/src/lazyframe.rs +61 -44
- data/ext/polars/src/lib.rs +173 -84
- data/ext/polars/src/{apply → map}/dataframe.rs +28 -33
- data/ext/polars/src/{apply → map}/mod.rs +10 -6
- data/ext/polars/src/{apply → map}/series.rs +12 -16
- data/ext/polars/src/object.rs +2 -2
- data/ext/polars/src/rb_modules.rs +25 -6
- data/ext/polars/src/series/construction.rs +32 -6
- data/ext/polars/src/series/export.rs +2 -2
- data/ext/polars/src/series/set_at_idx.rs +33 -17
- data/ext/polars/src/series.rs +62 -42
- data/ext/polars/src/sql.rs +46 -0
- data/lib/polars/array_expr.rb +84 -0
- data/lib/polars/array_name_space.rb +77 -0
- data/lib/polars/batched_csv_reader.rb +1 -1
- data/lib/polars/config.rb +530 -0
- data/lib/polars/data_frame.rb +206 -131
- data/lib/polars/data_types.rb +163 -29
- data/lib/polars/date_time_expr.rb +13 -18
- data/lib/polars/date_time_name_space.rb +22 -28
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/expr.rb +241 -151
- data/lib/polars/functions.rb +29 -38
- data/lib/polars/group_by.rb +38 -76
- data/lib/polars/io.rb +37 -2
- data/lib/polars/lazy_frame.rb +174 -95
- data/lib/polars/lazy_functions.rb +87 -63
- data/lib/polars/lazy_group_by.rb +7 -8
- data/lib/polars/list_expr.rb +40 -36
- data/lib/polars/list_name_space.rb +15 -15
- data/lib/polars/name_expr.rb +198 -0
- data/lib/polars/rolling_group_by.rb +6 -4
- data/lib/polars/series.rb +95 -28
- data/lib/polars/sql_context.rb +194 -0
- data/lib/polars/string_expr.rb +249 -69
- data/lib/polars/string_name_space.rb +155 -25
- data/lib/polars/utils.rb +119 -57
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +6 -0
- metadata +21 -7
- /data/ext/polars/src/{apply → map}/lazy.rs +0 -0
data/ext/polars/src/series.rs
CHANGED
@@ -5,15 +5,14 @@ mod construction;
|
|
5
5
|
mod export;
|
6
6
|
mod set_at_idx;
|
7
7
|
|
8
|
-
use magnus::{exception, Error, IntoValue, RArray, Value
|
8
|
+
use magnus::{exception, prelude::*, value::qnil, Error, IntoValue, RArray, Value};
|
9
9
|
use polars::prelude::*;
|
10
10
|
use polars::series::IsSorted;
|
11
11
|
use std::cell::RefCell;
|
12
12
|
|
13
|
-
use crate::apply::series::{call_lambda_and_extract, ApplyLambda};
|
14
13
|
use crate::apply_method_all_arrow_series2;
|
15
14
|
use crate::conversion::*;
|
16
|
-
use crate::series::
|
15
|
+
use crate::map::series::{call_lambda_and_extract, ApplyLambda};
|
17
16
|
use crate::{RbDataFrame, RbPolarsErr, RbResult};
|
18
17
|
|
19
18
|
#[magnus::wrap(class = "Polars::RbSeries")]
|
@@ -38,7 +37,7 @@ impl RbSeries {
|
|
38
37
|
pub fn to_series_collection(rs: RArray) -> RbResult<Vec<Series>> {
|
39
38
|
let mut series = Vec::new();
|
40
39
|
for item in rs.each() {
|
41
|
-
series.push(
|
40
|
+
series.push(<&RbSeries>::try_convert(item?)?.series.borrow().clone());
|
42
41
|
}
|
43
42
|
Ok(series)
|
44
43
|
}
|
@@ -303,7 +302,7 @@ impl RbSeries {
|
|
303
302
|
pub fn to_a(&self) -> Value {
|
304
303
|
let series = &self.series.borrow();
|
305
304
|
|
306
|
-
fn
|
305
|
+
fn to_a_recursive(series: &Series) -> Value {
|
307
306
|
let rblist = match series.dtype() {
|
308
307
|
DataType::Boolean => RArray::from_iter(series.bool().unwrap()).into_value(),
|
309
308
|
DataType::UInt8 => RArray::from_iter(series.u8().unwrap()).into_value(),
|
@@ -325,7 +324,7 @@ impl RbSeries {
|
|
325
324
|
let obj: Option<&ObjectValue> = series.get_object(i).map(|any| any.into());
|
326
325
|
match obj {
|
327
326
|
Some(val) => v.push(val.to_object()).unwrap(),
|
328
|
-
None => v.push(
|
327
|
+
None => v.push(qnil()).unwrap(),
|
329
328
|
};
|
330
329
|
}
|
331
330
|
v.into_value()
|
@@ -333,13 +332,29 @@ impl RbSeries {
|
|
333
332
|
DataType::List(_) => {
|
334
333
|
let v = RArray::new();
|
335
334
|
let ca = series.list().unwrap();
|
335
|
+
for opt_s in unsafe { ca.amortized_iter() } {
|
336
|
+
match opt_s {
|
337
|
+
None => {
|
338
|
+
v.push(qnil()).unwrap();
|
339
|
+
}
|
340
|
+
Some(s) => {
|
341
|
+
let rblst = to_a_recursive(s.as_ref());
|
342
|
+
v.push(rblst).unwrap();
|
343
|
+
}
|
344
|
+
}
|
345
|
+
}
|
346
|
+
v.into_value()
|
347
|
+
}
|
348
|
+
DataType::Array(_, _) => {
|
349
|
+
let v = RArray::new();
|
350
|
+
let ca = series.array().unwrap();
|
336
351
|
for opt_s in ca.amortized_iter() {
|
337
352
|
match opt_s {
|
338
353
|
None => {
|
339
|
-
v.push(
|
354
|
+
v.push(qnil()).unwrap();
|
340
355
|
}
|
341
356
|
Some(s) => {
|
342
|
-
let rblst =
|
357
|
+
let rblst = to_a_recursive(s.as_ref());
|
343
358
|
v.push(rblst).unwrap();
|
344
359
|
}
|
345
360
|
}
|
@@ -347,18 +362,20 @@ impl RbSeries {
|
|
347
362
|
v.into_value()
|
348
363
|
}
|
349
364
|
DataType::Date => {
|
350
|
-
let
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
365
|
+
let ca = series.date().unwrap();
|
366
|
+
return Wrap(ca).into_value();
|
367
|
+
}
|
368
|
+
DataType::Time => {
|
369
|
+
let ca = series.time().unwrap();
|
370
|
+
return Wrap(ca).into_value();
|
355
371
|
}
|
356
372
|
DataType::Datetime(_, _) => {
|
357
|
-
let
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
373
|
+
let ca = series.datetime().unwrap();
|
374
|
+
return Wrap(ca).into_value();
|
375
|
+
}
|
376
|
+
DataType::Decimal(_, _) => {
|
377
|
+
let ca = series.decimal().unwrap();
|
378
|
+
return Wrap(ca).into_value();
|
362
379
|
}
|
363
380
|
DataType::Utf8 => {
|
364
381
|
let ca = series.utf8().unwrap();
|
@@ -376,15 +393,37 @@ impl RbSeries {
|
|
376
393
|
let ca = series.binary().unwrap();
|
377
394
|
return Wrap(ca).into_value();
|
378
395
|
}
|
379
|
-
DataType::Null
|
396
|
+
DataType::Null => {
|
397
|
+
let null: Option<u8> = None;
|
398
|
+
let n = series.len();
|
399
|
+
let iter = std::iter::repeat(null).take(n);
|
400
|
+
use std::iter::{Repeat, Take};
|
401
|
+
struct NullIter {
|
402
|
+
iter: Take<Repeat<Option<u8>>>,
|
403
|
+
n: usize,
|
404
|
+
}
|
405
|
+
impl Iterator for NullIter {
|
406
|
+
type Item = Option<u8>;
|
407
|
+
|
408
|
+
fn next(&mut self) -> Option<Self::Item> {
|
409
|
+
self.iter.next()
|
410
|
+
}
|
411
|
+
fn size_hint(&self) -> (usize, Option<usize>) {
|
412
|
+
(self.n, Some(self.n))
|
413
|
+
}
|
414
|
+
}
|
415
|
+
impl ExactSizeIterator for NullIter {}
|
416
|
+
|
417
|
+
RArray::from_iter(NullIter { iter, n }).into_value()
|
418
|
+
}
|
419
|
+
DataType::Unknown => {
|
380
420
|
panic!("to_a not implemented for null/unknown")
|
381
421
|
}
|
382
|
-
_ => todo!(),
|
383
422
|
};
|
384
423
|
rblist
|
385
424
|
}
|
386
425
|
|
387
|
-
|
426
|
+
to_a_recursive(series)
|
388
427
|
}
|
389
428
|
|
390
429
|
pub fn clone(&self) -> Self {
|
@@ -594,23 +633,15 @@ impl RbSeries {
|
|
594
633
|
Ok(RbSeries::new(s))
|
595
634
|
}
|
596
635
|
|
597
|
-
pub fn to_dummies(&self, sep: Option<String
|
636
|
+
pub fn to_dummies(&self, sep: Option<String>, drop_first: bool) -> RbResult<RbDataFrame> {
|
598
637
|
let df = self
|
599
638
|
.series
|
600
639
|
.borrow()
|
601
|
-
.to_dummies(sep.as_deref())
|
640
|
+
.to_dummies(sep.as_deref(), drop_first)
|
602
641
|
.map_err(RbPolarsErr::from)?;
|
603
642
|
Ok(df.into())
|
604
643
|
}
|
605
644
|
|
606
|
-
pub fn peak_max(&self) -> Self {
|
607
|
-
self.series.borrow().peak_max().into_series().into()
|
608
|
-
}
|
609
|
-
|
610
|
-
pub fn peak_min(&self) -> Self {
|
611
|
-
self.series.borrow().peak_min().into_series().into()
|
612
|
-
}
|
613
|
-
|
614
645
|
pub fn n_unique(&self) -> RbResult<usize> {
|
615
646
|
let n = self.series.borrow().n_unique().map_err(RbPolarsErr::from)?;
|
616
647
|
Ok(n)
|
@@ -668,17 +699,6 @@ impl RbSeries {
|
|
668
699
|
None
|
669
700
|
}
|
670
701
|
}
|
671
|
-
|
672
|
-
pub fn set_at_idx(&self, idx: &RbSeries, values: &RbSeries) -> RbResult<()> {
|
673
|
-
let mut s = self.series.borrow_mut();
|
674
|
-
match set_at_idx(s.clone(), &idx.series.borrow(), &values.series.borrow()) {
|
675
|
-
Ok(out) => {
|
676
|
-
*s = out;
|
677
|
-
Ok(())
|
678
|
-
}
|
679
|
-
Err(e) => Err(RbPolarsErr::from(e)),
|
680
|
-
}
|
681
|
-
}
|
682
702
|
}
|
683
703
|
|
684
704
|
macro_rules! impl_set_with_mask {
|
@@ -0,0 +1,46 @@
|
|
1
|
+
use polars::sql::SQLContext;
|
2
|
+
use std::cell::RefCell;
|
3
|
+
|
4
|
+
use crate::{RbLazyFrame, RbPolarsErr, RbResult};
|
5
|
+
|
6
|
+
#[magnus::wrap(class = "Polars::RbSQLContext")]
|
7
|
+
#[repr(transparent)]
|
8
|
+
#[derive(Clone)]
|
9
|
+
pub struct RbSQLContext {
|
10
|
+
pub context: RefCell<SQLContext>,
|
11
|
+
}
|
12
|
+
|
13
|
+
#[allow(
|
14
|
+
clippy::wrong_self_convention,
|
15
|
+
clippy::should_implement_trait,
|
16
|
+
clippy::len_without_is_empty
|
17
|
+
)]
|
18
|
+
impl RbSQLContext {
|
19
|
+
#[allow(clippy::new_without_default)]
|
20
|
+
pub fn new() -> RbSQLContext {
|
21
|
+
RbSQLContext {
|
22
|
+
context: SQLContext::new().into(),
|
23
|
+
}
|
24
|
+
}
|
25
|
+
|
26
|
+
pub fn execute(&self, query: String) -> RbResult<RbLazyFrame> {
|
27
|
+
Ok(self
|
28
|
+
.context
|
29
|
+
.borrow_mut()
|
30
|
+
.execute(&query)
|
31
|
+
.map_err(RbPolarsErr::from)?
|
32
|
+
.into())
|
33
|
+
}
|
34
|
+
|
35
|
+
pub fn get_tables(&self) -> RbResult<Vec<String>> {
|
36
|
+
Ok(self.context.borrow().get_tables())
|
37
|
+
}
|
38
|
+
|
39
|
+
pub fn register(&self, name: String, lf: &RbLazyFrame) {
|
40
|
+
self.context.borrow_mut().register(&name, lf.ldf.clone())
|
41
|
+
}
|
42
|
+
|
43
|
+
pub fn unregister(&self, name: String) {
|
44
|
+
self.context.borrow_mut().unregister(&name)
|
45
|
+
}
|
46
|
+
}
|
@@ -0,0 +1,84 @@
|
|
1
|
+
module Polars
|
2
|
+
# Namespace for array related expressions.
|
3
|
+
class ArrayExpr
|
4
|
+
# @private
|
5
|
+
attr_accessor :_rbexpr
|
6
|
+
|
7
|
+
# @private
|
8
|
+
def initialize(expr)
|
9
|
+
self._rbexpr = expr._rbexpr
|
10
|
+
end
|
11
|
+
|
12
|
+
# Compute the min values of the sub-arrays.
|
13
|
+
#
|
14
|
+
# @return [Expr]
|
15
|
+
#
|
16
|
+
# @example
|
17
|
+
# df = Polars::DataFrame.new(
|
18
|
+
# {"a" => [[1, 2], [4, 3]]},
|
19
|
+
# schema: {"a" => Polars::Array.new(2, Polars::Int64)}
|
20
|
+
# )
|
21
|
+
# df.select(Polars.col("a").arr.min)
|
22
|
+
# # =>
|
23
|
+
# # shape: (2, 1)
|
24
|
+
# # ┌─────┐
|
25
|
+
# # │ a │
|
26
|
+
# # │ --- │
|
27
|
+
# # │ i64 │
|
28
|
+
# # ╞═════╡
|
29
|
+
# # │ 1 │
|
30
|
+
# # │ 3 │
|
31
|
+
# # └─────┘
|
32
|
+
def min
|
33
|
+
Utils.wrap_expr(_rbexpr.array_min)
|
34
|
+
end
|
35
|
+
|
36
|
+
# Compute the max values of the sub-arrays.
|
37
|
+
#
|
38
|
+
# @return [Expr]
|
39
|
+
#
|
40
|
+
# @example
|
41
|
+
# df = Polars::DataFrame.new(
|
42
|
+
# {"a" => [[1, 2], [4, 3]]},
|
43
|
+
# schema: {"a" => Polars::Array.new(2, Polars::Int64)}
|
44
|
+
# )
|
45
|
+
# df.select(Polars.col("a").arr.max)
|
46
|
+
# # =>
|
47
|
+
# # shape: (2, 1)
|
48
|
+
# # ┌─────┐
|
49
|
+
# # │ a │
|
50
|
+
# # │ --- │
|
51
|
+
# # │ i64 │
|
52
|
+
# # ╞═════╡
|
53
|
+
# # │ 2 │
|
54
|
+
# # │ 4 │
|
55
|
+
# # └─────┘
|
56
|
+
def max
|
57
|
+
Utils.wrap_expr(_rbexpr.array_max)
|
58
|
+
end
|
59
|
+
|
60
|
+
# Compute the sum values of the sub-arrays.
|
61
|
+
#
|
62
|
+
# @return [Expr]
|
63
|
+
#
|
64
|
+
# @example
|
65
|
+
# df = Polars::DataFrame.new(
|
66
|
+
# {"a" => [[1, 2], [4, 3]]},
|
67
|
+
# schema: {"a" => Polars::Array.new(2, Polars::Int64)}
|
68
|
+
# )
|
69
|
+
# df.select(Polars.col("a").arr.sum)
|
70
|
+
# # =>
|
71
|
+
# # shape: (2, 1)
|
72
|
+
# # ┌─────┐
|
73
|
+
# # │ a │
|
74
|
+
# # │ --- │
|
75
|
+
# # │ i64 │
|
76
|
+
# # ╞═════╡
|
77
|
+
# # │ 3 │
|
78
|
+
# # │ 7 │
|
79
|
+
# # └─────┘
|
80
|
+
def sum
|
81
|
+
Utils.wrap_expr(_rbexpr.array_sum)
|
82
|
+
end
|
83
|
+
end
|
84
|
+
end
|
@@ -0,0 +1,77 @@
|
|
1
|
+
module Polars
|
2
|
+
# Series.arr namespace.
|
3
|
+
class ArrayNameSpace
|
4
|
+
include ExprDispatch
|
5
|
+
|
6
|
+
self._accessor = "arr"
|
7
|
+
|
8
|
+
# @private
|
9
|
+
def initialize(series)
|
10
|
+
self._s = series._s
|
11
|
+
end
|
12
|
+
|
13
|
+
# Compute the min values of the sub-arrays.
|
14
|
+
#
|
15
|
+
# @return [Series]
|
16
|
+
#
|
17
|
+
# @example
|
18
|
+
# s = Polars::Series.new(
|
19
|
+
# "a", [[1, 2], [4, 3]], dtype: Polars::Array.new(2, Polars::Int64)
|
20
|
+
# )
|
21
|
+
# s.arr.min
|
22
|
+
# # =>
|
23
|
+
# # shape: (2,)
|
24
|
+
# # Series: 'a' [i64]
|
25
|
+
# # [
|
26
|
+
# # 1
|
27
|
+
# # 3
|
28
|
+
# # ]
|
29
|
+
def min
|
30
|
+
super
|
31
|
+
end
|
32
|
+
|
33
|
+
# Compute the max values of the sub-arrays.
|
34
|
+
#
|
35
|
+
# @return [Series]
|
36
|
+
#
|
37
|
+
# @example
|
38
|
+
# s = Polars::Series.new(
|
39
|
+
# "a", [[1, 2], [4, 3]], dtype: Polars::Array.new(2, Polars::Int64)
|
40
|
+
# )
|
41
|
+
# s.arr.max
|
42
|
+
# # =>
|
43
|
+
# # shape: (2,)
|
44
|
+
# # Series: 'a' [i64]
|
45
|
+
# # [
|
46
|
+
# # 2
|
47
|
+
# # 4
|
48
|
+
# # ]
|
49
|
+
def max
|
50
|
+
super
|
51
|
+
end
|
52
|
+
|
53
|
+
# Compute the sum values of the sub-arrays.
|
54
|
+
#
|
55
|
+
# @return [Series]
|
56
|
+
#
|
57
|
+
# @example
|
58
|
+
# df = Polars::DataFrame.new(
|
59
|
+
# {"a" => [[1, 2], [4, 3]]},
|
60
|
+
# schema: {"a" => Polars::Array.new(2, Polars::Int64)}
|
61
|
+
# )
|
62
|
+
# df.select(Polars.col("a").arr.sum)
|
63
|
+
# # =>
|
64
|
+
# # shape: (2, 1)
|
65
|
+
# # ┌─────┐
|
66
|
+
# # │ a │
|
67
|
+
# # │ --- │
|
68
|
+
# # │ i64 │
|
69
|
+
# # ╞═════╡
|
70
|
+
# # │ 3 │
|
71
|
+
# # │ 7 │
|
72
|
+
# # └─────┘
|
73
|
+
def sum
|
74
|
+
super
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|