polars-df 0.10.0 → 0.12.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +27 -0
- data/Cargo.lock +392 -351
- data/README.md +6 -6
- data/ext/polars/Cargo.toml +12 -7
- data/ext/polars/src/batched_csv.rs +53 -52
- data/ext/polars/src/conversion/any_value.rs +261 -0
- data/ext/polars/src/conversion/chunked_array.rs +4 -4
- data/ext/polars/src/conversion/mod.rs +60 -66
- data/ext/polars/src/dataframe/construction.rs +184 -0
- data/ext/polars/src/dataframe/export.rs +48 -0
- data/ext/polars/src/dataframe/general.rs +597 -0
- data/ext/polars/src/dataframe/io.rs +473 -0
- data/ext/polars/src/dataframe/mod.rs +26 -0
- data/ext/polars/src/error.rs +26 -4
- data/ext/polars/src/expr/categorical.rs +0 -10
- data/ext/polars/src/expr/datetime.rs +4 -8
- data/ext/polars/src/expr/general.rs +129 -94
- data/ext/polars/src/expr/mod.rs +2 -2
- data/ext/polars/src/expr/rolling.rs +201 -77
- data/ext/polars/src/expr/string.rs +11 -36
- data/ext/polars/src/functions/eager.rs +10 -10
- data/ext/polars/src/functions/lazy.rs +23 -21
- data/ext/polars/src/functions/range.rs +69 -1
- data/ext/polars/src/interop/mod.rs +1 -0
- data/ext/polars/src/interop/numo/mod.rs +2 -0
- data/ext/polars/src/interop/numo/to_numo_df.rs +23 -0
- data/ext/polars/src/interop/numo/to_numo_series.rs +61 -0
- data/ext/polars/src/lazyframe/mod.rs +135 -136
- data/ext/polars/src/lib.rs +94 -59
- data/ext/polars/src/map/dataframe.rs +2 -2
- data/ext/polars/src/map/lazy.rs +5 -25
- data/ext/polars/src/map/series.rs +7 -1
- data/ext/polars/src/rb_modules.rs +25 -1
- data/ext/polars/src/series/aggregation.rs +49 -30
- data/ext/polars/src/series/arithmetic.rs +21 -11
- data/ext/polars/src/series/construction.rs +56 -38
- data/ext/polars/src/series/export.rs +131 -49
- data/ext/polars/src/series/mod.rs +32 -141
- data/ext/polars/src/sql.rs +3 -1
- data/lib/polars/array_expr.rb +4 -4
- data/lib/polars/batched_csv_reader.rb +11 -5
- data/lib/polars/cat_expr.rb +0 -36
- data/lib/polars/cat_name_space.rb +0 -37
- data/lib/polars/convert.rb +6 -1
- data/lib/polars/data_frame.rb +176 -403
- data/lib/polars/data_types.rb +1 -1
- data/lib/polars/date_time_expr.rb +525 -572
- data/lib/polars/date_time_name_space.rb +263 -460
- data/lib/polars/dynamic_group_by.rb +5 -5
- data/lib/polars/exceptions.rb +7 -0
- data/lib/polars/expr.rb +1394 -243
- data/lib/polars/expr_dispatch.rb +1 -1
- data/lib/polars/functions/aggregation/horizontal.rb +8 -8
- data/lib/polars/functions/as_datatype.rb +63 -40
- data/lib/polars/functions/lazy.rb +63 -14
- data/lib/polars/functions/lit.rb +1 -1
- data/lib/polars/functions/range/date_range.rb +90 -57
- data/lib/polars/functions/range/datetime_range.rb +149 -0
- data/lib/polars/functions/range/int_range.rb +2 -2
- data/lib/polars/functions/range/time_range.rb +141 -0
- data/lib/polars/functions/repeat.rb +1 -1
- data/lib/polars/functions/whenthen.rb +1 -1
- data/lib/polars/group_by.rb +88 -23
- data/lib/polars/io/avro.rb +24 -0
- data/lib/polars/{io.rb → io/csv.rb} +299 -493
- data/lib/polars/io/database.rb +73 -0
- data/lib/polars/io/ipc.rb +247 -0
- data/lib/polars/io/json.rb +29 -0
- data/lib/polars/io/ndjson.rb +80 -0
- data/lib/polars/io/parquet.rb +227 -0
- data/lib/polars/lazy_frame.rb +143 -272
- data/lib/polars/lazy_group_by.rb +100 -3
- data/lib/polars/list_expr.rb +11 -11
- data/lib/polars/list_name_space.rb +5 -1
- data/lib/polars/rolling_group_by.rb +7 -9
- data/lib/polars/series.rb +103 -187
- data/lib/polars/string_expr.rb +78 -102
- data/lib/polars/string_name_space.rb +5 -4
- data/lib/polars/testing.rb +2 -2
- data/lib/polars/utils/constants.rb +9 -0
- data/lib/polars/utils/convert.rb +97 -0
- data/lib/polars/utils/parse.rb +89 -0
- data/lib/polars/utils/various.rb +76 -0
- data/lib/polars/utils/wrap.rb +19 -0
- data/lib/polars/utils.rb +8 -300
- data/lib/polars/version.rb +1 -1
- data/lib/polars/whenthen.rb +6 -6
- data/lib/polars.rb +20 -1
- metadata +28 -7
- data/ext/polars/src/conversion/anyvalue.rs +0 -186
- data/ext/polars/src/dataframe.rs +0 -1208
@@ -25,6 +25,48 @@ pub fn int_ranges(start: &RbExpr, end: &RbExpr, step: &RbExpr, dtype: Wrap<DataT
|
|
25
25
|
}
|
26
26
|
|
27
27
|
pub fn date_range(
|
28
|
+
start: &RbExpr,
|
29
|
+
end: &RbExpr,
|
30
|
+
interval: String,
|
31
|
+
closed: Wrap<ClosedWindow>,
|
32
|
+
) -> RbExpr {
|
33
|
+
let start = start.inner.clone();
|
34
|
+
let end = end.inner.clone();
|
35
|
+
let interval = Duration::parse(&interval);
|
36
|
+
let closed = closed.0;
|
37
|
+
dsl::date_range(start, end, interval, closed).into()
|
38
|
+
}
|
39
|
+
|
40
|
+
pub fn date_ranges(
|
41
|
+
start: &RbExpr,
|
42
|
+
end: &RbExpr,
|
43
|
+
interval: String,
|
44
|
+
closed: Wrap<ClosedWindow>,
|
45
|
+
) -> RbExpr {
|
46
|
+
let start = start.inner.clone();
|
47
|
+
let end = end.inner.clone();
|
48
|
+
let interval = Duration::parse(&interval);
|
49
|
+
let closed = closed.0;
|
50
|
+
dsl::date_ranges(start, end, interval, closed).into()
|
51
|
+
}
|
52
|
+
|
53
|
+
pub fn datetime_range(
|
54
|
+
start: &RbExpr,
|
55
|
+
end: &RbExpr,
|
56
|
+
every: String,
|
57
|
+
closed: Wrap<ClosedWindow>,
|
58
|
+
time_unit: Option<Wrap<TimeUnit>>,
|
59
|
+
time_zone: Option<TimeZone>,
|
60
|
+
) -> RbExpr {
|
61
|
+
let start = start.inner.clone();
|
62
|
+
let end = end.inner.clone();
|
63
|
+
let every = Duration::parse(&every);
|
64
|
+
let closed = closed.0;
|
65
|
+
let time_unit = time_unit.map(|x| x.0);
|
66
|
+
dsl::datetime_range(start, end, every, closed, time_unit, time_zone).into()
|
67
|
+
}
|
68
|
+
|
69
|
+
pub fn datetime_ranges(
|
28
70
|
start: &RbExpr,
|
29
71
|
end: &RbExpr,
|
30
72
|
every: String,
|
@@ -37,5 +79,31 @@ pub fn date_range(
|
|
37
79
|
let every = Duration::parse(&every);
|
38
80
|
let closed = closed.0;
|
39
81
|
let time_unit = time_unit.map(|x| x.0);
|
40
|
-
dsl::
|
82
|
+
dsl::datetime_ranges(start, end, every, closed, time_unit, time_zone).into()
|
83
|
+
}
|
84
|
+
|
85
|
+
pub fn time_range(
|
86
|
+
start: &RbExpr,
|
87
|
+
end: &RbExpr,
|
88
|
+
every: String,
|
89
|
+
closed: Wrap<ClosedWindow>,
|
90
|
+
) -> RbExpr {
|
91
|
+
let start = start.inner.clone();
|
92
|
+
let end = end.inner.clone();
|
93
|
+
let every = Duration::parse(&every);
|
94
|
+
let closed = closed.0;
|
95
|
+
dsl::time_range(start, end, every, closed).into()
|
96
|
+
}
|
97
|
+
|
98
|
+
pub fn time_ranges(
|
99
|
+
start: &RbExpr,
|
100
|
+
end: &RbExpr,
|
101
|
+
every: String,
|
102
|
+
closed: Wrap<ClosedWindow>,
|
103
|
+
) -> RbExpr {
|
104
|
+
let start = start.inner.clone();
|
105
|
+
let end = end.inner.clone();
|
106
|
+
let every = Duration::parse(&every);
|
107
|
+
let closed = closed.0;
|
108
|
+
dsl::time_ranges(start, end, every, closed).into()
|
41
109
|
}
|
@@ -0,0 +1 @@
|
|
1
|
+
pub mod numo;
|
@@ -0,0 +1,23 @@
|
|
1
|
+
use magnus::Value;
|
2
|
+
use polars_core::utils::try_get_supertype;
|
3
|
+
|
4
|
+
use crate::dataframe::RbDataFrame;
|
5
|
+
|
6
|
+
impl RbDataFrame {
|
7
|
+
pub fn to_numo(&self) -> Option<Value> {
|
8
|
+
let mut st = None;
|
9
|
+
for s in self.df.borrow().iter() {
|
10
|
+
let dt_i = s.dtype();
|
11
|
+
match st {
|
12
|
+
None => st = Some(dt_i.clone()),
|
13
|
+
Some(ref mut st) => {
|
14
|
+
*st = try_get_supertype(st, dt_i).ok()?;
|
15
|
+
}
|
16
|
+
}
|
17
|
+
}
|
18
|
+
let _st = st?;
|
19
|
+
|
20
|
+
// TODO
|
21
|
+
None
|
22
|
+
}
|
23
|
+
}
|
@@ -0,0 +1,61 @@
|
|
1
|
+
use magnus::{class, prelude::*, Module, RArray, RClass, RModule, Value};
|
2
|
+
use polars::series::BitRepr;
|
3
|
+
use polars_core::prelude::*;
|
4
|
+
|
5
|
+
use crate::error::RbPolarsErr;
|
6
|
+
use crate::raise_err;
|
7
|
+
use crate::series::RbSeries;
|
8
|
+
use crate::RbResult;
|
9
|
+
|
10
|
+
impl RbSeries {
|
11
|
+
/// For numeric types, this should only be called for Series with null types.
|
12
|
+
/// This will cast to floats so that `nil = NAN`
|
13
|
+
pub fn to_numo(&self) -> RbResult<Value> {
|
14
|
+
let s = &self.series.borrow();
|
15
|
+
match s.dtype() {
|
16
|
+
DataType::String => {
|
17
|
+
let ca = s.str().unwrap();
|
18
|
+
|
19
|
+
// TODO make more efficient
|
20
|
+
let np_arr = RArray::from_iter(ca);
|
21
|
+
class::object()
|
22
|
+
.const_get::<_, RModule>("Numo")?
|
23
|
+
.const_get::<_, RClass>("RObject")?
|
24
|
+
.funcall("cast", (np_arr,))
|
25
|
+
}
|
26
|
+
dt if dt.is_numeric() => {
|
27
|
+
if let Some(BitRepr::Large(_)) = s.bit_repr() {
|
28
|
+
let s = s.cast(&DataType::Float64).unwrap();
|
29
|
+
let ca = s.f64().unwrap();
|
30
|
+
// TODO make more efficient
|
31
|
+
let np_arr = RArray::from_iter(ca.into_iter().map(|opt_v| match opt_v {
|
32
|
+
Some(v) => v,
|
33
|
+
None => f64::NAN,
|
34
|
+
}));
|
35
|
+
class::object()
|
36
|
+
.const_get::<_, RModule>("Numo")?
|
37
|
+
.const_get::<_, RClass>("DFloat")?
|
38
|
+
.funcall("cast", (np_arr,))
|
39
|
+
} else {
|
40
|
+
let s = s.cast(&DataType::Float32).unwrap();
|
41
|
+
let ca = s.f32().unwrap();
|
42
|
+
// TODO make more efficient
|
43
|
+
let np_arr = RArray::from_iter(ca.into_iter().map(|opt_v| match opt_v {
|
44
|
+
Some(v) => v,
|
45
|
+
None => f32::NAN,
|
46
|
+
}));
|
47
|
+
class::object()
|
48
|
+
.const_get::<_, RModule>("Numo")?
|
49
|
+
.const_get::<_, RClass>("SFloat")?
|
50
|
+
.funcall("cast", (np_arr,))
|
51
|
+
}
|
52
|
+
}
|
53
|
+
dt => {
|
54
|
+
raise_err!(
|
55
|
+
format!("'to_numo' not supported for dtype: {dt:?}"),
|
56
|
+
ComputeError
|
57
|
+
);
|
58
|
+
}
|
59
|
+
}
|
60
|
+
}
|
61
|
+
}
|