polars-df 0.10.0 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +27 -0
- data/Cargo.lock +392 -351
- data/README.md +6 -6
- data/ext/polars/Cargo.toml +12 -7
- data/ext/polars/src/batched_csv.rs +53 -52
- data/ext/polars/src/conversion/any_value.rs +261 -0
- data/ext/polars/src/conversion/chunked_array.rs +4 -4
- data/ext/polars/src/conversion/mod.rs +60 -66
- data/ext/polars/src/dataframe/construction.rs +184 -0
- data/ext/polars/src/dataframe/export.rs +48 -0
- data/ext/polars/src/dataframe/general.rs +597 -0
- data/ext/polars/src/dataframe/io.rs +473 -0
- data/ext/polars/src/dataframe/mod.rs +26 -0
- data/ext/polars/src/error.rs +26 -4
- data/ext/polars/src/expr/categorical.rs +0 -10
- data/ext/polars/src/expr/datetime.rs +4 -8
- data/ext/polars/src/expr/general.rs +129 -94
- data/ext/polars/src/expr/mod.rs +2 -2
- data/ext/polars/src/expr/rolling.rs +201 -77
- data/ext/polars/src/expr/string.rs +11 -36
- data/ext/polars/src/functions/eager.rs +10 -10
- data/ext/polars/src/functions/lazy.rs +23 -21
- data/ext/polars/src/functions/range.rs +69 -1
- data/ext/polars/src/interop/mod.rs +1 -0
- data/ext/polars/src/interop/numo/mod.rs +2 -0
- data/ext/polars/src/interop/numo/to_numo_df.rs +23 -0
- data/ext/polars/src/interop/numo/to_numo_series.rs +61 -0
- data/ext/polars/src/lazyframe/mod.rs +135 -136
- data/ext/polars/src/lib.rs +94 -59
- data/ext/polars/src/map/dataframe.rs +2 -2
- data/ext/polars/src/map/lazy.rs +5 -25
- data/ext/polars/src/map/series.rs +7 -1
- data/ext/polars/src/rb_modules.rs +25 -1
- data/ext/polars/src/series/aggregation.rs +49 -30
- data/ext/polars/src/series/arithmetic.rs +21 -11
- data/ext/polars/src/series/construction.rs +56 -38
- data/ext/polars/src/series/export.rs +131 -49
- data/ext/polars/src/series/mod.rs +32 -141
- data/ext/polars/src/sql.rs +3 -1
- data/lib/polars/array_expr.rb +4 -4
- data/lib/polars/batched_csv_reader.rb +11 -5
- data/lib/polars/cat_expr.rb +0 -36
- data/lib/polars/cat_name_space.rb +0 -37
- data/lib/polars/convert.rb +6 -1
- data/lib/polars/data_frame.rb +176 -403
- data/lib/polars/data_types.rb +1 -1
- data/lib/polars/date_time_expr.rb +525 -572
- data/lib/polars/date_time_name_space.rb +263 -460
- data/lib/polars/dynamic_group_by.rb +5 -5
- data/lib/polars/exceptions.rb +7 -0
- data/lib/polars/expr.rb +1394 -243
- data/lib/polars/expr_dispatch.rb +1 -1
- data/lib/polars/functions/aggregation/horizontal.rb +8 -8
- data/lib/polars/functions/as_datatype.rb +63 -40
- data/lib/polars/functions/lazy.rb +63 -14
- data/lib/polars/functions/lit.rb +1 -1
- data/lib/polars/functions/range/date_range.rb +90 -57
- data/lib/polars/functions/range/datetime_range.rb +149 -0
- data/lib/polars/functions/range/int_range.rb +2 -2
- data/lib/polars/functions/range/time_range.rb +141 -0
- data/lib/polars/functions/repeat.rb +1 -1
- data/lib/polars/functions/whenthen.rb +1 -1
- data/lib/polars/group_by.rb +88 -23
- data/lib/polars/io/avro.rb +24 -0
- data/lib/polars/{io.rb → io/csv.rb} +299 -493
- data/lib/polars/io/database.rb +73 -0
- data/lib/polars/io/ipc.rb +247 -0
- data/lib/polars/io/json.rb +29 -0
- data/lib/polars/io/ndjson.rb +80 -0
- data/lib/polars/io/parquet.rb +227 -0
- data/lib/polars/lazy_frame.rb +143 -272
- data/lib/polars/lazy_group_by.rb +100 -3
- data/lib/polars/list_expr.rb +11 -11
- data/lib/polars/list_name_space.rb +5 -1
- data/lib/polars/rolling_group_by.rb +7 -9
- data/lib/polars/series.rb +103 -187
- data/lib/polars/string_expr.rb +78 -102
- data/lib/polars/string_name_space.rb +5 -4
- data/lib/polars/testing.rb +2 -2
- data/lib/polars/utils/constants.rb +9 -0
- data/lib/polars/utils/convert.rb +97 -0
- data/lib/polars/utils/parse.rb +89 -0
- data/lib/polars/utils/various.rb +76 -0
- data/lib/polars/utils/wrap.rb +19 -0
- data/lib/polars/utils.rb +8 -300
- data/lib/polars/version.rb +1 -1
- data/lib/polars/whenthen.rb +6 -6
- data/lib/polars.rb +20 -1
- metadata +28 -7
- data/ext/polars/src/conversion/anyvalue.rs +0 -186
- data/ext/polars/src/dataframe.rs +0 -1208
@@ -25,6 +25,48 @@ pub fn int_ranges(start: &RbExpr, end: &RbExpr, step: &RbExpr, dtype: Wrap<DataT
|
|
25
25
|
}
|
26
26
|
|
27
27
|
pub fn date_range(
|
28
|
+
start: &RbExpr,
|
29
|
+
end: &RbExpr,
|
30
|
+
interval: String,
|
31
|
+
closed: Wrap<ClosedWindow>,
|
32
|
+
) -> RbExpr {
|
33
|
+
let start = start.inner.clone();
|
34
|
+
let end = end.inner.clone();
|
35
|
+
let interval = Duration::parse(&interval);
|
36
|
+
let closed = closed.0;
|
37
|
+
dsl::date_range(start, end, interval, closed).into()
|
38
|
+
}
|
39
|
+
|
40
|
+
pub fn date_ranges(
|
41
|
+
start: &RbExpr,
|
42
|
+
end: &RbExpr,
|
43
|
+
interval: String,
|
44
|
+
closed: Wrap<ClosedWindow>,
|
45
|
+
) -> RbExpr {
|
46
|
+
let start = start.inner.clone();
|
47
|
+
let end = end.inner.clone();
|
48
|
+
let interval = Duration::parse(&interval);
|
49
|
+
let closed = closed.0;
|
50
|
+
dsl::date_ranges(start, end, interval, closed).into()
|
51
|
+
}
|
52
|
+
|
53
|
+
pub fn datetime_range(
|
54
|
+
start: &RbExpr,
|
55
|
+
end: &RbExpr,
|
56
|
+
every: String,
|
57
|
+
closed: Wrap<ClosedWindow>,
|
58
|
+
time_unit: Option<Wrap<TimeUnit>>,
|
59
|
+
time_zone: Option<TimeZone>,
|
60
|
+
) -> RbExpr {
|
61
|
+
let start = start.inner.clone();
|
62
|
+
let end = end.inner.clone();
|
63
|
+
let every = Duration::parse(&every);
|
64
|
+
let closed = closed.0;
|
65
|
+
let time_unit = time_unit.map(|x| x.0);
|
66
|
+
dsl::datetime_range(start, end, every, closed, time_unit, time_zone).into()
|
67
|
+
}
|
68
|
+
|
69
|
+
pub fn datetime_ranges(
|
28
70
|
start: &RbExpr,
|
29
71
|
end: &RbExpr,
|
30
72
|
every: String,
|
@@ -37,5 +79,31 @@ pub fn date_range(
|
|
37
79
|
let every = Duration::parse(&every);
|
38
80
|
let closed = closed.0;
|
39
81
|
let time_unit = time_unit.map(|x| x.0);
|
40
|
-
dsl::
|
82
|
+
dsl::datetime_ranges(start, end, every, closed, time_unit, time_zone).into()
|
83
|
+
}
|
84
|
+
|
85
|
+
pub fn time_range(
|
86
|
+
start: &RbExpr,
|
87
|
+
end: &RbExpr,
|
88
|
+
every: String,
|
89
|
+
closed: Wrap<ClosedWindow>,
|
90
|
+
) -> RbExpr {
|
91
|
+
let start = start.inner.clone();
|
92
|
+
let end = end.inner.clone();
|
93
|
+
let every = Duration::parse(&every);
|
94
|
+
let closed = closed.0;
|
95
|
+
dsl::time_range(start, end, every, closed).into()
|
96
|
+
}
|
97
|
+
|
98
|
+
pub fn time_ranges(
|
99
|
+
start: &RbExpr,
|
100
|
+
end: &RbExpr,
|
101
|
+
every: String,
|
102
|
+
closed: Wrap<ClosedWindow>,
|
103
|
+
) -> RbExpr {
|
104
|
+
let start = start.inner.clone();
|
105
|
+
let end = end.inner.clone();
|
106
|
+
let every = Duration::parse(&every);
|
107
|
+
let closed = closed.0;
|
108
|
+
dsl::time_ranges(start, end, every, closed).into()
|
41
109
|
}
|
@@ -0,0 +1 @@
|
|
1
|
+
pub mod numo;
|
@@ -0,0 +1,23 @@
|
|
1
|
+
use magnus::Value;
|
2
|
+
use polars_core::utils::try_get_supertype;
|
3
|
+
|
4
|
+
use crate::dataframe::RbDataFrame;
|
5
|
+
|
6
|
+
impl RbDataFrame {
|
7
|
+
pub fn to_numo(&self) -> Option<Value> {
|
8
|
+
let mut st = None;
|
9
|
+
for s in self.df.borrow().iter() {
|
10
|
+
let dt_i = s.dtype();
|
11
|
+
match st {
|
12
|
+
None => st = Some(dt_i.clone()),
|
13
|
+
Some(ref mut st) => {
|
14
|
+
*st = try_get_supertype(st, dt_i).ok()?;
|
15
|
+
}
|
16
|
+
}
|
17
|
+
}
|
18
|
+
let _st = st?;
|
19
|
+
|
20
|
+
// TODO
|
21
|
+
None
|
22
|
+
}
|
23
|
+
}
|
@@ -0,0 +1,61 @@
|
|
1
|
+
use magnus::{class, prelude::*, Module, RArray, RClass, RModule, Value};
|
2
|
+
use polars::series::BitRepr;
|
3
|
+
use polars_core::prelude::*;
|
4
|
+
|
5
|
+
use crate::error::RbPolarsErr;
|
6
|
+
use crate::raise_err;
|
7
|
+
use crate::series::RbSeries;
|
8
|
+
use crate::RbResult;
|
9
|
+
|
10
|
+
impl RbSeries {
|
11
|
+
/// For numeric types, this should only be called for Series with null types.
|
12
|
+
/// This will cast to floats so that `nil = NAN`
|
13
|
+
pub fn to_numo(&self) -> RbResult<Value> {
|
14
|
+
let s = &self.series.borrow();
|
15
|
+
match s.dtype() {
|
16
|
+
DataType::String => {
|
17
|
+
let ca = s.str().unwrap();
|
18
|
+
|
19
|
+
// TODO make more efficient
|
20
|
+
let np_arr = RArray::from_iter(ca);
|
21
|
+
class::object()
|
22
|
+
.const_get::<_, RModule>("Numo")?
|
23
|
+
.const_get::<_, RClass>("RObject")?
|
24
|
+
.funcall("cast", (np_arr,))
|
25
|
+
}
|
26
|
+
dt if dt.is_numeric() => {
|
27
|
+
if let Some(BitRepr::Large(_)) = s.bit_repr() {
|
28
|
+
let s = s.cast(&DataType::Float64).unwrap();
|
29
|
+
let ca = s.f64().unwrap();
|
30
|
+
// TODO make more efficient
|
31
|
+
let np_arr = RArray::from_iter(ca.into_iter().map(|opt_v| match opt_v {
|
32
|
+
Some(v) => v,
|
33
|
+
None => f64::NAN,
|
34
|
+
}));
|
35
|
+
class::object()
|
36
|
+
.const_get::<_, RModule>("Numo")?
|
37
|
+
.const_get::<_, RClass>("DFloat")?
|
38
|
+
.funcall("cast", (np_arr,))
|
39
|
+
} else {
|
40
|
+
let s = s.cast(&DataType::Float32).unwrap();
|
41
|
+
let ca = s.f32().unwrap();
|
42
|
+
// TODO make more efficient
|
43
|
+
let np_arr = RArray::from_iter(ca.into_iter().map(|opt_v| match opt_v {
|
44
|
+
Some(v) => v,
|
45
|
+
None => f32::NAN,
|
46
|
+
}));
|
47
|
+
class::object()
|
48
|
+
.const_get::<_, RModule>("Numo")?
|
49
|
+
.const_get::<_, RClass>("SFloat")?
|
50
|
+
.funcall("cast", (np_arr,))
|
51
|
+
}
|
52
|
+
}
|
53
|
+
dt => {
|
54
|
+
raise_err!(
|
55
|
+
format!("'to_numo' not supported for dtype: {dt:?}"),
|
56
|
+
ComputeError
|
57
|
+
);
|
58
|
+
}
|
59
|
+
}
|
60
|
+
}
|
61
|
+
}
|