polars-df 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/Cargo.lock +272 -191
- data/Cargo.toml +0 -1
- data/README.md +2 -2
- data/ext/polars/Cargo.toml +8 -4
- data/ext/polars/src/apply/dataframe.rs +2 -2
- data/ext/polars/src/{lazy/apply.rs → apply/lazy.rs} +1 -2
- data/ext/polars/src/apply/mod.rs +1 -0
- data/ext/polars/src/batched_csv.rs +7 -5
- data/ext/polars/src/conversion.rs +106 -4
- data/ext/polars/src/dataframe.rs +19 -17
- data/ext/polars/src/error.rs +0 -4
- data/ext/polars/src/expr/binary.rs +69 -0
- data/ext/polars/src/expr/categorical.rs +10 -0
- data/ext/polars/src/expr/datetime.rs +223 -0
- data/ext/polars/src/expr/general.rs +933 -0
- data/ext/polars/src/expr/list.rs +146 -0
- data/ext/polars/src/{lazy → expr}/meta.rs +16 -6
- data/ext/polars/src/expr/string.rs +313 -0
- data/ext/polars/src/expr/struct.rs +15 -0
- data/ext/polars/src/expr.rs +33 -0
- data/ext/polars/src/functions/eager.rs +93 -0
- data/ext/polars/src/functions/io.rs +34 -0
- data/ext/polars/src/functions/lazy.rs +209 -0
- data/ext/polars/src/functions/meta.rs +8 -0
- data/ext/polars/src/functions/mod.rs +5 -0
- data/ext/polars/src/functions/whenthen.rs +43 -0
- data/ext/polars/src/{lazy/dataframe.rs → lazyframe.rs} +12 -33
- data/ext/polars/src/lazygroupby.rs +29 -0
- data/ext/polars/src/lib.rs +205 -303
- data/ext/polars/src/rb_modules.rs +8 -0
- data/ext/polars/src/series/aggregation.rs +83 -0
- data/ext/polars/src/series/arithmetic.rs +88 -0
- data/ext/polars/src/series/comparison.rs +251 -0
- data/ext/polars/src/series/construction.rs +164 -0
- data/ext/polars/src/series.rs +99 -539
- data/lib/polars/convert.rb +2 -2
- data/lib/polars/data_frame.rb +201 -50
- data/lib/polars/data_types.rb +6 -4
- data/lib/polars/date_time_expr.rb +142 -2
- data/lib/polars/expr.rb +70 -10
- data/lib/polars/lazy_frame.rb +4 -3
- data/lib/polars/lazy_functions.rb +4 -1
- data/lib/polars/list_expr.rb +68 -19
- data/lib/polars/series.rb +181 -73
- data/lib/polars/string_expr.rb +149 -43
- data/lib/polars/string_name_space.rb +4 -4
- data/lib/polars/struct_name_space.rb +32 -0
- data/lib/polars/utils.rb +41 -7
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +2 -2
- metadata +26 -11
- data/ext/polars/src/lazy/dsl.rs +0 -1775
- data/ext/polars/src/lazy/mod.rs +0 -5
- data/ext/polars/src/lazy/utils.rs +0 -13
- data/ext/polars/src/list_construction.rs +0 -100
- data/ext/polars/src/{numo.rs → series/export.rs} +0 -0
- data/ext/polars/src/{set.rs → series/set_at_idx.rs} +0 -0
@@ -0,0 +1,146 @@
|
|
1
|
+
use magnus::Value;
|
2
|
+
use polars::lazy::dsl::lit;
|
3
|
+
use polars::prelude::*;
|
4
|
+
use polars::series::ops::NullBehavior;
|
5
|
+
|
6
|
+
use crate::conversion::Wrap;
|
7
|
+
use crate::{RbExpr, RbResult};
|
8
|
+
|
9
|
+
impl RbExpr {
|
10
|
+
pub fn list_arg_max(&self) -> Self {
|
11
|
+
self.inner.clone().arr().arg_max().into()
|
12
|
+
}
|
13
|
+
|
14
|
+
pub fn list_arg_min(&self) -> Self {
|
15
|
+
self.inner.clone().arr().arg_min().into()
|
16
|
+
}
|
17
|
+
|
18
|
+
pub fn list_contains(&self, other: &RbExpr) -> Self {
|
19
|
+
self.inner
|
20
|
+
.clone()
|
21
|
+
.arr()
|
22
|
+
.contains(other.inner.clone())
|
23
|
+
.into()
|
24
|
+
}
|
25
|
+
|
26
|
+
pub fn list_count_match(&self, expr: &RbExpr) -> Self {
|
27
|
+
self.inner
|
28
|
+
.clone()
|
29
|
+
.arr()
|
30
|
+
.count_match(expr.inner.clone())
|
31
|
+
.into()
|
32
|
+
}
|
33
|
+
|
34
|
+
pub fn list_diff(&self, n: i64, null_behavior: Wrap<NullBehavior>) -> RbResult<Self> {
|
35
|
+
Ok(self.inner.clone().arr().diff(n, null_behavior.0).into())
|
36
|
+
}
|
37
|
+
|
38
|
+
pub fn list_eval(&self, expr: &RbExpr, parallel: bool) -> Self {
|
39
|
+
self.inner
|
40
|
+
.clone()
|
41
|
+
.arr()
|
42
|
+
.eval(expr.inner.clone(), parallel)
|
43
|
+
.into()
|
44
|
+
}
|
45
|
+
|
46
|
+
pub fn list_get(&self, index: &RbExpr) -> Self {
|
47
|
+
self.inner.clone().arr().get(index.inner.clone()).into()
|
48
|
+
}
|
49
|
+
|
50
|
+
pub fn list_join(&self, separator: String) -> Self {
|
51
|
+
self.inner.clone().arr().join(&separator).into()
|
52
|
+
}
|
53
|
+
|
54
|
+
pub fn list_lengths(&self) -> Self {
|
55
|
+
self.inner.clone().arr().lengths().into()
|
56
|
+
}
|
57
|
+
|
58
|
+
pub fn list_max(&self) -> Self {
|
59
|
+
self.inner.clone().arr().max().into()
|
60
|
+
}
|
61
|
+
|
62
|
+
pub fn list_mean(&self) -> Self {
|
63
|
+
self.inner.clone().arr().mean().with_fmt("arr.mean").into()
|
64
|
+
}
|
65
|
+
|
66
|
+
pub fn list_min(&self) -> Self {
|
67
|
+
self.inner.clone().arr().min().into()
|
68
|
+
}
|
69
|
+
|
70
|
+
pub fn list_reverse(&self) -> Self {
|
71
|
+
self.inner.clone().arr().reverse().into()
|
72
|
+
}
|
73
|
+
|
74
|
+
pub fn list_shift(&self, periods: i64) -> Self {
|
75
|
+
self.inner.clone().arr().shift(periods).into()
|
76
|
+
}
|
77
|
+
|
78
|
+
pub fn list_slice(&self, offset: &RbExpr, length: Option<&RbExpr>) -> Self {
|
79
|
+
let length = match length {
|
80
|
+
Some(i) => i.inner.clone(),
|
81
|
+
None => lit(i64::MAX),
|
82
|
+
};
|
83
|
+
self.inner
|
84
|
+
.clone()
|
85
|
+
.arr()
|
86
|
+
.slice(offset.inner.clone(), length)
|
87
|
+
.into()
|
88
|
+
}
|
89
|
+
|
90
|
+
pub fn list_sort(&self, reverse: bool) -> Self {
|
91
|
+
self.inner
|
92
|
+
.clone()
|
93
|
+
.arr()
|
94
|
+
.sort(SortOptions {
|
95
|
+
descending: reverse,
|
96
|
+
..Default::default()
|
97
|
+
})
|
98
|
+
.with_fmt("arr.sort")
|
99
|
+
.into()
|
100
|
+
}
|
101
|
+
|
102
|
+
pub fn list_sum(&self) -> Self {
|
103
|
+
self.inner.clone().arr().sum().with_fmt("arr.sum").into()
|
104
|
+
}
|
105
|
+
|
106
|
+
pub fn list_take(&self, index: &RbExpr, null_on_oob: bool) -> Self {
|
107
|
+
self.inner
|
108
|
+
.clone()
|
109
|
+
.arr()
|
110
|
+
.take(index.inner.clone(), null_on_oob)
|
111
|
+
.into()
|
112
|
+
}
|
113
|
+
|
114
|
+
pub fn list_to_struct(
|
115
|
+
&self,
|
116
|
+
width_strat: Wrap<ListToStructWidthStrategy>,
|
117
|
+
_name_gen: Option<Value>,
|
118
|
+
upper_bound: usize,
|
119
|
+
) -> RbResult<Self> {
|
120
|
+
// TODO fix
|
121
|
+
let name_gen = None;
|
122
|
+
// let name_gen = name_gen.map(|lambda| {
|
123
|
+
// Arc::new(move |idx: usize| {
|
124
|
+
// let out: Value = lambda.funcall("call", (idx,)).unwrap();
|
125
|
+
// out.try_convert::<String>().unwrap()
|
126
|
+
// }) as NameGenerator
|
127
|
+
// });
|
128
|
+
|
129
|
+
Ok(self
|
130
|
+
.inner
|
131
|
+
.clone()
|
132
|
+
.arr()
|
133
|
+
.to_struct(width_strat.0, name_gen, upper_bound)
|
134
|
+
.into())
|
135
|
+
}
|
136
|
+
|
137
|
+
pub fn list_unique(&self, maintain_order: bool) -> Self {
|
138
|
+
let e = self.inner.clone();
|
139
|
+
|
140
|
+
if maintain_order {
|
141
|
+
e.arr().unique_stable().into()
|
142
|
+
} else {
|
143
|
+
e.arr().unique().into()
|
144
|
+
}
|
145
|
+
}
|
146
|
+
}
|
@@ -1,6 +1,12 @@
|
|
1
|
-
use
|
1
|
+
use magnus::RArray;
|
2
|
+
|
3
|
+
use crate::{RbExpr, RbPolarsErr, RbResult};
|
2
4
|
|
3
5
|
impl RbExpr {
|
6
|
+
pub fn meta_eq(&self, other: &RbExpr) -> bool {
|
7
|
+
self.inner == other.inner
|
8
|
+
}
|
9
|
+
|
4
10
|
pub fn meta_pop(&self) -> RArray {
|
5
11
|
RArray::from_iter(
|
6
12
|
self.inner
|
@@ -12,11 +18,7 @@ impl RbExpr {
|
|
12
18
|
)
|
13
19
|
}
|
14
20
|
|
15
|
-
pub fn
|
16
|
-
self.inner == other.inner
|
17
|
-
}
|
18
|
-
|
19
|
-
pub fn meta_roots(&self) -> Vec<String> {
|
21
|
+
pub fn meta_root_names(&self) -> Vec<String> {
|
20
22
|
self.inner
|
21
23
|
.clone()
|
22
24
|
.meta()
|
@@ -39,4 +41,12 @@ impl RbExpr {
|
|
39
41
|
pub fn meta_undo_aliases(&self) -> RbExpr {
|
40
42
|
self.inner.clone().meta().undo_aliases().into()
|
41
43
|
}
|
44
|
+
|
45
|
+
pub fn meta_has_multiple_outputs(&self) -> bool {
|
46
|
+
self.inner.clone().meta().has_multiple_outputs()
|
47
|
+
}
|
48
|
+
|
49
|
+
pub fn meta_is_regex_projection(&self) -> bool {
|
50
|
+
self.inner.clone().meta().is_regex_projection()
|
51
|
+
}
|
42
52
|
}
|
@@ -0,0 +1,313 @@
|
|
1
|
+
use polars::prelude::*;
|
2
|
+
|
3
|
+
use crate::conversion::Wrap;
|
4
|
+
use crate::RbExpr;
|
5
|
+
|
6
|
+
impl RbExpr {
|
7
|
+
pub fn str_concat(&self, delimiter: String) -> Self {
|
8
|
+
self.inner.clone().str().concat(&delimiter).into()
|
9
|
+
}
|
10
|
+
|
11
|
+
pub fn str_to_date(
|
12
|
+
&self,
|
13
|
+
format: Option<String>,
|
14
|
+
strict: bool,
|
15
|
+
exact: bool,
|
16
|
+
cache: bool,
|
17
|
+
) -> Self {
|
18
|
+
let options = StrptimeOptions {
|
19
|
+
format,
|
20
|
+
strict,
|
21
|
+
exact,
|
22
|
+
cache,
|
23
|
+
..Default::default()
|
24
|
+
};
|
25
|
+
self.inner.clone().str().to_date(options).into()
|
26
|
+
}
|
27
|
+
|
28
|
+
#[allow(clippy::too_many_arguments)]
|
29
|
+
pub fn str_to_datetime(
|
30
|
+
&self,
|
31
|
+
format: Option<String>,
|
32
|
+
time_unit: Option<Wrap<TimeUnit>>,
|
33
|
+
time_zone: Option<TimeZone>,
|
34
|
+
strict: bool,
|
35
|
+
exact: bool,
|
36
|
+
cache: bool,
|
37
|
+
utc: bool,
|
38
|
+
tz_aware: bool,
|
39
|
+
) -> Self {
|
40
|
+
let options = StrptimeOptions {
|
41
|
+
format,
|
42
|
+
strict,
|
43
|
+
exact,
|
44
|
+
cache,
|
45
|
+
tz_aware,
|
46
|
+
utc,
|
47
|
+
};
|
48
|
+
self.inner
|
49
|
+
.clone()
|
50
|
+
.str()
|
51
|
+
.to_datetime(time_unit.map(|tu| tu.0), time_zone, options)
|
52
|
+
.into()
|
53
|
+
}
|
54
|
+
|
55
|
+
pub fn str_to_time(&self, format: Option<String>, strict: bool, cache: bool) -> Self {
|
56
|
+
let options = StrptimeOptions {
|
57
|
+
format,
|
58
|
+
strict,
|
59
|
+
cache,
|
60
|
+
exact: true,
|
61
|
+
..Default::default()
|
62
|
+
};
|
63
|
+
self.inner.clone().str().to_time(options).into()
|
64
|
+
}
|
65
|
+
|
66
|
+
pub fn str_strip(&self, matches: Option<String>) -> Self {
|
67
|
+
self.inner.clone().str().strip(matches).into()
|
68
|
+
}
|
69
|
+
|
70
|
+
pub fn str_rstrip(&self, matches: Option<String>) -> Self {
|
71
|
+
self.inner.clone().str().rstrip(matches).into()
|
72
|
+
}
|
73
|
+
|
74
|
+
pub fn str_lstrip(&self, matches: Option<String>) -> Self {
|
75
|
+
self.inner.clone().str().lstrip(matches).into()
|
76
|
+
}
|
77
|
+
|
78
|
+
pub fn str_slice(&self, start: i64, length: Option<u64>) -> Self {
|
79
|
+
let function = move |s: Series| {
|
80
|
+
let ca = s.utf8()?;
|
81
|
+
Ok(Some(ca.str_slice(start, length)?.into_series()))
|
82
|
+
};
|
83
|
+
self.clone()
|
84
|
+
.inner
|
85
|
+
.map(function, GetOutput::from_type(DataType::Utf8))
|
86
|
+
.with_fmt("str.slice")
|
87
|
+
.into()
|
88
|
+
}
|
89
|
+
|
90
|
+
pub fn str_to_uppercase(&self) -> Self {
|
91
|
+
self.inner.clone().str().to_uppercase().into()
|
92
|
+
}
|
93
|
+
|
94
|
+
pub fn str_to_lowercase(&self) -> Self {
|
95
|
+
self.inner.clone().str().to_lowercase().into()
|
96
|
+
}
|
97
|
+
|
98
|
+
pub fn str_lengths(&self) -> Self {
|
99
|
+
let function = |s: Series| {
|
100
|
+
let ca = s.utf8()?;
|
101
|
+
Ok(Some(ca.str_lengths().into_series()))
|
102
|
+
};
|
103
|
+
self.clone()
|
104
|
+
.inner
|
105
|
+
.map(function, GetOutput::from_type(DataType::UInt32))
|
106
|
+
.with_fmt("str.lengths")
|
107
|
+
.into()
|
108
|
+
}
|
109
|
+
|
110
|
+
pub fn str_n_chars(&self) -> Self {
|
111
|
+
let function = |s: Series| {
|
112
|
+
let ca = s.utf8()?;
|
113
|
+
Ok(Some(ca.str_n_chars().into_series()))
|
114
|
+
};
|
115
|
+
self.clone()
|
116
|
+
.inner
|
117
|
+
.map(function, GetOutput::from_type(DataType::UInt32))
|
118
|
+
.with_fmt("str.n_chars")
|
119
|
+
.into()
|
120
|
+
}
|
121
|
+
|
122
|
+
pub fn str_replace_n(&self, pat: &RbExpr, val: &RbExpr, literal: bool, n: i64) -> Self {
|
123
|
+
self.inner
|
124
|
+
.clone()
|
125
|
+
.str()
|
126
|
+
.replace_n(pat.inner.clone(), val.inner.clone(), literal, n)
|
127
|
+
.into()
|
128
|
+
}
|
129
|
+
|
130
|
+
pub fn str_replace_all(&self, pat: &RbExpr, val: &RbExpr, literal: bool) -> Self {
|
131
|
+
self.inner
|
132
|
+
.clone()
|
133
|
+
.str()
|
134
|
+
.replace_all(pat.inner.clone(), val.inner.clone(), literal)
|
135
|
+
.into()
|
136
|
+
}
|
137
|
+
|
138
|
+
pub fn str_zfill(&self, alignment: usize) -> Self {
|
139
|
+
self.clone().inner.str().zfill(alignment).into()
|
140
|
+
}
|
141
|
+
|
142
|
+
pub fn str_ljust(&self, width: usize, fillchar: char) -> Self {
|
143
|
+
self.clone().inner.str().ljust(width, fillchar).into()
|
144
|
+
}
|
145
|
+
|
146
|
+
pub fn str_rjust(&self, width: usize, fillchar: char) -> Self {
|
147
|
+
self.clone().inner.str().rjust(width, fillchar).into()
|
148
|
+
}
|
149
|
+
|
150
|
+
pub fn str_contains(&self, pat: &RbExpr, literal: Option<bool>, strict: bool) -> Self {
|
151
|
+
match literal {
|
152
|
+
Some(true) => self
|
153
|
+
.inner
|
154
|
+
.clone()
|
155
|
+
.str()
|
156
|
+
.contains_literal(pat.inner.clone())
|
157
|
+
.into(),
|
158
|
+
_ => self
|
159
|
+
.inner
|
160
|
+
.clone()
|
161
|
+
.str()
|
162
|
+
.contains(pat.inner.clone(), strict)
|
163
|
+
.into(),
|
164
|
+
}
|
165
|
+
}
|
166
|
+
|
167
|
+
pub fn str_ends_with(&self, sub: &RbExpr) -> Self {
|
168
|
+
self.inner.clone().str().ends_with(sub.inner.clone()).into()
|
169
|
+
}
|
170
|
+
|
171
|
+
pub fn str_starts_with(&self, sub: &RbExpr) -> Self {
|
172
|
+
self.inner
|
173
|
+
.clone()
|
174
|
+
.str()
|
175
|
+
.starts_with(sub.inner.clone())
|
176
|
+
.into()
|
177
|
+
}
|
178
|
+
|
179
|
+
pub fn str_hex_encode(&self) -> Self {
|
180
|
+
self.clone()
|
181
|
+
.inner
|
182
|
+
.map(
|
183
|
+
move |s| s.utf8().map(|s| Some(s.hex_encode().into_series())),
|
184
|
+
GetOutput::same_type(),
|
185
|
+
)
|
186
|
+
.with_fmt("str.hex_encode")
|
187
|
+
.into()
|
188
|
+
}
|
189
|
+
|
190
|
+
pub fn str_hex_decode(&self, strict: bool) -> Self {
|
191
|
+
self.clone()
|
192
|
+
.inner
|
193
|
+
.map(
|
194
|
+
move |s| s.utf8()?.hex_decode(strict).map(|s| Some(s.into_series())),
|
195
|
+
GetOutput::same_type(),
|
196
|
+
)
|
197
|
+
.with_fmt("str.hex_decode")
|
198
|
+
.into()
|
199
|
+
}
|
200
|
+
|
201
|
+
pub fn str_base64_encode(&self) -> Self {
|
202
|
+
self.clone()
|
203
|
+
.inner
|
204
|
+
.map(
|
205
|
+
move |s| s.utf8().map(|s| Some(s.base64_encode().into_series())),
|
206
|
+
GetOutput::same_type(),
|
207
|
+
)
|
208
|
+
.with_fmt("str.base64_encode")
|
209
|
+
.into()
|
210
|
+
}
|
211
|
+
|
212
|
+
pub fn str_base64_decode(&self, strict: bool) -> Self {
|
213
|
+
self.clone()
|
214
|
+
.inner
|
215
|
+
.map(
|
216
|
+
move |s| {
|
217
|
+
s.utf8()?
|
218
|
+
.base64_decode(strict)
|
219
|
+
.map(|s| Some(s.into_series()))
|
220
|
+
},
|
221
|
+
GetOutput::same_type(),
|
222
|
+
)
|
223
|
+
.with_fmt("str.base64_decode")
|
224
|
+
.into()
|
225
|
+
}
|
226
|
+
|
227
|
+
pub fn str_parse_int(&self, radix: u32, strict: bool) -> Self {
|
228
|
+
self.inner
|
229
|
+
.clone()
|
230
|
+
.str()
|
231
|
+
.from_radix(radix, strict)
|
232
|
+
.with_fmt("str.parse_int")
|
233
|
+
.into()
|
234
|
+
}
|
235
|
+
|
236
|
+
pub fn str_json_extract(&self, dtype: Option<Wrap<DataType>>) -> Self {
|
237
|
+
let dtype = dtype.map(|wrap| wrap.0);
|
238
|
+
|
239
|
+
let output_type = match dtype.clone() {
|
240
|
+
Some(dtype) => GetOutput::from_type(dtype),
|
241
|
+
None => GetOutput::from_type(DataType::Unknown),
|
242
|
+
};
|
243
|
+
|
244
|
+
let function = move |s: Series| {
|
245
|
+
let ca = s.utf8()?;
|
246
|
+
match ca.json_extract(dtype.clone()) {
|
247
|
+
Ok(ca) => Ok(Some(ca.into_series())),
|
248
|
+
Err(e) => Err(PolarsError::ComputeError(format!("{e:?}").into())),
|
249
|
+
}
|
250
|
+
};
|
251
|
+
|
252
|
+
self.clone()
|
253
|
+
.inner
|
254
|
+
.map(function, output_type)
|
255
|
+
.with_fmt("str.json_extract")
|
256
|
+
.into()
|
257
|
+
}
|
258
|
+
|
259
|
+
pub fn str_json_path_match(&self, pat: String) -> Self {
|
260
|
+
let function = move |s: Series| {
|
261
|
+
let ca = s.utf8()?;
|
262
|
+
match ca.json_path_match(&pat) {
|
263
|
+
Ok(ca) => Ok(Some(ca.into_series())),
|
264
|
+
Err(e) => Err(PolarsError::ComputeError(format!("{:?}", e).into())),
|
265
|
+
}
|
266
|
+
};
|
267
|
+
self.clone()
|
268
|
+
.inner
|
269
|
+
.map(function, GetOutput::from_type(DataType::Utf8))
|
270
|
+
.with_fmt("str.json_path_match")
|
271
|
+
.into()
|
272
|
+
}
|
273
|
+
|
274
|
+
pub fn str_extract(&self, pat: String, group_index: usize) -> Self {
|
275
|
+
self.inner.clone().str().extract(&pat, group_index).into()
|
276
|
+
}
|
277
|
+
|
278
|
+
pub fn str_extract_all(&self, pat: &RbExpr) -> Self {
|
279
|
+
self.inner
|
280
|
+
.clone()
|
281
|
+
.str()
|
282
|
+
.extract_all(pat.inner.clone())
|
283
|
+
.into()
|
284
|
+
}
|
285
|
+
|
286
|
+
pub fn str_count_match(&self, pat: String) -> Self {
|
287
|
+
self.inner.clone().str().count_match(&pat).into()
|
288
|
+
}
|
289
|
+
|
290
|
+
pub fn str_split(&self, by: String) -> Self {
|
291
|
+
self.inner.clone().str().split(&by).into()
|
292
|
+
}
|
293
|
+
|
294
|
+
pub fn str_split_inclusive(&self, by: String) -> Self {
|
295
|
+
self.inner.clone().str().split_inclusive(&by).into()
|
296
|
+
}
|
297
|
+
|
298
|
+
pub fn str_split_exact(&self, by: String, n: usize) -> Self {
|
299
|
+
self.inner.clone().str().split_exact(&by, n).into()
|
300
|
+
}
|
301
|
+
|
302
|
+
pub fn str_split_exact_inclusive(&self, by: String, n: usize) -> Self {
|
303
|
+
self.inner
|
304
|
+
.clone()
|
305
|
+
.str()
|
306
|
+
.split_exact_inclusive(&by, n)
|
307
|
+
.into()
|
308
|
+
}
|
309
|
+
|
310
|
+
pub fn str_splitn(&self, by: String, n: usize) -> Self {
|
311
|
+
self.inner.clone().str().splitn(&by, n).into()
|
312
|
+
}
|
313
|
+
}
|
@@ -0,0 +1,15 @@
|
|
1
|
+
use crate::RbExpr;
|
2
|
+
|
3
|
+
impl RbExpr {
|
4
|
+
pub fn struct_field_by_name(&self, name: String) -> Self {
|
5
|
+
self.inner.clone().struct_().field_by_name(&name).into()
|
6
|
+
}
|
7
|
+
|
8
|
+
pub fn struct_field_by_index(&self, index: i64) -> Self {
|
9
|
+
self.inner.clone().struct_().field_by_index(index).into()
|
10
|
+
}
|
11
|
+
|
12
|
+
pub fn struct_rename_fields(&self, names: Vec<String>) -> Self {
|
13
|
+
self.inner.clone().struct_().rename_fields(names).into()
|
14
|
+
}
|
15
|
+
}
|
@@ -0,0 +1,33 @@
|
|
1
|
+
mod binary;
|
2
|
+
mod categorical;
|
3
|
+
mod datetime;
|
4
|
+
mod general;
|
5
|
+
mod list;
|
6
|
+
mod meta;
|
7
|
+
mod string;
|
8
|
+
mod r#struct;
|
9
|
+
|
10
|
+
use magnus::RArray;
|
11
|
+
use polars::lazy::dsl::Expr;
|
12
|
+
|
13
|
+
use crate::RbResult;
|
14
|
+
|
15
|
+
#[magnus::wrap(class = "Polars::RbExpr")]
|
16
|
+
#[derive(Clone)]
|
17
|
+
pub struct RbExpr {
|
18
|
+
pub inner: Expr,
|
19
|
+
}
|
20
|
+
|
21
|
+
/// Lets any `Expr` be turned into an `RbExpr` with `.into()`.
impl From<Expr> for RbExpr {
    /// Stores `inner` as the wrapped expression.
    fn from(inner: Expr) -> Self {
        Self { inner }
    }
}
|
26
|
+
|
27
|
+
pub fn rb_exprs_to_exprs(rb_exprs: RArray) -> RbResult<Vec<Expr>> {
|
28
|
+
let mut exprs = Vec::new();
|
29
|
+
for item in rb_exprs.each() {
|
30
|
+
exprs.push(item?.try_convert::<&RbExpr>()?.inner.clone());
|
31
|
+
}
|
32
|
+
Ok(exprs)
|
33
|
+
}
|
@@ -0,0 +1,93 @@
|
|
1
|
+
use magnus::RArray;
|
2
|
+
use polars::{functions, time};
|
3
|
+
use polars_core::datatypes::{TimeUnit, TimeZone};
|
4
|
+
use polars_core::prelude::{DataFrame, IntoSeries};
|
5
|
+
|
6
|
+
use crate::conversion::{get_df, get_series, Wrap};
|
7
|
+
use crate::error::RbPolarsErr;
|
8
|
+
use crate::prelude::{ClosedWindow, Duration};
|
9
|
+
use crate::{RbDataFrame, RbResult, RbSeries};
|
10
|
+
|
11
|
+
pub fn concat_df(seq: RArray) -> RbResult<RbDataFrame> {
|
12
|
+
use polars_core::error::PolarsResult;
|
13
|
+
|
14
|
+
let mut iter = seq.each();
|
15
|
+
let first = iter.next().unwrap()?;
|
16
|
+
|
17
|
+
let first_rdf = get_df(first)?;
|
18
|
+
let identity_df = first_rdf.slice(0, 0);
|
19
|
+
|
20
|
+
let mut rdfs: Vec<PolarsResult<DataFrame>> = vec![Ok(first_rdf)];
|
21
|
+
|
22
|
+
for item in iter {
|
23
|
+
let rdf = get_df(item?)?;
|
24
|
+
rdfs.push(Ok(rdf));
|
25
|
+
}
|
26
|
+
|
27
|
+
let identity = Ok(identity_df);
|
28
|
+
|
29
|
+
let df = rdfs
|
30
|
+
.into_iter()
|
31
|
+
.fold(identity, |acc: PolarsResult<DataFrame>, df| {
|
32
|
+
let mut acc = acc?;
|
33
|
+
acc.vstack_mut(&df?)?;
|
34
|
+
Ok(acc)
|
35
|
+
})
|
36
|
+
.map_err(RbPolarsErr::from)?;
|
37
|
+
|
38
|
+
Ok(df.into())
|
39
|
+
}
|
40
|
+
|
41
|
+
pub fn concat_series(seq: RArray) -> RbResult<RbSeries> {
|
42
|
+
let mut iter = seq.each();
|
43
|
+
let first = iter.next().unwrap()?;
|
44
|
+
|
45
|
+
let mut s = get_series(first)?;
|
46
|
+
|
47
|
+
for res in iter {
|
48
|
+
let item = res?;
|
49
|
+
let item = get_series(item)?;
|
50
|
+
s.append(&item).map_err(RbPolarsErr::from)?;
|
51
|
+
}
|
52
|
+
Ok(s.into())
|
53
|
+
}
|
54
|
+
|
55
|
+
pub fn date_range(
|
56
|
+
start: i64,
|
57
|
+
stop: i64,
|
58
|
+
every: String,
|
59
|
+
closed: Wrap<ClosedWindow>,
|
60
|
+
name: String,
|
61
|
+
tu: Wrap<TimeUnit>,
|
62
|
+
tz: Option<TimeZone>,
|
63
|
+
) -> RbResult<RbSeries> {
|
64
|
+
let date_range = time::date_range_impl(
|
65
|
+
&name,
|
66
|
+
start,
|
67
|
+
stop,
|
68
|
+
Duration::parse(&every),
|
69
|
+
closed.0,
|
70
|
+
tu.0,
|
71
|
+
tz.as_ref(),
|
72
|
+
)
|
73
|
+
.map_err(RbPolarsErr::from)?;
|
74
|
+
Ok(date_range.into_series().into())
|
75
|
+
}
|
76
|
+
|
77
|
+
pub fn diag_concat_df(seq: RArray) -> RbResult<RbDataFrame> {
|
78
|
+
let mut dfs = Vec::new();
|
79
|
+
for item in seq.each() {
|
80
|
+
dfs.push(get_df(item?)?);
|
81
|
+
}
|
82
|
+
let df = functions::diag_concat_df(&dfs).map_err(RbPolarsErr::from)?;
|
83
|
+
Ok(df.into())
|
84
|
+
}
|
85
|
+
|
86
|
+
pub fn hor_concat_df(seq: RArray) -> RbResult<RbDataFrame> {
|
87
|
+
let mut dfs = Vec::new();
|
88
|
+
for item in seq.each() {
|
89
|
+
dfs.push(get_df(item?)?);
|
90
|
+
}
|
91
|
+
let df = functions::hor_concat_df(&dfs).map_err(RbPolarsErr::from)?;
|
92
|
+
Ok(df.into())
|
93
|
+
}
|
@@ -0,0 +1,34 @@
|
|
1
|
+
use magnus::{RHash, Value};
|
2
|
+
|
3
|
+
use crate::conversion::Wrap;
|
4
|
+
use crate::file::get_file_like;
|
5
|
+
use crate::prelude::DataType;
|
6
|
+
use crate::{RbPolarsErr, RbResult};
|
7
|
+
|
8
|
+
pub fn read_ipc_schema(rb_f: Value) -> RbResult<Value> {
|
9
|
+
use polars_core::export::arrow::io::ipc::read::read_file_metadata;
|
10
|
+
let mut r = get_file_like(rb_f, false)?;
|
11
|
+
let metadata = read_file_metadata(&mut r).map_err(RbPolarsErr::arrow)?;
|
12
|
+
|
13
|
+
let dict = RHash::new();
|
14
|
+
for field in metadata.schema.fields {
|
15
|
+
let dt: Wrap<DataType> = Wrap((&field.data_type).into());
|
16
|
+
dict.aset(field.name, dt)?;
|
17
|
+
}
|
18
|
+
Ok(dict.into())
|
19
|
+
}
|
20
|
+
|
21
|
+
pub fn read_parquet_schema(rb_f: Value) -> RbResult<Value> {
|
22
|
+
use polars_core::export::arrow::io::parquet::read::{infer_schema, read_metadata};
|
23
|
+
|
24
|
+
let mut r = get_file_like(rb_f, false)?;
|
25
|
+
let metadata = read_metadata(&mut r).map_err(RbPolarsErr::arrow)?;
|
26
|
+
let arrow_schema = infer_schema(&metadata).map_err(RbPolarsErr::arrow)?;
|
27
|
+
|
28
|
+
let dict = RHash::new();
|
29
|
+
for field in arrow_schema.fields {
|
30
|
+
let dt: Wrap<DataType> = Wrap((&field.data_type).into());
|
31
|
+
dict.aset(field.name, dt)?;
|
32
|
+
}
|
33
|
+
Ok(dict.into())
|
34
|
+
}
|