polars-df 0.4.0 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/Cargo.lock +272 -191
- data/Cargo.toml +0 -1
- data/README.md +2 -2
- data/ext/polars/Cargo.toml +8 -4
- data/ext/polars/src/apply/dataframe.rs +2 -2
- data/ext/polars/src/{lazy/apply.rs → apply/lazy.rs} +1 -2
- data/ext/polars/src/apply/mod.rs +1 -0
- data/ext/polars/src/batched_csv.rs +7 -5
- data/ext/polars/src/conversion.rs +106 -4
- data/ext/polars/src/dataframe.rs +19 -17
- data/ext/polars/src/error.rs +0 -4
- data/ext/polars/src/expr/binary.rs +69 -0
- data/ext/polars/src/expr/categorical.rs +10 -0
- data/ext/polars/src/expr/datetime.rs +223 -0
- data/ext/polars/src/expr/general.rs +933 -0
- data/ext/polars/src/expr/list.rs +146 -0
- data/ext/polars/src/{lazy → expr}/meta.rs +16 -6
- data/ext/polars/src/expr/string.rs +313 -0
- data/ext/polars/src/expr/struct.rs +15 -0
- data/ext/polars/src/expr.rs +33 -0
- data/ext/polars/src/functions/eager.rs +93 -0
- data/ext/polars/src/functions/io.rs +34 -0
- data/ext/polars/src/functions/lazy.rs +209 -0
- data/ext/polars/src/functions/meta.rs +8 -0
- data/ext/polars/src/functions/mod.rs +5 -0
- data/ext/polars/src/functions/whenthen.rs +43 -0
- data/ext/polars/src/{lazy/dataframe.rs → lazyframe.rs} +12 -33
- data/ext/polars/src/lazygroupby.rs +29 -0
- data/ext/polars/src/lib.rs +205 -303
- data/ext/polars/src/rb_modules.rs +8 -0
- data/ext/polars/src/series/aggregation.rs +83 -0
- data/ext/polars/src/series/arithmetic.rs +88 -0
- data/ext/polars/src/series/comparison.rs +251 -0
- data/ext/polars/src/series/construction.rs +164 -0
- data/ext/polars/src/series.rs +99 -539
- data/lib/polars/convert.rb +2 -2
- data/lib/polars/data_frame.rb +201 -50
- data/lib/polars/data_types.rb +6 -4
- data/lib/polars/date_time_expr.rb +142 -2
- data/lib/polars/expr.rb +70 -10
- data/lib/polars/lazy_frame.rb +4 -3
- data/lib/polars/lazy_functions.rb +4 -1
- data/lib/polars/list_expr.rb +68 -19
- data/lib/polars/series.rb +181 -73
- data/lib/polars/string_expr.rb +149 -43
- data/lib/polars/string_name_space.rb +4 -4
- data/lib/polars/struct_name_space.rb +32 -0
- data/lib/polars/utils.rb +41 -7
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +2 -2
- metadata +26 -11
- data/ext/polars/src/lazy/dsl.rs +0 -1775
- data/ext/polars/src/lazy/mod.rs +0 -5
- data/ext/polars/src/lazy/utils.rs +0 -13
- data/ext/polars/src/list_construction.rs +0 -100
- /data/ext/polars/src/{numo.rs → series/export.rs} +0 -0
- /data/ext/polars/src/{set.rs → series/set_at_idx.rs} +0 -0
@@ -0,0 +1,146 @@
|
|
1
|
+
use magnus::Value;
|
2
|
+
use polars::lazy::dsl::lit;
|
3
|
+
use polars::prelude::*;
|
4
|
+
use polars::series::ops::NullBehavior;
|
5
|
+
|
6
|
+
use crate::conversion::Wrap;
|
7
|
+
use crate::{RbExpr, RbResult};
|
8
|
+
|
9
|
+
impl RbExpr {
|
10
|
+
pub fn list_arg_max(&self) -> Self {
|
11
|
+
self.inner.clone().arr().arg_max().into()
|
12
|
+
}
|
13
|
+
|
14
|
+
pub fn list_arg_min(&self) -> Self {
|
15
|
+
self.inner.clone().arr().arg_min().into()
|
16
|
+
}
|
17
|
+
|
18
|
+
pub fn list_contains(&self, other: &RbExpr) -> Self {
|
19
|
+
self.inner
|
20
|
+
.clone()
|
21
|
+
.arr()
|
22
|
+
.contains(other.inner.clone())
|
23
|
+
.into()
|
24
|
+
}
|
25
|
+
|
26
|
+
pub fn list_count_match(&self, expr: &RbExpr) -> Self {
|
27
|
+
self.inner
|
28
|
+
.clone()
|
29
|
+
.arr()
|
30
|
+
.count_match(expr.inner.clone())
|
31
|
+
.into()
|
32
|
+
}
|
33
|
+
|
34
|
+
pub fn list_diff(&self, n: i64, null_behavior: Wrap<NullBehavior>) -> RbResult<Self> {
|
35
|
+
Ok(self.inner.clone().arr().diff(n, null_behavior.0).into())
|
36
|
+
}
|
37
|
+
|
38
|
+
pub fn list_eval(&self, expr: &RbExpr, parallel: bool) -> Self {
|
39
|
+
self.inner
|
40
|
+
.clone()
|
41
|
+
.arr()
|
42
|
+
.eval(expr.inner.clone(), parallel)
|
43
|
+
.into()
|
44
|
+
}
|
45
|
+
|
46
|
+
pub fn list_get(&self, index: &RbExpr) -> Self {
|
47
|
+
self.inner.clone().arr().get(index.inner.clone()).into()
|
48
|
+
}
|
49
|
+
|
50
|
+
pub fn list_join(&self, separator: String) -> Self {
|
51
|
+
self.inner.clone().arr().join(&separator).into()
|
52
|
+
}
|
53
|
+
|
54
|
+
pub fn list_lengths(&self) -> Self {
|
55
|
+
self.inner.clone().arr().lengths().into()
|
56
|
+
}
|
57
|
+
|
58
|
+
pub fn list_max(&self) -> Self {
|
59
|
+
self.inner.clone().arr().max().into()
|
60
|
+
}
|
61
|
+
|
62
|
+
pub fn list_mean(&self) -> Self {
|
63
|
+
self.inner.clone().arr().mean().with_fmt("arr.mean").into()
|
64
|
+
}
|
65
|
+
|
66
|
+
pub fn list_min(&self) -> Self {
|
67
|
+
self.inner.clone().arr().min().into()
|
68
|
+
}
|
69
|
+
|
70
|
+
pub fn list_reverse(&self) -> Self {
|
71
|
+
self.inner.clone().arr().reverse().into()
|
72
|
+
}
|
73
|
+
|
74
|
+
pub fn list_shift(&self, periods: i64) -> Self {
|
75
|
+
self.inner.clone().arr().shift(periods).into()
|
76
|
+
}
|
77
|
+
|
78
|
+
pub fn list_slice(&self, offset: &RbExpr, length: Option<&RbExpr>) -> Self {
|
79
|
+
let length = match length {
|
80
|
+
Some(i) => i.inner.clone(),
|
81
|
+
None => lit(i64::MAX),
|
82
|
+
};
|
83
|
+
self.inner
|
84
|
+
.clone()
|
85
|
+
.arr()
|
86
|
+
.slice(offset.inner.clone(), length)
|
87
|
+
.into()
|
88
|
+
}
|
89
|
+
|
90
|
+
pub fn list_sort(&self, reverse: bool) -> Self {
|
91
|
+
self.inner
|
92
|
+
.clone()
|
93
|
+
.arr()
|
94
|
+
.sort(SortOptions {
|
95
|
+
descending: reverse,
|
96
|
+
..Default::default()
|
97
|
+
})
|
98
|
+
.with_fmt("arr.sort")
|
99
|
+
.into()
|
100
|
+
}
|
101
|
+
|
102
|
+
pub fn list_sum(&self) -> Self {
|
103
|
+
self.inner.clone().arr().sum().with_fmt("arr.sum").into()
|
104
|
+
}
|
105
|
+
|
106
|
+
pub fn list_take(&self, index: &RbExpr, null_on_oob: bool) -> Self {
|
107
|
+
self.inner
|
108
|
+
.clone()
|
109
|
+
.arr()
|
110
|
+
.take(index.inner.clone(), null_on_oob)
|
111
|
+
.into()
|
112
|
+
}
|
113
|
+
|
114
|
+
pub fn list_to_struct(
|
115
|
+
&self,
|
116
|
+
width_strat: Wrap<ListToStructWidthStrategy>,
|
117
|
+
_name_gen: Option<Value>,
|
118
|
+
upper_bound: usize,
|
119
|
+
) -> RbResult<Self> {
|
120
|
+
// TODO fix
|
121
|
+
let name_gen = None;
|
122
|
+
// let name_gen = name_gen.map(|lambda| {
|
123
|
+
// Arc::new(move |idx: usize| {
|
124
|
+
// let out: Value = lambda.funcall("call", (idx,)).unwrap();
|
125
|
+
// out.try_convert::<String>().unwrap()
|
126
|
+
// }) as NameGenerator
|
127
|
+
// });
|
128
|
+
|
129
|
+
Ok(self
|
130
|
+
.inner
|
131
|
+
.clone()
|
132
|
+
.arr()
|
133
|
+
.to_struct(width_strat.0, name_gen, upper_bound)
|
134
|
+
.into())
|
135
|
+
}
|
136
|
+
|
137
|
+
pub fn list_unique(&self, maintain_order: bool) -> Self {
|
138
|
+
let e = self.inner.clone();
|
139
|
+
|
140
|
+
if maintain_order {
|
141
|
+
e.arr().unique_stable().into()
|
142
|
+
} else {
|
143
|
+
e.arr().unique().into()
|
144
|
+
}
|
145
|
+
}
|
146
|
+
}
|
@@ -1,6 +1,12 @@
|
|
1
|
-
use
|
1
|
+
use magnus::RArray;
|
2
|
+
|
3
|
+
use crate::{RbExpr, RbPolarsErr, RbResult};
|
2
4
|
|
3
5
|
impl RbExpr {
|
6
|
+
/// Compares the wrapped expressions of `self` and `other` with `==`.
pub fn meta_eq(&self, other: &RbExpr) -> bool {
    let (lhs, rhs) = (&self.inner, &other.inner);
    lhs == rhs
}
|
9
|
+
|
4
10
|
pub fn meta_pop(&self) -> RArray {
|
5
11
|
RArray::from_iter(
|
6
12
|
self.inner
|
@@ -12,11 +18,7 @@ impl RbExpr {
|
|
12
18
|
)
|
13
19
|
}
|
14
20
|
|
15
|
-
pub fn
|
16
|
-
self.inner == other.inner
|
17
|
-
}
|
18
|
-
|
19
|
-
pub fn meta_roots(&self) -> Vec<String> {
|
21
|
+
pub fn meta_root_names(&self) -> Vec<String> {
|
20
22
|
self.inner
|
21
23
|
.clone()
|
22
24
|
.meta()
|
@@ -39,4 +41,12 @@ impl RbExpr {
|
|
39
41
|
/// Applies `meta().undo_aliases()` to a clone of the wrapped expression and
/// wraps the result.
pub fn meta_undo_aliases(&self) -> RbExpr {
    let expr = self.inner.clone();
    expr.meta().undo_aliases().into()
}
|
44
|
+
|
45
|
+
pub fn meta_has_multiple_outputs(&self) -> bool {
|
46
|
+
self.inner.clone().meta().has_multiple_outputs()
|
47
|
+
}
|
48
|
+
|
49
|
+
pub fn meta_is_regex_projection(&self) -> bool {
|
50
|
+
self.inner.clone().meta().is_regex_projection()
|
51
|
+
}
|
42
52
|
}
|
@@ -0,0 +1,313 @@
|
|
1
|
+
use polars::prelude::*;
|
2
|
+
|
3
|
+
use crate::conversion::Wrap;
|
4
|
+
use crate::RbExpr;
|
5
|
+
|
6
|
+
impl RbExpr {
|
7
|
+
pub fn str_concat(&self, delimiter: String) -> Self {
|
8
|
+
self.inner.clone().str().concat(&delimiter).into()
|
9
|
+
}
|
10
|
+
|
11
|
+
pub fn str_to_date(
|
12
|
+
&self,
|
13
|
+
format: Option<String>,
|
14
|
+
strict: bool,
|
15
|
+
exact: bool,
|
16
|
+
cache: bool,
|
17
|
+
) -> Self {
|
18
|
+
let options = StrptimeOptions {
|
19
|
+
format,
|
20
|
+
strict,
|
21
|
+
exact,
|
22
|
+
cache,
|
23
|
+
..Default::default()
|
24
|
+
};
|
25
|
+
self.inner.clone().str().to_date(options).into()
|
26
|
+
}
|
27
|
+
|
28
|
+
#[allow(clippy::too_many_arguments)]
|
29
|
+
pub fn str_to_datetime(
|
30
|
+
&self,
|
31
|
+
format: Option<String>,
|
32
|
+
time_unit: Option<Wrap<TimeUnit>>,
|
33
|
+
time_zone: Option<TimeZone>,
|
34
|
+
strict: bool,
|
35
|
+
exact: bool,
|
36
|
+
cache: bool,
|
37
|
+
utc: bool,
|
38
|
+
tz_aware: bool,
|
39
|
+
) -> Self {
|
40
|
+
let options = StrptimeOptions {
|
41
|
+
format,
|
42
|
+
strict,
|
43
|
+
exact,
|
44
|
+
cache,
|
45
|
+
tz_aware,
|
46
|
+
utc,
|
47
|
+
};
|
48
|
+
self.inner
|
49
|
+
.clone()
|
50
|
+
.str()
|
51
|
+
.to_datetime(time_unit.map(|tu| tu.0), time_zone, options)
|
52
|
+
.into()
|
53
|
+
}
|
54
|
+
|
55
|
+
pub fn str_to_time(&self, format: Option<String>, strict: bool, cache: bool) -> Self {
|
56
|
+
let options = StrptimeOptions {
|
57
|
+
format,
|
58
|
+
strict,
|
59
|
+
cache,
|
60
|
+
exact: true,
|
61
|
+
..Default::default()
|
62
|
+
};
|
63
|
+
self.inner.clone().str().to_time(options).into()
|
64
|
+
}
|
65
|
+
|
66
|
+
pub fn str_strip(&self, matches: Option<String>) -> Self {
|
67
|
+
self.inner.clone().str().strip(matches).into()
|
68
|
+
}
|
69
|
+
|
70
|
+
pub fn str_rstrip(&self, matches: Option<String>) -> Self {
|
71
|
+
self.inner.clone().str().rstrip(matches).into()
|
72
|
+
}
|
73
|
+
|
74
|
+
pub fn str_lstrip(&self, matches: Option<String>) -> Self {
|
75
|
+
self.inner.clone().str().lstrip(matches).into()
|
76
|
+
}
|
77
|
+
|
78
|
+
pub fn str_slice(&self, start: i64, length: Option<u64>) -> Self {
|
79
|
+
let function = move |s: Series| {
|
80
|
+
let ca = s.utf8()?;
|
81
|
+
Ok(Some(ca.str_slice(start, length)?.into_series()))
|
82
|
+
};
|
83
|
+
self.clone()
|
84
|
+
.inner
|
85
|
+
.map(function, GetOutput::from_type(DataType::Utf8))
|
86
|
+
.with_fmt("str.slice")
|
87
|
+
.into()
|
88
|
+
}
|
89
|
+
|
90
|
+
pub fn str_to_uppercase(&self) -> Self {
|
91
|
+
self.inner.clone().str().to_uppercase().into()
|
92
|
+
}
|
93
|
+
|
94
|
+
pub fn str_to_lowercase(&self) -> Self {
|
95
|
+
self.inner.clone().str().to_lowercase().into()
|
96
|
+
}
|
97
|
+
|
98
|
+
pub fn str_lengths(&self) -> Self {
|
99
|
+
let function = |s: Series| {
|
100
|
+
let ca = s.utf8()?;
|
101
|
+
Ok(Some(ca.str_lengths().into_series()))
|
102
|
+
};
|
103
|
+
self.clone()
|
104
|
+
.inner
|
105
|
+
.map(function, GetOutput::from_type(DataType::UInt32))
|
106
|
+
.with_fmt("str.lengths")
|
107
|
+
.into()
|
108
|
+
}
|
109
|
+
|
110
|
+
pub fn str_n_chars(&self) -> Self {
|
111
|
+
let function = |s: Series| {
|
112
|
+
let ca = s.utf8()?;
|
113
|
+
Ok(Some(ca.str_n_chars().into_series()))
|
114
|
+
};
|
115
|
+
self.clone()
|
116
|
+
.inner
|
117
|
+
.map(function, GetOutput::from_type(DataType::UInt32))
|
118
|
+
.with_fmt("str.n_chars")
|
119
|
+
.into()
|
120
|
+
}
|
121
|
+
|
122
|
+
pub fn str_replace_n(&self, pat: &RbExpr, val: &RbExpr, literal: bool, n: i64) -> Self {
|
123
|
+
self.inner
|
124
|
+
.clone()
|
125
|
+
.str()
|
126
|
+
.replace_n(pat.inner.clone(), val.inner.clone(), literal, n)
|
127
|
+
.into()
|
128
|
+
}
|
129
|
+
|
130
|
+
pub fn str_replace_all(&self, pat: &RbExpr, val: &RbExpr, literal: bool) -> Self {
|
131
|
+
self.inner
|
132
|
+
.clone()
|
133
|
+
.str()
|
134
|
+
.replace_all(pat.inner.clone(), val.inner.clone(), literal)
|
135
|
+
.into()
|
136
|
+
}
|
137
|
+
|
138
|
+
pub fn str_zfill(&self, alignment: usize) -> Self {
|
139
|
+
self.clone().inner.str().zfill(alignment).into()
|
140
|
+
}
|
141
|
+
|
142
|
+
pub fn str_ljust(&self, width: usize, fillchar: char) -> Self {
|
143
|
+
self.clone().inner.str().ljust(width, fillchar).into()
|
144
|
+
}
|
145
|
+
|
146
|
+
pub fn str_rjust(&self, width: usize, fillchar: char) -> Self {
|
147
|
+
self.clone().inner.str().rjust(width, fillchar).into()
|
148
|
+
}
|
149
|
+
|
150
|
+
pub fn str_contains(&self, pat: &RbExpr, literal: Option<bool>, strict: bool) -> Self {
|
151
|
+
match literal {
|
152
|
+
Some(true) => self
|
153
|
+
.inner
|
154
|
+
.clone()
|
155
|
+
.str()
|
156
|
+
.contains_literal(pat.inner.clone())
|
157
|
+
.into(),
|
158
|
+
_ => self
|
159
|
+
.inner
|
160
|
+
.clone()
|
161
|
+
.str()
|
162
|
+
.contains(pat.inner.clone(), strict)
|
163
|
+
.into(),
|
164
|
+
}
|
165
|
+
}
|
166
|
+
|
167
|
+
pub fn str_ends_with(&self, sub: &RbExpr) -> Self {
|
168
|
+
self.inner.clone().str().ends_with(sub.inner.clone()).into()
|
169
|
+
}
|
170
|
+
|
171
|
+
pub fn str_starts_with(&self, sub: &RbExpr) -> Self {
|
172
|
+
self.inner
|
173
|
+
.clone()
|
174
|
+
.str()
|
175
|
+
.starts_with(sub.inner.clone())
|
176
|
+
.into()
|
177
|
+
}
|
178
|
+
|
179
|
+
pub fn str_hex_encode(&self) -> Self {
|
180
|
+
self.clone()
|
181
|
+
.inner
|
182
|
+
.map(
|
183
|
+
move |s| s.utf8().map(|s| Some(s.hex_encode().into_series())),
|
184
|
+
GetOutput::same_type(),
|
185
|
+
)
|
186
|
+
.with_fmt("str.hex_encode")
|
187
|
+
.into()
|
188
|
+
}
|
189
|
+
|
190
|
+
pub fn str_hex_decode(&self, strict: bool) -> Self {
|
191
|
+
self.clone()
|
192
|
+
.inner
|
193
|
+
.map(
|
194
|
+
move |s| s.utf8()?.hex_decode(strict).map(|s| Some(s.into_series())),
|
195
|
+
GetOutput::same_type(),
|
196
|
+
)
|
197
|
+
.with_fmt("str.hex_decode")
|
198
|
+
.into()
|
199
|
+
}
|
200
|
+
|
201
|
+
pub fn str_base64_encode(&self) -> Self {
|
202
|
+
self.clone()
|
203
|
+
.inner
|
204
|
+
.map(
|
205
|
+
move |s| s.utf8().map(|s| Some(s.base64_encode().into_series())),
|
206
|
+
GetOutput::same_type(),
|
207
|
+
)
|
208
|
+
.with_fmt("str.base64_encode")
|
209
|
+
.into()
|
210
|
+
}
|
211
|
+
|
212
|
+
pub fn str_base64_decode(&self, strict: bool) -> Self {
|
213
|
+
self.clone()
|
214
|
+
.inner
|
215
|
+
.map(
|
216
|
+
move |s| {
|
217
|
+
s.utf8()?
|
218
|
+
.base64_decode(strict)
|
219
|
+
.map(|s| Some(s.into_series()))
|
220
|
+
},
|
221
|
+
GetOutput::same_type(),
|
222
|
+
)
|
223
|
+
.with_fmt("str.base64_decode")
|
224
|
+
.into()
|
225
|
+
}
|
226
|
+
|
227
|
+
pub fn str_parse_int(&self, radix: u32, strict: bool) -> Self {
|
228
|
+
self.inner
|
229
|
+
.clone()
|
230
|
+
.str()
|
231
|
+
.from_radix(radix, strict)
|
232
|
+
.with_fmt("str.parse_int")
|
233
|
+
.into()
|
234
|
+
}
|
235
|
+
|
236
|
+
pub fn str_json_extract(&self, dtype: Option<Wrap<DataType>>) -> Self {
|
237
|
+
let dtype = dtype.map(|wrap| wrap.0);
|
238
|
+
|
239
|
+
let output_type = match dtype.clone() {
|
240
|
+
Some(dtype) => GetOutput::from_type(dtype),
|
241
|
+
None => GetOutput::from_type(DataType::Unknown),
|
242
|
+
};
|
243
|
+
|
244
|
+
let function = move |s: Series| {
|
245
|
+
let ca = s.utf8()?;
|
246
|
+
match ca.json_extract(dtype.clone()) {
|
247
|
+
Ok(ca) => Ok(Some(ca.into_series())),
|
248
|
+
Err(e) => Err(PolarsError::ComputeError(format!("{e:?}").into())),
|
249
|
+
}
|
250
|
+
};
|
251
|
+
|
252
|
+
self.clone()
|
253
|
+
.inner
|
254
|
+
.map(function, output_type)
|
255
|
+
.with_fmt("str.json_extract")
|
256
|
+
.into()
|
257
|
+
}
|
258
|
+
|
259
|
+
pub fn str_json_path_match(&self, pat: String) -> Self {
|
260
|
+
let function = move |s: Series| {
|
261
|
+
let ca = s.utf8()?;
|
262
|
+
match ca.json_path_match(&pat) {
|
263
|
+
Ok(ca) => Ok(Some(ca.into_series())),
|
264
|
+
Err(e) => Err(PolarsError::ComputeError(format!("{:?}", e).into())),
|
265
|
+
}
|
266
|
+
};
|
267
|
+
self.clone()
|
268
|
+
.inner
|
269
|
+
.map(function, GetOutput::from_type(DataType::Utf8))
|
270
|
+
.with_fmt("str.json_path_match")
|
271
|
+
.into()
|
272
|
+
}
|
273
|
+
|
274
|
+
pub fn str_extract(&self, pat: String, group_index: usize) -> Self {
|
275
|
+
self.inner.clone().str().extract(&pat, group_index).into()
|
276
|
+
}
|
277
|
+
|
278
|
+
pub fn str_extract_all(&self, pat: &RbExpr) -> Self {
|
279
|
+
self.inner
|
280
|
+
.clone()
|
281
|
+
.str()
|
282
|
+
.extract_all(pat.inner.clone())
|
283
|
+
.into()
|
284
|
+
}
|
285
|
+
|
286
|
+
pub fn str_count_match(&self, pat: String) -> Self {
|
287
|
+
self.inner.clone().str().count_match(&pat).into()
|
288
|
+
}
|
289
|
+
|
290
|
+
pub fn str_split(&self, by: String) -> Self {
|
291
|
+
self.inner.clone().str().split(&by).into()
|
292
|
+
}
|
293
|
+
|
294
|
+
pub fn str_split_inclusive(&self, by: String) -> Self {
|
295
|
+
self.inner.clone().str().split_inclusive(&by).into()
|
296
|
+
}
|
297
|
+
|
298
|
+
pub fn str_split_exact(&self, by: String, n: usize) -> Self {
|
299
|
+
self.inner.clone().str().split_exact(&by, n).into()
|
300
|
+
}
|
301
|
+
|
302
|
+
pub fn str_split_exact_inclusive(&self, by: String, n: usize) -> Self {
|
303
|
+
self.inner
|
304
|
+
.clone()
|
305
|
+
.str()
|
306
|
+
.split_exact_inclusive(&by, n)
|
307
|
+
.into()
|
308
|
+
}
|
309
|
+
|
310
|
+
pub fn str_splitn(&self, by: String, n: usize) -> Self {
|
311
|
+
self.inner.clone().str().splitn(&by, n).into()
|
312
|
+
}
|
313
|
+
}
|
@@ -0,0 +1,15 @@
|
|
1
|
+
use crate::RbExpr;
|
2
|
+
|
3
|
+
impl RbExpr {
|
4
|
+
pub fn struct_field_by_name(&self, name: String) -> Self {
|
5
|
+
self.inner.clone().struct_().field_by_name(&name).into()
|
6
|
+
}
|
7
|
+
|
8
|
+
pub fn struct_field_by_index(&self, index: i64) -> Self {
|
9
|
+
self.inner.clone().struct_().field_by_index(index).into()
|
10
|
+
}
|
11
|
+
|
12
|
+
pub fn struct_rename_fields(&self, names: Vec<String>) -> Self {
|
13
|
+
self.inner.clone().struct_().rename_fields(names).into()
|
14
|
+
}
|
15
|
+
}
|
@@ -0,0 +1,33 @@
|
|
1
|
+
mod binary;
|
2
|
+
mod categorical;
|
3
|
+
mod datetime;
|
4
|
+
mod general;
|
5
|
+
mod list;
|
6
|
+
mod meta;
|
7
|
+
mod string;
|
8
|
+
mod r#struct;
|
9
|
+
|
10
|
+
use magnus::RArray;
|
11
|
+
use polars::lazy::dsl::Expr;
|
12
|
+
|
13
|
+
use crate::RbResult;
|
14
|
+
|
15
|
+
#[magnus::wrap(class = "Polars::RbExpr")]
|
16
|
+
#[derive(Clone)]
|
17
|
+
pub struct RbExpr {
|
18
|
+
pub inner: Expr,
|
19
|
+
}
|
20
|
+
|
21
|
+
/// Wraps an [`Expr`] in an [`RbExpr`].
impl From<Expr> for RbExpr {
    fn from(inner: Expr) -> Self {
        Self { inner }
    }
}
|
26
|
+
|
27
|
+
pub fn rb_exprs_to_exprs(rb_exprs: RArray) -> RbResult<Vec<Expr>> {
|
28
|
+
let mut exprs = Vec::new();
|
29
|
+
for item in rb_exprs.each() {
|
30
|
+
exprs.push(item?.try_convert::<&RbExpr>()?.inner.clone());
|
31
|
+
}
|
32
|
+
Ok(exprs)
|
33
|
+
}
|
@@ -0,0 +1,93 @@
|
|
1
|
+
use magnus::RArray;
|
2
|
+
use polars::{functions, time};
|
3
|
+
use polars_core::datatypes::{TimeUnit, TimeZone};
|
4
|
+
use polars_core::prelude::{DataFrame, IntoSeries};
|
5
|
+
|
6
|
+
use crate::conversion::{get_df, get_series, Wrap};
|
7
|
+
use crate::error::RbPolarsErr;
|
8
|
+
use crate::prelude::{ClosedWindow, Duration};
|
9
|
+
use crate::{RbDataFrame, RbResult, RbSeries};
|
10
|
+
|
11
|
+
pub fn concat_df(seq: RArray) -> RbResult<RbDataFrame> {
|
12
|
+
use polars_core::error::PolarsResult;
|
13
|
+
|
14
|
+
let mut iter = seq.each();
|
15
|
+
let first = iter.next().unwrap()?;
|
16
|
+
|
17
|
+
let first_rdf = get_df(first)?;
|
18
|
+
let identity_df = first_rdf.slice(0, 0);
|
19
|
+
|
20
|
+
let mut rdfs: Vec<PolarsResult<DataFrame>> = vec![Ok(first_rdf)];
|
21
|
+
|
22
|
+
for item in iter {
|
23
|
+
let rdf = get_df(item?)?;
|
24
|
+
rdfs.push(Ok(rdf));
|
25
|
+
}
|
26
|
+
|
27
|
+
let identity = Ok(identity_df);
|
28
|
+
|
29
|
+
let df = rdfs
|
30
|
+
.into_iter()
|
31
|
+
.fold(identity, |acc: PolarsResult<DataFrame>, df| {
|
32
|
+
let mut acc = acc?;
|
33
|
+
acc.vstack_mut(&df?)?;
|
34
|
+
Ok(acc)
|
35
|
+
})
|
36
|
+
.map_err(RbPolarsErr::from)?;
|
37
|
+
|
38
|
+
Ok(df.into())
|
39
|
+
}
|
40
|
+
|
41
|
+
pub fn concat_series(seq: RArray) -> RbResult<RbSeries> {
|
42
|
+
let mut iter = seq.each();
|
43
|
+
let first = iter.next().unwrap()?;
|
44
|
+
|
45
|
+
let mut s = get_series(first)?;
|
46
|
+
|
47
|
+
for res in iter {
|
48
|
+
let item = res?;
|
49
|
+
let item = get_series(item)?;
|
50
|
+
s.append(&item).map_err(RbPolarsErr::from)?;
|
51
|
+
}
|
52
|
+
Ok(s.into())
|
53
|
+
}
|
54
|
+
|
55
|
+
pub fn date_range(
|
56
|
+
start: i64,
|
57
|
+
stop: i64,
|
58
|
+
every: String,
|
59
|
+
closed: Wrap<ClosedWindow>,
|
60
|
+
name: String,
|
61
|
+
tu: Wrap<TimeUnit>,
|
62
|
+
tz: Option<TimeZone>,
|
63
|
+
) -> RbResult<RbSeries> {
|
64
|
+
let date_range = time::date_range_impl(
|
65
|
+
&name,
|
66
|
+
start,
|
67
|
+
stop,
|
68
|
+
Duration::parse(&every),
|
69
|
+
closed.0,
|
70
|
+
tu.0,
|
71
|
+
tz.as_ref(),
|
72
|
+
)
|
73
|
+
.map_err(RbPolarsErr::from)?;
|
74
|
+
Ok(date_range.into_series().into())
|
75
|
+
}
|
76
|
+
|
77
|
+
pub fn diag_concat_df(seq: RArray) -> RbResult<RbDataFrame> {
|
78
|
+
let mut dfs = Vec::new();
|
79
|
+
for item in seq.each() {
|
80
|
+
dfs.push(get_df(item?)?);
|
81
|
+
}
|
82
|
+
let df = functions::diag_concat_df(&dfs).map_err(RbPolarsErr::from)?;
|
83
|
+
Ok(df.into())
|
84
|
+
}
|
85
|
+
|
86
|
+
pub fn hor_concat_df(seq: RArray) -> RbResult<RbDataFrame> {
|
87
|
+
let mut dfs = Vec::new();
|
88
|
+
for item in seq.each() {
|
89
|
+
dfs.push(get_df(item?)?);
|
90
|
+
}
|
91
|
+
let df = functions::hor_concat_df(&dfs).map_err(RbPolarsErr::from)?;
|
92
|
+
Ok(df.into())
|
93
|
+
}
|
@@ -0,0 +1,34 @@
|
|
1
|
+
use magnus::{RHash, Value};
|
2
|
+
|
3
|
+
use crate::conversion::Wrap;
|
4
|
+
use crate::file::get_file_like;
|
5
|
+
use crate::prelude::DataType;
|
6
|
+
use crate::{RbPolarsErr, RbResult};
|
7
|
+
|
8
|
+
pub fn read_ipc_schema(rb_f: Value) -> RbResult<Value> {
|
9
|
+
use polars_core::export::arrow::io::ipc::read::read_file_metadata;
|
10
|
+
let mut r = get_file_like(rb_f, false)?;
|
11
|
+
let metadata = read_file_metadata(&mut r).map_err(RbPolarsErr::arrow)?;
|
12
|
+
|
13
|
+
let dict = RHash::new();
|
14
|
+
for field in metadata.schema.fields {
|
15
|
+
let dt: Wrap<DataType> = Wrap((&field.data_type).into());
|
16
|
+
dict.aset(field.name, dt)?;
|
17
|
+
}
|
18
|
+
Ok(dict.into())
|
19
|
+
}
|
20
|
+
|
21
|
+
pub fn read_parquet_schema(rb_f: Value) -> RbResult<Value> {
|
22
|
+
use polars_core::export::arrow::io::parquet::read::{infer_schema, read_metadata};
|
23
|
+
|
24
|
+
let mut r = get_file_like(rb_f, false)?;
|
25
|
+
let metadata = read_metadata(&mut r).map_err(RbPolarsErr::arrow)?;
|
26
|
+
let arrow_schema = infer_schema(&metadata).map_err(RbPolarsErr::arrow)?;
|
27
|
+
|
28
|
+
let dict = RHash::new();
|
29
|
+
for field in arrow_schema.fields {
|
30
|
+
let dt: Wrap<DataType> = Wrap((&field.data_type).into());
|
31
|
+
dict.aset(field.name, dt)?;
|
32
|
+
}
|
33
|
+
Ok(dict.into())
|
34
|
+
}
|