polars-df 0.4.0 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +26 -0
- data/Cargo.lock +447 -410
- data/Cargo.toml +0 -1
- data/README.md +6 -5
- data/ext/polars/Cargo.toml +10 -5
- data/ext/polars/src/apply/dataframe.rs +2 -2
- data/ext/polars/src/{lazy/apply.rs → apply/lazy.rs} +1 -2
- data/ext/polars/src/apply/mod.rs +8 -3
- data/ext/polars/src/batched_csv.rs +7 -5
- data/ext/polars/src/conversion.rs +269 -59
- data/ext/polars/src/dataframe.rs +38 -40
- data/ext/polars/src/error.rs +6 -2
- data/ext/polars/src/expr/array.rs +15 -0
- data/ext/polars/src/expr/binary.rs +69 -0
- data/ext/polars/src/expr/categorical.rs +10 -0
- data/ext/polars/src/expr/datetime.rs +223 -0
- data/ext/polars/src/expr/general.rs +963 -0
- data/ext/polars/src/expr/list.rs +151 -0
- data/ext/polars/src/{lazy → expr}/meta.rs +16 -6
- data/ext/polars/src/expr/string.rs +314 -0
- data/ext/polars/src/expr/struct.rs +15 -0
- data/ext/polars/src/expr.rs +34 -0
- data/ext/polars/src/functions/eager.rs +93 -0
- data/ext/polars/src/functions/io.rs +34 -0
- data/ext/polars/src/functions/lazy.rs +249 -0
- data/ext/polars/src/functions/meta.rs +8 -0
- data/ext/polars/src/functions/mod.rs +5 -0
- data/ext/polars/src/functions/whenthen.rs +43 -0
- data/ext/polars/src/{lazy/dataframe.rs → lazyframe.rs} +26 -35
- data/ext/polars/src/lazygroupby.rs +29 -0
- data/ext/polars/src/lib.rs +223 -316
- data/ext/polars/src/object.rs +1 -1
- data/ext/polars/src/rb_modules.rs +12 -0
- data/ext/polars/src/series/aggregation.rs +83 -0
- data/ext/polars/src/series/arithmetic.rs +88 -0
- data/ext/polars/src/series/comparison.rs +251 -0
- data/ext/polars/src/series/construction.rs +190 -0
- data/ext/polars/src/series.rs +151 -551
- data/lib/polars/array_expr.rb +84 -0
- data/lib/polars/array_name_space.rb +77 -0
- data/lib/polars/batched_csv_reader.rb +1 -1
- data/lib/polars/convert.rb +2 -2
- data/lib/polars/data_frame.rb +289 -96
- data/lib/polars/data_types.rb +169 -33
- data/lib/polars/date_time_expr.rb +142 -2
- data/lib/polars/date_time_name_space.rb +17 -3
- data/lib/polars/expr.rb +145 -78
- data/lib/polars/functions.rb +0 -1
- data/lib/polars/group_by.rb +1 -22
- data/lib/polars/lazy_frame.rb +84 -31
- data/lib/polars/lazy_functions.rb +71 -32
- data/lib/polars/list_expr.rb +94 -45
- data/lib/polars/list_name_space.rb +13 -13
- data/lib/polars/rolling_group_by.rb +4 -2
- data/lib/polars/series.rb +249 -87
- data/lib/polars/string_expr.rb +277 -45
- data/lib/polars/string_name_space.rb +137 -22
- data/lib/polars/struct_name_space.rb +32 -0
- data/lib/polars/utils.rb +138 -54
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +5 -2
- metadata +29 -11
- data/ext/polars/src/lazy/dsl.rs +0 -1775
- data/ext/polars/src/lazy/mod.rs +0 -5
- data/ext/polars/src/lazy/utils.rs +0 -13
- data/ext/polars/src/list_construction.rs +0 -100
- /data/ext/polars/src/{numo.rs → series/export.rs} +0 -0
- /data/ext/polars/src/{set.rs → series/set_at_idx.rs} +0 -0
@@ -0,0 +1,151 @@
|
|
1
|
+
use magnus::Value;
|
2
|
+
use polars::lazy::dsl::lit;
|
3
|
+
use polars::prelude::*;
|
4
|
+
use polars::series::ops::NullBehavior;
|
5
|
+
|
6
|
+
use crate::conversion::Wrap;
|
7
|
+
use crate::{RbExpr, RbResult};
|
8
|
+
|
9
|
+
impl RbExpr {
|
10
|
+
pub fn list_arg_max(&self) -> Self {
|
11
|
+
self.inner.clone().list().arg_max().into()
|
12
|
+
}
|
13
|
+
|
14
|
+
pub fn list_arg_min(&self) -> Self {
|
15
|
+
self.inner.clone().list().arg_min().into()
|
16
|
+
}
|
17
|
+
|
18
|
+
pub fn list_contains(&self, other: &RbExpr) -> Self {
|
19
|
+
self.inner
|
20
|
+
.clone()
|
21
|
+
.list()
|
22
|
+
.contains(other.inner.clone())
|
23
|
+
.into()
|
24
|
+
}
|
25
|
+
|
26
|
+
pub fn list_count_match(&self, expr: &RbExpr) -> Self {
|
27
|
+
self.inner
|
28
|
+
.clone()
|
29
|
+
.list()
|
30
|
+
.count_match(expr.inner.clone())
|
31
|
+
.into()
|
32
|
+
}
|
33
|
+
|
34
|
+
pub fn list_diff(&self, n: i64, null_behavior: Wrap<NullBehavior>) -> RbResult<Self> {
|
35
|
+
Ok(self.inner.clone().list().diff(n, null_behavior.0).into())
|
36
|
+
}
|
37
|
+
|
38
|
+
pub fn list_eval(&self, expr: &RbExpr, parallel: bool) -> Self {
|
39
|
+
self.inner
|
40
|
+
.clone()
|
41
|
+
.list()
|
42
|
+
.eval(expr.inner.clone(), parallel)
|
43
|
+
.into()
|
44
|
+
}
|
45
|
+
|
46
|
+
pub fn list_get(&self, index: &RbExpr) -> Self {
|
47
|
+
self.inner.clone().list().get(index.inner.clone()).into()
|
48
|
+
}
|
49
|
+
|
50
|
+
pub fn list_join(&self, separator: String) -> Self {
|
51
|
+
self.inner.clone().list().join(&separator).into()
|
52
|
+
}
|
53
|
+
|
54
|
+
pub fn list_lengths(&self) -> Self {
|
55
|
+
self.inner.clone().list().lengths().into()
|
56
|
+
}
|
57
|
+
|
58
|
+
pub fn list_max(&self) -> Self {
|
59
|
+
self.inner.clone().list().max().into()
|
60
|
+
}
|
61
|
+
|
62
|
+
pub fn list_mean(&self) -> Self {
|
63
|
+
self.inner
|
64
|
+
.clone()
|
65
|
+
.list()
|
66
|
+
.mean()
|
67
|
+
.with_fmt("list.mean")
|
68
|
+
.into()
|
69
|
+
}
|
70
|
+
|
71
|
+
pub fn list_min(&self) -> Self {
|
72
|
+
self.inner.clone().list().min().into()
|
73
|
+
}
|
74
|
+
|
75
|
+
pub fn list_reverse(&self) -> Self {
|
76
|
+
self.inner.clone().list().reverse().into()
|
77
|
+
}
|
78
|
+
|
79
|
+
pub fn list_shift(&self, periods: i64) -> Self {
|
80
|
+
self.inner.clone().list().shift(periods).into()
|
81
|
+
}
|
82
|
+
|
83
|
+
pub fn list_slice(&self, offset: &RbExpr, length: Option<&RbExpr>) -> Self {
|
84
|
+
let length = match length {
|
85
|
+
Some(i) => i.inner.clone(),
|
86
|
+
None => lit(i64::MAX),
|
87
|
+
};
|
88
|
+
self.inner
|
89
|
+
.clone()
|
90
|
+
.list()
|
91
|
+
.slice(offset.inner.clone(), length)
|
92
|
+
.into()
|
93
|
+
}
|
94
|
+
|
95
|
+
pub fn list_sort(&self, reverse: bool) -> Self {
|
96
|
+
self.inner
|
97
|
+
.clone()
|
98
|
+
.list()
|
99
|
+
.sort(SortOptions {
|
100
|
+
descending: reverse,
|
101
|
+
..Default::default()
|
102
|
+
})
|
103
|
+
.with_fmt("list.sort")
|
104
|
+
.into()
|
105
|
+
}
|
106
|
+
|
107
|
+
pub fn list_sum(&self) -> Self {
|
108
|
+
self.inner.clone().list().sum().with_fmt("list.sum").into()
|
109
|
+
}
|
110
|
+
|
111
|
+
pub fn list_take(&self, index: &RbExpr, null_on_oob: bool) -> Self {
|
112
|
+
self.inner
|
113
|
+
.clone()
|
114
|
+
.list()
|
115
|
+
.take(index.inner.clone(), null_on_oob)
|
116
|
+
.into()
|
117
|
+
}
|
118
|
+
|
119
|
+
pub fn list_to_struct(
|
120
|
+
&self,
|
121
|
+
width_strat: Wrap<ListToStructWidthStrategy>,
|
122
|
+
_name_gen: Option<Value>,
|
123
|
+
upper_bound: usize,
|
124
|
+
) -> RbResult<Self> {
|
125
|
+
// TODO fix
|
126
|
+
let name_gen = None;
|
127
|
+
// let name_gen = name_gen.map(|lambda| {
|
128
|
+
// Arc::new(move |idx: usize| {
|
129
|
+
// let out: Value = lambda.funcall("call", (idx,)).unwrap();
|
130
|
+
// out.try_convert::<String>().unwrap()
|
131
|
+
// }) as NameGenerator
|
132
|
+
// });
|
133
|
+
|
134
|
+
Ok(self
|
135
|
+
.inner
|
136
|
+
.clone()
|
137
|
+
.list()
|
138
|
+
.to_struct(width_strat.0, name_gen, upper_bound)
|
139
|
+
.into())
|
140
|
+
}
|
141
|
+
|
142
|
+
pub fn list_unique(&self, maintain_order: bool) -> Self {
|
143
|
+
let e = self.inner.clone();
|
144
|
+
|
145
|
+
if maintain_order {
|
146
|
+
e.list().unique_stable().into()
|
147
|
+
} else {
|
148
|
+
e.list().unique().into()
|
149
|
+
}
|
150
|
+
}
|
151
|
+
}
|
@@ -1,6 +1,12 @@
|
|
1
|
-
use
|
1
|
+
use magnus::RArray;
|
2
|
+
|
3
|
+
use crate::{RbExpr, RbPolarsErr, RbResult};
|
2
4
|
|
3
5
|
impl RbExpr {
|
6
|
+
/// Returns whether the two wrapped expressions compare equal with `==`.
pub fn meta_eq(&self, other: &RbExpr) -> bool {
    let (lhs, rhs) = (&self.inner, &other.inner);
    lhs == rhs
}
|
9
|
+
|
4
10
|
pub fn meta_pop(&self) -> RArray {
|
5
11
|
RArray::from_iter(
|
6
12
|
self.inner
|
@@ -12,11 +18,7 @@ impl RbExpr {
|
|
12
18
|
)
|
13
19
|
}
|
14
20
|
|
15
|
-
pub fn
|
16
|
-
self.inner == other.inner
|
17
|
-
}
|
18
|
-
|
19
|
-
pub fn meta_roots(&self) -> Vec<String> {
|
21
|
+
pub fn meta_root_names(&self) -> Vec<String> {
|
20
22
|
self.inner
|
21
23
|
.clone()
|
22
24
|
.meta()
|
@@ -39,4 +41,12 @@ impl RbExpr {
|
|
39
41
|
pub fn meta_undo_aliases(&self) -> RbExpr {
|
40
42
|
self.inner.clone().meta().undo_aliases().into()
|
41
43
|
}
|
44
|
+
|
45
|
+
pub fn meta_has_multiple_outputs(&self) -> bool {
|
46
|
+
self.inner.clone().meta().has_multiple_outputs()
|
47
|
+
}
|
48
|
+
|
49
|
+
pub fn meta_is_regex_projection(&self) -> bool {
|
50
|
+
self.inner.clone().meta().is_regex_projection()
|
51
|
+
}
|
42
52
|
}
|
@@ -0,0 +1,314 @@
|
|
1
|
+
use polars::prelude::*;
|
2
|
+
|
3
|
+
use crate::conversion::Wrap;
|
4
|
+
use crate::RbExpr;
|
5
|
+
|
6
|
+
impl RbExpr {
|
7
|
+
pub fn str_concat(&self, delimiter: String) -> Self {
|
8
|
+
self.inner.clone().str().concat(&delimiter).into()
|
9
|
+
}
|
10
|
+
|
11
|
+
pub fn str_to_date(
|
12
|
+
&self,
|
13
|
+
format: Option<String>,
|
14
|
+
strict: bool,
|
15
|
+
exact: bool,
|
16
|
+
cache: bool,
|
17
|
+
) -> Self {
|
18
|
+
let options = StrptimeOptions {
|
19
|
+
format,
|
20
|
+
strict,
|
21
|
+
exact,
|
22
|
+
cache,
|
23
|
+
};
|
24
|
+
self.inner.clone().str().to_date(options).into()
|
25
|
+
}
|
26
|
+
|
27
|
+
pub fn str_to_datetime(
|
28
|
+
&self,
|
29
|
+
format: Option<String>,
|
30
|
+
time_unit: Option<Wrap<TimeUnit>>,
|
31
|
+
time_zone: Option<TimeZone>,
|
32
|
+
strict: bool,
|
33
|
+
exact: bool,
|
34
|
+
cache: bool,
|
35
|
+
) -> Self {
|
36
|
+
let options = StrptimeOptions {
|
37
|
+
format,
|
38
|
+
strict,
|
39
|
+
exact,
|
40
|
+
cache,
|
41
|
+
};
|
42
|
+
self.inner
|
43
|
+
.clone()
|
44
|
+
.str()
|
45
|
+
.to_datetime(time_unit.map(|tu| tu.0), time_zone, options)
|
46
|
+
.into()
|
47
|
+
}
|
48
|
+
|
49
|
+
pub fn str_to_time(&self, format: Option<String>, strict: bool, cache: bool) -> Self {
|
50
|
+
let options = StrptimeOptions {
|
51
|
+
format,
|
52
|
+
strict,
|
53
|
+
cache,
|
54
|
+
exact: true,
|
55
|
+
};
|
56
|
+
self.inner.clone().str().to_time(options).into()
|
57
|
+
}
|
58
|
+
|
59
|
+
pub fn str_strip(&self, matches: Option<String>) -> Self {
|
60
|
+
self.inner.clone().str().strip(matches).into()
|
61
|
+
}
|
62
|
+
|
63
|
+
pub fn str_rstrip(&self, matches: Option<String>) -> Self {
|
64
|
+
self.inner.clone().str().rstrip(matches).into()
|
65
|
+
}
|
66
|
+
|
67
|
+
pub fn str_lstrip(&self, matches: Option<String>) -> Self {
|
68
|
+
self.inner.clone().str().lstrip(matches).into()
|
69
|
+
}
|
70
|
+
|
71
|
+
pub fn str_slice(&self, start: i64, length: Option<u64>) -> Self {
|
72
|
+
let function = move |s: Series| {
|
73
|
+
let ca = s.utf8()?;
|
74
|
+
Ok(Some(ca.str_slice(start, length)?.into_series()))
|
75
|
+
};
|
76
|
+
self.clone()
|
77
|
+
.inner
|
78
|
+
.map(function, GetOutput::from_type(DataType::Utf8))
|
79
|
+
.with_fmt("str.slice")
|
80
|
+
.into()
|
81
|
+
}
|
82
|
+
|
83
|
+
pub fn str_explode(&self) -> Self {
|
84
|
+
self.inner.clone().str().explode().into()
|
85
|
+
}
|
86
|
+
|
87
|
+
pub fn str_to_uppercase(&self) -> Self {
|
88
|
+
self.inner.clone().str().to_uppercase().into()
|
89
|
+
}
|
90
|
+
|
91
|
+
pub fn str_to_lowercase(&self) -> Self {
|
92
|
+
self.inner.clone().str().to_lowercase().into()
|
93
|
+
}
|
94
|
+
|
95
|
+
pub fn str_lengths(&self) -> Self {
|
96
|
+
let function = |s: Series| {
|
97
|
+
let ca = s.utf8()?;
|
98
|
+
Ok(Some(ca.str_lengths().into_series()))
|
99
|
+
};
|
100
|
+
self.clone()
|
101
|
+
.inner
|
102
|
+
.map(function, GetOutput::from_type(DataType::UInt32))
|
103
|
+
.with_fmt("str.lengths")
|
104
|
+
.into()
|
105
|
+
}
|
106
|
+
|
107
|
+
pub fn str_n_chars(&self) -> Self {
|
108
|
+
let function = |s: Series| {
|
109
|
+
let ca = s.utf8()?;
|
110
|
+
Ok(Some(ca.str_n_chars().into_series()))
|
111
|
+
};
|
112
|
+
self.clone()
|
113
|
+
.inner
|
114
|
+
.map(function, GetOutput::from_type(DataType::UInt32))
|
115
|
+
.with_fmt("str.n_chars")
|
116
|
+
.into()
|
117
|
+
}
|
118
|
+
|
119
|
+
pub fn str_replace_n(&self, pat: &RbExpr, val: &RbExpr, literal: bool, n: i64) -> Self {
|
120
|
+
self.inner
|
121
|
+
.clone()
|
122
|
+
.str()
|
123
|
+
.replace_n(pat.inner.clone(), val.inner.clone(), literal, n)
|
124
|
+
.into()
|
125
|
+
}
|
126
|
+
|
127
|
+
pub fn str_replace_all(&self, pat: &RbExpr, val: &RbExpr, literal: bool) -> Self {
|
128
|
+
self.inner
|
129
|
+
.clone()
|
130
|
+
.str()
|
131
|
+
.replace_all(pat.inner.clone(), val.inner.clone(), literal)
|
132
|
+
.into()
|
133
|
+
}
|
134
|
+
|
135
|
+
pub fn str_zfill(&self, alignment: usize) -> Self {
|
136
|
+
self.clone().inner.str().zfill(alignment).into()
|
137
|
+
}
|
138
|
+
|
139
|
+
pub fn str_ljust(&self, width: usize, fillchar: char) -> Self {
|
140
|
+
self.clone().inner.str().ljust(width, fillchar).into()
|
141
|
+
}
|
142
|
+
|
143
|
+
pub fn str_rjust(&self, width: usize, fillchar: char) -> Self {
|
144
|
+
self.clone().inner.str().rjust(width, fillchar).into()
|
145
|
+
}
|
146
|
+
|
147
|
+
pub fn str_contains(&self, pat: &RbExpr, literal: Option<bool>, strict: bool) -> Self {
|
148
|
+
match literal {
|
149
|
+
Some(true) => self
|
150
|
+
.inner
|
151
|
+
.clone()
|
152
|
+
.str()
|
153
|
+
.contains_literal(pat.inner.clone())
|
154
|
+
.into(),
|
155
|
+
_ => self
|
156
|
+
.inner
|
157
|
+
.clone()
|
158
|
+
.str()
|
159
|
+
.contains(pat.inner.clone(), strict)
|
160
|
+
.into(),
|
161
|
+
}
|
162
|
+
}
|
163
|
+
|
164
|
+
pub fn str_ends_with(&self, sub: &RbExpr) -> Self {
|
165
|
+
self.inner.clone().str().ends_with(sub.inner.clone()).into()
|
166
|
+
}
|
167
|
+
|
168
|
+
pub fn str_starts_with(&self, sub: &RbExpr) -> Self {
|
169
|
+
self.inner
|
170
|
+
.clone()
|
171
|
+
.str()
|
172
|
+
.starts_with(sub.inner.clone())
|
173
|
+
.into()
|
174
|
+
}
|
175
|
+
|
176
|
+
pub fn str_hex_encode(&self) -> Self {
|
177
|
+
self.clone()
|
178
|
+
.inner
|
179
|
+
.map(
|
180
|
+
move |s| s.utf8().map(|s| Some(s.hex_encode().into_series())),
|
181
|
+
GetOutput::same_type(),
|
182
|
+
)
|
183
|
+
.with_fmt("str.hex_encode")
|
184
|
+
.into()
|
185
|
+
}
|
186
|
+
|
187
|
+
pub fn str_hex_decode(&self, strict: bool) -> Self {
|
188
|
+
self.clone()
|
189
|
+
.inner
|
190
|
+
.map(
|
191
|
+
move |s| s.utf8()?.hex_decode(strict).map(|s| Some(s.into_series())),
|
192
|
+
GetOutput::same_type(),
|
193
|
+
)
|
194
|
+
.with_fmt("str.hex_decode")
|
195
|
+
.into()
|
196
|
+
}
|
197
|
+
|
198
|
+
pub fn str_base64_encode(&self) -> Self {
|
199
|
+
self.clone()
|
200
|
+
.inner
|
201
|
+
.map(
|
202
|
+
move |s| s.utf8().map(|s| Some(s.base64_encode().into_series())),
|
203
|
+
GetOutput::same_type(),
|
204
|
+
)
|
205
|
+
.with_fmt("str.base64_encode")
|
206
|
+
.into()
|
207
|
+
}
|
208
|
+
|
209
|
+
pub fn str_base64_decode(&self, strict: bool) -> Self {
|
210
|
+
self.clone()
|
211
|
+
.inner
|
212
|
+
.map(
|
213
|
+
move |s| {
|
214
|
+
s.utf8()?
|
215
|
+
.base64_decode(strict)
|
216
|
+
.map(|s| Some(s.into_series()))
|
217
|
+
},
|
218
|
+
GetOutput::same_type(),
|
219
|
+
)
|
220
|
+
.with_fmt("str.base64_decode")
|
221
|
+
.into()
|
222
|
+
}
|
223
|
+
|
224
|
+
pub fn str_parse_int(&self, radix: u32, strict: bool) -> Self {
|
225
|
+
self.inner
|
226
|
+
.clone()
|
227
|
+
.str()
|
228
|
+
.from_radix(radix, strict)
|
229
|
+
.with_fmt("str.parse_int")
|
230
|
+
.into()
|
231
|
+
}
|
232
|
+
|
233
|
+
pub fn str_json_extract(
|
234
|
+
&self,
|
235
|
+
dtype: Option<Wrap<DataType>>,
|
236
|
+
infer_schema_len: Option<usize>,
|
237
|
+
) -> Self {
|
238
|
+
let dtype = dtype.map(|wrap| wrap.0);
|
239
|
+
|
240
|
+
let output_type = match dtype.clone() {
|
241
|
+
Some(dtype) => GetOutput::from_type(dtype),
|
242
|
+
None => GetOutput::from_type(DataType::Unknown),
|
243
|
+
};
|
244
|
+
|
245
|
+
let function = move |s: Series| {
|
246
|
+
let ca = s.utf8()?;
|
247
|
+
match ca.json_extract(dtype.clone(), infer_schema_len) {
|
248
|
+
Ok(ca) => Ok(Some(ca.into_series())),
|
249
|
+
Err(e) => Err(PolarsError::ComputeError(format!("{e:?}").into())),
|
250
|
+
}
|
251
|
+
};
|
252
|
+
|
253
|
+
self.clone()
|
254
|
+
.inner
|
255
|
+
.map(function, output_type)
|
256
|
+
.with_fmt("str.json_extract")
|
257
|
+
.into()
|
258
|
+
}
|
259
|
+
|
260
|
+
pub fn str_json_path_match(&self, pat: String) -> Self {
|
261
|
+
let function = move |s: Series| {
|
262
|
+
let ca = s.utf8()?;
|
263
|
+
match ca.json_path_match(&pat) {
|
264
|
+
Ok(ca) => Ok(Some(ca.into_series())),
|
265
|
+
Err(e) => Err(PolarsError::ComputeError(format!("{:?}", e).into())),
|
266
|
+
}
|
267
|
+
};
|
268
|
+
self.clone()
|
269
|
+
.inner
|
270
|
+
.map(function, GetOutput::from_type(DataType::Utf8))
|
271
|
+
.with_fmt("str.json_path_match")
|
272
|
+
.into()
|
273
|
+
}
|
274
|
+
|
275
|
+
pub fn str_extract(&self, pat: String, group_index: usize) -> Self {
|
276
|
+
self.inner.clone().str().extract(&pat, group_index).into()
|
277
|
+
}
|
278
|
+
|
279
|
+
pub fn str_extract_all(&self, pat: &RbExpr) -> Self {
|
280
|
+
self.inner
|
281
|
+
.clone()
|
282
|
+
.str()
|
283
|
+
.extract_all(pat.inner.clone())
|
284
|
+
.into()
|
285
|
+
}
|
286
|
+
|
287
|
+
pub fn str_count_match(&self, pat: String) -> Self {
|
288
|
+
self.inner.clone().str().count_match(&pat).into()
|
289
|
+
}
|
290
|
+
|
291
|
+
pub fn str_split(&self, by: String) -> Self {
|
292
|
+
self.inner.clone().str().split(&by).into()
|
293
|
+
}
|
294
|
+
|
295
|
+
pub fn str_split_inclusive(&self, by: String) -> Self {
|
296
|
+
self.inner.clone().str().split_inclusive(&by).into()
|
297
|
+
}
|
298
|
+
|
299
|
+
pub fn str_split_exact(&self, by: String, n: usize) -> Self {
|
300
|
+
self.inner.clone().str().split_exact(&by, n).into()
|
301
|
+
}
|
302
|
+
|
303
|
+
pub fn str_split_exact_inclusive(&self, by: String, n: usize) -> Self {
|
304
|
+
self.inner
|
305
|
+
.clone()
|
306
|
+
.str()
|
307
|
+
.split_exact_inclusive(&by, n)
|
308
|
+
.into()
|
309
|
+
}
|
310
|
+
|
311
|
+
pub fn str_splitn(&self, by: String, n: usize) -> Self {
|
312
|
+
self.inner.clone().str().splitn(&by, n).into()
|
313
|
+
}
|
314
|
+
}
|
@@ -0,0 +1,15 @@
|
|
1
|
+
use crate::RbExpr;
|
2
|
+
|
3
|
+
impl RbExpr {
|
4
|
+
pub fn struct_field_by_name(&self, name: String) -> Self {
|
5
|
+
self.inner.clone().struct_().field_by_name(&name).into()
|
6
|
+
}
|
7
|
+
|
8
|
+
pub fn struct_field_by_index(&self, index: i64) -> Self {
|
9
|
+
self.inner.clone().struct_().field_by_index(index).into()
|
10
|
+
}
|
11
|
+
|
12
|
+
pub fn struct_rename_fields(&self, names: Vec<String>) -> Self {
|
13
|
+
self.inner.clone().struct_().rename_fields(names).into()
|
14
|
+
}
|
15
|
+
}
|
@@ -0,0 +1,34 @@
|
|
1
|
+
mod array;
|
2
|
+
mod binary;
|
3
|
+
mod categorical;
|
4
|
+
mod datetime;
|
5
|
+
mod general;
|
6
|
+
mod list;
|
7
|
+
mod meta;
|
8
|
+
mod string;
|
9
|
+
mod r#struct;
|
10
|
+
|
11
|
+
use magnus::RArray;
|
12
|
+
use polars::lazy::dsl::Expr;
|
13
|
+
|
14
|
+
use crate::RbResult;
|
15
|
+
|
16
|
+
#[magnus::wrap(class = "Polars::RbExpr")]
|
17
|
+
#[derive(Clone)]
|
18
|
+
pub struct RbExpr {
|
19
|
+
pub inner: Expr,
|
20
|
+
}
|
21
|
+
|
22
|
+
impl From<Expr> for RbExpr {
|
23
|
+
fn from(inner: Expr) -> Self {
|
24
|
+
RbExpr { inner }
|
25
|
+
}
|
26
|
+
}
|
27
|
+
|
28
|
+
pub fn rb_exprs_to_exprs(rb_exprs: RArray) -> RbResult<Vec<Expr>> {
|
29
|
+
let mut exprs = Vec::new();
|
30
|
+
for item in rb_exprs.each() {
|
31
|
+
exprs.push(item?.try_convert::<&RbExpr>()?.inner.clone());
|
32
|
+
}
|
33
|
+
Ok(exprs)
|
34
|
+
}
|
@@ -0,0 +1,93 @@
|
|
1
|
+
use magnus::RArray;
|
2
|
+
use polars::{functions, time};
|
3
|
+
use polars_core::datatypes::{TimeUnit, TimeZone};
|
4
|
+
use polars_core::prelude::{DataFrame, IntoSeries};
|
5
|
+
|
6
|
+
use crate::conversion::{get_df, get_series, Wrap};
|
7
|
+
use crate::error::RbPolarsErr;
|
8
|
+
use crate::prelude::{ClosedWindow, Duration};
|
9
|
+
use crate::{RbDataFrame, RbResult, RbSeries};
|
10
|
+
|
11
|
+
pub fn concat_df(seq: RArray) -> RbResult<RbDataFrame> {
|
12
|
+
use polars_core::error::PolarsResult;
|
13
|
+
|
14
|
+
let mut iter = seq.each();
|
15
|
+
let first = iter.next().unwrap()?;
|
16
|
+
|
17
|
+
let first_rdf = get_df(first)?;
|
18
|
+
let identity_df = first_rdf.slice(0, 0);
|
19
|
+
|
20
|
+
let mut rdfs: Vec<PolarsResult<DataFrame>> = vec![Ok(first_rdf)];
|
21
|
+
|
22
|
+
for item in iter {
|
23
|
+
let rdf = get_df(item?)?;
|
24
|
+
rdfs.push(Ok(rdf));
|
25
|
+
}
|
26
|
+
|
27
|
+
let identity = Ok(identity_df);
|
28
|
+
|
29
|
+
let df = rdfs
|
30
|
+
.into_iter()
|
31
|
+
.fold(identity, |acc: PolarsResult<DataFrame>, df| {
|
32
|
+
let mut acc = acc?;
|
33
|
+
acc.vstack_mut(&df?)?;
|
34
|
+
Ok(acc)
|
35
|
+
})
|
36
|
+
.map_err(RbPolarsErr::from)?;
|
37
|
+
|
38
|
+
Ok(df.into())
|
39
|
+
}
|
40
|
+
|
41
|
+
pub fn concat_series(seq: RArray) -> RbResult<RbSeries> {
|
42
|
+
let mut iter = seq.each();
|
43
|
+
let first = iter.next().unwrap()?;
|
44
|
+
|
45
|
+
let mut s = get_series(first)?;
|
46
|
+
|
47
|
+
for res in iter {
|
48
|
+
let item = res?;
|
49
|
+
let item = get_series(item)?;
|
50
|
+
s.append(&item).map_err(RbPolarsErr::from)?;
|
51
|
+
}
|
52
|
+
Ok(s.into())
|
53
|
+
}
|
54
|
+
|
55
|
+
pub fn date_range(
|
56
|
+
start: i64,
|
57
|
+
stop: i64,
|
58
|
+
every: String,
|
59
|
+
closed: Wrap<ClosedWindow>,
|
60
|
+
name: String,
|
61
|
+
tu: Wrap<TimeUnit>,
|
62
|
+
tz: Option<TimeZone>,
|
63
|
+
) -> RbResult<RbSeries> {
|
64
|
+
let date_range = time::date_range_impl(
|
65
|
+
&name,
|
66
|
+
start,
|
67
|
+
stop,
|
68
|
+
Duration::parse(&every),
|
69
|
+
closed.0,
|
70
|
+
tu.0,
|
71
|
+
tz.as_ref(),
|
72
|
+
)
|
73
|
+
.map_err(RbPolarsErr::from)?;
|
74
|
+
Ok(date_range.into_series().into())
|
75
|
+
}
|
76
|
+
|
77
|
+
pub fn diag_concat_df(seq: RArray) -> RbResult<RbDataFrame> {
|
78
|
+
let mut dfs = Vec::new();
|
79
|
+
for item in seq.each() {
|
80
|
+
dfs.push(get_df(item?)?);
|
81
|
+
}
|
82
|
+
let df = functions::diag_concat_df(&dfs).map_err(RbPolarsErr::from)?;
|
83
|
+
Ok(df.into())
|
84
|
+
}
|
85
|
+
|
86
|
+
pub fn hor_concat_df(seq: RArray) -> RbResult<RbDataFrame> {
|
87
|
+
let mut dfs = Vec::new();
|
88
|
+
for item in seq.each() {
|
89
|
+
dfs.push(get_df(item?)?);
|
90
|
+
}
|
91
|
+
let df = functions::hor_concat_df(&dfs).map_err(RbPolarsErr::from)?;
|
92
|
+
Ok(df.into())
|
93
|
+
}
|
@@ -0,0 +1,34 @@
|
|
1
|
+
use magnus::{RHash, Value};
|
2
|
+
|
3
|
+
use crate::conversion::Wrap;
|
4
|
+
use crate::file::get_file_like;
|
5
|
+
use crate::prelude::DataType;
|
6
|
+
use crate::{RbPolarsErr, RbResult};
|
7
|
+
|
8
|
+
pub fn read_ipc_schema(rb_f: Value) -> RbResult<Value> {
|
9
|
+
use polars_core::export::arrow::io::ipc::read::read_file_metadata;
|
10
|
+
let mut r = get_file_like(rb_f, false)?;
|
11
|
+
let metadata = read_file_metadata(&mut r).map_err(RbPolarsErr::arrow)?;
|
12
|
+
|
13
|
+
let dict = RHash::new();
|
14
|
+
for field in metadata.schema.fields {
|
15
|
+
let dt: Wrap<DataType> = Wrap((&field.data_type).into());
|
16
|
+
dict.aset(field.name, dt)?;
|
17
|
+
}
|
18
|
+
Ok(dict.into())
|
19
|
+
}
|
20
|
+
|
21
|
+
pub fn read_parquet_schema(rb_f: Value) -> RbResult<Value> {
|
22
|
+
use polars_core::export::arrow::io::parquet::read::{infer_schema, read_metadata};
|
23
|
+
|
24
|
+
let mut r = get_file_like(rb_f, false)?;
|
25
|
+
let metadata = read_metadata(&mut r).map_err(RbPolarsErr::arrow)?;
|
26
|
+
let arrow_schema = infer_schema(&metadata).map_err(RbPolarsErr::arrow)?;
|
27
|
+
|
28
|
+
let dict = RHash::new();
|
29
|
+
for field in arrow_schema.fields {
|
30
|
+
let dt: Wrap<DataType> = Wrap((&field.data_type).into());
|
31
|
+
dict.aset(field.name, dt)?;
|
32
|
+
}
|
33
|
+
Ok(dict.into())
|
34
|
+
}
|