polars-df 0.6.0 → 0.8.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +24 -0
- data/Cargo.lock +597 -599
- data/Cargo.toml +1 -0
- data/README.md +8 -7
- data/ext/polars/Cargo.toml +20 -10
- data/ext/polars/src/batched_csv.rs +27 -28
- data/ext/polars/src/conversion.rs +135 -106
- data/ext/polars/src/dataframe.rs +140 -131
- data/ext/polars/src/error.rs +0 -5
- data/ext/polars/src/expr/binary.rs +18 -6
- data/ext/polars/src/expr/categorical.rs +8 -1
- data/ext/polars/src/expr/datetime.rs +10 -12
- data/ext/polars/src/expr/general.rs +129 -286
- data/ext/polars/src/expr/list.rs +17 -9
- data/ext/polars/src/{expr.rs → expr/mod.rs} +4 -2
- data/ext/polars/src/expr/name.rs +44 -0
- data/ext/polars/src/expr/rolling.rs +201 -0
- data/ext/polars/src/expr/string.rs +94 -67
- data/ext/polars/src/file.rs +3 -3
- data/ext/polars/src/functions/aggregation.rs +35 -0
- data/ext/polars/src/functions/eager.rs +7 -31
- data/ext/polars/src/functions/io.rs +10 -10
- data/ext/polars/src/functions/lazy.rs +66 -41
- data/ext/polars/src/functions/meta.rs +30 -0
- data/ext/polars/src/functions/misc.rs +8 -0
- data/ext/polars/src/functions/mod.rs +5 -0
- data/ext/polars/src/functions/random.rs +6 -0
- data/ext/polars/src/functions/range.rs +41 -0
- data/ext/polars/src/functions/string_cache.rs +11 -0
- data/ext/polars/src/functions/whenthen.rs +7 -7
- data/ext/polars/src/lazyframe.rs +74 -60
- data/ext/polars/src/lib.rs +175 -91
- data/ext/polars/src/{apply → map}/dataframe.rs +29 -34
- data/ext/polars/src/{apply → map}/mod.rs +5 -5
- data/ext/polars/src/{apply → map}/series.rs +18 -22
- data/ext/polars/src/object.rs +0 -30
- data/ext/polars/src/on_startup.rs +32 -0
- data/ext/polars/src/rb_modules.rs +22 -7
- data/ext/polars/src/series/aggregation.rs +3 -0
- data/ext/polars/src/series/construction.rs +5 -5
- data/ext/polars/src/series/export.rs +4 -4
- data/ext/polars/src/{series.rs → series/mod.rs} +28 -45
- data/ext/polars/src/series/{set_at_idx.rs → scatter.rs} +38 -22
- data/ext/polars/src/sql.rs +46 -0
- data/ext/polars/src/utils.rs +1 -1
- data/lib/polars/config.rb +530 -0
- data/lib/polars/data_frame.rb +182 -145
- data/lib/polars/data_types.rb +4 -1
- data/lib/polars/date_time_expr.rb +23 -28
- data/lib/polars/date_time_name_space.rb +17 -37
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/expr.rb +398 -110
- data/lib/polars/functions.rb +29 -37
- data/lib/polars/group_by.rb +38 -55
- data/lib/polars/io.rb +40 -5
- data/lib/polars/lazy_frame.rb +116 -89
- data/lib/polars/lazy_functions.rb +40 -68
- data/lib/polars/lazy_group_by.rb +7 -8
- data/lib/polars/list_expr.rb +12 -8
- data/lib/polars/list_name_space.rb +2 -2
- data/lib/polars/name_expr.rb +198 -0
- data/lib/polars/rolling_group_by.rb +2 -2
- data/lib/polars/series.rb +315 -43
- data/lib/polars/sql_context.rb +194 -0
- data/lib/polars/string_expr.rb +114 -60
- data/lib/polars/string_name_space.rb +19 -4
- data/lib/polars/struct_expr.rb +1 -1
- data/lib/polars/struct_name_space.rb +1 -1
- data/lib/polars/utils.rb +25 -13
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +3 -0
- metadata +23 -11
- /data/ext/polars/src/{apply → map}/lazy.rs +0 -0
@@ -0,0 +1,201 @@
|
|
1
|
+
use polars::prelude::*;
|
2
|
+
use std::any::Any;
|
3
|
+
|
4
|
+
use crate::conversion::Wrap;
|
5
|
+
use crate::RbExpr;
|
6
|
+
|
7
|
+
impl RbExpr {
|
8
|
+
pub fn rolling_sum(
|
9
|
+
&self,
|
10
|
+
window_size: String,
|
11
|
+
weights: Option<Vec<f64>>,
|
12
|
+
min_periods: usize,
|
13
|
+
center: bool,
|
14
|
+
by: Option<String>,
|
15
|
+
closed: Option<Wrap<ClosedWindow>>,
|
16
|
+
) -> Self {
|
17
|
+
let options = RollingOptions {
|
18
|
+
window_size: Duration::parse(&window_size),
|
19
|
+
weights,
|
20
|
+
min_periods,
|
21
|
+
center,
|
22
|
+
by,
|
23
|
+
closed_window: closed.map(|c| c.0),
|
24
|
+
..Default::default()
|
25
|
+
};
|
26
|
+
self.inner.clone().rolling_sum(options).into()
|
27
|
+
}
|
28
|
+
|
29
|
+
pub fn rolling_min(
|
30
|
+
&self,
|
31
|
+
window_size: String,
|
32
|
+
weights: Option<Vec<f64>>,
|
33
|
+
min_periods: usize,
|
34
|
+
center: bool,
|
35
|
+
by: Option<String>,
|
36
|
+
closed: Option<Wrap<ClosedWindow>>,
|
37
|
+
) -> Self {
|
38
|
+
let options = RollingOptions {
|
39
|
+
window_size: Duration::parse(&window_size),
|
40
|
+
weights,
|
41
|
+
min_periods,
|
42
|
+
center,
|
43
|
+
by,
|
44
|
+
closed_window: closed.map(|c| c.0),
|
45
|
+
..Default::default()
|
46
|
+
};
|
47
|
+
self.inner.clone().rolling_min(options).into()
|
48
|
+
}
|
49
|
+
|
50
|
+
pub fn rolling_max(
|
51
|
+
&self,
|
52
|
+
window_size: String,
|
53
|
+
weights: Option<Vec<f64>>,
|
54
|
+
min_periods: usize,
|
55
|
+
center: bool,
|
56
|
+
by: Option<String>,
|
57
|
+
closed: Option<Wrap<ClosedWindow>>,
|
58
|
+
) -> Self {
|
59
|
+
let options = RollingOptions {
|
60
|
+
window_size: Duration::parse(&window_size),
|
61
|
+
weights,
|
62
|
+
min_periods,
|
63
|
+
center,
|
64
|
+
by,
|
65
|
+
closed_window: closed.map(|c| c.0),
|
66
|
+
..Default::default()
|
67
|
+
};
|
68
|
+
self.inner.clone().rolling_max(options).into()
|
69
|
+
}
|
70
|
+
|
71
|
+
pub fn rolling_mean(
|
72
|
+
&self,
|
73
|
+
window_size: String,
|
74
|
+
weights: Option<Vec<f64>>,
|
75
|
+
min_periods: usize,
|
76
|
+
center: bool,
|
77
|
+
by: Option<String>,
|
78
|
+
closed: Option<Wrap<ClosedWindow>>,
|
79
|
+
) -> Self {
|
80
|
+
let options = RollingOptions {
|
81
|
+
window_size: Duration::parse(&window_size),
|
82
|
+
weights,
|
83
|
+
min_periods,
|
84
|
+
center,
|
85
|
+
by,
|
86
|
+
closed_window: closed.map(|c| c.0),
|
87
|
+
..Default::default()
|
88
|
+
};
|
89
|
+
|
90
|
+
self.inner.clone().rolling_mean(options).into()
|
91
|
+
}
|
92
|
+
|
93
|
+
#[allow(clippy::too_many_arguments)]
|
94
|
+
pub fn rolling_std(
|
95
|
+
&self,
|
96
|
+
window_size: String,
|
97
|
+
weights: Option<Vec<f64>>,
|
98
|
+
min_periods: usize,
|
99
|
+
center: bool,
|
100
|
+
by: Option<String>,
|
101
|
+
closed: Option<Wrap<ClosedWindow>>,
|
102
|
+
ddof: u8,
|
103
|
+
warn_if_unsorted: bool,
|
104
|
+
) -> Self {
|
105
|
+
let options = RollingOptions {
|
106
|
+
window_size: Duration::parse(&window_size),
|
107
|
+
weights,
|
108
|
+
min_periods,
|
109
|
+
center,
|
110
|
+
by,
|
111
|
+
closed_window: closed.map(|c| c.0),
|
112
|
+
fn_params: Some(Arc::new(RollingVarParams { ddof }) as Arc<dyn Any + Send + Sync>),
|
113
|
+
warn_if_unsorted,
|
114
|
+
};
|
115
|
+
|
116
|
+
self.inner.clone().rolling_std(options).into()
|
117
|
+
}
|
118
|
+
|
119
|
+
#[allow(clippy::too_many_arguments)]
|
120
|
+
pub fn rolling_var(
|
121
|
+
&self,
|
122
|
+
window_size: String,
|
123
|
+
weights: Option<Vec<f64>>,
|
124
|
+
min_periods: usize,
|
125
|
+
center: bool,
|
126
|
+
by: Option<String>,
|
127
|
+
closed: Option<Wrap<ClosedWindow>>,
|
128
|
+
ddof: u8,
|
129
|
+
warn_if_unsorted: bool,
|
130
|
+
) -> Self {
|
131
|
+
let options = RollingOptions {
|
132
|
+
window_size: Duration::parse(&window_size),
|
133
|
+
weights,
|
134
|
+
min_periods,
|
135
|
+
center,
|
136
|
+
by,
|
137
|
+
closed_window: closed.map(|c| c.0),
|
138
|
+
fn_params: Some(Arc::new(RollingVarParams { ddof }) as Arc<dyn Any + Send + Sync>),
|
139
|
+
warn_if_unsorted,
|
140
|
+
};
|
141
|
+
|
142
|
+
self.inner.clone().rolling_var(options).into()
|
143
|
+
}
|
144
|
+
|
145
|
+
pub fn rolling_median(
|
146
|
+
&self,
|
147
|
+
window_size: String,
|
148
|
+
weights: Option<Vec<f64>>,
|
149
|
+
min_periods: usize,
|
150
|
+
center: bool,
|
151
|
+
by: Option<String>,
|
152
|
+
closed: Option<Wrap<ClosedWindow>>,
|
153
|
+
warn_if_unsorted: bool,
|
154
|
+
) -> Self {
|
155
|
+
let options = RollingOptions {
|
156
|
+
window_size: Duration::parse(&window_size),
|
157
|
+
weights,
|
158
|
+
min_periods,
|
159
|
+
center,
|
160
|
+
by,
|
161
|
+
closed_window: closed.map(|c| c.0),
|
162
|
+
fn_params: None,
|
163
|
+
warn_if_unsorted,
|
164
|
+
};
|
165
|
+
self.inner.clone().rolling_median(options).into()
|
166
|
+
}
|
167
|
+
|
168
|
+
#[allow(clippy::too_many_arguments)]
|
169
|
+
pub fn rolling_quantile(
|
170
|
+
&self,
|
171
|
+
quantile: f64,
|
172
|
+
interpolation: Wrap<QuantileInterpolOptions>,
|
173
|
+
window_size: String,
|
174
|
+
weights: Option<Vec<f64>>,
|
175
|
+
min_periods: usize,
|
176
|
+
center: bool,
|
177
|
+
by: Option<String>,
|
178
|
+
closed: Option<Wrap<ClosedWindow>>,
|
179
|
+
warn_if_unsorted: bool,
|
180
|
+
) -> Self {
|
181
|
+
let options = RollingOptions {
|
182
|
+
window_size: Duration::parse(&window_size),
|
183
|
+
weights,
|
184
|
+
min_periods,
|
185
|
+
center,
|
186
|
+
by,
|
187
|
+
closed_window: closed.map(|c| c.0),
|
188
|
+
fn_params: None,
|
189
|
+
warn_if_unsorted,
|
190
|
+
};
|
191
|
+
|
192
|
+
self.inner
|
193
|
+
.clone()
|
194
|
+
.rolling_quantile(interpolation.0, quantile, options)
|
195
|
+
.into()
|
196
|
+
}
|
197
|
+
|
198
|
+
pub fn rolling_skew(&self, window_size: usize, bias: bool) -> Self {
|
199
|
+
self.inner.clone().rolling_skew(window_size, bias).into()
|
200
|
+
}
|
201
|
+
}
|
@@ -4,8 +4,12 @@ use crate::conversion::Wrap;
|
|
4
4
|
use crate::RbExpr;
|
5
5
|
|
6
6
|
impl RbExpr {
|
7
|
-
pub fn str_concat(&self, delimiter: String) -> Self {
|
8
|
-
self.inner
|
7
|
+
pub fn str_concat(&self, delimiter: String, ignore_nulls: bool) -> Self {
|
8
|
+
self.inner
|
9
|
+
.clone()
|
10
|
+
.str()
|
11
|
+
.concat(&delimiter, ignore_nulls)
|
12
|
+
.into()
|
9
13
|
}
|
10
14
|
|
11
15
|
pub fn str_to_date(
|
@@ -24,6 +28,7 @@ impl RbExpr {
|
|
24
28
|
self.inner.clone().str().to_date(options).into()
|
25
29
|
}
|
26
30
|
|
31
|
+
#[allow(clippy::too_many_arguments)]
|
27
32
|
pub fn str_to_datetime(
|
28
33
|
&self,
|
29
34
|
format: Option<String>,
|
@@ -32,6 +37,7 @@ impl RbExpr {
|
|
32
37
|
strict: bool,
|
33
38
|
exact: bool,
|
34
39
|
cache: bool,
|
40
|
+
ambiguous: &Self,
|
35
41
|
) -> Self {
|
36
42
|
let options = StrptimeOptions {
|
37
43
|
format,
|
@@ -42,7 +48,12 @@ impl RbExpr {
|
|
42
48
|
self.inner
|
43
49
|
.clone()
|
44
50
|
.str()
|
45
|
-
.to_datetime(
|
51
|
+
.to_datetime(
|
52
|
+
time_unit.map(|tu| tu.0),
|
53
|
+
time_zone,
|
54
|
+
options,
|
55
|
+
ambiguous.inner.clone(),
|
56
|
+
)
|
46
57
|
.into()
|
47
58
|
}
|
48
59
|
|
@@ -56,30 +67,50 @@ impl RbExpr {
|
|
56
67
|
self.inner.clone().str().to_time(options).into()
|
57
68
|
}
|
58
69
|
|
59
|
-
pub fn
|
60
|
-
self.inner
|
70
|
+
pub fn str_strip_chars(&self, matches: &Self) -> Self {
|
71
|
+
self.inner
|
72
|
+
.clone()
|
73
|
+
.str()
|
74
|
+
.strip_chars(matches.inner.clone())
|
75
|
+
.into()
|
61
76
|
}
|
62
77
|
|
63
|
-
pub fn
|
64
|
-
self.inner
|
78
|
+
pub fn str_strip_chars_start(&self, matches: &Self) -> Self {
|
79
|
+
self.inner
|
80
|
+
.clone()
|
81
|
+
.str()
|
82
|
+
.strip_chars_start(matches.inner.clone())
|
83
|
+
.into()
|
65
84
|
}
|
66
85
|
|
67
|
-
pub fn
|
68
|
-
self.inner
|
86
|
+
pub fn str_strip_chars_end(&self, matches: &Self) -> Self {
|
87
|
+
self.inner
|
88
|
+
.clone()
|
89
|
+
.str()
|
90
|
+
.strip_chars_end(matches.inner.clone())
|
91
|
+
.into()
|
69
92
|
}
|
70
93
|
|
71
|
-
pub fn
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
94
|
+
pub fn str_strip_prefix(&self, prefix: &Self) -> Self {
|
95
|
+
self.inner
|
96
|
+
.clone()
|
97
|
+
.str()
|
98
|
+
.strip_prefix(prefix.inner.clone())
|
99
|
+
.into()
|
100
|
+
}
|
101
|
+
|
102
|
+
pub fn str_strip_suffix(&self, suffix: &Self) -> Self {
|
103
|
+
self.inner
|
104
|
+
.clone()
|
105
|
+
.str()
|
106
|
+
.strip_suffix(suffix.inner.clone())
|
80
107
|
.into()
|
81
108
|
}
|
82
109
|
|
110
|
+
pub fn str_slice(&self, start: i64, length: Option<u64>) -> Self {
|
111
|
+
self.inner.clone().str().slice(start, length).into()
|
112
|
+
}
|
113
|
+
|
83
114
|
pub fn str_explode(&self) -> Self {
|
84
115
|
self.inner.clone().str().explode().into()
|
85
116
|
}
|
@@ -92,28 +123,12 @@ impl RbExpr {
|
|
92
123
|
self.inner.clone().str().to_lowercase().into()
|
93
124
|
}
|
94
125
|
|
95
|
-
pub fn
|
96
|
-
|
97
|
-
let ca = s.utf8()?;
|
98
|
-
Ok(Some(ca.str_lengths().into_series()))
|
99
|
-
};
|
100
|
-
self.clone()
|
101
|
-
.inner
|
102
|
-
.map(function, GetOutput::from_type(DataType::UInt32))
|
103
|
-
.with_fmt("str.lengths")
|
104
|
-
.into()
|
126
|
+
pub fn str_len_bytes(&self) -> Self {
|
127
|
+
self.inner.clone().str().len_bytes().into()
|
105
128
|
}
|
106
129
|
|
107
|
-
pub fn
|
108
|
-
|
109
|
-
let ca = s.utf8()?;
|
110
|
-
Ok(Some(ca.str_n_chars().into_series()))
|
111
|
-
};
|
112
|
-
self.clone()
|
113
|
-
.inner
|
114
|
-
.map(function, GetOutput::from_type(DataType::UInt32))
|
115
|
-
.with_fmt("str.n_chars")
|
116
|
-
.into()
|
130
|
+
pub fn str_len_chars(&self) -> Self {
|
131
|
+
self.inner.clone().str().len_chars().into()
|
117
132
|
}
|
118
133
|
|
119
134
|
pub fn str_replace_n(&self, pat: &RbExpr, val: &RbExpr, literal: bool, n: i64) -> Self {
|
@@ -132,16 +147,16 @@ impl RbExpr {
|
|
132
147
|
.into()
|
133
148
|
}
|
134
149
|
|
135
|
-
pub fn
|
136
|
-
self.clone().inner.str().
|
150
|
+
pub fn str_pad_start(&self, length: usize, fillchar: char) -> Self {
|
151
|
+
self.clone().inner.str().pad_start(length, fillchar).into()
|
137
152
|
}
|
138
153
|
|
139
|
-
pub fn
|
140
|
-
self.clone().inner.str().
|
154
|
+
pub fn str_pad_end(&self, length: usize, fillchar: char) -> Self {
|
155
|
+
self.clone().inner.str().pad_end(length, fillchar).into()
|
141
156
|
}
|
142
157
|
|
143
|
-
pub fn
|
144
|
-
self.clone().inner.str().
|
158
|
+
pub fn str_zfill(&self, alignment: usize) -> Self {
|
159
|
+
self.clone().inner.str().zfill(alignment).into()
|
145
160
|
}
|
146
161
|
|
147
162
|
pub fn str_contains(&self, pat: &RbExpr, literal: Option<bool>, strict: bool) -> Self {
|
@@ -177,7 +192,7 @@ impl RbExpr {
|
|
177
192
|
self.clone()
|
178
193
|
.inner
|
179
194
|
.map(
|
180
|
-
move |s| s.
|
195
|
+
move |s| s.str().map(|s| Some(s.hex_encode().into_series())),
|
181
196
|
GetOutput::same_type(),
|
182
197
|
)
|
183
198
|
.with_fmt("str.hex_encode")
|
@@ -188,7 +203,7 @@ impl RbExpr {
|
|
188
203
|
self.clone()
|
189
204
|
.inner
|
190
205
|
.map(
|
191
|
-
move |s| s.
|
206
|
+
move |s| s.str()?.hex_decode(strict).map(|s| Some(s.into_series())),
|
192
207
|
GetOutput::same_type(),
|
193
208
|
)
|
194
209
|
.with_fmt("str.hex_decode")
|
@@ -199,7 +214,7 @@ impl RbExpr {
|
|
199
214
|
self.clone()
|
200
215
|
.inner
|
201
216
|
.map(
|
202
|
-
move |s| s.
|
217
|
+
move |s| s.str().map(|s| Some(s.base64_encode().into_series())),
|
203
218
|
GetOutput::same_type(),
|
204
219
|
)
|
205
220
|
.with_fmt("str.base64_encode")
|
@@ -211,7 +226,7 @@ impl RbExpr {
|
|
211
226
|
.inner
|
212
227
|
.map(
|
213
228
|
move |s| {
|
214
|
-
s.
|
229
|
+
s.str()?
|
215
230
|
.base64_decode(strict)
|
216
231
|
.map(|s| Some(s.into_series()))
|
217
232
|
},
|
@@ -221,11 +236,11 @@ impl RbExpr {
|
|
221
236
|
.into()
|
222
237
|
}
|
223
238
|
|
224
|
-
pub fn
|
239
|
+
pub fn str_to_integer(&self, base: u32, strict: bool) -> Self {
|
225
240
|
self.inner
|
226
241
|
.clone()
|
227
242
|
.str()
|
228
|
-
.
|
243
|
+
.to_integer(base, strict)
|
229
244
|
.with_fmt("str.parse_int")
|
230
245
|
.into()
|
231
246
|
}
|
@@ -243,8 +258,8 @@ impl RbExpr {
|
|
243
258
|
};
|
244
259
|
|
245
260
|
let function = move |s: Series| {
|
246
|
-
let ca = s.
|
247
|
-
match ca.
|
261
|
+
let ca = s.str()?;
|
262
|
+
match ca.json_decode(dtype.clone(), infer_schema_len) {
|
248
263
|
Ok(ca) => Ok(Some(ca.into_series())),
|
249
264
|
Err(e) => Err(PolarsError::ComputeError(format!("{e:?}").into())),
|
250
265
|
}
|
@@ -253,13 +268,13 @@ impl RbExpr {
|
|
253
268
|
self.clone()
|
254
269
|
.inner
|
255
270
|
.map(function, output_type)
|
256
|
-
.with_fmt("str.
|
271
|
+
.with_fmt("str.json_decode")
|
257
272
|
.into()
|
258
273
|
}
|
259
274
|
|
260
275
|
pub fn str_json_path_match(&self, pat: String) -> Self {
|
261
276
|
let function = move |s: Series| {
|
262
|
-
let ca = s.
|
277
|
+
let ca = s.str()?;
|
263
278
|
match ca.json_path_match(&pat) {
|
264
279
|
Ok(ca) => Ok(Some(ca.into_series())),
|
265
280
|
Err(e) => Err(PolarsError::ComputeError(format!("{:?}", e).into())),
|
@@ -267,7 +282,7 @@ impl RbExpr {
|
|
267
282
|
};
|
268
283
|
self.clone()
|
269
284
|
.inner
|
270
|
-
.map(function, GetOutput::from_type(DataType::
|
285
|
+
.map(function, GetOutput::from_type(DataType::String))
|
271
286
|
.with_fmt("str.json_path_match")
|
272
287
|
.into()
|
273
288
|
}
|
@@ -284,31 +299,43 @@ impl RbExpr {
|
|
284
299
|
.into()
|
285
300
|
}
|
286
301
|
|
287
|
-
pub fn
|
288
|
-
self.inner
|
302
|
+
pub fn str_count_matches(&self, pat: &Self, literal: bool) -> Self {
|
303
|
+
self.inner
|
304
|
+
.clone()
|
305
|
+
.str()
|
306
|
+
.count_matches(pat.inner.clone(), literal)
|
307
|
+
.into()
|
289
308
|
}
|
290
309
|
|
291
|
-
pub fn str_split(&self, by:
|
292
|
-
self.inner.clone().str().split(
|
310
|
+
pub fn str_split(&self, by: &Self) -> Self {
|
311
|
+
self.inner.clone().str().split(by.inner.clone()).into()
|
293
312
|
}
|
294
313
|
|
295
|
-
pub fn str_split_inclusive(&self, by:
|
296
|
-
self.inner
|
314
|
+
pub fn str_split_inclusive(&self, by: &Self) -> Self {
|
315
|
+
self.inner
|
316
|
+
.clone()
|
317
|
+
.str()
|
318
|
+
.split_inclusive(by.inner.clone())
|
319
|
+
.into()
|
297
320
|
}
|
298
321
|
|
299
|
-
pub fn str_split_exact(&self, by:
|
300
|
-
self.inner
|
322
|
+
pub fn str_split_exact(&self, by: &Self, n: usize) -> Self {
|
323
|
+
self.inner
|
324
|
+
.clone()
|
325
|
+
.str()
|
326
|
+
.split_exact(by.inner.clone(), n)
|
327
|
+
.into()
|
301
328
|
}
|
302
329
|
|
303
|
-
pub fn str_split_exact_inclusive(&self, by:
|
330
|
+
pub fn str_split_exact_inclusive(&self, by: &Self, n: usize) -> Self {
|
304
331
|
self.inner
|
305
332
|
.clone()
|
306
333
|
.str()
|
307
|
-
.split_exact_inclusive(
|
334
|
+
.split_exact_inclusive(by.inner.clone(), n)
|
308
335
|
.into()
|
309
336
|
}
|
310
337
|
|
311
|
-
pub fn str_splitn(&self, by:
|
312
|
-
self.inner.clone().str().splitn(
|
338
|
+
pub fn str_splitn(&self, by: &Self, n: usize) -> Self {
|
339
|
+
self.inner.clone().str().splitn(by.inner.clone(), n).into()
|
313
340
|
}
|
314
341
|
}
|
data/ext/polars/src/file.rs
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
use magnus::{exception, Error, RString, Value};
|
1
|
+
use magnus::{exception, prelude::*, Error, RString, Value};
|
2
2
|
use polars::io::mmap::MmapBytesReader;
|
3
3
|
use std::fs::File;
|
4
4
|
use std::io::Cursor;
|
@@ -7,7 +7,7 @@ use std::path::PathBuf;
|
|
7
7
|
use crate::RbResult;
|
8
8
|
|
9
9
|
pub fn get_file_like(f: Value, truncate: bool) -> RbResult<File> {
|
10
|
-
let str_slice =
|
10
|
+
let str_slice = PathBuf::try_convert(f)?;
|
11
11
|
let f = if truncate {
|
12
12
|
File::create(str_slice)
|
13
13
|
.map_err(|e| Error::new(exception::runtime_error(), e.to_string()))?
|
@@ -23,7 +23,7 @@ pub fn get_mmap_bytes_reader(rb_f: Value) -> RbResult<Box<dyn MmapBytesReader>>
|
|
23
23
|
// TODO avoid copy
|
24
24
|
Ok(Box::new(Cursor::new(bytes.to_vec())))
|
25
25
|
} else {
|
26
|
-
let p =
|
26
|
+
let p = PathBuf::try_convert(rb_f)?;
|
27
27
|
let f = File::open(p).map_err(|e| Error::new(exception::runtime_error(), e.to_string()))?;
|
28
28
|
Ok(Box::new(f))
|
29
29
|
}
|
@@ -0,0 +1,35 @@
|
|
1
|
+
use magnus::RArray;
|
2
|
+
use polars::lazy::dsl;
|
3
|
+
|
4
|
+
use crate::rb_exprs_to_exprs;
|
5
|
+
use crate::{RbExpr, RbPolarsErr, RbResult};
|
6
|
+
|
7
|
+
pub fn all_horizontal(exprs: RArray) -> RbResult<RbExpr> {
|
8
|
+
let exprs = rb_exprs_to_exprs(exprs)?;
|
9
|
+
let e = dsl::all_horizontal(exprs).map_err(RbPolarsErr::from)?;
|
10
|
+
Ok(e.into())
|
11
|
+
}
|
12
|
+
|
13
|
+
pub fn any_horizontal(exprs: RArray) -> RbResult<RbExpr> {
|
14
|
+
let exprs = rb_exprs_to_exprs(exprs)?;
|
15
|
+
let e = dsl::any_horizontal(exprs).map_err(RbPolarsErr::from)?;
|
16
|
+
Ok(e.into())
|
17
|
+
}
|
18
|
+
|
19
|
+
pub fn max_horizontal(exprs: RArray) -> RbResult<RbExpr> {
|
20
|
+
let exprs = rb_exprs_to_exprs(exprs)?;
|
21
|
+
let e = dsl::max_horizontal(exprs).map_err(RbPolarsErr::from)?;
|
22
|
+
Ok(e.into())
|
23
|
+
}
|
24
|
+
|
25
|
+
pub fn min_horizontal(exprs: RArray) -> RbResult<RbExpr> {
|
26
|
+
let exprs = rb_exprs_to_exprs(exprs)?;
|
27
|
+
let e = dsl::min_horizontal(exprs).map_err(RbPolarsErr::from)?;
|
28
|
+
Ok(e.into())
|
29
|
+
}
|
30
|
+
|
31
|
+
pub fn sum_horizontal(exprs: RArray) -> RbResult<RbExpr> {
|
32
|
+
let exprs = rb_exprs_to_exprs(exprs)?;
|
33
|
+
let e = dsl::sum_horizontal(exprs).map_err(RbPolarsErr::from)?;
|
34
|
+
Ok(e.into())
|
35
|
+
}
|
@@ -1,11 +1,9 @@
|
|
1
1
|
use magnus::RArray;
|
2
|
-
use polars::
|
3
|
-
use polars_core::
|
4
|
-
use polars_core::prelude::{DataFrame, IntoSeries};
|
2
|
+
use polars::functions;
|
3
|
+
use polars_core::prelude::DataFrame;
|
5
4
|
|
6
|
-
use crate::conversion::{get_df, get_series
|
5
|
+
use crate::conversion::{get_df, get_series};
|
7
6
|
use crate::error::RbPolarsErr;
|
8
|
-
use crate::prelude::{ClosedWindow, Duration};
|
9
7
|
use crate::{RbDataFrame, RbResult, RbSeries};
|
10
8
|
|
11
9
|
pub fn concat_df(seq: RArray) -> RbResult<RbDataFrame> {
|
@@ -52,42 +50,20 @@ pub fn concat_series(seq: RArray) -> RbResult<RbSeries> {
|
|
52
50
|
Ok(s.into())
|
53
51
|
}
|
54
52
|
|
55
|
-
pub fn
|
56
|
-
start: i64,
|
57
|
-
stop: i64,
|
58
|
-
every: String,
|
59
|
-
closed: Wrap<ClosedWindow>,
|
60
|
-
name: String,
|
61
|
-
tu: Wrap<TimeUnit>,
|
62
|
-
tz: Option<TimeZone>,
|
63
|
-
) -> RbResult<RbSeries> {
|
64
|
-
let date_range = time::date_range_impl(
|
65
|
-
&name,
|
66
|
-
start,
|
67
|
-
stop,
|
68
|
-
Duration::parse(&every),
|
69
|
-
closed.0,
|
70
|
-
tu.0,
|
71
|
-
tz.as_ref(),
|
72
|
-
)
|
73
|
-
.map_err(RbPolarsErr::from)?;
|
74
|
-
Ok(date_range.into_series().into())
|
75
|
-
}
|
76
|
-
|
77
|
-
pub fn diag_concat_df(seq: RArray) -> RbResult<RbDataFrame> {
|
53
|
+
pub fn concat_df_diagonal(seq: RArray) -> RbResult<RbDataFrame> {
|
78
54
|
let mut dfs = Vec::new();
|
79
55
|
for item in seq.each() {
|
80
56
|
dfs.push(get_df(item?)?);
|
81
57
|
}
|
82
|
-
let df = functions::
|
58
|
+
let df = functions::concat_df_diagonal(&dfs).map_err(RbPolarsErr::from)?;
|
83
59
|
Ok(df.into())
|
84
60
|
}
|
85
61
|
|
86
|
-
pub fn
|
62
|
+
pub fn concat_df_horizontal(seq: RArray) -> RbResult<RbDataFrame> {
|
87
63
|
let mut dfs = Vec::new();
|
88
64
|
for item in seq.each() {
|
89
65
|
dfs.push(get_df(item?)?);
|
90
66
|
}
|
91
|
-
let df = functions::
|
67
|
+
let df = functions::concat_df_horizontal(&dfs).map_err(RbPolarsErr::from)?;
|
92
68
|
Ok(df.into())
|
93
69
|
}
|
@@ -5,30 +5,30 @@ use crate::file::get_file_like;
|
|
5
5
|
use crate::prelude::DataType;
|
6
6
|
use crate::{RbPolarsErr, RbResult};
|
7
7
|
|
8
|
-
pub fn read_ipc_schema(rb_f: Value) -> RbResult<
|
8
|
+
pub fn read_ipc_schema(rb_f: Value) -> RbResult<RHash> {
|
9
9
|
use polars_core::export::arrow::io::ipc::read::read_file_metadata;
|
10
10
|
let mut r = get_file_like(rb_f, false)?;
|
11
|
-
let metadata = read_file_metadata(&mut r).map_err(RbPolarsErr::
|
11
|
+
let metadata = read_file_metadata(&mut r).map_err(RbPolarsErr::from)?;
|
12
12
|
|
13
13
|
let dict = RHash::new();
|
14
|
-
for field in metadata.schema.fields {
|
14
|
+
for field in &metadata.schema.fields {
|
15
15
|
let dt: Wrap<DataType> = Wrap((&field.data_type).into());
|
16
|
-
dict.aset(field.name, dt)?;
|
16
|
+
dict.aset(field.name.clone(), dt)?;
|
17
17
|
}
|
18
|
-
Ok(dict
|
18
|
+
Ok(dict)
|
19
19
|
}
|
20
20
|
|
21
|
-
pub fn read_parquet_schema(rb_f: Value) -> RbResult<
|
22
|
-
use
|
21
|
+
pub fn read_parquet_schema(rb_f: Value) -> RbResult<RHash> {
|
22
|
+
use polars_parquet::read::{infer_schema, read_metadata};
|
23
23
|
|
24
24
|
let mut r = get_file_like(rb_f, false)?;
|
25
|
-
let metadata = read_metadata(&mut r).map_err(RbPolarsErr::
|
26
|
-
let arrow_schema = infer_schema(&metadata).map_err(RbPolarsErr::
|
25
|
+
let metadata = read_metadata(&mut r).map_err(RbPolarsErr::from)?;
|
26
|
+
let arrow_schema = infer_schema(&metadata).map_err(RbPolarsErr::from)?;
|
27
27
|
|
28
28
|
let dict = RHash::new();
|
29
29
|
for field in arrow_schema.fields {
|
30
30
|
let dt: Wrap<DataType> = Wrap((&field.data_type).into());
|
31
31
|
dict.aset(field.name, dt)?;
|
32
32
|
}
|
33
|
-
Ok(dict
|
33
|
+
Ok(dict)
|
34
34
|
}
|