polars-df 0.5.0 → 0.7.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +26 -0
- data/Cargo.lock +595 -709
- data/Cargo.toml +1 -0
- data/README.md +11 -9
- data/ext/polars/Cargo.toml +18 -10
- data/ext/polars/src/batched_csv.rs +26 -26
- data/ext/polars/src/conversion.rs +272 -136
- data/ext/polars/src/dataframe.rs +135 -94
- data/ext/polars/src/error.rs +8 -5
- data/ext/polars/src/expr/array.rs +15 -0
- data/ext/polars/src/expr/binary.rs +18 -6
- data/ext/polars/src/expr/datetime.rs +10 -12
- data/ext/polars/src/expr/general.rs +78 -264
- data/ext/polars/src/expr/list.rs +41 -28
- data/ext/polars/src/{expr.rs → expr/mod.rs} +5 -2
- data/ext/polars/src/expr/name.rs +44 -0
- data/ext/polars/src/expr/rolling.rs +196 -0
- data/ext/polars/src/expr/string.rs +94 -66
- data/ext/polars/src/file.rs +3 -3
- data/ext/polars/src/functions/aggregation.rs +35 -0
- data/ext/polars/src/functions/eager.rs +7 -31
- data/ext/polars/src/functions/io.rs +10 -10
- data/ext/polars/src/functions/lazy.rs +119 -54
- data/ext/polars/src/functions/meta.rs +30 -0
- data/ext/polars/src/functions/misc.rs +8 -0
- data/ext/polars/src/functions/mod.rs +5 -0
- data/ext/polars/src/functions/random.rs +6 -0
- data/ext/polars/src/functions/range.rs +46 -0
- data/ext/polars/src/functions/string_cache.rs +11 -0
- data/ext/polars/src/functions/whenthen.rs +7 -7
- data/ext/polars/src/lazyframe.rs +61 -44
- data/ext/polars/src/lib.rs +173 -84
- data/ext/polars/src/{apply → map}/dataframe.rs +28 -33
- data/ext/polars/src/{apply → map}/mod.rs +10 -6
- data/ext/polars/src/{apply → map}/series.rs +12 -16
- data/ext/polars/src/object.rs +2 -2
- data/ext/polars/src/rb_modules.rs +25 -6
- data/ext/polars/src/series/construction.rs +32 -6
- data/ext/polars/src/series/export.rs +2 -2
- data/ext/polars/src/series/set_at_idx.rs +33 -17
- data/ext/polars/src/series.rs +62 -42
- data/ext/polars/src/sql.rs +46 -0
- data/lib/polars/array_expr.rb +84 -0
- data/lib/polars/array_name_space.rb +77 -0
- data/lib/polars/batched_csv_reader.rb +1 -1
- data/lib/polars/config.rb +530 -0
- data/lib/polars/data_frame.rb +206 -131
- data/lib/polars/data_types.rb +163 -29
- data/lib/polars/date_time_expr.rb +13 -18
- data/lib/polars/date_time_name_space.rb +22 -28
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/expr.rb +241 -151
- data/lib/polars/functions.rb +29 -38
- data/lib/polars/group_by.rb +38 -76
- data/lib/polars/io.rb +37 -2
- data/lib/polars/lazy_frame.rb +174 -95
- data/lib/polars/lazy_functions.rb +87 -63
- data/lib/polars/lazy_group_by.rb +7 -8
- data/lib/polars/list_expr.rb +40 -36
- data/lib/polars/list_name_space.rb +15 -15
- data/lib/polars/name_expr.rb +198 -0
- data/lib/polars/rolling_group_by.rb +6 -4
- data/lib/polars/series.rb +95 -28
- data/lib/polars/sql_context.rb +194 -0
- data/lib/polars/string_expr.rb +249 -69
- data/lib/polars/string_name_space.rb +155 -25
- data/lib/polars/utils.rb +119 -57
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +6 -0
- metadata +21 -7
- /data/ext/polars/src/{apply → map}/lazy.rs +0 -0
@@ -0,0 +1,44 @@
|
|
1
|
+
use magnus::{block::Proc, value::Opaque, Ruby};
|
2
|
+
use polars::prelude::*;
|
3
|
+
|
4
|
+
use crate::RbExpr;
|
5
|
+
|
6
|
+
impl RbExpr {
|
7
|
+
pub fn name_keep(&self) -> Self {
|
8
|
+
self.inner.clone().name().keep().into()
|
9
|
+
}
|
10
|
+
|
11
|
+
pub fn name_map(&self, lambda: Proc) -> Self {
|
12
|
+
let lambda = Opaque::from(lambda);
|
13
|
+
self.inner
|
14
|
+
.clone()
|
15
|
+
.name()
|
16
|
+
.map(move |name| {
|
17
|
+
let lambda = Ruby::get().unwrap().get_inner(lambda);
|
18
|
+
let out = lambda.call::<_, String>((name,));
|
19
|
+
match out {
|
20
|
+
Ok(out) => Ok(out),
|
21
|
+
Err(e) => Err(PolarsError::ComputeError(
|
22
|
+
format!("Ruby function in 'name.map' produced an error: {}.", e).into(),
|
23
|
+
)),
|
24
|
+
}
|
25
|
+
})
|
26
|
+
.into()
|
27
|
+
}
|
28
|
+
|
29
|
+
pub fn name_prefix(&self, prefix: String) -> Self {
|
30
|
+
self.inner.clone().name().prefix(&prefix).into()
|
31
|
+
}
|
32
|
+
|
33
|
+
pub fn name_suffix(&self, suffix: String) -> Self {
|
34
|
+
self.inner.clone().name().suffix(&suffix).into()
|
35
|
+
}
|
36
|
+
|
37
|
+
pub fn name_to_lowercase(&self) -> Self {
|
38
|
+
self.inner.clone().name().to_lowercase().into()
|
39
|
+
}
|
40
|
+
|
41
|
+
pub fn name_to_uppercase(&self) -> Self {
|
42
|
+
self.inner.clone().name().to_uppercase().into()
|
43
|
+
}
|
44
|
+
}
|
@@ -0,0 +1,196 @@
|
|
1
|
+
use polars::prelude::*;
|
2
|
+
use std::any::Any;
|
3
|
+
|
4
|
+
use crate::conversion::Wrap;
|
5
|
+
use crate::RbExpr;
|
6
|
+
|
7
|
+
impl RbExpr {
|
8
|
+
pub fn rolling_sum(
|
9
|
+
&self,
|
10
|
+
window_size: String,
|
11
|
+
weights: Option<Vec<f64>>,
|
12
|
+
min_periods: usize,
|
13
|
+
center: bool,
|
14
|
+
by: Option<String>,
|
15
|
+
closed: Option<Wrap<ClosedWindow>>,
|
16
|
+
) -> Self {
|
17
|
+
let options = RollingOptions {
|
18
|
+
window_size: Duration::parse(&window_size),
|
19
|
+
weights,
|
20
|
+
min_periods,
|
21
|
+
center,
|
22
|
+
by,
|
23
|
+
closed_window: closed.map(|c| c.0),
|
24
|
+
..Default::default()
|
25
|
+
};
|
26
|
+
self.inner.clone().rolling_sum(options).into()
|
27
|
+
}
|
28
|
+
|
29
|
+
pub fn rolling_min(
|
30
|
+
&self,
|
31
|
+
window_size: String,
|
32
|
+
weights: Option<Vec<f64>>,
|
33
|
+
min_periods: usize,
|
34
|
+
center: bool,
|
35
|
+
by: Option<String>,
|
36
|
+
closed: Option<Wrap<ClosedWindow>>,
|
37
|
+
) -> Self {
|
38
|
+
let options = RollingOptions {
|
39
|
+
window_size: Duration::parse(&window_size),
|
40
|
+
weights,
|
41
|
+
min_periods,
|
42
|
+
center,
|
43
|
+
by,
|
44
|
+
closed_window: closed.map(|c| c.0),
|
45
|
+
..Default::default()
|
46
|
+
};
|
47
|
+
self.inner.clone().rolling_min(options).into()
|
48
|
+
}
|
49
|
+
|
50
|
+
pub fn rolling_max(
|
51
|
+
&self,
|
52
|
+
window_size: String,
|
53
|
+
weights: Option<Vec<f64>>,
|
54
|
+
min_periods: usize,
|
55
|
+
center: bool,
|
56
|
+
by: Option<String>,
|
57
|
+
closed: Option<Wrap<ClosedWindow>>,
|
58
|
+
) -> Self {
|
59
|
+
let options = RollingOptions {
|
60
|
+
window_size: Duration::parse(&window_size),
|
61
|
+
weights,
|
62
|
+
min_periods,
|
63
|
+
center,
|
64
|
+
by,
|
65
|
+
closed_window: closed.map(|c| c.0),
|
66
|
+
..Default::default()
|
67
|
+
};
|
68
|
+
self.inner.clone().rolling_max(options).into()
|
69
|
+
}
|
70
|
+
|
71
|
+
pub fn rolling_mean(
|
72
|
+
&self,
|
73
|
+
window_size: String,
|
74
|
+
weights: Option<Vec<f64>>,
|
75
|
+
min_periods: usize,
|
76
|
+
center: bool,
|
77
|
+
by: Option<String>,
|
78
|
+
closed: Option<Wrap<ClosedWindow>>,
|
79
|
+
) -> Self {
|
80
|
+
let options = RollingOptions {
|
81
|
+
window_size: Duration::parse(&window_size),
|
82
|
+
weights,
|
83
|
+
min_periods,
|
84
|
+
center,
|
85
|
+
by,
|
86
|
+
closed_window: closed.map(|c| c.0),
|
87
|
+
..Default::default()
|
88
|
+
};
|
89
|
+
|
90
|
+
self.inner.clone().rolling_mean(options).into()
|
91
|
+
}
|
92
|
+
|
93
|
+
#[allow(clippy::too_many_arguments)]
|
94
|
+
pub fn rolling_std(
|
95
|
+
&self,
|
96
|
+
window_size: String,
|
97
|
+
weights: Option<Vec<f64>>,
|
98
|
+
min_periods: usize,
|
99
|
+
center: bool,
|
100
|
+
by: Option<String>,
|
101
|
+
closed: Option<Wrap<ClosedWindow>>,
|
102
|
+
ddof: u8,
|
103
|
+
) -> Self {
|
104
|
+
let options = RollingOptions {
|
105
|
+
window_size: Duration::parse(&window_size),
|
106
|
+
weights,
|
107
|
+
min_periods,
|
108
|
+
center,
|
109
|
+
by,
|
110
|
+
closed_window: closed.map(|c| c.0),
|
111
|
+
fn_params: Some(Arc::new(RollingVarParams { ddof }) as Arc<dyn Any + Send + Sync>),
|
112
|
+
};
|
113
|
+
|
114
|
+
self.inner.clone().rolling_std(options).into()
|
115
|
+
}
|
116
|
+
|
117
|
+
#[allow(clippy::too_many_arguments)]
|
118
|
+
pub fn rolling_var(
|
119
|
+
&self,
|
120
|
+
window_size: String,
|
121
|
+
weights: Option<Vec<f64>>,
|
122
|
+
min_periods: usize,
|
123
|
+
center: bool,
|
124
|
+
by: Option<String>,
|
125
|
+
closed: Option<Wrap<ClosedWindow>>,
|
126
|
+
ddof: u8,
|
127
|
+
) -> Self {
|
128
|
+
let options = RollingOptions {
|
129
|
+
window_size: Duration::parse(&window_size),
|
130
|
+
weights,
|
131
|
+
min_periods,
|
132
|
+
center,
|
133
|
+
by,
|
134
|
+
closed_window: closed.map(|c| c.0),
|
135
|
+
fn_params: Some(Arc::new(RollingVarParams { ddof }) as Arc<dyn Any + Send + Sync>),
|
136
|
+
};
|
137
|
+
|
138
|
+
self.inner.clone().rolling_var(options).into()
|
139
|
+
}
|
140
|
+
|
141
|
+
pub fn rolling_median(
|
142
|
+
&self,
|
143
|
+
window_size: String,
|
144
|
+
weights: Option<Vec<f64>>,
|
145
|
+
min_periods: usize,
|
146
|
+
center: bool,
|
147
|
+
by: Option<String>,
|
148
|
+
closed: Option<Wrap<ClosedWindow>>,
|
149
|
+
) -> Self {
|
150
|
+
let options = RollingOptions {
|
151
|
+
window_size: Duration::parse(&window_size),
|
152
|
+
weights,
|
153
|
+
min_periods,
|
154
|
+
center,
|
155
|
+
by,
|
156
|
+
closed_window: closed.map(|c| c.0),
|
157
|
+
fn_params: Some(Arc::new(RollingQuantileParams {
|
158
|
+
prob: 0.5,
|
159
|
+
interpol: QuantileInterpolOptions::Linear,
|
160
|
+
}) as Arc<dyn Any + Send + Sync>),
|
161
|
+
};
|
162
|
+
self.inner.clone().rolling_quantile(options).into()
|
163
|
+
}
|
164
|
+
|
165
|
+
#[allow(clippy::too_many_arguments)]
|
166
|
+
pub fn rolling_quantile(
|
167
|
+
&self,
|
168
|
+
quantile: f64,
|
169
|
+
interpolation: Wrap<QuantileInterpolOptions>,
|
170
|
+
window_size: String,
|
171
|
+
weights: Option<Vec<f64>>,
|
172
|
+
min_periods: usize,
|
173
|
+
center: bool,
|
174
|
+
by: Option<String>,
|
175
|
+
closed: Option<Wrap<ClosedWindow>>,
|
176
|
+
) -> Self {
|
177
|
+
let options = RollingOptions {
|
178
|
+
window_size: Duration::parse(&window_size),
|
179
|
+
weights,
|
180
|
+
min_periods,
|
181
|
+
center,
|
182
|
+
by,
|
183
|
+
closed_window: closed.map(|c| c.0),
|
184
|
+
fn_params: Some(Arc::new(RollingQuantileParams {
|
185
|
+
prob: quantile,
|
186
|
+
interpol: interpolation.0,
|
187
|
+
}) as Arc<dyn Any + Send + Sync>),
|
188
|
+
};
|
189
|
+
|
190
|
+
self.inner.clone().rolling_quantile(options).into()
|
191
|
+
}
|
192
|
+
|
193
|
+
pub fn rolling_skew(&self, window_size: usize, bias: bool) -> Self {
|
194
|
+
self.inner.clone().rolling_skew(window_size, bias).into()
|
195
|
+
}
|
196
|
+
}
|
@@ -4,8 +4,12 @@ use crate::conversion::Wrap;
|
|
4
4
|
use crate::RbExpr;
|
5
5
|
|
6
6
|
impl RbExpr {
|
7
|
-
pub fn str_concat(&self, delimiter: String) -> Self {
|
8
|
-
self.inner
|
7
|
+
pub fn str_concat(&self, delimiter: String, ignore_nulls: bool) -> Self {
|
8
|
+
self.inner
|
9
|
+
.clone()
|
10
|
+
.str()
|
11
|
+
.concat(&delimiter, ignore_nulls)
|
12
|
+
.into()
|
9
13
|
}
|
10
14
|
|
11
15
|
pub fn str_to_date(
|
@@ -20,7 +24,6 @@ impl RbExpr {
|
|
20
24
|
strict,
|
21
25
|
exact,
|
22
26
|
cache,
|
23
|
-
..Default::default()
|
24
27
|
};
|
25
28
|
self.inner.clone().str().to_date(options).into()
|
26
29
|
}
|
@@ -34,21 +37,23 @@ impl RbExpr {
|
|
34
37
|
strict: bool,
|
35
38
|
exact: bool,
|
36
39
|
cache: bool,
|
37
|
-
|
38
|
-
tz_aware: bool,
|
40
|
+
ambiguous: &Self,
|
39
41
|
) -> Self {
|
40
42
|
let options = StrptimeOptions {
|
41
43
|
format,
|
42
44
|
strict,
|
43
45
|
exact,
|
44
46
|
cache,
|
45
|
-
tz_aware,
|
46
|
-
utc,
|
47
47
|
};
|
48
48
|
self.inner
|
49
49
|
.clone()
|
50
50
|
.str()
|
51
|
-
.to_datetime(
|
51
|
+
.to_datetime(
|
52
|
+
time_unit.map(|tu| tu.0),
|
53
|
+
time_zone,
|
54
|
+
options,
|
55
|
+
ambiguous.inner.clone(),
|
56
|
+
)
|
52
57
|
.into()
|
53
58
|
}
|
54
59
|
|
@@ -58,35 +63,58 @@ impl RbExpr {
|
|
58
63
|
strict,
|
59
64
|
cache,
|
60
65
|
exact: true,
|
61
|
-
..Default::default()
|
62
66
|
};
|
63
67
|
self.inner.clone().str().to_time(options).into()
|
64
68
|
}
|
65
69
|
|
66
|
-
pub fn
|
67
|
-
self.inner
|
70
|
+
pub fn str_strip_chars(&self, matches: &Self) -> Self {
|
71
|
+
self.inner
|
72
|
+
.clone()
|
73
|
+
.str()
|
74
|
+
.strip_chars(matches.inner.clone())
|
75
|
+
.into()
|
68
76
|
}
|
69
77
|
|
70
|
-
pub fn
|
71
|
-
self.inner
|
78
|
+
pub fn str_strip_chars_start(&self, matches: &Self) -> Self {
|
79
|
+
self.inner
|
80
|
+
.clone()
|
81
|
+
.str()
|
82
|
+
.strip_chars_start(matches.inner.clone())
|
83
|
+
.into()
|
72
84
|
}
|
73
85
|
|
74
|
-
pub fn
|
75
|
-
self.inner
|
86
|
+
pub fn str_strip_chars_end(&self, matches: &Self) -> Self {
|
87
|
+
self.inner
|
88
|
+
.clone()
|
89
|
+
.str()
|
90
|
+
.strip_chars_end(matches.inner.clone())
|
91
|
+
.into()
|
76
92
|
}
|
77
93
|
|
78
|
-
pub fn
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
94
|
+
pub fn str_strip_prefix(&self, prefix: &Self) -> Self {
|
95
|
+
self.inner
|
96
|
+
.clone()
|
97
|
+
.str()
|
98
|
+
.strip_prefix(prefix.inner.clone())
|
99
|
+
.into()
|
100
|
+
}
|
101
|
+
|
102
|
+
pub fn str_strip_suffix(&self, suffix: &Self) -> Self {
|
103
|
+
self.inner
|
104
|
+
.clone()
|
105
|
+
.str()
|
106
|
+
.strip_suffix(suffix.inner.clone())
|
87
107
|
.into()
|
88
108
|
}
|
89
109
|
|
110
|
+
pub fn str_slice(&self, start: i64, length: Option<u64>) -> Self {
|
111
|
+
self.inner.clone().str().slice(start, length).into()
|
112
|
+
}
|
113
|
+
|
114
|
+
pub fn str_explode(&self) -> Self {
|
115
|
+
self.inner.clone().str().explode().into()
|
116
|
+
}
|
117
|
+
|
90
118
|
pub fn str_to_uppercase(&self) -> Self {
|
91
119
|
self.inner.clone().str().to_uppercase().into()
|
92
120
|
}
|
@@ -95,28 +123,12 @@ impl RbExpr {
|
|
95
123
|
self.inner.clone().str().to_lowercase().into()
|
96
124
|
}
|
97
125
|
|
98
|
-
pub fn
|
99
|
-
|
100
|
-
let ca = s.utf8()?;
|
101
|
-
Ok(Some(ca.str_lengths().into_series()))
|
102
|
-
};
|
103
|
-
self.clone()
|
104
|
-
.inner
|
105
|
-
.map(function, GetOutput::from_type(DataType::UInt32))
|
106
|
-
.with_fmt("str.lengths")
|
107
|
-
.into()
|
126
|
+
pub fn str_len_bytes(&self) -> Self {
|
127
|
+
self.inner.clone().str().len_bytes().into()
|
108
128
|
}
|
109
129
|
|
110
|
-
pub fn
|
111
|
-
|
112
|
-
let ca = s.utf8()?;
|
113
|
-
Ok(Some(ca.str_n_chars().into_series()))
|
114
|
-
};
|
115
|
-
self.clone()
|
116
|
-
.inner
|
117
|
-
.map(function, GetOutput::from_type(DataType::UInt32))
|
118
|
-
.with_fmt("str.n_chars")
|
119
|
-
.into()
|
130
|
+
pub fn str_len_chars(&self) -> Self {
|
131
|
+
self.inner.clone().str().len_chars().into()
|
120
132
|
}
|
121
133
|
|
122
134
|
pub fn str_replace_n(&self, pat: &RbExpr, val: &RbExpr, literal: bool, n: i64) -> Self {
|
@@ -135,16 +147,16 @@ impl RbExpr {
|
|
135
147
|
.into()
|
136
148
|
}
|
137
149
|
|
138
|
-
pub fn
|
139
|
-
self.clone().inner.str().
|
150
|
+
pub fn str_pad_start(&self, length: usize, fillchar: char) -> Self {
|
151
|
+
self.clone().inner.str().pad_start(length, fillchar).into()
|
140
152
|
}
|
141
153
|
|
142
|
-
pub fn
|
143
|
-
self.clone().inner.str().
|
154
|
+
pub fn str_pad_end(&self, length: usize, fillchar: char) -> Self {
|
155
|
+
self.clone().inner.str().pad_end(length, fillchar).into()
|
144
156
|
}
|
145
157
|
|
146
|
-
pub fn
|
147
|
-
self.clone().inner.str().
|
158
|
+
pub fn str_zfill(&self, alignment: usize) -> Self {
|
159
|
+
self.clone().inner.str().zfill(alignment).into()
|
148
160
|
}
|
149
161
|
|
150
162
|
pub fn str_contains(&self, pat: &RbExpr, literal: Option<bool>, strict: bool) -> Self {
|
@@ -224,16 +236,20 @@ impl RbExpr {
|
|
224
236
|
.into()
|
225
237
|
}
|
226
238
|
|
227
|
-
pub fn
|
239
|
+
pub fn str_to_integer(&self, base: u32, strict: bool) -> Self {
|
228
240
|
self.inner
|
229
241
|
.clone()
|
230
242
|
.str()
|
231
|
-
.
|
243
|
+
.to_integer(base, strict)
|
232
244
|
.with_fmt("str.parse_int")
|
233
245
|
.into()
|
234
246
|
}
|
235
247
|
|
236
|
-
pub fn str_json_extract(
|
248
|
+
pub fn str_json_extract(
|
249
|
+
&self,
|
250
|
+
dtype: Option<Wrap<DataType>>,
|
251
|
+
infer_schema_len: Option<usize>,
|
252
|
+
) -> Self {
|
237
253
|
let dtype = dtype.map(|wrap| wrap.0);
|
238
254
|
|
239
255
|
let output_type = match dtype.clone() {
|
@@ -243,7 +259,7 @@ impl RbExpr {
|
|
243
259
|
|
244
260
|
let function = move |s: Series| {
|
245
261
|
let ca = s.utf8()?;
|
246
|
-
match ca.json_extract(dtype.clone()) {
|
262
|
+
match ca.json_extract(dtype.clone(), infer_schema_len) {
|
247
263
|
Ok(ca) => Ok(Some(ca.into_series())),
|
248
264
|
Err(e) => Err(PolarsError::ComputeError(format!("{e:?}").into())),
|
249
265
|
}
|
@@ -283,31 +299,43 @@ impl RbExpr {
|
|
283
299
|
.into()
|
284
300
|
}
|
285
301
|
|
286
|
-
pub fn
|
287
|
-
self.inner
|
302
|
+
pub fn str_count_matches(&self, pat: &Self, literal: bool) -> Self {
|
303
|
+
self.inner
|
304
|
+
.clone()
|
305
|
+
.str()
|
306
|
+
.count_matches(pat.inner.clone(), literal)
|
307
|
+
.into()
|
288
308
|
}
|
289
309
|
|
290
|
-
pub fn str_split(&self, by:
|
291
|
-
self.inner.clone().str().split(
|
310
|
+
pub fn str_split(&self, by: &Self) -> Self {
|
311
|
+
self.inner.clone().str().split(by.inner.clone()).into()
|
292
312
|
}
|
293
313
|
|
294
|
-
pub fn str_split_inclusive(&self, by:
|
295
|
-
self.inner
|
314
|
+
pub fn str_split_inclusive(&self, by: &Self) -> Self {
|
315
|
+
self.inner
|
316
|
+
.clone()
|
317
|
+
.str()
|
318
|
+
.split_inclusive(by.inner.clone())
|
319
|
+
.into()
|
296
320
|
}
|
297
321
|
|
298
|
-
pub fn str_split_exact(&self, by:
|
299
|
-
self.inner
|
322
|
+
pub fn str_split_exact(&self, by: &Self, n: usize) -> Self {
|
323
|
+
self.inner
|
324
|
+
.clone()
|
325
|
+
.str()
|
326
|
+
.split_exact(by.inner.clone(), n)
|
327
|
+
.into()
|
300
328
|
}
|
301
329
|
|
302
|
-
pub fn str_split_exact_inclusive(&self, by:
|
330
|
+
pub fn str_split_exact_inclusive(&self, by: &Self, n: usize) -> Self {
|
303
331
|
self.inner
|
304
332
|
.clone()
|
305
333
|
.str()
|
306
|
-
.split_exact_inclusive(
|
334
|
+
.split_exact_inclusive(by.inner.clone(), n)
|
307
335
|
.into()
|
308
336
|
}
|
309
337
|
|
310
|
-
pub fn str_splitn(&self, by:
|
311
|
-
self.inner.clone().str().splitn(
|
338
|
+
pub fn str_splitn(&self, by: &Self, n: usize) -> Self {
|
339
|
+
self.inner.clone().str().splitn(by.inner.clone(), n).into()
|
312
340
|
}
|
313
341
|
}
|
data/ext/polars/src/file.rs
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
use magnus::{exception, Error, RString, Value};
|
1
|
+
use magnus::{exception, prelude::*, Error, RString, Value};
|
2
2
|
use polars::io::mmap::MmapBytesReader;
|
3
3
|
use std::fs::File;
|
4
4
|
use std::io::Cursor;
|
@@ -7,7 +7,7 @@ use std::path::PathBuf;
|
|
7
7
|
use crate::RbResult;
|
8
8
|
|
9
9
|
pub fn get_file_like(f: Value, truncate: bool) -> RbResult<File> {
|
10
|
-
let str_slice =
|
10
|
+
let str_slice = PathBuf::try_convert(f)?;
|
11
11
|
let f = if truncate {
|
12
12
|
File::create(str_slice)
|
13
13
|
.map_err(|e| Error::new(exception::runtime_error(), e.to_string()))?
|
@@ -23,7 +23,7 @@ pub fn get_mmap_bytes_reader(rb_f: Value) -> RbResult<Box<dyn MmapBytesReader>>
|
|
23
23
|
// TODO avoid copy
|
24
24
|
Ok(Box::new(Cursor::new(bytes.to_vec())))
|
25
25
|
} else {
|
26
|
-
let p =
|
26
|
+
let p = PathBuf::try_convert(rb_f)?;
|
27
27
|
let f = File::open(p).map_err(|e| Error::new(exception::runtime_error(), e.to_string()))?;
|
28
28
|
Ok(Box::new(f))
|
29
29
|
}
|
@@ -0,0 +1,35 @@
|
|
1
|
+
use magnus::RArray;
|
2
|
+
use polars::lazy::dsl;
|
3
|
+
|
4
|
+
use crate::rb_exprs_to_exprs;
|
5
|
+
use crate::{RbExpr, RbPolarsErr, RbResult};
|
6
|
+
|
7
|
+
pub fn all_horizontal(exprs: RArray) -> RbResult<RbExpr> {
|
8
|
+
let exprs = rb_exprs_to_exprs(exprs)?;
|
9
|
+
let e = dsl::all_horizontal(exprs).map_err(RbPolarsErr::from)?;
|
10
|
+
Ok(e.into())
|
11
|
+
}
|
12
|
+
|
13
|
+
pub fn any_horizontal(exprs: RArray) -> RbResult<RbExpr> {
|
14
|
+
let exprs = rb_exprs_to_exprs(exprs)?;
|
15
|
+
let e = dsl::any_horizontal(exprs).map_err(RbPolarsErr::from)?;
|
16
|
+
Ok(e.into())
|
17
|
+
}
|
18
|
+
|
19
|
+
pub fn max_horizontal(exprs: RArray) -> RbResult<RbExpr> {
|
20
|
+
let exprs = rb_exprs_to_exprs(exprs)?;
|
21
|
+
let e = dsl::max_horizontal(exprs).map_err(RbPolarsErr::from)?;
|
22
|
+
Ok(e.into())
|
23
|
+
}
|
24
|
+
|
25
|
+
pub fn min_horizontal(exprs: RArray) -> RbResult<RbExpr> {
|
26
|
+
let exprs = rb_exprs_to_exprs(exprs)?;
|
27
|
+
let e = dsl::min_horizontal(exprs).map_err(RbPolarsErr::from)?;
|
28
|
+
Ok(e.into())
|
29
|
+
}
|
30
|
+
|
31
|
+
pub fn sum_horizontal(exprs: RArray) -> RbResult<RbExpr> {
|
32
|
+
let exprs = rb_exprs_to_exprs(exprs)?;
|
33
|
+
let e = dsl::sum_horizontal(exprs).map_err(RbPolarsErr::from)?;
|
34
|
+
Ok(e.into())
|
35
|
+
}
|
@@ -1,11 +1,9 @@
|
|
1
1
|
use magnus::RArray;
|
2
|
-
use polars::
|
3
|
-
use polars_core::
|
4
|
-
use polars_core::prelude::{DataFrame, IntoSeries};
|
2
|
+
use polars::functions;
|
3
|
+
use polars_core::prelude::DataFrame;
|
5
4
|
|
6
|
-
use crate::conversion::{get_df, get_series
|
5
|
+
use crate::conversion::{get_df, get_series};
|
7
6
|
use crate::error::RbPolarsErr;
|
8
|
-
use crate::prelude::{ClosedWindow, Duration};
|
9
7
|
use crate::{RbDataFrame, RbResult, RbSeries};
|
10
8
|
|
11
9
|
pub fn concat_df(seq: RArray) -> RbResult<RbDataFrame> {
|
@@ -52,42 +50,20 @@ pub fn concat_series(seq: RArray) -> RbResult<RbSeries> {
|
|
52
50
|
Ok(s.into())
|
53
51
|
}
|
54
52
|
|
55
|
-
pub fn
|
56
|
-
start: i64,
|
57
|
-
stop: i64,
|
58
|
-
every: String,
|
59
|
-
closed: Wrap<ClosedWindow>,
|
60
|
-
name: String,
|
61
|
-
tu: Wrap<TimeUnit>,
|
62
|
-
tz: Option<TimeZone>,
|
63
|
-
) -> RbResult<RbSeries> {
|
64
|
-
let date_range = time::date_range_impl(
|
65
|
-
&name,
|
66
|
-
start,
|
67
|
-
stop,
|
68
|
-
Duration::parse(&every),
|
69
|
-
closed.0,
|
70
|
-
tu.0,
|
71
|
-
tz.as_ref(),
|
72
|
-
)
|
73
|
-
.map_err(RbPolarsErr::from)?;
|
74
|
-
Ok(date_range.into_series().into())
|
75
|
-
}
|
76
|
-
|
77
|
-
pub fn diag_concat_df(seq: RArray) -> RbResult<RbDataFrame> {
|
53
|
+
pub fn concat_df_diagonal(seq: RArray) -> RbResult<RbDataFrame> {
|
78
54
|
let mut dfs = Vec::new();
|
79
55
|
for item in seq.each() {
|
80
56
|
dfs.push(get_df(item?)?);
|
81
57
|
}
|
82
|
-
let df = functions::
|
58
|
+
let df = functions::concat_df_diagonal(&dfs).map_err(RbPolarsErr::from)?;
|
83
59
|
Ok(df.into())
|
84
60
|
}
|
85
61
|
|
86
|
-
pub fn
|
62
|
+
pub fn concat_df_horizontal(seq: RArray) -> RbResult<RbDataFrame> {
|
87
63
|
let mut dfs = Vec::new();
|
88
64
|
for item in seq.each() {
|
89
65
|
dfs.push(get_df(item?)?);
|
90
66
|
}
|
91
|
-
let df = functions::
|
67
|
+
let df = functions::concat_df_horizontal(&dfs).map_err(RbPolarsErr::from)?;
|
92
68
|
Ok(df.into())
|
93
69
|
}
|
@@ -5,30 +5,30 @@ use crate::file::get_file_like;
|
|
5
5
|
use crate::prelude::DataType;
|
6
6
|
use crate::{RbPolarsErr, RbResult};
|
7
7
|
|
8
|
-
pub fn read_ipc_schema(rb_f: Value) -> RbResult<
|
8
|
+
pub fn read_ipc_schema(rb_f: Value) -> RbResult<RHash> {
|
9
9
|
use polars_core::export::arrow::io::ipc::read::read_file_metadata;
|
10
10
|
let mut r = get_file_like(rb_f, false)?;
|
11
|
-
let metadata = read_file_metadata(&mut r).map_err(RbPolarsErr::
|
11
|
+
let metadata = read_file_metadata(&mut r).map_err(RbPolarsErr::from)?;
|
12
12
|
|
13
13
|
let dict = RHash::new();
|
14
|
-
for field in metadata.schema.fields {
|
14
|
+
for field in &metadata.schema.fields {
|
15
15
|
let dt: Wrap<DataType> = Wrap((&field.data_type).into());
|
16
|
-
dict.aset(field.name, dt)?;
|
16
|
+
dict.aset(field.name.clone(), dt)?;
|
17
17
|
}
|
18
|
-
Ok(dict
|
18
|
+
Ok(dict)
|
19
19
|
}
|
20
20
|
|
21
|
-
pub fn read_parquet_schema(rb_f: Value) -> RbResult<
|
22
|
-
use
|
21
|
+
pub fn read_parquet_schema(rb_f: Value) -> RbResult<RHash> {
|
22
|
+
use polars_parquet::read::{infer_schema, read_metadata};
|
23
23
|
|
24
24
|
let mut r = get_file_like(rb_f, false)?;
|
25
|
-
let metadata = read_metadata(&mut r).map_err(RbPolarsErr::
|
26
|
-
let arrow_schema = infer_schema(&metadata).map_err(RbPolarsErr::
|
25
|
+
let metadata = read_metadata(&mut r).map_err(RbPolarsErr::from)?;
|
26
|
+
let arrow_schema = infer_schema(&metadata).map_err(RbPolarsErr::from)?;
|
27
27
|
|
28
28
|
let dict = RHash::new();
|
29
29
|
for field in arrow_schema.fields {
|
30
30
|
let dt: Wrap<DataType> = Wrap((&field.data_type).into());
|
31
31
|
dict.aset(field.name, dt)?;
|
32
32
|
}
|
33
|
-
Ok(dict
|
33
|
+
Ok(dict)
|
34
34
|
}
|