polars-df 0.5.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +26 -0
- data/Cargo.lock +595 -709
- data/Cargo.toml +1 -0
- data/README.md +11 -9
- data/ext/polars/Cargo.toml +18 -10
- data/ext/polars/src/batched_csv.rs +26 -26
- data/ext/polars/src/conversion.rs +272 -136
- data/ext/polars/src/dataframe.rs +135 -94
- data/ext/polars/src/error.rs +8 -5
- data/ext/polars/src/expr/array.rs +15 -0
- data/ext/polars/src/expr/binary.rs +18 -6
- data/ext/polars/src/expr/datetime.rs +10 -12
- data/ext/polars/src/expr/general.rs +78 -264
- data/ext/polars/src/expr/list.rs +41 -28
- data/ext/polars/src/{expr.rs → expr/mod.rs} +5 -2
- data/ext/polars/src/expr/name.rs +44 -0
- data/ext/polars/src/expr/rolling.rs +196 -0
- data/ext/polars/src/expr/string.rs +94 -66
- data/ext/polars/src/file.rs +3 -3
- data/ext/polars/src/functions/aggregation.rs +35 -0
- data/ext/polars/src/functions/eager.rs +7 -31
- data/ext/polars/src/functions/io.rs +10 -10
- data/ext/polars/src/functions/lazy.rs +119 -54
- data/ext/polars/src/functions/meta.rs +30 -0
- data/ext/polars/src/functions/misc.rs +8 -0
- data/ext/polars/src/functions/mod.rs +5 -0
- data/ext/polars/src/functions/random.rs +6 -0
- data/ext/polars/src/functions/range.rs +46 -0
- data/ext/polars/src/functions/string_cache.rs +11 -0
- data/ext/polars/src/functions/whenthen.rs +7 -7
- data/ext/polars/src/lazyframe.rs +61 -44
- data/ext/polars/src/lib.rs +173 -84
- data/ext/polars/src/{apply → map}/dataframe.rs +28 -33
- data/ext/polars/src/{apply → map}/mod.rs +10 -6
- data/ext/polars/src/{apply → map}/series.rs +12 -16
- data/ext/polars/src/object.rs +2 -2
- data/ext/polars/src/rb_modules.rs +25 -6
- data/ext/polars/src/series/construction.rs +32 -6
- data/ext/polars/src/series/export.rs +2 -2
- data/ext/polars/src/series/set_at_idx.rs +33 -17
- data/ext/polars/src/series.rs +62 -42
- data/ext/polars/src/sql.rs +46 -0
- data/lib/polars/array_expr.rb +84 -0
- data/lib/polars/array_name_space.rb +77 -0
- data/lib/polars/batched_csv_reader.rb +1 -1
- data/lib/polars/config.rb +530 -0
- data/lib/polars/data_frame.rb +206 -131
- data/lib/polars/data_types.rb +163 -29
- data/lib/polars/date_time_expr.rb +13 -18
- data/lib/polars/date_time_name_space.rb +22 -28
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/expr.rb +241 -151
- data/lib/polars/functions.rb +29 -38
- data/lib/polars/group_by.rb +38 -76
- data/lib/polars/io.rb +37 -2
- data/lib/polars/lazy_frame.rb +174 -95
- data/lib/polars/lazy_functions.rb +87 -63
- data/lib/polars/lazy_group_by.rb +7 -8
- data/lib/polars/list_expr.rb +40 -36
- data/lib/polars/list_name_space.rb +15 -15
- data/lib/polars/name_expr.rb +198 -0
- data/lib/polars/rolling_group_by.rb +6 -4
- data/lib/polars/series.rb +95 -28
- data/lib/polars/sql_context.rb +194 -0
- data/lib/polars/string_expr.rb +249 -69
- data/lib/polars/string_name_space.rb +155 -25
- data/lib/polars/utils.rb +119 -57
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +6 -0
- metadata +21 -7
- /data/ext/polars/src/{apply → map}/lazy.rs +0 -0
@@ -0,0 +1,44 @@
|
|
1
|
+
use magnus::{block::Proc, value::Opaque, Ruby};
|
2
|
+
use polars::prelude::*;
|
3
|
+
|
4
|
+
use crate::RbExpr;
|
5
|
+
|
6
|
+
impl RbExpr {
|
7
|
+
pub fn name_keep(&self) -> Self {
|
8
|
+
self.inner.clone().name().keep().into()
|
9
|
+
}
|
10
|
+
|
11
|
+
pub fn name_map(&self, lambda: Proc) -> Self {
|
12
|
+
let lambda = Opaque::from(lambda);
|
13
|
+
self.inner
|
14
|
+
.clone()
|
15
|
+
.name()
|
16
|
+
.map(move |name| {
|
17
|
+
let lambda = Ruby::get().unwrap().get_inner(lambda);
|
18
|
+
let out = lambda.call::<_, String>((name,));
|
19
|
+
match out {
|
20
|
+
Ok(out) => Ok(out),
|
21
|
+
Err(e) => Err(PolarsError::ComputeError(
|
22
|
+
format!("Ruby function in 'name.map' produced an error: {}.", e).into(),
|
23
|
+
)),
|
24
|
+
}
|
25
|
+
})
|
26
|
+
.into()
|
27
|
+
}
|
28
|
+
|
29
|
+
pub fn name_prefix(&self, prefix: String) -> Self {
|
30
|
+
self.inner.clone().name().prefix(&prefix).into()
|
31
|
+
}
|
32
|
+
|
33
|
+
pub fn name_suffix(&self, suffix: String) -> Self {
|
34
|
+
self.inner.clone().name().suffix(&suffix).into()
|
35
|
+
}
|
36
|
+
|
37
|
+
pub fn name_to_lowercase(&self) -> Self {
|
38
|
+
self.inner.clone().name().to_lowercase().into()
|
39
|
+
}
|
40
|
+
|
41
|
+
pub fn name_to_uppercase(&self) -> Self {
|
42
|
+
self.inner.clone().name().to_uppercase().into()
|
43
|
+
}
|
44
|
+
}
|
@@ -0,0 +1,196 @@
|
|
1
|
+
use polars::prelude::*;
|
2
|
+
use std::any::Any;
|
3
|
+
|
4
|
+
use crate::conversion::Wrap;
|
5
|
+
use crate::RbExpr;
|
6
|
+
|
7
|
+
impl RbExpr {
|
8
|
+
pub fn rolling_sum(
|
9
|
+
&self,
|
10
|
+
window_size: String,
|
11
|
+
weights: Option<Vec<f64>>,
|
12
|
+
min_periods: usize,
|
13
|
+
center: bool,
|
14
|
+
by: Option<String>,
|
15
|
+
closed: Option<Wrap<ClosedWindow>>,
|
16
|
+
) -> Self {
|
17
|
+
let options = RollingOptions {
|
18
|
+
window_size: Duration::parse(&window_size),
|
19
|
+
weights,
|
20
|
+
min_periods,
|
21
|
+
center,
|
22
|
+
by,
|
23
|
+
closed_window: closed.map(|c| c.0),
|
24
|
+
..Default::default()
|
25
|
+
};
|
26
|
+
self.inner.clone().rolling_sum(options).into()
|
27
|
+
}
|
28
|
+
|
29
|
+
pub fn rolling_min(
|
30
|
+
&self,
|
31
|
+
window_size: String,
|
32
|
+
weights: Option<Vec<f64>>,
|
33
|
+
min_periods: usize,
|
34
|
+
center: bool,
|
35
|
+
by: Option<String>,
|
36
|
+
closed: Option<Wrap<ClosedWindow>>,
|
37
|
+
) -> Self {
|
38
|
+
let options = RollingOptions {
|
39
|
+
window_size: Duration::parse(&window_size),
|
40
|
+
weights,
|
41
|
+
min_periods,
|
42
|
+
center,
|
43
|
+
by,
|
44
|
+
closed_window: closed.map(|c| c.0),
|
45
|
+
..Default::default()
|
46
|
+
};
|
47
|
+
self.inner.clone().rolling_min(options).into()
|
48
|
+
}
|
49
|
+
|
50
|
+
pub fn rolling_max(
|
51
|
+
&self,
|
52
|
+
window_size: String,
|
53
|
+
weights: Option<Vec<f64>>,
|
54
|
+
min_periods: usize,
|
55
|
+
center: bool,
|
56
|
+
by: Option<String>,
|
57
|
+
closed: Option<Wrap<ClosedWindow>>,
|
58
|
+
) -> Self {
|
59
|
+
let options = RollingOptions {
|
60
|
+
window_size: Duration::parse(&window_size),
|
61
|
+
weights,
|
62
|
+
min_periods,
|
63
|
+
center,
|
64
|
+
by,
|
65
|
+
closed_window: closed.map(|c| c.0),
|
66
|
+
..Default::default()
|
67
|
+
};
|
68
|
+
self.inner.clone().rolling_max(options).into()
|
69
|
+
}
|
70
|
+
|
71
|
+
pub fn rolling_mean(
|
72
|
+
&self,
|
73
|
+
window_size: String,
|
74
|
+
weights: Option<Vec<f64>>,
|
75
|
+
min_periods: usize,
|
76
|
+
center: bool,
|
77
|
+
by: Option<String>,
|
78
|
+
closed: Option<Wrap<ClosedWindow>>,
|
79
|
+
) -> Self {
|
80
|
+
let options = RollingOptions {
|
81
|
+
window_size: Duration::parse(&window_size),
|
82
|
+
weights,
|
83
|
+
min_periods,
|
84
|
+
center,
|
85
|
+
by,
|
86
|
+
closed_window: closed.map(|c| c.0),
|
87
|
+
..Default::default()
|
88
|
+
};
|
89
|
+
|
90
|
+
self.inner.clone().rolling_mean(options).into()
|
91
|
+
}
|
92
|
+
|
93
|
+
#[allow(clippy::too_many_arguments)]
|
94
|
+
pub fn rolling_std(
|
95
|
+
&self,
|
96
|
+
window_size: String,
|
97
|
+
weights: Option<Vec<f64>>,
|
98
|
+
min_periods: usize,
|
99
|
+
center: bool,
|
100
|
+
by: Option<String>,
|
101
|
+
closed: Option<Wrap<ClosedWindow>>,
|
102
|
+
ddof: u8,
|
103
|
+
) -> Self {
|
104
|
+
let options = RollingOptions {
|
105
|
+
window_size: Duration::parse(&window_size),
|
106
|
+
weights,
|
107
|
+
min_periods,
|
108
|
+
center,
|
109
|
+
by,
|
110
|
+
closed_window: closed.map(|c| c.0),
|
111
|
+
fn_params: Some(Arc::new(RollingVarParams { ddof }) as Arc<dyn Any + Send + Sync>),
|
112
|
+
};
|
113
|
+
|
114
|
+
self.inner.clone().rolling_std(options).into()
|
115
|
+
}
|
116
|
+
|
117
|
+
#[allow(clippy::too_many_arguments)]
|
118
|
+
pub fn rolling_var(
|
119
|
+
&self,
|
120
|
+
window_size: String,
|
121
|
+
weights: Option<Vec<f64>>,
|
122
|
+
min_periods: usize,
|
123
|
+
center: bool,
|
124
|
+
by: Option<String>,
|
125
|
+
closed: Option<Wrap<ClosedWindow>>,
|
126
|
+
ddof: u8,
|
127
|
+
) -> Self {
|
128
|
+
let options = RollingOptions {
|
129
|
+
window_size: Duration::parse(&window_size),
|
130
|
+
weights,
|
131
|
+
min_periods,
|
132
|
+
center,
|
133
|
+
by,
|
134
|
+
closed_window: closed.map(|c| c.0),
|
135
|
+
fn_params: Some(Arc::new(RollingVarParams { ddof }) as Arc<dyn Any + Send + Sync>),
|
136
|
+
};
|
137
|
+
|
138
|
+
self.inner.clone().rolling_var(options).into()
|
139
|
+
}
|
140
|
+
|
141
|
+
pub fn rolling_median(
|
142
|
+
&self,
|
143
|
+
window_size: String,
|
144
|
+
weights: Option<Vec<f64>>,
|
145
|
+
min_periods: usize,
|
146
|
+
center: bool,
|
147
|
+
by: Option<String>,
|
148
|
+
closed: Option<Wrap<ClosedWindow>>,
|
149
|
+
) -> Self {
|
150
|
+
let options = RollingOptions {
|
151
|
+
window_size: Duration::parse(&window_size),
|
152
|
+
weights,
|
153
|
+
min_periods,
|
154
|
+
center,
|
155
|
+
by,
|
156
|
+
closed_window: closed.map(|c| c.0),
|
157
|
+
fn_params: Some(Arc::new(RollingQuantileParams {
|
158
|
+
prob: 0.5,
|
159
|
+
interpol: QuantileInterpolOptions::Linear,
|
160
|
+
}) as Arc<dyn Any + Send + Sync>),
|
161
|
+
};
|
162
|
+
self.inner.clone().rolling_quantile(options).into()
|
163
|
+
}
|
164
|
+
|
165
|
+
#[allow(clippy::too_many_arguments)]
|
166
|
+
pub fn rolling_quantile(
|
167
|
+
&self,
|
168
|
+
quantile: f64,
|
169
|
+
interpolation: Wrap<QuantileInterpolOptions>,
|
170
|
+
window_size: String,
|
171
|
+
weights: Option<Vec<f64>>,
|
172
|
+
min_periods: usize,
|
173
|
+
center: bool,
|
174
|
+
by: Option<String>,
|
175
|
+
closed: Option<Wrap<ClosedWindow>>,
|
176
|
+
) -> Self {
|
177
|
+
let options = RollingOptions {
|
178
|
+
window_size: Duration::parse(&window_size),
|
179
|
+
weights,
|
180
|
+
min_periods,
|
181
|
+
center,
|
182
|
+
by,
|
183
|
+
closed_window: closed.map(|c| c.0),
|
184
|
+
fn_params: Some(Arc::new(RollingQuantileParams {
|
185
|
+
prob: quantile,
|
186
|
+
interpol: interpolation.0,
|
187
|
+
}) as Arc<dyn Any + Send + Sync>),
|
188
|
+
};
|
189
|
+
|
190
|
+
self.inner.clone().rolling_quantile(options).into()
|
191
|
+
}
|
192
|
+
|
193
|
+
pub fn rolling_skew(&self, window_size: usize, bias: bool) -> Self {
|
194
|
+
self.inner.clone().rolling_skew(window_size, bias).into()
|
195
|
+
}
|
196
|
+
}
|
@@ -4,8 +4,12 @@ use crate::conversion::Wrap;
|
|
4
4
|
use crate::RbExpr;
|
5
5
|
|
6
6
|
impl RbExpr {
|
7
|
-
pub fn str_concat(&self, delimiter: String) -> Self {
|
8
|
-
self.inner
|
7
|
+
pub fn str_concat(&self, delimiter: String, ignore_nulls: bool) -> Self {
|
8
|
+
self.inner
|
9
|
+
.clone()
|
10
|
+
.str()
|
11
|
+
.concat(&delimiter, ignore_nulls)
|
12
|
+
.into()
|
9
13
|
}
|
10
14
|
|
11
15
|
pub fn str_to_date(
|
@@ -20,7 +24,6 @@ impl RbExpr {
|
|
20
24
|
strict,
|
21
25
|
exact,
|
22
26
|
cache,
|
23
|
-
..Default::default()
|
24
27
|
};
|
25
28
|
self.inner.clone().str().to_date(options).into()
|
26
29
|
}
|
@@ -34,21 +37,23 @@ impl RbExpr {
|
|
34
37
|
strict: bool,
|
35
38
|
exact: bool,
|
36
39
|
cache: bool,
|
37
|
-
|
38
|
-
tz_aware: bool,
|
40
|
+
ambiguous: &Self,
|
39
41
|
) -> Self {
|
40
42
|
let options = StrptimeOptions {
|
41
43
|
format,
|
42
44
|
strict,
|
43
45
|
exact,
|
44
46
|
cache,
|
45
|
-
tz_aware,
|
46
|
-
utc,
|
47
47
|
};
|
48
48
|
self.inner
|
49
49
|
.clone()
|
50
50
|
.str()
|
51
|
-
.to_datetime(
|
51
|
+
.to_datetime(
|
52
|
+
time_unit.map(|tu| tu.0),
|
53
|
+
time_zone,
|
54
|
+
options,
|
55
|
+
ambiguous.inner.clone(),
|
56
|
+
)
|
52
57
|
.into()
|
53
58
|
}
|
54
59
|
|
@@ -58,35 +63,58 @@ impl RbExpr {
|
|
58
63
|
strict,
|
59
64
|
cache,
|
60
65
|
exact: true,
|
61
|
-
..Default::default()
|
62
66
|
};
|
63
67
|
self.inner.clone().str().to_time(options).into()
|
64
68
|
}
|
65
69
|
|
66
|
-
pub fn
|
67
|
-
self.inner
|
70
|
+
pub fn str_strip_chars(&self, matches: &Self) -> Self {
|
71
|
+
self.inner
|
72
|
+
.clone()
|
73
|
+
.str()
|
74
|
+
.strip_chars(matches.inner.clone())
|
75
|
+
.into()
|
68
76
|
}
|
69
77
|
|
70
|
-
pub fn
|
71
|
-
self.inner
|
78
|
+
pub fn str_strip_chars_start(&self, matches: &Self) -> Self {
|
79
|
+
self.inner
|
80
|
+
.clone()
|
81
|
+
.str()
|
82
|
+
.strip_chars_start(matches.inner.clone())
|
83
|
+
.into()
|
72
84
|
}
|
73
85
|
|
74
|
-
pub fn
|
75
|
-
self.inner
|
86
|
+
pub fn str_strip_chars_end(&self, matches: &Self) -> Self {
|
87
|
+
self.inner
|
88
|
+
.clone()
|
89
|
+
.str()
|
90
|
+
.strip_chars_end(matches.inner.clone())
|
91
|
+
.into()
|
76
92
|
}
|
77
93
|
|
78
|
-
pub fn
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
94
|
+
pub fn str_strip_prefix(&self, prefix: &Self) -> Self {
|
95
|
+
self.inner
|
96
|
+
.clone()
|
97
|
+
.str()
|
98
|
+
.strip_prefix(prefix.inner.clone())
|
99
|
+
.into()
|
100
|
+
}
|
101
|
+
|
102
|
+
pub fn str_strip_suffix(&self, suffix: &Self) -> Self {
|
103
|
+
self.inner
|
104
|
+
.clone()
|
105
|
+
.str()
|
106
|
+
.strip_suffix(suffix.inner.clone())
|
87
107
|
.into()
|
88
108
|
}
|
89
109
|
|
110
|
+
pub fn str_slice(&self, start: i64, length: Option<u64>) -> Self {
|
111
|
+
self.inner.clone().str().slice(start, length).into()
|
112
|
+
}
|
113
|
+
|
114
|
+
pub fn str_explode(&self) -> Self {
|
115
|
+
self.inner.clone().str().explode().into()
|
116
|
+
}
|
117
|
+
|
90
118
|
pub fn str_to_uppercase(&self) -> Self {
|
91
119
|
self.inner.clone().str().to_uppercase().into()
|
92
120
|
}
|
@@ -95,28 +123,12 @@ impl RbExpr {
|
|
95
123
|
self.inner.clone().str().to_lowercase().into()
|
96
124
|
}
|
97
125
|
|
98
|
-
pub fn
|
99
|
-
|
100
|
-
let ca = s.utf8()?;
|
101
|
-
Ok(Some(ca.str_lengths().into_series()))
|
102
|
-
};
|
103
|
-
self.clone()
|
104
|
-
.inner
|
105
|
-
.map(function, GetOutput::from_type(DataType::UInt32))
|
106
|
-
.with_fmt("str.lengths")
|
107
|
-
.into()
|
126
|
+
pub fn str_len_bytes(&self) -> Self {
|
127
|
+
self.inner.clone().str().len_bytes().into()
|
108
128
|
}
|
109
129
|
|
110
|
-
pub fn
|
111
|
-
|
112
|
-
let ca = s.utf8()?;
|
113
|
-
Ok(Some(ca.str_n_chars().into_series()))
|
114
|
-
};
|
115
|
-
self.clone()
|
116
|
-
.inner
|
117
|
-
.map(function, GetOutput::from_type(DataType::UInt32))
|
118
|
-
.with_fmt("str.n_chars")
|
119
|
-
.into()
|
130
|
+
pub fn str_len_chars(&self) -> Self {
|
131
|
+
self.inner.clone().str().len_chars().into()
|
120
132
|
}
|
121
133
|
|
122
134
|
pub fn str_replace_n(&self, pat: &RbExpr, val: &RbExpr, literal: bool, n: i64) -> Self {
|
@@ -135,16 +147,16 @@ impl RbExpr {
|
|
135
147
|
.into()
|
136
148
|
}
|
137
149
|
|
138
|
-
pub fn
|
139
|
-
self.clone().inner.str().
|
150
|
+
pub fn str_pad_start(&self, length: usize, fillchar: char) -> Self {
|
151
|
+
self.clone().inner.str().pad_start(length, fillchar).into()
|
140
152
|
}
|
141
153
|
|
142
|
-
pub fn
|
143
|
-
self.clone().inner.str().
|
154
|
+
pub fn str_pad_end(&self, length: usize, fillchar: char) -> Self {
|
155
|
+
self.clone().inner.str().pad_end(length, fillchar).into()
|
144
156
|
}
|
145
157
|
|
146
|
-
pub fn
|
147
|
-
self.clone().inner.str().
|
158
|
+
pub fn str_zfill(&self, alignment: usize) -> Self {
|
159
|
+
self.clone().inner.str().zfill(alignment).into()
|
148
160
|
}
|
149
161
|
|
150
162
|
pub fn str_contains(&self, pat: &RbExpr, literal: Option<bool>, strict: bool) -> Self {
|
@@ -224,16 +236,20 @@ impl RbExpr {
|
|
224
236
|
.into()
|
225
237
|
}
|
226
238
|
|
227
|
-
pub fn
|
239
|
+
pub fn str_to_integer(&self, base: u32, strict: bool) -> Self {
|
228
240
|
self.inner
|
229
241
|
.clone()
|
230
242
|
.str()
|
231
|
-
.
|
243
|
+
.to_integer(base, strict)
|
232
244
|
.with_fmt("str.parse_int")
|
233
245
|
.into()
|
234
246
|
}
|
235
247
|
|
236
|
-
pub fn str_json_extract(
|
248
|
+
pub fn str_json_extract(
|
249
|
+
&self,
|
250
|
+
dtype: Option<Wrap<DataType>>,
|
251
|
+
infer_schema_len: Option<usize>,
|
252
|
+
) -> Self {
|
237
253
|
let dtype = dtype.map(|wrap| wrap.0);
|
238
254
|
|
239
255
|
let output_type = match dtype.clone() {
|
@@ -243,7 +259,7 @@ impl RbExpr {
|
|
243
259
|
|
244
260
|
let function = move |s: Series| {
|
245
261
|
let ca = s.utf8()?;
|
246
|
-
match ca.json_extract(dtype.clone()) {
|
262
|
+
match ca.json_extract(dtype.clone(), infer_schema_len) {
|
247
263
|
Ok(ca) => Ok(Some(ca.into_series())),
|
248
264
|
Err(e) => Err(PolarsError::ComputeError(format!("{e:?}").into())),
|
249
265
|
}
|
@@ -283,31 +299,43 @@ impl RbExpr {
|
|
283
299
|
.into()
|
284
300
|
}
|
285
301
|
|
286
|
-
pub fn
|
287
|
-
self.inner
|
302
|
+
pub fn str_count_matches(&self, pat: &Self, literal: bool) -> Self {
|
303
|
+
self.inner
|
304
|
+
.clone()
|
305
|
+
.str()
|
306
|
+
.count_matches(pat.inner.clone(), literal)
|
307
|
+
.into()
|
288
308
|
}
|
289
309
|
|
290
|
-
pub fn str_split(&self, by:
|
291
|
-
self.inner.clone().str().split(
|
310
|
+
pub fn str_split(&self, by: &Self) -> Self {
|
311
|
+
self.inner.clone().str().split(by.inner.clone()).into()
|
292
312
|
}
|
293
313
|
|
294
|
-
pub fn str_split_inclusive(&self, by:
|
295
|
-
self.inner
|
314
|
+
pub fn str_split_inclusive(&self, by: &Self) -> Self {
|
315
|
+
self.inner
|
316
|
+
.clone()
|
317
|
+
.str()
|
318
|
+
.split_inclusive(by.inner.clone())
|
319
|
+
.into()
|
296
320
|
}
|
297
321
|
|
298
|
-
pub fn str_split_exact(&self, by:
|
299
|
-
self.inner
|
322
|
+
pub fn str_split_exact(&self, by: &Self, n: usize) -> Self {
|
323
|
+
self.inner
|
324
|
+
.clone()
|
325
|
+
.str()
|
326
|
+
.split_exact(by.inner.clone(), n)
|
327
|
+
.into()
|
300
328
|
}
|
301
329
|
|
302
|
-
pub fn str_split_exact_inclusive(&self, by:
|
330
|
+
pub fn str_split_exact_inclusive(&self, by: &Self, n: usize) -> Self {
|
303
331
|
self.inner
|
304
332
|
.clone()
|
305
333
|
.str()
|
306
|
-
.split_exact_inclusive(
|
334
|
+
.split_exact_inclusive(by.inner.clone(), n)
|
307
335
|
.into()
|
308
336
|
}
|
309
337
|
|
310
|
-
pub fn str_splitn(&self, by:
|
311
|
-
self.inner.clone().str().splitn(
|
338
|
+
pub fn str_splitn(&self, by: &Self, n: usize) -> Self {
|
339
|
+
self.inner.clone().str().splitn(by.inner.clone(), n).into()
|
312
340
|
}
|
313
341
|
}
|
data/ext/polars/src/file.rs
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
use magnus::{exception, Error, RString, Value};
|
1
|
+
use magnus::{exception, prelude::*, Error, RString, Value};
|
2
2
|
use polars::io::mmap::MmapBytesReader;
|
3
3
|
use std::fs::File;
|
4
4
|
use std::io::Cursor;
|
@@ -7,7 +7,7 @@ use std::path::PathBuf;
|
|
7
7
|
use crate::RbResult;
|
8
8
|
|
9
9
|
pub fn get_file_like(f: Value, truncate: bool) -> RbResult<File> {
|
10
|
-
let str_slice =
|
10
|
+
let str_slice = PathBuf::try_convert(f)?;
|
11
11
|
let f = if truncate {
|
12
12
|
File::create(str_slice)
|
13
13
|
.map_err(|e| Error::new(exception::runtime_error(), e.to_string()))?
|
@@ -23,7 +23,7 @@ pub fn get_mmap_bytes_reader(rb_f: Value) -> RbResult<Box<dyn MmapBytesReader>>
|
|
23
23
|
// TODO avoid copy
|
24
24
|
Ok(Box::new(Cursor::new(bytes.to_vec())))
|
25
25
|
} else {
|
26
|
-
let p =
|
26
|
+
let p = PathBuf::try_convert(rb_f)?;
|
27
27
|
let f = File::open(p).map_err(|e| Error::new(exception::runtime_error(), e.to_string()))?;
|
28
28
|
Ok(Box::new(f))
|
29
29
|
}
|
@@ -0,0 +1,35 @@
|
|
1
|
+
use magnus::RArray;
|
2
|
+
use polars::lazy::dsl;
|
3
|
+
|
4
|
+
use crate::rb_exprs_to_exprs;
|
5
|
+
use crate::{RbExpr, RbPolarsErr, RbResult};
|
6
|
+
|
7
|
+
pub fn all_horizontal(exprs: RArray) -> RbResult<RbExpr> {
|
8
|
+
let exprs = rb_exprs_to_exprs(exprs)?;
|
9
|
+
let e = dsl::all_horizontal(exprs).map_err(RbPolarsErr::from)?;
|
10
|
+
Ok(e.into())
|
11
|
+
}
|
12
|
+
|
13
|
+
pub fn any_horizontal(exprs: RArray) -> RbResult<RbExpr> {
|
14
|
+
let exprs = rb_exprs_to_exprs(exprs)?;
|
15
|
+
let e = dsl::any_horizontal(exprs).map_err(RbPolarsErr::from)?;
|
16
|
+
Ok(e.into())
|
17
|
+
}
|
18
|
+
|
19
|
+
pub fn max_horizontal(exprs: RArray) -> RbResult<RbExpr> {
|
20
|
+
let exprs = rb_exprs_to_exprs(exprs)?;
|
21
|
+
let e = dsl::max_horizontal(exprs).map_err(RbPolarsErr::from)?;
|
22
|
+
Ok(e.into())
|
23
|
+
}
|
24
|
+
|
25
|
+
pub fn min_horizontal(exprs: RArray) -> RbResult<RbExpr> {
|
26
|
+
let exprs = rb_exprs_to_exprs(exprs)?;
|
27
|
+
let e = dsl::min_horizontal(exprs).map_err(RbPolarsErr::from)?;
|
28
|
+
Ok(e.into())
|
29
|
+
}
|
30
|
+
|
31
|
+
pub fn sum_horizontal(exprs: RArray) -> RbResult<RbExpr> {
|
32
|
+
let exprs = rb_exprs_to_exprs(exprs)?;
|
33
|
+
let e = dsl::sum_horizontal(exprs).map_err(RbPolarsErr::from)?;
|
34
|
+
Ok(e.into())
|
35
|
+
}
|
@@ -1,11 +1,9 @@
|
|
1
1
|
use magnus::RArray;
|
2
|
-
use polars::
|
3
|
-
use polars_core::
|
4
|
-
use polars_core::prelude::{DataFrame, IntoSeries};
|
2
|
+
use polars::functions;
|
3
|
+
use polars_core::prelude::DataFrame;
|
5
4
|
|
6
|
-
use crate::conversion::{get_df, get_series
|
5
|
+
use crate::conversion::{get_df, get_series};
|
7
6
|
use crate::error::RbPolarsErr;
|
8
|
-
use crate::prelude::{ClosedWindow, Duration};
|
9
7
|
use crate::{RbDataFrame, RbResult, RbSeries};
|
10
8
|
|
11
9
|
pub fn concat_df(seq: RArray) -> RbResult<RbDataFrame> {
|
@@ -52,42 +50,20 @@ pub fn concat_series(seq: RArray) -> RbResult<RbSeries> {
|
|
52
50
|
Ok(s.into())
|
53
51
|
}
|
54
52
|
|
55
|
-
pub fn
|
56
|
-
start: i64,
|
57
|
-
stop: i64,
|
58
|
-
every: String,
|
59
|
-
closed: Wrap<ClosedWindow>,
|
60
|
-
name: String,
|
61
|
-
tu: Wrap<TimeUnit>,
|
62
|
-
tz: Option<TimeZone>,
|
63
|
-
) -> RbResult<RbSeries> {
|
64
|
-
let date_range = time::date_range_impl(
|
65
|
-
&name,
|
66
|
-
start,
|
67
|
-
stop,
|
68
|
-
Duration::parse(&every),
|
69
|
-
closed.0,
|
70
|
-
tu.0,
|
71
|
-
tz.as_ref(),
|
72
|
-
)
|
73
|
-
.map_err(RbPolarsErr::from)?;
|
74
|
-
Ok(date_range.into_series().into())
|
75
|
-
}
|
76
|
-
|
77
|
-
pub fn diag_concat_df(seq: RArray) -> RbResult<RbDataFrame> {
|
53
|
+
pub fn concat_df_diagonal(seq: RArray) -> RbResult<RbDataFrame> {
|
78
54
|
let mut dfs = Vec::new();
|
79
55
|
for item in seq.each() {
|
80
56
|
dfs.push(get_df(item?)?);
|
81
57
|
}
|
82
|
-
let df = functions::
|
58
|
+
let df = functions::concat_df_diagonal(&dfs).map_err(RbPolarsErr::from)?;
|
83
59
|
Ok(df.into())
|
84
60
|
}
|
85
61
|
|
86
|
-
pub fn
|
62
|
+
pub fn concat_df_horizontal(seq: RArray) -> RbResult<RbDataFrame> {
|
87
63
|
let mut dfs = Vec::new();
|
88
64
|
for item in seq.each() {
|
89
65
|
dfs.push(get_df(item?)?);
|
90
66
|
}
|
91
|
-
let df = functions::
|
67
|
+
let df = functions::concat_df_horizontal(&dfs).map_err(RbPolarsErr::from)?;
|
92
68
|
Ok(df.into())
|
93
69
|
}
|
@@ -5,30 +5,30 @@ use crate::file::get_file_like;
|
|
5
5
|
use crate::prelude::DataType;
|
6
6
|
use crate::{RbPolarsErr, RbResult};
|
7
7
|
|
8
|
-
pub fn read_ipc_schema(rb_f: Value) -> RbResult<
|
8
|
+
pub fn read_ipc_schema(rb_f: Value) -> RbResult<RHash> {
|
9
9
|
use polars_core::export::arrow::io::ipc::read::read_file_metadata;
|
10
10
|
let mut r = get_file_like(rb_f, false)?;
|
11
|
-
let metadata = read_file_metadata(&mut r).map_err(RbPolarsErr::
|
11
|
+
let metadata = read_file_metadata(&mut r).map_err(RbPolarsErr::from)?;
|
12
12
|
|
13
13
|
let dict = RHash::new();
|
14
|
-
for field in metadata.schema.fields {
|
14
|
+
for field in &metadata.schema.fields {
|
15
15
|
let dt: Wrap<DataType> = Wrap((&field.data_type).into());
|
16
|
-
dict.aset(field.name, dt)?;
|
16
|
+
dict.aset(field.name.clone(), dt)?;
|
17
17
|
}
|
18
|
-
Ok(dict
|
18
|
+
Ok(dict)
|
19
19
|
}
|
20
20
|
|
21
|
-
pub fn read_parquet_schema(rb_f: Value) -> RbResult<
|
22
|
-
use
|
21
|
+
pub fn read_parquet_schema(rb_f: Value) -> RbResult<RHash> {
|
22
|
+
use polars_parquet::read::{infer_schema, read_metadata};
|
23
23
|
|
24
24
|
let mut r = get_file_like(rb_f, false)?;
|
25
|
-
let metadata = read_metadata(&mut r).map_err(RbPolarsErr::
|
26
|
-
let arrow_schema = infer_schema(&metadata).map_err(RbPolarsErr::
|
25
|
+
let metadata = read_metadata(&mut r).map_err(RbPolarsErr::from)?;
|
26
|
+
let arrow_schema = infer_schema(&metadata).map_err(RbPolarsErr::from)?;
|
27
27
|
|
28
28
|
let dict = RHash::new();
|
29
29
|
for field in arrow_schema.fields {
|
30
30
|
let dt: Wrap<DataType> = Wrap((&field.data_type).into());
|
31
31
|
dict.aset(field.name, dt)?;
|
32
32
|
}
|
33
|
-
Ok(dict
|
33
|
+
Ok(dict)
|
34
34
|
}
|