polars-df 0.23.0 → 0.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +127 -1
- data/Cargo.lock +72 -58
- data/README.md +31 -27
- data/ext/polars/Cargo.toml +15 -6
- data/ext/polars/src/batched_csv.rs +35 -39
- data/ext/polars/src/c_api/allocator.rs +7 -0
- data/ext/polars/src/c_api/mod.rs +1 -0
- data/ext/polars/src/catalog/unity.rs +123 -101
- data/ext/polars/src/conversion/any_value.rs +13 -17
- data/ext/polars/src/conversion/chunked_array.rs +5 -5
- data/ext/polars/src/conversion/datetime.rs +3 -2
- data/ext/polars/src/conversion/mod.rs +50 -45
- data/ext/polars/src/dataframe/export.rs +13 -13
- data/ext/polars/src/dataframe/general.rs +223 -223
- data/ext/polars/src/dataframe/io.rs +27 -141
- data/ext/polars/src/dataframe/mod.rs +13 -5
- data/ext/polars/src/dataframe/serde.rs +1 -1
- data/ext/polars/src/error.rs +44 -7
- data/ext/polars/src/exceptions.rs +45 -12
- data/ext/polars/src/expr/array.rs +12 -0
- data/ext/polars/src/expr/datatype.rs +2 -2
- data/ext/polars/src/expr/datetime.rs +4 -5
- data/ext/polars/src/expr/general.rs +49 -13
- data/ext/polars/src/expr/list.rs +4 -0
- data/ext/polars/src/expr/meta.rs +8 -3
- data/ext/polars/src/expr/mod.rs +22 -6
- data/ext/polars/src/expr/name.rs +19 -8
- data/ext/polars/src/expr/rolling.rs +50 -1
- data/ext/polars/src/expr/string.rs +0 -1
- data/ext/polars/src/expr/struct.rs +7 -2
- data/ext/polars/src/file.rs +136 -103
- data/ext/polars/src/functions/aggregation.rs +9 -8
- data/ext/polars/src/functions/io.rs +81 -10
- data/ext/polars/src/functions/lazy.rs +95 -21
- data/ext/polars/src/functions/mod.rs +2 -0
- data/ext/polars/src/functions/range.rs +19 -3
- data/ext/polars/src/functions/strings.rs +6 -0
- data/ext/polars/src/functions/utils.rs +6 -0
- data/ext/polars/src/interop/arrow/mod.rs +50 -1
- data/ext/polars/src/interop/arrow/{to_ruby.rs → to_rb.rs} +30 -0
- data/ext/polars/src/interop/arrow/to_rust.rs +43 -0
- data/ext/polars/src/interop/numo/to_numo_df.rs +1 -1
- data/ext/polars/src/interop/numo/to_numo_series.rs +1 -1
- data/ext/polars/src/lazyframe/exitable.rs +39 -0
- data/ext/polars/src/lazyframe/general.rs +340 -236
- data/ext/polars/src/lazyframe/mod.rs +46 -10
- data/ext/polars/src/lazyframe/optflags.rs +5 -4
- data/ext/polars/src/lazyframe/serde.rs +11 -3
- data/ext/polars/src/lazyframe/sink.rs +10 -5
- data/ext/polars/src/lazygroupby.rs +6 -7
- data/ext/polars/src/lib.rs +141 -76
- data/ext/polars/src/map/dataframe.rs +12 -12
- data/ext/polars/src/map/lazy.rs +7 -5
- data/ext/polars/src/map/mod.rs +15 -8
- data/ext/polars/src/map/series.rs +3 -3
- data/ext/polars/src/on_startup.rs +16 -8
- data/ext/polars/src/prelude.rs +1 -0
- data/ext/polars/src/rb_modules.rs +19 -49
- data/ext/polars/src/series/aggregation.rs +79 -140
- data/ext/polars/src/series/arithmetic.rs +16 -22
- data/ext/polars/src/series/comparison.rs +101 -222
- data/ext/polars/src/series/construction.rs +17 -18
- data/ext/polars/src/series/export.rs +1 -1
- data/ext/polars/src/series/general.rs +254 -289
- data/ext/polars/src/series/import.rs +17 -0
- data/ext/polars/src/series/map.rs +178 -160
- data/ext/polars/src/series/mod.rs +28 -12
- data/ext/polars/src/series/scatter.rs +12 -9
- data/ext/polars/src/sql.rs +16 -9
- data/ext/polars/src/testing/frame.rs +31 -0
- data/ext/polars/src/testing/mod.rs +5 -0
- data/ext/polars/src/testing/series.rs +31 -0
- data/ext/polars/src/timeout.rs +105 -0
- data/ext/polars/src/utils.rs +159 -1
- data/lib/polars/array_expr.rb +81 -12
- data/lib/polars/array_name_space.rb +74 -7
- data/lib/polars/batched_csv_reader.rb +21 -21
- data/lib/polars/binary_name_space.rb +1 -1
- data/lib/polars/cat_expr.rb +7 -7
- data/lib/polars/config.rb +1 -1
- data/lib/polars/convert.rb +189 -34
- data/lib/polars/data_frame.rb +1066 -831
- data/lib/polars/data_frame_plot.rb +173 -0
- data/lib/polars/data_type_group.rb +1 -0
- data/lib/polars/data_types.rb +31 -12
- data/lib/polars/date_time_expr.rb +51 -69
- data/lib/polars/date_time_name_space.rb +80 -112
- data/lib/polars/dynamic_group_by.rb +7 -7
- data/lib/polars/exceptions.rb +50 -10
- data/lib/polars/expr.rb +470 -517
- data/lib/polars/functions/aggregation/horizontal.rb +0 -1
- data/lib/polars/functions/aggregation/vertical.rb +2 -3
- data/lib/polars/functions/as_datatype.rb +290 -8
- data/lib/polars/functions/eager.rb +204 -10
- data/lib/polars/functions/escape_regex.rb +21 -0
- data/lib/polars/functions/lazy.rb +409 -169
- data/lib/polars/functions/lit.rb +17 -1
- data/lib/polars/functions/range/int_range.rb +74 -2
- data/lib/polars/functions/range/linear_space.rb +77 -0
- data/lib/polars/functions/range/time_range.rb +1 -1
- data/lib/polars/functions/repeat.rb +3 -12
- data/lib/polars/functions/whenthen.rb +2 -2
- data/lib/polars/group_by.rb +72 -20
- data/lib/polars/iceberg_dataset.rb +1 -6
- data/lib/polars/in_process_query.rb +37 -0
- data/lib/polars/io/cloud.rb +18 -0
- data/lib/polars/io/csv.rb +265 -126
- data/lib/polars/io/database.rb +0 -1
- data/lib/polars/io/delta.rb +15 -7
- data/lib/polars/io/ipc.rb +24 -17
- data/lib/polars/io/ndjson.rb +161 -24
- data/lib/polars/io/parquet.rb +101 -38
- data/lib/polars/lazy_frame.rb +849 -558
- data/lib/polars/lazy_group_by.rb +327 -2
- data/lib/polars/list_expr.rb +94 -16
- data/lib/polars/list_name_space.rb +88 -24
- data/lib/polars/meta_expr.rb +42 -1
- data/lib/polars/name_expr.rb +41 -4
- data/lib/polars/query_opt_flags.rb +198 -2
- data/lib/polars/rolling_group_by.rb +3 -3
- data/lib/polars/schema.rb +21 -3
- data/lib/polars/selector.rb +37 -2
- data/lib/polars/selectors.rb +45 -9
- data/lib/polars/series.rb +1156 -728
- data/lib/polars/series_plot.rb +72 -0
- data/lib/polars/slice.rb +1 -1
- data/lib/polars/sql_context.rb +11 -4
- data/lib/polars/string_expr.rb +59 -68
- data/lib/polars/string_name_space.rb +51 -87
- data/lib/polars/struct_expr.rb +36 -18
- data/lib/polars/testing.rb +24 -273
- data/lib/polars/utils/constants.rb +2 -0
- data/lib/polars/utils/construction/data_frame.rb +410 -0
- data/lib/polars/utils/construction/series.rb +364 -0
- data/lib/polars/utils/construction/utils.rb +9 -0
- data/lib/polars/utils/deprecation.rb +11 -0
- data/lib/polars/utils/serde.rb +8 -3
- data/lib/polars/utils/unstable.rb +19 -0
- data/lib/polars/utils/various.rb +59 -0
- data/lib/polars/utils.rb +46 -47
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +47 -1
- metadata +25 -6
- data/ext/polars/src/allocator.rs +0 -13
- data/lib/polars/plot.rb +0 -109
|
@@ -6,135 +6,132 @@ use polars::prelude::pivot::{pivot, pivot_stable};
|
|
|
6
6
|
use polars::prelude::*;
|
|
7
7
|
|
|
8
8
|
use crate::conversion::*;
|
|
9
|
+
use crate::exceptions::RbIndexError;
|
|
9
10
|
use crate::map::dataframe::{
|
|
10
11
|
apply_lambda_unknown, apply_lambda_with_bool_out_type, apply_lambda_with_primitive_out_type,
|
|
11
12
|
apply_lambda_with_utf8_out_type,
|
|
12
13
|
};
|
|
13
14
|
use crate::prelude::strings_to_pl_smallstr;
|
|
14
|
-
use crate::series::
|
|
15
|
+
use crate::series::ToRbSeries;
|
|
16
|
+
use crate::series::to_series;
|
|
17
|
+
use crate::utils::EnterPolarsExt;
|
|
15
18
|
use crate::{RbDataFrame, RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
|
|
16
19
|
|
|
17
20
|
impl RbDataFrame {
|
|
18
21
|
pub fn init(columns: RArray) -> RbResult<Self> {
|
|
19
22
|
let mut cols = Vec::new();
|
|
20
23
|
for i in columns.into_iter() {
|
|
21
|
-
cols.push(<&RbSeries>::try_convert(i)?.series.
|
|
24
|
+
cols.push(<&RbSeries>::try_convert(i)?.series.read().clone().into());
|
|
22
25
|
}
|
|
23
26
|
let df = DataFrame::new(cols).map_err(RbPolarsErr::from)?;
|
|
24
27
|
Ok(RbDataFrame::new(df))
|
|
25
28
|
}
|
|
26
29
|
|
|
27
30
|
pub fn estimated_size(&self) -> usize {
|
|
28
|
-
self.df.
|
|
31
|
+
self.df.read().estimated_size()
|
|
29
32
|
}
|
|
30
33
|
|
|
31
34
|
pub fn dtype_strings(&self) -> Vec<String> {
|
|
32
35
|
self.df
|
|
33
|
-
.
|
|
36
|
+
.read()
|
|
34
37
|
.get_columns()
|
|
35
38
|
.iter()
|
|
36
39
|
.map(|s| format!("{}", s.dtype()))
|
|
37
40
|
.collect()
|
|
38
41
|
}
|
|
39
42
|
|
|
40
|
-
pub fn add(&
|
|
41
|
-
|
|
42
|
-
Ok(df.into())
|
|
43
|
+
pub fn add(rb: &Ruby, self_: &Self, s: &RbSeries) -> RbResult<Self> {
|
|
44
|
+
rb.enter_polars_df(|| &*self_.df.read() + &*s.series.read())
|
|
43
45
|
}
|
|
44
46
|
|
|
45
|
-
pub fn sub(&
|
|
46
|
-
|
|
47
|
-
Ok(df.into())
|
|
47
|
+
pub fn sub(rb: &Ruby, self_: &Self, s: &RbSeries) -> RbResult<Self> {
|
|
48
|
+
rb.enter_polars_df(|| &*self_.df.read() - &*s.series.read())
|
|
48
49
|
}
|
|
49
50
|
|
|
50
|
-
pub fn div(&
|
|
51
|
-
|
|
52
|
-
Ok(df.into())
|
|
51
|
+
pub fn div(rb: &Ruby, self_: &Self, s: &RbSeries) -> RbResult<Self> {
|
|
52
|
+
rb.enter_polars_df(|| &*self_.df.read() / &*s.series.read())
|
|
53
53
|
}
|
|
54
54
|
|
|
55
|
-
pub fn mul(&
|
|
56
|
-
|
|
57
|
-
Ok(df.into())
|
|
55
|
+
pub fn mul(rb: &Ruby, self_: &Self, s: &RbSeries) -> RbResult<Self> {
|
|
56
|
+
rb.enter_polars_df(|| &*self_.df.read() * &*s.series.read())
|
|
58
57
|
}
|
|
59
58
|
|
|
60
|
-
pub fn rem(&
|
|
61
|
-
|
|
62
|
-
Ok(df.into())
|
|
59
|
+
pub fn rem(rb: &Ruby, self_: &Self, s: &RbSeries) -> RbResult<Self> {
|
|
60
|
+
rb.enter_polars_df(|| &*self_.df.read() % &*s.series.read())
|
|
63
61
|
}
|
|
64
62
|
|
|
65
|
-
pub fn add_df(&
|
|
66
|
-
|
|
67
|
-
Ok(df.into())
|
|
63
|
+
pub fn add_df(rb: &Ruby, self_: &Self, s: &Self) -> RbResult<Self> {
|
|
64
|
+
rb.enter_polars_df(|| &*self_.df.read() + &*s.df.read())
|
|
68
65
|
}
|
|
69
66
|
|
|
70
|
-
pub fn sub_df(&
|
|
71
|
-
|
|
72
|
-
Ok(df.into())
|
|
67
|
+
pub fn sub_df(rb: &Ruby, self_: &Self, s: &Self) -> RbResult<Self> {
|
|
68
|
+
rb.enter_polars_df(|| &*self_.df.read() - &*s.df.read())
|
|
73
69
|
}
|
|
74
70
|
|
|
75
|
-
pub fn div_df(&
|
|
76
|
-
|
|
77
|
-
Ok(df.into())
|
|
71
|
+
pub fn div_df(rb: &Ruby, self_: &Self, s: &Self) -> RbResult<Self> {
|
|
72
|
+
rb.enter_polars_df(|| &*self_.df.read() / &*s.df.read())
|
|
78
73
|
}
|
|
79
74
|
|
|
80
|
-
pub fn mul_df(&
|
|
81
|
-
|
|
82
|
-
Ok(df.into())
|
|
75
|
+
pub fn mul_df(rb: &Ruby, self_: &Self, s: &Self) -> RbResult<Self> {
|
|
76
|
+
rb.enter_polars_df(|| &*self_.df.read() * &*s.df.read())
|
|
83
77
|
}
|
|
84
78
|
|
|
85
|
-
pub fn rem_df(&
|
|
86
|
-
|
|
87
|
-
Ok(df.into())
|
|
79
|
+
pub fn rem_df(rb: &Ruby, self_: &Self, s: &Self) -> RbResult<Self> {
|
|
80
|
+
rb.enter_polars_df(|| &*self_.df.read() % &*s.df.read())
|
|
88
81
|
}
|
|
89
82
|
|
|
90
83
|
pub fn sample_n(
|
|
91
|
-
&
|
|
84
|
+
rb: &Ruby,
|
|
85
|
+
self_: &Self,
|
|
92
86
|
n: &RbSeries,
|
|
93
87
|
with_replacement: bool,
|
|
94
88
|
shuffle: bool,
|
|
95
89
|
seed: Option<u64>,
|
|
96
90
|
) -> RbResult<Self> {
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
91
|
+
rb.enter_polars_df(|| {
|
|
92
|
+
self_
|
|
93
|
+
.df
|
|
94
|
+
.read()
|
|
95
|
+
.sample_n(&n.series.read(), with_replacement, shuffle, seed)
|
|
96
|
+
})
|
|
103
97
|
}
|
|
104
98
|
|
|
105
99
|
pub fn sample_frac(
|
|
106
|
-
&
|
|
100
|
+
rb: &Ruby,
|
|
101
|
+
self_: &Self,
|
|
107
102
|
frac: &RbSeries,
|
|
108
103
|
with_replacement: bool,
|
|
109
104
|
shuffle: bool,
|
|
110
105
|
seed: Option<u64>,
|
|
111
106
|
) -> RbResult<Self> {
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
107
|
+
rb.enter_polars_df(|| {
|
|
108
|
+
self_
|
|
109
|
+
.df
|
|
110
|
+
.read()
|
|
111
|
+
.sample_frac(&frac.series.read(), with_replacement, shuffle, seed)
|
|
112
|
+
})
|
|
118
113
|
}
|
|
119
114
|
|
|
120
|
-
pub fn rechunk(&
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
115
|
+
pub fn rechunk(rb: &Ruby, self_: &Self) -> RbResult<Self> {
|
|
116
|
+
rb.enter_polars_df(|| {
|
|
117
|
+
let mut df = self_.df.write().clone();
|
|
118
|
+
df.as_single_chunk_par();
|
|
119
|
+
Ok(df)
|
|
120
|
+
})
|
|
124
121
|
}
|
|
125
122
|
|
|
126
123
|
pub fn as_str(&self) -> String {
|
|
127
|
-
format!("{}", self.df.
|
|
124
|
+
format!("{}", self.df.read())
|
|
128
125
|
}
|
|
129
126
|
|
|
130
|
-
pub fn get_columns(&
|
|
131
|
-
let cols =
|
|
132
|
-
to_rbseries(
|
|
127
|
+
pub fn get_columns(rb: &Ruby, self_: &Self) -> RArray {
|
|
128
|
+
let cols = self_.df.read().get_columns().to_vec();
|
|
129
|
+
cols.to_rbseries(rb)
|
|
133
130
|
}
|
|
134
131
|
|
|
135
132
|
pub fn columns(&self) -> Vec<String> {
|
|
136
133
|
self.df
|
|
137
|
-
.
|
|
134
|
+
.read()
|
|
138
135
|
.get_column_names()
|
|
139
136
|
.iter()
|
|
140
137
|
.map(|v| v.to_string())
|
|
@@ -143,214 +140,207 @@ impl RbDataFrame {
|
|
|
143
140
|
|
|
144
141
|
pub fn set_column_names(&self, names: Vec<String>) -> RbResult<()> {
|
|
145
142
|
self.df
|
|
146
|
-
.
|
|
143
|
+
.write()
|
|
147
144
|
.set_column_names(&names)
|
|
148
145
|
.map_err(RbPolarsErr::from)?;
|
|
149
146
|
Ok(())
|
|
150
147
|
}
|
|
151
148
|
|
|
152
|
-
pub fn dtypes(ruby: &Ruby,
|
|
149
|
+
pub fn dtypes(ruby: &Ruby, self_: &Self) -> RArray {
|
|
153
150
|
ruby.ary_from_iter(
|
|
154
|
-
|
|
151
|
+
self_
|
|
155
152
|
.df
|
|
156
|
-
.
|
|
153
|
+
.read()
|
|
157
154
|
.iter()
|
|
158
155
|
.map(|s| Wrap(s.dtype().clone()).into_value_with(ruby)),
|
|
159
156
|
)
|
|
160
157
|
}
|
|
161
158
|
|
|
162
159
|
pub fn n_chunks(&self) -> usize {
|
|
163
|
-
self.df.
|
|
160
|
+
self.df.read().first_col_n_chunks()
|
|
164
161
|
}
|
|
165
162
|
|
|
166
163
|
pub fn shape(&self) -> (usize, usize) {
|
|
167
|
-
self.df.
|
|
164
|
+
self.df.read().shape()
|
|
168
165
|
}
|
|
169
166
|
|
|
170
167
|
pub fn height(&self) -> usize {
|
|
171
|
-
self.df.
|
|
168
|
+
self.df.read().height()
|
|
172
169
|
}
|
|
173
170
|
|
|
174
171
|
pub fn width(&self) -> usize {
|
|
175
|
-
self.df.
|
|
172
|
+
self.df.read().width()
|
|
176
173
|
}
|
|
177
174
|
|
|
178
|
-
pub fn hstack(&
|
|
175
|
+
pub fn hstack(rb: &Ruby, self_: &Self, columns: RArray) -> RbResult<Self> {
|
|
179
176
|
let columns = to_series(columns)?;
|
|
180
177
|
let columns = columns.into_iter().map(Into::into).collect::<Vec<_>>();
|
|
181
|
-
|
|
182
|
-
.df
|
|
183
|
-
.borrow()
|
|
184
|
-
.hstack(&columns)
|
|
185
|
-
.map_err(RbPolarsErr::from)?;
|
|
186
|
-
Ok(df.into())
|
|
178
|
+
rb.enter_polars_df(|| self_.df.read().hstack(&columns))
|
|
187
179
|
}
|
|
188
180
|
|
|
189
|
-
pub fn hstack_mut(&
|
|
181
|
+
pub fn hstack_mut(rb: &Ruby, self_: &Self, columns: RArray) -> RbResult<()> {
|
|
190
182
|
let columns = to_series(columns)?;
|
|
191
183
|
let columns = columns.into_iter().map(Into::into).collect::<Vec<_>>();
|
|
192
|
-
|
|
193
|
-
.borrow_mut()
|
|
194
|
-
.hstack_mut(&columns)
|
|
195
|
-
.map_err(RbPolarsErr::from)?;
|
|
184
|
+
rb.enter_polars(|| self_.df.write().hstack_mut(&columns).map(drop))?;
|
|
196
185
|
Ok(())
|
|
197
186
|
}
|
|
198
187
|
|
|
199
|
-
pub fn vstack(&
|
|
200
|
-
|
|
201
|
-
.df
|
|
202
|
-
.borrow()
|
|
203
|
-
.vstack(&df.df.borrow())
|
|
204
|
-
.map_err(RbPolarsErr::from)?;
|
|
205
|
-
Ok(df.into())
|
|
188
|
+
pub fn vstack(rb: &Ruby, self_: &Self, other: &RbDataFrame) -> RbResult<Self> {
|
|
189
|
+
rb.enter_polars_df(|| self_.df.read().vstack(&other.df.read()))
|
|
206
190
|
}
|
|
207
191
|
|
|
208
|
-
pub fn vstack_mut(&
|
|
209
|
-
|
|
210
|
-
.
|
|
211
|
-
.
|
|
212
|
-
.
|
|
192
|
+
pub fn vstack_mut(rb: &Ruby, self_: &Self, other: &RbDataFrame) -> RbResult<()> {
|
|
193
|
+
rb.enter_polars(|| {
|
|
194
|
+
// Prevent self-vstack deadlocks.
|
|
195
|
+
let other = other.df.read().clone();
|
|
196
|
+
self_.df.write().vstack_mut(&other)?;
|
|
197
|
+
PolarsResult::Ok(())
|
|
198
|
+
})?;
|
|
213
199
|
Ok(())
|
|
214
200
|
}
|
|
215
201
|
|
|
216
|
-
pub fn extend(&
|
|
217
|
-
|
|
218
|
-
.
|
|
219
|
-
.
|
|
220
|
-
.
|
|
202
|
+
pub fn extend(rb: &Ruby, self_: &Self, other: &RbDataFrame) -> RbResult<()> {
|
|
203
|
+
rb.enter_polars(|| {
|
|
204
|
+
// Prevent self-extend deadlocks.
|
|
205
|
+
let other = other.df.read().clone();
|
|
206
|
+
self_.df.write().extend(&other)
|
|
207
|
+
})?;
|
|
221
208
|
Ok(())
|
|
222
209
|
}
|
|
223
210
|
|
|
224
211
|
pub fn drop_in_place(&self, name: String) -> RbResult<RbSeries> {
|
|
225
212
|
let s = self
|
|
226
213
|
.df
|
|
227
|
-
.
|
|
214
|
+
.write()
|
|
228
215
|
.drop_in_place(&name)
|
|
229
216
|
.map_err(RbPolarsErr::from)?;
|
|
230
217
|
let s = s.take_materialized_series();
|
|
231
218
|
Ok(RbSeries::new(s))
|
|
232
219
|
}
|
|
233
220
|
|
|
234
|
-
pub fn
|
|
235
|
-
self.df
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
.
|
|
221
|
+
pub fn to_series(&self, index: isize) -> RbResult<RbSeries> {
|
|
222
|
+
let df = &self.df.read();
|
|
223
|
+
|
|
224
|
+
let index_adjusted = if index < 0 {
|
|
225
|
+
df.width().checked_sub(index.unsigned_abs())
|
|
226
|
+
} else {
|
|
227
|
+
Some(usize::try_from(index).unwrap())
|
|
228
|
+
};
|
|
229
|
+
|
|
230
|
+
let s = index_adjusted.and_then(|i| df.select_at_idx(i));
|
|
231
|
+
match s {
|
|
232
|
+
Some(s) => Ok(RbSeries::new(s.as_materialized_series().clone())),
|
|
233
|
+
None => Err(RbIndexError::new_err(
|
|
234
|
+
polars_err!(oob = index, df.width()).to_string(),
|
|
235
|
+
)),
|
|
236
|
+
}
|
|
239
237
|
}
|
|
240
238
|
|
|
241
239
|
pub fn get_column_index(&self, name: String) -> Option<usize> {
|
|
242
|
-
self.df.
|
|
240
|
+
self.df.read().get_column_index(&name)
|
|
243
241
|
}
|
|
244
242
|
|
|
245
243
|
pub fn get_column(&self, name: String) -> RbResult<RbSeries> {
|
|
246
244
|
let series = self
|
|
247
245
|
.df
|
|
248
|
-
.
|
|
246
|
+
.read()
|
|
249
247
|
.column(&name)
|
|
250
248
|
.map(|s| RbSeries::new(s.as_materialized_series().clone()))
|
|
251
249
|
.map_err(RbPolarsErr::from)?;
|
|
252
250
|
Ok(series)
|
|
253
251
|
}
|
|
254
252
|
|
|
255
|
-
pub fn select(&
|
|
256
|
-
|
|
257
|
-
.df
|
|
258
|
-
.borrow()
|
|
259
|
-
.select(selection)
|
|
260
|
-
.map_err(RbPolarsErr::from)?;
|
|
261
|
-
Ok(RbDataFrame::new(df))
|
|
253
|
+
pub fn select(rb: &Ruby, self_: &Self, columns: Vec<String>) -> RbResult<Self> {
|
|
254
|
+
rb.enter_polars_df(|| self_.df.read().select(columns.iter().map(|x| &**x)))
|
|
262
255
|
}
|
|
263
256
|
|
|
264
|
-
pub fn gather(&
|
|
257
|
+
pub fn gather(rb: &Ruby, self_: &Self, indices: Vec<IdxSize>) -> RbResult<Self> {
|
|
265
258
|
let indices = IdxCa::from_vec("".into(), indices);
|
|
266
|
-
|
|
267
|
-
Ok(RbDataFrame::new(df))
|
|
259
|
+
rb.enter_polars_df(|| self_.df.read().take(&indices))
|
|
268
260
|
}
|
|
269
261
|
|
|
270
|
-
pub fn
|
|
271
|
-
let
|
|
272
|
-
let
|
|
273
|
-
|
|
274
|
-
Ok(RbDataFrame::new(df))
|
|
262
|
+
pub fn gather_with_series(rb: &Ruby, self_: &Self, indices: &RbSeries) -> RbResult<Self> {
|
|
263
|
+
let idx_s = indices.series.read();
|
|
264
|
+
let indices = idx_s.idx().map_err(RbPolarsErr::from)?;
|
|
265
|
+
rb.enter_polars_df(|| self_.df.read().take(indices))
|
|
275
266
|
}
|
|
276
267
|
|
|
277
268
|
pub fn replace(&self, column: String, new_col: &RbSeries) -> RbResult<()> {
|
|
278
269
|
self.df
|
|
279
|
-
.
|
|
280
|
-
.replace(&column, new_col.series.
|
|
270
|
+
.write()
|
|
271
|
+
.replace(&column, new_col.series.read().clone())
|
|
281
272
|
.map_err(RbPolarsErr::from)?;
|
|
282
273
|
Ok(())
|
|
283
274
|
}
|
|
284
275
|
|
|
285
276
|
pub fn replace_column(&self, index: usize, new_col: &RbSeries) -> RbResult<()> {
|
|
286
277
|
self.df
|
|
287
|
-
.
|
|
288
|
-
.replace_column(index, new_col.series.
|
|
278
|
+
.write()
|
|
279
|
+
.replace_column(index, new_col.series.read().clone())
|
|
289
280
|
.map_err(RbPolarsErr::from)?;
|
|
290
281
|
Ok(())
|
|
291
282
|
}
|
|
292
283
|
|
|
293
284
|
pub fn insert_column(&self, index: usize, new_col: &RbSeries) -> RbResult<()> {
|
|
294
285
|
self.df
|
|
295
|
-
.
|
|
296
|
-
.insert_column(index, new_col.series.
|
|
286
|
+
.write()
|
|
287
|
+
.insert_column(index, new_col.series.read().clone())
|
|
297
288
|
.map_err(RbPolarsErr::from)?;
|
|
298
289
|
Ok(())
|
|
299
290
|
}
|
|
300
291
|
|
|
301
|
-
pub fn slice(&
|
|
302
|
-
|
|
303
|
-
.df
|
|
304
|
-
.
|
|
305
|
-
|
|
306
|
-
df.into()
|
|
292
|
+
pub fn slice(rb: &Ruby, self_: &Self, offset: i64, length: Option<usize>) -> RbResult<Self> {
|
|
293
|
+
rb.enter_polars_df(|| {
|
|
294
|
+
let df = self_.df.read();
|
|
295
|
+
Ok(df.slice(offset, length.unwrap_or_else(|| df.height())))
|
|
296
|
+
})
|
|
307
297
|
}
|
|
308
298
|
|
|
309
|
-
pub fn head(&
|
|
310
|
-
|
|
299
|
+
pub fn head(rb: &Ruby, self_: &Self, n: usize) -> RbResult<Self> {
|
|
300
|
+
rb.enter_polars_df(|| Ok(self_.df.read().head(Some(n))))
|
|
311
301
|
}
|
|
312
302
|
|
|
313
|
-
pub fn tail(&
|
|
314
|
-
|
|
303
|
+
pub fn tail(rb: &Ruby, self_: &Self, n: usize) -> RbResult<Self> {
|
|
304
|
+
rb.enter_polars_df(|| Ok(self_.df.read().tail(Some(n))))
|
|
315
305
|
}
|
|
316
306
|
|
|
317
|
-
pub fn is_unique(&
|
|
318
|
-
|
|
319
|
-
Ok(mask.into_series().into())
|
|
307
|
+
pub fn is_unique(rb: &Ruby, self_: &Self) -> RbResult<RbSeries> {
|
|
308
|
+
rb.enter_polars_series(|| self_.df.read().is_unique())
|
|
320
309
|
}
|
|
321
310
|
|
|
322
|
-
pub fn is_duplicated(&
|
|
323
|
-
|
|
324
|
-
.df
|
|
325
|
-
.borrow()
|
|
326
|
-
.is_duplicated()
|
|
327
|
-
.map_err(RbPolarsErr::from)?;
|
|
328
|
-
Ok(mask.into_series().into())
|
|
311
|
+
pub fn is_duplicated(rb: &Ruby, self_: &Self) -> RbResult<RbSeries> {
|
|
312
|
+
rb.enter_polars_series(|| self_.df.read().is_duplicated())
|
|
329
313
|
}
|
|
330
314
|
|
|
331
|
-
pub fn equals(
|
|
315
|
+
pub fn equals(
|
|
316
|
+
rb: &Ruby,
|
|
317
|
+
self_: &Self,
|
|
318
|
+
other: &RbDataFrame,
|
|
319
|
+
null_equal: bool,
|
|
320
|
+
) -> RbResult<bool> {
|
|
332
321
|
if null_equal {
|
|
333
|
-
|
|
322
|
+
rb.enter_polars_ok(|| self_.df.read().equals_missing(&other.df.read()))
|
|
334
323
|
} else {
|
|
335
|
-
|
|
324
|
+
rb.enter_polars_ok(|| self_.df.read().equals(&other.df.read()))
|
|
336
325
|
}
|
|
337
326
|
}
|
|
338
327
|
|
|
339
|
-
pub fn with_row_index(
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
328
|
+
pub fn with_row_index(
|
|
329
|
+
rb: &Ruby,
|
|
330
|
+
self_: &Self,
|
|
331
|
+
name: String,
|
|
332
|
+
offset: Option<IdxSize>,
|
|
333
|
+
) -> RbResult<Self> {
|
|
334
|
+
rb.enter_polars_df(|| self_.df.read().with_row_index(name.into(), offset))
|
|
346
335
|
}
|
|
347
336
|
|
|
348
337
|
pub fn clone(&self) -> Self {
|
|
349
|
-
|
|
338
|
+
Clone::clone(self)
|
|
350
339
|
}
|
|
351
340
|
|
|
352
341
|
pub fn unpivot(
|
|
353
|
-
&
|
|
342
|
+
rb: &Ruby,
|
|
343
|
+
self_: &Self,
|
|
354
344
|
on: Vec<String>,
|
|
355
345
|
index: Vec<String>,
|
|
356
346
|
value_name: Option<String>,
|
|
@@ -363,13 +353,12 @@ impl RbDataFrame {
|
|
|
363
353
|
variable_name: variable_name.map(|s| s.into()),
|
|
364
354
|
};
|
|
365
355
|
|
|
366
|
-
|
|
367
|
-
Ok(RbDataFrame::new(df))
|
|
356
|
+
rb.enter_polars_df(|| self_.df.read().unpivot2(args))
|
|
368
357
|
}
|
|
369
358
|
|
|
370
|
-
#[allow(clippy::too_many_arguments)]
|
|
371
359
|
pub fn pivot_expr(
|
|
372
|
-
&
|
|
360
|
+
rb: &Ruby,
|
|
361
|
+
self_: &Self,
|
|
373
362
|
on: Vec<String>,
|
|
374
363
|
index: Option<Vec<String>>,
|
|
375
364
|
values: Option<Vec<String>>,
|
|
@@ -378,77 +367,76 @@ impl RbDataFrame {
|
|
|
378
367
|
aggregate_expr: Option<&RbExpr>,
|
|
379
368
|
separator: Option<String>,
|
|
380
369
|
) -> RbResult<Self> {
|
|
370
|
+
let df = self_.df.read().clone(); // Clone to avoid dead lock on re-entrance in aggregate_expr.
|
|
381
371
|
let fun = if maintain_order { pivot_stable } else { pivot };
|
|
382
|
-
let agg_expr = aggregate_expr.map(|
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
372
|
+
let agg_expr = aggregate_expr.map(|expr| expr.inner.clone());
|
|
373
|
+
rb.enter_polars_df(|| {
|
|
374
|
+
fun(
|
|
375
|
+
&df,
|
|
376
|
+
on,
|
|
377
|
+
index,
|
|
378
|
+
values,
|
|
379
|
+
sort_columns,
|
|
380
|
+
agg_expr,
|
|
381
|
+
separator.as_deref(),
|
|
382
|
+
)
|
|
383
|
+
})
|
|
394
384
|
}
|
|
395
385
|
|
|
396
386
|
pub fn partition_by(
|
|
397
|
-
|
|
398
|
-
|
|
387
|
+
rb: &Ruby,
|
|
388
|
+
self_: &Self,
|
|
399
389
|
by: Vec<String>,
|
|
400
390
|
maintain_order: bool,
|
|
401
391
|
include_key: bool,
|
|
402
392
|
) -> RbResult<RArray> {
|
|
403
393
|
let out = if maintain_order {
|
|
404
|
-
|
|
394
|
+
self_.df.read().partition_by_stable(by, include_key)
|
|
405
395
|
} else {
|
|
406
|
-
|
|
396
|
+
self_.df.read().partition_by(by, include_key)
|
|
407
397
|
}
|
|
408
398
|
.map_err(RbPolarsErr::from)?;
|
|
409
|
-
Ok(
|
|
399
|
+
Ok(rb.ary_from_iter(out.into_iter().map(RbDataFrame::new)))
|
|
410
400
|
}
|
|
411
401
|
|
|
412
402
|
pub fn lazy(&self) -> RbLazyFrame {
|
|
413
|
-
self.df.
|
|
403
|
+
self.df.read().clone().lazy().into()
|
|
414
404
|
}
|
|
415
405
|
|
|
416
406
|
pub fn to_dummies(
|
|
417
|
-
&
|
|
407
|
+
rb: &Ruby,
|
|
408
|
+
self_: &Self,
|
|
418
409
|
columns: Option<Vec<String>>,
|
|
419
410
|
separator: Option<String>,
|
|
420
411
|
drop_first: bool,
|
|
421
412
|
drop_nulls: bool,
|
|
422
413
|
) -> RbResult<Self> {
|
|
423
|
-
|
|
424
|
-
Some(cols) =>
|
|
414
|
+
rb.enter_polars_df(|| match columns {
|
|
415
|
+
Some(cols) => self_.df.read().columns_to_dummies(
|
|
425
416
|
cols.iter().map(|x| x as &str).collect(),
|
|
426
417
|
separator.as_deref(),
|
|
427
418
|
drop_first,
|
|
428
419
|
drop_nulls,
|
|
429
420
|
),
|
|
430
|
-
None =>
|
|
421
|
+
None => self_
|
|
431
422
|
.df
|
|
432
|
-
.
|
|
423
|
+
.read()
|
|
433
424
|
.to_dummies(separator.as_deref(), drop_first, drop_nulls),
|
|
434
|
-
}
|
|
435
|
-
.map_err(RbPolarsErr::from)?;
|
|
436
|
-
Ok(df.into())
|
|
425
|
+
})
|
|
437
426
|
}
|
|
438
427
|
|
|
439
|
-
pub fn null_count(&
|
|
440
|
-
|
|
441
|
-
df.into()
|
|
428
|
+
pub fn null_count(rb: &Ruby, self_: &Self) -> RbResult<Self> {
|
|
429
|
+
rb.enter_polars_df(|| Ok(self_.df.read().null_count()))
|
|
442
430
|
}
|
|
443
431
|
|
|
444
432
|
pub fn map_rows(
|
|
445
433
|
ruby: &Ruby,
|
|
446
|
-
|
|
434
|
+
self_: &Self,
|
|
447
435
|
lambda: Value,
|
|
448
436
|
output_type: Option<Wrap<DataType>>,
|
|
449
437
|
inference_size: usize,
|
|
450
438
|
) -> RbResult<(Value, bool)> {
|
|
451
|
-
let df = &
|
|
439
|
+
let df = &self_.df.read();
|
|
452
440
|
|
|
453
441
|
let output_type = output_type.map(|dt| dt.0);
|
|
454
442
|
let out = match output_type {
|
|
@@ -496,22 +484,29 @@ impl RbDataFrame {
|
|
|
496
484
|
Ok((ruby.obj_wrap(RbSeries::from(out)).as_value(), false))
|
|
497
485
|
}
|
|
498
486
|
|
|
499
|
-
pub fn shrink_to_fit(&
|
|
500
|
-
|
|
487
|
+
pub fn shrink_to_fit(rb: &Ruby, self_: &Self) -> RbResult<()> {
|
|
488
|
+
rb.enter_polars_ok(|| self_.df.write().shrink_to_fit())
|
|
501
489
|
}
|
|
502
490
|
|
|
503
|
-
pub fn hash_rows(
|
|
491
|
+
pub fn hash_rows(
|
|
492
|
+
rb: &Ruby,
|
|
493
|
+
self_: &Self,
|
|
494
|
+
k0: u64,
|
|
495
|
+
k1: u64,
|
|
496
|
+
k2: u64,
|
|
497
|
+
k3: u64,
|
|
498
|
+
) -> RbResult<RbSeries> {
|
|
504
499
|
let seed = PlFixedStateQuality::default().hash_one((k0, k1, k2, k3));
|
|
505
500
|
let hb = PlSeedableRandomStateQuality::seed_from_u64(seed);
|
|
506
|
-
|
|
507
|
-
.df
|
|
508
|
-
.borrow_mut()
|
|
509
|
-
.hash_rows(Some(hb))
|
|
510
|
-
.map_err(RbPolarsErr::from)?;
|
|
511
|
-
Ok(hash.into_series().into())
|
|
501
|
+
rb.enter_polars_series(|| self_.df.write().hash_rows(Some(hb)))
|
|
512
502
|
}
|
|
513
503
|
|
|
514
|
-
pub fn transpose(
|
|
504
|
+
pub fn transpose(
|
|
505
|
+
rb: &Ruby,
|
|
506
|
+
self_: &Self,
|
|
507
|
+
keep_names_as: Option<String>,
|
|
508
|
+
column_names: Value,
|
|
509
|
+
) -> RbResult<Self> {
|
|
515
510
|
let new_col_names = if let Ok(name) = <Vec<String>>::try_convert(column_names) {
|
|
516
511
|
Some(Either::Right(name))
|
|
517
512
|
} else if let Ok(name) = String::try_convert(column_names) {
|
|
@@ -519,40 +514,45 @@ impl RbDataFrame {
|
|
|
519
514
|
} else {
|
|
520
515
|
None
|
|
521
516
|
};
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
517
|
+
rb.enter_polars_df(|| {
|
|
518
|
+
self_
|
|
519
|
+
.df
|
|
520
|
+
.write()
|
|
521
|
+
.transpose(keep_names_as.as_deref(), new_col_names)
|
|
522
|
+
})
|
|
528
523
|
}
|
|
529
524
|
|
|
530
525
|
pub fn upsample(
|
|
531
|
-
&
|
|
526
|
+
rb: &Ruby,
|
|
527
|
+
self_: &Self,
|
|
532
528
|
by: Vec<String>,
|
|
533
529
|
index_column: String,
|
|
534
530
|
every: String,
|
|
535
531
|
stable: bool,
|
|
536
532
|
) -> RbResult<Self> {
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
533
|
+
rb.enter_polars_df(|| {
|
|
534
|
+
if stable {
|
|
535
|
+
self_
|
|
536
|
+
.df
|
|
537
|
+
.read()
|
|
538
|
+
.upsample_stable(by, &index_column, Duration::parse(&every))
|
|
539
|
+
} else {
|
|
540
|
+
self_
|
|
541
|
+
.df
|
|
542
|
+
.read()
|
|
543
|
+
.upsample(by, &index_column, Duration::parse(&every))
|
|
544
|
+
}
|
|
545
|
+
})
|
|
548
546
|
}
|
|
549
547
|
|
|
550
|
-
pub fn to_struct(&
|
|
551
|
-
|
|
552
|
-
|
|
548
|
+
pub fn to_struct(rb: &Ruby, self_: &Self, name: String) -> RbResult<RbSeries> {
|
|
549
|
+
rb.enter_polars_series(|| {
|
|
550
|
+
let ca = self_.df.read().clone().into_struct(name.into());
|
|
551
|
+
Ok(ca)
|
|
552
|
+
})
|
|
553
553
|
}
|
|
554
554
|
|
|
555
|
-
pub fn clear(&
|
|
556
|
-
|
|
555
|
+
pub fn clear(rb: &Ruby, self_: &Self) -> RbResult<Self> {
|
|
556
|
+
rb.enter_polars_df(|| Ok(self_.df.read().clear()))
|
|
557
557
|
}
|
|
558
558
|
}
|