polars-df 0.23.0 → 0.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +127 -1
- data/Cargo.lock +72 -58
- data/README.md +31 -27
- data/ext/polars/Cargo.toml +15 -6
- data/ext/polars/src/batched_csv.rs +35 -39
- data/ext/polars/src/c_api/allocator.rs +7 -0
- data/ext/polars/src/c_api/mod.rs +1 -0
- data/ext/polars/src/catalog/unity.rs +123 -101
- data/ext/polars/src/conversion/any_value.rs +13 -17
- data/ext/polars/src/conversion/chunked_array.rs +5 -5
- data/ext/polars/src/conversion/datetime.rs +3 -2
- data/ext/polars/src/conversion/mod.rs +50 -45
- data/ext/polars/src/dataframe/export.rs +13 -13
- data/ext/polars/src/dataframe/general.rs +223 -223
- data/ext/polars/src/dataframe/io.rs +27 -141
- data/ext/polars/src/dataframe/mod.rs +13 -5
- data/ext/polars/src/dataframe/serde.rs +1 -1
- data/ext/polars/src/error.rs +44 -7
- data/ext/polars/src/exceptions.rs +45 -12
- data/ext/polars/src/expr/array.rs +12 -0
- data/ext/polars/src/expr/datatype.rs +2 -2
- data/ext/polars/src/expr/datetime.rs +4 -5
- data/ext/polars/src/expr/general.rs +49 -13
- data/ext/polars/src/expr/list.rs +4 -0
- data/ext/polars/src/expr/meta.rs +8 -3
- data/ext/polars/src/expr/mod.rs +22 -6
- data/ext/polars/src/expr/name.rs +19 -8
- data/ext/polars/src/expr/rolling.rs +50 -1
- data/ext/polars/src/expr/string.rs +0 -1
- data/ext/polars/src/expr/struct.rs +7 -2
- data/ext/polars/src/file.rs +136 -103
- data/ext/polars/src/functions/aggregation.rs +9 -8
- data/ext/polars/src/functions/io.rs +81 -10
- data/ext/polars/src/functions/lazy.rs +95 -21
- data/ext/polars/src/functions/mod.rs +2 -0
- data/ext/polars/src/functions/range.rs +19 -3
- data/ext/polars/src/functions/strings.rs +6 -0
- data/ext/polars/src/functions/utils.rs +6 -0
- data/ext/polars/src/interop/arrow/mod.rs +50 -1
- data/ext/polars/src/interop/arrow/{to_ruby.rs → to_rb.rs} +30 -0
- data/ext/polars/src/interop/arrow/to_rust.rs +43 -0
- data/ext/polars/src/interop/numo/to_numo_df.rs +1 -1
- data/ext/polars/src/interop/numo/to_numo_series.rs +1 -1
- data/ext/polars/src/lazyframe/exitable.rs +39 -0
- data/ext/polars/src/lazyframe/general.rs +340 -236
- data/ext/polars/src/lazyframe/mod.rs +46 -10
- data/ext/polars/src/lazyframe/optflags.rs +5 -4
- data/ext/polars/src/lazyframe/serde.rs +11 -3
- data/ext/polars/src/lazyframe/sink.rs +10 -5
- data/ext/polars/src/lazygroupby.rs +6 -7
- data/ext/polars/src/lib.rs +141 -76
- data/ext/polars/src/map/dataframe.rs +12 -12
- data/ext/polars/src/map/lazy.rs +7 -5
- data/ext/polars/src/map/mod.rs +15 -8
- data/ext/polars/src/map/series.rs +3 -3
- data/ext/polars/src/on_startup.rs +16 -8
- data/ext/polars/src/prelude.rs +1 -0
- data/ext/polars/src/rb_modules.rs +19 -49
- data/ext/polars/src/series/aggregation.rs +79 -140
- data/ext/polars/src/series/arithmetic.rs +16 -22
- data/ext/polars/src/series/comparison.rs +101 -222
- data/ext/polars/src/series/construction.rs +17 -18
- data/ext/polars/src/series/export.rs +1 -1
- data/ext/polars/src/series/general.rs +254 -289
- data/ext/polars/src/series/import.rs +17 -0
- data/ext/polars/src/series/map.rs +178 -160
- data/ext/polars/src/series/mod.rs +28 -12
- data/ext/polars/src/series/scatter.rs +12 -9
- data/ext/polars/src/sql.rs +16 -9
- data/ext/polars/src/testing/frame.rs +31 -0
- data/ext/polars/src/testing/mod.rs +5 -0
- data/ext/polars/src/testing/series.rs +31 -0
- data/ext/polars/src/timeout.rs +105 -0
- data/ext/polars/src/utils.rs +159 -1
- data/lib/polars/array_expr.rb +81 -12
- data/lib/polars/array_name_space.rb +74 -7
- data/lib/polars/batched_csv_reader.rb +21 -21
- data/lib/polars/binary_name_space.rb +1 -1
- data/lib/polars/cat_expr.rb +7 -7
- data/lib/polars/config.rb +1 -1
- data/lib/polars/convert.rb +189 -34
- data/lib/polars/data_frame.rb +1066 -831
- data/lib/polars/data_frame_plot.rb +173 -0
- data/lib/polars/data_type_group.rb +1 -0
- data/lib/polars/data_types.rb +31 -12
- data/lib/polars/date_time_expr.rb +51 -69
- data/lib/polars/date_time_name_space.rb +80 -112
- data/lib/polars/dynamic_group_by.rb +7 -7
- data/lib/polars/exceptions.rb +50 -10
- data/lib/polars/expr.rb +470 -517
- data/lib/polars/functions/aggregation/horizontal.rb +0 -1
- data/lib/polars/functions/aggregation/vertical.rb +2 -3
- data/lib/polars/functions/as_datatype.rb +290 -8
- data/lib/polars/functions/eager.rb +204 -10
- data/lib/polars/functions/escape_regex.rb +21 -0
- data/lib/polars/functions/lazy.rb +409 -169
- data/lib/polars/functions/lit.rb +17 -1
- data/lib/polars/functions/range/int_range.rb +74 -2
- data/lib/polars/functions/range/linear_space.rb +77 -0
- data/lib/polars/functions/range/time_range.rb +1 -1
- data/lib/polars/functions/repeat.rb +3 -12
- data/lib/polars/functions/whenthen.rb +2 -2
- data/lib/polars/group_by.rb +72 -20
- data/lib/polars/iceberg_dataset.rb +1 -6
- data/lib/polars/in_process_query.rb +37 -0
- data/lib/polars/io/cloud.rb +18 -0
- data/lib/polars/io/csv.rb +265 -126
- data/lib/polars/io/database.rb +0 -1
- data/lib/polars/io/delta.rb +15 -7
- data/lib/polars/io/ipc.rb +24 -17
- data/lib/polars/io/ndjson.rb +161 -24
- data/lib/polars/io/parquet.rb +101 -38
- data/lib/polars/lazy_frame.rb +849 -558
- data/lib/polars/lazy_group_by.rb +327 -2
- data/lib/polars/list_expr.rb +94 -16
- data/lib/polars/list_name_space.rb +88 -24
- data/lib/polars/meta_expr.rb +42 -1
- data/lib/polars/name_expr.rb +41 -4
- data/lib/polars/query_opt_flags.rb +198 -2
- data/lib/polars/rolling_group_by.rb +3 -3
- data/lib/polars/schema.rb +21 -3
- data/lib/polars/selector.rb +37 -2
- data/lib/polars/selectors.rb +45 -9
- data/lib/polars/series.rb +1156 -728
- data/lib/polars/series_plot.rb +72 -0
- data/lib/polars/slice.rb +1 -1
- data/lib/polars/sql_context.rb +11 -4
- data/lib/polars/string_expr.rb +59 -68
- data/lib/polars/string_name_space.rb +51 -87
- data/lib/polars/struct_expr.rb +36 -18
- data/lib/polars/testing.rb +24 -273
- data/lib/polars/utils/constants.rb +2 -0
- data/lib/polars/utils/construction/data_frame.rb +410 -0
- data/lib/polars/utils/construction/series.rb +364 -0
- data/lib/polars/utils/construction/utils.rb +9 -0
- data/lib/polars/utils/deprecation.rb +11 -0
- data/lib/polars/utils/serde.rb +8 -3
- data/lib/polars/utils/unstable.rb +19 -0
- data/lib/polars/utils/various.rb +59 -0
- data/lib/polars/utils.rb +46 -47
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +47 -1
- metadata +25 -6
- data/ext/polars/src/allocator.rs +0 -13
- data/lib/polars/plot.rb +0 -109
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
use arrow::array::Array;
|
|
2
|
+
use arrow::ffi;
|
|
2
3
|
use arrow::ffi::{ArrowArrayStream, ArrowArrayStreamReader};
|
|
3
4
|
use magnus::Value;
|
|
4
5
|
use magnus::prelude::*;
|
|
@@ -9,6 +10,22 @@ use super::RbSeries;
|
|
|
9
10
|
use crate::RbResult;
|
|
10
11
|
use crate::exceptions::RbValueError;
|
|
11
12
|
|
|
13
|
+
pub(crate) fn import_schema_rbcapsule(schema_capsule: Value) -> RbResult<arrow::datatypes::Field> {
|
|
14
|
+
let capsule_pointer: usize = schema_capsule.funcall("to_i", ())?;
|
|
15
|
+
|
|
16
|
+
// # Safety
|
|
17
|
+
// schema_capsule holds a valid C ArrowSchema pointer, as defined by the Arrow PyCapsule
|
|
18
|
+
// Interface
|
|
19
|
+
unsafe {
|
|
20
|
+
let schema_ptr = (capsule_pointer as *const ffi::ArrowSchema)
|
|
21
|
+
.as_ref()
|
|
22
|
+
.unwrap();
|
|
23
|
+
let field = ffi::import_field_from_c(schema_ptr).unwrap();
|
|
24
|
+
|
|
25
|
+
Ok(field)
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
|
|
12
29
|
/// Import `arrow_c_stream` across Ruby boundary.
|
|
13
30
|
fn call_arrow_c_stream(ob: Value) -> RbResult<Value> {
|
|
14
31
|
let capsule = ob.funcall("arrow_c_stream", ())?;
|
|
@@ -1,9 +1,10 @@
|
|
|
1
|
-
use magnus::Value;
|
|
1
|
+
use magnus::{Ruby, Value};
|
|
2
2
|
|
|
3
3
|
use super::RbSeries;
|
|
4
4
|
use crate::map::check_nested_object;
|
|
5
5
|
use crate::map::series::{ApplyLambda, call_lambda_and_extract};
|
|
6
6
|
use crate::prelude::*;
|
|
7
|
+
use crate::utils::RubyAttach;
|
|
7
8
|
use crate::{RbPolarsErr, RbResult};
|
|
8
9
|
use crate::{apply_method_all_arrow_series2, raise_err};
|
|
9
10
|
|
|
@@ -14,7 +15,7 @@ impl RbSeries {
|
|
|
14
15
|
return_dtype: Option<Wrap<DataType>>,
|
|
15
16
|
skip_nulls: bool,
|
|
16
17
|
) -> RbResult<Self> {
|
|
17
|
-
let series = &self.series.
|
|
18
|
+
let series = &self.series.read().clone(); // Clone so we don't deadlock on re-entrance.
|
|
18
19
|
|
|
19
20
|
if return_dtype.is_none() {
|
|
20
21
|
polars_warn!(
|
|
@@ -58,170 +59,187 @@ impl RbSeries {
|
|
|
58
59
|
|
|
59
60
|
}
|
|
60
61
|
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
62
|
+
Ruby::attach(|_rb| {
|
|
63
|
+
if matches!(
|
|
64
|
+
series.dtype(),
|
|
65
|
+
DataType::Datetime(_, _)
|
|
66
|
+
| DataType::Date
|
|
67
|
+
| DataType::Duration(_)
|
|
68
|
+
| DataType::Categorical(_, _)
|
|
69
|
+
| DataType::Enum(_, _)
|
|
70
|
+
| DataType::Binary
|
|
71
|
+
| DataType::Array(_, _)
|
|
72
|
+
| DataType::Time
|
|
73
|
+
| DataType::Decimal(_, _)
|
|
74
|
+
) || !skip_nulls
|
|
75
|
+
{
|
|
76
|
+
let mut avs = Vec::with_capacity(series.len());
|
|
77
|
+
let s = series.rechunk();
|
|
76
78
|
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
79
|
+
for av in s.iter() {
|
|
80
|
+
let out = match (skip_nulls, av) {
|
|
81
|
+
(true, AnyValue::Null) => AnyValue::Null,
|
|
82
|
+
(_, av) => {
|
|
83
|
+
let av: Option<Wrap<AnyValue>> =
|
|
84
|
+
call_lambda_and_extract(function, Wrap(av))?;
|
|
85
|
+
match av {
|
|
86
|
+
None => AnyValue::Null,
|
|
87
|
+
Some(av) => av.0,
|
|
88
|
+
}
|
|
86
89
|
}
|
|
87
|
-
}
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
}
|
|
96
|
-
|
|
97
|
-
return Ok(out.into());
|
|
98
|
-
}
|
|
90
|
+
};
|
|
91
|
+
avs.push(out)
|
|
92
|
+
}
|
|
93
|
+
let out = Series::new(series.name().clone(), &avs);
|
|
94
|
+
let dtype = out.dtype();
|
|
95
|
+
if dtype.is_nested() {
|
|
96
|
+
check_nested_object(dtype)?;
|
|
97
|
+
}
|
|
99
98
|
|
|
100
|
-
|
|
101
|
-
Some(DataType::Int8) => {
|
|
102
|
-
let ca: Int8Chunked = dispatch_apply!(
|
|
103
|
-
series,
|
|
104
|
-
apply_lambda_with_primitive_out_type,
|
|
105
|
-
function,
|
|
106
|
-
0,
|
|
107
|
-
None
|
|
108
|
-
)?;
|
|
109
|
-
ca.into_series()
|
|
110
|
-
}
|
|
111
|
-
Some(DataType::Int16) => {
|
|
112
|
-
let ca: Int16Chunked = dispatch_apply!(
|
|
113
|
-
series,
|
|
114
|
-
apply_lambda_with_primitive_out_type,
|
|
115
|
-
function,
|
|
116
|
-
0,
|
|
117
|
-
None
|
|
118
|
-
)?;
|
|
119
|
-
ca.into_series()
|
|
120
|
-
}
|
|
121
|
-
Some(DataType::Int32) => {
|
|
122
|
-
let ca: Int32Chunked = dispatch_apply!(
|
|
123
|
-
series,
|
|
124
|
-
apply_lambda_with_primitive_out_type,
|
|
125
|
-
function,
|
|
126
|
-
0,
|
|
127
|
-
None
|
|
128
|
-
)?;
|
|
129
|
-
ca.into_series()
|
|
99
|
+
return Ok(out.into());
|
|
130
100
|
}
|
|
131
|
-
Some(DataType::Int64) => {
|
|
132
|
-
let ca: Int64Chunked = dispatch_apply!(
|
|
133
|
-
series,
|
|
134
|
-
apply_lambda_with_primitive_out_type,
|
|
135
|
-
function,
|
|
136
|
-
0,
|
|
137
|
-
None
|
|
138
|
-
)?;
|
|
139
|
-
ca.into_series()
|
|
140
|
-
}
|
|
141
|
-
Some(DataType::UInt8) => {
|
|
142
|
-
let ca: UInt8Chunked = dispatch_apply!(
|
|
143
|
-
series,
|
|
144
|
-
apply_lambda_with_primitive_out_type,
|
|
145
|
-
function,
|
|
146
|
-
0,
|
|
147
|
-
None
|
|
148
|
-
)?;
|
|
149
|
-
ca.into_series()
|
|
150
|
-
}
|
|
151
|
-
Some(DataType::UInt16) => {
|
|
152
|
-
let ca: UInt16Chunked = dispatch_apply!(
|
|
153
|
-
series,
|
|
154
|
-
apply_lambda_with_primitive_out_type,
|
|
155
|
-
function,
|
|
156
|
-
0,
|
|
157
|
-
None
|
|
158
|
-
)?;
|
|
159
|
-
ca.into_series()
|
|
160
|
-
}
|
|
161
|
-
Some(DataType::UInt32) => {
|
|
162
|
-
let ca: UInt32Chunked = dispatch_apply!(
|
|
163
|
-
series,
|
|
164
|
-
apply_lambda_with_primitive_out_type,
|
|
165
|
-
function,
|
|
166
|
-
0,
|
|
167
|
-
None
|
|
168
|
-
)?;
|
|
169
|
-
ca.into_series()
|
|
170
|
-
}
|
|
171
|
-
Some(DataType::UInt64) => {
|
|
172
|
-
let ca: UInt64Chunked = dispatch_apply!(
|
|
173
|
-
series,
|
|
174
|
-
apply_lambda_with_primitive_out_type,
|
|
175
|
-
function,
|
|
176
|
-
0,
|
|
177
|
-
None
|
|
178
|
-
)?;
|
|
179
|
-
ca.into_series()
|
|
180
|
-
}
|
|
181
|
-
Some(DataType::Float32) => {
|
|
182
|
-
let ca: Float32Chunked = dispatch_apply!(
|
|
183
|
-
series,
|
|
184
|
-
apply_lambda_with_primitive_out_type,
|
|
185
|
-
function,
|
|
186
|
-
0,
|
|
187
|
-
None
|
|
188
|
-
)?;
|
|
189
|
-
ca.into_series()
|
|
190
|
-
}
|
|
191
|
-
Some(DataType::Float64) => {
|
|
192
|
-
let ca: Float64Chunked = dispatch_apply!(
|
|
193
|
-
series,
|
|
194
|
-
apply_lambda_with_primitive_out_type,
|
|
195
|
-
function,
|
|
196
|
-
0,
|
|
197
|
-
None
|
|
198
|
-
)?;
|
|
199
|
-
ca.into_series()
|
|
200
|
-
}
|
|
201
|
-
Some(DataType::Boolean) => {
|
|
202
|
-
let ca: BooleanChunked =
|
|
203
|
-
dispatch_apply!(series, apply_lambda_with_bool_out_type, function, 0, None)?;
|
|
204
|
-
ca.into_series()
|
|
205
|
-
}
|
|
206
|
-
Some(DataType::String) => {
|
|
207
|
-
let ca =
|
|
208
|
-
dispatch_apply!(series, apply_lambda_with_utf8_out_type, function, 0, None)?;
|
|
209
101
|
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
102
|
+
let out = match return_dtype {
|
|
103
|
+
Some(DataType::Int8) => {
|
|
104
|
+
let ca: Int8Chunked = dispatch_apply!(
|
|
105
|
+
series,
|
|
106
|
+
apply_lambda_with_primitive_out_type,
|
|
107
|
+
function,
|
|
108
|
+
0,
|
|
109
|
+
None
|
|
110
|
+
)?;
|
|
111
|
+
ca.into_series()
|
|
112
|
+
}
|
|
113
|
+
Some(DataType::Int16) => {
|
|
114
|
+
let ca: Int16Chunked = dispatch_apply!(
|
|
115
|
+
series,
|
|
116
|
+
apply_lambda_with_primitive_out_type,
|
|
117
|
+
function,
|
|
118
|
+
0,
|
|
119
|
+
None
|
|
120
|
+
)?;
|
|
121
|
+
ca.into_series()
|
|
122
|
+
}
|
|
123
|
+
Some(DataType::Int32) => {
|
|
124
|
+
let ca: Int32Chunked = dispatch_apply!(
|
|
125
|
+
series,
|
|
126
|
+
apply_lambda_with_primitive_out_type,
|
|
127
|
+
function,
|
|
128
|
+
0,
|
|
129
|
+
None
|
|
130
|
+
)?;
|
|
131
|
+
ca.into_series()
|
|
132
|
+
}
|
|
133
|
+
Some(DataType::Int64) => {
|
|
134
|
+
let ca: Int64Chunked = dispatch_apply!(
|
|
135
|
+
series,
|
|
136
|
+
apply_lambda_with_primitive_out_type,
|
|
137
|
+
function,
|
|
138
|
+
0,
|
|
139
|
+
None
|
|
140
|
+
)?;
|
|
141
|
+
ca.into_series()
|
|
142
|
+
}
|
|
143
|
+
Some(DataType::UInt8) => {
|
|
144
|
+
let ca: UInt8Chunked = dispatch_apply!(
|
|
145
|
+
series,
|
|
146
|
+
apply_lambda_with_primitive_out_type,
|
|
147
|
+
function,
|
|
148
|
+
0,
|
|
149
|
+
None
|
|
150
|
+
)?;
|
|
151
|
+
ca.into_series()
|
|
152
|
+
}
|
|
153
|
+
Some(DataType::UInt16) => {
|
|
154
|
+
let ca: UInt16Chunked = dispatch_apply!(
|
|
155
|
+
series,
|
|
156
|
+
apply_lambda_with_primitive_out_type,
|
|
157
|
+
function,
|
|
158
|
+
0,
|
|
159
|
+
None
|
|
160
|
+
)?;
|
|
161
|
+
ca.into_series()
|
|
162
|
+
}
|
|
163
|
+
Some(DataType::UInt32) => {
|
|
164
|
+
let ca: UInt32Chunked = dispatch_apply!(
|
|
165
|
+
series,
|
|
166
|
+
apply_lambda_with_primitive_out_type,
|
|
167
|
+
function,
|
|
168
|
+
0,
|
|
169
|
+
None
|
|
170
|
+
)?;
|
|
171
|
+
ca.into_series()
|
|
172
|
+
}
|
|
173
|
+
Some(DataType::UInt64) => {
|
|
174
|
+
let ca: UInt64Chunked = dispatch_apply!(
|
|
175
|
+
series,
|
|
176
|
+
apply_lambda_with_primitive_out_type,
|
|
177
|
+
function,
|
|
178
|
+
0,
|
|
179
|
+
None
|
|
180
|
+
)?;
|
|
181
|
+
ca.into_series()
|
|
182
|
+
}
|
|
183
|
+
Some(DataType::Float32) => {
|
|
184
|
+
let ca: Float32Chunked = dispatch_apply!(
|
|
185
|
+
series,
|
|
186
|
+
apply_lambda_with_primitive_out_type,
|
|
187
|
+
function,
|
|
188
|
+
0,
|
|
189
|
+
None
|
|
190
|
+
)?;
|
|
191
|
+
ca.into_series()
|
|
192
|
+
}
|
|
193
|
+
Some(DataType::Float64) => {
|
|
194
|
+
let ca: Float64Chunked = dispatch_apply!(
|
|
195
|
+
series,
|
|
196
|
+
apply_lambda_with_primitive_out_type,
|
|
197
|
+
function,
|
|
198
|
+
0,
|
|
199
|
+
None
|
|
200
|
+
)?;
|
|
201
|
+
ca.into_series()
|
|
202
|
+
}
|
|
203
|
+
Some(DataType::Boolean) => {
|
|
204
|
+
let ca: BooleanChunked = dispatch_apply!(
|
|
205
|
+
series,
|
|
206
|
+
apply_lambda_with_bool_out_type,
|
|
207
|
+
function,
|
|
208
|
+
0,
|
|
209
|
+
None
|
|
210
|
+
)?;
|
|
211
|
+
ca.into_series()
|
|
212
|
+
}
|
|
213
|
+
Some(DataType::String) => {
|
|
214
|
+
let ca = dispatch_apply!(
|
|
215
|
+
series,
|
|
216
|
+
apply_lambda_with_utf8_out_type,
|
|
217
|
+
function,
|
|
218
|
+
0,
|
|
219
|
+
None
|
|
220
|
+
)?;
|
|
221
|
+
|
|
222
|
+
ca.into_series()
|
|
223
|
+
}
|
|
224
|
+
Some(DataType::List(_inner)) => {
|
|
225
|
+
todo!()
|
|
226
|
+
}
|
|
227
|
+
Some(DataType::Object(_)) => {
|
|
228
|
+
let ca = dispatch_apply!(
|
|
229
|
+
series,
|
|
230
|
+
apply_lambda_with_object_out_type,
|
|
231
|
+
function,
|
|
232
|
+
0,
|
|
233
|
+
None
|
|
234
|
+
)?;
|
|
235
|
+
ca.into_series()
|
|
236
|
+
}
|
|
237
|
+
None => return dispatch_apply!(series, apply_lambda_unknown, function),
|
|
221
238
|
|
|
222
|
-
|
|
223
|
-
|
|
239
|
+
_ => return dispatch_apply!(series, apply_lambda_unknown, function),
|
|
240
|
+
};
|
|
224
241
|
|
|
225
|
-
|
|
242
|
+
Ok(RbSeries::new(out))
|
|
243
|
+
})
|
|
226
244
|
}
|
|
227
245
|
}
|
|
@@ -8,28 +8,38 @@ mod import;
|
|
|
8
8
|
mod map;
|
|
9
9
|
mod scatter;
|
|
10
10
|
|
|
11
|
+
pub(crate) use import::import_schema_rbcapsule;
|
|
12
|
+
|
|
11
13
|
use magnus::{DataTypeFunctions, RArray, Ruby, TypedData, gc, prelude::*};
|
|
14
|
+
use parking_lot::RwLock;
|
|
12
15
|
use polars::prelude::*;
|
|
13
|
-
use std::cell::RefCell;
|
|
14
16
|
|
|
15
17
|
use crate::{ObjectValue, RbResult};
|
|
16
18
|
|
|
17
19
|
#[derive(TypedData)]
|
|
18
20
|
#[magnus(class = "Polars::RbSeries", mark)]
|
|
19
21
|
pub struct RbSeries {
|
|
20
|
-
pub series:
|
|
22
|
+
pub series: RwLock<Series>,
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
impl Clone for RbSeries {
|
|
26
|
+
fn clone(&self) -> Self {
|
|
27
|
+
Self {
|
|
28
|
+
series: RwLock::new(self.series.read().clone()),
|
|
29
|
+
}
|
|
30
|
+
}
|
|
21
31
|
}
|
|
22
32
|
|
|
23
33
|
impl From<Series> for RbSeries {
|
|
24
34
|
fn from(series: Series) -> Self {
|
|
25
|
-
|
|
35
|
+
Self::new(series)
|
|
26
36
|
}
|
|
27
37
|
}
|
|
28
38
|
|
|
29
39
|
impl RbSeries {
|
|
30
40
|
pub fn new(series: Series) -> Self {
|
|
31
41
|
RbSeries {
|
|
32
|
-
series:
|
|
42
|
+
series: RwLock::new(series),
|
|
33
43
|
}
|
|
34
44
|
}
|
|
35
45
|
}
|
|
@@ -37,17 +47,23 @@ impl RbSeries {
|
|
|
37
47
|
pub fn to_series(rs: RArray) -> RbResult<Vec<Series>> {
|
|
38
48
|
let mut series = Vec::new();
|
|
39
49
|
for item in rs.into_iter() {
|
|
40
|
-
series.push(<&RbSeries>::try_convert(item)?.series.
|
|
50
|
+
series.push(<&RbSeries>::try_convert(item)?.series.read().clone());
|
|
41
51
|
}
|
|
42
52
|
Ok(series)
|
|
43
53
|
}
|
|
44
54
|
|
|
45
|
-
pub
|
|
46
|
-
Ruby
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
)
|
|
55
|
+
pub(crate) trait ToRbSeries {
|
|
56
|
+
fn to_rbseries(self, rb: &Ruby) -> RArray;
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
impl ToRbSeries for Vec<Column> {
|
|
60
|
+
fn to_rbseries(self, rb: &Ruby) -> RArray {
|
|
61
|
+
rb.ary_from_iter(
|
|
62
|
+
self.into_iter()
|
|
63
|
+
.map(|c| c.take_materialized_series())
|
|
64
|
+
.map(RbSeries::new),
|
|
65
|
+
)
|
|
66
|
+
}
|
|
51
67
|
}
|
|
52
68
|
|
|
53
69
|
pub fn mark_series(marker: &gc::Marker, series: &Series) {
|
|
@@ -66,7 +82,7 @@ impl DataTypeFunctions for RbSeries {
|
|
|
66
82
|
// this is not ideal, as objects will not be marked if unable to borrow
|
|
67
83
|
// this should never happen, but log for now to avoid panic,
|
|
68
84
|
// as most series will not use Object datatype
|
|
69
|
-
if let
|
|
85
|
+
if let Some(s) = &self.series.try_read() {
|
|
70
86
|
mark_series(marker, s);
|
|
71
87
|
} else {
|
|
72
88
|
eprintln!("[polars] Could not borrow!");
|
|
@@ -2,18 +2,21 @@ use arrow::array::Array;
|
|
|
2
2
|
use polars::prelude::*;
|
|
3
3
|
|
|
4
4
|
use crate::error::RbPolarsErr;
|
|
5
|
-
use crate::
|
|
5
|
+
use crate::utils::EnterPolarsExt;
|
|
6
|
+
use crate::{RbErr, RbResult, RbSeries, Ruby};
|
|
6
7
|
|
|
7
8
|
impl RbSeries {
|
|
8
|
-
pub fn scatter(&
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
9
|
+
pub fn scatter(rb: &Ruby, self_: &Self, idx: &RbSeries, values: &RbSeries) -> RbResult<()> {
|
|
10
|
+
rb.enter_polars(|| {
|
|
11
|
+
let mut s = self_.series.write();
|
|
12
|
+
match scatter(s.clone(), &idx.series.read(), &values.series.read()) {
|
|
13
|
+
Ok(out) => {
|
|
14
|
+
*s = out;
|
|
15
|
+
Ok(())
|
|
16
|
+
}
|
|
17
|
+
Err(e) => Err(RbErr::from(RbPolarsErr::from(e))),
|
|
14
18
|
}
|
|
15
|
-
|
|
16
|
-
}
|
|
19
|
+
})
|
|
17
20
|
}
|
|
18
21
|
}
|
|
19
22
|
|
data/ext/polars/src/sql.rs
CHANGED
|
@@ -1,13 +1,20 @@
|
|
|
1
|
+
use parking_lot::RwLock;
|
|
1
2
|
use polars::sql::SQLContext;
|
|
2
|
-
use std::cell::RefCell;
|
|
3
3
|
|
|
4
4
|
use crate::{RbLazyFrame, RbPolarsErr, RbResult};
|
|
5
5
|
|
|
6
6
|
#[magnus::wrap(class = "Polars::RbSQLContext")]
|
|
7
7
|
#[repr(transparent)]
|
|
8
|
-
#[derive(Clone)]
|
|
9
8
|
pub struct RbSQLContext {
|
|
10
|
-
pub context:
|
|
9
|
+
pub context: RwLock<SQLContext>,
|
|
10
|
+
}
|
|
11
|
+
|
|
12
|
+
impl Clone for RbSQLContext {
|
|
13
|
+
fn clone(&self) -> Self {
|
|
14
|
+
Self {
|
|
15
|
+
context: RwLock::new(self.context.read().clone()),
|
|
16
|
+
}
|
|
17
|
+
}
|
|
11
18
|
}
|
|
12
19
|
|
|
13
20
|
#[allow(
|
|
@@ -19,30 +26,30 @@ impl RbSQLContext {
|
|
|
19
26
|
#[allow(clippy::new_without_default)]
|
|
20
27
|
pub fn new() -> RbSQLContext {
|
|
21
28
|
RbSQLContext {
|
|
22
|
-
context: SQLContext::new()
|
|
29
|
+
context: RwLock::new(SQLContext::new()),
|
|
23
30
|
}
|
|
24
31
|
}
|
|
25
32
|
|
|
26
33
|
pub fn execute(&self, query: String) -> RbResult<RbLazyFrame> {
|
|
27
34
|
Ok(self
|
|
28
35
|
.context
|
|
29
|
-
.
|
|
36
|
+
.write()
|
|
30
37
|
.execute(&query)
|
|
31
38
|
.map_err(RbPolarsErr::from)?
|
|
32
39
|
.into())
|
|
33
40
|
}
|
|
34
41
|
|
|
35
42
|
pub fn get_tables(&self) -> RbResult<Vec<String>> {
|
|
36
|
-
Ok(self.context.
|
|
43
|
+
Ok(self.context.read().get_tables())
|
|
37
44
|
}
|
|
38
45
|
|
|
39
46
|
pub fn register(&self, name: String, lf: &RbLazyFrame) {
|
|
40
47
|
self.context
|
|
41
|
-
.
|
|
42
|
-
.register(&name, lf.
|
|
48
|
+
.write()
|
|
49
|
+
.register(&name, lf.clone().ldf.into_inner())
|
|
43
50
|
}
|
|
44
51
|
|
|
45
52
|
pub fn unregister(&self, name: String) {
|
|
46
|
-
self.context.
|
|
53
|
+
self.context.write().unregister(&name)
|
|
47
54
|
}
|
|
48
55
|
}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
use polars_testing::asserts::{DataFrameEqualOptions, assert_dataframe_equal};
|
|
2
|
+
|
|
3
|
+
use crate::error::RbPolarsErr;
|
|
4
|
+
use crate::{RbDataFrame, RbResult};
|
|
5
|
+
|
|
6
|
+
pub fn assert_dataframe_equal_rb(
|
|
7
|
+
left: &RbDataFrame,
|
|
8
|
+
right: &RbDataFrame,
|
|
9
|
+
check_row_order: bool,
|
|
10
|
+
check_column_order: bool,
|
|
11
|
+
check_dtypes: bool,
|
|
12
|
+
check_exact: bool,
|
|
13
|
+
rel_tol: f64,
|
|
14
|
+
abs_tol: f64,
|
|
15
|
+
categorical_as_str: bool,
|
|
16
|
+
) -> RbResult<()> {
|
|
17
|
+
let left_df = &left.df.read();
|
|
18
|
+
let right_df = &right.df.read();
|
|
19
|
+
|
|
20
|
+
let options = DataFrameEqualOptions {
|
|
21
|
+
check_row_order,
|
|
22
|
+
check_column_order,
|
|
23
|
+
check_dtypes,
|
|
24
|
+
check_exact,
|
|
25
|
+
rel_tol,
|
|
26
|
+
abs_tol,
|
|
27
|
+
categorical_as_str,
|
|
28
|
+
};
|
|
29
|
+
|
|
30
|
+
assert_dataframe_equal(left_df, right_df, options).map_err(|e| RbPolarsErr::from(e).into())
|
|
31
|
+
}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
use polars_testing::asserts::{SeriesEqualOptions, assert_series_equal};
|
|
2
|
+
|
|
3
|
+
use crate::error::RbPolarsErr;
|
|
4
|
+
use crate::{RbResult, RbSeries};
|
|
5
|
+
|
|
6
|
+
pub fn assert_series_equal_rb(
|
|
7
|
+
left: &RbSeries,
|
|
8
|
+
right: &RbSeries,
|
|
9
|
+
check_dtypes: bool,
|
|
10
|
+
check_names: bool,
|
|
11
|
+
check_order: bool,
|
|
12
|
+
check_exact: bool,
|
|
13
|
+
rel_tol: f64,
|
|
14
|
+
abs_tol: f64,
|
|
15
|
+
categorical_as_str: bool,
|
|
16
|
+
) -> RbResult<()> {
|
|
17
|
+
let left_series = &left.series.read();
|
|
18
|
+
let right_series = &right.series.read();
|
|
19
|
+
|
|
20
|
+
let options = SeriesEqualOptions {
|
|
21
|
+
check_dtypes,
|
|
22
|
+
check_names,
|
|
23
|
+
check_order,
|
|
24
|
+
check_exact,
|
|
25
|
+
rel_tol,
|
|
26
|
+
abs_tol,
|
|
27
|
+
categorical_as_str,
|
|
28
|
+
};
|
|
29
|
+
|
|
30
|
+
assert_series_equal(left_series, right_series, options).map_err(|e| RbPolarsErr::from(e).into())
|
|
31
|
+
}
|