polars-df 0.13.0 → 0.14.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/Cargo.lock +177 -141
- data/ext/polars/Cargo.toml +5 -6
- data/ext/polars/src/batched_csv.rs +3 -3
- data/ext/polars/src/conversion/any_value.rs +10 -4
- data/ext/polars/src/conversion/chunked_array.rs +3 -3
- data/ext/polars/src/conversion/mod.rs +36 -20
- data/ext/polars/src/dataframe/construction.rs +4 -4
- data/ext/polars/src/dataframe/general.rs +6 -5
- data/ext/polars/src/dataframe/io.rs +6 -6
- data/ext/polars/src/expr/datetime.rs +11 -3
- data/ext/polars/src/expr/general.rs +1 -1
- data/ext/polars/src/expr/name.rs +3 -2
- data/ext/polars/src/expr/string.rs +8 -1
- data/ext/polars/src/functions/io.rs +6 -6
- data/ext/polars/src/functions/range.rs +4 -2
- data/ext/polars/src/lazyframe/mod.rs +18 -16
- data/ext/polars/src/lib.rs +1 -1
- data/ext/polars/src/map/dataframe.rs +36 -8
- data/ext/polars/src/map/mod.rs +8 -8
- data/ext/polars/src/map/series.rs +106 -64
- data/ext/polars/src/on_startup.rs +1 -1
- data/ext/polars/src/series/construction.rs +50 -23
- data/ext/polars/src/series/mod.rs +4 -4
- data/lib/polars/data_frame.rb +10 -10
- data/lib/polars/expr.rb +6 -6
- data/lib/polars/io/ipc.rb +0 -8
- data/lib/polars/series.rb +5 -5
- data/lib/polars/version.rb +1 -1
- metadata +3 -3
@@ -1,4 +1,4 @@
|
|
1
|
-
use magnus::{prelude::*, RArray};
|
1
|
+
use magnus::{prelude::*, RArray, RString};
|
2
2
|
use polars_core::prelude::*;
|
3
3
|
|
4
4
|
use crate::any_value::rb_object_to_any_value;
|
@@ -10,7 +10,7 @@ use crate::{RbPolarsErr, RbResult, RbSeries, RbTypeError, RbValueError};
|
|
10
10
|
impl RbSeries {
|
11
11
|
pub fn new_opt_bool(name: String, obj: RArray, strict: bool) -> RbResult<RbSeries> {
|
12
12
|
let len = obj.len();
|
13
|
-
let mut builder = BooleanChunkedBuilder::new(
|
13
|
+
let mut builder = BooleanChunkedBuilder::new(name.into(), len);
|
14
14
|
|
15
15
|
unsafe {
|
16
16
|
for item in obj.as_slice().iter() {
|
@@ -43,7 +43,7 @@ where
|
|
43
43
|
T::Native: magnus::TryConvert,
|
44
44
|
{
|
45
45
|
let len = values.len();
|
46
|
-
let mut builder = PrimitiveChunkedBuilder::<T>::new(name, len);
|
46
|
+
let mut builder = PrimitiveChunkedBuilder::<T>::new(name.into(), len);
|
47
47
|
|
48
48
|
for res in values.into_iter() {
|
49
49
|
let value = res;
|
@@ -96,7 +96,7 @@ impl RbSeries {
|
|
96
96
|
// from anyvalues is fallible
|
97
97
|
let result = any_values_result.and_then(|avs| {
|
98
98
|
let avs = slice_extract_wrapped(&avs);
|
99
|
-
let s = Series::from_any_values(
|
99
|
+
let s = Series::from_any_values(name.clone().into(), avs, strict).map_err(|e| {
|
100
100
|
RbTypeError::new_err(format!(
|
101
101
|
"{e}\n\nHint: Try setting `strict: false` to allow passing data with mixed types."
|
102
102
|
))
|
@@ -122,44 +122,71 @@ impl RbSeries {
|
|
122
122
|
.into_iter()
|
123
123
|
.map(|v| rb_object_to_any_value(v, strict))
|
124
124
|
.collect::<RbResult<Vec<AnyValue>>>()?;
|
125
|
-
let s =
|
126
|
-
.
|
127
|
-
|
125
|
+
let s =
|
126
|
+
Series::from_any_values_and_dtype(name.into(), any_values.as_slice(), &dtype.0, strict)
|
127
|
+
.map_err(|e| {
|
128
|
+
RbTypeError::new_err(format!(
|
128
129
|
"{e}\n\nHint: Try setting `strict: false` to allow passing data with mixed types."
|
129
130
|
))
|
130
|
-
|
131
|
+
})?;
|
131
132
|
Ok(s.into())
|
132
133
|
}
|
133
134
|
|
134
|
-
pub fn new_str(name: String,
|
135
|
-
let
|
136
|
-
|
137
|
-
|
138
|
-
|
135
|
+
pub fn new_str(name: String, values: RArray, _strict: bool) -> RbResult<Self> {
|
136
|
+
let len = values.len();
|
137
|
+
let mut builder = StringChunkedBuilder::new(name.into(), len);
|
138
|
+
|
139
|
+
for res in values.into_iter() {
|
140
|
+
let value = res;
|
141
|
+
if value.is_nil() {
|
142
|
+
builder.append_null()
|
143
|
+
} else {
|
144
|
+
let v = String::try_convert(value)?;
|
145
|
+
builder.append_value(v)
|
146
|
+
}
|
147
|
+
}
|
139
148
|
|
140
|
-
|
141
|
-
let
|
142
|
-
s.
|
143
|
-
RbSeries::new(s)
|
149
|
+
let ca = builder.finish();
|
150
|
+
let s = ca.into_series();
|
151
|
+
Ok(s.into())
|
144
152
|
}
|
145
153
|
|
146
|
-
pub fn
|
147
|
-
let
|
154
|
+
pub fn new_binary(name: String, values: RArray, _strict: bool) -> RbResult<Self> {
|
155
|
+
let len = values.len();
|
156
|
+
let mut builder = BinaryChunkedBuilder::new(name.into(), len);
|
157
|
+
|
158
|
+
for res in values.into_iter() {
|
159
|
+
let value = res;
|
160
|
+
if value.is_nil() {
|
161
|
+
builder.append_null()
|
162
|
+
} else {
|
163
|
+
let v = RString::try_convert(value)?;
|
164
|
+
builder.append_value(unsafe { v.as_slice() })
|
165
|
+
}
|
166
|
+
}
|
167
|
+
|
168
|
+
let ca = builder.finish();
|
169
|
+
let s = ca.into_series();
|
148
170
|
Ok(s.into())
|
149
171
|
}
|
150
172
|
|
173
|
+
pub fn new_null(name: String, values: RArray, _strict: bool) -> RbResult<Self> {
|
174
|
+
let len = values.len();
|
175
|
+
Ok(Series::new_null(name.into(), len).into())
|
176
|
+
}
|
177
|
+
|
151
178
|
pub fn new_object(name: String, val: RArray, _strict: bool) -> RbResult<Self> {
|
152
179
|
let val = val
|
153
180
|
.into_iter()
|
154
181
|
.map(ObjectValue::from)
|
155
182
|
.collect::<Vec<ObjectValue>>();
|
156
|
-
let s = ObjectChunked::<ObjectValue>::new_from_vec(
|
183
|
+
let s = ObjectChunked::<ObjectValue>::new_from_vec(name.into(), val).into_series();
|
157
184
|
Ok(s.into())
|
158
185
|
}
|
159
186
|
|
160
187
|
pub fn new_series_list(name: String, val: RArray, _strict: bool) -> RbResult<Self> {
|
161
188
|
let series_vec = to_series_collection(val)?;
|
162
|
-
Ok(Series::new(
|
189
|
+
Ok(Series::new(name.into(), &series_vec).into())
|
163
190
|
}
|
164
191
|
|
165
192
|
pub fn new_array(
|
@@ -171,7 +198,7 @@ impl RbSeries {
|
|
171
198
|
) -> RbResult<Self> {
|
172
199
|
let val = vec_wrap_any_value(val)?;
|
173
200
|
let val = vec_extract_wrapped(val);
|
174
|
-
let out = Series::new(
|
201
|
+
let out = Series::new(name.into(), &val);
|
175
202
|
match out.dtype() {
|
176
203
|
DataType::List(list_inner) => {
|
177
204
|
let out = out
|
@@ -199,7 +226,7 @@ impl RbSeries {
|
|
199
226
|
dtype: Wrap<DataType>,
|
200
227
|
) -> RbResult<Self> {
|
201
228
|
let av = val.0;
|
202
|
-
Ok(Series::new(
|
229
|
+
Ok(Series::new(name.into(), &[av])
|
203
230
|
.cast(&dtype.0)
|
204
231
|
.map_err(RbPolarsErr::from)?
|
205
232
|
.new_from_index(0, n)
|
@@ -161,11 +161,11 @@ impl RbSeries {
|
|
161
161
|
}
|
162
162
|
|
163
163
|
pub fn name(&self) -> String {
|
164
|
-
self.series.borrow().name().
|
164
|
+
self.series.borrow().name().to_string()
|
165
165
|
}
|
166
166
|
|
167
167
|
pub fn rename(&self, name: String) {
|
168
|
-
self.series.borrow_mut().rename(
|
168
|
+
self.series.borrow_mut().rename(name.into());
|
169
169
|
}
|
170
170
|
|
171
171
|
pub fn dtype(&self) -> Value {
|
@@ -257,7 +257,7 @@ impl RbSeries {
|
|
257
257
|
let out = self
|
258
258
|
.series
|
259
259
|
.borrow()
|
260
|
-
.value_counts(sort, parallel, name, normalize)
|
260
|
+
.value_counts(sort, parallel, name.into(), normalize)
|
261
261
|
.map_err(RbPolarsErr::from)?;
|
262
262
|
Ok(out.into())
|
263
263
|
}
|
@@ -395,7 +395,7 @@ impl RbSeries {
|
|
395
395
|
.0
|
396
396
|
});
|
397
397
|
avs.extend(iter);
|
398
|
-
return Ok(Series::new(
|
398
|
+
return Ok(Series::new(self.name().into(), &avs).into());
|
399
399
|
}
|
400
400
|
|
401
401
|
let out = match output_type {
|
data/lib/polars/data_frame.rb
CHANGED
@@ -2426,15 +2426,15 @@ module Polars
|
|
2426
2426
|
# df.map_rows { |t| t[0] * 2 + t[1] }
|
2427
2427
|
# # =>
|
2428
2428
|
# # shape: (3, 1)
|
2429
|
-
# #
|
2430
|
-
# # │
|
2431
|
-
# # │ ---
|
2432
|
-
# # │ i64
|
2433
|
-
# #
|
2434
|
-
# # │ 1
|
2435
|
-
# # │ 9
|
2436
|
-
# # │ 14
|
2437
|
-
# #
|
2429
|
+
# # ┌─────┐
|
2430
|
+
# # │ map │
|
2431
|
+
# # │ --- │
|
2432
|
+
# # │ i64 │
|
2433
|
+
# # ╞═════╡
|
2434
|
+
# # │ 1 │
|
2435
|
+
# # │ 9 │
|
2436
|
+
# # │ 14 │
|
2437
|
+
# # └─────┘
|
2438
2438
|
def map_rows(return_dtype: nil, inference_size: 256, &f)
|
2439
2439
|
out, is_df = _df.map_rows(f, return_dtype, inference_size)
|
2440
2440
|
if is_df
|
@@ -4234,7 +4234,7 @@ module Polars
|
|
4234
4234
|
if n.nil? && !frac.nil?
|
4235
4235
|
frac = Series.new("frac", [frac]) unless frac.is_a?(Series)
|
4236
4236
|
|
4237
|
-
_from_rbdf(
|
4237
|
+
return _from_rbdf(
|
4238
4238
|
_df.sample_frac(frac._s, with_replacement, shuffle, seed)
|
4239
4239
|
)
|
4240
4240
|
end
|
data/lib/polars/expr.rb
CHANGED
@@ -1182,7 +1182,7 @@ module Polars
|
|
1182
1182
|
# "b" => [1, 1, 2, 2]
|
1183
1183
|
# }
|
1184
1184
|
# )
|
1185
|
-
# df.select(Polars.all.mode)
|
1185
|
+
# df.select(Polars.all.mode.first)
|
1186
1186
|
# # =>
|
1187
1187
|
# # shape: (2, 2)
|
1188
1188
|
# # ┌─────┬─────┐
|
@@ -6015,12 +6015,12 @@ module Polars
|
|
6015
6015
|
# # ┌──────┐
|
6016
6016
|
# # │ a │
|
6017
6017
|
# # │ --- │
|
6018
|
-
# # │
|
6018
|
+
# # │ f64 │
|
6019
6019
|
# # ╞══════╡
|
6020
|
-
# # │ -1
|
6021
|
-
# # │ 0
|
6022
|
-
# # │ 0
|
6023
|
-
# # │ 1
|
6020
|
+
# # │ -1.0 │
|
6021
|
+
# # │ -0.0 │
|
6022
|
+
# # │ 0.0 │
|
6023
|
+
# # │ 1.0 │
|
6024
6024
|
# # │ null │
|
6025
6025
|
# # └──────┘
|
6026
6026
|
def sign
|
data/lib/polars/io/ipc.rb
CHANGED
@@ -189,10 +189,6 @@ module Polars
|
|
189
189
|
# Offset to start the row_count column (only use if the name is set).
|
190
190
|
# @param storage_options [Hash]
|
191
191
|
# Extra options that make sense for a particular storage connection.
|
192
|
-
# @param memory_map [Boolean]
|
193
|
-
# Try to memory map the file. This can greatly improve performance on repeated
|
194
|
-
# queries as the OS may cache pages.
|
195
|
-
# Only uncompressed IPC files can be memory mapped.
|
196
192
|
# @param hive_partitioning [Boolean]
|
197
193
|
# Infer statistics and schema from Hive partitioned URL and use them
|
198
194
|
# to prune reads. This is unset by default (i.e. `nil`), meaning it is
|
@@ -215,7 +211,6 @@ module Polars
|
|
215
211
|
row_count_name: nil,
|
216
212
|
row_count_offset: 0,
|
217
213
|
storage_options: nil,
|
218
|
-
memory_map: true,
|
219
214
|
hive_partitioning: nil,
|
220
215
|
hive_schema: nil,
|
221
216
|
try_parse_hive_dates: true,
|
@@ -229,7 +224,6 @@ module Polars
|
|
229
224
|
row_count_name: row_count_name,
|
230
225
|
row_count_offset: row_count_offset,
|
231
226
|
storage_options: storage_options,
|
232
|
-
memory_map: memory_map,
|
233
227
|
hive_partitioning: hive_partitioning,
|
234
228
|
hive_schema: hive_schema,
|
235
229
|
try_parse_hive_dates: try_parse_hive_dates,
|
@@ -246,7 +240,6 @@ module Polars
|
|
246
240
|
row_count_name: nil,
|
247
241
|
row_count_offset: 0,
|
248
242
|
storage_options: nil,
|
249
|
-
memory_map: true,
|
250
243
|
hive_partitioning: nil,
|
251
244
|
hive_schema: nil,
|
252
245
|
try_parse_hive_dates: true,
|
@@ -263,7 +256,6 @@ module Polars
|
|
263
256
|
cache,
|
264
257
|
rechunk,
|
265
258
|
Utils.parse_row_index_args(row_count_name, row_count_offset),
|
266
|
-
memory_map,
|
267
259
|
hive_partitioning,
|
268
260
|
hive_schema,
|
269
261
|
try_parse_hive_dates,
|
data/lib/polars/series.rb
CHANGED
@@ -2606,12 +2606,12 @@ module Polars
|
|
2606
2606
|
# s.sign
|
2607
2607
|
# # =>
|
2608
2608
|
# # shape: (5,)
|
2609
|
-
# # Series: 'a' [
|
2609
|
+
# # Series: 'a' [f64]
|
2610
2610
|
# # [
|
2611
|
-
# # -1
|
2612
|
-
# # 0
|
2613
|
-
# # 0
|
2614
|
-
# # 1
|
2611
|
+
# # -1.0
|
2612
|
+
# # -0.0
|
2613
|
+
# # 0.0
|
2614
|
+
# # 1.0
|
2615
2615
|
# # null
|
2616
2616
|
# # ]
|
2617
2617
|
def sign
|
data/lib/polars/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: polars-df
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.13.0
|
4
|
+
version: 0.14.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-09-
|
11
|
+
date: 2024-09-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bigdecimal
|
@@ -199,7 +199,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
199
199
|
- !ruby/object:Gem::Version
|
200
200
|
version: '0'
|
201
201
|
requirements: []
|
202
|
-
rubygems_version: 3.5.
|
202
|
+
rubygems_version: 3.5.16
|
203
203
|
signing_key:
|
204
204
|
specification_version: 4
|
205
205
|
summary: Blazingly fast DataFrames for Ruby
|