polars-df 0.13.0 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/Cargo.lock +177 -141
- data/ext/polars/Cargo.toml +5 -6
- data/ext/polars/src/batched_csv.rs +3 -3
- data/ext/polars/src/conversion/any_value.rs +10 -4
- data/ext/polars/src/conversion/chunked_array.rs +3 -3
- data/ext/polars/src/conversion/mod.rs +36 -20
- data/ext/polars/src/dataframe/construction.rs +4 -4
- data/ext/polars/src/dataframe/general.rs +6 -5
- data/ext/polars/src/dataframe/io.rs +6 -6
- data/ext/polars/src/expr/datetime.rs +11 -3
- data/ext/polars/src/expr/general.rs +1 -1
- data/ext/polars/src/expr/name.rs +3 -2
- data/ext/polars/src/expr/string.rs +8 -1
- data/ext/polars/src/functions/io.rs +6 -6
- data/ext/polars/src/functions/range.rs +4 -2
- data/ext/polars/src/lazyframe/mod.rs +18 -16
- data/ext/polars/src/lib.rs +1 -1
- data/ext/polars/src/map/dataframe.rs +36 -8
- data/ext/polars/src/map/mod.rs +8 -8
- data/ext/polars/src/map/series.rs +106 -64
- data/ext/polars/src/on_startup.rs +1 -1
- data/ext/polars/src/series/construction.rs +50 -23
- data/ext/polars/src/series/mod.rs +4 -4
- data/lib/polars/data_frame.rb +10 -10
- data/lib/polars/expr.rb +6 -6
- data/lib/polars/io/ipc.rb +0 -8
- data/lib/polars/series.rb +5 -5
- data/lib/polars/version.rb +1 -1
- metadata +3 -3
@@ -1,4 +1,4 @@
|
|
1
|
-
use magnus::{prelude::*, RArray};
|
1
|
+
use magnus::{prelude::*, RArray, RString};
|
2
2
|
use polars_core::prelude::*;
|
3
3
|
|
4
4
|
use crate::any_value::rb_object_to_any_value;
|
@@ -10,7 +10,7 @@ use crate::{RbPolarsErr, RbResult, RbSeries, RbTypeError, RbValueError};
|
|
10
10
|
impl RbSeries {
|
11
11
|
pub fn new_opt_bool(name: String, obj: RArray, strict: bool) -> RbResult<RbSeries> {
|
12
12
|
let len = obj.len();
|
13
|
-
let mut builder = BooleanChunkedBuilder::new(
|
13
|
+
let mut builder = BooleanChunkedBuilder::new(name.into(), len);
|
14
14
|
|
15
15
|
unsafe {
|
16
16
|
for item in obj.as_slice().iter() {
|
@@ -43,7 +43,7 @@ where
|
|
43
43
|
T::Native: magnus::TryConvert,
|
44
44
|
{
|
45
45
|
let len = values.len();
|
46
|
-
let mut builder = PrimitiveChunkedBuilder::<T>::new(name, len);
|
46
|
+
let mut builder = PrimitiveChunkedBuilder::<T>::new(name.into(), len);
|
47
47
|
|
48
48
|
for res in values.into_iter() {
|
49
49
|
let value = res;
|
@@ -96,7 +96,7 @@ impl RbSeries {
|
|
96
96
|
// from anyvalues is fallible
|
97
97
|
let result = any_values_result.and_then(|avs| {
|
98
98
|
let avs = slice_extract_wrapped(&avs);
|
99
|
-
let s = Series::from_any_values(
|
99
|
+
let s = Series::from_any_values(name.clone().into(), avs, strict).map_err(|e| {
|
100
100
|
RbTypeError::new_err(format!(
|
101
101
|
"{e}\n\nHint: Try setting `strict: false` to allow passing data with mixed types."
|
102
102
|
))
|
@@ -122,44 +122,71 @@ impl RbSeries {
|
|
122
122
|
.into_iter()
|
123
123
|
.map(|v| rb_object_to_any_value(v, strict))
|
124
124
|
.collect::<RbResult<Vec<AnyValue>>>()?;
|
125
|
-
let s =
|
126
|
-
.
|
127
|
-
|
125
|
+
let s =
|
126
|
+
Series::from_any_values_and_dtype(name.into(), any_values.as_slice(), &dtype.0, strict)
|
127
|
+
.map_err(|e| {
|
128
|
+
RbTypeError::new_err(format!(
|
128
129
|
"{e}\n\nHint: Try setting `strict: false` to allow passing data with mixed types."
|
129
130
|
))
|
130
|
-
|
131
|
+
})?;
|
131
132
|
Ok(s.into())
|
132
133
|
}
|
133
134
|
|
134
|
-
pub fn new_str(name: String,
|
135
|
-
let
|
136
|
-
|
137
|
-
|
138
|
-
|
135
|
+
pub fn new_str(name: String, values: RArray, _strict: bool) -> RbResult<Self> {
|
136
|
+
let len = values.len();
|
137
|
+
let mut builder = StringChunkedBuilder::new(name.into(), len);
|
138
|
+
|
139
|
+
for res in values.into_iter() {
|
140
|
+
let value = res;
|
141
|
+
if value.is_nil() {
|
142
|
+
builder.append_null()
|
143
|
+
} else {
|
144
|
+
let v = String::try_convert(value)?;
|
145
|
+
builder.append_value(v)
|
146
|
+
}
|
147
|
+
}
|
139
148
|
|
140
|
-
|
141
|
-
let
|
142
|
-
s.
|
143
|
-
RbSeries::new(s)
|
149
|
+
let ca = builder.finish();
|
150
|
+
let s = ca.into_series();
|
151
|
+
Ok(s.into())
|
144
152
|
}
|
145
153
|
|
146
|
-
pub fn
|
147
|
-
let
|
154
|
+
pub fn new_binary(name: String, values: RArray, _strict: bool) -> RbResult<Self> {
|
155
|
+
let len = values.len();
|
156
|
+
let mut builder = BinaryChunkedBuilder::new(name.into(), len);
|
157
|
+
|
158
|
+
for res in values.into_iter() {
|
159
|
+
let value = res;
|
160
|
+
if value.is_nil() {
|
161
|
+
builder.append_null()
|
162
|
+
} else {
|
163
|
+
let v = RString::try_convert(value)?;
|
164
|
+
builder.append_value(unsafe { v.as_slice() })
|
165
|
+
}
|
166
|
+
}
|
167
|
+
|
168
|
+
let ca = builder.finish();
|
169
|
+
let s = ca.into_series();
|
148
170
|
Ok(s.into())
|
149
171
|
}
|
150
172
|
|
173
|
+
pub fn new_null(name: String, values: RArray, _strict: bool) -> RbResult<Self> {
|
174
|
+
let len = values.len();
|
175
|
+
Ok(Series::new_null(name.into(), len).into())
|
176
|
+
}
|
177
|
+
|
151
178
|
pub fn new_object(name: String, val: RArray, _strict: bool) -> RbResult<Self> {
|
152
179
|
let val = val
|
153
180
|
.into_iter()
|
154
181
|
.map(ObjectValue::from)
|
155
182
|
.collect::<Vec<ObjectValue>>();
|
156
|
-
let s = ObjectChunked::<ObjectValue>::new_from_vec(
|
183
|
+
let s = ObjectChunked::<ObjectValue>::new_from_vec(name.into(), val).into_series();
|
157
184
|
Ok(s.into())
|
158
185
|
}
|
159
186
|
|
160
187
|
pub fn new_series_list(name: String, val: RArray, _strict: bool) -> RbResult<Self> {
|
161
188
|
let series_vec = to_series_collection(val)?;
|
162
|
-
Ok(Series::new(
|
189
|
+
Ok(Series::new(name.into(), &series_vec).into())
|
163
190
|
}
|
164
191
|
|
165
192
|
pub fn new_array(
|
@@ -171,7 +198,7 @@ impl RbSeries {
|
|
171
198
|
) -> RbResult<Self> {
|
172
199
|
let val = vec_wrap_any_value(val)?;
|
173
200
|
let val = vec_extract_wrapped(val);
|
174
|
-
let out = Series::new(
|
201
|
+
let out = Series::new(name.into(), &val);
|
175
202
|
match out.dtype() {
|
176
203
|
DataType::List(list_inner) => {
|
177
204
|
let out = out
|
@@ -199,7 +226,7 @@ impl RbSeries {
|
|
199
226
|
dtype: Wrap<DataType>,
|
200
227
|
) -> RbResult<Self> {
|
201
228
|
let av = val.0;
|
202
|
-
Ok(Series::new(
|
229
|
+
Ok(Series::new(name.into(), &[av])
|
203
230
|
.cast(&dtype.0)
|
204
231
|
.map_err(RbPolarsErr::from)?
|
205
232
|
.new_from_index(0, n)
|
@@ -161,11 +161,11 @@ impl RbSeries {
|
|
161
161
|
}
|
162
162
|
|
163
163
|
pub fn name(&self) -> String {
|
164
|
-
self.series.borrow().name().
|
164
|
+
self.series.borrow().name().to_string()
|
165
165
|
}
|
166
166
|
|
167
167
|
pub fn rename(&self, name: String) {
|
168
|
-
self.series.borrow_mut().rename(
|
168
|
+
self.series.borrow_mut().rename(name.into());
|
169
169
|
}
|
170
170
|
|
171
171
|
pub fn dtype(&self) -> Value {
|
@@ -257,7 +257,7 @@ impl RbSeries {
|
|
257
257
|
let out = self
|
258
258
|
.series
|
259
259
|
.borrow()
|
260
|
-
.value_counts(sort, parallel, name, normalize)
|
260
|
+
.value_counts(sort, parallel, name.into(), normalize)
|
261
261
|
.map_err(RbPolarsErr::from)?;
|
262
262
|
Ok(out.into())
|
263
263
|
}
|
@@ -395,7 +395,7 @@ impl RbSeries {
|
|
395
395
|
.0
|
396
396
|
});
|
397
397
|
avs.extend(iter);
|
398
|
-
return Ok(Series::new(
|
398
|
+
return Ok(Series::new(self.name().into(), &avs).into());
|
399
399
|
}
|
400
400
|
|
401
401
|
let out = match output_type {
|
data/lib/polars/data_frame.rb
CHANGED
@@ -2426,15 +2426,15 @@ module Polars
|
|
2426
2426
|
# df.map_rows { |t| t[0] * 2 + t[1] }
|
2427
2427
|
# # =>
|
2428
2428
|
# # shape: (3, 1)
|
2429
|
-
# #
|
2430
|
-
# # │
|
2431
|
-
# # │ ---
|
2432
|
-
# # │ i64
|
2433
|
-
# #
|
2434
|
-
# # │ 1
|
2435
|
-
# # │ 9
|
2436
|
-
# # │ 14
|
2437
|
-
# #
|
2429
|
+
# # ┌─────┐
|
2430
|
+
# # │ map │
|
2431
|
+
# # │ --- │
|
2432
|
+
# # │ i64 │
|
2433
|
+
# # ╞═════╡
|
2434
|
+
# # │ 1 │
|
2435
|
+
# # │ 9 │
|
2436
|
+
# # │ 14 │
|
2437
|
+
# # └─────┘
|
2438
2438
|
def map_rows(return_dtype: nil, inference_size: 256, &f)
|
2439
2439
|
out, is_df = _df.map_rows(f, return_dtype, inference_size)
|
2440
2440
|
if is_df
|
@@ -4234,7 +4234,7 @@ module Polars
|
|
4234
4234
|
if n.nil? && !frac.nil?
|
4235
4235
|
frac = Series.new("frac", [frac]) unless frac.is_a?(Series)
|
4236
4236
|
|
4237
|
-
_from_rbdf(
|
4237
|
+
return _from_rbdf(
|
4238
4238
|
_df.sample_frac(frac._s, with_replacement, shuffle, seed)
|
4239
4239
|
)
|
4240
4240
|
end
|
data/lib/polars/expr.rb
CHANGED
@@ -1182,7 +1182,7 @@ module Polars
|
|
1182
1182
|
# "b" => [1, 1, 2, 2]
|
1183
1183
|
# }
|
1184
1184
|
# )
|
1185
|
-
# df.select(Polars.all.mode)
|
1185
|
+
# df.select(Polars.all.mode.first)
|
1186
1186
|
# # =>
|
1187
1187
|
# # shape: (2, 2)
|
1188
1188
|
# # ┌─────┬─────┐
|
@@ -6015,12 +6015,12 @@ module Polars
|
|
6015
6015
|
# # ┌──────┐
|
6016
6016
|
# # │ a │
|
6017
6017
|
# # │ --- │
|
6018
|
-
# # │
|
6018
|
+
# # │ f64 │
|
6019
6019
|
# # ╞══════╡
|
6020
|
-
# # │ -1
|
6021
|
-
# # │ 0
|
6022
|
-
# # │ 0
|
6023
|
-
# # │ 1
|
6020
|
+
# # │ -1.0 │
|
6021
|
+
# # │ -0.0 │
|
6022
|
+
# # │ 0.0 │
|
6023
|
+
# # │ 1.0 │
|
6024
6024
|
# # │ null │
|
6025
6025
|
# # └──────┘
|
6026
6026
|
def sign
|
data/lib/polars/io/ipc.rb
CHANGED
@@ -189,10 +189,6 @@ module Polars
|
|
189
189
|
# Offset to start the row_count column (only use if the name is set).
|
190
190
|
# @param storage_options [Hash]
|
191
191
|
# Extra options that make sense for a particular storage connection.
|
192
|
-
# @param memory_map [Boolean]
|
193
|
-
# Try to memory map the file. This can greatly improve performance on repeated
|
194
|
-
# queries as the OS may cache pages.
|
195
|
-
# Only uncompressed IPC files can be memory mapped.
|
196
192
|
# @param hive_partitioning [Boolean]
|
197
193
|
# Infer statistics and schema from Hive partitioned URL and use them
|
198
194
|
# to prune reads. This is unset by default (i.e. `nil`), meaning it is
|
@@ -215,7 +211,6 @@ module Polars
|
|
215
211
|
row_count_name: nil,
|
216
212
|
row_count_offset: 0,
|
217
213
|
storage_options: nil,
|
218
|
-
memory_map: true,
|
219
214
|
hive_partitioning: nil,
|
220
215
|
hive_schema: nil,
|
221
216
|
try_parse_hive_dates: true,
|
@@ -229,7 +224,6 @@ module Polars
|
|
229
224
|
row_count_name: row_count_name,
|
230
225
|
row_count_offset: row_count_offset,
|
231
226
|
storage_options: storage_options,
|
232
|
-
memory_map: memory_map,
|
233
227
|
hive_partitioning: hive_partitioning,
|
234
228
|
hive_schema: hive_schema,
|
235
229
|
try_parse_hive_dates: try_parse_hive_dates,
|
@@ -246,7 +240,6 @@ module Polars
|
|
246
240
|
row_count_name: nil,
|
247
241
|
row_count_offset: 0,
|
248
242
|
storage_options: nil,
|
249
|
-
memory_map: true,
|
250
243
|
hive_partitioning: nil,
|
251
244
|
hive_schema: nil,
|
252
245
|
try_parse_hive_dates: true,
|
@@ -263,7 +256,6 @@ module Polars
|
|
263
256
|
cache,
|
264
257
|
rechunk,
|
265
258
|
Utils.parse_row_index_args(row_count_name, row_count_offset),
|
266
|
-
memory_map,
|
267
259
|
hive_partitioning,
|
268
260
|
hive_schema,
|
269
261
|
try_parse_hive_dates,
|
data/lib/polars/series.rb
CHANGED
@@ -2606,12 +2606,12 @@ module Polars
|
|
2606
2606
|
# s.sign
|
2607
2607
|
# # =>
|
2608
2608
|
# # shape: (5,)
|
2609
|
-
# # Series: 'a' [
|
2609
|
+
# # Series: 'a' [f64]
|
2610
2610
|
# # [
|
2611
|
-
# # -1
|
2612
|
-
# # 0
|
2613
|
-
# # 0
|
2614
|
-
# # 1
|
2611
|
+
# # -1.0
|
2612
|
+
# # -0.0
|
2613
|
+
# # 0.0
|
2614
|
+
# # 1.0
|
2615
2615
|
# # null
|
2616
2616
|
# # ]
|
2617
2617
|
def sign
|
data/lib/polars/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: polars-df
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.13.0
|
4
|
+
version: 0.14.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-09-
|
11
|
+
date: 2024-09-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bigdecimal
|
@@ -199,7 +199,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
199
199
|
- !ruby/object:Gem::Version
|
200
200
|
version: '0'
|
201
201
|
requirements: []
|
202
|
-
rubygems_version: 3.5.
|
202
|
+
rubygems_version: 3.5.16
|
203
203
|
signing_key:
|
204
204
|
specification_version: 4
|
205
205
|
summary: Blazingly fast DataFrames for Ruby
|