polars-df 0.13.0 → 0.14.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,4 +1,4 @@
1
- use magnus::{prelude::*, RArray};
1
+ use magnus::{prelude::*, RArray, RString};
2
2
  use polars_core::prelude::*;
3
3
 
4
4
  use crate::any_value::rb_object_to_any_value;
@@ -10,7 +10,7 @@ use crate::{RbPolarsErr, RbResult, RbSeries, RbTypeError, RbValueError};
10
10
  impl RbSeries {
11
11
  pub fn new_opt_bool(name: String, obj: RArray, strict: bool) -> RbResult<RbSeries> {
12
12
  let len = obj.len();
13
- let mut builder = BooleanChunkedBuilder::new(&name, len);
13
+ let mut builder = BooleanChunkedBuilder::new(name.into(), len);
14
14
 
15
15
  unsafe {
16
16
  for item in obj.as_slice().iter() {
@@ -43,7 +43,7 @@ where
43
43
  T::Native: magnus::TryConvert,
44
44
  {
45
45
  let len = values.len();
46
- let mut builder = PrimitiveChunkedBuilder::<T>::new(name, len);
46
+ let mut builder = PrimitiveChunkedBuilder::<T>::new(name.into(), len);
47
47
 
48
48
  for res in values.into_iter() {
49
49
  let value = res;
@@ -96,7 +96,7 @@ impl RbSeries {
96
96
  // from anyvalues is fallible
97
97
  let result = any_values_result.and_then(|avs| {
98
98
  let avs = slice_extract_wrapped(&avs);
99
- let s = Series::from_any_values(&name, avs, strict).map_err(|e| {
99
+ let s = Series::from_any_values(name.clone().into(), avs, strict).map_err(|e| {
100
100
  RbTypeError::new_err(format!(
101
101
  "{e}\n\nHint: Try setting `strict: false` to allow passing data with mixed types."
102
102
  ))
@@ -122,44 +122,71 @@ impl RbSeries {
122
122
  .into_iter()
123
123
  .map(|v| rb_object_to_any_value(v, strict))
124
124
  .collect::<RbResult<Vec<AnyValue>>>()?;
125
- let s = Series::from_any_values_and_dtype(&name, any_values.as_slice(), &dtype.0, strict)
126
- .map_err(|e| {
127
- RbTypeError::new_err(format!(
125
+ let s =
126
+ Series::from_any_values_and_dtype(name.into(), any_values.as_slice(), &dtype.0, strict)
127
+ .map_err(|e| {
128
+ RbTypeError::new_err(format!(
128
129
  "{e}\n\nHint: Try setting `strict: false` to allow passing data with mixed types."
129
130
  ))
130
- })?;
131
+ })?;
131
132
  Ok(s.into())
132
133
  }
133
134
 
134
- pub fn new_str(name: String, val: Wrap<StringChunked>, _strict: bool) -> Self {
135
- let mut s = val.0.into_series();
136
- s.rename(&name);
137
- RbSeries::new(s)
138
- }
135
+ pub fn new_str(name: String, values: RArray, _strict: bool) -> RbResult<Self> {
136
+ let len = values.len();
137
+ let mut builder = StringChunkedBuilder::new(name.into(), len);
138
+
139
+ for res in values.into_iter() {
140
+ let value = res;
141
+ if value.is_nil() {
142
+ builder.append_null()
143
+ } else {
144
+ let v = String::try_convert(value)?;
145
+ builder.append_value(v)
146
+ }
147
+ }
139
148
 
140
- pub fn new_binary(name: String, val: Wrap<BinaryChunked>, _strict: bool) -> Self {
141
- let mut s = val.0.into_series();
142
- s.rename(&name);
143
- RbSeries::new(s)
149
+ let ca = builder.finish();
150
+ let s = ca.into_series();
151
+ Ok(s.into())
144
152
  }
145
153
 
146
- pub fn new_null(name: String, val: RArray, _strict: bool) -> RbResult<Self> {
147
- let s = Series::new_null(&name, val.len());
154
+ pub fn new_binary(name: String, values: RArray, _strict: bool) -> RbResult<Self> {
155
+ let len = values.len();
156
+ let mut builder = BinaryChunkedBuilder::new(name.into(), len);
157
+
158
+ for res in values.into_iter() {
159
+ let value = res;
160
+ if value.is_nil() {
161
+ builder.append_null()
162
+ } else {
163
+ let v = RString::try_convert(value)?;
164
+ builder.append_value(unsafe { v.as_slice() })
165
+ }
166
+ }
167
+
168
+ let ca = builder.finish();
169
+ let s = ca.into_series();
148
170
  Ok(s.into())
149
171
  }
150
172
 
173
+ pub fn new_null(name: String, values: RArray, _strict: bool) -> RbResult<Self> {
174
+ let len = values.len();
175
+ Ok(Series::new_null(name.into(), len).into())
176
+ }
177
+
151
178
  pub fn new_object(name: String, val: RArray, _strict: bool) -> RbResult<Self> {
152
179
  let val = val
153
180
  .into_iter()
154
181
  .map(ObjectValue::from)
155
182
  .collect::<Vec<ObjectValue>>();
156
- let s = ObjectChunked::<ObjectValue>::new_from_vec(&name, val).into_series();
183
+ let s = ObjectChunked::<ObjectValue>::new_from_vec(name.into(), val).into_series();
157
184
  Ok(s.into())
158
185
  }
159
186
 
160
187
  pub fn new_series_list(name: String, val: RArray, _strict: bool) -> RbResult<Self> {
161
188
  let series_vec = to_series_collection(val)?;
162
- Ok(Series::new(&name, &series_vec).into())
189
+ Ok(Series::new(name.into(), &series_vec).into())
163
190
  }
164
191
 
165
192
  pub fn new_array(
@@ -171,7 +198,7 @@ impl RbSeries {
171
198
  ) -> RbResult<Self> {
172
199
  let val = vec_wrap_any_value(val)?;
173
200
  let val = vec_extract_wrapped(val);
174
- let out = Series::new(&name, &val);
201
+ let out = Series::new(name.into(), &val);
175
202
  match out.dtype() {
176
203
  DataType::List(list_inner) => {
177
204
  let out = out
@@ -199,7 +226,7 @@ impl RbSeries {
199
226
  dtype: Wrap<DataType>,
200
227
  ) -> RbResult<Self> {
201
228
  let av = val.0;
202
- Ok(Series::new(&name, &[av])
229
+ Ok(Series::new(name.into(), &[av])
203
230
  .cast(&dtype.0)
204
231
  .map_err(RbPolarsErr::from)?
205
232
  .new_from_index(0, n)
@@ -161,11 +161,11 @@ impl RbSeries {
161
161
  }
162
162
 
163
163
  pub fn name(&self) -> String {
164
- self.series.borrow().name().into()
164
+ self.series.borrow().name().to_string()
165
165
  }
166
166
 
167
167
  pub fn rename(&self, name: String) {
168
- self.series.borrow_mut().rename(&name);
168
+ self.series.borrow_mut().rename(name.into());
169
169
  }
170
170
 
171
171
  pub fn dtype(&self) -> Value {
@@ -257,7 +257,7 @@ impl RbSeries {
257
257
  let out = self
258
258
  .series
259
259
  .borrow()
260
- .value_counts(sort, parallel, name, normalize)
260
+ .value_counts(sort, parallel, name.into(), normalize)
261
261
  .map_err(RbPolarsErr::from)?;
262
262
  Ok(out.into())
263
263
  }
@@ -395,7 +395,7 @@ impl RbSeries {
395
395
  .0
396
396
  });
397
397
  avs.extend(iter);
398
- return Ok(Series::new(&self.name(), &avs).into());
398
+ return Ok(Series::new(self.name().into(), &avs).into());
399
399
  }
400
400
 
401
401
  let out = match output_type {
@@ -2426,15 +2426,15 @@ module Polars
2426
2426
  # df.map_rows { |t| t[0] * 2 + t[1] }
2427
2427
  # # =>
2428
2428
  # # shape: (3, 1)
2429
- # # ┌───────┐
2430
- # # │ apply │
2431
- # # │ ---   │
2432
- # # │ i64   │
2433
- # # ╞═══════╡
2434
- # # │ 1     │
2435
- # # │ 9     │
2436
- # # │ 14    │
2437
- # # └───────┘
2429
+ # # ┌─────┐
2430
+ # # │ map │
2431
+ # # │ --- │
2432
+ # # │ i64 │
2433
+ # # ╞═════╡
2434
+ # # │ 1   │
2435
+ # # │ 9   │
2436
+ # # │ 14  │
2437
+ # # └─────┘
2438
2438
  def map_rows(return_dtype: nil, inference_size: 256, &f)
2439
2439
  out, is_df = _df.map_rows(f, return_dtype, inference_size)
2440
2440
  if is_df
@@ -4234,7 +4234,7 @@ module Polars
4234
4234
  if n.nil? && !frac.nil?
4235
4235
  frac = Series.new("frac", [frac]) unless frac.is_a?(Series)
4236
4236
 
4237
- _from_rbdf(
4237
+ return _from_rbdf(
4238
4238
  _df.sample_frac(frac._s, with_replacement, shuffle, seed)
4239
4239
  )
4240
4240
  end
data/lib/polars/expr.rb CHANGED
@@ -1182,7 +1182,7 @@ module Polars
1182
1182
  # "b" => [1, 1, 2, 2]
1183
1183
  # }
1184
1184
  # )
1185
- # df.select(Polars.all.mode)
1185
+ # df.select(Polars.all.mode.first)
1186
1186
  # # =>
1187
1187
  # # shape: (2, 2)
1188
1188
  # # ┌─────┬─────┐
@@ -6015,12 +6015,12 @@ module Polars
6015
6015
  # # ┌──────┐
6016
6016
  # # │ a    │
6017
6017
  # # │ ---  │
6018
- # # │ i64  │
6018
+ # # │ f64  │
6019
6019
  # # ╞══════╡
6020
- # # │ -1   │
6021
- # # │ 0    │
6022
- # # │ 0    │
6023
- # # │ 1    │
6020
+ # # │ -1.0 │
6021
+ # # │ -0.0 │
6022
+ # # │ 0.0  │
6023
+ # # │ 1.0  │
6024
6024
  # # │ null │
6025
6025
  # # └──────┘
6026
6026
  def sign
data/lib/polars/io/ipc.rb CHANGED
@@ -189,10 +189,6 @@ module Polars
189
189
  # Offset to start the row_count column (only use if the name is set).
190
190
  # @param storage_options [Hash]
191
191
  # Extra options that make sense for a particular storage connection.
192
- # @param memory_map [Boolean]
193
- # Try to memory map the file. This can greatly improve performance on repeated
194
- # queries as the OS may cache pages.
195
- # Only uncompressed IPC files can be memory mapped.
196
192
  # @param hive_partitioning [Boolean]
197
193
  # Infer statistics and schema from Hive partitioned URL and use them
198
194
  # to prune reads. This is unset by default (i.e. `nil`), meaning it is
@@ -215,7 +211,6 @@ module Polars
215
211
  row_count_name: nil,
216
212
  row_count_offset: 0,
217
213
  storage_options: nil,
218
- memory_map: true,
219
214
  hive_partitioning: nil,
220
215
  hive_schema: nil,
221
216
  try_parse_hive_dates: true,
@@ -229,7 +224,6 @@ module Polars
229
224
  row_count_name: row_count_name,
230
225
  row_count_offset: row_count_offset,
231
226
  storage_options: storage_options,
232
- memory_map: memory_map,
233
227
  hive_partitioning: hive_partitioning,
234
228
  hive_schema: hive_schema,
235
229
  try_parse_hive_dates: try_parse_hive_dates,
@@ -246,7 +240,6 @@ module Polars
246
240
  row_count_name: nil,
247
241
  row_count_offset: 0,
248
242
  storage_options: nil,
249
- memory_map: true,
250
243
  hive_partitioning: nil,
251
244
  hive_schema: nil,
252
245
  try_parse_hive_dates: true,
@@ -263,7 +256,6 @@ module Polars
263
256
  cache,
264
257
  rechunk,
265
258
  Utils.parse_row_index_args(row_count_name, row_count_offset),
266
- memory_map,
267
259
  hive_partitioning,
268
260
  hive_schema,
269
261
  try_parse_hive_dates,
data/lib/polars/series.rb CHANGED
@@ -2606,12 +2606,12 @@ module Polars
2606
2606
  # s.sign
2607
2607
  # # =>
2608
2608
  # # shape: (5,)
2609
- # # Series: 'a' [i64]
2609
+ # # Series: 'a' [f64]
2610
2610
  # # [
2611
- # # -1
2612
- # # 0
2613
- # # 0
2614
- # # 1
2611
+ # # -1.0
2612
+ # # -0.0
2613
+ # # 0.0
2614
+ # # 1.0
2615
2615
  # # null
2616
2616
  # # ]
2617
2617
  def sign
@@ -1,4 +1,4 @@
1
1
  module Polars
2
2
  # @private
3
- VERSION = "0.13.0"
3
+ VERSION = "0.14.0"
4
4
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: polars-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.13.0
4
+ version: 0.14.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-09-05 00:00:00.000000000 Z
11
+ date: 2024-09-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bigdecimal
@@ -199,7 +199,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
199
199
  - !ruby/object:Gem::Version
200
200
  version: '0'
201
201
  requirements: []
202
- rubygems_version: 3.5.11
202
+ rubygems_version: 3.5.16
203
203
  signing_key:
204
204
  specification_version: 4
205
205
  summary: Blazingly fast DataFrames for Ruby