polars-df 0.13.0 → 0.14.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- use magnus::{prelude::*, RArray};
1
+ use magnus::{prelude::*, RArray, RString};
2
2
  use polars_core::prelude::*;
3
3
 
4
4
  use crate::any_value::rb_object_to_any_value;
@@ -10,7 +10,7 @@ use crate::{RbPolarsErr, RbResult, RbSeries, RbTypeError, RbValueError};
10
10
  impl RbSeries {
11
11
  pub fn new_opt_bool(name: String, obj: RArray, strict: bool) -> RbResult<RbSeries> {
12
12
  let len = obj.len();
13
- let mut builder = BooleanChunkedBuilder::new(&name, len);
13
+ let mut builder = BooleanChunkedBuilder::new(name.into(), len);
14
14
 
15
15
  unsafe {
16
16
  for item in obj.as_slice().iter() {
@@ -43,7 +43,7 @@ where
43
43
  T::Native: magnus::TryConvert,
44
44
  {
45
45
  let len = values.len();
46
- let mut builder = PrimitiveChunkedBuilder::<T>::new(name, len);
46
+ let mut builder = PrimitiveChunkedBuilder::<T>::new(name.into(), len);
47
47
 
48
48
  for res in values.into_iter() {
49
49
  let value = res;
@@ -96,7 +96,7 @@ impl RbSeries {
96
96
  // from anyvalues is fallible
97
97
  let result = any_values_result.and_then(|avs| {
98
98
  let avs = slice_extract_wrapped(&avs);
99
- let s = Series::from_any_values(&name, avs, strict).map_err(|e| {
99
+ let s = Series::from_any_values(name.clone().into(), avs, strict).map_err(|e| {
100
100
  RbTypeError::new_err(format!(
101
101
  "{e}\n\nHint: Try setting `strict: false` to allow passing data with mixed types."
102
102
  ))
@@ -122,44 +122,71 @@ impl RbSeries {
122
122
  .into_iter()
123
123
  .map(|v| rb_object_to_any_value(v, strict))
124
124
  .collect::<RbResult<Vec<AnyValue>>>()?;
125
- let s = Series::from_any_values_and_dtype(&name, any_values.as_slice(), &dtype.0, strict)
126
- .map_err(|e| {
127
- RbTypeError::new_err(format!(
125
+ let s =
126
+ Series::from_any_values_and_dtype(name.into(), any_values.as_slice(), &dtype.0, strict)
127
+ .map_err(|e| {
128
+ RbTypeError::new_err(format!(
128
129
  "{e}\n\nHint: Try setting `strict: false` to allow passing data with mixed types."
129
130
  ))
130
- })?;
131
+ })?;
131
132
  Ok(s.into())
132
133
  }
133
134
 
134
- pub fn new_str(name: String, val: Wrap<StringChunked>, _strict: bool) -> Self {
135
- let mut s = val.0.into_series();
136
- s.rename(&name);
137
- RbSeries::new(s)
138
- }
135
+ pub fn new_str(name: String, values: RArray, _strict: bool) -> RbResult<Self> {
136
+ let len = values.len();
137
+ let mut builder = StringChunkedBuilder::new(name.into(), len);
138
+
139
+ for res in values.into_iter() {
140
+ let value = res;
141
+ if value.is_nil() {
142
+ builder.append_null()
143
+ } else {
144
+ let v = String::try_convert(value)?;
145
+ builder.append_value(v)
146
+ }
147
+ }
139
148
 
140
- pub fn new_binary(name: String, val: Wrap<BinaryChunked>, _strict: bool) -> Self {
141
- let mut s = val.0.into_series();
142
- s.rename(&name);
143
- RbSeries::new(s)
149
+ let ca = builder.finish();
150
+ let s = ca.into_series();
151
+ Ok(s.into())
144
152
  }
145
153
 
146
- pub fn new_null(name: String, val: RArray, _strict: bool) -> RbResult<Self> {
147
- let s = Series::new_null(&name, val.len());
154
+ pub fn new_binary(name: String, values: RArray, _strict: bool) -> RbResult<Self> {
155
+ let len = values.len();
156
+ let mut builder = BinaryChunkedBuilder::new(name.into(), len);
157
+
158
+ for res in values.into_iter() {
159
+ let value = res;
160
+ if value.is_nil() {
161
+ builder.append_null()
162
+ } else {
163
+ let v = RString::try_convert(value)?;
164
+ builder.append_value(unsafe { v.as_slice() })
165
+ }
166
+ }
167
+
168
+ let ca = builder.finish();
169
+ let s = ca.into_series();
148
170
  Ok(s.into())
149
171
  }
150
172
 
173
+ pub fn new_null(name: String, values: RArray, _strict: bool) -> RbResult<Self> {
174
+ let len = values.len();
175
+ Ok(Series::new_null(name.into(), len).into())
176
+ }
177
+
151
178
  pub fn new_object(name: String, val: RArray, _strict: bool) -> RbResult<Self> {
152
179
  let val = val
153
180
  .into_iter()
154
181
  .map(ObjectValue::from)
155
182
  .collect::<Vec<ObjectValue>>();
156
- let s = ObjectChunked::<ObjectValue>::new_from_vec(&name, val).into_series();
183
+ let s = ObjectChunked::<ObjectValue>::new_from_vec(name.into(), val).into_series();
157
184
  Ok(s.into())
158
185
  }
159
186
 
160
187
  pub fn new_series_list(name: String, val: RArray, _strict: bool) -> RbResult<Self> {
161
188
  let series_vec = to_series_collection(val)?;
162
- Ok(Series::new(&name, &series_vec).into())
189
+ Ok(Series::new(name.into(), &series_vec).into())
163
190
  }
164
191
 
165
192
  pub fn new_array(
@@ -171,7 +198,7 @@ impl RbSeries {
171
198
  ) -> RbResult<Self> {
172
199
  let val = vec_wrap_any_value(val)?;
173
200
  let val = vec_extract_wrapped(val);
174
- let out = Series::new(&name, &val);
201
+ let out = Series::new(name.into(), &val);
175
202
  match out.dtype() {
176
203
  DataType::List(list_inner) => {
177
204
  let out = out
@@ -199,7 +226,7 @@ impl RbSeries {
199
226
  dtype: Wrap<DataType>,
200
227
  ) -> RbResult<Self> {
201
228
  let av = val.0;
202
- Ok(Series::new(&name, &[av])
229
+ Ok(Series::new(name.into(), &[av])
203
230
  .cast(&dtype.0)
204
231
  .map_err(RbPolarsErr::from)?
205
232
  .new_from_index(0, n)
@@ -161,11 +161,11 @@ impl RbSeries {
161
161
  }
162
162
 
163
163
  pub fn name(&self) -> String {
164
- self.series.borrow().name().into()
164
+ self.series.borrow().name().to_string()
165
165
  }
166
166
 
167
167
  pub fn rename(&self, name: String) {
168
- self.series.borrow_mut().rename(&name);
168
+ self.series.borrow_mut().rename(name.into());
169
169
  }
170
170
 
171
171
  pub fn dtype(&self) -> Value {
@@ -257,7 +257,7 @@ impl RbSeries {
257
257
  let out = self
258
258
  .series
259
259
  .borrow()
260
- .value_counts(sort, parallel, name, normalize)
260
+ .value_counts(sort, parallel, name.into(), normalize)
261
261
  .map_err(RbPolarsErr::from)?;
262
262
  Ok(out.into())
263
263
  }
@@ -395,7 +395,7 @@ impl RbSeries {
395
395
  .0
396
396
  });
397
397
  avs.extend(iter);
398
- return Ok(Series::new(&self.name(), &avs).into());
398
+ return Ok(Series::new(self.name().into(), &avs).into());
399
399
  }
400
400
 
401
401
  let out = match output_type {
@@ -2426,15 +2426,15 @@ module Polars
2426
2426
  # df.map_rows { |t| t[0] * 2 + t[1] }
2427
2427
  # # =>
2428
2428
  # # shape: (3, 1)
2429
- # # ┌───────┐
2430
- # # │ apply
2431
- # # │ ---
2432
- # # │ i64
2433
- # # ╞═══════╡
2434
- # # │ 1
2435
- # # │ 9
2436
- # # │ 14
2437
- # # └───────┘
2429
+ # # ┌─────┐
2430
+ # # │ map
2431
+ # # │ ---
2432
+ # # │ i64
2433
+ # # ╞═════╡
2434
+ # # │ 1
2435
+ # # │ 9
2436
+ # # │ 14
2437
+ # # └─────┘
2438
2438
  def map_rows(return_dtype: nil, inference_size: 256, &f)
2439
2439
  out, is_df = _df.map_rows(f, return_dtype, inference_size)
2440
2440
  if is_df
@@ -4234,7 +4234,7 @@ module Polars
4234
4234
  if n.nil? && !frac.nil?
4235
4235
  frac = Series.new("frac", [frac]) unless frac.is_a?(Series)
4236
4236
 
4237
- _from_rbdf(
4237
+ return _from_rbdf(
4238
4238
  _df.sample_frac(frac._s, with_replacement, shuffle, seed)
4239
4239
  )
4240
4240
  end
data/lib/polars/expr.rb CHANGED
@@ -1182,7 +1182,7 @@ module Polars
1182
1182
  # "b" => [1, 1, 2, 2]
1183
1183
  # }
1184
1184
  # )
1185
- # df.select(Polars.all.mode)
1185
+ # df.select(Polars.all.mode.first)
1186
1186
  # # =>
1187
1187
  # # shape: (2, 2)
1188
1188
  # # ┌─────┬─────┐
@@ -6015,12 +6015,12 @@ module Polars
6015
6015
  # # ┌──────┐
6016
6016
  # # │ a │
6017
6017
  # # │ --- │
6018
- # # │ i64
6018
+ # # │ f64
6019
6019
  # # ╞══════╡
6020
- # # │ -1
6021
- # # │ 0
6022
- # # │ 0
6023
- # # │ 1
6020
+ # # │ -1.0
6021
+ # # │ -0.0
6022
+ # # │ 0.0
6023
+ # # │ 1.0
6024
6024
  # # │ null │
6025
6025
  # # └──────┘
6026
6026
  def sign
data/lib/polars/io/ipc.rb CHANGED
@@ -189,10 +189,6 @@ module Polars
189
189
  # Offset to start the row_count column (only use if the name is set).
190
190
  # @param storage_options [Hash]
191
191
  # Extra options that make sense for a particular storage connection.
192
- # @param memory_map [Boolean]
193
- # Try to memory map the file. This can greatly improve performance on repeated
194
- # queries as the OS may cache pages.
195
- # Only uncompressed IPC files can be memory mapped.
196
192
  # @param hive_partitioning [Boolean]
197
193
  # Infer statistics and schema from Hive partitioned URL and use them
198
194
  # to prune reads. This is unset by default (i.e. `nil`), meaning it is
@@ -215,7 +211,6 @@ module Polars
215
211
  row_count_name: nil,
216
212
  row_count_offset: 0,
217
213
  storage_options: nil,
218
- memory_map: true,
219
214
  hive_partitioning: nil,
220
215
  hive_schema: nil,
221
216
  try_parse_hive_dates: true,
@@ -229,7 +224,6 @@ module Polars
229
224
  row_count_name: row_count_name,
230
225
  row_count_offset: row_count_offset,
231
226
  storage_options: storage_options,
232
- memory_map: memory_map,
233
227
  hive_partitioning: hive_partitioning,
234
228
  hive_schema: hive_schema,
235
229
  try_parse_hive_dates: try_parse_hive_dates,
@@ -246,7 +240,6 @@ module Polars
246
240
  row_count_name: nil,
247
241
  row_count_offset: 0,
248
242
  storage_options: nil,
249
- memory_map: true,
250
243
  hive_partitioning: nil,
251
244
  hive_schema: nil,
252
245
  try_parse_hive_dates: true,
@@ -263,7 +256,6 @@ module Polars
263
256
  cache,
264
257
  rechunk,
265
258
  Utils.parse_row_index_args(row_count_name, row_count_offset),
266
- memory_map,
267
259
  hive_partitioning,
268
260
  hive_schema,
269
261
  try_parse_hive_dates,
data/lib/polars/series.rb CHANGED
@@ -2606,12 +2606,12 @@ module Polars
2606
2606
  # s.sign
2607
2607
  # # =>
2608
2608
  # # shape: (5,)
2609
- # # Series: 'a' [i64]
2609
+ # # Series: 'a' [f64]
2610
2610
  # # [
2611
- # # -1
2612
- # # 0
2613
- # # 0
2614
- # # 1
2611
+ # # -1.0
2612
+ # # -0.0
2613
+ # # 0.0
2614
+ # # 1.0
2615
2615
  # # null
2616
2616
  # # ]
2617
2617
  def sign
@@ -1,4 +1,4 @@
1
1
  module Polars
2
2
  # @private
3
- VERSION = "0.13.0"
3
+ VERSION = "0.14.0"
4
4
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: polars-df
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.13.0
4
+ version: 0.14.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-09-05 00:00:00.000000000 Z
11
+ date: 2024-09-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bigdecimal
@@ -199,7 +199,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
199
199
  - !ruby/object:Gem::Version
200
200
  version: '0'
201
201
  requirements: []
202
- rubygems_version: 3.5.11
202
+ rubygems_version: 3.5.16
203
203
  signing_key:
204
204
  specification_version: 4
205
205
  summary: Blazingly fast DataFrames for Ruby