polars-df 0.15.0 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/LICENSE.txt CHANGED
@@ -1,5 +1,5 @@
1
1
  Copyright (c) 2020 Ritchie Vink
2
- Copyright (c) 2022-2024 Andrew Kane
2
+ Copyright (c) 2022-2025 Andrew Kane
3
3
  Some portions Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
data/README.md CHANGED
@@ -14,7 +14,7 @@ gem "polars-df"
14
14
 
15
15
  ## Getting Started
16
16
 
17
- This library follows the [Polars Python API](https://pola-rs.github.io/polars/py-polars/html/reference/index.html).
17
+ This library follows the [Polars Python API](https://docs.pola.rs/api/python/stable/reference/index.html).
18
18
 
19
19
  ```ruby
20
20
  Polars.scan_csv("iris.csv")
@@ -24,7 +24,7 @@ Polars.scan_csv("iris.csv")
24
24
  .collect
25
25
  ```
26
26
 
27
- You can follow [Polars tutorials](https://pola-rs.github.io/polars-book/user-guide/) and convert the code to Ruby in many cases. Feel free to open an issue if you run into problems.
27
+ You can follow [Polars tutorials](https://docs.pola.rs/user-guide/getting-started/) and convert the code to Ruby in many cases. Feel free to open an issue if you run into problems.
28
28
 
29
29
  ## Reference
30
30
 
@@ -88,6 +88,15 @@ From Avro
88
88
  Polars.read_avro("file.avro")
89
89
  ```
90
90
 
91
+ From Delta Lake (requires [deltalake-rb](https://github.com/ankane/delta-ruby)) [experimental]
92
+
93
+ ```ruby
94
+ Polars.read_delta("./table")
95
+
96
+ # or lazily with
97
+ Polars.scan_delta("./table")
98
+ ```
99
+
91
100
  From a hash
92
101
 
93
102
  ```ruby
@@ -336,6 +345,32 @@ Parquet
336
345
  df.write_parquet("file.parquet")
337
346
  ```
338
347
 
348
+ JSON
349
+
350
+ ```ruby
351
+ df.write_json("file.json")
352
+ # or
353
+ df.write_ndjson("file.ndjson")
354
+ ```
355
+
356
+ Feather / Arrow IPC
357
+
358
+ ```ruby
359
+ df.write_ipc("file.arrow")
360
+ ```
361
+
362
+ Avro
363
+
364
+ ```ruby
365
+ df.write_avro("file.avro")
366
+ ```
367
+
368
+ Delta Lake [experimental]
369
+
370
+ ```ruby
371
+ df.write_delta("./table")
372
+ ```
373
+
339
374
  Numo array
340
375
 
341
376
  ```ruby
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "polars"
3
- version = "0.15.0"
3
+ version = "0.17.0"
4
4
  license = "MIT"
5
5
  authors = ["Andrew Kane <andrew@ankane.org>"]
6
6
  edition = "2021"
@@ -12,20 +12,21 @@ crate-type = ["cdylib"]
12
12
 
13
13
  [dependencies]
14
14
  ahash = "0.8"
15
- arrow = { package = "polars-arrow", version = "=0.44.2" }
15
+ arrow = { package = "polars-arrow", version = "=0.46.0" }
16
16
  bytes = "1"
17
17
  chrono = "0.4"
18
18
  either = "1.8"
19
19
  magnus = "0.7"
20
- polars-core = "=0.44.2"
21
- polars-plan = "=0.44.2"
22
- polars-parquet = "=0.44.2"
23
- polars-utils = "=0.44.2"
20
+ polars-core = "=0.46.0"
21
+ polars-plan = "=0.46.0"
22
+ polars-parquet = "=0.46.0"
23
+ polars-utils = "=0.46.0"
24
+ rayon = "1.9"
24
25
  regex = "1"
25
26
  serde_json = "1"
26
27
 
27
28
  [dependencies.polars]
28
- version = "=0.44.2"
29
+ version = "=0.46.0"
29
30
  features = [
30
31
  "abs",
31
32
  "approx_unique",
@@ -33,6 +33,7 @@ pub(crate) fn any_value_into_rb_object(av: AnyValue, ruby: &Ruby) -> Value {
33
33
  AnyValue::Int16(v) => ruby.into_value(v),
34
34
  AnyValue::Int32(v) => ruby.into_value(v),
35
35
  AnyValue::Int64(v) => ruby.into_value(v),
36
+ AnyValue::Int128(_v) => todo!(),
36
37
  AnyValue::Float32(v) => ruby.into_value(v),
37
38
  AnyValue::Float64(v) => ruby.into_value(v),
38
39
  AnyValue::Null => ruby.qnil().as_value(),
@@ -15,7 +15,6 @@ use polars::chunked_array::object::PolarsObjectSafe;
15
15
  use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
16
16
  use polars::datatypes::AnyValue;
17
17
  use polars::frame::row::Row;
18
- use polars::frame::NullStrategy;
19
18
  use polars::io::avro::AvroCompression;
20
19
  use polars::io::cloud::CloudOptions;
21
20
  use polars::prelude::*;
@@ -23,6 +22,7 @@ use polars::series::ops::NullBehavior;
23
22
  use polars_core::utils::arrow::array::Array;
24
23
  use polars_core::utils::materialize_dyn_int;
25
24
  use polars_plan::plans::ScanSources;
25
+ use polars_utils::mmap::MemSlice;
26
26
  use polars_utils::total_ord::{TotalEq, TotalHash};
27
27
 
28
28
  use crate::file::{get_ruby_scan_source_input, RubyScanSourceInput};
@@ -146,6 +146,10 @@ impl IntoValue for Wrap<DataType> {
146
146
  let class = pl.const_get::<_, Value>("Int64").unwrap();
147
147
  class.funcall("new", ()).unwrap()
148
148
  }
149
+ DataType::Int128 => {
150
+ let class = pl.const_get::<_, Value>("Int128").unwrap();
151
+ class.funcall("new", ()).unwrap()
152
+ }
149
153
  DataType::UInt8 => {
150
154
  let class = pl.const_get::<_, Value>("UInt8").unwrap();
151
155
  class.funcall("new", ()).unwrap()
@@ -304,29 +308,31 @@ impl TryConvert for Wrap<DataType> {
304
308
  let dtype = if ob.is_kind_of(class::class()) {
305
309
  let name = ob.funcall::<_, _, String>("name", ())?;
306
310
  match name.as_str() {
307
- "Polars::UInt8" => DataType::UInt8,
308
- "Polars::UInt16" => DataType::UInt16,
309
- "Polars::UInt32" => DataType::UInt32,
310
- "Polars::UInt64" => DataType::UInt64,
311
311
  "Polars::Int8" => DataType::Int8,
312
312
  "Polars::Int16" => DataType::Int16,
313
313
  "Polars::Int32" => DataType::Int32,
314
314
  "Polars::Int64" => DataType::Int64,
315
+ "Polars::UInt8" => DataType::UInt8,
316
+ "Polars::UInt16" => DataType::UInt16,
317
+ "Polars::UInt32" => DataType::UInt32,
318
+ "Polars::UInt64" => DataType::UInt64,
319
+ "Polars::Float32" => DataType::Float32,
320
+ "Polars::Float64" => DataType::Float64,
321
+ "Polars::Boolean" => DataType::Boolean,
315
322
  "Polars::String" => DataType::String,
316
323
  "Polars::Binary" => DataType::Binary,
317
- "Polars::Boolean" => DataType::Boolean,
318
324
  "Polars::Categorical" => DataType::Categorical(None, Default::default()),
319
325
  "Polars::Enum" => DataType::Enum(None, Default::default()),
320
326
  "Polars::Date" => DataType::Date,
321
- "Polars::Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
322
327
  "Polars::Time" => DataType::Time,
328
+ "Polars::Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
323
329
  "Polars::Duration" => DataType::Duration(TimeUnit::Microseconds),
324
330
  "Polars::Decimal" => DataType::Decimal(None, None),
325
- "Polars::Float32" => DataType::Float32,
326
- "Polars::Float64" => DataType::Float64,
327
- "Polars::Object" => DataType::Object(OBJECT_NAME, None),
328
331
  "Polars::List" => DataType::List(Box::new(DataType::Null)),
332
+ "Polars::Array" => DataType::Array(Box::new(DataType::Null), 0),
333
+ "Polars::Struct" => DataType::Struct(vec![]),
329
334
  "Polars::Null" => DataType::Null,
335
+ "Polars::Object" => DataType::Object(OBJECT_NAME, None),
330
336
  "Polars::Unknown" => DataType::Unknown(Default::default()),
331
337
  dt => {
332
338
  return Err(RbValueError::new_err(format!(
@@ -345,9 +351,11 @@ impl TryConvert for Wrap<DataType> {
345
351
  "Polars::UInt16" => DataType::UInt16,
346
352
  "Polars::UInt32" => DataType::UInt32,
347
353
  "Polars::UInt64" => DataType::UInt64,
354
+ "Polars::Float32" => DataType::Float32,
355
+ "Polars::Float64" => DataType::Float64,
356
+ "Polars::Boolean" => DataType::Boolean,
348
357
  "Polars::String" => DataType::String,
349
358
  "Polars::Binary" => DataType::Binary,
350
- "Polars::Boolean" => DataType::Boolean,
351
359
  "Polars::Categorical" => {
352
360
  let ordering = ob
353
361
  .funcall::<_, _, Wrap<CategoricalOrdering>>("ordering", ())?
@@ -363,21 +371,17 @@ impl TryConvert for Wrap<DataType> {
363
371
  }
364
372
  "Polars::Date" => DataType::Date,
365
373
  "Polars::Time" => DataType::Time,
366
- "Polars::Float32" => DataType::Float32,
367
- "Polars::Float64" => DataType::Float64,
368
- "Polars::Null" => DataType::Null,
369
- "Polars::Unknown" => DataType::Unknown(Default::default()),
370
- "Polars::Duration" => {
371
- let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
372
- let time_unit = Wrap::<TimeUnit>::try_convert(time_unit)?.0;
373
- DataType::Duration(time_unit)
374
- }
375
374
  "Polars::Datetime" => {
376
375
  let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
377
376
  let time_unit = Wrap::<TimeUnit>::try_convert(time_unit)?.0;
378
377
  let time_zone: Option<String> = ob.funcall("time_zone", ())?;
379
378
  DataType::Datetime(time_unit, time_zone.as_deref().map(|x| x.into()))
380
379
  }
380
+ "Polars::Duration" => {
381
+ let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
382
+ let time_unit = Wrap::<TimeUnit>::try_convert(time_unit)?.0;
383
+ DataType::Duration(time_unit)
384
+ }
381
385
  "Polars::Decimal" => {
382
386
  let precision = ob.funcall("precision", ())?;
383
387
  let scale = ob.funcall("scale", ())?;
@@ -388,6 +392,13 @@ impl TryConvert for Wrap<DataType> {
388
392
  let inner = Wrap::<DataType>::try_convert(inner)?;
389
393
  DataType::List(Box::new(inner.0))
390
394
  }
395
+ "Polars::Array" => {
396
+ let inner: Value = ob.funcall("inner", ()).unwrap();
397
+ let size: Value = ob.funcall("size", ()).unwrap();
398
+ let inner = Wrap::<DataType>::try_convert(inner)?;
399
+ let size = usize::try_convert(size)?;
400
+ DataType::Array(Box::new(inner.0), size)
401
+ }
391
402
  "Polars::Struct" => {
392
403
  let arr: RArray = ob.funcall("fields", ())?;
393
404
  let mut fields = Vec::with_capacity(arr.len());
@@ -396,6 +407,9 @@ impl TryConvert for Wrap<DataType> {
396
407
  }
397
408
  DataType::Struct(fields)
398
409
  }
410
+ "Polars::Null" => DataType::Null,
411
+ "Object" => DataType::Object(OBJECT_NAME, None),
412
+ "Polars::Unknown" => DataType::Unknown(Default::default()),
399
413
  dt => {
400
414
  return Err(RbTypeError::new_err(format!(
401
415
  "A {dt} object is not a correct polars DataType. \
@@ -501,7 +515,7 @@ impl TryConvert for Wrap<ScanSources> {
501
515
  enum MutableSources {
502
516
  Paths(Vec<PathBuf>),
503
517
  Files(Vec<File>),
504
- Buffers(Vec<bytes::Bytes>),
518
+ Buffers(Vec<MemSlice>),
505
519
  }
506
520
 
507
521
  let num_items = list.len();
@@ -1,6 +1,5 @@
1
1
  use either::Either;
2
2
  use magnus::{prelude::*, typed_data::Obj, IntoValue, RArray, Value};
3
- use polars::frame::NullStrategy;
4
3
  use polars::prelude::pivot::{pivot, pivot_stable};
5
4
  use polars::prelude::*;
6
5
 
@@ -158,7 +157,7 @@ impl RbDataFrame {
158
157
  }
159
158
 
160
159
  pub fn n_chunks(&self) -> usize {
161
- self.df.borrow().n_chunks()
160
+ self.df.borrow().first_col_n_chunks()
162
161
  }
163
162
 
164
163
  pub fn shape(&self) -> (usize, usize) {
@@ -410,52 +409,6 @@ impl RbDataFrame {
410
409
  self.df.borrow().clone().lazy().into()
411
410
  }
412
411
 
413
- pub fn max_horizontal(&self) -> RbResult<Option<RbSeries>> {
414
- let s = self
415
- .df
416
- .borrow()
417
- .max_horizontal()
418
- .map_err(RbPolarsErr::from)?;
419
- Ok(s.map(|s| s.take_materialized_series().into()))
420
- }
421
-
422
- pub fn min_horizontal(&self) -> RbResult<Option<RbSeries>> {
423
- let s = self
424
- .df
425
- .borrow()
426
- .min_horizontal()
427
- .map_err(RbPolarsErr::from)?;
428
- Ok(s.map(|s| s.take_materialized_series().into()))
429
- }
430
-
431
- pub fn sum_horizontal(&self, ignore_nulls: bool) -> RbResult<Option<RbSeries>> {
432
- let null_strategy = if ignore_nulls {
433
- NullStrategy::Ignore
434
- } else {
435
- NullStrategy::Propagate
436
- };
437
- let s = self
438
- .df
439
- .borrow()
440
- .sum_horizontal(null_strategy)
441
- .map_err(RbPolarsErr::from)?;
442
- Ok(s.map(|s| s.into()))
443
- }
444
-
445
- pub fn mean_horizontal(&self, ignore_nulls: bool) -> RbResult<Option<RbSeries>> {
446
- let null_strategy = if ignore_nulls {
447
- NullStrategy::Ignore
448
- } else {
449
- NullStrategy::Propagate
450
- };
451
- let s = self
452
- .df
453
- .borrow()
454
- .mean_horizontal(null_strategy)
455
- .map_err(RbPolarsErr::from)?;
456
- Ok(s.map(|s| s.into()))
457
- }
458
-
459
412
  pub fn to_dummies(
460
413
  &self,
461
414
  columns: Option<Vec<String>>,
@@ -2,15 +2,13 @@ use magnus::{prelude::*, Value};
2
2
  use polars::io::avro::AvroCompression;
3
3
  use polars::io::RowIndex;
4
4
  use polars::prelude::*;
5
- use polars_utils::mmap::ensure_not_mapped;
6
5
  use std::io::BufWriter;
7
6
  use std::num::NonZeroUsize;
8
7
 
9
8
  use super::*;
10
9
  use crate::conversion::*;
11
10
  use crate::file::{
12
- get_either_file, get_file_like, get_mmap_bytes_reader, get_mmap_bytes_reader_and_path,
13
- read_if_bytesio, EitherRustRubyFile,
11
+ get_file_like, get_mmap_bytes_reader, get_mmap_bytes_reader_and_path, read_if_bytesio,
14
12
  };
15
13
  use crate::{RbPolarsErr, RbResult};
16
14
 
@@ -298,30 +296,24 @@ impl RbDataFrame {
298
296
  Ok(())
299
297
  }
300
298
 
301
- pub fn write_json(&self, rb_f: Value, pretty: bool, row_oriented: bool) -> RbResult<()> {
299
+ pub fn write_json(&self, rb_f: Value) -> RbResult<()> {
302
300
  let file = BufWriter::new(get_file_like(rb_f, true)?);
303
301
 
304
- let r = match (pretty, row_oriented) {
305
- (_, true) => JsonWriter::new(file)
306
- .with_json_format(JsonFormat::Json)
307
- .finish(&mut self.df.borrow_mut()),
308
- (true, _) => serde_json::to_writer_pretty(file, &*self.df.borrow())
309
- .map_err(|e| PolarsError::ComputeError(format!("{:?}", e).into())),
310
- (false, _) => serde_json::to_writer(file, &*self.df.borrow())
311
- .map_err(|e| PolarsError::ComputeError(format!("{:?}", e).into())),
312
- };
313
- r.map_err(|e| RbPolarsErr::Other(format!("{:?}", e)))?;
302
+ JsonWriter::new(file)
303
+ .with_json_format(JsonFormat::Json)
304
+ .finish(&mut self.df.borrow_mut())
305
+ .map_err(RbPolarsErr::from)?;
314
306
  Ok(())
315
307
  }
316
308
 
317
309
  pub fn write_ndjson(&self, rb_f: Value) -> RbResult<()> {
318
310
  let file = BufWriter::new(get_file_like(rb_f, true)?);
319
311
 
320
- let r = JsonWriter::new(file)
312
+ JsonWriter::new(file)
321
313
  .with_json_format(JsonFormat::JsonLines)
322
- .finish(&mut self.df.borrow_mut());
314
+ .finish(&mut self.df.borrow_mut())
315
+ .map_err(RbPolarsErr::from)?;
323
316
 
324
- r.map_err(|e| RbPolarsErr::Other(format!("{:?}", e)))?;
325
317
  Ok(())
326
318
  }
327
319
 
@@ -330,13 +322,19 @@ impl RbDataFrame {
330
322
  rb_f: Value,
331
323
  compression: Wrap<Option<IpcCompression>>,
332
324
  compat_level: RbCompatLevel,
325
+ cloud_options: Option<Vec<(String, String)>>,
326
+ retries: usize,
333
327
  ) -> RbResult<()> {
334
- let either = get_either_file(rb_f, true)?;
335
- if let EitherRustRubyFile::Rust(ref f) = either {
336
- ensure_not_mapped(f).map_err(RbPolarsErr::from)?;
337
- }
338
- let mut buf = either.into_dyn();
339
- IpcWriter::new(&mut buf)
328
+ let cloud_options = if let Ok(path) = String::try_convert(rb_f) {
329
+ let cloud_options = parse_cloud_options(&path, cloud_options.unwrap_or_default())?;
330
+ Some(cloud_options.with_max_retries(retries))
331
+ } else {
332
+ None
333
+ };
334
+
335
+ let f = crate::file::try_get_writeable(rb_f, cloud_options.as_ref())?;
336
+
337
+ IpcWriter::new(f)
340
338
  .with_compression(compression.0)
341
339
  .with_compat_level(compat_level.0)
342
340
  .finish(&mut self.df.borrow_mut())
@@ -271,6 +271,7 @@ impl RbExpr {
271
271
  nulls_last,
272
272
  multithreaded: true,
273
273
  maintain_order: false,
274
+ limit: None,
274
275
  })
275
276
  .into()
276
277
  }
@@ -283,6 +284,7 @@ impl RbExpr {
283
284
  nulls_last,
284
285
  multithreaded: true,
285
286
  maintain_order: false,
287
+ limit: None,
286
288
  })
287
289
  .into()
288
290
  }
@@ -363,6 +365,7 @@ impl RbExpr {
363
365
  nulls_last,
364
366
  multithreaded,
365
367
  maintain_order,
368
+ limit: None,
366
369
  },
367
370
  )
368
371
  .into())
@@ -84,13 +84,17 @@ impl RbExpr {
84
84
  self.inner.clone().meta()._into_selector().into()
85
85
  }
86
86
 
87
- pub fn meta_tree_format(&self) -> RbResult<String> {
87
+ fn compute_tree_format(&self, display_as_dot: bool) -> RbResult<String> {
88
88
  let e = self
89
89
  .inner
90
90
  .clone()
91
91
  .meta()
92
- .into_tree_formatter()
92
+ .into_tree_formatter(display_as_dot)
93
93
  .map_err(RbPolarsErr::from)?;
94
94
  Ok(format!("{e}"))
95
95
  }
96
+
97
+ pub fn meta_tree_format(&self) -> RbResult<String> {
98
+ self.compute_tree_format(false)
99
+ }
96
100
  }
@@ -4,7 +4,9 @@ use std::io::{Cursor, Read, Seek, SeekFrom, Write};
4
4
  use std::path::PathBuf;
5
5
 
6
6
  use magnus::{exception, prelude::*, Error, RString, Value};
7
+ use polars::io::cloud::CloudOptions;
7
8
  use polars::io::mmap::MmapBytesReader;
9
+ use polars_utils::mmap::MemSlice;
8
10
 
9
11
  use crate::error::RbPolarsErr;
10
12
  use crate::prelude::resolve_homedir;
@@ -141,10 +143,17 @@ impl EitherRustRubyFile {
141
143
  EitherRustRubyFile::Rust(f) => Box::new(f),
142
144
  }
143
145
  }
146
+
147
+ pub fn into_dyn_writeable(self) -> Box<dyn Write> {
148
+ match self {
149
+ EitherRustRubyFile::Rb(f) => Box::new(f),
150
+ EitherRustRubyFile::Rust(f) => Box::new(f),
151
+ }
152
+ }
144
153
  }
145
154
 
146
155
  pub enum RubyScanSourceInput {
147
- Buffer(bytes::Bytes),
156
+ Buffer(MemSlice),
148
157
  Path(PathBuf),
149
158
  #[allow(dead_code)]
150
159
  File(File),
@@ -156,7 +165,9 @@ pub fn get_ruby_scan_source_input(rb_f: Value, write: bool) -> RbResult<RubyScan
156
165
  Ok(RubyScanSourceInput::Path(file_path))
157
166
  } else {
158
167
  let f = RbFileLikeObject::with_requirements(rb_f, !write, write, !write)?;
159
- Ok(RubyScanSourceInput::Buffer(f.as_bytes()))
168
+ Ok(RubyScanSourceInput::Buffer(MemSlice::from_bytes(
169
+ f.as_bytes(),
170
+ )))
160
171
  }
161
172
  }
162
173
 
@@ -167,7 +178,7 @@ pub fn get_either_file(rb_f: Value, truncate: bool) -> RbResult<EitherRustRubyFi
167
178
  if let Ok(rstring) = RString::try_convert(rb_f) {
168
179
  let s = unsafe { rstring.as_str() }?;
169
180
  let file_path = std::path::Path::new(&s);
170
- let file_path = resolve_homedir(file_path);
181
+ let file_path = resolve_homedir(&file_path);
171
182
  let f = if truncate {
172
183
  File::create(file_path).map_err(RbPolarsErr::from)?
173
184
  } else {
@@ -212,3 +223,10 @@ pub fn get_mmap_bytes_reader_and_path<'a>(
212
223
  }
213
224
  }
214
225
  }
226
+
227
+ pub fn try_get_writeable(
228
+ rb_f: Value,
229
+ _cloud_options: Option<&CloudOptions>,
230
+ ) -> RbResult<Box<dyn Write>> {
231
+ Ok(get_either_file(rb_f, true)?.into_dyn_writeable())
232
+ }
@@ -28,14 +28,14 @@ pub fn min_horizontal(exprs: RArray) -> RbResult<RbExpr> {
28
28
  Ok(e.into())
29
29
  }
30
30
 
31
- pub fn sum_horizontal(exprs: RArray) -> RbResult<RbExpr> {
31
+ pub fn sum_horizontal(exprs: RArray, ignore_nulls: bool) -> RbResult<RbExpr> {
32
32
  let exprs = rb_exprs_to_exprs(exprs)?;
33
- let e = dsl::sum_horizontal(exprs).map_err(RbPolarsErr::from)?;
33
+ let e = dsl::sum_horizontal(exprs, ignore_nulls).map_err(RbPolarsErr::from)?;
34
34
  Ok(e.into())
35
35
  }
36
36
 
37
- pub fn mean_horizontal(exprs: RArray) -> RbResult<RbExpr> {
37
+ pub fn mean_horizontal(exprs: RArray, ignore_nulls: bool) -> RbResult<RbExpr> {
38
38
  let exprs = rb_exprs_to_exprs(exprs)?;
39
- let e = dsl::mean_horizontal(exprs).map_err(RbPolarsErr::from)?;
39
+ let e = dsl::mean_horizontal(exprs, ignore_nulls).map_err(RbPolarsErr::from)?;
40
40
  Ok(e.into())
41
41
  }
@@ -1,34 +1,55 @@
1
+ use std::io::BufReader;
2
+
3
+ use arrow::array::Utf8ViewArray;
1
4
  use magnus::{RHash, Value};
5
+ use polars::prelude::ArrowSchema;
6
+ use polars_core::datatypes::create_enum_dtype;
2
7
 
3
8
  use crate::conversion::Wrap;
4
- use crate::file::get_file_like;
5
- use crate::prelude::DataType;
9
+ use crate::file::{get_either_file, EitherRustRubyFile};
10
+ use crate::prelude::ArrowDataType;
6
11
  use crate::{RbPolarsErr, RbResult};
7
12
 
8
13
  pub fn read_ipc_schema(rb_f: Value) -> RbResult<RHash> {
9
- use polars_core::export::arrow::io::ipc::read::read_file_metadata;
10
- let mut r = get_file_like(rb_f, false)?;
11
- let metadata = read_file_metadata(&mut r).map_err(RbPolarsErr::from)?;
14
+ use arrow::io::ipc::read::read_file_metadata;
15
+ let metadata = match get_either_file(rb_f, false)? {
16
+ EitherRustRubyFile::Rust(r) => {
17
+ read_file_metadata(&mut BufReader::new(r)).map_err(RbPolarsErr::from)?
18
+ }
19
+ EitherRustRubyFile::Rb(mut r) => read_file_metadata(&mut r).map_err(RbPolarsErr::from)?,
20
+ };
12
21
 
13
22
  let dict = RHash::new();
14
- for field in metadata.schema.iter_values() {
15
- let dt: Wrap<DataType> = Wrap((&field.dtype).into());
16
- dict.aset(field.name.as_str(), dt)?;
17
- }
23
+ fields_to_rbdict(&metadata.schema, &dict)?;
18
24
  Ok(dict)
19
25
  }
20
26
 
21
27
  pub fn read_parquet_schema(rb_f: Value) -> RbResult<RHash> {
22
28
  use polars_parquet::read::{infer_schema, read_metadata};
23
29
 
24
- let mut r = get_file_like(rb_f, false)?;
25
- let metadata = read_metadata(&mut r).map_err(RbPolarsErr::from)?;
30
+ let metadata = match get_either_file(rb_f, false)? {
31
+ EitherRustRubyFile::Rust(r) => {
32
+ read_metadata(&mut BufReader::new(r)).map_err(RbPolarsErr::from)?
33
+ }
34
+ EitherRustRubyFile::Rb(mut r) => read_metadata(&mut r).map_err(RbPolarsErr::from)?,
35
+ };
26
36
  let arrow_schema = infer_schema(&metadata).map_err(RbPolarsErr::from)?;
27
37
 
28
38
  let dict = RHash::new();
29
- for field in arrow_schema.iter_values() {
30
- let dt: Wrap<DataType> = Wrap((&field.dtype).into());
39
+ fields_to_rbdict(&arrow_schema, &dict)?;
40
+ Ok(dict)
41
+ }
42
+
43
+ fn fields_to_rbdict(schema: &ArrowSchema, dict: &RHash) -> RbResult<()> {
44
+ for field in schema.iter_values() {
45
+ let dt = if field.is_enum() {
46
+ Wrap(create_enum_dtype(Utf8ViewArray::new_empty(
47
+ ArrowDataType::Utf8View,
48
+ )))
49
+ } else {
50
+ Wrap(polars::prelude::DataType::from_arrow_field(field))
51
+ };
31
52
  dict.aset(field.name.as_str(), dt)?;
32
53
  }
33
- Ok(dict)
54
+ Ok(())
34
55
  }
@@ -70,6 +70,7 @@ pub fn arg_sort_by(
70
70
  nulls_last,
71
71
  multithreaded,
72
72
  maintain_order,
73
+ limit: None,
73
74
  },
74
75
  )
75
76
  .into())
@@ -320,8 +321,8 @@ pub fn lit(value: Value, allow_object: bool) -> RbResult<RbExpr> {
320
321
  }
321
322
  }
322
323
 
323
- pub fn pearson_corr(a: &RbExpr, b: &RbExpr, ddof: u8) -> RbExpr {
324
- dsl::pearson_corr(a.inner.clone(), b.inner.clone(), ddof).into()
324
+ pub fn pearson_corr(a: &RbExpr, b: &RbExpr) -> RbExpr {
325
+ dsl::pearson_corr(a.inner.clone(), b.inner.clone()).into()
325
326
  }
326
327
 
327
328
  pub fn repeat(value: &RbExpr, n: &RbExpr, dtype: Option<Wrap<DataType>>) -> RbResult<RbExpr> {
@@ -345,8 +346,8 @@ pub fn repeat(value: &RbExpr, n: &RbExpr, dtype: Option<Wrap<DataType>>) -> RbRe
345
346
  Ok(dsl::repeat(value, n).into())
346
347
  }
347
348
 
348
- pub fn spearman_rank_corr(a: &RbExpr, b: &RbExpr, ddof: u8, propagate_nans: bool) -> RbExpr {
349
- dsl::spearman_rank_corr(a.inner.clone(), b.inner.clone(), ddof, propagate_nans).into()
349
+ pub fn spearman_rank_corr(a: &RbExpr, b: &RbExpr, propagate_nans: bool) -> RbExpr {
350
+ dsl::spearman_rank_corr(a.inner.clone(), b.inner.clone(), propagate_nans).into()
350
351
  }
351
352
 
352
353
  pub fn sql_expr(sql: String) -> RbResult<RbExpr> {
@@ -11,7 +11,7 @@ pub fn get_index_type() -> Value {
11
11
  Wrap(IDX_DTYPE).into_value()
12
12
  }
13
13
 
14
- pub fn threadpool_size() -> usize {
14
+ pub fn thread_pool_size() -> usize {
15
15
  POOL.current_num_threads()
16
16
  }
17
17
 
@@ -9,7 +9,7 @@ use polars_core::utils::arrow;
9
9
 
10
10
  use crate::RbResult;
11
11
 
12
- #[magnus::wrap(class = "Polars::RbArrowArrayStream")]
12
+ #[magnus::wrap(class = "Polars::ArrowArrayStream")]
13
13
  pub struct RbArrowArrayStream {
14
14
  stream: ffi::ArrowArrayStream,
15
15
  }
@@ -47,7 +47,7 @@ impl DataFrameStreamIterator {
47
47
  .collect(),
48
48
  dtype,
49
49
  idx: 0,
50
- n_chunks: df.n_chunks(),
50
+ n_chunks: df.first_col_n_chunks(),
51
51
  }
52
52
  }
53
53
 
@@ -23,7 +23,7 @@ impl RbSeries {
23
23
  .const_get::<_, RClass>("RObject")?
24
24
  .funcall("cast", (np_arr,))
25
25
  }
26
- dt if dt.is_numeric() => {
26
+ dt if dt.is_primitive_numeric() => {
27
27
  if let Some(BitRepr::Large(_)) = s.bit_repr() {
28
28
  let s = s.cast(&DataType::Float64).unwrap();
29
29
  let ca = s.f64().unwrap();