polars-df 0.15.0 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/Cargo.lock +588 -456
- data/README.md +37 -2
- data/ext/polars/Cargo.toml +7 -7
- data/ext/polars/src/conversion/mod.rs +31 -21
- data/ext/polars/src/dataframe/general.rs +1 -48
- data/ext/polars/src/dataframe/io.rs +13 -9
- data/ext/polars/src/expr/general.rs +3 -0
- data/ext/polars/src/expr/meta.rs +6 -2
- data/ext/polars/src/file.rs +21 -3
- data/ext/polars/src/functions/aggregation.rs +4 -4
- data/ext/polars/src/functions/io.rs +34 -13
- data/ext/polars/src/functions/lazy.rs +5 -4
- data/ext/polars/src/functions/meta.rs +1 -1
- data/ext/polars/src/interop/arrow/to_ruby.rs +2 -2
- data/ext/polars/src/lazyframe/general.rs +48 -5
- data/ext/polars/src/lib.rs +11 -15
- data/ext/polars/src/series/general.rs +3 -15
- data/ext/polars/src/series/import.rs +1 -1
- data/lib/polars/data_frame.rb +179 -51
- data/lib/polars/data_types.rb +1 -1
- data/lib/polars/functions/aggregation/horizontal.rb +10 -4
- data/lib/polars/functions/lazy.rb +7 -3
- data/lib/polars/io/delta.rb +126 -0
- data/lib/polars/lazy_frame.rb +35 -5
- data/lib/polars/selectors.rb +85 -3
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +15 -0
- metadata +5 -8
data/README.md
CHANGED
@@ -14,7 +14,7 @@ gem "polars-df"
|
|
14
14
|
|
15
15
|
## Getting Started
|
16
16
|
|
17
|
-
This library follows the [Polars Python API](https://pola
|
17
|
+
This library follows the [Polars Python API](https://docs.pola.rs/api/python/stable/reference/index.html).
|
18
18
|
|
19
19
|
```ruby
|
20
20
|
Polars.scan_csv("iris.csv")
|
@@ -24,7 +24,7 @@ Polars.scan_csv("iris.csv")
|
|
24
24
|
.collect
|
25
25
|
```
|
26
26
|
|
27
|
-
You can follow [Polars tutorials](https://pola
|
27
|
+
You can follow [Polars tutorials](https://docs.pola.rs/user-guide/getting-started/) and convert the code to Ruby in many cases. Feel free to open an issue if you run into problems.
|
28
28
|
|
29
29
|
## Reference
|
30
30
|
|
@@ -88,6 +88,15 @@ From Avro
|
|
88
88
|
Polars.read_avro("file.avro")
|
89
89
|
```
|
90
90
|
|
91
|
+
From Delta Lake (requires [deltalake-rb](https://github.com/ankane/delta-ruby)) [experimental, unreleased]
|
92
|
+
|
93
|
+
```ruby
|
94
|
+
Polars.read_delta("./table")
|
95
|
+
|
96
|
+
# or lazily with
|
97
|
+
Polars.scan_delta("./table")
|
98
|
+
```
|
99
|
+
|
91
100
|
From a hash
|
92
101
|
|
93
102
|
```ruby
|
@@ -336,6 +345,32 @@ Parquet
|
|
336
345
|
df.write_parquet("file.parquet")
|
337
346
|
```
|
338
347
|
|
348
|
+
JSON
|
349
|
+
|
350
|
+
```ruby
|
351
|
+
df.write_json("file.json")
|
352
|
+
# or
|
353
|
+
df.write_ndjson("file.ndjson")
|
354
|
+
```
|
355
|
+
|
356
|
+
Feather / Arrow IPC
|
357
|
+
|
358
|
+
```ruby
|
359
|
+
df.write_ipc("file.arrow")
|
360
|
+
```
|
361
|
+
|
362
|
+
Avro
|
363
|
+
|
364
|
+
```ruby
|
365
|
+
df.write_avro("file.avro")
|
366
|
+
```
|
367
|
+
|
368
|
+
Delta Lake [experimental, unreleased]
|
369
|
+
|
370
|
+
```ruby
|
371
|
+
df.write_delta("./table")
|
372
|
+
```
|
373
|
+
|
339
374
|
Numo array
|
340
375
|
|
341
376
|
```ruby
|
data/ext/polars/Cargo.toml
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
[package]
|
2
2
|
name = "polars"
|
3
|
-
version = "0.15.0"
|
3
|
+
version = "0.16.0"
|
4
4
|
license = "MIT"
|
5
5
|
authors = ["Andrew Kane <andrew@ankane.org>"]
|
6
6
|
edition = "2021"
|
@@ -12,20 +12,20 @@ crate-type = ["cdylib"]
|
|
12
12
|
|
13
13
|
[dependencies]
|
14
14
|
ahash = "0.8"
|
15
|
-
arrow = { package = "polars-arrow", version = "=0.
|
15
|
+
arrow = { package = "polars-arrow", version = "=0.45.1" }
|
16
16
|
bytes = "1"
|
17
17
|
chrono = "0.4"
|
18
18
|
either = "1.8"
|
19
19
|
magnus = "0.7"
|
20
|
-
polars-core = "=0.
|
21
|
-
polars-plan = "=0.
|
22
|
-
polars-parquet = "=0.
|
23
|
-
polars-utils = "=0.
|
20
|
+
polars-core = "=0.45.1"
|
21
|
+
polars-plan = "=0.45.1"
|
22
|
+
polars-parquet = "=0.45.1"
|
23
|
+
polars-utils = "=0.45.1"
|
24
24
|
regex = "1"
|
25
25
|
serde_json = "1"
|
26
26
|
|
27
27
|
[dependencies.polars]
|
28
|
-
version = "=0.
|
28
|
+
version = "=0.45.1"
|
29
29
|
features = [
|
30
30
|
"abs",
|
31
31
|
"approx_unique",
|
data/ext/polars/src/conversion/mod.rs
CHANGED
@@ -15,7 +15,6 @@ use polars::chunked_array::object::PolarsObjectSafe;
|
|
15
15
|
use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
|
16
16
|
use polars::datatypes::AnyValue;
|
17
17
|
use polars::frame::row::Row;
|
18
|
-
use polars::frame::NullStrategy;
|
19
18
|
use polars::io::avro::AvroCompression;
|
20
19
|
use polars::io::cloud::CloudOptions;
|
21
20
|
use polars::prelude::*;
|
@@ -23,6 +22,7 @@ use polars::series::ops::NullBehavior;
|
|
23
22
|
use polars_core::utils::arrow::array::Array;
|
24
23
|
use polars_core::utils::materialize_dyn_int;
|
25
24
|
use polars_plan::plans::ScanSources;
|
25
|
+
use polars_utils::mmap::MemSlice;
|
26
26
|
use polars_utils::total_ord::{TotalEq, TotalHash};
|
27
27
|
|
28
28
|
use crate::file::{get_ruby_scan_source_input, RubyScanSourceInput};
|
@@ -304,29 +304,31 @@ impl TryConvert for Wrap<DataType> {
|
|
304
304
|
let dtype = if ob.is_kind_of(class::class()) {
|
305
305
|
let name = ob.funcall::<_, _, String>("name", ())?;
|
306
306
|
match name.as_str() {
|
307
|
-
"Polars::UInt8" => DataType::UInt8,
|
308
|
-
"Polars::UInt16" => DataType::UInt16,
|
309
|
-
"Polars::UInt32" => DataType::UInt32,
|
310
|
-
"Polars::UInt64" => DataType::UInt64,
|
311
307
|
"Polars::Int8" => DataType::Int8,
|
312
308
|
"Polars::Int16" => DataType::Int16,
|
313
309
|
"Polars::Int32" => DataType::Int32,
|
314
310
|
"Polars::Int64" => DataType::Int64,
|
311
|
+
"Polars::UInt8" => DataType::UInt8,
|
312
|
+
"Polars::UInt16" => DataType::UInt16,
|
313
|
+
"Polars::UInt32" => DataType::UInt32,
|
314
|
+
"Polars::UInt64" => DataType::UInt64,
|
315
|
+
"Polars::Float32" => DataType::Float32,
|
316
|
+
"Polars::Float64" => DataType::Float64,
|
317
|
+
"Polars::Boolean" => DataType::Boolean,
|
315
318
|
"Polars::String" => DataType::String,
|
316
319
|
"Polars::Binary" => DataType::Binary,
|
317
|
-
"Polars::Boolean" => DataType::Boolean,
|
318
320
|
"Polars::Categorical" => DataType::Categorical(None, Default::default()),
|
319
321
|
"Polars::Enum" => DataType::Enum(None, Default::default()),
|
320
322
|
"Polars::Date" => DataType::Date,
|
321
|
-
"Polars::Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
|
322
323
|
"Polars::Time" => DataType::Time,
|
324
|
+
"Polars::Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
|
323
325
|
"Polars::Duration" => DataType::Duration(TimeUnit::Microseconds),
|
324
326
|
"Polars::Decimal" => DataType::Decimal(None, None),
|
325
|
-
"Polars::Float32" => DataType::Float32,
|
326
|
-
"Polars::Float64" => DataType::Float64,
|
327
|
-
"Polars::Object" => DataType::Object(OBJECT_NAME, None),
|
328
327
|
"Polars::List" => DataType::List(Box::new(DataType::Null)),
|
328
|
+
"Polars::Array" => DataType::Array(Box::new(DataType::Null), 0),
|
329
|
+
"Polars::Struct" => DataType::Struct(vec![]),
|
329
330
|
"Polars::Null" => DataType::Null,
|
331
|
+
"Polars::Object" => DataType::Object(OBJECT_NAME, None),
|
330
332
|
"Polars::Unknown" => DataType::Unknown(Default::default()),
|
331
333
|
dt => {
|
332
334
|
return Err(RbValueError::new_err(format!(
|
@@ -345,9 +347,11 @@ impl TryConvert for Wrap<DataType> {
|
|
345
347
|
"Polars::UInt16" => DataType::UInt16,
|
346
348
|
"Polars::UInt32" => DataType::UInt32,
|
347
349
|
"Polars::UInt64" => DataType::UInt64,
|
350
|
+
"Polars::Float32" => DataType::Float32,
|
351
|
+
"Polars::Float64" => DataType::Float64,
|
352
|
+
"Polars::Boolean" => DataType::Boolean,
|
348
353
|
"Polars::String" => DataType::String,
|
349
354
|
"Polars::Binary" => DataType::Binary,
|
350
|
-
"Polars::Boolean" => DataType::Boolean,
|
351
355
|
"Polars::Categorical" => {
|
352
356
|
let ordering = ob
|
353
357
|
.funcall::<_, _, Wrap<CategoricalOrdering>>("ordering", ())?
|
@@ -363,21 +367,17 @@ impl TryConvert for Wrap<DataType> {
|
|
363
367
|
}
|
364
368
|
"Polars::Date" => DataType::Date,
|
365
369
|
"Polars::Time" => DataType::Time,
|
366
|
-
"Polars::Float32" => DataType::Float32,
|
367
|
-
"Polars::Float64" => DataType::Float64,
|
368
|
-
"Polars::Null" => DataType::Null,
|
369
|
-
"Polars::Unknown" => DataType::Unknown(Default::default()),
|
370
|
-
"Polars::Duration" => {
|
371
|
-
let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
|
372
|
-
let time_unit = Wrap::<TimeUnit>::try_convert(time_unit)?.0;
|
373
|
-
DataType::Duration(time_unit)
|
374
|
-
}
|
375
370
|
"Polars::Datetime" => {
|
376
371
|
let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
|
377
372
|
let time_unit = Wrap::<TimeUnit>::try_convert(time_unit)?.0;
|
378
373
|
let time_zone: Option<String> = ob.funcall("time_zone", ())?;
|
379
374
|
DataType::Datetime(time_unit, time_zone.as_deref().map(|x| x.into()))
|
380
375
|
}
|
376
|
+
"Polars::Duration" => {
|
377
|
+
let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
|
378
|
+
let time_unit = Wrap::<TimeUnit>::try_convert(time_unit)?.0;
|
379
|
+
DataType::Duration(time_unit)
|
380
|
+
}
|
381
381
|
"Polars::Decimal" => {
|
382
382
|
let precision = ob.funcall("precision", ())?;
|
383
383
|
let scale = ob.funcall("scale", ())?;
|
@@ -388,6 +388,13 @@ impl TryConvert for Wrap<DataType> {
|
|
388
388
|
let inner = Wrap::<DataType>::try_convert(inner)?;
|
389
389
|
DataType::List(Box::new(inner.0))
|
390
390
|
}
|
391
|
+
"Polars::Array" => {
|
392
|
+
let inner: Value = ob.funcall("inner", ()).unwrap();
|
393
|
+
let size: Value = ob.funcall("size", ()).unwrap();
|
394
|
+
let inner = Wrap::<DataType>::try_convert(inner)?;
|
395
|
+
let size = usize::try_convert(size)?;
|
396
|
+
DataType::Array(Box::new(inner.0), size)
|
397
|
+
}
|
391
398
|
"Polars::Struct" => {
|
392
399
|
let arr: RArray = ob.funcall("fields", ())?;
|
393
400
|
let mut fields = Vec::with_capacity(arr.len());
|
@@ -396,6 +403,9 @@ impl TryConvert for Wrap<DataType> {
|
|
396
403
|
}
|
397
404
|
DataType::Struct(fields)
|
398
405
|
}
|
406
|
+
"Polars::Null" => DataType::Null,
|
407
|
+
"Object" => DataType::Object(OBJECT_NAME, None),
|
408
|
+
"Polars::Unknown" => DataType::Unknown(Default::default()),
|
399
409
|
dt => {
|
400
410
|
return Err(RbTypeError::new_err(format!(
|
401
411
|
"A {dt} object is not a correct polars DataType. \
|
@@ -501,7 +511,7 @@ impl TryConvert for Wrap<ScanSources> {
|
|
501
511
|
enum MutableSources {
|
502
512
|
Paths(Vec<PathBuf>),
|
503
513
|
Files(Vec<File>),
|
504
|
-
Buffers(Vec<
|
514
|
+
Buffers(Vec<MemSlice>),
|
505
515
|
}
|
506
516
|
|
507
517
|
let num_items = list.len();
|
data/ext/polars/src/dataframe/general.rs
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
use either::Either;
|
2
2
|
use magnus::{prelude::*, typed_data::Obj, IntoValue, RArray, Value};
|
3
|
-
use polars::frame::NullStrategy;
|
4
3
|
use polars::prelude::pivot::{pivot, pivot_stable};
|
5
4
|
use polars::prelude::*;
|
6
5
|
|
@@ -158,7 +157,7 @@ impl RbDataFrame {
|
|
158
157
|
}
|
159
158
|
|
160
159
|
pub fn n_chunks(&self) -> usize {
|
161
|
-
self.df.borrow().
|
160
|
+
self.df.borrow().first_col_n_chunks()
|
162
161
|
}
|
163
162
|
|
164
163
|
pub fn shape(&self) -> (usize, usize) {
|
@@ -410,52 +409,6 @@ impl RbDataFrame {
|
|
410
409
|
self.df.borrow().clone().lazy().into()
|
411
410
|
}
|
412
411
|
|
413
|
-
pub fn max_horizontal(&self) -> RbResult<Option<RbSeries>> {
|
414
|
-
let s = self
|
415
|
-
.df
|
416
|
-
.borrow()
|
417
|
-
.max_horizontal()
|
418
|
-
.map_err(RbPolarsErr::from)?;
|
419
|
-
Ok(s.map(|s| s.take_materialized_series().into()))
|
420
|
-
}
|
421
|
-
|
422
|
-
pub fn min_horizontal(&self) -> RbResult<Option<RbSeries>> {
|
423
|
-
let s = self
|
424
|
-
.df
|
425
|
-
.borrow()
|
426
|
-
.min_horizontal()
|
427
|
-
.map_err(RbPolarsErr::from)?;
|
428
|
-
Ok(s.map(|s| s.take_materialized_series().into()))
|
429
|
-
}
|
430
|
-
|
431
|
-
pub fn sum_horizontal(&self, ignore_nulls: bool) -> RbResult<Option<RbSeries>> {
|
432
|
-
let null_strategy = if ignore_nulls {
|
433
|
-
NullStrategy::Ignore
|
434
|
-
} else {
|
435
|
-
NullStrategy::Propagate
|
436
|
-
};
|
437
|
-
let s = self
|
438
|
-
.df
|
439
|
-
.borrow()
|
440
|
-
.sum_horizontal(null_strategy)
|
441
|
-
.map_err(RbPolarsErr::from)?;
|
442
|
-
Ok(s.map(|s| s.into()))
|
443
|
-
}
|
444
|
-
|
445
|
-
pub fn mean_horizontal(&self, ignore_nulls: bool) -> RbResult<Option<RbSeries>> {
|
446
|
-
let null_strategy = if ignore_nulls {
|
447
|
-
NullStrategy::Ignore
|
448
|
-
} else {
|
449
|
-
NullStrategy::Propagate
|
450
|
-
};
|
451
|
-
let s = self
|
452
|
-
.df
|
453
|
-
.borrow()
|
454
|
-
.mean_horizontal(null_strategy)
|
455
|
-
.map_err(RbPolarsErr::from)?;
|
456
|
-
Ok(s.map(|s| s.into()))
|
457
|
-
}
|
458
|
-
|
459
412
|
pub fn to_dummies(
|
460
413
|
&self,
|
461
414
|
columns: Option<Vec<String>>,
|
data/ext/polars/src/dataframe/io.rs
CHANGED
@@ -2,15 +2,13 @@ use magnus::{prelude::*, Value};
|
|
2
2
|
use polars::io::avro::AvroCompression;
|
3
3
|
use polars::io::RowIndex;
|
4
4
|
use polars::prelude::*;
|
5
|
-
use polars_utils::mmap::ensure_not_mapped;
|
6
5
|
use std::io::BufWriter;
|
7
6
|
use std::num::NonZeroUsize;
|
8
7
|
|
9
8
|
use super::*;
|
10
9
|
use crate::conversion::*;
|
11
10
|
use crate::file::{
|
12
|
-
|
13
|
-
read_if_bytesio, EitherRustRubyFile,
|
11
|
+
get_file_like, get_mmap_bytes_reader, get_mmap_bytes_reader_and_path, read_if_bytesio,
|
14
12
|
};
|
15
13
|
use crate::{RbPolarsErr, RbResult};
|
16
14
|
|
@@ -330,13 +328,19 @@ impl RbDataFrame {
|
|
330
328
|
rb_f: Value,
|
331
329
|
compression: Wrap<Option<IpcCompression>>,
|
332
330
|
compat_level: RbCompatLevel,
|
331
|
+
cloud_options: Option<Vec<(String, String)>>,
|
332
|
+
retries: usize,
|
333
333
|
) -> RbResult<()> {
|
334
|
-
let
|
335
|
-
|
336
|
-
|
337
|
-
}
|
338
|
-
|
339
|
-
|
334
|
+
let cloud_options = if let Ok(path) = String::try_convert(rb_f) {
|
335
|
+
let cloud_options = parse_cloud_options(&path, cloud_options.unwrap_or_default())?;
|
336
|
+
Some(cloud_options.with_max_retries(retries))
|
337
|
+
} else {
|
338
|
+
None
|
339
|
+
};
|
340
|
+
|
341
|
+
let f = crate::file::try_get_writeable(rb_f, cloud_options.as_ref())?;
|
342
|
+
|
343
|
+
IpcWriter::new(f)
|
340
344
|
.with_compression(compression.0)
|
341
345
|
.with_compat_level(compat_level.0)
|
342
346
|
.finish(&mut self.df.borrow_mut())
|
data/ext/polars/src/expr/general.rs
CHANGED
@@ -271,6 +271,7 @@ impl RbExpr {
|
|
271
271
|
nulls_last,
|
272
272
|
multithreaded: true,
|
273
273
|
maintain_order: false,
|
274
|
+
limit: None,
|
274
275
|
})
|
275
276
|
.into()
|
276
277
|
}
|
@@ -283,6 +284,7 @@ impl RbExpr {
|
|
283
284
|
nulls_last,
|
284
285
|
multithreaded: true,
|
285
286
|
maintain_order: false,
|
287
|
+
limit: None,
|
286
288
|
})
|
287
289
|
.into()
|
288
290
|
}
|
@@ -363,6 +365,7 @@ impl RbExpr {
|
|
363
365
|
nulls_last,
|
364
366
|
multithreaded,
|
365
367
|
maintain_order,
|
368
|
+
limit: None,
|
366
369
|
},
|
367
370
|
)
|
368
371
|
.into())
|
data/ext/polars/src/expr/meta.rs
CHANGED
@@ -84,13 +84,17 @@ impl RbExpr {
|
|
84
84
|
self.inner.clone().meta()._into_selector().into()
|
85
85
|
}
|
86
86
|
|
87
|
-
|
87
|
+
fn compute_tree_format(&self, display_as_dot: bool) -> RbResult<String> {
|
88
88
|
let e = self
|
89
89
|
.inner
|
90
90
|
.clone()
|
91
91
|
.meta()
|
92
|
-
.into_tree_formatter()
|
92
|
+
.into_tree_formatter(display_as_dot)
|
93
93
|
.map_err(RbPolarsErr::from)?;
|
94
94
|
Ok(format!("{e}"))
|
95
95
|
}
|
96
|
+
|
97
|
+
pub fn meta_tree_format(&self) -> RbResult<String> {
|
98
|
+
self.compute_tree_format(false)
|
99
|
+
}
|
96
100
|
}
|
data/ext/polars/src/file.rs
CHANGED
@@ -4,7 +4,9 @@ use std::io::{Cursor, Read, Seek, SeekFrom, Write};
|
|
4
4
|
use std::path::PathBuf;
|
5
5
|
|
6
6
|
use magnus::{exception, prelude::*, Error, RString, Value};
|
7
|
+
use polars::io::cloud::CloudOptions;
|
7
8
|
use polars::io::mmap::MmapBytesReader;
|
9
|
+
use polars_utils::mmap::MemSlice;
|
8
10
|
|
9
11
|
use crate::error::RbPolarsErr;
|
10
12
|
use crate::prelude::resolve_homedir;
|
@@ -141,10 +143,17 @@ impl EitherRustRubyFile {
|
|
141
143
|
EitherRustRubyFile::Rust(f) => Box::new(f),
|
142
144
|
}
|
143
145
|
}
|
146
|
+
|
147
|
+
pub fn into_dyn_writeable(self) -> Box<dyn Write> {
|
148
|
+
match self {
|
149
|
+
EitherRustRubyFile::Rb(f) => Box::new(f),
|
150
|
+
EitherRustRubyFile::Rust(f) => Box::new(f),
|
151
|
+
}
|
152
|
+
}
|
144
153
|
}
|
145
154
|
|
146
155
|
pub enum RubyScanSourceInput {
|
147
|
-
Buffer(
|
156
|
+
Buffer(MemSlice),
|
148
157
|
Path(PathBuf),
|
149
158
|
#[allow(dead_code)]
|
150
159
|
File(File),
|
@@ -156,7 +165,9 @@ pub fn get_ruby_scan_source_input(rb_f: Value, write: bool) -> RbResult<RubyScan
|
|
156
165
|
Ok(RubyScanSourceInput::Path(file_path))
|
157
166
|
} else {
|
158
167
|
let f = RbFileLikeObject::with_requirements(rb_f, !write, write, !write)?;
|
159
|
-
Ok(RubyScanSourceInput::Buffer(
|
168
|
+
Ok(RubyScanSourceInput::Buffer(MemSlice::from_bytes(
|
169
|
+
f.as_bytes(),
|
170
|
+
)))
|
160
171
|
}
|
161
172
|
}
|
162
173
|
|
@@ -167,7 +178,7 @@ pub fn get_either_file(rb_f: Value, truncate: bool) -> RbResult<EitherRustRubyFi
|
|
167
178
|
if let Ok(rstring) = RString::try_convert(rb_f) {
|
168
179
|
let s = unsafe { rstring.as_str() }?;
|
169
180
|
let file_path = std::path::Path::new(&s);
|
170
|
-
let file_path = resolve_homedir(file_path);
|
181
|
+
let file_path = resolve_homedir(&file_path);
|
171
182
|
let f = if truncate {
|
172
183
|
File::create(file_path).map_err(RbPolarsErr::from)?
|
173
184
|
} else {
|
@@ -212,3 +223,10 @@ pub fn get_mmap_bytes_reader_and_path<'a>(
|
|
212
223
|
}
|
213
224
|
}
|
214
225
|
}
|
226
|
+
|
227
|
+
pub fn try_get_writeable(
|
228
|
+
rb_f: Value,
|
229
|
+
_cloud_options: Option<&CloudOptions>,
|
230
|
+
) -> RbResult<Box<dyn Write>> {
|
231
|
+
Ok(get_either_file(rb_f, true)?.into_dyn_writeable())
|
232
|
+
}
|
data/ext/polars/src/functions/aggregation.rs
CHANGED
@@ -28,14 +28,14 @@ pub fn min_horizontal(exprs: RArray) -> RbResult<RbExpr> {
|
|
28
28
|
Ok(e.into())
|
29
29
|
}
|
30
30
|
|
31
|
-
pub fn sum_horizontal(exprs: RArray) -> RbResult<RbExpr> {
|
31
|
+
pub fn sum_horizontal(exprs: RArray, ignore_nulls: bool) -> RbResult<RbExpr> {
|
32
32
|
let exprs = rb_exprs_to_exprs(exprs)?;
|
33
|
-
let e = dsl::sum_horizontal(exprs).map_err(RbPolarsErr::from)?;
|
33
|
+
let e = dsl::sum_horizontal(exprs, ignore_nulls).map_err(RbPolarsErr::from)?;
|
34
34
|
Ok(e.into())
|
35
35
|
}
|
36
36
|
|
37
|
-
pub fn mean_horizontal(exprs: RArray) -> RbResult<RbExpr> {
|
37
|
+
pub fn mean_horizontal(exprs: RArray, ignore_nulls: bool) -> RbResult<RbExpr> {
|
38
38
|
let exprs = rb_exprs_to_exprs(exprs)?;
|
39
|
-
let e = dsl::mean_horizontal(exprs).map_err(RbPolarsErr::from)?;
|
39
|
+
let e = dsl::mean_horizontal(exprs, ignore_nulls).map_err(RbPolarsErr::from)?;
|
40
40
|
Ok(e.into())
|
41
41
|
}
|
data/ext/polars/src/functions/io.rs
CHANGED
@@ -1,34 +1,55 @@
|
|
1
|
+
use std::io::BufReader;
|
2
|
+
|
1
3
|
use magnus::{RHash, Value};
|
4
|
+
use polars::prelude::ArrowSchema;
|
5
|
+
use polars_core::datatypes::create_enum_dtype;
|
6
|
+
use polars_core::export::arrow::array::Utf8ViewArray;
|
2
7
|
|
3
8
|
use crate::conversion::Wrap;
|
4
|
-
use crate::file::
|
5
|
-
use crate::prelude::
|
9
|
+
use crate::file::{get_either_file, EitherRustRubyFile};
|
10
|
+
use crate::prelude::ArrowDataType;
|
6
11
|
use crate::{RbPolarsErr, RbResult};
|
7
12
|
|
8
13
|
pub fn read_ipc_schema(rb_f: Value) -> RbResult<RHash> {
|
9
14
|
use polars_core::export::arrow::io::ipc::read::read_file_metadata;
|
10
|
-
let
|
11
|
-
|
15
|
+
let metadata = match get_either_file(rb_f, false)? {
|
16
|
+
EitherRustRubyFile::Rust(r) => {
|
17
|
+
read_file_metadata(&mut BufReader::new(r)).map_err(RbPolarsErr::from)?
|
18
|
+
}
|
19
|
+
EitherRustRubyFile::Rb(mut r) => read_file_metadata(&mut r).map_err(RbPolarsErr::from)?,
|
20
|
+
};
|
12
21
|
|
13
22
|
let dict = RHash::new();
|
14
|
-
|
15
|
-
let dt: Wrap<DataType> = Wrap((&field.dtype).into());
|
16
|
-
dict.aset(field.name.as_str(), dt)?;
|
17
|
-
}
|
23
|
+
fields_to_rbdict(&metadata.schema, &dict)?;
|
18
24
|
Ok(dict)
|
19
25
|
}
|
20
26
|
|
21
27
|
pub fn read_parquet_schema(rb_f: Value) -> RbResult<RHash> {
|
22
28
|
use polars_parquet::read::{infer_schema, read_metadata};
|
23
29
|
|
24
|
-
let
|
25
|
-
|
30
|
+
let metadata = match get_either_file(rb_f, false)? {
|
31
|
+
EitherRustRubyFile::Rust(r) => {
|
32
|
+
read_metadata(&mut BufReader::new(r)).map_err(RbPolarsErr::from)?
|
33
|
+
}
|
34
|
+
EitherRustRubyFile::Rb(mut r) => read_metadata(&mut r).map_err(RbPolarsErr::from)?,
|
35
|
+
};
|
26
36
|
let arrow_schema = infer_schema(&metadata).map_err(RbPolarsErr::from)?;
|
27
37
|
|
28
38
|
let dict = RHash::new();
|
29
|
-
|
30
|
-
|
39
|
+
fields_to_rbdict(&arrow_schema, &dict)?;
|
40
|
+
Ok(dict)
|
41
|
+
}
|
42
|
+
|
43
|
+
fn fields_to_rbdict(schema: &ArrowSchema, dict: &RHash) -> RbResult<()> {
|
44
|
+
for field in schema.iter_values() {
|
45
|
+
let dt = if field.is_enum() {
|
46
|
+
Wrap(create_enum_dtype(Utf8ViewArray::new_empty(
|
47
|
+
ArrowDataType::Utf8View,
|
48
|
+
)))
|
49
|
+
} else {
|
50
|
+
Wrap(polars::prelude::DataType::from_arrow_field(field))
|
51
|
+
};
|
31
52
|
dict.aset(field.name.as_str(), dt)?;
|
32
53
|
}
|
33
|
-
Ok(dict)
|
54
|
+
Ok(())
|
34
55
|
}
|
data/ext/polars/src/functions/lazy.rs
CHANGED
@@ -70,6 +70,7 @@ pub fn arg_sort_by(
|
|
70
70
|
nulls_last,
|
71
71
|
multithreaded,
|
72
72
|
maintain_order,
|
73
|
+
limit: None,
|
73
74
|
},
|
74
75
|
)
|
75
76
|
.into())
|
@@ -320,8 +321,8 @@ pub fn lit(value: Value, allow_object: bool) -> RbResult<RbExpr> {
|
|
320
321
|
}
|
321
322
|
}
|
322
323
|
|
323
|
-
pub fn pearson_corr(a: &RbExpr, b: &RbExpr
|
324
|
-
dsl::pearson_corr(a.inner.clone(), b.inner.clone()
|
324
|
+
pub fn pearson_corr(a: &RbExpr, b: &RbExpr) -> RbExpr {
|
325
|
+
dsl::pearson_corr(a.inner.clone(), b.inner.clone()).into()
|
325
326
|
}
|
326
327
|
|
327
328
|
pub fn repeat(value: &RbExpr, n: &RbExpr, dtype: Option<Wrap<DataType>>) -> RbResult<RbExpr> {
|
@@ -345,8 +346,8 @@ pub fn repeat(value: &RbExpr, n: &RbExpr, dtype: Option<Wrap<DataType>>) -> RbRe
|
|
345
346
|
Ok(dsl::repeat(value, n).into())
|
346
347
|
}
|
347
348
|
|
348
|
-
pub fn spearman_rank_corr(a: &RbExpr, b: &RbExpr,
|
349
|
-
dsl::spearman_rank_corr(a.inner.clone(), b.inner.clone(),
|
349
|
+
pub fn spearman_rank_corr(a: &RbExpr, b: &RbExpr, propagate_nans: bool) -> RbExpr {
|
350
|
+
dsl::spearman_rank_corr(a.inner.clone(), b.inner.clone(), propagate_nans).into()
|
350
351
|
}
|
351
352
|
|
352
353
|
pub fn sql_expr(sql: String) -> RbResult<RbExpr> {
|
data/ext/polars/src/interop/arrow/to_ruby.rs
CHANGED
@@ -9,7 +9,7 @@ use polars_core::utils::arrow;
|
|
9
9
|
|
10
10
|
use crate::RbResult;
|
11
11
|
|
12
|
-
#[magnus::wrap(class = "Polars::
|
12
|
+
#[magnus::wrap(class = "Polars::ArrowArrayStream")]
|
13
13
|
pub struct RbArrowArrayStream {
|
14
14
|
stream: ffi::ArrowArrayStream,
|
15
15
|
}
|
@@ -47,7 +47,7 @@ impl DataFrameStreamIterator {
|
|
47
47
|
.collect(),
|
48
48
|
dtype,
|
49
49
|
idx: 0,
|
50
|
-
n_chunks: df.
|
50
|
+
n_chunks: df.first_col_n_chunks(),
|
51
51
|
}
|
52
52
|
}
|
53
53
|
|