polars-df 0.15.0 → 0.16.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/Cargo.lock +588 -456
- data/README.md +37 -2
- data/ext/polars/Cargo.toml +7 -7
- data/ext/polars/src/conversion/mod.rs +31 -21
- data/ext/polars/src/dataframe/general.rs +1 -48
- data/ext/polars/src/dataframe/io.rs +13 -9
- data/ext/polars/src/expr/general.rs +3 -0
- data/ext/polars/src/expr/meta.rs +6 -2
- data/ext/polars/src/file.rs +21 -3
- data/ext/polars/src/functions/aggregation.rs +4 -4
- data/ext/polars/src/functions/io.rs +34 -13
- data/ext/polars/src/functions/lazy.rs +5 -4
- data/ext/polars/src/functions/meta.rs +1 -1
- data/ext/polars/src/interop/arrow/to_ruby.rs +2 -2
- data/ext/polars/src/lazyframe/general.rs +48 -5
- data/ext/polars/src/lib.rs +11 -15
- data/ext/polars/src/series/general.rs +3 -15
- data/ext/polars/src/series/import.rs +1 -1
- data/lib/polars/data_frame.rb +179 -51
- data/lib/polars/data_types.rb +1 -1
- data/lib/polars/functions/aggregation/horizontal.rb +10 -4
- data/lib/polars/functions/lazy.rb +7 -3
- data/lib/polars/io/delta.rb +126 -0
- data/lib/polars/lazy_frame.rb +35 -5
- data/lib/polars/selectors.rb +85 -3
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +15 -0
- metadata +5 -8
data/README.md
CHANGED
@@ -14,7 +14,7 @@ gem "polars-df"
|
|
14
14
|
|
15
15
|
## Getting Started
|
16
16
|
|
17
|
-
This library follows the [Polars Python API](https://pola
|
17
|
+
This library follows the [Polars Python API](https://docs.pola.rs/api/python/stable/reference/index.html).
|
18
18
|
|
19
19
|
```ruby
|
20
20
|
Polars.scan_csv("iris.csv")
|
@@ -24,7 +24,7 @@ Polars.scan_csv("iris.csv")
|
|
24
24
|
.collect
|
25
25
|
```
|
26
26
|
|
27
|
-
You can follow [Polars tutorials](https://pola
|
27
|
+
You can follow [Polars tutorials](https://docs.pola.rs/user-guide/getting-started/) and convert the code to Ruby in many cases. Feel free to open an issue if you run into problems.
|
28
28
|
|
29
29
|
## Reference
|
30
30
|
|
@@ -88,6 +88,15 @@ From Avro
|
|
88
88
|
Polars.read_avro("file.avro")
|
89
89
|
```
|
90
90
|
|
91
|
+
From Delta Lake (requires [deltalake-rb](https://github.com/ankane/delta-ruby)) [experimental, unreleased]
|
92
|
+
|
93
|
+
```ruby
|
94
|
+
Polars.read_delta("./table")
|
95
|
+
|
96
|
+
# or lazily with
|
97
|
+
Polars.scan_delta("./table")
|
98
|
+
```
|
99
|
+
|
91
100
|
From a hash
|
92
101
|
|
93
102
|
```ruby
|
@@ -336,6 +345,32 @@ Parquet
|
|
336
345
|
df.write_parquet("file.parquet")
|
337
346
|
```
|
338
347
|
|
348
|
+
JSON
|
349
|
+
|
350
|
+
```ruby
|
351
|
+
df.write_json("file.json")
|
352
|
+
# or
|
353
|
+
df.write_ndjson("file.ndjson")
|
354
|
+
```
|
355
|
+
|
356
|
+
Feather / Arrow IPC
|
357
|
+
|
358
|
+
```ruby
|
359
|
+
df.write_ipc("file.arrow")
|
360
|
+
```
|
361
|
+
|
362
|
+
Avro
|
363
|
+
|
364
|
+
```ruby
|
365
|
+
df.write_avro("file.avro")
|
366
|
+
```
|
367
|
+
|
368
|
+
Delta Lake [experimental, unreleased]
|
369
|
+
|
370
|
+
```ruby
|
371
|
+
df.write_delta("./table")
|
372
|
+
```
|
373
|
+
|
339
374
|
Numo array
|
340
375
|
|
341
376
|
```ruby
|
data/ext/polars/Cargo.toml
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
[package]
|
2
2
|
name = "polars"
|
3
|
-
version = "0.15.0"
|
3
|
+
version = "0.16.0"
|
4
4
|
license = "MIT"
|
5
5
|
authors = ["Andrew Kane <andrew@ankane.org>"]
|
6
6
|
edition = "2021"
|
@@ -12,20 +12,20 @@ crate-type = ["cdylib"]
|
|
12
12
|
|
13
13
|
[dependencies]
|
14
14
|
ahash = "0.8"
|
15
|
-
arrow = { package = "polars-arrow", version = "=0.
|
15
|
+
arrow = { package = "polars-arrow", version = "=0.45.1" }
|
16
16
|
bytes = "1"
|
17
17
|
chrono = "0.4"
|
18
18
|
either = "1.8"
|
19
19
|
magnus = "0.7"
|
20
|
-
polars-core = "=0.
|
21
|
-
polars-plan = "=0.
|
22
|
-
polars-parquet = "=0.
|
23
|
-
polars-utils = "=0.
|
20
|
+
polars-core = "=0.45.1"
|
21
|
+
polars-plan = "=0.45.1"
|
22
|
+
polars-parquet = "=0.45.1"
|
23
|
+
polars-utils = "=0.45.1"
|
24
24
|
regex = "1"
|
25
25
|
serde_json = "1"
|
26
26
|
|
27
27
|
[dependencies.polars]
|
28
|
-
version = "=0.
|
28
|
+
version = "=0.45.1"
|
29
29
|
features = [
|
30
30
|
"abs",
|
31
31
|
"approx_unique",
|
@@ -15,7 +15,6 @@ use polars::chunked_array::object::PolarsObjectSafe;
|
|
15
15
|
use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
|
16
16
|
use polars::datatypes::AnyValue;
|
17
17
|
use polars::frame::row::Row;
|
18
|
-
use polars::frame::NullStrategy;
|
19
18
|
use polars::io::avro::AvroCompression;
|
20
19
|
use polars::io::cloud::CloudOptions;
|
21
20
|
use polars::prelude::*;
|
@@ -23,6 +22,7 @@ use polars::series::ops::NullBehavior;
|
|
23
22
|
use polars_core::utils::arrow::array::Array;
|
24
23
|
use polars_core::utils::materialize_dyn_int;
|
25
24
|
use polars_plan::plans::ScanSources;
|
25
|
+
use polars_utils::mmap::MemSlice;
|
26
26
|
use polars_utils::total_ord::{TotalEq, TotalHash};
|
27
27
|
|
28
28
|
use crate::file::{get_ruby_scan_source_input, RubyScanSourceInput};
|
@@ -304,29 +304,31 @@ impl TryConvert for Wrap<DataType> {
|
|
304
304
|
let dtype = if ob.is_kind_of(class::class()) {
|
305
305
|
let name = ob.funcall::<_, _, String>("name", ())?;
|
306
306
|
match name.as_str() {
|
307
|
-
"Polars::UInt8" => DataType::UInt8,
|
308
|
-
"Polars::UInt16" => DataType::UInt16,
|
309
|
-
"Polars::UInt32" => DataType::UInt32,
|
310
|
-
"Polars::UInt64" => DataType::UInt64,
|
311
307
|
"Polars::Int8" => DataType::Int8,
|
312
308
|
"Polars::Int16" => DataType::Int16,
|
313
309
|
"Polars::Int32" => DataType::Int32,
|
314
310
|
"Polars::Int64" => DataType::Int64,
|
311
|
+
"Polars::UInt8" => DataType::UInt8,
|
312
|
+
"Polars::UInt16" => DataType::UInt16,
|
313
|
+
"Polars::UInt32" => DataType::UInt32,
|
314
|
+
"Polars::UInt64" => DataType::UInt64,
|
315
|
+
"Polars::Float32" => DataType::Float32,
|
316
|
+
"Polars::Float64" => DataType::Float64,
|
317
|
+
"Polars::Boolean" => DataType::Boolean,
|
315
318
|
"Polars::String" => DataType::String,
|
316
319
|
"Polars::Binary" => DataType::Binary,
|
317
|
-
"Polars::Boolean" => DataType::Boolean,
|
318
320
|
"Polars::Categorical" => DataType::Categorical(None, Default::default()),
|
319
321
|
"Polars::Enum" => DataType::Enum(None, Default::default()),
|
320
322
|
"Polars::Date" => DataType::Date,
|
321
|
-
"Polars::Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
|
322
323
|
"Polars::Time" => DataType::Time,
|
324
|
+
"Polars::Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
|
323
325
|
"Polars::Duration" => DataType::Duration(TimeUnit::Microseconds),
|
324
326
|
"Polars::Decimal" => DataType::Decimal(None, None),
|
325
|
-
"Polars::Float32" => DataType::Float32,
|
326
|
-
"Polars::Float64" => DataType::Float64,
|
327
|
-
"Polars::Object" => DataType::Object(OBJECT_NAME, None),
|
328
327
|
"Polars::List" => DataType::List(Box::new(DataType::Null)),
|
328
|
+
"Polars::Array" => DataType::Array(Box::new(DataType::Null), 0),
|
329
|
+
"Polars::Struct" => DataType::Struct(vec![]),
|
329
330
|
"Polars::Null" => DataType::Null,
|
331
|
+
"Polars::Object" => DataType::Object(OBJECT_NAME, None),
|
330
332
|
"Polars::Unknown" => DataType::Unknown(Default::default()),
|
331
333
|
dt => {
|
332
334
|
return Err(RbValueError::new_err(format!(
|
@@ -345,9 +347,11 @@ impl TryConvert for Wrap<DataType> {
|
|
345
347
|
"Polars::UInt16" => DataType::UInt16,
|
346
348
|
"Polars::UInt32" => DataType::UInt32,
|
347
349
|
"Polars::UInt64" => DataType::UInt64,
|
350
|
+
"Polars::Float32" => DataType::Float32,
|
351
|
+
"Polars::Float64" => DataType::Float64,
|
352
|
+
"Polars::Boolean" => DataType::Boolean,
|
348
353
|
"Polars::String" => DataType::String,
|
349
354
|
"Polars::Binary" => DataType::Binary,
|
350
|
-
"Polars::Boolean" => DataType::Boolean,
|
351
355
|
"Polars::Categorical" => {
|
352
356
|
let ordering = ob
|
353
357
|
.funcall::<_, _, Wrap<CategoricalOrdering>>("ordering", ())?
|
@@ -363,21 +367,17 @@ impl TryConvert for Wrap<DataType> {
|
|
363
367
|
}
|
364
368
|
"Polars::Date" => DataType::Date,
|
365
369
|
"Polars::Time" => DataType::Time,
|
366
|
-
"Polars::Float32" => DataType::Float32,
|
367
|
-
"Polars::Float64" => DataType::Float64,
|
368
|
-
"Polars::Null" => DataType::Null,
|
369
|
-
"Polars::Unknown" => DataType::Unknown(Default::default()),
|
370
|
-
"Polars::Duration" => {
|
371
|
-
let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
|
372
|
-
let time_unit = Wrap::<TimeUnit>::try_convert(time_unit)?.0;
|
373
|
-
DataType::Duration(time_unit)
|
374
|
-
}
|
375
370
|
"Polars::Datetime" => {
|
376
371
|
let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
|
377
372
|
let time_unit = Wrap::<TimeUnit>::try_convert(time_unit)?.0;
|
378
373
|
let time_zone: Option<String> = ob.funcall("time_zone", ())?;
|
379
374
|
DataType::Datetime(time_unit, time_zone.as_deref().map(|x| x.into()))
|
380
375
|
}
|
376
|
+
"Polars::Duration" => {
|
377
|
+
let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
|
378
|
+
let time_unit = Wrap::<TimeUnit>::try_convert(time_unit)?.0;
|
379
|
+
DataType::Duration(time_unit)
|
380
|
+
}
|
381
381
|
"Polars::Decimal" => {
|
382
382
|
let precision = ob.funcall("precision", ())?;
|
383
383
|
let scale = ob.funcall("scale", ())?;
|
@@ -388,6 +388,13 @@ impl TryConvert for Wrap<DataType> {
|
|
388
388
|
let inner = Wrap::<DataType>::try_convert(inner)?;
|
389
389
|
DataType::List(Box::new(inner.0))
|
390
390
|
}
|
391
|
+
"Polars::Array" => {
|
392
|
+
let inner: Value = ob.funcall("inner", ()).unwrap();
|
393
|
+
let size: Value = ob.funcall("size", ()).unwrap();
|
394
|
+
let inner = Wrap::<DataType>::try_convert(inner)?;
|
395
|
+
let size = usize::try_convert(size)?;
|
396
|
+
DataType::Array(Box::new(inner.0), size)
|
397
|
+
}
|
391
398
|
"Polars::Struct" => {
|
392
399
|
let arr: RArray = ob.funcall("fields", ())?;
|
393
400
|
let mut fields = Vec::with_capacity(arr.len());
|
@@ -396,6 +403,9 @@ impl TryConvert for Wrap<DataType> {
|
|
396
403
|
}
|
397
404
|
DataType::Struct(fields)
|
398
405
|
}
|
406
|
+
"Polars::Null" => DataType::Null,
|
407
|
+
"Object" => DataType::Object(OBJECT_NAME, None),
|
408
|
+
"Polars::Unknown" => DataType::Unknown(Default::default()),
|
399
409
|
dt => {
|
400
410
|
return Err(RbTypeError::new_err(format!(
|
401
411
|
"A {dt} object is not a correct polars DataType. \
|
@@ -501,7 +511,7 @@ impl TryConvert for Wrap<ScanSources> {
|
|
501
511
|
enum MutableSources {
|
502
512
|
Paths(Vec<PathBuf>),
|
503
513
|
Files(Vec<File>),
|
504
|
-
Buffers(Vec<
|
514
|
+
Buffers(Vec<MemSlice>),
|
505
515
|
}
|
506
516
|
|
507
517
|
let num_items = list.len();
|
@@ -1,6 +1,5 @@
|
|
1
1
|
use either::Either;
|
2
2
|
use magnus::{prelude::*, typed_data::Obj, IntoValue, RArray, Value};
|
3
|
-
use polars::frame::NullStrategy;
|
4
3
|
use polars::prelude::pivot::{pivot, pivot_stable};
|
5
4
|
use polars::prelude::*;
|
6
5
|
|
@@ -158,7 +157,7 @@ impl RbDataFrame {
|
|
158
157
|
}
|
159
158
|
|
160
159
|
pub fn n_chunks(&self) -> usize {
|
161
|
-
self.df.borrow().
|
160
|
+
self.df.borrow().first_col_n_chunks()
|
162
161
|
}
|
163
162
|
|
164
163
|
pub fn shape(&self) -> (usize, usize) {
|
@@ -410,52 +409,6 @@ impl RbDataFrame {
|
|
410
409
|
self.df.borrow().clone().lazy().into()
|
411
410
|
}
|
412
411
|
|
413
|
-
pub fn max_horizontal(&self) -> RbResult<Option<RbSeries>> {
|
414
|
-
let s = self
|
415
|
-
.df
|
416
|
-
.borrow()
|
417
|
-
.max_horizontal()
|
418
|
-
.map_err(RbPolarsErr::from)?;
|
419
|
-
Ok(s.map(|s| s.take_materialized_series().into()))
|
420
|
-
}
|
421
|
-
|
422
|
-
pub fn min_horizontal(&self) -> RbResult<Option<RbSeries>> {
|
423
|
-
let s = self
|
424
|
-
.df
|
425
|
-
.borrow()
|
426
|
-
.min_horizontal()
|
427
|
-
.map_err(RbPolarsErr::from)?;
|
428
|
-
Ok(s.map(|s| s.take_materialized_series().into()))
|
429
|
-
}
|
430
|
-
|
431
|
-
pub fn sum_horizontal(&self, ignore_nulls: bool) -> RbResult<Option<RbSeries>> {
|
432
|
-
let null_strategy = if ignore_nulls {
|
433
|
-
NullStrategy::Ignore
|
434
|
-
} else {
|
435
|
-
NullStrategy::Propagate
|
436
|
-
};
|
437
|
-
let s = self
|
438
|
-
.df
|
439
|
-
.borrow()
|
440
|
-
.sum_horizontal(null_strategy)
|
441
|
-
.map_err(RbPolarsErr::from)?;
|
442
|
-
Ok(s.map(|s| s.into()))
|
443
|
-
}
|
444
|
-
|
445
|
-
pub fn mean_horizontal(&self, ignore_nulls: bool) -> RbResult<Option<RbSeries>> {
|
446
|
-
let null_strategy = if ignore_nulls {
|
447
|
-
NullStrategy::Ignore
|
448
|
-
} else {
|
449
|
-
NullStrategy::Propagate
|
450
|
-
};
|
451
|
-
let s = self
|
452
|
-
.df
|
453
|
-
.borrow()
|
454
|
-
.mean_horizontal(null_strategy)
|
455
|
-
.map_err(RbPolarsErr::from)?;
|
456
|
-
Ok(s.map(|s| s.into()))
|
457
|
-
}
|
458
|
-
|
459
412
|
pub fn to_dummies(
|
460
413
|
&self,
|
461
414
|
columns: Option<Vec<String>>,
|
@@ -2,15 +2,13 @@ use magnus::{prelude::*, Value};
|
|
2
2
|
use polars::io::avro::AvroCompression;
|
3
3
|
use polars::io::RowIndex;
|
4
4
|
use polars::prelude::*;
|
5
|
-
use polars_utils::mmap::ensure_not_mapped;
|
6
5
|
use std::io::BufWriter;
|
7
6
|
use std::num::NonZeroUsize;
|
8
7
|
|
9
8
|
use super::*;
|
10
9
|
use crate::conversion::*;
|
11
10
|
use crate::file::{
|
12
|
-
|
13
|
-
read_if_bytesio, EitherRustRubyFile,
|
11
|
+
get_file_like, get_mmap_bytes_reader, get_mmap_bytes_reader_and_path, read_if_bytesio,
|
14
12
|
};
|
15
13
|
use crate::{RbPolarsErr, RbResult};
|
16
14
|
|
@@ -330,13 +328,19 @@ impl RbDataFrame {
|
|
330
328
|
rb_f: Value,
|
331
329
|
compression: Wrap<Option<IpcCompression>>,
|
332
330
|
compat_level: RbCompatLevel,
|
331
|
+
cloud_options: Option<Vec<(String, String)>>,
|
332
|
+
retries: usize,
|
333
333
|
) -> RbResult<()> {
|
334
|
-
let
|
335
|
-
|
336
|
-
|
337
|
-
}
|
338
|
-
|
339
|
-
|
334
|
+
let cloud_options = if let Ok(path) = String::try_convert(rb_f) {
|
335
|
+
let cloud_options = parse_cloud_options(&path, cloud_options.unwrap_or_default())?;
|
336
|
+
Some(cloud_options.with_max_retries(retries))
|
337
|
+
} else {
|
338
|
+
None
|
339
|
+
};
|
340
|
+
|
341
|
+
let f = crate::file::try_get_writeable(rb_f, cloud_options.as_ref())?;
|
342
|
+
|
343
|
+
IpcWriter::new(f)
|
340
344
|
.with_compression(compression.0)
|
341
345
|
.with_compat_level(compat_level.0)
|
342
346
|
.finish(&mut self.df.borrow_mut())
|
@@ -271,6 +271,7 @@ impl RbExpr {
|
|
271
271
|
nulls_last,
|
272
272
|
multithreaded: true,
|
273
273
|
maintain_order: false,
|
274
|
+
limit: None,
|
274
275
|
})
|
275
276
|
.into()
|
276
277
|
}
|
@@ -283,6 +284,7 @@ impl RbExpr {
|
|
283
284
|
nulls_last,
|
284
285
|
multithreaded: true,
|
285
286
|
maintain_order: false,
|
287
|
+
limit: None,
|
286
288
|
})
|
287
289
|
.into()
|
288
290
|
}
|
@@ -363,6 +365,7 @@ impl RbExpr {
|
|
363
365
|
nulls_last,
|
364
366
|
multithreaded,
|
365
367
|
maintain_order,
|
368
|
+
limit: None,
|
366
369
|
},
|
367
370
|
)
|
368
371
|
.into())
|
data/ext/polars/src/expr/meta.rs
CHANGED
@@ -84,13 +84,17 @@ impl RbExpr {
|
|
84
84
|
self.inner.clone().meta()._into_selector().into()
|
85
85
|
}
|
86
86
|
|
87
|
-
|
87
|
+
fn compute_tree_format(&self, display_as_dot: bool) -> RbResult<String> {
|
88
88
|
let e = self
|
89
89
|
.inner
|
90
90
|
.clone()
|
91
91
|
.meta()
|
92
|
-
.into_tree_formatter()
|
92
|
+
.into_tree_formatter(display_as_dot)
|
93
93
|
.map_err(RbPolarsErr::from)?;
|
94
94
|
Ok(format!("{e}"))
|
95
95
|
}
|
96
|
+
|
97
|
+
pub fn meta_tree_format(&self) -> RbResult<String> {
|
98
|
+
self.compute_tree_format(false)
|
99
|
+
}
|
96
100
|
}
|
data/ext/polars/src/file.rs
CHANGED
@@ -4,7 +4,9 @@ use std::io::{Cursor, Read, Seek, SeekFrom, Write};
|
|
4
4
|
use std::path::PathBuf;
|
5
5
|
|
6
6
|
use magnus::{exception, prelude::*, Error, RString, Value};
|
7
|
+
use polars::io::cloud::CloudOptions;
|
7
8
|
use polars::io::mmap::MmapBytesReader;
|
9
|
+
use polars_utils::mmap::MemSlice;
|
8
10
|
|
9
11
|
use crate::error::RbPolarsErr;
|
10
12
|
use crate::prelude::resolve_homedir;
|
@@ -141,10 +143,17 @@ impl EitherRustRubyFile {
|
|
141
143
|
EitherRustRubyFile::Rust(f) => Box::new(f),
|
142
144
|
}
|
143
145
|
}
|
146
|
+
|
147
|
+
pub fn into_dyn_writeable(self) -> Box<dyn Write> {
|
148
|
+
match self {
|
149
|
+
EitherRustRubyFile::Rb(f) => Box::new(f),
|
150
|
+
EitherRustRubyFile::Rust(f) => Box::new(f),
|
151
|
+
}
|
152
|
+
}
|
144
153
|
}
|
145
154
|
|
146
155
|
pub enum RubyScanSourceInput {
|
147
|
-
Buffer(
|
156
|
+
Buffer(MemSlice),
|
148
157
|
Path(PathBuf),
|
149
158
|
#[allow(dead_code)]
|
150
159
|
File(File),
|
@@ -156,7 +165,9 @@ pub fn get_ruby_scan_source_input(rb_f: Value, write: bool) -> RbResult<RubyScan
|
|
156
165
|
Ok(RubyScanSourceInput::Path(file_path))
|
157
166
|
} else {
|
158
167
|
let f = RbFileLikeObject::with_requirements(rb_f, !write, write, !write)?;
|
159
|
-
Ok(RubyScanSourceInput::Buffer(
|
168
|
+
Ok(RubyScanSourceInput::Buffer(MemSlice::from_bytes(
|
169
|
+
f.as_bytes(),
|
170
|
+
)))
|
160
171
|
}
|
161
172
|
}
|
162
173
|
|
@@ -167,7 +178,7 @@ pub fn get_either_file(rb_f: Value, truncate: bool) -> RbResult<EitherRustRubyFi
|
|
167
178
|
if let Ok(rstring) = RString::try_convert(rb_f) {
|
168
179
|
let s = unsafe { rstring.as_str() }?;
|
169
180
|
let file_path = std::path::Path::new(&s);
|
170
|
-
let file_path = resolve_homedir(file_path);
|
181
|
+
let file_path = resolve_homedir(&file_path);
|
171
182
|
let f = if truncate {
|
172
183
|
File::create(file_path).map_err(RbPolarsErr::from)?
|
173
184
|
} else {
|
@@ -212,3 +223,10 @@ pub fn get_mmap_bytes_reader_and_path<'a>(
|
|
212
223
|
}
|
213
224
|
}
|
214
225
|
}
|
226
|
+
|
227
|
+
pub fn try_get_writeable(
|
228
|
+
rb_f: Value,
|
229
|
+
_cloud_options: Option<&CloudOptions>,
|
230
|
+
) -> RbResult<Box<dyn Write>> {
|
231
|
+
Ok(get_either_file(rb_f, true)?.into_dyn_writeable())
|
232
|
+
}
|
@@ -28,14 +28,14 @@ pub fn min_horizontal(exprs: RArray) -> RbResult<RbExpr> {
|
|
28
28
|
Ok(e.into())
|
29
29
|
}
|
30
30
|
|
31
|
-
pub fn sum_horizontal(exprs: RArray) -> RbResult<RbExpr> {
|
31
|
+
pub fn sum_horizontal(exprs: RArray, ignore_nulls: bool) -> RbResult<RbExpr> {
|
32
32
|
let exprs = rb_exprs_to_exprs(exprs)?;
|
33
|
-
let e = dsl::sum_horizontal(exprs).map_err(RbPolarsErr::from)?;
|
33
|
+
let e = dsl::sum_horizontal(exprs, ignore_nulls).map_err(RbPolarsErr::from)?;
|
34
34
|
Ok(e.into())
|
35
35
|
}
|
36
36
|
|
37
|
-
pub fn mean_horizontal(exprs: RArray) -> RbResult<RbExpr> {
|
37
|
+
pub fn mean_horizontal(exprs: RArray, ignore_nulls: bool) -> RbResult<RbExpr> {
|
38
38
|
let exprs = rb_exprs_to_exprs(exprs)?;
|
39
|
-
let e = dsl::mean_horizontal(exprs).map_err(RbPolarsErr::from)?;
|
39
|
+
let e = dsl::mean_horizontal(exprs, ignore_nulls).map_err(RbPolarsErr::from)?;
|
40
40
|
Ok(e.into())
|
41
41
|
}
|
@@ -1,34 +1,55 @@
|
|
1
|
+
use std::io::BufReader;
|
2
|
+
|
1
3
|
use magnus::{RHash, Value};
|
4
|
+
use polars::prelude::ArrowSchema;
|
5
|
+
use polars_core::datatypes::create_enum_dtype;
|
6
|
+
use polars_core::export::arrow::array::Utf8ViewArray;
|
2
7
|
|
3
8
|
use crate::conversion::Wrap;
|
4
|
-
use crate::file::
|
5
|
-
use crate::prelude::
|
9
|
+
use crate::file::{get_either_file, EitherRustRubyFile};
|
10
|
+
use crate::prelude::ArrowDataType;
|
6
11
|
use crate::{RbPolarsErr, RbResult};
|
7
12
|
|
8
13
|
pub fn read_ipc_schema(rb_f: Value) -> RbResult<RHash> {
|
9
14
|
use polars_core::export::arrow::io::ipc::read::read_file_metadata;
|
10
|
-
let
|
11
|
-
|
15
|
+
let metadata = match get_either_file(rb_f, false)? {
|
16
|
+
EitherRustRubyFile::Rust(r) => {
|
17
|
+
read_file_metadata(&mut BufReader::new(r)).map_err(RbPolarsErr::from)?
|
18
|
+
}
|
19
|
+
EitherRustRubyFile::Rb(mut r) => read_file_metadata(&mut r).map_err(RbPolarsErr::from)?,
|
20
|
+
};
|
12
21
|
|
13
22
|
let dict = RHash::new();
|
14
|
-
|
15
|
-
let dt: Wrap<DataType> = Wrap((&field.dtype).into());
|
16
|
-
dict.aset(field.name.as_str(), dt)?;
|
17
|
-
}
|
23
|
+
fields_to_rbdict(&metadata.schema, &dict)?;
|
18
24
|
Ok(dict)
|
19
25
|
}
|
20
26
|
|
21
27
|
pub fn read_parquet_schema(rb_f: Value) -> RbResult<RHash> {
|
22
28
|
use polars_parquet::read::{infer_schema, read_metadata};
|
23
29
|
|
24
|
-
let
|
25
|
-
|
30
|
+
let metadata = match get_either_file(rb_f, false)? {
|
31
|
+
EitherRustRubyFile::Rust(r) => {
|
32
|
+
read_metadata(&mut BufReader::new(r)).map_err(RbPolarsErr::from)?
|
33
|
+
}
|
34
|
+
EitherRustRubyFile::Rb(mut r) => read_metadata(&mut r).map_err(RbPolarsErr::from)?,
|
35
|
+
};
|
26
36
|
let arrow_schema = infer_schema(&metadata).map_err(RbPolarsErr::from)?;
|
27
37
|
|
28
38
|
let dict = RHash::new();
|
29
|
-
|
30
|
-
|
39
|
+
fields_to_rbdict(&arrow_schema, &dict)?;
|
40
|
+
Ok(dict)
|
41
|
+
}
|
42
|
+
|
43
|
+
fn fields_to_rbdict(schema: &ArrowSchema, dict: &RHash) -> RbResult<()> {
|
44
|
+
for field in schema.iter_values() {
|
45
|
+
let dt = if field.is_enum() {
|
46
|
+
Wrap(create_enum_dtype(Utf8ViewArray::new_empty(
|
47
|
+
ArrowDataType::Utf8View,
|
48
|
+
)))
|
49
|
+
} else {
|
50
|
+
Wrap(polars::prelude::DataType::from_arrow_field(field))
|
51
|
+
};
|
31
52
|
dict.aset(field.name.as_str(), dt)?;
|
32
53
|
}
|
33
|
-
Ok(
|
54
|
+
Ok(())
|
34
55
|
}
|
@@ -70,6 +70,7 @@ pub fn arg_sort_by(
|
|
70
70
|
nulls_last,
|
71
71
|
multithreaded,
|
72
72
|
maintain_order,
|
73
|
+
limit: None,
|
73
74
|
},
|
74
75
|
)
|
75
76
|
.into())
|
@@ -320,8 +321,8 @@ pub fn lit(value: Value, allow_object: bool) -> RbResult<RbExpr> {
|
|
320
321
|
}
|
321
322
|
}
|
322
323
|
|
323
|
-
pub fn pearson_corr(a: &RbExpr, b: &RbExpr
|
324
|
-
dsl::pearson_corr(a.inner.clone(), b.inner.clone()
|
324
|
+
pub fn pearson_corr(a: &RbExpr, b: &RbExpr) -> RbExpr {
|
325
|
+
dsl::pearson_corr(a.inner.clone(), b.inner.clone()).into()
|
325
326
|
}
|
326
327
|
|
327
328
|
pub fn repeat(value: &RbExpr, n: &RbExpr, dtype: Option<Wrap<DataType>>) -> RbResult<RbExpr> {
|
@@ -345,8 +346,8 @@ pub fn repeat(value: &RbExpr, n: &RbExpr, dtype: Option<Wrap<DataType>>) -> RbRe
|
|
345
346
|
Ok(dsl::repeat(value, n).into())
|
346
347
|
}
|
347
348
|
|
348
|
-
pub fn spearman_rank_corr(a: &RbExpr, b: &RbExpr,
|
349
|
-
dsl::spearman_rank_corr(a.inner.clone(), b.inner.clone(),
|
349
|
+
pub fn spearman_rank_corr(a: &RbExpr, b: &RbExpr, propagate_nans: bool) -> RbExpr {
|
350
|
+
dsl::spearman_rank_corr(a.inner.clone(), b.inner.clone(), propagate_nans).into()
|
350
351
|
}
|
351
352
|
|
352
353
|
pub fn sql_expr(sql: String) -> RbResult<RbExpr> {
|
@@ -9,7 +9,7 @@ use polars_core::utils::arrow;
|
|
9
9
|
|
10
10
|
use crate::RbResult;
|
11
11
|
|
12
|
-
#[magnus::wrap(class = "Polars::
|
12
|
+
#[magnus::wrap(class = "Polars::ArrowArrayStream")]
|
13
13
|
pub struct RbArrowArrayStream {
|
14
14
|
stream: ffi::ArrowArrayStream,
|
15
15
|
}
|
@@ -47,7 +47,7 @@ impl DataFrameStreamIterator {
|
|
47
47
|
.collect(),
|
48
48
|
dtype,
|
49
49
|
idx: 0,
|
50
|
-
n_chunks: df.
|
50
|
+
n_chunks: df.first_col_n_chunks(),
|
51
51
|
}
|
52
52
|
}
|
53
53
|
|