polars-df 0.15.0 → 0.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/Cargo.lock +664 -539
- data/LICENSE.txt +1 -1
- data/README.md +37 -2
- data/ext/polars/Cargo.toml +8 -7
- data/ext/polars/src/conversion/any_value.rs +1 -0
- data/ext/polars/src/conversion/mod.rs +35 -21
- data/ext/polars/src/dataframe/general.rs +1 -48
- data/ext/polars/src/dataframe/io.rs +21 -23
- data/ext/polars/src/expr/general.rs +3 -0
- data/ext/polars/src/expr/meta.rs +6 -2
- data/ext/polars/src/file.rs +21 -3
- data/ext/polars/src/functions/aggregation.rs +4 -4
- data/ext/polars/src/functions/io.rs +35 -14
- data/ext/polars/src/functions/lazy.rs +5 -4
- data/ext/polars/src/functions/meta.rs +1 -1
- data/ext/polars/src/interop/arrow/to_ruby.rs +2 -2
- data/ext/polars/src/interop/numo/to_numo_series.rs +1 -1
- data/ext/polars/src/lazyframe/general.rs +52 -5
- data/ext/polars/src/lib.rs +13 -17
- data/ext/polars/src/map/mod.rs +1 -1
- data/ext/polars/src/series/export.rs +1 -0
- data/ext/polars/src/series/general.rs +3 -15
- data/ext/polars/src/series/import.rs +3 -3
- data/ext/polars/src/series/scatter.rs +1 -1
- data/lib/polars/data_frame.rb +196 -68
- data/lib/polars/data_types.rb +5 -1
- data/lib/polars/functions/aggregation/horizontal.rb +10 -4
- data/lib/polars/functions/lazy.rb +7 -3
- data/lib/polars/io/delta.rb +126 -0
- data/lib/polars/lazy_frame.rb +49 -7
- data/lib/polars/selectors.rb +85 -3
- data/lib/polars/series.rb +6 -1
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +15 -0
- metadata +5 -8
data/LICENSE.txt
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
Copyright (c) 2020 Ritchie Vink
|
2
|
-
Copyright (c) 2022-
|
2
|
+
Copyright (c) 2022-2025 Andrew Kane
|
3
3
|
Some portions Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
4
4
|
|
5
5
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
data/README.md
CHANGED
@@ -14,7 +14,7 @@ gem "polars-df"
|
|
14
14
|
|
15
15
|
## Getting Started
|
16
16
|
|
17
|
-
This library follows the [Polars Python API](https://pola
|
17
|
+
This library follows the [Polars Python API](https://docs.pola.rs/api/python/stable/reference/index.html).
|
18
18
|
|
19
19
|
```ruby
|
20
20
|
Polars.scan_csv("iris.csv")
|
@@ -24,7 +24,7 @@ Polars.scan_csv("iris.csv")
|
|
24
24
|
.collect
|
25
25
|
```
|
26
26
|
|
27
|
-
You can follow [Polars tutorials](https://pola
|
27
|
+
You can follow [Polars tutorials](https://docs.pola.rs/user-guide/getting-started/) and convert the code to Ruby in many cases. Feel free to open an issue if you run into problems.
|
28
28
|
|
29
29
|
## Reference
|
30
30
|
|
@@ -88,6 +88,15 @@ From Avro
|
|
88
88
|
Polars.read_avro("file.avro")
|
89
89
|
```
|
90
90
|
|
91
|
+
From Delta Lake (requires [deltalake-rb](https://github.com/ankane/delta-ruby)) [experimental]
|
92
|
+
|
93
|
+
```ruby
|
94
|
+
Polars.read_delta("./table")
|
95
|
+
|
96
|
+
# or lazily with
|
97
|
+
Polars.scan_delta("./table")
|
98
|
+
```
|
99
|
+
|
91
100
|
From a hash
|
92
101
|
|
93
102
|
```ruby
|
@@ -336,6 +345,32 @@ Parquet
|
|
336
345
|
df.write_parquet("file.parquet")
|
337
346
|
```
|
338
347
|
|
348
|
+
JSON
|
349
|
+
|
350
|
+
```ruby
|
351
|
+
df.write_json("file.json")
|
352
|
+
# or
|
353
|
+
df.write_ndjson("file.ndjson")
|
354
|
+
```
|
355
|
+
|
356
|
+
Feather / Arrow IPC
|
357
|
+
|
358
|
+
```ruby
|
359
|
+
df.write_ipc("file.arrow")
|
360
|
+
```
|
361
|
+
|
362
|
+
Avro
|
363
|
+
|
364
|
+
```ruby
|
365
|
+
df.write_avro("file.avro")
|
366
|
+
```
|
367
|
+
|
368
|
+
Delta Lake [experimental]
|
369
|
+
|
370
|
+
```ruby
|
371
|
+
df.write_delta("./table")
|
372
|
+
```
|
373
|
+
|
339
374
|
Numo array
|
340
375
|
|
341
376
|
```ruby
|
data/ext/polars/Cargo.toml
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
[package]
|
2
2
|
name = "polars"
|
3
|
-
version = "0.15.0"
|
3
|
+
version = "0.17.0"
|
4
4
|
license = "MIT"
|
5
5
|
authors = ["Andrew Kane <andrew@ankane.org>"]
|
6
6
|
edition = "2021"
|
@@ -12,20 +12,21 @@ crate-type = ["cdylib"]
|
|
12
12
|
|
13
13
|
[dependencies]
|
14
14
|
ahash = "0.8"
|
15
|
-
arrow = { package = "polars-arrow", version = "=0.
|
15
|
+
arrow = { package = "polars-arrow", version = "=0.46.0" }
|
16
16
|
bytes = "1"
|
17
17
|
chrono = "0.4"
|
18
18
|
either = "1.8"
|
19
19
|
magnus = "0.7"
|
20
|
-
polars-core = "=0.
|
21
|
-
polars-plan = "=0.
|
22
|
-
polars-parquet = "=0.
|
23
|
-
polars-utils = "=0.
|
20
|
+
polars-core = "=0.46.0"
|
21
|
+
polars-plan = "=0.46.0"
|
22
|
+
polars-parquet = "=0.46.0"
|
23
|
+
polars-utils = "=0.46.0"
|
24
|
+
rayon = "1.9"
|
24
25
|
regex = "1"
|
25
26
|
serde_json = "1"
|
26
27
|
|
27
28
|
[dependencies.polars]
|
28
|
-
version = "=0.
|
29
|
+
version = "=0.46.0"
|
29
30
|
features = [
|
30
31
|
"abs",
|
31
32
|
"approx_unique",
|
@@ -33,6 +33,7 @@ pub(crate) fn any_value_into_rb_object(av: AnyValue, ruby: &Ruby) -> Value {
|
|
33
33
|
AnyValue::Int16(v) => ruby.into_value(v),
|
34
34
|
AnyValue::Int32(v) => ruby.into_value(v),
|
35
35
|
AnyValue::Int64(v) => ruby.into_value(v),
|
36
|
+
AnyValue::Int128(_v) => todo!(),
|
36
37
|
AnyValue::Float32(v) => ruby.into_value(v),
|
37
38
|
AnyValue::Float64(v) => ruby.into_value(v),
|
38
39
|
AnyValue::Null => ruby.qnil().as_value(),
|
@@ -15,7 +15,6 @@ use polars::chunked_array::object::PolarsObjectSafe;
|
|
15
15
|
use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
|
16
16
|
use polars::datatypes::AnyValue;
|
17
17
|
use polars::frame::row::Row;
|
18
|
-
use polars::frame::NullStrategy;
|
19
18
|
use polars::io::avro::AvroCompression;
|
20
19
|
use polars::io::cloud::CloudOptions;
|
21
20
|
use polars::prelude::*;
|
@@ -23,6 +22,7 @@ use polars::series::ops::NullBehavior;
|
|
23
22
|
use polars_core::utils::arrow::array::Array;
|
24
23
|
use polars_core::utils::materialize_dyn_int;
|
25
24
|
use polars_plan::plans::ScanSources;
|
25
|
+
use polars_utils::mmap::MemSlice;
|
26
26
|
use polars_utils::total_ord::{TotalEq, TotalHash};
|
27
27
|
|
28
28
|
use crate::file::{get_ruby_scan_source_input, RubyScanSourceInput};
|
@@ -146,6 +146,10 @@ impl IntoValue for Wrap<DataType> {
|
|
146
146
|
let class = pl.const_get::<_, Value>("Int64").unwrap();
|
147
147
|
class.funcall("new", ()).unwrap()
|
148
148
|
}
|
149
|
+
DataType::Int128 => {
|
150
|
+
let class = pl.const_get::<_, Value>("Int128").unwrap();
|
151
|
+
class.funcall("new", ()).unwrap()
|
152
|
+
}
|
149
153
|
DataType::UInt8 => {
|
150
154
|
let class = pl.const_get::<_, Value>("UInt8").unwrap();
|
151
155
|
class.funcall("new", ()).unwrap()
|
@@ -304,29 +308,31 @@ impl TryConvert for Wrap<DataType> {
|
|
304
308
|
let dtype = if ob.is_kind_of(class::class()) {
|
305
309
|
let name = ob.funcall::<_, _, String>("name", ())?;
|
306
310
|
match name.as_str() {
|
307
|
-
"Polars::UInt8" => DataType::UInt8,
|
308
|
-
"Polars::UInt16" => DataType::UInt16,
|
309
|
-
"Polars::UInt32" => DataType::UInt32,
|
310
|
-
"Polars::UInt64" => DataType::UInt64,
|
311
311
|
"Polars::Int8" => DataType::Int8,
|
312
312
|
"Polars::Int16" => DataType::Int16,
|
313
313
|
"Polars::Int32" => DataType::Int32,
|
314
314
|
"Polars::Int64" => DataType::Int64,
|
315
|
+
"Polars::UInt8" => DataType::UInt8,
|
316
|
+
"Polars::UInt16" => DataType::UInt16,
|
317
|
+
"Polars::UInt32" => DataType::UInt32,
|
318
|
+
"Polars::UInt64" => DataType::UInt64,
|
319
|
+
"Polars::Float32" => DataType::Float32,
|
320
|
+
"Polars::Float64" => DataType::Float64,
|
321
|
+
"Polars::Boolean" => DataType::Boolean,
|
315
322
|
"Polars::String" => DataType::String,
|
316
323
|
"Polars::Binary" => DataType::Binary,
|
317
|
-
"Polars::Boolean" => DataType::Boolean,
|
318
324
|
"Polars::Categorical" => DataType::Categorical(None, Default::default()),
|
319
325
|
"Polars::Enum" => DataType::Enum(None, Default::default()),
|
320
326
|
"Polars::Date" => DataType::Date,
|
321
|
-
"Polars::Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
|
322
327
|
"Polars::Time" => DataType::Time,
|
328
|
+
"Polars::Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
|
323
329
|
"Polars::Duration" => DataType::Duration(TimeUnit::Microseconds),
|
324
330
|
"Polars::Decimal" => DataType::Decimal(None, None),
|
325
|
-
"Polars::Float32" => DataType::Float32,
|
326
|
-
"Polars::Float64" => DataType::Float64,
|
327
|
-
"Polars::Object" => DataType::Object(OBJECT_NAME, None),
|
328
331
|
"Polars::List" => DataType::List(Box::new(DataType::Null)),
|
332
|
+
"Polars::Array" => DataType::Array(Box::new(DataType::Null), 0),
|
333
|
+
"Polars::Struct" => DataType::Struct(vec![]),
|
329
334
|
"Polars::Null" => DataType::Null,
|
335
|
+
"Polars::Object" => DataType::Object(OBJECT_NAME, None),
|
330
336
|
"Polars::Unknown" => DataType::Unknown(Default::default()),
|
331
337
|
dt => {
|
332
338
|
return Err(RbValueError::new_err(format!(
|
@@ -345,9 +351,11 @@ impl TryConvert for Wrap<DataType> {
|
|
345
351
|
"Polars::UInt16" => DataType::UInt16,
|
346
352
|
"Polars::UInt32" => DataType::UInt32,
|
347
353
|
"Polars::UInt64" => DataType::UInt64,
|
354
|
+
"Polars::Float32" => DataType::Float32,
|
355
|
+
"Polars::Float64" => DataType::Float64,
|
356
|
+
"Polars::Boolean" => DataType::Boolean,
|
348
357
|
"Polars::String" => DataType::String,
|
349
358
|
"Polars::Binary" => DataType::Binary,
|
350
|
-
"Polars::Boolean" => DataType::Boolean,
|
351
359
|
"Polars::Categorical" => {
|
352
360
|
let ordering = ob
|
353
361
|
.funcall::<_, _, Wrap<CategoricalOrdering>>("ordering", ())?
|
@@ -363,21 +371,17 @@ impl TryConvert for Wrap<DataType> {
|
|
363
371
|
}
|
364
372
|
"Polars::Date" => DataType::Date,
|
365
373
|
"Polars::Time" => DataType::Time,
|
366
|
-
"Polars::Float32" => DataType::Float32,
|
367
|
-
"Polars::Float64" => DataType::Float64,
|
368
|
-
"Polars::Null" => DataType::Null,
|
369
|
-
"Polars::Unknown" => DataType::Unknown(Default::default()),
|
370
|
-
"Polars::Duration" => {
|
371
|
-
let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
|
372
|
-
let time_unit = Wrap::<TimeUnit>::try_convert(time_unit)?.0;
|
373
|
-
DataType::Duration(time_unit)
|
374
|
-
}
|
375
374
|
"Polars::Datetime" => {
|
376
375
|
let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
|
377
376
|
let time_unit = Wrap::<TimeUnit>::try_convert(time_unit)?.0;
|
378
377
|
let time_zone: Option<String> = ob.funcall("time_zone", ())?;
|
379
378
|
DataType::Datetime(time_unit, time_zone.as_deref().map(|x| x.into()))
|
380
379
|
}
|
380
|
+
"Polars::Duration" => {
|
381
|
+
let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
|
382
|
+
let time_unit = Wrap::<TimeUnit>::try_convert(time_unit)?.0;
|
383
|
+
DataType::Duration(time_unit)
|
384
|
+
}
|
381
385
|
"Polars::Decimal" => {
|
382
386
|
let precision = ob.funcall("precision", ())?;
|
383
387
|
let scale = ob.funcall("scale", ())?;
|
@@ -388,6 +392,13 @@ impl TryConvert for Wrap<DataType> {
|
|
388
392
|
let inner = Wrap::<DataType>::try_convert(inner)?;
|
389
393
|
DataType::List(Box::new(inner.0))
|
390
394
|
}
|
395
|
+
"Polars::Array" => {
|
396
|
+
let inner: Value = ob.funcall("inner", ()).unwrap();
|
397
|
+
let size: Value = ob.funcall("size", ()).unwrap();
|
398
|
+
let inner = Wrap::<DataType>::try_convert(inner)?;
|
399
|
+
let size = usize::try_convert(size)?;
|
400
|
+
DataType::Array(Box::new(inner.0), size)
|
401
|
+
}
|
391
402
|
"Polars::Struct" => {
|
392
403
|
let arr: RArray = ob.funcall("fields", ())?;
|
393
404
|
let mut fields = Vec::with_capacity(arr.len());
|
@@ -396,6 +407,9 @@ impl TryConvert for Wrap<DataType> {
|
|
396
407
|
}
|
397
408
|
DataType::Struct(fields)
|
398
409
|
}
|
410
|
+
"Polars::Null" => DataType::Null,
|
411
|
+
"Object" => DataType::Object(OBJECT_NAME, None),
|
412
|
+
"Polars::Unknown" => DataType::Unknown(Default::default()),
|
399
413
|
dt => {
|
400
414
|
return Err(RbTypeError::new_err(format!(
|
401
415
|
"A {dt} object is not a correct polars DataType. \
|
@@ -501,7 +515,7 @@ impl TryConvert for Wrap<ScanSources> {
|
|
501
515
|
enum MutableSources {
|
502
516
|
Paths(Vec<PathBuf>),
|
503
517
|
Files(Vec<File>),
|
504
|
-
Buffers(Vec<
|
518
|
+
Buffers(Vec<MemSlice>),
|
505
519
|
}
|
506
520
|
|
507
521
|
let num_items = list.len();
|
@@ -1,6 +1,5 @@
|
|
1
1
|
use either::Either;
|
2
2
|
use magnus::{prelude::*, typed_data::Obj, IntoValue, RArray, Value};
|
3
|
-
use polars::frame::NullStrategy;
|
4
3
|
use polars::prelude::pivot::{pivot, pivot_stable};
|
5
4
|
use polars::prelude::*;
|
6
5
|
|
@@ -158,7 +157,7 @@ impl RbDataFrame {
|
|
158
157
|
}
|
159
158
|
|
160
159
|
pub fn n_chunks(&self) -> usize {
|
161
|
-
self.df.borrow().
|
160
|
+
self.df.borrow().first_col_n_chunks()
|
162
161
|
}
|
163
162
|
|
164
163
|
pub fn shape(&self) -> (usize, usize) {
|
@@ -410,52 +409,6 @@ impl RbDataFrame {
|
|
410
409
|
self.df.borrow().clone().lazy().into()
|
411
410
|
}
|
412
411
|
|
413
|
-
pub fn max_horizontal(&self) -> RbResult<Option<RbSeries>> {
|
414
|
-
let s = self
|
415
|
-
.df
|
416
|
-
.borrow()
|
417
|
-
.max_horizontal()
|
418
|
-
.map_err(RbPolarsErr::from)?;
|
419
|
-
Ok(s.map(|s| s.take_materialized_series().into()))
|
420
|
-
}
|
421
|
-
|
422
|
-
pub fn min_horizontal(&self) -> RbResult<Option<RbSeries>> {
|
423
|
-
let s = self
|
424
|
-
.df
|
425
|
-
.borrow()
|
426
|
-
.min_horizontal()
|
427
|
-
.map_err(RbPolarsErr::from)?;
|
428
|
-
Ok(s.map(|s| s.take_materialized_series().into()))
|
429
|
-
}
|
430
|
-
|
431
|
-
pub fn sum_horizontal(&self, ignore_nulls: bool) -> RbResult<Option<RbSeries>> {
|
432
|
-
let null_strategy = if ignore_nulls {
|
433
|
-
NullStrategy::Ignore
|
434
|
-
} else {
|
435
|
-
NullStrategy::Propagate
|
436
|
-
};
|
437
|
-
let s = self
|
438
|
-
.df
|
439
|
-
.borrow()
|
440
|
-
.sum_horizontal(null_strategy)
|
441
|
-
.map_err(RbPolarsErr::from)?;
|
442
|
-
Ok(s.map(|s| s.into()))
|
443
|
-
}
|
444
|
-
|
445
|
-
pub fn mean_horizontal(&self, ignore_nulls: bool) -> RbResult<Option<RbSeries>> {
|
446
|
-
let null_strategy = if ignore_nulls {
|
447
|
-
NullStrategy::Ignore
|
448
|
-
} else {
|
449
|
-
NullStrategy::Propagate
|
450
|
-
};
|
451
|
-
let s = self
|
452
|
-
.df
|
453
|
-
.borrow()
|
454
|
-
.mean_horizontal(null_strategy)
|
455
|
-
.map_err(RbPolarsErr::from)?;
|
456
|
-
Ok(s.map(|s| s.into()))
|
457
|
-
}
|
458
|
-
|
459
412
|
pub fn to_dummies(
|
460
413
|
&self,
|
461
414
|
columns: Option<Vec<String>>,
|
@@ -2,15 +2,13 @@ use magnus::{prelude::*, Value};
|
|
2
2
|
use polars::io::avro::AvroCompression;
|
3
3
|
use polars::io::RowIndex;
|
4
4
|
use polars::prelude::*;
|
5
|
-
use polars_utils::mmap::ensure_not_mapped;
|
6
5
|
use std::io::BufWriter;
|
7
6
|
use std::num::NonZeroUsize;
|
8
7
|
|
9
8
|
use super::*;
|
10
9
|
use crate::conversion::*;
|
11
10
|
use crate::file::{
|
12
|
-
|
13
|
-
read_if_bytesio, EitherRustRubyFile,
|
11
|
+
get_file_like, get_mmap_bytes_reader, get_mmap_bytes_reader_and_path, read_if_bytesio,
|
14
12
|
};
|
15
13
|
use crate::{RbPolarsErr, RbResult};
|
16
14
|
|
@@ -298,30 +296,24 @@ impl RbDataFrame {
|
|
298
296
|
Ok(())
|
299
297
|
}
|
300
298
|
|
301
|
-
pub fn write_json(&self, rb_f: Value
|
299
|
+
pub fn write_json(&self, rb_f: Value) -> RbResult<()> {
|
302
300
|
let file = BufWriter::new(get_file_like(rb_f, true)?);
|
303
301
|
|
304
|
-
|
305
|
-
(
|
306
|
-
|
307
|
-
|
308
|
-
(true, _) => serde_json::to_writer_pretty(file, &*self.df.borrow())
|
309
|
-
.map_err(|e| PolarsError::ComputeError(format!("{:?}", e).into())),
|
310
|
-
(false, _) => serde_json::to_writer(file, &*self.df.borrow())
|
311
|
-
.map_err(|e| PolarsError::ComputeError(format!("{:?}", e).into())),
|
312
|
-
};
|
313
|
-
r.map_err(|e| RbPolarsErr::Other(format!("{:?}", e)))?;
|
302
|
+
JsonWriter::new(file)
|
303
|
+
.with_json_format(JsonFormat::Json)
|
304
|
+
.finish(&mut self.df.borrow_mut())
|
305
|
+
.map_err(RbPolarsErr::from)?;
|
314
306
|
Ok(())
|
315
307
|
}
|
316
308
|
|
317
309
|
pub fn write_ndjson(&self, rb_f: Value) -> RbResult<()> {
|
318
310
|
let file = BufWriter::new(get_file_like(rb_f, true)?);
|
319
311
|
|
320
|
-
|
312
|
+
JsonWriter::new(file)
|
321
313
|
.with_json_format(JsonFormat::JsonLines)
|
322
|
-
.finish(&mut self.df.borrow_mut())
|
314
|
+
.finish(&mut self.df.borrow_mut())
|
315
|
+
.map_err(RbPolarsErr::from)?;
|
323
316
|
|
324
|
-
r.map_err(|e| RbPolarsErr::Other(format!("{:?}", e)))?;
|
325
317
|
Ok(())
|
326
318
|
}
|
327
319
|
|
@@ -330,13 +322,19 @@ impl RbDataFrame {
|
|
330
322
|
rb_f: Value,
|
331
323
|
compression: Wrap<Option<IpcCompression>>,
|
332
324
|
compat_level: RbCompatLevel,
|
325
|
+
cloud_options: Option<Vec<(String, String)>>,
|
326
|
+
retries: usize,
|
333
327
|
) -> RbResult<()> {
|
334
|
-
let
|
335
|
-
|
336
|
-
|
337
|
-
}
|
338
|
-
|
339
|
-
|
328
|
+
let cloud_options = if let Ok(path) = String::try_convert(rb_f) {
|
329
|
+
let cloud_options = parse_cloud_options(&path, cloud_options.unwrap_or_default())?;
|
330
|
+
Some(cloud_options.with_max_retries(retries))
|
331
|
+
} else {
|
332
|
+
None
|
333
|
+
};
|
334
|
+
|
335
|
+
let f = crate::file::try_get_writeable(rb_f, cloud_options.as_ref())?;
|
336
|
+
|
337
|
+
IpcWriter::new(f)
|
340
338
|
.with_compression(compression.0)
|
341
339
|
.with_compat_level(compat_level.0)
|
342
340
|
.finish(&mut self.df.borrow_mut())
|
@@ -271,6 +271,7 @@ impl RbExpr {
|
|
271
271
|
nulls_last,
|
272
272
|
multithreaded: true,
|
273
273
|
maintain_order: false,
|
274
|
+
limit: None,
|
274
275
|
})
|
275
276
|
.into()
|
276
277
|
}
|
@@ -283,6 +284,7 @@ impl RbExpr {
|
|
283
284
|
nulls_last,
|
284
285
|
multithreaded: true,
|
285
286
|
maintain_order: false,
|
287
|
+
limit: None,
|
286
288
|
})
|
287
289
|
.into()
|
288
290
|
}
|
@@ -363,6 +365,7 @@ impl RbExpr {
|
|
363
365
|
nulls_last,
|
364
366
|
multithreaded,
|
365
367
|
maintain_order,
|
368
|
+
limit: None,
|
366
369
|
},
|
367
370
|
)
|
368
371
|
.into())
|
data/ext/polars/src/expr/meta.rs
CHANGED
@@ -84,13 +84,17 @@ impl RbExpr {
|
|
84
84
|
self.inner.clone().meta()._into_selector().into()
|
85
85
|
}
|
86
86
|
|
87
|
-
|
87
|
+
fn compute_tree_format(&self, display_as_dot: bool) -> RbResult<String> {
|
88
88
|
let e = self
|
89
89
|
.inner
|
90
90
|
.clone()
|
91
91
|
.meta()
|
92
|
-
.into_tree_formatter()
|
92
|
+
.into_tree_formatter(display_as_dot)
|
93
93
|
.map_err(RbPolarsErr::from)?;
|
94
94
|
Ok(format!("{e}"))
|
95
95
|
}
|
96
|
+
|
97
|
+
pub fn meta_tree_format(&self) -> RbResult<String> {
|
98
|
+
self.compute_tree_format(false)
|
99
|
+
}
|
96
100
|
}
|
data/ext/polars/src/file.rs
CHANGED
@@ -4,7 +4,9 @@ use std::io::{Cursor, Read, Seek, SeekFrom, Write};
|
|
4
4
|
use std::path::PathBuf;
|
5
5
|
|
6
6
|
use magnus::{exception, prelude::*, Error, RString, Value};
|
7
|
+
use polars::io::cloud::CloudOptions;
|
7
8
|
use polars::io::mmap::MmapBytesReader;
|
9
|
+
use polars_utils::mmap::MemSlice;
|
8
10
|
|
9
11
|
use crate::error::RbPolarsErr;
|
10
12
|
use crate::prelude::resolve_homedir;
|
@@ -141,10 +143,17 @@ impl EitherRustRubyFile {
|
|
141
143
|
EitherRustRubyFile::Rust(f) => Box::new(f),
|
142
144
|
}
|
143
145
|
}
|
146
|
+
|
147
|
+
pub fn into_dyn_writeable(self) -> Box<dyn Write> {
|
148
|
+
match self {
|
149
|
+
EitherRustRubyFile::Rb(f) => Box::new(f),
|
150
|
+
EitherRustRubyFile::Rust(f) => Box::new(f),
|
151
|
+
}
|
152
|
+
}
|
144
153
|
}
|
145
154
|
|
146
155
|
pub enum RubyScanSourceInput {
|
147
|
-
Buffer(
|
156
|
+
Buffer(MemSlice),
|
148
157
|
Path(PathBuf),
|
149
158
|
#[allow(dead_code)]
|
150
159
|
File(File),
|
@@ -156,7 +165,9 @@ pub fn get_ruby_scan_source_input(rb_f: Value, write: bool) -> RbResult<RubyScan
|
|
156
165
|
Ok(RubyScanSourceInput::Path(file_path))
|
157
166
|
} else {
|
158
167
|
let f = RbFileLikeObject::with_requirements(rb_f, !write, write, !write)?;
|
159
|
-
Ok(RubyScanSourceInput::Buffer(
|
168
|
+
Ok(RubyScanSourceInput::Buffer(MemSlice::from_bytes(
|
169
|
+
f.as_bytes(),
|
170
|
+
)))
|
160
171
|
}
|
161
172
|
}
|
162
173
|
|
@@ -167,7 +178,7 @@ pub fn get_either_file(rb_f: Value, truncate: bool) -> RbResult<EitherRustRubyFi
|
|
167
178
|
if let Ok(rstring) = RString::try_convert(rb_f) {
|
168
179
|
let s = unsafe { rstring.as_str() }?;
|
169
180
|
let file_path = std::path::Path::new(&s);
|
170
|
-
let file_path = resolve_homedir(file_path);
|
181
|
+
let file_path = resolve_homedir(&file_path);
|
171
182
|
let f = if truncate {
|
172
183
|
File::create(file_path).map_err(RbPolarsErr::from)?
|
173
184
|
} else {
|
@@ -212,3 +223,10 @@ pub fn get_mmap_bytes_reader_and_path<'a>(
|
|
212
223
|
}
|
213
224
|
}
|
214
225
|
}
|
226
|
+
|
227
|
+
pub fn try_get_writeable(
|
228
|
+
rb_f: Value,
|
229
|
+
_cloud_options: Option<&CloudOptions>,
|
230
|
+
) -> RbResult<Box<dyn Write>> {
|
231
|
+
Ok(get_either_file(rb_f, true)?.into_dyn_writeable())
|
232
|
+
}
|
@@ -28,14 +28,14 @@ pub fn min_horizontal(exprs: RArray) -> RbResult<RbExpr> {
|
|
28
28
|
Ok(e.into())
|
29
29
|
}
|
30
30
|
|
31
|
-
pub fn sum_horizontal(exprs: RArray) -> RbResult<RbExpr> {
|
31
|
+
pub fn sum_horizontal(exprs: RArray, ignore_nulls: bool) -> RbResult<RbExpr> {
|
32
32
|
let exprs = rb_exprs_to_exprs(exprs)?;
|
33
|
-
let e = dsl::sum_horizontal(exprs).map_err(RbPolarsErr::from)?;
|
33
|
+
let e = dsl::sum_horizontal(exprs, ignore_nulls).map_err(RbPolarsErr::from)?;
|
34
34
|
Ok(e.into())
|
35
35
|
}
|
36
36
|
|
37
|
-
pub fn mean_horizontal(exprs: RArray) -> RbResult<RbExpr> {
|
37
|
+
pub fn mean_horizontal(exprs: RArray, ignore_nulls: bool) -> RbResult<RbExpr> {
|
38
38
|
let exprs = rb_exprs_to_exprs(exprs)?;
|
39
|
-
let e = dsl::mean_horizontal(exprs).map_err(RbPolarsErr::from)?;
|
39
|
+
let e = dsl::mean_horizontal(exprs, ignore_nulls).map_err(RbPolarsErr::from)?;
|
40
40
|
Ok(e.into())
|
41
41
|
}
|
@@ -1,34 +1,55 @@
|
|
1
|
+
use std::io::BufReader;
|
2
|
+
|
3
|
+
use arrow::array::Utf8ViewArray;
|
1
4
|
use magnus::{RHash, Value};
|
5
|
+
use polars::prelude::ArrowSchema;
|
6
|
+
use polars_core::datatypes::create_enum_dtype;
|
2
7
|
|
3
8
|
use crate::conversion::Wrap;
|
4
|
-
use crate::file::
|
5
|
-
use crate::prelude::
|
9
|
+
use crate::file::{get_either_file, EitherRustRubyFile};
|
10
|
+
use crate::prelude::ArrowDataType;
|
6
11
|
use crate::{RbPolarsErr, RbResult};
|
7
12
|
|
8
13
|
pub fn read_ipc_schema(rb_f: Value) -> RbResult<RHash> {
|
9
|
-
use
|
10
|
-
let
|
11
|
-
|
14
|
+
use arrow::io::ipc::read::read_file_metadata;
|
15
|
+
let metadata = match get_either_file(rb_f, false)? {
|
16
|
+
EitherRustRubyFile::Rust(r) => {
|
17
|
+
read_file_metadata(&mut BufReader::new(r)).map_err(RbPolarsErr::from)?
|
18
|
+
}
|
19
|
+
EitherRustRubyFile::Rb(mut r) => read_file_metadata(&mut r).map_err(RbPolarsErr::from)?,
|
20
|
+
};
|
12
21
|
|
13
22
|
let dict = RHash::new();
|
14
|
-
|
15
|
-
let dt: Wrap<DataType> = Wrap((&field.dtype).into());
|
16
|
-
dict.aset(field.name.as_str(), dt)?;
|
17
|
-
}
|
23
|
+
fields_to_rbdict(&metadata.schema, &dict)?;
|
18
24
|
Ok(dict)
|
19
25
|
}
|
20
26
|
|
21
27
|
pub fn read_parquet_schema(rb_f: Value) -> RbResult<RHash> {
|
22
28
|
use polars_parquet::read::{infer_schema, read_metadata};
|
23
29
|
|
24
|
-
let
|
25
|
-
|
30
|
+
let metadata = match get_either_file(rb_f, false)? {
|
31
|
+
EitherRustRubyFile::Rust(r) => {
|
32
|
+
read_metadata(&mut BufReader::new(r)).map_err(RbPolarsErr::from)?
|
33
|
+
}
|
34
|
+
EitherRustRubyFile::Rb(mut r) => read_metadata(&mut r).map_err(RbPolarsErr::from)?,
|
35
|
+
};
|
26
36
|
let arrow_schema = infer_schema(&metadata).map_err(RbPolarsErr::from)?;
|
27
37
|
|
28
38
|
let dict = RHash::new();
|
29
|
-
|
30
|
-
|
39
|
+
fields_to_rbdict(&arrow_schema, &dict)?;
|
40
|
+
Ok(dict)
|
41
|
+
}
|
42
|
+
|
43
|
+
fn fields_to_rbdict(schema: &ArrowSchema, dict: &RHash) -> RbResult<()> {
|
44
|
+
for field in schema.iter_values() {
|
45
|
+
let dt = if field.is_enum() {
|
46
|
+
Wrap(create_enum_dtype(Utf8ViewArray::new_empty(
|
47
|
+
ArrowDataType::Utf8View,
|
48
|
+
)))
|
49
|
+
} else {
|
50
|
+
Wrap(polars::prelude::DataType::from_arrow_field(field))
|
51
|
+
};
|
31
52
|
dict.aset(field.name.as_str(), dt)?;
|
32
53
|
}
|
33
|
-
Ok(
|
54
|
+
Ok(())
|
34
55
|
}
|
@@ -70,6 +70,7 @@ pub fn arg_sort_by(
|
|
70
70
|
nulls_last,
|
71
71
|
multithreaded,
|
72
72
|
maintain_order,
|
73
|
+
limit: None,
|
73
74
|
},
|
74
75
|
)
|
75
76
|
.into())
|
@@ -320,8 +321,8 @@ pub fn lit(value: Value, allow_object: bool) -> RbResult<RbExpr> {
|
|
320
321
|
}
|
321
322
|
}
|
322
323
|
|
323
|
-
pub fn pearson_corr(a: &RbExpr, b: &RbExpr
|
324
|
-
dsl::pearson_corr(a.inner.clone(), b.inner.clone()
|
324
|
+
pub fn pearson_corr(a: &RbExpr, b: &RbExpr) -> RbExpr {
|
325
|
+
dsl::pearson_corr(a.inner.clone(), b.inner.clone()).into()
|
325
326
|
}
|
326
327
|
|
327
328
|
pub fn repeat(value: &RbExpr, n: &RbExpr, dtype: Option<Wrap<DataType>>) -> RbResult<RbExpr> {
|
@@ -345,8 +346,8 @@ pub fn repeat(value: &RbExpr, n: &RbExpr, dtype: Option<Wrap<DataType>>) -> RbRe
|
|
345
346
|
Ok(dsl::repeat(value, n).into())
|
346
347
|
}
|
347
348
|
|
348
|
-
pub fn spearman_rank_corr(a: &RbExpr, b: &RbExpr,
|
349
|
-
dsl::spearman_rank_corr(a.inner.clone(), b.inner.clone(),
|
349
|
+
pub fn spearman_rank_corr(a: &RbExpr, b: &RbExpr, propagate_nans: bool) -> RbExpr {
|
350
|
+
dsl::spearman_rank_corr(a.inner.clone(), b.inner.clone(), propagate_nans).into()
|
350
351
|
}
|
351
352
|
|
352
353
|
pub fn sql_expr(sql: String) -> RbResult<RbExpr> {
|
@@ -9,7 +9,7 @@ use polars_core::utils::arrow;
|
|
9
9
|
|
10
10
|
use crate::RbResult;
|
11
11
|
|
12
|
-
#[magnus::wrap(class = "Polars::
|
12
|
+
#[magnus::wrap(class = "Polars::ArrowArrayStream")]
|
13
13
|
pub struct RbArrowArrayStream {
|
14
14
|
stream: ffi::ArrowArrayStream,
|
15
15
|
}
|
@@ -47,7 +47,7 @@ impl DataFrameStreamIterator {
|
|
47
47
|
.collect(),
|
48
48
|
dtype,
|
49
49
|
idx: 0,
|
50
|
-
n_chunks: df.
|
50
|
+
n_chunks: df.first_col_n_chunks(),
|
51
51
|
}
|
52
52
|
}
|
53
53
|
|
@@ -23,7 +23,7 @@ impl RbSeries {
|
|
23
23
|
.const_get::<_, RClass>("RObject")?
|
24
24
|
.funcall("cast", (np_arr,))
|
25
25
|
}
|
26
|
-
dt if dt.
|
26
|
+
dt if dt.is_primitive_numeric() => {
|
27
27
|
if let Some(BitRepr::Large(_)) = s.bit_repr() {
|
28
28
|
let s = s.cast(&DataType::Float64).unwrap();
|
29
29
|
let ca = s.f64().unwrap();
|