polars-df 0.14.0 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +35 -0
- data/Cargo.lock +1523 -378
- data/LICENSE.txt +1 -0
- data/README.md +38 -4
- data/ext/polars/Cargo.toml +15 -5
- data/ext/polars/src/batched_csv.rs +7 -10
- data/ext/polars/src/conversion/any_value.rs +31 -21
- data/ext/polars/src/conversion/mod.rs +155 -48
- data/ext/polars/src/dataframe/construction.rs +0 -3
- data/ext/polars/src/dataframe/export.rs +9 -2
- data/ext/polars/src/dataframe/general.rs +15 -57
- data/ext/polars/src/dataframe/io.rs +77 -169
- data/ext/polars/src/dataframe/mod.rs +1 -0
- data/ext/polars/src/dataframe/serde.rs +15 -0
- data/ext/polars/src/error.rs +31 -48
- data/ext/polars/src/exceptions.rs +24 -0
- data/ext/polars/src/expr/binary.rs +4 -42
- data/ext/polars/src/expr/datetime.rs +5 -4
- data/ext/polars/src/expr/general.rs +16 -22
- data/ext/polars/src/expr/list.rs +18 -11
- data/ext/polars/src/expr/meta.rs +6 -2
- data/ext/polars/src/expr/rolling.rs +6 -7
- data/ext/polars/src/expr/string.rs +9 -36
- data/ext/polars/src/file.rs +78 -23
- data/ext/polars/src/functions/aggregation.rs +4 -4
- data/ext/polars/src/functions/business.rs +15 -0
- data/ext/polars/src/functions/io.rs +34 -13
- data/ext/polars/src/functions/lazy.rs +22 -12
- data/ext/polars/src/functions/meta.rs +1 -1
- data/ext/polars/src/functions/mod.rs +1 -0
- data/ext/polars/src/interop/arrow/mod.rs +1 -0
- data/ext/polars/src/interop/arrow/to_ruby.rs +83 -0
- data/ext/polars/src/interop/mod.rs +1 -0
- data/ext/polars/src/lazyframe/general.rs +920 -0
- data/ext/polars/src/lazyframe/mod.rs +3 -827
- data/ext/polars/src/lazyframe/serde.rs +31 -0
- data/ext/polars/src/lib.rs +54 -27
- data/ext/polars/src/map/dataframe.rs +10 -6
- data/ext/polars/src/map/lazy.rs +65 -4
- data/ext/polars/src/map/mod.rs +9 -8
- data/ext/polars/src/on_startup.rs +1 -1
- data/ext/polars/src/series/aggregation.rs +1 -5
- data/ext/polars/src/series/arithmetic.rs +10 -10
- data/ext/polars/src/series/construction.rs +2 -2
- data/ext/polars/src/series/export.rs +1 -1
- data/ext/polars/src/series/general.rs +631 -0
- data/ext/polars/src/series/import.rs +55 -0
- data/ext/polars/src/series/mod.rs +11 -638
- data/ext/polars/src/series/scatter.rs +2 -2
- data/ext/polars/src/utils.rs +0 -20
- data/lib/polars/batched_csv_reader.rb +0 -2
- data/lib/polars/binary_expr.rb +133 -9
- data/lib/polars/binary_name_space.rb +101 -6
- data/lib/polars/config.rb +4 -0
- data/lib/polars/data_frame.rb +452 -101
- data/lib/polars/data_type_group.rb +28 -0
- data/lib/polars/data_types.rb +3 -1
- data/lib/polars/date_time_expr.rb +244 -0
- data/lib/polars/date_time_name_space.rb +87 -0
- data/lib/polars/expr.rb +103 -2
- data/lib/polars/functions/aggregation/horizontal.rb +10 -4
- data/lib/polars/functions/as_datatype.rb +51 -2
- data/lib/polars/functions/col.rb +1 -1
- data/lib/polars/functions/eager.rb +1 -3
- data/lib/polars/functions/lazy.rb +95 -13
- data/lib/polars/functions/range/time_range.rb +21 -21
- data/lib/polars/io/csv.rb +14 -16
- data/lib/polars/io/database.rb +2 -2
- data/lib/polars/io/delta.rb +126 -0
- data/lib/polars/io/ipc.rb +14 -4
- data/lib/polars/io/ndjson.rb +10 -0
- data/lib/polars/io/parquet.rb +168 -111
- data/lib/polars/lazy_frame.rb +684 -20
- data/lib/polars/list_name_space.rb +169 -0
- data/lib/polars/selectors.rb +1226 -0
- data/lib/polars/series.rb +465 -35
- data/lib/polars/string_cache.rb +27 -1
- data/lib/polars/string_expr.rb +0 -1
- data/lib/polars/string_name_space.rb +73 -3
- data/lib/polars/struct_name_space.rb +31 -7
- data/lib/polars/utils/various.rb +5 -1
- data/lib/polars/utils.rb +45 -10
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +17 -1
- metadata +16 -9
- data/lib/polars/functions.rb +0 -57
@@ -0,0 +1,83 @@
|
|
1
|
+
use arrow::datatypes::ArrowDataType;
|
2
|
+
use arrow::ffi;
|
3
|
+
use magnus::{IntoValue, Value};
|
4
|
+
use polars::datatypes::CompatLevel;
|
5
|
+
use polars::frame::DataFrame;
|
6
|
+
use polars::prelude::{ArrayRef, ArrowField, PlSmallStr, PolarsResult, SchemaExt};
|
7
|
+
use polars::series::Series;
|
8
|
+
use polars_core::utils::arrow;
|
9
|
+
|
10
|
+
use crate::RbResult;
|
11
|
+
|
12
|
+
#[magnus::wrap(class = "Polars::ArrowArrayStream")]
|
13
|
+
pub struct RbArrowArrayStream {
|
14
|
+
stream: ffi::ArrowArrayStream,
|
15
|
+
}
|
16
|
+
|
17
|
+
impl RbArrowArrayStream {
|
18
|
+
pub fn to_i(&self) -> usize {
|
19
|
+
(&self.stream as *const _) as usize
|
20
|
+
}
|
21
|
+
}
|
22
|
+
|
23
|
+
pub(crate) fn dataframe_to_stream(df: &DataFrame) -> RbResult<Value> {
|
24
|
+
let iter = Box::new(DataFrameStreamIterator::new(df));
|
25
|
+
let field = iter.field();
|
26
|
+
let stream = ffi::export_iterator(iter, field);
|
27
|
+
Ok(RbArrowArrayStream { stream }.into_value())
|
28
|
+
}
|
29
|
+
|
30
|
+
pub struct DataFrameStreamIterator {
|
31
|
+
columns: Vec<Series>,
|
32
|
+
dtype: ArrowDataType,
|
33
|
+
idx: usize,
|
34
|
+
n_chunks: usize,
|
35
|
+
}
|
36
|
+
|
37
|
+
impl DataFrameStreamIterator {
|
38
|
+
fn new(df: &DataFrame) -> Self {
|
39
|
+
let schema = df.schema().to_arrow(CompatLevel::newest());
|
40
|
+
let dtype = ArrowDataType::Struct(schema.into_iter_values().collect());
|
41
|
+
|
42
|
+
Self {
|
43
|
+
columns: df
|
44
|
+
.get_columns()
|
45
|
+
.iter()
|
46
|
+
.map(|v| v.as_materialized_series().clone())
|
47
|
+
.collect(),
|
48
|
+
dtype,
|
49
|
+
idx: 0,
|
50
|
+
n_chunks: df.first_col_n_chunks(),
|
51
|
+
}
|
52
|
+
}
|
53
|
+
|
54
|
+
fn field(&self) -> ArrowField {
|
55
|
+
ArrowField::new(PlSmallStr::EMPTY, self.dtype.clone(), false)
|
56
|
+
}
|
57
|
+
}
|
58
|
+
|
59
|
+
impl Iterator for DataFrameStreamIterator {
|
60
|
+
type Item = PolarsResult<ArrayRef>;
|
61
|
+
|
62
|
+
fn next(&mut self) -> Option<Self::Item> {
|
63
|
+
if self.idx >= self.n_chunks {
|
64
|
+
None
|
65
|
+
} else {
|
66
|
+
// create a batch of the columns with the same chunk no.
|
67
|
+
let batch_cols = self
|
68
|
+
.columns
|
69
|
+
.iter()
|
70
|
+
.map(|s| s.to_arrow(self.idx, CompatLevel::newest()))
|
71
|
+
.collect::<Vec<_>>();
|
72
|
+
self.idx += 1;
|
73
|
+
|
74
|
+
let array = arrow::array::StructArray::new(
|
75
|
+
self.dtype.clone(),
|
76
|
+
batch_cols[0].len(),
|
77
|
+
batch_cols,
|
78
|
+
None,
|
79
|
+
);
|
80
|
+
Some(Ok(Box::new(array)))
|
81
|
+
}
|
82
|
+
}
|
83
|
+
}
|