polars-df 0.14.0 → 0.16.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +35 -0
- data/Cargo.lock +1523 -378
- data/LICENSE.txt +1 -0
- data/README.md +38 -4
- data/ext/polars/Cargo.toml +15 -5
- data/ext/polars/src/batched_csv.rs +7 -10
- data/ext/polars/src/conversion/any_value.rs +31 -21
- data/ext/polars/src/conversion/mod.rs +155 -48
- data/ext/polars/src/dataframe/construction.rs +0 -3
- data/ext/polars/src/dataframe/export.rs +9 -2
- data/ext/polars/src/dataframe/general.rs +15 -57
- data/ext/polars/src/dataframe/io.rs +77 -169
- data/ext/polars/src/dataframe/mod.rs +1 -0
- data/ext/polars/src/dataframe/serde.rs +15 -0
- data/ext/polars/src/error.rs +31 -48
- data/ext/polars/src/exceptions.rs +24 -0
- data/ext/polars/src/expr/binary.rs +4 -42
- data/ext/polars/src/expr/datetime.rs +5 -4
- data/ext/polars/src/expr/general.rs +16 -22
- data/ext/polars/src/expr/list.rs +18 -11
- data/ext/polars/src/expr/meta.rs +6 -2
- data/ext/polars/src/expr/rolling.rs +6 -7
- data/ext/polars/src/expr/string.rs +9 -36
- data/ext/polars/src/file.rs +78 -23
- data/ext/polars/src/functions/aggregation.rs +4 -4
- data/ext/polars/src/functions/business.rs +15 -0
- data/ext/polars/src/functions/io.rs +34 -13
- data/ext/polars/src/functions/lazy.rs +22 -12
- data/ext/polars/src/functions/meta.rs +1 -1
- data/ext/polars/src/functions/mod.rs +1 -0
- data/ext/polars/src/interop/arrow/mod.rs +1 -0
- data/ext/polars/src/interop/arrow/to_ruby.rs +83 -0
- data/ext/polars/src/interop/mod.rs +1 -0
- data/ext/polars/src/lazyframe/general.rs +920 -0
- data/ext/polars/src/lazyframe/mod.rs +3 -827
- data/ext/polars/src/lazyframe/serde.rs +31 -0
- data/ext/polars/src/lib.rs +54 -27
- data/ext/polars/src/map/dataframe.rs +10 -6
- data/ext/polars/src/map/lazy.rs +65 -4
- data/ext/polars/src/map/mod.rs +9 -8
- data/ext/polars/src/on_startup.rs +1 -1
- data/ext/polars/src/series/aggregation.rs +1 -5
- data/ext/polars/src/series/arithmetic.rs +10 -10
- data/ext/polars/src/series/construction.rs +2 -2
- data/ext/polars/src/series/export.rs +1 -1
- data/ext/polars/src/series/general.rs +631 -0
- data/ext/polars/src/series/import.rs +55 -0
- data/ext/polars/src/series/mod.rs +11 -638
- data/ext/polars/src/series/scatter.rs +2 -2
- data/ext/polars/src/utils.rs +0 -20
- data/lib/polars/batched_csv_reader.rb +0 -2
- data/lib/polars/binary_expr.rb +133 -9
- data/lib/polars/binary_name_space.rb +101 -6
- data/lib/polars/config.rb +4 -0
- data/lib/polars/data_frame.rb +452 -101
- data/lib/polars/data_type_group.rb +28 -0
- data/lib/polars/data_types.rb +3 -1
- data/lib/polars/date_time_expr.rb +244 -0
- data/lib/polars/date_time_name_space.rb +87 -0
- data/lib/polars/expr.rb +103 -2
- data/lib/polars/functions/aggregation/horizontal.rb +10 -4
- data/lib/polars/functions/as_datatype.rb +51 -2
- data/lib/polars/functions/col.rb +1 -1
- data/lib/polars/functions/eager.rb +1 -3
- data/lib/polars/functions/lazy.rb +95 -13
- data/lib/polars/functions/range/time_range.rb +21 -21
- data/lib/polars/io/csv.rb +14 -16
- data/lib/polars/io/database.rb +2 -2
- data/lib/polars/io/delta.rb +126 -0
- data/lib/polars/io/ipc.rb +14 -4
- data/lib/polars/io/ndjson.rb +10 -0
- data/lib/polars/io/parquet.rb +168 -111
- data/lib/polars/lazy_frame.rb +684 -20
- data/lib/polars/list_name_space.rb +169 -0
- data/lib/polars/selectors.rb +1226 -0
- data/lib/polars/series.rb +465 -35
- data/lib/polars/string_cache.rb +27 -1
- data/lib/polars/string_expr.rb +0 -1
- data/lib/polars/string_name_space.rb +73 -3
- data/lib/polars/struct_name_space.rb +31 -7
- data/lib/polars/utils/various.rb +5 -1
- data/lib/polars/utils.rb +45 -10
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +17 -1
- metadata +16 -9
- data/lib/polars/functions.rb +0 -57
@@ -0,0 +1,83 @@
|
|
1
|
+
use arrow::datatypes::ArrowDataType;
|
2
|
+
use arrow::ffi;
|
3
|
+
use magnus::{IntoValue, Value};
|
4
|
+
use polars::datatypes::CompatLevel;
|
5
|
+
use polars::frame::DataFrame;
|
6
|
+
use polars::prelude::{ArrayRef, ArrowField, PlSmallStr, PolarsResult, SchemaExt};
|
7
|
+
use polars::series::Series;
|
8
|
+
use polars_core::utils::arrow;
|
9
|
+
|
10
|
+
use crate::RbResult;
|
11
|
+
|
12
|
+
#[magnus::wrap(class = "Polars::ArrowArrayStream")]
|
13
|
+
pub struct RbArrowArrayStream {
|
14
|
+
stream: ffi::ArrowArrayStream,
|
15
|
+
}
|
16
|
+
|
17
|
+
impl RbArrowArrayStream {
|
18
|
+
pub fn to_i(&self) -> usize {
|
19
|
+
(&self.stream as *const _) as usize
|
20
|
+
}
|
21
|
+
}
|
22
|
+
|
23
|
+
pub(crate) fn dataframe_to_stream(df: &DataFrame) -> RbResult<Value> {
|
24
|
+
let iter = Box::new(DataFrameStreamIterator::new(df));
|
25
|
+
let field = iter.field();
|
26
|
+
let stream = ffi::export_iterator(iter, field);
|
27
|
+
Ok(RbArrowArrayStream { stream }.into_value())
|
28
|
+
}
|
29
|
+
|
30
|
+
pub struct DataFrameStreamIterator {
|
31
|
+
columns: Vec<Series>,
|
32
|
+
dtype: ArrowDataType,
|
33
|
+
idx: usize,
|
34
|
+
n_chunks: usize,
|
35
|
+
}
|
36
|
+
|
37
|
+
impl DataFrameStreamIterator {
|
38
|
+
fn new(df: &DataFrame) -> Self {
|
39
|
+
let schema = df.schema().to_arrow(CompatLevel::newest());
|
40
|
+
let dtype = ArrowDataType::Struct(schema.into_iter_values().collect());
|
41
|
+
|
42
|
+
Self {
|
43
|
+
columns: df
|
44
|
+
.get_columns()
|
45
|
+
.iter()
|
46
|
+
.map(|v| v.as_materialized_series().clone())
|
47
|
+
.collect(),
|
48
|
+
dtype,
|
49
|
+
idx: 0,
|
50
|
+
n_chunks: df.first_col_n_chunks(),
|
51
|
+
}
|
52
|
+
}
|
53
|
+
|
54
|
+
fn field(&self) -> ArrowField {
|
55
|
+
ArrowField::new(PlSmallStr::EMPTY, self.dtype.clone(), false)
|
56
|
+
}
|
57
|
+
}
|
58
|
+
|
59
|
+
impl Iterator for DataFrameStreamIterator {
|
60
|
+
type Item = PolarsResult<ArrayRef>;
|
61
|
+
|
62
|
+
fn next(&mut self) -> Option<Self::Item> {
|
63
|
+
if self.idx >= self.n_chunks {
|
64
|
+
None
|
65
|
+
} else {
|
66
|
+
// create a batch of the columns with the same chunk no.
|
67
|
+
let batch_cols = self
|
68
|
+
.columns
|
69
|
+
.iter()
|
70
|
+
.map(|s| s.to_arrow(self.idx, CompatLevel::newest()))
|
71
|
+
.collect::<Vec<_>>();
|
72
|
+
self.idx += 1;
|
73
|
+
|
74
|
+
let array = arrow::array::StructArray::new(
|
75
|
+
self.dtype.clone(),
|
76
|
+
batch_cols[0].len(),
|
77
|
+
batch_cols,
|
78
|
+
None,
|
79
|
+
);
|
80
|
+
Some(Ok(Box::new(array)))
|
81
|
+
}
|
82
|
+
}
|
83
|
+
}
|