polars-df 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/Cargo.lock +1 -1
- data/ext/polars/Cargo.toml +1 -1
- data/ext/polars/src/batched_csv.rs +120 -0
- data/ext/polars/src/conversion.rs +105 -5
- data/ext/polars/src/dataframe.rs +132 -4
- data/ext/polars/src/error.rs +9 -0
- data/ext/polars/src/file.rs +8 -7
- data/ext/polars/src/lazy/apply.rs +7 -0
- data/ext/polars/src/lazy/dataframe.rs +132 -0
- data/ext/polars/src/lazy/dsl.rs +38 -0
- data/ext/polars/src/lazy/meta.rs +1 -1
- data/ext/polars/src/lazy/mod.rs +1 -0
- data/ext/polars/src/lib.rs +77 -3
- data/ext/polars/src/series.rs +8 -9
- data/lib/polars/batched_csv_reader.rb +95 -0
- data/lib/polars/data_frame.rb +585 -19
- data/lib/polars/expr.rb +17 -2
- data/lib/polars/io.rb +342 -2
- data/lib/polars/lazy_frame.rb +156 -2
- data/lib/polars/lazy_functions.rb +154 -11
- data/lib/polars/series.rb +806 -18
- data/lib/polars/utils.rb +33 -0
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +9 -0
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 510c7a761553fb3a49919add842d520da1518c5df6dd37f93af338e928c7a207
|
4
|
+
data.tar.gz: 3e724d3c2553bb6b4a587f056a04977990b85103ea9ae231166746407e1c0b1e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 59391ecd7a3d14372ad24693240ebeae4d83c59d55b37876fc6f216dd576897113607f7b5f6f6a3d7a5d75f96def72b1f2659f3de71e6a975a2753719879fa33
|
7
|
+
data.tar.gz: 5a2e8cdf3aed01f16823be62067b0ec01e4e0dc3bfa4dc27929dfbe42d7c92e62f6e7a09bc78533d220a3bfc9890f9f0fb069482a338fbd0b55603aa42fddc57
|
data/CHANGELOG.md
CHANGED
data/Cargo.lock
CHANGED
data/ext/polars/Cargo.toml
CHANGED
@@ -0,0 +1,120 @@
|
|
1
|
+
use magnus::Value;
|
2
|
+
use polars::io::mmap::MmapBytesReader;
|
3
|
+
use polars::io::RowCount;
|
4
|
+
use polars::prelude::read_impl::OwnedBatchedCsvReader;
|
5
|
+
use polars::prelude::*;
|
6
|
+
use std::cell::RefCell;
|
7
|
+
use std::path::PathBuf;
|
8
|
+
|
9
|
+
use crate::conversion::*;
|
10
|
+
use crate::{RbDataFrame, RbPolarsErr, RbResult};
|
11
|
+
|
12
|
+
#[magnus::wrap(class = "Polars::RbBatchedCsv")]
|
13
|
+
pub struct RbBatchedCsv {
|
14
|
+
pub reader: RefCell<OwnedBatchedCsvReader>,
|
15
|
+
}
|
16
|
+
|
17
|
+
impl RbBatchedCsv {
|
18
|
+
pub fn new(arguments: &[Value]) -> RbResult<Self> {
|
19
|
+
// start arguments
|
20
|
+
// this pattern is needed for more than 16
|
21
|
+
let infer_schema_length: Option<usize> = arguments[0].try_convert()?;
|
22
|
+
let chunk_size: usize = arguments[1].try_convert()?;
|
23
|
+
let has_header: bool = arguments[2].try_convert()?;
|
24
|
+
let ignore_errors: bool = arguments[3].try_convert()?;
|
25
|
+
let n_rows: Option<usize> = arguments[4].try_convert()?;
|
26
|
+
let skip_rows: usize = arguments[5].try_convert()?;
|
27
|
+
let projection: Option<Vec<usize>> = arguments[6].try_convert()?;
|
28
|
+
let sep: String = arguments[7].try_convert()?;
|
29
|
+
let rechunk: bool = arguments[8].try_convert()?;
|
30
|
+
let columns: Option<Vec<String>> = arguments[9].try_convert()?;
|
31
|
+
let encoding: Wrap<CsvEncoding> = arguments[10].try_convert()?;
|
32
|
+
let n_threads: Option<usize> = arguments[11].try_convert()?;
|
33
|
+
let path: PathBuf = arguments[12].try_convert()?;
|
34
|
+
let overwrite_dtype: Option<Vec<(String, Wrap<DataType>)>> = arguments[13].try_convert()?;
|
35
|
+
// TODO fix
|
36
|
+
let overwrite_dtype_slice: Option<Vec<Wrap<DataType>>> = None; // arguments[14].try_convert()?;
|
37
|
+
let low_memory: bool = arguments[15].try_convert()?;
|
38
|
+
let comment_char: Option<String> = arguments[16].try_convert()?;
|
39
|
+
let quote_char: Option<String> = arguments[17].try_convert()?;
|
40
|
+
let null_values: Option<Wrap<NullValues>> = arguments[18].try_convert()?;
|
41
|
+
let parse_dates: bool = arguments[19].try_convert()?;
|
42
|
+
let skip_rows_after_header: usize = arguments[20].try_convert()?;
|
43
|
+
let row_count: Option<(String, IdxSize)> = arguments[21].try_convert()?;
|
44
|
+
let sample_size: usize = arguments[22].try_convert()?;
|
45
|
+
let eol_char: String = arguments[23].try_convert()?;
|
46
|
+
// end arguments
|
47
|
+
|
48
|
+
let null_values = null_values.map(|w| w.0);
|
49
|
+
let comment_char = comment_char.map(|s| s.as_bytes()[0]);
|
50
|
+
let eol_char = eol_char.as_bytes()[0];
|
51
|
+
|
52
|
+
let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
|
53
|
+
|
54
|
+
let quote_char = if let Some(s) = quote_char {
|
55
|
+
if s.is_empty() {
|
56
|
+
None
|
57
|
+
} else {
|
58
|
+
Some(s.as_bytes()[0])
|
59
|
+
}
|
60
|
+
} else {
|
61
|
+
None
|
62
|
+
};
|
63
|
+
|
64
|
+
let overwrite_dtype = overwrite_dtype.map(|overwrite_dtype| {
|
65
|
+
let fields = overwrite_dtype.iter().map(|(name, dtype)| {
|
66
|
+
let dtype = dtype.0.clone();
|
67
|
+
Field::new(name, dtype)
|
68
|
+
});
|
69
|
+
Schema::from(fields)
|
70
|
+
});
|
71
|
+
|
72
|
+
let overwrite_dtype_slice = overwrite_dtype_slice.map(|overwrite_dtype| {
|
73
|
+
overwrite_dtype
|
74
|
+
.iter()
|
75
|
+
.map(|dt| dt.0.clone())
|
76
|
+
.collect::<Vec<_>>()
|
77
|
+
});
|
78
|
+
|
79
|
+
let file = std::fs::File::open(path).map_err(RbPolarsErr::io)?;
|
80
|
+
let reader = Box::new(file) as Box<dyn MmapBytesReader>;
|
81
|
+
let reader = CsvReader::new(reader)
|
82
|
+
.infer_schema(infer_schema_length)
|
83
|
+
.has_header(has_header)
|
84
|
+
.with_n_rows(n_rows)
|
85
|
+
.with_delimiter(sep.as_bytes()[0])
|
86
|
+
.with_skip_rows(skip_rows)
|
87
|
+
.with_ignore_parser_errors(ignore_errors)
|
88
|
+
.with_projection(projection)
|
89
|
+
.with_rechunk(rechunk)
|
90
|
+
.with_chunk_size(chunk_size)
|
91
|
+
.with_encoding(encoding.0)
|
92
|
+
.with_columns(columns)
|
93
|
+
.with_n_threads(n_threads)
|
94
|
+
.with_dtypes_slice(overwrite_dtype_slice.as_deref())
|
95
|
+
.low_memory(low_memory)
|
96
|
+
.with_comment_char(comment_char)
|
97
|
+
.with_null_values(null_values)
|
98
|
+
.with_parse_dates(parse_dates)
|
99
|
+
.with_quote_char(quote_char)
|
100
|
+
.with_end_of_line_char(eol_char)
|
101
|
+
.with_skip_rows_after_header(skip_rows_after_header)
|
102
|
+
.with_row_count(row_count)
|
103
|
+
.sample_size(sample_size)
|
104
|
+
.batched(overwrite_dtype.map(Arc::new))
|
105
|
+
.map_err(RbPolarsErr::from)?;
|
106
|
+
|
107
|
+
Ok(RbBatchedCsv {
|
108
|
+
reader: RefCell::new(reader),
|
109
|
+
})
|
110
|
+
}
|
111
|
+
|
112
|
+
pub fn next_batches(&self, n: usize) -> RbResult<Option<Vec<RbDataFrame>>> {
|
113
|
+
let batches = self
|
114
|
+
.reader
|
115
|
+
.borrow_mut()
|
116
|
+
.next_batches(n)
|
117
|
+
.map_err(RbPolarsErr::from)?;
|
118
|
+
Ok(batches.map(|batches| batches.into_iter().map(|out| out.1.into()).collect()))
|
119
|
+
}
|
120
|
+
}
|
@@ -1,11 +1,11 @@
|
|
1
|
-
use magnus::{TryConvert, Value, QNIL};
|
1
|
+
use magnus::{RArray, Symbol, TryConvert, Value, QNIL};
|
2
2
|
use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
|
3
3
|
use polars::datatypes::AnyValue;
|
4
4
|
use polars::frame::DataFrame;
|
5
5
|
use polars::prelude::*;
|
6
6
|
use polars::series::ops::NullBehavior;
|
7
7
|
|
8
|
-
use crate::{RbDataFrame, RbPolarsErr, RbResult, RbValueError};
|
8
|
+
use crate::{RbDataFrame, RbPolarsErr, RbResult, RbSeries, RbValueError};
|
9
9
|
|
10
10
|
/// Thin newtype around `T`.
///
/// This file implements conversion traits (for example `TryConvert`) on
/// specific `Wrap<...>` instantiations; the wrapped value is reachable as `.0`.
pub struct Wrap<T>(pub T);
|
11
11
|
|
@@ -15,14 +15,57 @@ impl<T> From<T> for Wrap<T> {
|
|
15
15
|
}
|
16
16
|
}
|
17
17
|
|
18
|
+
/// Converts `obj` into a Ruby array and returns it together with its length.
///
/// Fails with the conversion error when `obj` cannot be converted to `RArray`.
pub fn get_rbseq(obj: Value) -> RbResult<(RArray, usize)> {
    let array = obj.try_convert::<RArray>()?;
    let count = array.len();
    Ok((array, count))
}
|
23
|
+
|
18
24
|
pub fn get_df(obj: Value) -> RbResult<DataFrame> {
|
19
25
|
let rbdf = obj.funcall::<_, _, &RbDataFrame>("_df", ())?;
|
20
26
|
Ok(rbdf.df.borrow().clone())
|
21
27
|
}
|
22
28
|
|
23
|
-
|
24
|
-
|
25
|
-
|
29
|
+
pub fn get_series(obj: Value) -> RbResult<Series> {
|
30
|
+
let rbs = obj.funcall::<_, _, &RbSeries>("_s", ())?;
|
31
|
+
Ok(rbs.series.borrow().clone())
|
32
|
+
}
|
33
|
+
|
34
|
+
impl TryConvert for Wrap<Utf8Chunked> {
|
35
|
+
fn try_convert(obj: Value) -> RbResult<Self> {
|
36
|
+
let (seq, len) = get_rbseq(obj)?;
|
37
|
+
let mut builder = Utf8ChunkedBuilder::new("", len, len * 25);
|
38
|
+
|
39
|
+
for res in seq.each() {
|
40
|
+
let item = res?;
|
41
|
+
match item.try_convert::<String>() {
|
42
|
+
Ok(val) => builder.append_value(&val),
|
43
|
+
Err(_) => builder.append_null(),
|
44
|
+
}
|
45
|
+
}
|
46
|
+
Ok(Wrap(builder.finish()))
|
47
|
+
}
|
48
|
+
}
|
49
|
+
|
50
|
+
impl TryConvert for Wrap<NullValues> {
|
51
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
52
|
+
if let Ok(s) = ob.try_convert::<String>() {
|
53
|
+
Ok(Wrap(NullValues::AllColumnsSingle(s)))
|
54
|
+
} else if let Ok(s) = ob.try_convert::<Vec<String>>() {
|
55
|
+
Ok(Wrap(NullValues::AllColumns(s)))
|
56
|
+
} else if let Ok(s) = ob.try_convert::<Vec<(String, String)>>() {
|
57
|
+
Ok(Wrap(NullValues::Named(s)))
|
58
|
+
} else {
|
59
|
+
Err(RbPolarsErr::other(
|
60
|
+
"could not extract value from null_values argument".into(),
|
61
|
+
))
|
62
|
+
}
|
63
|
+
}
|
64
|
+
}
|
65
|
+
|
66
|
+
impl From<Wrap<AnyValue<'_>>> for Value {
|
67
|
+
fn from(w: Wrap<AnyValue<'_>>) -> Self {
|
68
|
+
match w.0 {
|
26
69
|
AnyValue::UInt8(v) => Value::from(v),
|
27
70
|
AnyValue::UInt16(v) => Value::from(v),
|
28
71
|
AnyValue::UInt32(v) => Value::from(v),
|
@@ -41,6 +84,12 @@ impl Into<Value> for Wrap<AnyValue<'_>> {
|
|
41
84
|
}
|
42
85
|
}
|
43
86
|
|
87
|
+
impl From<Wrap<DataType>> for Value {
|
88
|
+
fn from(w: Wrap<DataType>) -> Self {
|
89
|
+
Symbol::from(w.0.to_string()).into()
|
90
|
+
}
|
91
|
+
}
|
92
|
+
|
44
93
|
impl TryConvert for Wrap<DataType> {
|
45
94
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
46
95
|
let dtype = match ob.try_convert::<String>()?.as_str() {
|
@@ -118,6 +167,39 @@ impl TryConvert for Wrap<ClosedWindow> {
|
|
118
167
|
}
|
119
168
|
}
|
120
169
|
|
170
|
+
impl TryConvert for Wrap<CsvEncoding> {
|
171
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
172
|
+
let parsed = match ob.try_convert::<String>()?.as_str() {
|
173
|
+
"utf8" => CsvEncoding::Utf8,
|
174
|
+
"utf8-lossy" => CsvEncoding::LossyUtf8,
|
175
|
+
v => {
|
176
|
+
return Err(RbValueError::new_err(format!(
|
177
|
+
"encoding must be one of {{'utf8', 'utf8-lossy'}}, got {}",
|
178
|
+
v
|
179
|
+
)))
|
180
|
+
}
|
181
|
+
};
|
182
|
+
Ok(Wrap(parsed))
|
183
|
+
}
|
184
|
+
}
|
185
|
+
|
186
|
+
impl TryConvert for Wrap<Option<IpcCompression>> {
|
187
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
188
|
+
let parsed = match ob.try_convert::<String>()?.as_str() {
|
189
|
+
"uncompressed" => None,
|
190
|
+
"lz4" => Some(IpcCompression::LZ4),
|
191
|
+
"zstd" => Some(IpcCompression::ZSTD),
|
192
|
+
v => {
|
193
|
+
return Err(RbValueError::new_err(format!(
|
194
|
+
"compression must be one of {{'uncompressed', 'lz4', 'zstd'}}, got {}",
|
195
|
+
v
|
196
|
+
)))
|
197
|
+
}
|
198
|
+
};
|
199
|
+
Ok(Wrap(parsed))
|
200
|
+
}
|
201
|
+
}
|
202
|
+
|
121
203
|
impl TryConvert for Wrap<JoinType> {
|
122
204
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
123
205
|
let parsed = match ob.try_convert::<String>()?.as_str() {
|
@@ -171,6 +253,24 @@ impl TryConvert for Wrap<NullStrategy> {
|
|
171
253
|
}
|
172
254
|
}
|
173
255
|
|
256
|
+
impl TryConvert for Wrap<ParallelStrategy> {
|
257
|
+
fn try_convert(ob: Value) -> RbResult<Self> {
|
258
|
+
let parsed = match ob.try_convert::<String>()?.as_str() {
|
259
|
+
"auto" => ParallelStrategy::Auto,
|
260
|
+
"columns" => ParallelStrategy::Columns,
|
261
|
+
"row_groups" => ParallelStrategy::RowGroups,
|
262
|
+
"none" => ParallelStrategy::None,
|
263
|
+
v => {
|
264
|
+
return Err(RbValueError::new_err(format!(
|
265
|
+
"parallel must be one of {{'auto', 'columns', 'row_groups', 'none'}}, got {}",
|
266
|
+
v
|
267
|
+
)))
|
268
|
+
}
|
269
|
+
};
|
270
|
+
Ok(Wrap(parsed))
|
271
|
+
}
|
272
|
+
}
|
273
|
+
|
174
274
|
impl TryConvert for Wrap<QuantileInterpolOptions> {
|
175
275
|
fn try_convert(ob: Value) -> RbResult<Self> {
|
176
276
|
let parsed = match ob.try_convert::<String>()?.as_str() {
|
data/ext/polars/src/dataframe.rs
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
use magnus::{r_hash::ForEach, Error, RArray, RHash, RString, Value};
|
2
2
|
use polars::io::mmap::ReaderBytes;
|
3
|
+
use polars::io::RowCount;
|
3
4
|
use polars::prelude::*;
|
4
5
|
use std::cell::RefCell;
|
5
6
|
use std::fs::File;
|
@@ -43,10 +44,94 @@ impl RbDataFrame {
|
|
43
44
|
self.df.borrow().estimated_size()
|
44
45
|
}
|
45
46
|
|
46
|
-
pub fn read_csv(
|
47
|
+
pub fn read_csv(arguments: &[Value]) -> RbResult<Self> {
|
48
|
+
// start arguments
|
49
|
+
// this pattern is needed for more than 16
|
50
|
+
let rb_f: Value = arguments[0].try_convert()?;
|
51
|
+
let infer_schema_length: Option<usize> = arguments[1].try_convert()?;
|
52
|
+
let chunk_size: usize = arguments[2].try_convert()?;
|
53
|
+
let has_header: bool = arguments[3].try_convert()?;
|
54
|
+
let ignore_errors: bool = arguments[4].try_convert()?;
|
55
|
+
let n_rows: Option<usize> = arguments[5].try_convert()?;
|
56
|
+
let skip_rows: usize = arguments[6].try_convert()?;
|
57
|
+
let projection: Option<Vec<usize>> = arguments[7].try_convert()?;
|
58
|
+
let sep: String = arguments[8].try_convert()?;
|
59
|
+
let rechunk: bool = arguments[9].try_convert()?;
|
60
|
+
let columns: Option<Vec<String>> = arguments[10].try_convert()?;
|
61
|
+
let encoding: Wrap<CsvEncoding> = arguments[11].try_convert()?;
|
62
|
+
let n_threads: Option<usize> = arguments[12].try_convert()?;
|
63
|
+
let path: Option<String> = arguments[13].try_convert()?;
|
64
|
+
let overwrite_dtype: Option<Vec<(String, Wrap<DataType>)>> = arguments[14].try_convert()?;
|
65
|
+
// TODO fix
|
66
|
+
let overwrite_dtype_slice: Option<Vec<Wrap<DataType>>> = None; // arguments[15].try_convert()?;
|
67
|
+
let low_memory: bool = arguments[16].try_convert()?;
|
68
|
+
let comment_char: Option<String> = arguments[17].try_convert()?;
|
69
|
+
let quote_char: Option<String> = arguments[18].try_convert()?;
|
70
|
+
let null_values: Option<Wrap<NullValues>> = arguments[19].try_convert()?;
|
71
|
+
let parse_dates: bool = arguments[20].try_convert()?;
|
72
|
+
let skip_rows_after_header: usize = arguments[21].try_convert()?;
|
73
|
+
let row_count: Option<(String, IdxSize)> = arguments[22].try_convert()?;
|
74
|
+
let sample_size: usize = arguments[23].try_convert()?;
|
75
|
+
let eol_char: String = arguments[24].try_convert()?;
|
76
|
+
// end arguments
|
77
|
+
|
78
|
+
let null_values = null_values.map(|w| w.0);
|
79
|
+
let comment_char = comment_char.map(|s| s.as_bytes()[0]);
|
80
|
+
let eol_char = eol_char.as_bytes()[0];
|
81
|
+
|
82
|
+
let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
|
83
|
+
|
84
|
+
let quote_char = if let Some(s) = quote_char {
|
85
|
+
if s.is_empty() {
|
86
|
+
None
|
87
|
+
} else {
|
88
|
+
Some(s.as_bytes()[0])
|
89
|
+
}
|
90
|
+
} else {
|
91
|
+
None
|
92
|
+
};
|
93
|
+
|
94
|
+
let overwrite_dtype = overwrite_dtype.map(|overwrite_dtype| {
|
95
|
+
let fields = overwrite_dtype.iter().map(|(name, dtype)| {
|
96
|
+
let dtype = dtype.0.clone();
|
97
|
+
Field::new(name, dtype)
|
98
|
+
});
|
99
|
+
Schema::from(fields)
|
100
|
+
});
|
101
|
+
|
102
|
+
let overwrite_dtype_slice = overwrite_dtype_slice.map(|overwrite_dtype| {
|
103
|
+
overwrite_dtype
|
104
|
+
.iter()
|
105
|
+
.map(|dt| dt.0.clone())
|
106
|
+
.collect::<Vec<_>>()
|
107
|
+
});
|
108
|
+
|
47
109
|
let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
|
48
110
|
let df = CsvReader::new(mmap_bytes_r)
|
111
|
+
.infer_schema(infer_schema_length)
|
49
112
|
.has_header(has_header)
|
113
|
+
.with_n_rows(n_rows)
|
114
|
+
.with_delimiter(sep.as_bytes()[0])
|
115
|
+
.with_skip_rows(skip_rows)
|
116
|
+
.with_ignore_parser_errors(ignore_errors)
|
117
|
+
.with_projection(projection)
|
118
|
+
.with_rechunk(rechunk)
|
119
|
+
.with_chunk_size(chunk_size)
|
120
|
+
.with_encoding(encoding.0)
|
121
|
+
.with_columns(columns)
|
122
|
+
.with_n_threads(n_threads)
|
123
|
+
.with_path(path)
|
124
|
+
.with_dtypes(overwrite_dtype.as_ref())
|
125
|
+
.with_dtypes_slice(overwrite_dtype_slice.as_deref())
|
126
|
+
.low_memory(low_memory)
|
127
|
+
.with_comment_char(comment_char)
|
128
|
+
.with_null_values(null_values)
|
129
|
+
.with_parse_dates(parse_dates)
|
130
|
+
.with_quote_char(quote_char)
|
131
|
+
.with_end_of_line_char(eol_char)
|
132
|
+
.with_skip_rows_after_header(skip_rows_after_header)
|
133
|
+
.with_row_count(row_count)
|
134
|
+
.sample_size(sample_size)
|
50
135
|
.finish()
|
51
136
|
.map_err(RbPolarsErr::from)?;
|
52
137
|
Ok(df.into())
|
@@ -61,6 +146,27 @@ impl RbDataFrame {
|
|
61
146
|
.map(|v| v.into())
|
62
147
|
}
|
63
148
|
|
149
|
+
pub fn read_ipc(
|
150
|
+
rb_f: Value,
|
151
|
+
columns: Option<Vec<String>>,
|
152
|
+
projection: Option<Vec<usize>>,
|
153
|
+
n_rows: Option<usize>,
|
154
|
+
row_count: Option<(String, IdxSize)>,
|
155
|
+
memory_map: bool,
|
156
|
+
) -> RbResult<Self> {
|
157
|
+
let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
|
158
|
+
let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
|
159
|
+
let df = IpcReader::new(mmap_bytes_r)
|
160
|
+
.with_projection(projection)
|
161
|
+
.with_columns(columns)
|
162
|
+
.with_n_rows(n_rows)
|
163
|
+
.with_row_count(row_count)
|
164
|
+
.memory_mapped(memory_map)
|
165
|
+
.finish()
|
166
|
+
.map_err(RbPolarsErr::from)?;
|
167
|
+
Ok(RbDataFrame::new(df))
|
168
|
+
}
|
169
|
+
|
64
170
|
pub fn read_json(rb_f: Value) -> RbResult<Self> {
|
65
171
|
// memmap the file first
|
66
172
|
let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
|
@@ -185,6 +291,28 @@ impl RbDataFrame {
|
|
185
291
|
Ok(())
|
186
292
|
}
|
187
293
|
|
294
|
+
pub fn write_ipc(
|
295
|
+
&self,
|
296
|
+
rb_f: Value,
|
297
|
+
compression: Wrap<Option<IpcCompression>>,
|
298
|
+
) -> RbResult<()> {
|
299
|
+
if let Ok(s) = rb_f.try_convert::<String>() {
|
300
|
+
let f = std::fs::File::create(&s).unwrap();
|
301
|
+
IpcWriter::new(f)
|
302
|
+
.with_compression(compression.0)
|
303
|
+
.finish(&mut self.df.borrow_mut())
|
304
|
+
.map_err(RbPolarsErr::from)?;
|
305
|
+
} else {
|
306
|
+
let mut buf = get_file_like(rb_f, true)?;
|
307
|
+
|
308
|
+
IpcWriter::new(&mut buf)
|
309
|
+
.with_compression(compression.0)
|
310
|
+
.finish(&mut self.df.borrow_mut())
|
311
|
+
.map_err(RbPolarsErr::from)?;
|
312
|
+
}
|
313
|
+
Ok(())
|
314
|
+
}
|
315
|
+
|
188
316
|
pub fn write_parquet(
|
189
317
|
&self,
|
190
318
|
rb_f: Value,
|
@@ -240,11 +368,11 @@ impl RbDataFrame {
|
|
240
368
|
Ok(())
|
241
369
|
}
|
242
370
|
|
243
|
-
pub fn dtypes(&self) -> Vec<
|
371
|
+
pub fn dtypes(&self) -> Vec<Value> {
|
244
372
|
self.df
|
245
373
|
.borrow()
|
246
374
|
.iter()
|
247
|
-
.map(|s| s.dtype().
|
375
|
+
.map(|s| Wrap(s.dtype().clone()).into())
|
248
376
|
.collect()
|
249
377
|
}
|
250
378
|
|
@@ -418,7 +546,7 @@ impl RbDataFrame {
|
|
418
546
|
self.df.borrow().partition_by(groups)
|
419
547
|
}
|
420
548
|
.map_err(RbPolarsErr::from)?;
|
421
|
-
Ok(out.into_iter().map(
|
549
|
+
Ok(out.into_iter().map(RbDataFrame::new).collect())
|
422
550
|
}
|
423
551
|
|
424
552
|
pub fn shift(&self, periods: i64) -> Self {
|
data/ext/polars/src/error.rs
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
use magnus::exception::arg_error;
|
2
2
|
use magnus::Error;
|
3
|
+
use polars::error::ArrowError;
|
3
4
|
use polars::prelude::PolarsError;
|
4
5
|
|
5
6
|
pub struct RbPolarsErr {}
|
@@ -10,6 +11,14 @@ impl RbPolarsErr {
|
|
10
11
|
Error::runtime_error(e.to_string())
|
11
12
|
}
|
12
13
|
|
14
|
+
pub fn arrow(e: ArrowError) -> Error {
|
15
|
+
Error::runtime_error(e.to_string())
|
16
|
+
}
|
17
|
+
|
18
|
+
pub fn io(e: std::io::Error) -> Error {
|
19
|
+
Error::runtime_error(e.to_string())
|
20
|
+
}
|
21
|
+
|
13
22
|
pub fn other(message: String) -> Error {
|
14
23
|
Error::runtime_error(message)
|
15
24
|
}
|
data/ext/polars/src/file.rs
CHANGED
@@ -1,18 +1,19 @@
|
|
1
1
|
use magnus::{Error, RString, Value};
|
2
2
|
use polars::io::mmap::MmapBytesReader;
|
3
|
-
use std::fs::
|
3
|
+
use std::fs::File;
|
4
4
|
use std::io::Cursor;
|
5
5
|
use std::path::PathBuf;
|
6
6
|
|
7
7
|
use crate::RbResult;
|
8
8
|
|
9
9
|
pub fn get_file_like(f: Value, truncate: bool) -> RbResult<File> {
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
10
|
+
let str_slice = f.try_convert::<PathBuf>()?;
|
11
|
+
let f = if truncate {
|
12
|
+
File::create(str_slice).map_err(|e| Error::runtime_error(e.to_string()))?
|
13
|
+
} else {
|
14
|
+
File::open(str_slice).map_err(|e| Error::runtime_error(e.to_string()))?
|
15
|
+
};
|
16
|
+
Ok(f)
|
16
17
|
}
|
17
18
|
|
18
19
|
pub fn get_mmap_bytes_reader(rb_f: Value) -> RbResult<Box<dyn MmapBytesReader>> {
|