polars-df 0.10.0 → 0.12.0
This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
- checksums.yaml +4 -4
- data/CHANGELOG.md +27 -0
- data/Cargo.lock +392 -351
- data/README.md +6 -6
- data/ext/polars/Cargo.toml +12 -7
- data/ext/polars/src/batched_csv.rs +53 -52
- data/ext/polars/src/conversion/any_value.rs +261 -0
- data/ext/polars/src/conversion/chunked_array.rs +4 -4
- data/ext/polars/src/conversion/mod.rs +60 -66
- data/ext/polars/src/dataframe/construction.rs +184 -0
- data/ext/polars/src/dataframe/export.rs +48 -0
- data/ext/polars/src/dataframe/general.rs +597 -0
- data/ext/polars/src/dataframe/io.rs +473 -0
- data/ext/polars/src/dataframe/mod.rs +26 -0
- data/ext/polars/src/error.rs +26 -4
- data/ext/polars/src/expr/categorical.rs +0 -10
- data/ext/polars/src/expr/datetime.rs +4 -8
- data/ext/polars/src/expr/general.rs +129 -94
- data/ext/polars/src/expr/mod.rs +2 -2
- data/ext/polars/src/expr/rolling.rs +201 -77
- data/ext/polars/src/expr/string.rs +11 -36
- data/ext/polars/src/functions/eager.rs +10 -10
- data/ext/polars/src/functions/lazy.rs +23 -21
- data/ext/polars/src/functions/range.rs +69 -1
- data/ext/polars/src/interop/mod.rs +1 -0
- data/ext/polars/src/interop/numo/mod.rs +2 -0
- data/ext/polars/src/interop/numo/to_numo_df.rs +23 -0
- data/ext/polars/src/interop/numo/to_numo_series.rs +61 -0
- data/ext/polars/src/lazyframe/mod.rs +135 -136
- data/ext/polars/src/lib.rs +94 -59
- data/ext/polars/src/map/dataframe.rs +2 -2
- data/ext/polars/src/map/lazy.rs +5 -25
- data/ext/polars/src/map/series.rs +7 -1
- data/ext/polars/src/rb_modules.rs +25 -1
- data/ext/polars/src/series/aggregation.rs +49 -30
- data/ext/polars/src/series/arithmetic.rs +21 -11
- data/ext/polars/src/series/construction.rs +56 -38
- data/ext/polars/src/series/export.rs +131 -49
- data/ext/polars/src/series/mod.rs +32 -141
- data/ext/polars/src/sql.rs +3 -1
- data/lib/polars/array_expr.rb +4 -4
- data/lib/polars/batched_csv_reader.rb +11 -5
- data/lib/polars/cat_expr.rb +0 -36
- data/lib/polars/cat_name_space.rb +0 -37
- data/lib/polars/convert.rb +6 -1
- data/lib/polars/data_frame.rb +176 -403
- data/lib/polars/data_types.rb +1 -1
- data/lib/polars/date_time_expr.rb +525 -572
- data/lib/polars/date_time_name_space.rb +263 -460
- data/lib/polars/dynamic_group_by.rb +5 -5
- data/lib/polars/exceptions.rb +7 -0
- data/lib/polars/expr.rb +1394 -243
- data/lib/polars/expr_dispatch.rb +1 -1
- data/lib/polars/functions/aggregation/horizontal.rb +8 -8
- data/lib/polars/functions/as_datatype.rb +63 -40
- data/lib/polars/functions/lazy.rb +63 -14
- data/lib/polars/functions/lit.rb +1 -1
- data/lib/polars/functions/range/date_range.rb +90 -57
- data/lib/polars/functions/range/datetime_range.rb +149 -0
- data/lib/polars/functions/range/int_range.rb +2 -2
- data/lib/polars/functions/range/time_range.rb +141 -0
- data/lib/polars/functions/repeat.rb +1 -1
- data/lib/polars/functions/whenthen.rb +1 -1
- data/lib/polars/group_by.rb +88 -23
- data/lib/polars/io/avro.rb +24 -0
- data/lib/polars/{io.rb → io/csv.rb} +299 -493
- data/lib/polars/io/database.rb +73 -0
- data/lib/polars/io/ipc.rb +247 -0
- data/lib/polars/io/json.rb +29 -0
- data/lib/polars/io/ndjson.rb +80 -0
- data/lib/polars/io/parquet.rb +227 -0
- data/lib/polars/lazy_frame.rb +143 -272
- data/lib/polars/lazy_group_by.rb +100 -3
- data/lib/polars/list_expr.rb +11 -11
- data/lib/polars/list_name_space.rb +5 -1
- data/lib/polars/rolling_group_by.rb +7 -9
- data/lib/polars/series.rb +103 -187
- data/lib/polars/string_expr.rb +78 -102
- data/lib/polars/string_name_space.rb +5 -4
- data/lib/polars/testing.rb +2 -2
- data/lib/polars/utils/constants.rb +9 -0
- data/lib/polars/utils/convert.rb +97 -0
- data/lib/polars/utils/parse.rb +89 -0
- data/lib/polars/utils/various.rb +76 -0
- data/lib/polars/utils/wrap.rb +19 -0
- data/lib/polars/utils.rb +8 -300
- data/lib/polars/version.rb +1 -1
- data/lib/polars/whenthen.rb +6 -6
- data/lib/polars.rb +20 -1
- metadata +28 -7
- data/ext/polars/src/conversion/anyvalue.rs +0 -186
- data/ext/polars/src/dataframe.rs +0 -1208
data/ext/polars/src/dataframe/io.rs
ADDED
@@ -0,0 +1,473 @@
+use magnus::{prelude::*, RString, Value};
+use polars::io::avro::AvroCompression;
+use polars::io::RowIndex;
+use polars::prelude::*;
+use std::io::{BufWriter, Cursor};
+use std::num::NonZeroUsize;
+
+use super::*;
+use crate::conversion::*;
+use crate::file::{get_either_file, get_file_like, get_mmap_bytes_reader, EitherRustRubyFile};
+use crate::{RbPolarsErr, RbResult};
+
+impl RbDataFrame {
+    pub fn read_csv(arguments: &[Value]) -> RbResult<Self> {
+        // start arguments
+        // this pattern is needed for more than 16
+        let rb_f = arguments[0];
+        let infer_schema_length = Option::<usize>::try_convert(arguments[1])?;
+        let chunk_size = usize::try_convert(arguments[2])?;
+        let has_header = bool::try_convert(arguments[3])?;
+        let ignore_errors = bool::try_convert(arguments[4])?;
+        let n_rows = Option::<usize>::try_convert(arguments[5])?;
+        let skip_rows = usize::try_convert(arguments[6])?;
+        let projection = Option::<Vec<usize>>::try_convert(arguments[7])?;
+        let separator = String::try_convert(arguments[8])?;
+        let rechunk = bool::try_convert(arguments[9])?;
+        let columns = Option::<Vec<String>>::try_convert(arguments[10])?;
+        let encoding = Wrap::<CsvEncoding>::try_convert(arguments[11])?;
+        let n_threads = Option::<usize>::try_convert(arguments[12])?;
+        let path = Option::<String>::try_convert(arguments[13])?;
+        let overwrite_dtype = Option::<Vec<(String, Wrap<DataType>)>>::try_convert(arguments[14])?;
+        // TODO fix
+        let overwrite_dtype_slice = Option::<Vec<Wrap<DataType>>>::None; // Option::<Vec<Wrap<DataType>>>::try_convert(arguments[15])?;
+        let low_memory = bool::try_convert(arguments[16])?;
+        let comment_prefix = Option::<String>::try_convert(arguments[17])?;
+        let quote_char = Option::<String>::try_convert(arguments[18])?;
+        let null_values = Option::<Wrap<NullValues>>::try_convert(arguments[19])?;
+        let missing_utf8_is_empty_string = bool::try_convert(arguments[20])?;
+        let try_parse_dates = bool::try_convert(arguments[21])?;
+        let skip_rows_after_header = usize::try_convert(arguments[22])?;
+        let row_index = Option::<(String, IdxSize)>::try_convert(arguments[23])?;
+        let sample_size = usize::try_convert(arguments[24])?;
+        let eol_char = String::try_convert(arguments[25])?;
+        let raise_if_empty = bool::try_convert(arguments[26])?;
+        let truncate_ragged_lines = bool::try_convert(arguments[27])?;
+        let decimal_comma = bool::try_convert(arguments[28])?;
+        let schema = Option::<Wrap<Schema>>::try_convert(arguments[29])?;
+        // end arguments
+
+        let null_values = null_values.map(|w| w.0);
+        let eol_char = eol_char.as_bytes()[0];
+        let row_index = row_index.map(|(name, offset)| RowIndex {
+            name: Arc::from(name.as_str()),
+            offset,
+        });
+        let quote_char = if let Some(s) = quote_char {
+            if s.is_empty() {
+                None
+            } else {
+                Some(s.as_bytes()[0])
+            }
+        } else {
+            None
+        };
+
+        let overwrite_dtype = overwrite_dtype.map(|overwrite_dtype| {
+            overwrite_dtype
+                .iter()
+                .map(|(name, dtype)| {
+                    let dtype = dtype.0.clone();
+                    Field::new(name, dtype)
+                })
+                .collect::<Schema>()
+        });
+
+        let overwrite_dtype_slice = overwrite_dtype_slice.map(|overwrite_dtype| {
+            overwrite_dtype
+                .iter()
+                .map(|dt| dt.0.clone())
+                .collect::<Vec<_>>()
+        });
+
+        let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
+        let df = CsvReadOptions::default()
+            .with_path(path)
+            .with_infer_schema_length(infer_schema_length)
+            .with_has_header(has_header)
+            .with_n_rows(n_rows)
+            .with_skip_rows(skip_rows)
+            .with_ignore_errors(ignore_errors)
+            .with_projection(projection.map(Arc::new))
+            .with_rechunk(rechunk)
+            .with_chunk_size(chunk_size)
+            .with_columns(columns.map(Arc::from))
+            .with_n_threads(n_threads)
+            .with_schema_overwrite(overwrite_dtype.map(Arc::new))
+            .with_dtype_overwrite(overwrite_dtype_slice.map(Arc::new))
+            .with_schema(schema.map(|schema| Arc::new(schema.0)))
+            .with_low_memory(low_memory)
+            .with_skip_rows_after_header(skip_rows_after_header)
+            .with_row_index(row_index)
+            .with_sample_size(sample_size)
+            .with_raise_if_empty(raise_if_empty)
+            .with_parse_options(
+                CsvParseOptions::default()
+                    .with_separator(separator.as_bytes()[0])
+                    .with_encoding(encoding.0)
+                    .with_missing_is_null(!missing_utf8_is_empty_string)
+                    .with_comment_prefix(comment_prefix.as_deref())
+                    .with_null_values(null_values)
+                    .with_try_parse_dates(try_parse_dates)
+                    .with_quote_char(quote_char)
+                    .with_eol_char(eol_char)
+                    .with_truncate_ragged_lines(truncate_ragged_lines)
+                    .with_decimal_comma(decimal_comma),
+            )
+            .into_reader_with_file_handle(mmap_bytes_r)
+            .finish()
+            .map_err(RbPolarsErr::from)?;
+        Ok(df.into())
+    }
+
+    #[allow(clippy::too_many_arguments)]
+    pub fn read_parquet(
+        rb_f: Value,
+        columns: Option<Vec<String>>,
+        projection: Option<Vec<usize>>,
+        n_rows: Option<usize>,
+        parallel: Wrap<ParallelStrategy>,
+        row_index: Option<(String, IdxSize)>,
+        low_memory: bool,
+        use_statistics: bool,
+        rechunk: bool,
+    ) -> RbResult<Self> {
+        use EitherRustRubyFile::*;
+
+        let row_index = row_index.map(|(name, offset)| RowIndex {
+            name: Arc::from(name.as_str()),
+            offset,
+        });
+        let result = match get_either_file(rb_f, false)? {
+            Rb(f) => {
+                let buf = f.as_buffer();
+                ParquetReader::new(buf)
+                    .with_projection(projection)
+                    .with_columns(columns)
+                    .read_parallel(parallel.0)
+                    .with_n_rows(n_rows)
+                    .with_row_index(row_index)
+                    .set_low_memory(low_memory)
+                    .use_statistics(use_statistics)
+                    .set_rechunk(rechunk)
+                    .finish()
+            }
+            Rust(f) => ParquetReader::new(f.into_inner())
+                .with_projection(projection)
+                .with_columns(columns)
+                .read_parallel(parallel.0)
+                .with_n_rows(n_rows)
+                .with_row_index(row_index)
+                .use_statistics(use_statistics)
+                .set_rechunk(rechunk)
+                .finish(),
+        };
+        let df = result.map_err(RbPolarsErr::from)?;
+        Ok(RbDataFrame::new(df))
+    }
+
+    pub fn read_json(
+        rb_f: Value,
+        infer_schema_length: Option<usize>,
+        schema: Option<Wrap<Schema>>,
+        schema_overrides: Option<Wrap<Schema>>,
+    ) -> RbResult<Self> {
+        let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
+
+        let mut builder = JsonReader::new(mmap_bytes_r)
+            .with_json_format(JsonFormat::Json)
+            .infer_schema_len(infer_schema_length.and_then(NonZeroUsize::new));
+
+        if let Some(schema) = schema {
+            builder = builder.with_schema(Arc::new(schema.0));
+        }
+
+        if let Some(schema) = schema_overrides.as_ref() {
+            builder = builder.with_schema_overwrite(&schema.0);
+        }
+
+        let out = builder.finish().map_err(RbPolarsErr::from)?;
+        Ok(out.into())
+    }
+
+    pub fn read_ndjson(
+        rb_f: Value,
+        ignore_errors: bool,
+        schema: Option<Wrap<Schema>>,
+        schema_overrides: Option<Wrap<Schema>>,
+    ) -> RbResult<Self> {
+        let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
+
+        let mut builder = JsonReader::new(mmap_bytes_r)
+            .with_json_format(JsonFormat::JsonLines)
+            .with_ignore_errors(ignore_errors);
+
+        if let Some(schema) = schema {
+            builder = builder.with_schema(Arc::new(schema.0));
+        }
+
+        if let Some(schema) = schema_overrides.as_ref() {
+            builder = builder.with_schema_overwrite(&schema.0);
+        }
+
+        let out = builder
+            .finish()
+            .map_err(|e| RbPolarsErr::other(format!("{e}")))?;
+        Ok(out.into())
+    }
+
+    pub fn read_ipc(
+        rb_f: Value,
+        columns: Option<Vec<String>>,
+        projection: Option<Vec<usize>>,
+        n_rows: Option<usize>,
+        row_index: Option<(String, IdxSize)>,
+        _memory_map: bool,
+    ) -> RbResult<Self> {
+        let row_index = row_index.map(|(name, offset)| RowIndex {
+            name: Arc::from(name.as_str()),
+            offset,
+        });
+        let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
+
+        // TODO fix
+        let mmap_path = None;
+        let df = IpcReader::new(mmap_bytes_r)
+            .with_projection(projection)
+            .with_columns(columns)
+            .with_n_rows(n_rows)
+            .with_row_index(row_index)
+            .memory_mapped(mmap_path)
+            .finish()
+            .map_err(RbPolarsErr::from)?;
+        Ok(RbDataFrame::new(df))
+    }
+
+    pub fn read_ipc_stream(
+        rb_f: Value,
+        columns: Option<Vec<String>>,
+        projection: Option<Vec<usize>>,
+        n_rows: Option<usize>,
+        row_index: Option<(String, IdxSize)>,
+        rechunk: bool,
+    ) -> RbResult<Self> {
+        let row_index = row_index.map(|(name, offset)| RowIndex {
+            name: Arc::from(name.as_str()),
+            offset,
+        });
+        // rb_f = read_if_bytesio(rb_f);
+        let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
+        let df = IpcStreamReader::new(mmap_bytes_r)
+            .with_projection(projection)
+            .with_columns(columns)
+            .with_n_rows(n_rows)
+            .with_row_index(row_index)
+            .set_rechunk(rechunk)
+            .finish()
+            .map_err(RbPolarsErr::from)?;
+        Ok(RbDataFrame::new(df))
+    }
+
+    pub fn read_avro(
+        rb_f: Value,
+        columns: Option<Vec<String>>,
+        projection: Option<Vec<usize>>,
+        n_rows: Option<usize>,
+    ) -> RbResult<Self> {
+        use polars::io::avro::AvroReader;
+
+        let file = get_file_like(rb_f, false)?;
+        let df = AvroReader::new(file)
+            .with_projection(projection)
+            .with_columns(columns)
+            .with_n_rows(n_rows)
+            .finish()
+            .map_err(RbPolarsErr::from)?;
+        Ok(RbDataFrame::new(df))
+    }
+
+    #[allow(clippy::too_many_arguments)]
+    pub fn write_csv(
+        &self,
+        rb_f: Value,
+        include_header: bool,
+        separator: u8,
+        quote_char: u8,
+        batch_size: Wrap<NonZeroUsize>,
+        datetime_format: Option<String>,
+        date_format: Option<String>,
+        time_format: Option<String>,
+        float_precision: Option<usize>,
+        null_value: Option<String>,
+    ) -> RbResult<()> {
+        let batch_size = batch_size.0;
+        let null = null_value.unwrap_or_default();
+
+        if let Ok(s) = String::try_convert(rb_f) {
+            let f = std::fs::File::create(s).unwrap();
+            // no need for a buffered writer, because the csv writer does internal buffering
+            CsvWriter::new(f)
+                .include_header(include_header)
+                .with_separator(separator)
+                .with_quote_char(quote_char)
+                .with_batch_size(batch_size)
+                .with_datetime_format(datetime_format)
+                .with_date_format(date_format)
+                .with_time_format(time_format)
+                .with_float_precision(float_precision)
+                .with_null_value(null)
+                .finish(&mut self.df.borrow_mut())
+                .map_err(RbPolarsErr::from)?;
+        } else {
+            let mut buf = Cursor::new(Vec::new());
+            CsvWriter::new(&mut buf)
+                .include_header(include_header)
+                .with_separator(separator)
+                .with_quote_char(quote_char)
+                .with_batch_size(batch_size)
+                .with_datetime_format(datetime_format)
+                .with_date_format(date_format)
+                .with_time_format(time_format)
+                .with_float_precision(float_precision)
+                .with_null_value(null)
+                .finish(&mut self.df.borrow_mut())
+                .map_err(RbPolarsErr::from)?;
+            // TODO less copying
+            let rb_str = RString::from_slice(&buf.into_inner());
+            rb_f.funcall::<_, _, Value>("write", (rb_str,))?;
+        }
+
+        Ok(())
+    }
+
+    pub fn write_parquet(
+        &self,
+        rb_f: Value,
+        compression: String,
+        compression_level: Option<i32>,
+        statistics: Wrap<StatisticsOptions>,
+        row_group_size: Option<usize>,
+        data_page_size: Option<usize>,
+    ) -> RbResult<()> {
+        let compression = parse_parquet_compression(&compression, compression_level)?;
+
+        if let Ok(s) = String::try_convert(rb_f) {
+            let f = std::fs::File::create(s).unwrap();
+            ParquetWriter::new(f)
+                .with_compression(compression)
+                .with_statistics(statistics.0)
+                .with_row_group_size(row_group_size)
+                .with_data_page_size(data_page_size)
+                .finish(&mut self.df.borrow_mut())
+                .map_err(RbPolarsErr::from)?;
+        } else {
+            let buf = get_file_like(rb_f, true)?;
+            ParquetWriter::new(buf)
+                .with_compression(compression)
+                .with_statistics(statistics.0)
+                .with_row_group_size(row_group_size)
+                .with_data_page_size(data_page_size)
+                .finish(&mut self.df.borrow_mut())
+                .map_err(RbPolarsErr::from)?;
+        }
+
+        Ok(())
+    }
+
+    pub fn write_json(&self, rb_f: Value, pretty: bool, row_oriented: bool) -> RbResult<()> {
+        let file = BufWriter::new(get_file_like(rb_f, true)?);
+
+        let r = match (pretty, row_oriented) {
+            (_, true) => JsonWriter::new(file)
+                .with_json_format(JsonFormat::Json)
+                .finish(&mut self.df.borrow_mut()),
+            (true, _) => serde_json::to_writer_pretty(file, &*self.df.borrow())
+                .map_err(|e| PolarsError::ComputeError(format!("{:?}", e).into())),
+            (false, _) => serde_json::to_writer(file, &*self.df.borrow())
+                .map_err(|e| PolarsError::ComputeError(format!("{:?}", e).into())),
+        };
+        r.map_err(|e| RbPolarsErr::other(format!("{:?}", e)))?;
+        Ok(())
+    }
+
+    pub fn write_ndjson(&self, rb_f: Value) -> RbResult<()> {
+        let file = BufWriter::new(get_file_like(rb_f, true)?);
+
+        let r = JsonWriter::new(file)
+            .with_json_format(JsonFormat::JsonLines)
+            .finish(&mut self.df.borrow_mut());
+
+        r.map_err(|e| RbPolarsErr::other(format!("{:?}", e)))?;
+        Ok(())
+    }
+
+    pub fn write_ipc(
+        &self,
+        rb_f: Value,
+        compression: Wrap<Option<IpcCompression>>,
+    ) -> RbResult<()> {
+        if let Ok(s) = String::try_convert(rb_f) {
+            let f = std::fs::File::create(s).unwrap();
+            IpcWriter::new(f)
+                .with_compression(compression.0)
+                .finish(&mut self.df.borrow_mut())
+                .map_err(RbPolarsErr::from)?;
+        } else {
+            let mut buf = Cursor::new(Vec::new());
+            IpcWriter::new(&mut buf)
+                .with_compression(compression.0)
+                .finish(&mut self.df.borrow_mut())
+                .map_err(RbPolarsErr::from)?;
+            // TODO less copying
+            let rb_str = RString::from_slice(&buf.into_inner());
+            rb_f.funcall::<_, _, Value>("write", (rb_str,))?;
+        }
+        Ok(())
+    }
+
+    pub fn write_ipc_stream(
+        &self,
+        rb_f: Value,
+        compression: Wrap<Option<IpcCompression>>,
+    ) -> RbResult<()> {
+        if let Ok(s) = String::try_convert(rb_f) {
+            let f = std::fs::File::create(s).unwrap();
+            IpcStreamWriter::new(f)
+                .with_compression(compression.0)
+                .finish(&mut self.df.borrow_mut())
+                .map_err(RbPolarsErr::from)?
+        } else {
+            let mut buf = get_file_like(rb_f, true)?;
+
+            IpcStreamWriter::new(&mut buf)
+                .with_compression(compression.0)
+                .finish(&mut self.df.borrow_mut())
+                .map_err(RbPolarsErr::from)?;
+        }
+        Ok(())
+    }
+
+    pub fn write_avro(
+        &self,
+        rb_f: Value,
+        compression: Wrap<Option<AvroCompression>>,
+    ) -> RbResult<()> {
+        use polars::io::avro::AvroWriter;
+
+        if let Ok(s) = String::try_convert(rb_f) {
+            let f = std::fs::File::create(s).unwrap();
+            AvroWriter::new(f)
+                .with_compression(compression.0)
+                .finish(&mut self.df.borrow_mut())
+                .map_err(RbPolarsErr::from)?;
+        } else {
+            let mut buf = get_file_like(rb_f, true)?;
+            AvroWriter::new(&mut buf)
+                .with_compression(compression.0)
+                .finish(&mut self.df.borrow_mut())
+                .map_err(RbPolarsErr::from)?;
+        }

+        Ok(())
+    }
+}
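Note: `read_csv` above takes a single `&[Value]` slice because, as its comment says, the typed-argument pattern stops working past 16 parameters. Below is a minimal sketch of how such a slice-arity method is typically registered with magnus; it assumes the `RbDataFrame` type from this diff and is not copied from the gem's `lib.rs`.

```rust
// Hedged sketch: registering a slice-arity singleton method with magnus.
// Assumes `RbDataFrame` and its `read_csv` from the io.rs above are in scope.
use magnus::{function, prelude::*, Error, RModule, Ruby};

fn register(ruby: &Ruby) -> Result<(), Error> {
    let module: RModule = ruby.define_module("Polars")?;
    let class = module.define_class("RbDataFrame", ruby.class_object())?;
    // Arity -1 hands every Ruby argument through as one &[Value] slice,
    // sidestepping the 16-argument limit of typed magnus signatures.
    class.define_singleton_method("read_csv", function!(RbDataFrame::read_csv, -1))?;
    Ok(())
}
```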
data/ext/polars/src/dataframe/mod.rs
ADDED
@@ -0,0 +1,26 @@
+mod construction;
+mod export;
+mod general;
+mod io;
+
+use polars::prelude::*;
+use std::cell::RefCell;
+
+#[magnus::wrap(class = "Polars::RbDataFrame")]
+pub struct RbDataFrame {
+    pub df: RefCell<DataFrame>,
+}
+
+impl From<DataFrame> for RbDataFrame {
+    fn from(df: DataFrame) -> Self {
+        RbDataFrame::new(df)
+    }
+}
+
+impl RbDataFrame {
+    pub fn new(df: DataFrame) -> Self {
+        RbDataFrame {
+            df: RefCell::new(df),
+        }
+    }
+}
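The `RefCell<DataFrame>` above is what lets the IO methods mutate the frame even though magnus only passes them `&self`: reads take a shared borrow, while the writers take `borrow_mut()` to obtain the `&mut DataFrame` their `finish` calls need. A small hedged illustration follows; the `example_*` names are hypothetical and not methods of the gem.

```rust
// Hedged illustration of the RefCell access pattern used throughout io.rs.
use polars::prelude::*;

impl RbDataFrame {
    // A shared borrow is enough for read-only access.
    pub fn example_height(&self) -> usize {
        self.df.borrow().height()
    }

    // An exclusive borrow hands the writer the &mut DataFrame it requires.
    pub fn example_write_csv(&self, f: std::fs::File) -> PolarsResult<()> {
        CsvWriter::new(f).finish(&mut self.df.borrow_mut())
    }
}
```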
data/ext/polars/src/error.rs
CHANGED
@@ -2,20 +2,26 @@ use magnus::exception;
 use magnus::Error;
 use polars::prelude::PolarsError;
 
+use crate::rb_modules;
+
 pub struct RbPolarsErr {}
 
 impl RbPolarsErr {
     // convert to Error instead of Self
     pub fn from(e: PolarsError) -> Error {
-
+        match e {
+            PolarsError::ComputeError(err) => ComputeError::new_err(err.to_string()),
+            PolarsError::InvalidOperation(err) => InvalidOperationError::new_err(err.to_string()),
+            _ => Error::new(rb_modules::error(), e.to_string()),
+        }
     }
 
     pub fn io(e: std::io::Error) -> Error {
-        Error::new(
+        Error::new(rb_modules::error(), e.to_string())
     }
 
     pub fn other(message: String) -> Error {
-        Error::new(
+        Error::new(rb_modules::error(), message)
     }
 }
 
@@ -35,11 +41,27 @@ impl RbValueError {
     }
 }
 
+pub struct RbOverflowError {}
+
+impl RbOverflowError {
+    pub fn new_err(message: String) -> Error {
+        Error::new(exception::range_error(), message)
+    }
+}
+
 pub struct ComputeError {}
 
 impl ComputeError {
     pub fn new_err(message: String) -> Error {
-        Error::new(
+        Error::new(rb_modules::compute_error(), message)
+    }
+}
+
+pub struct InvalidOperationError {}
+
+impl InvalidOperationError {
+    pub fn new_err(message: String) -> Error {
+        Error::new(rb_modules::invalid_operation_error(), message)
     }
 }
 
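With this change, `RbPolarsErr::from` routes specific `PolarsError` variants to dedicated Ruby exception classes resolved through `rb_modules` instead of one catch-all error. The sketch below shows one way such lookups can be written with magnus; it is an assumption, not the gem's actual `rb_modules.rs`, and the class names `Polars::ComputeError` and `Polars::InvalidOperationError` are inferred from the exception types above.

```rust
// Hedged sketch of rb_modules-style helpers (assumed implementation).
use magnus::{prelude::*, ExceptionClass, RModule, Ruby};

pub fn compute_error() -> ExceptionClass {
    let ruby = Ruby::get().unwrap();
    let polars: RModule = ruby.class_object().const_get("Polars").unwrap();
    polars.const_get("ComputeError").unwrap()
}

pub fn invalid_operation_error() -> ExceptionClass {
    let ruby = Ruby::get().unwrap();
    let polars: RModule = ruby.class_object().const_get("Polars").unwrap();
    polars.const_get("InvalidOperationError").unwrap()
}
```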
data/ext/polars/src/expr/categorical.rs
CHANGED
@@ -1,16 +1,6 @@
-use polars::prelude::*;
-
-use crate::conversion::Wrap;
 use crate::RbExpr;
 
 impl RbExpr {
-    pub fn cat_set_ordering(&self, ordering: Wrap<CategoricalOrdering>) -> Self {
-        self.inner
-            .clone()
-            .cast(DataType::Categorical(None, ordering.0))
-            .into()
-    }
-
     pub fn cat_get_categories(&self) -> Self {
         self.inner.clone().cat().get_categories().into()
     }
data/ext/polars/src/expr/datetime.rs
CHANGED
@@ -50,12 +50,8 @@ impl RbExpr {
             .into()
     }
 
-    pub fn dt_truncate(&self, every: &Self
-        self.inner
-            .clone()
-            .dt()
-            .truncate(every.inner.clone(), offset)
-            .into()
+    pub fn dt_truncate(&self, every: &Self) -> Self {
+        self.inner.clone().dt().truncate(every.inner.clone()).into()
     }
 
     pub fn dt_month_start(&self) -> Self {
@@ -74,8 +70,8 @@
         self.inner.clone().dt().dst_offset().into()
     }
 
-    pub fn dt_round(&self, every:
-        self.inner.clone().dt().round(
+    pub fn dt_round(&self, every: &Self) -> Self {
+        self.inner.clone().dt().round(every.inner.clone()).into()
     }
 
     pub fn dt_combine(&self, time: &Self, time_unit: Wrap<TimeUnit>) -> Self {
|