polars-df 0.10.0 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/Cargo.lock +90 -48
- data/README.md +6 -6
- data/ext/polars/Cargo.toml +7 -5
- data/ext/polars/src/batched_csv.rs +53 -52
- data/ext/polars/src/conversion/mod.rs +13 -60
- data/ext/polars/src/dataframe/construction.rs +186 -0
- data/ext/polars/src/dataframe/export.rs +48 -0
- data/ext/polars/src/dataframe/general.rs +607 -0
- data/ext/polars/src/dataframe/io.rs +463 -0
- data/ext/polars/src/dataframe/mod.rs +26 -0
- data/ext/polars/src/expr/datetime.rs +6 -2
- data/ext/polars/src/expr/general.rs +28 -6
- data/ext/polars/src/expr/rolling.rs +185 -69
- data/ext/polars/src/expr/string.rs +9 -30
- data/ext/polars/src/functions/lazy.rs +2 -0
- data/ext/polars/src/functions/range.rs +74 -0
- data/ext/polars/src/interop/mod.rs +1 -0
- data/ext/polars/src/interop/numo/mod.rs +2 -0
- data/ext/polars/src/interop/numo/to_numo_df.rs +23 -0
- data/ext/polars/src/interop/numo/to_numo_series.rs +60 -0
- data/ext/polars/src/lazyframe/mod.rs +54 -38
- data/ext/polars/src/lib.rs +46 -21
- data/ext/polars/src/map/lazy.rs +5 -25
- data/ext/polars/src/map/series.rs +7 -1
- data/ext/polars/src/series/aggregation.rs +47 -30
- data/ext/polars/src/series/export.rs +131 -49
- data/ext/polars/src/series/mod.rs +1 -131
- data/lib/polars/batched_csv_reader.rb +9 -3
- data/lib/polars/convert.rb +6 -1
- data/lib/polars/data_frame.rb +83 -302
- data/lib/polars/date_time_expr.rb +1 -0
- data/lib/polars/date_time_name_space.rb +5 -1
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/exceptions.rb +4 -0
- data/lib/polars/expr.rb +1134 -20
- data/lib/polars/functions/range/date_range.rb +92 -0
- data/lib/polars/functions/range/datetime_range.rb +149 -0
- data/lib/polars/functions/range/time_range.rb +141 -0
- data/lib/polars/group_by.rb +88 -23
- data/lib/polars/io/avro.rb +24 -0
- data/lib/polars/{io.rb → io/csv.rb} +296 -490
- data/lib/polars/io/database.rb +73 -0
- data/lib/polars/io/ipc.rb +247 -0
- data/lib/polars/io/json.rb +18 -0
- data/lib/polars/io/ndjson.rb +69 -0
- data/lib/polars/io/parquet.rb +226 -0
- data/lib/polars/lazy_frame.rb +23 -166
- data/lib/polars/lazy_group_by.rb +100 -3
- data/lib/polars/rolling_group_by.rb +2 -2
- data/lib/polars/series.rb +2 -2
- data/lib/polars/string_expr.rb +37 -36
- data/lib/polars/utils.rb +35 -1
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +9 -1
- metadata +21 -5
- data/ext/polars/src/dataframe.rs +0 -1208
data/ext/polars/src/dataframe.rs
DELETED
@@ -1,1208 +0,0 @@
|
|
1
|
-
use either::Either;
|
2
|
-
use magnus::{
|
3
|
-
prelude::*, r_hash::ForEach, typed_data::Obj, IntoValue, RArray, RHash, RString, Value,
|
4
|
-
};
|
5
|
-
use polars::frame::row::{rows_to_schema_supertypes, Row};
|
6
|
-
use polars::frame::NullStrategy;
|
7
|
-
use polars::io::avro::AvroCompression;
|
8
|
-
use polars::io::mmap::ReaderBytes;
|
9
|
-
use polars::io::RowIndex;
|
10
|
-
use polars::prelude::pivot::{pivot, pivot_stable};
|
11
|
-
use polars::prelude::*;
|
12
|
-
use polars_core::utils::try_get_supertype;
|
13
|
-
use std::cell::RefCell;
|
14
|
-
use std::io::{BufWriter, Cursor};
|
15
|
-
use std::num::NonZeroUsize;
|
16
|
-
use std::ops::Deref;
|
17
|
-
|
18
|
-
use crate::conversion::*;
|
19
|
-
use crate::file::{get_either_file, get_file_like, get_mmap_bytes_reader, EitherRustRubyFile};
|
20
|
-
use crate::map::dataframe::{
|
21
|
-
apply_lambda_unknown, apply_lambda_with_bool_out_type, apply_lambda_with_primitive_out_type,
|
22
|
-
apply_lambda_with_utf8_out_type,
|
23
|
-
};
|
24
|
-
use crate::rb_modules;
|
25
|
-
use crate::series::{to_rbseries_collection, to_series_collection};
|
26
|
-
use crate::{RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
|
27
|
-
|
28
|
-
#[magnus::wrap(class = "Polars::RbDataFrame")]
|
29
|
-
pub struct RbDataFrame {
|
30
|
-
pub df: RefCell<DataFrame>,
|
31
|
-
}
|
32
|
-
|
33
|
-
/// Allows `DataFrame` values to be turned into the wrapper with `.into()`.
impl From<DataFrame> for RbDataFrame {
    /// Wraps `frame` by delegating to [`RbDataFrame::new`].
    fn from(frame: DataFrame) -> Self {
        Self::new(frame)
    }
}
|
38
|
-
|
39
|
-
impl RbDataFrame {
|
40
|
-
pub fn new(df: DataFrame) -> Self {
|
41
|
-
RbDataFrame {
|
42
|
-
df: RefCell::new(df),
|
43
|
-
}
|
44
|
-
}
|
45
|
-
|
46
|
-
fn finish_from_rows(
|
47
|
-
rows: Vec<Row>,
|
48
|
-
infer_schema_length: Option<usize>,
|
49
|
-
schema: Option<Schema>,
|
50
|
-
schema_overrides_by_idx: Option<Vec<(usize, DataType)>>,
|
51
|
-
) -> RbResult<Self> {
|
52
|
-
// Object builder must be registered
|
53
|
-
crate::on_startup::register_object_builder();
|
54
|
-
|
55
|
-
let mut final_schema =
|
56
|
-
rows_to_schema_supertypes(&rows, infer_schema_length.map(|n| std::cmp::max(1, n)))
|
57
|
-
.map_err(RbPolarsErr::from)?;
|
58
|
-
|
59
|
-
// Erase scale from inferred decimals.
|
60
|
-
for dtype in final_schema.iter_dtypes_mut() {
|
61
|
-
if let DataType::Decimal(_, _) = dtype {
|
62
|
-
*dtype = DataType::Decimal(None, None)
|
63
|
-
}
|
64
|
-
}
|
65
|
-
|
66
|
-
// Integrate explicit/inferred schema.
|
67
|
-
if let Some(schema) = schema {
|
68
|
-
for (i, (name, dtype)) in schema.into_iter().enumerate() {
|
69
|
-
if let Some((name_, dtype_)) = final_schema.get_at_index_mut(i) {
|
70
|
-
*name_ = name;
|
71
|
-
|
72
|
-
// If schema dtype is Unknown, overwrite with inferred datatype.
|
73
|
-
if !matches!(dtype, DataType::Unknown) {
|
74
|
-
*dtype_ = dtype;
|
75
|
-
}
|
76
|
-
} else {
|
77
|
-
final_schema.with_column(name, dtype);
|
78
|
-
}
|
79
|
-
}
|
80
|
-
}
|
81
|
-
|
82
|
-
// Optional per-field overrides; these supersede default/inferred dtypes.
|
83
|
-
if let Some(overrides) = schema_overrides_by_idx {
|
84
|
-
for (i, dtype) in overrides {
|
85
|
-
if let Some((_, dtype_)) = final_schema.get_at_index_mut(i) {
|
86
|
-
if !matches!(dtype, DataType::Unknown) {
|
87
|
-
*dtype_ = dtype;
|
88
|
-
}
|
89
|
-
}
|
90
|
-
}
|
91
|
-
}
|
92
|
-
let df =
|
93
|
-
DataFrame::from_rows_and_schema(&rows, &final_schema).map_err(RbPolarsErr::from)?;
|
94
|
-
Ok(df.into())
|
95
|
-
}
|
96
|
-
|
97
|
-
pub fn init(columns: RArray) -> RbResult<Self> {
|
98
|
-
let mut cols = Vec::new();
|
99
|
-
for i in columns.each() {
|
100
|
-
cols.push(<&RbSeries>::try_convert(i?)?.series.borrow().clone());
|
101
|
-
}
|
102
|
-
let df = DataFrame::new(cols).map_err(RbPolarsErr::from)?;
|
103
|
-
Ok(RbDataFrame::new(df))
|
104
|
-
}
|
105
|
-
|
106
|
-
pub fn estimated_size(&self) -> usize {
|
107
|
-
self.df.borrow().estimated_size()
|
108
|
-
}
|
109
|
-
|
110
|
-
pub fn read_csv(arguments: &[Value]) -> RbResult<Self> {
|
111
|
-
// start arguments
|
112
|
-
// this pattern is needed for more than 16
|
113
|
-
let rb_f = arguments[0];
|
114
|
-
let infer_schema_length = Option::<usize>::try_convert(arguments[1])?;
|
115
|
-
let chunk_size = usize::try_convert(arguments[2])?;
|
116
|
-
let has_header = bool::try_convert(arguments[3])?;
|
117
|
-
let ignore_errors = bool::try_convert(arguments[4])?;
|
118
|
-
let n_rows = Option::<usize>::try_convert(arguments[5])?;
|
119
|
-
let skip_rows = usize::try_convert(arguments[6])?;
|
120
|
-
let projection = Option::<Vec<usize>>::try_convert(arguments[7])?;
|
121
|
-
let separator = String::try_convert(arguments[8])?;
|
122
|
-
let rechunk = bool::try_convert(arguments[9])?;
|
123
|
-
let columns = Option::<Vec<String>>::try_convert(arguments[10])?;
|
124
|
-
let encoding = Wrap::<CsvEncoding>::try_convert(arguments[11])?;
|
125
|
-
let n_threads = Option::<usize>::try_convert(arguments[12])?;
|
126
|
-
let path = Option::<String>::try_convert(arguments[13])?;
|
127
|
-
let overwrite_dtype = Option::<Vec<(String, Wrap<DataType>)>>::try_convert(arguments[14])?;
|
128
|
-
// TODO fix
|
129
|
-
let overwrite_dtype_slice = Option::<Vec<Wrap<DataType>>>::None; // Option::<Vec<Wrap<DataType>>>::try_convert(arguments[15])?;
|
130
|
-
let low_memory = bool::try_convert(arguments[16])?;
|
131
|
-
let comment_prefix = Option::<String>::try_convert(arguments[17])?;
|
132
|
-
let quote_char = Option::<String>::try_convert(arguments[18])?;
|
133
|
-
let null_values = Option::<Wrap<NullValues>>::try_convert(arguments[19])?;
|
134
|
-
let try_parse_dates = bool::try_convert(arguments[20])?;
|
135
|
-
let skip_rows_after_header = usize::try_convert(arguments[21])?;
|
136
|
-
let row_index = Option::<(String, IdxSize)>::try_convert(arguments[22])?;
|
137
|
-
let sample_size = usize::try_convert(arguments[23])?;
|
138
|
-
let eol_char = String::try_convert(arguments[24])?;
|
139
|
-
let truncate_ragged_lines = bool::try_convert(arguments[25])?;
|
140
|
-
// end arguments
|
141
|
-
|
142
|
-
let null_values = null_values.map(|w| w.0);
|
143
|
-
let eol_char = eol_char.as_bytes()[0];
|
144
|
-
|
145
|
-
let row_index = row_index.map(|(name, offset)| RowIndex { name, offset });
|
146
|
-
|
147
|
-
let quote_char = if let Some(s) = quote_char {
|
148
|
-
if s.is_empty() {
|
149
|
-
None
|
150
|
-
} else {
|
151
|
-
Some(s.as_bytes()[0])
|
152
|
-
}
|
153
|
-
} else {
|
154
|
-
None
|
155
|
-
};
|
156
|
-
|
157
|
-
let overwrite_dtype = overwrite_dtype.map(|overwrite_dtype| {
|
158
|
-
overwrite_dtype
|
159
|
-
.iter()
|
160
|
-
.map(|(name, dtype)| {
|
161
|
-
let dtype = dtype.0.clone();
|
162
|
-
Field::new(name, dtype)
|
163
|
-
})
|
164
|
-
.collect::<Schema>()
|
165
|
-
});
|
166
|
-
|
167
|
-
let overwrite_dtype_slice = overwrite_dtype_slice.map(|overwrite_dtype| {
|
168
|
-
overwrite_dtype
|
169
|
-
.iter()
|
170
|
-
.map(|dt| dt.0.clone())
|
171
|
-
.collect::<Vec<_>>()
|
172
|
-
});
|
173
|
-
|
174
|
-
let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
|
175
|
-
let df = CsvReader::new(mmap_bytes_r)
|
176
|
-
.infer_schema(infer_schema_length)
|
177
|
-
.has_header(has_header)
|
178
|
-
.with_n_rows(n_rows)
|
179
|
-
.with_separator(separator.as_bytes()[0])
|
180
|
-
.with_skip_rows(skip_rows)
|
181
|
-
.with_ignore_errors(ignore_errors)
|
182
|
-
.with_projection(projection)
|
183
|
-
.with_rechunk(rechunk)
|
184
|
-
.with_chunk_size(chunk_size)
|
185
|
-
.with_encoding(encoding.0)
|
186
|
-
.with_columns(columns)
|
187
|
-
.with_n_threads(n_threads)
|
188
|
-
.with_path(path)
|
189
|
-
.with_dtypes(overwrite_dtype.map(Arc::new))
|
190
|
-
.with_dtypes_slice(overwrite_dtype_slice.as_deref())
|
191
|
-
.low_memory(low_memory)
|
192
|
-
.with_comment_prefix(comment_prefix.as_deref())
|
193
|
-
.with_null_values(null_values)
|
194
|
-
.with_try_parse_dates(try_parse_dates)
|
195
|
-
.with_quote_char(quote_char)
|
196
|
-
.with_end_of_line_char(eol_char)
|
197
|
-
.with_skip_rows_after_header(skip_rows_after_header)
|
198
|
-
.with_row_index(row_index)
|
199
|
-
.sample_size(sample_size)
|
200
|
-
.truncate_ragged_lines(truncate_ragged_lines)
|
201
|
-
.finish()
|
202
|
-
.map_err(RbPolarsErr::from)?;
|
203
|
-
Ok(df.into())
|
204
|
-
}
|
205
|
-
|
206
|
-
#[allow(clippy::too_many_arguments)]
|
207
|
-
pub fn read_parquet(
|
208
|
-
rb_f: Value,
|
209
|
-
columns: Option<Vec<String>>,
|
210
|
-
projection: Option<Vec<usize>>,
|
211
|
-
n_rows: Option<usize>,
|
212
|
-
parallel: Wrap<ParallelStrategy>,
|
213
|
-
row_index: Option<(String, IdxSize)>,
|
214
|
-
low_memory: bool,
|
215
|
-
use_statistics: bool,
|
216
|
-
rechunk: bool,
|
217
|
-
) -> RbResult<Self> {
|
218
|
-
use EitherRustRubyFile::*;
|
219
|
-
|
220
|
-
let row_index = row_index.map(|(name, offset)| RowIndex { name, offset });
|
221
|
-
let result = match get_either_file(rb_f, false)? {
|
222
|
-
Rb(f) => {
|
223
|
-
let buf = f.as_buffer();
|
224
|
-
ParquetReader::new(buf)
|
225
|
-
.with_projection(projection)
|
226
|
-
.with_columns(columns)
|
227
|
-
.read_parallel(parallel.0)
|
228
|
-
.with_n_rows(n_rows)
|
229
|
-
.with_row_index(row_index)
|
230
|
-
.set_low_memory(low_memory)
|
231
|
-
.use_statistics(use_statistics)
|
232
|
-
.set_rechunk(rechunk)
|
233
|
-
.finish()
|
234
|
-
}
|
235
|
-
Rust(f) => ParquetReader::new(f.into_inner())
|
236
|
-
.with_projection(projection)
|
237
|
-
.with_columns(columns)
|
238
|
-
.read_parallel(parallel.0)
|
239
|
-
.with_n_rows(n_rows)
|
240
|
-
.with_row_index(row_index)
|
241
|
-
.use_statistics(use_statistics)
|
242
|
-
.set_rechunk(rechunk)
|
243
|
-
.finish(),
|
244
|
-
};
|
245
|
-
let df = result.map_err(RbPolarsErr::from)?;
|
246
|
-
Ok(RbDataFrame::new(df))
|
247
|
-
}
|
248
|
-
|
249
|
-
pub fn read_ipc(
|
250
|
-
rb_f: Value,
|
251
|
-
columns: Option<Vec<String>>,
|
252
|
-
projection: Option<Vec<usize>>,
|
253
|
-
n_rows: Option<usize>,
|
254
|
-
row_index: Option<(String, IdxSize)>,
|
255
|
-
memory_map: bool,
|
256
|
-
) -> RbResult<Self> {
|
257
|
-
let row_index = row_index.map(|(name, offset)| RowIndex { name, offset });
|
258
|
-
let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
|
259
|
-
let df = IpcReader::new(mmap_bytes_r)
|
260
|
-
.with_projection(projection)
|
261
|
-
.with_columns(columns)
|
262
|
-
.with_n_rows(n_rows)
|
263
|
-
.with_row_index(row_index)
|
264
|
-
.memory_mapped(memory_map)
|
265
|
-
.finish()
|
266
|
-
.map_err(RbPolarsErr::from)?;
|
267
|
-
Ok(RbDataFrame::new(df))
|
268
|
-
}
|
269
|
-
|
270
|
-
pub fn read_avro(
|
271
|
-
rb_f: Value,
|
272
|
-
columns: Option<Vec<String>>,
|
273
|
-
projection: Option<Vec<usize>>,
|
274
|
-
n_rows: Option<usize>,
|
275
|
-
) -> RbResult<Self> {
|
276
|
-
use polars::io::avro::AvroReader;
|
277
|
-
|
278
|
-
let file = get_file_like(rb_f, false)?;
|
279
|
-
let df = AvroReader::new(file)
|
280
|
-
.with_projection(projection)
|
281
|
-
.with_columns(columns)
|
282
|
-
.with_n_rows(n_rows)
|
283
|
-
.finish()
|
284
|
-
.map_err(RbPolarsErr::from)?;
|
285
|
-
Ok(RbDataFrame::new(df))
|
286
|
-
}
|
287
|
-
|
288
|
-
pub fn write_avro(
|
289
|
-
&self,
|
290
|
-
rb_f: Value,
|
291
|
-
compression: Wrap<Option<AvroCompression>>,
|
292
|
-
) -> RbResult<()> {
|
293
|
-
use polars::io::avro::AvroWriter;
|
294
|
-
|
295
|
-
if let Ok(s) = String::try_convert(rb_f) {
|
296
|
-
let f = std::fs::File::create(s).unwrap();
|
297
|
-
AvroWriter::new(f)
|
298
|
-
.with_compression(compression.0)
|
299
|
-
.finish(&mut self.df.borrow_mut())
|
300
|
-
.map_err(RbPolarsErr::from)?;
|
301
|
-
} else {
|
302
|
-
let mut buf = get_file_like(rb_f, true)?;
|
303
|
-
AvroWriter::new(&mut buf)
|
304
|
-
.with_compression(compression.0)
|
305
|
-
.finish(&mut self.df.borrow_mut())
|
306
|
-
.map_err(RbPolarsErr::from)?;
|
307
|
-
}
|
308
|
-
|
309
|
-
Ok(())
|
310
|
-
}
|
311
|
-
|
312
|
-
pub fn read_json(rb_f: Value) -> RbResult<Self> {
|
313
|
-
// memmap the file first
|
314
|
-
let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
|
315
|
-
let mmap_read: ReaderBytes = (&mmap_bytes_r).into();
|
316
|
-
let bytes = mmap_read.deref();
|
317
|
-
|
318
|
-
// Happy path is our column oriented json as that is most performant
|
319
|
-
// on failure we try
|
320
|
-
match serde_json::from_slice::<DataFrame>(bytes) {
|
321
|
-
Ok(df) => Ok(df.into()),
|
322
|
-
// try arrow json reader instead
|
323
|
-
// this is row oriented
|
324
|
-
Err(e) => {
|
325
|
-
let msg = format!("{e}");
|
326
|
-
if msg.contains("successful parse invalid data") {
|
327
|
-
let e = RbPolarsErr::from(PolarsError::ComputeError(msg.into()));
|
328
|
-
Err(e)
|
329
|
-
} else {
|
330
|
-
let out = JsonReader::new(mmap_bytes_r)
|
331
|
-
.with_json_format(JsonFormat::Json)
|
332
|
-
.finish()
|
333
|
-
.map_err(|e| RbPolarsErr::other(format!("{:?}", e)))?;
|
334
|
-
Ok(out.into())
|
335
|
-
}
|
336
|
-
}
|
337
|
-
}
|
338
|
-
}
|
339
|
-
|
340
|
-
pub fn read_ndjson(rb_f: Value) -> RbResult<Self> {
|
341
|
-
let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
|
342
|
-
|
343
|
-
let out = JsonReader::new(mmap_bytes_r)
|
344
|
-
.with_json_format(JsonFormat::JsonLines)
|
345
|
-
.finish()
|
346
|
-
.map_err(|e| RbPolarsErr::other(format!("{:?}", e)))?;
|
347
|
-
Ok(out.into())
|
348
|
-
}
|
349
|
-
|
350
|
-
pub fn write_json(&self, rb_f: Value, pretty: bool, row_oriented: bool) -> RbResult<()> {
|
351
|
-
let file = BufWriter::new(get_file_like(rb_f, true)?);
|
352
|
-
|
353
|
-
let r = match (pretty, row_oriented) {
|
354
|
-
(_, true) => JsonWriter::new(file)
|
355
|
-
.with_json_format(JsonFormat::Json)
|
356
|
-
.finish(&mut self.df.borrow_mut()),
|
357
|
-
(true, _) => serde_json::to_writer_pretty(file, &*self.df.borrow())
|
358
|
-
.map_err(|e| PolarsError::ComputeError(format!("{:?}", e).into())),
|
359
|
-
(false, _) => serde_json::to_writer(file, &*self.df.borrow())
|
360
|
-
.map_err(|e| PolarsError::ComputeError(format!("{:?}", e).into())),
|
361
|
-
};
|
362
|
-
r.map_err(|e| RbPolarsErr::other(format!("{:?}", e)))?;
|
363
|
-
Ok(())
|
364
|
-
}
|
365
|
-
|
366
|
-
pub fn write_ndjson(&self, rb_f: Value) -> RbResult<()> {
|
367
|
-
let file = BufWriter::new(get_file_like(rb_f, true)?);
|
368
|
-
|
369
|
-
let r = JsonWriter::new(file)
|
370
|
-
.with_json_format(JsonFormat::JsonLines)
|
371
|
-
.finish(&mut self.df.borrow_mut());
|
372
|
-
|
373
|
-
r.map_err(|e| RbPolarsErr::other(format!("{:?}", e)))?;
|
374
|
-
Ok(())
|
375
|
-
}
|
376
|
-
|
377
|
-
pub fn read_rows(
|
378
|
-
rb_rows: RArray,
|
379
|
-
infer_schema_length: Option<usize>,
|
380
|
-
schema: Option<Wrap<Schema>>,
|
381
|
-
) -> RbResult<Self> {
|
382
|
-
let mut rows = Vec::with_capacity(rb_rows.len());
|
383
|
-
for v in rb_rows.each() {
|
384
|
-
let rb_row = RArray::try_convert(v?)?;
|
385
|
-
let mut row = Vec::with_capacity(rb_row.len());
|
386
|
-
for val in rb_row.each() {
|
387
|
-
row.push(Wrap::<AnyValue>::try_convert(val?)?.0);
|
388
|
-
}
|
389
|
-
rows.push(Row(row));
|
390
|
-
}
|
391
|
-
Self::finish_from_rows(rows, infer_schema_length, schema.map(|wrap| wrap.0), None)
|
392
|
-
}
|
393
|
-
|
394
|
-
pub fn read_hashes(
|
395
|
-
dicts: Value,
|
396
|
-
infer_schema_length: Option<usize>,
|
397
|
-
schema: Option<Wrap<Schema>>,
|
398
|
-
schema_overrides: Option<Wrap<Schema>>,
|
399
|
-
) -> RbResult<Self> {
|
400
|
-
let mut schema_columns = PlIndexSet::new();
|
401
|
-
if let Some(s) = &schema {
|
402
|
-
schema_columns.extend(s.0.iter_names().map(|n| n.to_string()))
|
403
|
-
}
|
404
|
-
let (rows, names) = dicts_to_rows(&dicts, infer_schema_length, schema_columns)?;
|
405
|
-
|
406
|
-
let mut schema_overrides_by_idx: Vec<(usize, DataType)> = Vec::new();
|
407
|
-
if let Some(overrides) = schema_overrides {
|
408
|
-
for (idx, name) in names.iter().enumerate() {
|
409
|
-
if let Some(dtype) = overrides.0.get(name) {
|
410
|
-
schema_overrides_by_idx.push((idx, dtype.clone()));
|
411
|
-
}
|
412
|
-
}
|
413
|
-
}
|
414
|
-
let rbdf = Self::finish_from_rows(
|
415
|
-
rows,
|
416
|
-
infer_schema_length,
|
417
|
-
schema.map(|wrap| wrap.0),
|
418
|
-
Some(schema_overrides_by_idx),
|
419
|
-
)?;
|
420
|
-
|
421
|
-
unsafe {
|
422
|
-
rbdf.df
|
423
|
-
.borrow_mut()
|
424
|
-
.get_columns_mut()
|
425
|
-
.iter_mut()
|
426
|
-
.zip(&names)
|
427
|
-
.for_each(|(s, name)| {
|
428
|
-
s.rename(name);
|
429
|
-
});
|
430
|
-
}
|
431
|
-
let length = names.len();
|
432
|
-
if names.into_iter().collect::<PlHashSet<_>>().len() != length {
|
433
|
-
let err = PolarsError::SchemaMismatch("duplicate column names found".into());
|
434
|
-
Err(RbPolarsErr::from(err))?;
|
435
|
-
}
|
436
|
-
|
437
|
-
Ok(rbdf)
|
438
|
-
}
|
439
|
-
|
440
|
-
pub fn read_hash(data: RHash) -> RbResult<Self> {
|
441
|
-
let mut cols: Vec<Series> = Vec::new();
|
442
|
-
data.foreach(|name: String, values: Value| {
|
443
|
-
let obj: Value = rb_modules::series().funcall("new", (name, values))?;
|
444
|
-
let rbseries = obj.funcall::<_, _, &RbSeries>("_s", ())?;
|
445
|
-
cols.push(rbseries.series.borrow().clone());
|
446
|
-
Ok(ForEach::Continue)
|
447
|
-
})?;
|
448
|
-
let df = DataFrame::new(cols).map_err(RbPolarsErr::from)?;
|
449
|
-
Ok(df.into())
|
450
|
-
}
|
451
|
-
|
452
|
-
#[allow(clippy::too_many_arguments)]
|
453
|
-
pub fn write_csv(
|
454
|
-
&self,
|
455
|
-
rb_f: Value,
|
456
|
-
include_header: bool,
|
457
|
-
separator: u8,
|
458
|
-
quote_char: u8,
|
459
|
-
batch_size: Wrap<NonZeroUsize>,
|
460
|
-
datetime_format: Option<String>,
|
461
|
-
date_format: Option<String>,
|
462
|
-
time_format: Option<String>,
|
463
|
-
float_precision: Option<usize>,
|
464
|
-
null_value: Option<String>,
|
465
|
-
) -> RbResult<()> {
|
466
|
-
let batch_size = batch_size.0;
|
467
|
-
let null = null_value.unwrap_or_default();
|
468
|
-
|
469
|
-
if let Ok(s) = String::try_convert(rb_f) {
|
470
|
-
let f = std::fs::File::create(s).unwrap();
|
471
|
-
// no need for a buffered writer, because the csv writer does internal buffering
|
472
|
-
CsvWriter::new(f)
|
473
|
-
.include_header(include_header)
|
474
|
-
.with_separator(separator)
|
475
|
-
.with_quote_char(quote_char)
|
476
|
-
.with_batch_size(batch_size)
|
477
|
-
.with_datetime_format(datetime_format)
|
478
|
-
.with_date_format(date_format)
|
479
|
-
.with_time_format(time_format)
|
480
|
-
.with_float_precision(float_precision)
|
481
|
-
.with_null_value(null)
|
482
|
-
.finish(&mut self.df.borrow_mut())
|
483
|
-
.map_err(RbPolarsErr::from)?;
|
484
|
-
} else {
|
485
|
-
let mut buf = Cursor::new(Vec::new());
|
486
|
-
CsvWriter::new(&mut buf)
|
487
|
-
.include_header(include_header)
|
488
|
-
.with_separator(separator)
|
489
|
-
.with_quote_char(quote_char)
|
490
|
-
.with_batch_size(batch_size)
|
491
|
-
.with_datetime_format(datetime_format)
|
492
|
-
.with_date_format(date_format)
|
493
|
-
.with_time_format(time_format)
|
494
|
-
.with_float_precision(float_precision)
|
495
|
-
.with_null_value(null)
|
496
|
-
.finish(&mut self.df.borrow_mut())
|
497
|
-
.map_err(RbPolarsErr::from)?;
|
498
|
-
// TODO less copying
|
499
|
-
let rb_str = RString::from_slice(&buf.into_inner());
|
500
|
-
rb_f.funcall::<_, _, Value>("write", (rb_str,))?;
|
501
|
-
}
|
502
|
-
|
503
|
-
Ok(())
|
504
|
-
}
|
505
|
-
|
506
|
-
pub fn write_ipc(
|
507
|
-
&self,
|
508
|
-
rb_f: Value,
|
509
|
-
compression: Wrap<Option<IpcCompression>>,
|
510
|
-
) -> RbResult<()> {
|
511
|
-
if let Ok(s) = String::try_convert(rb_f) {
|
512
|
-
let f = std::fs::File::create(s).unwrap();
|
513
|
-
IpcWriter::new(f)
|
514
|
-
.with_compression(compression.0)
|
515
|
-
.finish(&mut self.df.borrow_mut())
|
516
|
-
.map_err(RbPolarsErr::from)?;
|
517
|
-
} else {
|
518
|
-
let mut buf = Cursor::new(Vec::new());
|
519
|
-
IpcWriter::new(&mut buf)
|
520
|
-
.with_compression(compression.0)
|
521
|
-
.finish(&mut self.df.borrow_mut())
|
522
|
-
.map_err(RbPolarsErr::from)?;
|
523
|
-
// TODO less copying
|
524
|
-
let rb_str = RString::from_slice(&buf.into_inner());
|
525
|
-
rb_f.funcall::<_, _, Value>("write", (rb_str,))?;
|
526
|
-
}
|
527
|
-
Ok(())
|
528
|
-
}
|
529
|
-
|
530
|
-
pub fn row_tuple(&self, idx: i64) -> Value {
|
531
|
-
let idx = if idx < 0 {
|
532
|
-
(self.df.borrow().height() as i64 + idx) as usize
|
533
|
-
} else {
|
534
|
-
idx as usize
|
535
|
-
};
|
536
|
-
RArray::from_iter(
|
537
|
-
self.df
|
538
|
-
.borrow()
|
539
|
-
.get_columns()
|
540
|
-
.iter()
|
541
|
-
.map(|s| match s.dtype() {
|
542
|
-
DataType::Object(_, _) => {
|
543
|
-
let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
|
544
|
-
obj.unwrap().to_object()
|
545
|
-
}
|
546
|
-
_ => Wrap(s.get(idx).unwrap()).into_value(),
|
547
|
-
}),
|
548
|
-
)
|
549
|
-
.as_value()
|
550
|
-
}
|
551
|
-
|
552
|
-
pub fn row_tuples(&self) -> Value {
|
553
|
-
let df = &self.df;
|
554
|
-
RArray::from_iter((0..df.borrow().height()).map(|idx| {
|
555
|
-
RArray::from_iter(
|
556
|
-
self.df
|
557
|
-
.borrow()
|
558
|
-
.get_columns()
|
559
|
-
.iter()
|
560
|
-
.map(|s| match s.dtype() {
|
561
|
-
DataType::Object(_, _) => {
|
562
|
-
let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
|
563
|
-
obj.unwrap().to_object()
|
564
|
-
}
|
565
|
-
_ => Wrap(s.get(idx).unwrap()).into_value(),
|
566
|
-
}),
|
567
|
-
)
|
568
|
-
}))
|
569
|
-
.as_value()
|
570
|
-
}
|
571
|
-
|
572
|
-
pub fn to_numo(&self) -> Option<Value> {
|
573
|
-
let mut st = None;
|
574
|
-
for s in self.df.borrow().iter() {
|
575
|
-
let dt_i = s.dtype();
|
576
|
-
match st {
|
577
|
-
None => st = Some(dt_i.clone()),
|
578
|
-
Some(ref mut st) => {
|
579
|
-
*st = try_get_supertype(st, dt_i).ok()?;
|
580
|
-
}
|
581
|
-
}
|
582
|
-
}
|
583
|
-
let _st = st?;
|
584
|
-
|
585
|
-
// TODO
|
586
|
-
None
|
587
|
-
}
|
588
|
-
|
589
|
-
pub fn write_parquet(
|
590
|
-
&self,
|
591
|
-
rb_f: Value,
|
592
|
-
compression: String,
|
593
|
-
compression_level: Option<i32>,
|
594
|
-
statistics: bool,
|
595
|
-
row_group_size: Option<usize>,
|
596
|
-
data_page_size: Option<usize>,
|
597
|
-
) -> RbResult<()> {
|
598
|
-
let compression = parse_parquet_compression(&compression, compression_level)?;
|
599
|
-
|
600
|
-
if let Ok(s) = String::try_convert(rb_f) {
|
601
|
-
let f = std::fs::File::create(s).unwrap();
|
602
|
-
ParquetWriter::new(f)
|
603
|
-
.with_compression(compression)
|
604
|
-
.with_statistics(statistics)
|
605
|
-
.with_row_group_size(row_group_size)
|
606
|
-
.with_data_page_size(data_page_size)
|
607
|
-
.finish(&mut self.df.borrow_mut())
|
608
|
-
.map_err(RbPolarsErr::from)?;
|
609
|
-
} else {
|
610
|
-
let buf = get_file_like(rb_f, true)?;
|
611
|
-
ParquetWriter::new(buf)
|
612
|
-
.with_compression(compression)
|
613
|
-
.with_statistics(statistics)
|
614
|
-
.with_row_group_size(row_group_size)
|
615
|
-
.with_data_page_size(data_page_size)
|
616
|
-
.finish(&mut self.df.borrow_mut())
|
617
|
-
.map_err(RbPolarsErr::from)?;
|
618
|
-
}
|
619
|
-
|
620
|
-
Ok(())
|
621
|
-
}
|
622
|
-
|
623
|
-
pub fn add(&self, s: &RbSeries) -> RbResult<Self> {
|
624
|
-
let df = (&*self.df.borrow() + &*s.series.borrow()).map_err(RbPolarsErr::from)?;
|
625
|
-
Ok(df.into())
|
626
|
-
}
|
627
|
-
|
628
|
-
pub fn sub(&self, s: &RbSeries) -> RbResult<Self> {
|
629
|
-
let df = (&*self.df.borrow() - &*s.series.borrow()).map_err(RbPolarsErr::from)?;
|
630
|
-
Ok(df.into())
|
631
|
-
}
|
632
|
-
|
633
|
-
pub fn div(&self, s: &RbSeries) -> RbResult<Self> {
|
634
|
-
let df = (&*self.df.borrow() / &*s.series.borrow()).map_err(RbPolarsErr::from)?;
|
635
|
-
Ok(df.into())
|
636
|
-
}
|
637
|
-
|
638
|
-
pub fn mul(&self, s: &RbSeries) -> RbResult<Self> {
|
639
|
-
let df = (&*self.df.borrow() * &*s.series.borrow()).map_err(RbPolarsErr::from)?;
|
640
|
-
Ok(df.into())
|
641
|
-
}
|
642
|
-
|
643
|
-
pub fn rem(&self, s: &RbSeries) -> RbResult<Self> {
|
644
|
-
let df = (&*self.df.borrow() % &*s.series.borrow()).map_err(RbPolarsErr::from)?;
|
645
|
-
Ok(df.into())
|
646
|
-
}
|
647
|
-
|
648
|
-
pub fn add_df(&self, s: &Self) -> RbResult<Self> {
|
649
|
-
let df = (&*self.df.borrow() + &*s.df.borrow()).map_err(RbPolarsErr::from)?;
|
650
|
-
Ok(df.into())
|
651
|
-
}
|
652
|
-
|
653
|
-
pub fn sub_df(&self, s: &Self) -> RbResult<Self> {
|
654
|
-
let df = (&*self.df.borrow() - &*s.df.borrow()).map_err(RbPolarsErr::from)?;
|
655
|
-
Ok(df.into())
|
656
|
-
}
|
657
|
-
|
658
|
-
pub fn div_df(&self, s: &Self) -> RbResult<Self> {
|
659
|
-
let df = (&*self.df.borrow() / &*s.df.borrow()).map_err(RbPolarsErr::from)?;
|
660
|
-
Ok(df.into())
|
661
|
-
}
|
662
|
-
|
663
|
-
pub fn mul_df(&self, s: &Self) -> RbResult<Self> {
|
664
|
-
let df = (&*self.df.borrow() * &*s.df.borrow()).map_err(RbPolarsErr::from)?;
|
665
|
-
Ok(df.into())
|
666
|
-
}
|
667
|
-
|
668
|
-
pub fn rem_df(&self, s: &Self) -> RbResult<Self> {
|
669
|
-
let df = (&*self.df.borrow() % &*s.df.borrow()).map_err(RbPolarsErr::from)?;
|
670
|
-
Ok(df.into())
|
671
|
-
}
|
672
|
-
|
673
|
-
pub fn sample_n(
|
674
|
-
&self,
|
675
|
-
n: &RbSeries,
|
676
|
-
with_replacement: bool,
|
677
|
-
shuffle: bool,
|
678
|
-
seed: Option<u64>,
|
679
|
-
) -> RbResult<Self> {
|
680
|
-
let df = self
|
681
|
-
.df
|
682
|
-
.borrow()
|
683
|
-
.sample_n(&n.series.borrow(), with_replacement, shuffle, seed)
|
684
|
-
.map_err(RbPolarsErr::from)?;
|
685
|
-
Ok(df.into())
|
686
|
-
}
|
687
|
-
|
688
|
-
pub fn sample_frac(
|
689
|
-
&self,
|
690
|
-
frac: &RbSeries,
|
691
|
-
with_replacement: bool,
|
692
|
-
shuffle: bool,
|
693
|
-
seed: Option<u64>,
|
694
|
-
) -> RbResult<Self> {
|
695
|
-
let df = self
|
696
|
-
.df
|
697
|
-
.borrow()
|
698
|
-
.sample_frac(&frac.series.borrow(), with_replacement, shuffle, seed)
|
699
|
-
.map_err(RbPolarsErr::from)?;
|
700
|
-
Ok(df.into())
|
701
|
-
}
|
702
|
-
|
703
|
-
pub fn rechunk(&self) -> Self {
|
704
|
-
self.df.borrow().agg_chunks().into()
|
705
|
-
}
|
706
|
-
|
707
|
-
pub fn to_s(&self) -> String {
|
708
|
-
format!("{}", self.df.borrow())
|
709
|
-
}
|
710
|
-
|
711
|
-
pub fn get_columns(&self) -> RArray {
|
712
|
-
let cols = self.df.borrow().get_columns().to_vec();
|
713
|
-
to_rbseries_collection(cols)
|
714
|
-
}
|
715
|
-
|
716
|
-
pub fn columns(&self) -> Vec<String> {
|
717
|
-
self.df
|
718
|
-
.borrow()
|
719
|
-
.get_column_names()
|
720
|
-
.iter()
|
721
|
-
.map(|v| v.to_string())
|
722
|
-
.collect()
|
723
|
-
}
|
724
|
-
|
725
|
-
pub fn set_column_names(&self, names: Vec<String>) -> RbResult<()> {
|
726
|
-
self.df
|
727
|
-
.borrow_mut()
|
728
|
-
.set_column_names(&names)
|
729
|
-
.map_err(RbPolarsErr::from)?;
|
730
|
-
Ok(())
|
731
|
-
}
|
732
|
-
|
733
|
-
pub fn dtypes(&self) -> RArray {
|
734
|
-
RArray::from_iter(
|
735
|
-
self.df
|
736
|
-
.borrow()
|
737
|
-
.iter()
|
738
|
-
.map(|s| Wrap(s.dtype().clone()).into_value()),
|
739
|
-
)
|
740
|
-
}
|
741
|
-
|
742
|
-
pub fn n_chunks(&self) -> usize {
|
743
|
-
self.df.borrow().n_chunks()
|
744
|
-
}
|
745
|
-
|
746
|
-
pub fn shape(&self) -> (usize, usize) {
|
747
|
-
self.df.borrow().shape()
|
748
|
-
}
|
749
|
-
|
750
|
-
pub fn height(&self) -> usize {
|
751
|
-
self.df.borrow().height()
|
752
|
-
}
|
753
|
-
|
754
|
-
pub fn width(&self) -> usize {
|
755
|
-
self.df.borrow().width()
|
756
|
-
}
|
757
|
-
|
758
|
-
pub fn hstack_mut(&self, columns: RArray) -> RbResult<()> {
|
759
|
-
let columns = to_series_collection(columns)?;
|
760
|
-
self.df
|
761
|
-
.borrow_mut()
|
762
|
-
.hstack_mut(&columns)
|
763
|
-
.map_err(RbPolarsErr::from)?;
|
764
|
-
Ok(())
|
765
|
-
}
|
766
|
-
|
767
|
-
pub fn hstack(&self, columns: RArray) -> RbResult<Self> {
|
768
|
-
let columns = to_series_collection(columns)?;
|
769
|
-
let df = self
|
770
|
-
.df
|
771
|
-
.borrow()
|
772
|
-
.hstack(&columns)
|
773
|
-
.map_err(RbPolarsErr::from)?;
|
774
|
-
Ok(df.into())
|
775
|
-
}
|
776
|
-
|
777
|
-
pub fn extend(&self, df: &RbDataFrame) -> RbResult<()> {
|
778
|
-
self.df
|
779
|
-
.borrow_mut()
|
780
|
-
.extend(&df.df.borrow())
|
781
|
-
.map_err(RbPolarsErr::from)?;
|
782
|
-
Ok(())
|
783
|
-
}
|
784
|
-
|
785
|
-
pub fn vstack_mut(&self, df: &RbDataFrame) -> RbResult<()> {
|
786
|
-
self.df
|
787
|
-
.borrow_mut()
|
788
|
-
.vstack_mut(&df.df.borrow())
|
789
|
-
.map_err(RbPolarsErr::from)?;
|
790
|
-
Ok(())
|
791
|
-
}
|
792
|
-
|
793
|
-
pub fn vstack(&self, df: &RbDataFrame) -> RbResult<Self> {
|
794
|
-
let df = self
|
795
|
-
.df
|
796
|
-
.borrow()
|
797
|
-
.vstack(&df.df.borrow())
|
798
|
-
.map_err(RbPolarsErr::from)?;
|
799
|
-
Ok(df.into())
|
800
|
-
}
|
801
|
-
|
802
|
-
pub fn drop_in_place(&self, name: String) -> RbResult<RbSeries> {
|
803
|
-
let s = self
|
804
|
-
.df
|
805
|
-
.borrow_mut()
|
806
|
-
.drop_in_place(&name)
|
807
|
-
.map_err(RbPolarsErr::from)?;
|
808
|
-
Ok(RbSeries::new(s))
|
809
|
-
}
|
810
|
-
|
811
|
-
pub fn drop_nulls(&self, subset: Option<Vec<String>>) -> RbResult<Self> {
|
812
|
-
let df = self
|
813
|
-
.df
|
814
|
-
.borrow()
|
815
|
-
.drop_nulls(subset.as_ref().map(|s| s.as_ref()))
|
816
|
-
.map_err(RbPolarsErr::from)?;
|
817
|
-
Ok(df.into())
|
818
|
-
}
|
819
|
-
|
820
|
-
pub fn drop(&self, name: String) -> RbResult<Self> {
|
821
|
-
let df = self.df.borrow().drop(&name).map_err(RbPolarsErr::from)?;
|
822
|
-
Ok(RbDataFrame::new(df))
|
823
|
-
}
|
824
|
-
|
825
|
-
pub fn select_at_idx(&self, idx: usize) -> Option<RbSeries> {
|
826
|
-
self.df
|
827
|
-
.borrow()
|
828
|
-
.select_at_idx(idx)
|
829
|
-
.map(|s| RbSeries::new(s.clone()))
|
830
|
-
}
|
831
|
-
|
832
|
-
pub fn get_column_index(&self, name: String) -> Option<usize> {
|
833
|
-
self.df.borrow().get_column_index(&name)
|
834
|
-
}
|
835
|
-
|
836
|
-
pub fn get_column(&self, name: String) -> RbResult<RbSeries> {
|
837
|
-
self.df
|
838
|
-
.borrow()
|
839
|
-
.column(&name)
|
840
|
-
.map(|s| RbSeries::new(s.clone()))
|
841
|
-
.map_err(RbPolarsErr::from)
|
842
|
-
}
|
843
|
-
|
844
|
-
pub fn select(&self, selection: Vec<String>) -> RbResult<Self> {
|
845
|
-
let df = self
|
846
|
-
.df
|
847
|
-
.borrow()
|
848
|
-
.select(selection)
|
849
|
-
.map_err(RbPolarsErr::from)?;
|
850
|
-
Ok(RbDataFrame::new(df))
|
851
|
-
}
|
852
|
-
|
853
|
-
pub fn take(&self, indices: Vec<IdxSize>) -> RbResult<Self> {
|
854
|
-
let indices = IdxCa::from_vec("", indices);
|
855
|
-
let df = self.df.borrow().take(&indices).map_err(RbPolarsErr::from)?;
|
856
|
-
Ok(RbDataFrame::new(df))
|
857
|
-
}
|
858
|
-
|
859
|
-
pub fn take_with_series(&self, indices: &RbSeries) -> RbResult<Self> {
|
860
|
-
let binding = indices.series.borrow();
|
861
|
-
let idx = binding.idx().map_err(RbPolarsErr::from)?;
|
862
|
-
let df = self.df.borrow().take(idx).map_err(RbPolarsErr::from)?;
|
863
|
-
Ok(RbDataFrame::new(df))
|
864
|
-
}
|
865
|
-
|
866
|
-
pub fn replace(&self, column: String, new_col: &RbSeries) -> RbResult<()> {
|
867
|
-
self.df
|
868
|
-
.borrow_mut()
|
869
|
-
.replace(&column, new_col.series.borrow().clone())
|
870
|
-
.map_err(RbPolarsErr::from)?;
|
871
|
-
Ok(())
|
872
|
-
}
|
873
|
-
|
874
|
-
pub fn replace_column(&self, index: usize, new_col: &RbSeries) -> RbResult<()> {
|
875
|
-
self.df
|
876
|
-
.borrow_mut()
|
877
|
-
.replace_column(index, new_col.series.borrow().clone())
|
878
|
-
.map_err(RbPolarsErr::from)?;
|
879
|
-
Ok(())
|
880
|
-
}
|
881
|
-
|
882
|
-
pub fn insert_column(&self, index: usize, new_col: &RbSeries) -> RbResult<()> {
|
883
|
-
self.df
|
884
|
-
.borrow_mut()
|
885
|
-
.insert_column(index, new_col.series.borrow().clone())
|
886
|
-
.map_err(RbPolarsErr::from)?;
|
887
|
-
Ok(())
|
888
|
-
}
|
889
|
-
|
890
|
-
/// Returns the slice of the wrapped frame starting at `offset`; when
/// `length` is absent the frame's height is used as the length.
pub fn slice(&self, offset: usize, length: Option<usize>) -> Self {
    let frame = self.df.borrow();
    let len = match length {
        Some(requested) => requested,
        None => frame.height(),
    };
    let window = frame.slice(offset as i64, len);
    window.into()
}
|
897
|
-
|
898
|
-
/// Forwards to `head` on the wrapped frame with the optional `length`.
pub fn head(&self, length: Option<usize>) -> Self {
    let frame = self.df.borrow();
    let leading = frame.head(length);
    leading.into()
}
|
901
|
-
|
902
|
-
/// Forwards to `tail` on the wrapped frame with the optional `length`.
pub fn tail(&self, length: Option<usize>) -> Self {
    let frame = self.df.borrow();
    let trailing = frame.tail(length);
    trailing.into()
}
|
905
|
-
|
906
|
-
/// Returns the mask computed by `is_unique` on the wrapped frame, converted
/// to a series.
pub fn is_unique(&self) -> RbResult<RbSeries> {
    let frame = self.df.borrow();
    let unique_mask = frame.is_unique().map_err(RbPolarsErr::from)?;
    let series = unique_mask.into_series();
    Ok(series.into())
}
|
910
|
-
|
911
|
-
pub fn is_duplicated(&self) -> RbResult<RbSeries> {
|
912
|
-
let mask = self
|
913
|
-
.df
|
914
|
-
.borrow()
|
915
|
-
.is_duplicated()
|
916
|
-
.map_err(RbPolarsErr::from)?;
|
917
|
-
Ok(mask.into_series().into())
|
918
|
-
}
|
919
|
-
|
920
|
-
/// Compares the wrapped frame with `other`, using `equals_missing` when
/// `null_equal` is set and `equals` otherwise.
pub fn equals(&self, other: &RbDataFrame, null_equal: bool) -> bool {
    let lhs = self.df.borrow();
    let rhs = other.df.borrow();
    match null_equal {
        true => lhs.equals_missing(&rhs),
        false => lhs.equals(&rhs),
    }
}
|
927
|
-
|
928
|
-
pub fn with_row_index(&self, name: String, offset: Option<IdxSize>) -> RbResult<Self> {
|
929
|
-
let df = self
|
930
|
-
.df
|
931
|
-
.borrow()
|
932
|
-
.with_row_index(&name, offset)
|
933
|
-
.map_err(RbPolarsErr::from)?;
|
934
|
-
Ok(df.into())
|
935
|
-
}
|
936
|
-
|
937
|
-
pub fn clone(&self) -> Self {
|
938
|
-
RbDataFrame::new(self.df.borrow().clone())
|
939
|
-
}
|
940
|
-
|
941
|
-
pub fn melt(
|
942
|
-
&self,
|
943
|
-
id_vars: Vec<String>,
|
944
|
-
value_vars: Vec<String>,
|
945
|
-
value_name: Option<String>,
|
946
|
-
variable_name: Option<String>,
|
947
|
-
) -> RbResult<Self> {
|
948
|
-
let args = MeltArgs {
|
949
|
-
id_vars: strings_to_smartstrings(id_vars),
|
950
|
-
value_vars: strings_to_smartstrings(value_vars),
|
951
|
-
value_name: value_name.map(|s| s.into()),
|
952
|
-
variable_name: variable_name.map(|s| s.into()),
|
953
|
-
streamable: false,
|
954
|
-
};
|
955
|
-
|
956
|
-
let df = self.df.borrow().melt2(args).map_err(RbPolarsErr::from)?;
|
957
|
-
Ok(RbDataFrame::new(df))
|
958
|
-
}
|
959
|
-
|
960
|
-
#[allow(clippy::too_many_arguments)]
|
961
|
-
pub fn pivot_expr(
|
962
|
-
&self,
|
963
|
-
index: Vec<String>,
|
964
|
-
columns: Vec<String>,
|
965
|
-
values: Option<Vec<String>>,
|
966
|
-
maintain_order: bool,
|
967
|
-
sort_columns: bool,
|
968
|
-
aggregate_expr: Option<&RbExpr>,
|
969
|
-
separator: Option<String>,
|
970
|
-
) -> RbResult<Self> {
|
971
|
-
let fun = match maintain_order {
|
972
|
-
true => pivot_stable,
|
973
|
-
false => pivot,
|
974
|
-
};
|
975
|
-
let agg_expr = aggregate_expr.map(|aggregate_expr| aggregate_expr.inner.clone());
|
976
|
-
let df = fun(
|
977
|
-
&self.df.borrow(),
|
978
|
-
index,
|
979
|
-
columns,
|
980
|
-
values,
|
981
|
-
sort_columns,
|
982
|
-
agg_expr,
|
983
|
-
separator.as_deref(),
|
984
|
-
)
|
985
|
-
.map_err(RbPolarsErr::from)?;
|
986
|
-
Ok(RbDataFrame::new(df))
|
987
|
-
}
|
988
|
-
|
989
|
-
pub fn partition_by(
|
990
|
-
&self,
|
991
|
-
by: Vec<String>,
|
992
|
-
maintain_order: bool,
|
993
|
-
include_key: bool,
|
994
|
-
) -> RbResult<RArray> {
|
995
|
-
let out = if maintain_order {
|
996
|
-
self.df.borrow().partition_by_stable(by, include_key)
|
997
|
-
} else {
|
998
|
-
self.df.borrow().partition_by(by, include_key)
|
999
|
-
}
|
1000
|
-
.map_err(RbPolarsErr::from)?;
|
1001
|
-
Ok(RArray::from_iter(out.into_iter().map(RbDataFrame::new)))
|
1002
|
-
}
|
1003
|
-
|
1004
|
-
/// Forwards to `shift` on the wrapped frame with the given `periods`.
pub fn shift(&self, periods: i64) -> Self {
    let frame = self.df.borrow();
    let shifted = frame.shift(periods);
    shifted.into()
}
|
1007
|
-
|
1008
|
-
/// Clones the wrapped frame and converts the clone into an `RbLazyFrame`.
pub fn lazy(&self) -> RbLazyFrame {
    let snapshot = self.df.borrow().clone();
    snapshot.lazy().into()
}
|
1011
|
-
|
1012
|
-
pub fn max_horizontal(&self) -> RbResult<Option<RbSeries>> {
|
1013
|
-
let s = self
|
1014
|
-
.df
|
1015
|
-
.borrow()
|
1016
|
-
.max_horizontal()
|
1017
|
-
.map_err(RbPolarsErr::from)?;
|
1018
|
-
Ok(s.map(|s| s.into()))
|
1019
|
-
}
|
1020
|
-
|
1021
|
-
pub fn min_horizontal(&self) -> RbResult<Option<RbSeries>> {
|
1022
|
-
let s = self
|
1023
|
-
.df
|
1024
|
-
.borrow()
|
1025
|
-
.min_horizontal()
|
1026
|
-
.map_err(RbPolarsErr::from)?;
|
1027
|
-
Ok(s.map(|s| s.into()))
|
1028
|
-
}
|
1029
|
-
|
1030
|
-
pub fn sum_horizontal(&self, ignore_nulls: bool) -> RbResult<Option<RbSeries>> {
|
1031
|
-
let null_strategy = if ignore_nulls {
|
1032
|
-
NullStrategy::Ignore
|
1033
|
-
} else {
|
1034
|
-
NullStrategy::Propagate
|
1035
|
-
};
|
1036
|
-
let s = self
|
1037
|
-
.df
|
1038
|
-
.borrow()
|
1039
|
-
.sum_horizontal(null_strategy)
|
1040
|
-
.map_err(RbPolarsErr::from)?;
|
1041
|
-
Ok(s.map(|s| s.into()))
|
1042
|
-
}
|
1043
|
-
|
1044
|
-
pub fn mean_horizontal(&self, ignore_nulls: bool) -> RbResult<Option<RbSeries>> {
|
1045
|
-
let null_strategy = if ignore_nulls {
|
1046
|
-
NullStrategy::Ignore
|
1047
|
-
} else {
|
1048
|
-
NullStrategy::Propagate
|
1049
|
-
};
|
1050
|
-
let s = self
|
1051
|
-
.df
|
1052
|
-
.borrow()
|
1053
|
-
.mean_horizontal(null_strategy)
|
1054
|
-
.map_err(RbPolarsErr::from)?;
|
1055
|
-
Ok(s.map(|s| s.into()))
|
1056
|
-
}
|
1057
|
-
|
1058
|
-
pub fn to_dummies(
|
1059
|
-
&self,
|
1060
|
-
columns: Option<Vec<String>>,
|
1061
|
-
separator: Option<String>,
|
1062
|
-
drop_first: bool,
|
1063
|
-
) -> RbResult<Self> {
|
1064
|
-
let df = match columns {
|
1065
|
-
Some(cols) => self.df.borrow().columns_to_dummies(
|
1066
|
-
cols.iter().map(|x| x as &str).collect(),
|
1067
|
-
separator.as_deref(),
|
1068
|
-
drop_first,
|
1069
|
-
),
|
1070
|
-
None => self
|
1071
|
-
.df
|
1072
|
-
.borrow()
|
1073
|
-
.to_dummies(separator.as_deref(), drop_first),
|
1074
|
-
}
|
1075
|
-
.map_err(RbPolarsErr::from)?;
|
1076
|
-
Ok(df.into())
|
1077
|
-
}
|
1078
|
-
|
1079
|
-
/// Forwards to `null_count` on the wrapped frame and wraps the result.
pub fn null_count(&self) -> Self {
    let frame = self.df.borrow();
    frame.null_count().into()
}
|
1083
|
-
|
1084
|
-
pub fn apply(
|
1085
|
-
&self,
|
1086
|
-
lambda: Value,
|
1087
|
-
output_type: Option<Wrap<DataType>>,
|
1088
|
-
inference_size: usize,
|
1089
|
-
) -> RbResult<(Value, bool)> {
|
1090
|
-
let df = &self.df.borrow();
|
1091
|
-
|
1092
|
-
let output_type = output_type.map(|dt| dt.0);
|
1093
|
-
let out = match output_type {
|
1094
|
-
Some(DataType::Int32) => {
|
1095
|
-
apply_lambda_with_primitive_out_type::<Int32Type>(df, lambda, 0, None).into_series()
|
1096
|
-
}
|
1097
|
-
Some(DataType::Int64) => {
|
1098
|
-
apply_lambda_with_primitive_out_type::<Int64Type>(df, lambda, 0, None).into_series()
|
1099
|
-
}
|
1100
|
-
Some(DataType::UInt32) => {
|
1101
|
-
apply_lambda_with_primitive_out_type::<UInt32Type>(df, lambda, 0, None)
|
1102
|
-
.into_series()
|
1103
|
-
}
|
1104
|
-
Some(DataType::UInt64) => {
|
1105
|
-
apply_lambda_with_primitive_out_type::<UInt64Type>(df, lambda, 0, None)
|
1106
|
-
.into_series()
|
1107
|
-
}
|
1108
|
-
Some(DataType::Float32) => {
|
1109
|
-
apply_lambda_with_primitive_out_type::<Float32Type>(df, lambda, 0, None)
|
1110
|
-
.into_series()
|
1111
|
-
}
|
1112
|
-
Some(DataType::Float64) => {
|
1113
|
-
apply_lambda_with_primitive_out_type::<Float64Type>(df, lambda, 0, None)
|
1114
|
-
.into_series()
|
1115
|
-
}
|
1116
|
-
Some(DataType::Boolean) => {
|
1117
|
-
apply_lambda_with_bool_out_type(df, lambda, 0, None).into_series()
|
1118
|
-
}
|
1119
|
-
Some(DataType::Date) => {
|
1120
|
-
apply_lambda_with_primitive_out_type::<Int32Type>(df, lambda, 0, None)
|
1121
|
-
.into_date()
|
1122
|
-
.into_series()
|
1123
|
-
}
|
1124
|
-
Some(DataType::Datetime(tu, tz)) => {
|
1125
|
-
apply_lambda_with_primitive_out_type::<Int64Type>(df, lambda, 0, None)
|
1126
|
-
.into_datetime(tu, tz)
|
1127
|
-
.into_series()
|
1128
|
-
}
|
1129
|
-
Some(DataType::String) => {
|
1130
|
-
apply_lambda_with_utf8_out_type(df, lambda, 0, None).into_series()
|
1131
|
-
}
|
1132
|
-
_ => return apply_lambda_unknown(df, lambda, inference_size),
|
1133
|
-
};
|
1134
|
-
|
1135
|
-
Ok((Obj::wrap(RbSeries::from(out)).as_value(), false))
|
1136
|
-
}
|
1137
|
-
|
1138
|
-
pub fn shrink_to_fit(&self) {
|
1139
|
-
self.df.borrow_mut().shrink_to_fit();
|
1140
|
-
}
|
1141
|
-
|
1142
|
-
pub fn hash_rows(&self, k0: u64, k1: u64, k2: u64, k3: u64) -> RbResult<RbSeries> {
|
1143
|
-
let hb = ahash::RandomState::with_seeds(k0, k1, k2, k3);
|
1144
|
-
let hash = self
|
1145
|
-
.df
|
1146
|
-
.borrow_mut()
|
1147
|
-
.hash_rows(Some(hb))
|
1148
|
-
.map_err(RbPolarsErr::from)?;
|
1149
|
-
Ok(hash.into_series().into())
|
1150
|
-
}
|
1151
|
-
|
1152
|
-
pub fn transpose(&self, keep_names_as: Option<String>, column_names: Value) -> RbResult<Self> {
|
1153
|
-
let new_col_names = if let Ok(name) = <Vec<String>>::try_convert(column_names) {
|
1154
|
-
Some(Either::Right(name))
|
1155
|
-
} else if let Ok(name) = String::try_convert(column_names) {
|
1156
|
-
Some(Either::Left(name))
|
1157
|
-
} else {
|
1158
|
-
None
|
1159
|
-
};
|
1160
|
-
Ok(self
|
1161
|
-
.df
|
1162
|
-
.borrow_mut()
|
1163
|
-
.transpose(keep_names_as.as_deref(), new_col_names)
|
1164
|
-
.map_err(RbPolarsErr::from)?
|
1165
|
-
.into())
|
1166
|
-
}
|
1167
|
-
|
1168
|
-
pub fn upsample(
|
1169
|
-
&self,
|
1170
|
-
by: Vec<String>,
|
1171
|
-
index_column: String,
|
1172
|
-
every: String,
|
1173
|
-
offset: String,
|
1174
|
-
stable: bool,
|
1175
|
-
) -> RbResult<Self> {
|
1176
|
-
let out = if stable {
|
1177
|
-
self.df.borrow().upsample_stable(
|
1178
|
-
by,
|
1179
|
-
&index_column,
|
1180
|
-
Duration::parse(&every),
|
1181
|
-
Duration::parse(&offset),
|
1182
|
-
)
|
1183
|
-
} else {
|
1184
|
-
self.df.borrow().upsample(
|
1185
|
-
by,
|
1186
|
-
&index_column,
|
1187
|
-
Duration::parse(&every),
|
1188
|
-
Duration::parse(&offset),
|
1189
|
-
)
|
1190
|
-
};
|
1191
|
-
let out = out.map_err(RbPolarsErr::from)?;
|
1192
|
-
Ok(out.into())
|
1193
|
-
}
|
1194
|
-
|
1195
|
-
/// Clones the wrapped frame, converts the clone with `into_struct(name)`,
/// and returns the result as a series.
pub fn to_struct(&self, name: String) -> RbSeries {
    let snapshot = self.df.borrow().clone();
    let as_struct = snapshot.into_struct(&name);
    as_struct.into_series().into()
}
|
1199
|
-
|
1200
|
-
/// Returns the frame produced by calling `unnest` with the given column
/// names on the wrapped frame.
pub fn unnest(&self, names: Vec<String>) -> RbResult<Self> {
    let frame = self.df.borrow();
    let unnested = frame.unnest(names).map_err(RbPolarsErr::from)?;
    Ok(unnested.into())
}
|
1204
|
-
|
1205
|
-
/// Forwards to `clear` on the wrapped frame and wraps the result.
pub fn clear(&self) -> Self {
    let frame = self.df.borrow();
    let cleared = frame.clear();
    cleared.into()
}
|
1208
|
-
}
|