polars-df 0.1.0 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/Cargo.lock +1946 -0
- data/Cargo.toml +5 -0
- data/ext/polars/Cargo.toml +31 -1
- data/ext/polars/src/batched_csv.rs +120 -0
- data/ext/polars/src/conversion.rs +336 -42
- data/ext/polars/src/dataframe.rs +409 -4
- data/ext/polars/src/error.rs +9 -0
- data/ext/polars/src/file.rs +8 -7
- data/ext/polars/src/lazy/apply.rs +7 -0
- data/ext/polars/src/lazy/dataframe.rs +436 -10
- data/ext/polars/src/lazy/dsl.rs +1134 -5
- data/ext/polars/src/lazy/meta.rs +41 -0
- data/ext/polars/src/lazy/mod.rs +2 -0
- data/ext/polars/src/lib.rs +390 -3
- data/ext/polars/src/series.rs +175 -13
- data/lib/polars/batched_csv_reader.rb +95 -0
- data/lib/polars/cat_expr.rb +13 -0
- data/lib/polars/data_frame.rb +892 -21
- data/lib/polars/date_time_expr.rb +143 -0
- data/lib/polars/expr.rb +503 -0
- data/lib/polars/io.rb +342 -2
- data/lib/polars/lazy_frame.rb +338 -6
- data/lib/polars/lazy_functions.rb +158 -11
- data/lib/polars/list_expr.rb +108 -0
- data/lib/polars/meta_expr.rb +33 -0
- data/lib/polars/series.rb +1304 -14
- data/lib/polars/string_expr.rb +117 -0
- data/lib/polars/struct_expr.rb +27 -0
- data/lib/polars/utils.rb +60 -0
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +15 -1
- metadata +13 -2
data/ext/polars/src/dataframe.rs
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
use magnus::{r_hash::ForEach, Error, RArray, RHash, RString, Value};
|
2
2
|
use polars::io::mmap::ReaderBytes;
|
3
|
+
use polars::io::RowCount;
|
3
4
|
use polars::prelude::*;
|
4
5
|
use std::cell::RefCell;
|
5
6
|
use std::fs::File;
|
@@ -7,8 +8,9 @@ use std::io::{BufReader, BufWriter, Cursor};
|
|
7
8
|
use std::ops::Deref;
|
8
9
|
use std::path::PathBuf;
|
9
10
|
|
10
|
-
use crate::conversion
|
11
|
+
use crate::conversion::*;
|
11
12
|
use crate::file::{get_file_like, get_mmap_bytes_reader};
|
13
|
+
use crate::series::to_rbseries_collection;
|
12
14
|
use crate::{series, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
|
13
15
|
|
14
16
|
#[magnus::wrap(class = "Polars::RbDataFrame")]
|
@@ -38,10 +40,98 @@ impl RbDataFrame {
|
|
38
40
|
Ok(RbDataFrame::new(df))
|
39
41
|
}
|
40
42
|
|
41
|
-
pub fn
|
43
|
+
pub fn estimated_size(&self) -> usize {
|
44
|
+
self.df.borrow().estimated_size()
|
45
|
+
}
|
46
|
+
|
47
|
+
pub fn read_csv(arguments: &[Value]) -> RbResult<Self> {
|
48
|
+
// start arguments
|
49
|
+
// this pattern is needed for more than 16
|
50
|
+
let rb_f: Value = arguments[0].try_convert()?;
|
51
|
+
let infer_schema_length: Option<usize> = arguments[1].try_convert()?;
|
52
|
+
let chunk_size: usize = arguments[2].try_convert()?;
|
53
|
+
let has_header: bool = arguments[3].try_convert()?;
|
54
|
+
let ignore_errors: bool = arguments[4].try_convert()?;
|
55
|
+
let n_rows: Option<usize> = arguments[5].try_convert()?;
|
56
|
+
let skip_rows: usize = arguments[6].try_convert()?;
|
57
|
+
let projection: Option<Vec<usize>> = arguments[7].try_convert()?;
|
58
|
+
let sep: String = arguments[8].try_convert()?;
|
59
|
+
let rechunk: bool = arguments[9].try_convert()?;
|
60
|
+
let columns: Option<Vec<String>> = arguments[10].try_convert()?;
|
61
|
+
let encoding: Wrap<CsvEncoding> = arguments[11].try_convert()?;
|
62
|
+
let n_threads: Option<usize> = arguments[12].try_convert()?;
|
63
|
+
let path: Option<String> = arguments[13].try_convert()?;
|
64
|
+
let overwrite_dtype: Option<Vec<(String, Wrap<DataType>)>> = arguments[14].try_convert()?;
|
65
|
+
// TODO fix
|
66
|
+
let overwrite_dtype_slice: Option<Vec<Wrap<DataType>>> = None; // arguments[15].try_convert()?;
|
67
|
+
let low_memory: bool = arguments[16].try_convert()?;
|
68
|
+
let comment_char: Option<String> = arguments[17].try_convert()?;
|
69
|
+
let quote_char: Option<String> = arguments[18].try_convert()?;
|
70
|
+
let null_values: Option<Wrap<NullValues>> = arguments[19].try_convert()?;
|
71
|
+
let parse_dates: bool = arguments[20].try_convert()?;
|
72
|
+
let skip_rows_after_header: usize = arguments[21].try_convert()?;
|
73
|
+
let row_count: Option<(String, IdxSize)> = arguments[22].try_convert()?;
|
74
|
+
let sample_size: usize = arguments[23].try_convert()?;
|
75
|
+
let eol_char: String = arguments[24].try_convert()?;
|
76
|
+
// end arguments
|
77
|
+
|
78
|
+
let null_values = null_values.map(|w| w.0);
|
79
|
+
let comment_char = comment_char.map(|s| s.as_bytes()[0]);
|
80
|
+
let eol_char = eol_char.as_bytes()[0];
|
81
|
+
|
82
|
+
let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
|
83
|
+
|
84
|
+
let quote_char = if let Some(s) = quote_char {
|
85
|
+
if s.is_empty() {
|
86
|
+
None
|
87
|
+
} else {
|
88
|
+
Some(s.as_bytes()[0])
|
89
|
+
}
|
90
|
+
} else {
|
91
|
+
None
|
92
|
+
};
|
93
|
+
|
94
|
+
let overwrite_dtype = overwrite_dtype.map(|overwrite_dtype| {
|
95
|
+
let fields = overwrite_dtype.iter().map(|(name, dtype)| {
|
96
|
+
let dtype = dtype.0.clone();
|
97
|
+
Field::new(name, dtype)
|
98
|
+
});
|
99
|
+
Schema::from(fields)
|
100
|
+
});
|
101
|
+
|
102
|
+
let overwrite_dtype_slice = overwrite_dtype_slice.map(|overwrite_dtype| {
|
103
|
+
overwrite_dtype
|
104
|
+
.iter()
|
105
|
+
.map(|dt| dt.0.clone())
|
106
|
+
.collect::<Vec<_>>()
|
107
|
+
});
|
108
|
+
|
42
109
|
let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
|
43
110
|
let df = CsvReader::new(mmap_bytes_r)
|
111
|
+
.infer_schema(infer_schema_length)
|
44
112
|
.has_header(has_header)
|
113
|
+
.with_n_rows(n_rows)
|
114
|
+
.with_delimiter(sep.as_bytes()[0])
|
115
|
+
.with_skip_rows(skip_rows)
|
116
|
+
.with_ignore_parser_errors(ignore_errors)
|
117
|
+
.with_projection(projection)
|
118
|
+
.with_rechunk(rechunk)
|
119
|
+
.with_chunk_size(chunk_size)
|
120
|
+
.with_encoding(encoding.0)
|
121
|
+
.with_columns(columns)
|
122
|
+
.with_n_threads(n_threads)
|
123
|
+
.with_path(path)
|
124
|
+
.with_dtypes(overwrite_dtype.as_ref())
|
125
|
+
.with_dtypes_slice(overwrite_dtype_slice.as_deref())
|
126
|
+
.low_memory(low_memory)
|
127
|
+
.with_comment_char(comment_char)
|
128
|
+
.with_null_values(null_values)
|
129
|
+
.with_parse_dates(parse_dates)
|
130
|
+
.with_quote_char(quote_char)
|
131
|
+
.with_end_of_line_char(eol_char)
|
132
|
+
.with_skip_rows_after_header(skip_rows_after_header)
|
133
|
+
.with_row_count(row_count)
|
134
|
+
.sample_size(sample_size)
|
45
135
|
.finish()
|
46
136
|
.map_err(RbPolarsErr::from)?;
|
47
137
|
Ok(df.into())
|
@@ -56,6 +146,27 @@ impl RbDataFrame {
|
|
56
146
|
.map(|v| v.into())
|
57
147
|
}
|
58
148
|
|
149
|
+
pub fn read_ipc(
|
150
|
+
rb_f: Value,
|
151
|
+
columns: Option<Vec<String>>,
|
152
|
+
projection: Option<Vec<usize>>,
|
153
|
+
n_rows: Option<usize>,
|
154
|
+
row_count: Option<(String, IdxSize)>,
|
155
|
+
memory_map: bool,
|
156
|
+
) -> RbResult<Self> {
|
157
|
+
let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
|
158
|
+
let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
|
159
|
+
let df = IpcReader::new(mmap_bytes_r)
|
160
|
+
.with_projection(projection)
|
161
|
+
.with_columns(columns)
|
162
|
+
.with_n_rows(n_rows)
|
163
|
+
.with_row_count(row_count)
|
164
|
+
.memory_mapped(memory_map)
|
165
|
+
.finish()
|
166
|
+
.map_err(RbPolarsErr::from)?;
|
167
|
+
Ok(RbDataFrame::new(df))
|
168
|
+
}
|
169
|
+
|
59
170
|
pub fn read_json(rb_f: Value) -> RbResult<Self> {
|
60
171
|
// memmap the file first
|
61
172
|
let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
|
@@ -180,6 +291,28 @@ impl RbDataFrame {
|
|
180
291
|
Ok(())
|
181
292
|
}
|
182
293
|
|
294
|
+
pub fn write_ipc(
|
295
|
+
&self,
|
296
|
+
rb_f: Value,
|
297
|
+
compression: Wrap<Option<IpcCompression>>,
|
298
|
+
) -> RbResult<()> {
|
299
|
+
if let Ok(s) = rb_f.try_convert::<String>() {
|
300
|
+
let f = std::fs::File::create(&s).unwrap();
|
301
|
+
IpcWriter::new(f)
|
302
|
+
.with_compression(compression.0)
|
303
|
+
.finish(&mut self.df.borrow_mut())
|
304
|
+
.map_err(RbPolarsErr::from)?;
|
305
|
+
} else {
|
306
|
+
let mut buf = get_file_like(rb_f, true)?;
|
307
|
+
|
308
|
+
IpcWriter::new(&mut buf)
|
309
|
+
.with_compression(compression.0)
|
310
|
+
.finish(&mut self.df.borrow_mut())
|
311
|
+
.map_err(RbPolarsErr::from)?;
|
312
|
+
}
|
313
|
+
Ok(())
|
314
|
+
}
|
315
|
+
|
183
316
|
pub fn write_parquet(
|
184
317
|
&self,
|
185
318
|
rb_f: Value,
|
@@ -213,6 +346,11 @@ impl RbDataFrame {
|
|
213
346
|
format!("{}", self.df.borrow())
|
214
347
|
}
|
215
348
|
|
349
|
+
pub fn get_columns(&self) -> Vec<RbSeries> {
|
350
|
+
let cols = self.df.borrow().get_columns().clone();
|
351
|
+
to_rbseries_collection(cols)
|
352
|
+
}
|
353
|
+
|
216
354
|
pub fn columns(&self) -> Vec<String> {
|
217
355
|
self.df
|
218
356
|
.borrow()
|
@@ -222,14 +360,27 @@ impl RbDataFrame {
|
|
222
360
|
.collect()
|
223
361
|
}
|
224
362
|
|
225
|
-
pub fn
|
363
|
+
pub fn set_column_names(&self, names: Vec<String>) -> RbResult<()> {
|
364
|
+
self.df
|
365
|
+
.borrow_mut()
|
366
|
+
.set_column_names(&names)
|
367
|
+
.map_err(RbPolarsErr::from)?;
|
368
|
+
Ok(())
|
369
|
+
}
|
370
|
+
|
371
|
+
pub fn dtypes(&self) -> Vec<Value> {
|
226
372
|
self.df
|
227
373
|
.borrow()
|
228
374
|
.iter()
|
229
|
-
.map(|s| s.dtype().
|
375
|
+
.map(|s| Wrap(s.dtype().clone()).into())
|
230
376
|
.collect()
|
231
377
|
}
|
232
378
|
|
379
|
+
pub fn n_chunks(&self) -> RbResult<usize> {
|
380
|
+
let n = self.df.borrow().n_chunks().map_err(RbPolarsErr::from)?;
|
381
|
+
Ok(n)
|
382
|
+
}
|
383
|
+
|
233
384
|
pub fn shape(&self) -> (usize, usize) {
|
234
385
|
self.df.borrow().shape()
|
235
386
|
}
|
@@ -258,6 +409,28 @@ impl RbDataFrame {
|
|
258
409
|
.map_err(RbPolarsErr::from)
|
259
410
|
}
|
260
411
|
|
412
|
+
pub fn select(&self, selection: Vec<String>) -> RbResult<Self> {
|
413
|
+
let df = self
|
414
|
+
.df
|
415
|
+
.borrow()
|
416
|
+
.select(selection)
|
417
|
+
.map_err(RbPolarsErr::from)?;
|
418
|
+
Ok(RbDataFrame::new(df))
|
419
|
+
}
|
420
|
+
|
421
|
+
pub fn take(&self, indices: Vec<IdxSize>) -> RbResult<Self> {
|
422
|
+
let indices = IdxCa::from_vec("", indices);
|
423
|
+
let df = self.df.borrow().take(&indices).map_err(RbPolarsErr::from)?;
|
424
|
+
Ok(RbDataFrame::new(df))
|
425
|
+
}
|
426
|
+
|
427
|
+
pub fn take_with_series(&self, indices: &RbSeries) -> RbResult<Self> {
|
428
|
+
let binding = indices.series.borrow();
|
429
|
+
let idx = binding.idx().map_err(RbPolarsErr::from)?;
|
430
|
+
let df = self.df.borrow().take(idx).map_err(RbPolarsErr::from)?;
|
431
|
+
Ok(RbDataFrame::new(df))
|
432
|
+
}
|
433
|
+
|
261
434
|
pub fn sort(&self, by_column: String, reverse: bool, nulls_last: bool) -> RbResult<Self> {
|
262
435
|
let df = self
|
263
436
|
.df
|
@@ -273,6 +446,38 @@ impl RbDataFrame {
|
|
273
446
|
Ok(RbDataFrame::new(df))
|
274
447
|
}
|
275
448
|
|
449
|
+
pub fn replace(&self, column: String, new_col: &RbSeries) -> RbResult<()> {
|
450
|
+
self.df
|
451
|
+
.borrow_mut()
|
452
|
+
.replace(&column, new_col.series.borrow().clone())
|
453
|
+
.map_err(RbPolarsErr::from)?;
|
454
|
+
Ok(())
|
455
|
+
}
|
456
|
+
|
457
|
+
pub fn replace_at_idx(&self, index: usize, new_col: &RbSeries) -> RbResult<()> {
|
458
|
+
self.df
|
459
|
+
.borrow_mut()
|
460
|
+
.replace_at_idx(index, new_col.series.borrow().clone())
|
461
|
+
.map_err(RbPolarsErr::from)?;
|
462
|
+
Ok(())
|
463
|
+
}
|
464
|
+
|
465
|
+
pub fn insert_at_idx(&self, index: usize, new_col: &RbSeries) -> RbResult<()> {
|
466
|
+
self.df
|
467
|
+
.borrow_mut()
|
468
|
+
.insert_at_idx(index, new_col.series.borrow().clone())
|
469
|
+
.map_err(RbPolarsErr::from)?;
|
470
|
+
Ok(())
|
471
|
+
}
|
472
|
+
|
473
|
+
/// Returns a new frame sliced from `offset`.
///
/// When `length` is `None`, the frame's current height is passed as the length.
pub fn slice(&self, offset: usize, length: Option<usize>) -> Self {
    let frame = self.df.borrow();
    let len = match length {
        Some(n) => n,
        None => frame.height(),
    };
    let window = frame.slice(offset as i64, len);
    window.into()
}
|
480
|
+
|
276
481
|
/// Returns a new frame from the wrapped frame's `head(length)`.
pub fn head(&self, length: Option<usize>) -> Self {
    let top = self.df.borrow().head(length);
    top.into()
}
|
@@ -281,6 +486,20 @@ impl RbDataFrame {
|
|
281
486
|
self.df.borrow().tail(length).into()
|
282
487
|
}
|
283
488
|
|
489
|
+
pub fn is_unique(&self) -> RbResult<RbSeries> {
|
490
|
+
let mask = self.df.borrow().is_unique().map_err(RbPolarsErr::from)?;
|
491
|
+
Ok(mask.into_series().into())
|
492
|
+
}
|
493
|
+
|
494
|
+
pub fn is_duplicated(&self) -> RbResult<RbSeries> {
|
495
|
+
let mask = self
|
496
|
+
.df
|
497
|
+
.borrow()
|
498
|
+
.is_duplicated()
|
499
|
+
.map_err(RbPolarsErr::from)?;
|
500
|
+
Ok(mask.into_series().into())
|
501
|
+
}
|
502
|
+
|
284
503
|
pub fn frame_equal(&self, other: &RbDataFrame, null_equal: bool) -> bool {
|
285
504
|
if null_equal {
|
286
505
|
self.df.borrow().frame_equal_missing(&other.df.borrow())
|
@@ -289,16 +508,202 @@ impl RbDataFrame {
|
|
289
508
|
}
|
290
509
|
}
|
291
510
|
|
511
|
+
pub fn with_row_count(&self, name: String, offset: Option<IdxSize>) -> RbResult<Self> {
|
512
|
+
let df = self
|
513
|
+
.df
|
514
|
+
.borrow()
|
515
|
+
.with_row_count(&name, offset)
|
516
|
+
.map_err(RbPolarsErr::from)?;
|
517
|
+
Ok(df.into())
|
518
|
+
}
|
519
|
+
|
520
|
+
pub fn clone(&self) -> Self {
|
521
|
+
RbDataFrame::new(self.df.borrow().clone())
|
522
|
+
}
|
523
|
+
|
524
|
+
pub fn melt(
|
525
|
+
&self,
|
526
|
+
id_vars: Vec<String>,
|
527
|
+
value_vars: Vec<String>,
|
528
|
+
value_name: Option<String>,
|
529
|
+
variable_name: Option<String>,
|
530
|
+
) -> RbResult<Self> {
|
531
|
+
let args = MeltArgs {
|
532
|
+
id_vars,
|
533
|
+
value_vars,
|
534
|
+
value_name,
|
535
|
+
variable_name,
|
536
|
+
};
|
537
|
+
|
538
|
+
let df = self.df.borrow().melt2(args).map_err(RbPolarsErr::from)?;
|
539
|
+
Ok(RbDataFrame::new(df))
|
540
|
+
}
|
541
|
+
|
542
|
+
pub fn partition_by(&self, groups: Vec<String>, stable: bool) -> RbResult<Vec<Self>> {
|
543
|
+
let out = if stable {
|
544
|
+
self.df.borrow().partition_by_stable(groups)
|
545
|
+
} else {
|
546
|
+
self.df.borrow().partition_by(groups)
|
547
|
+
}
|
548
|
+
.map_err(RbPolarsErr::from)?;
|
549
|
+
Ok(out.into_iter().map(RbDataFrame::new).collect())
|
550
|
+
}
|
551
|
+
|
552
|
+
/// Returns a new frame from the wrapped frame's `shift(periods)`.
pub fn shift(&self, periods: i64) -> Self {
    let shifted = self.df.borrow().shift(periods);
    shifted.into()
}
|
555
|
+
|
556
|
+
pub fn unique(
|
557
|
+
&self,
|
558
|
+
maintain_order: bool,
|
559
|
+
subset: Option<Vec<String>>,
|
560
|
+
keep: Wrap<UniqueKeepStrategy>,
|
561
|
+
) -> RbResult<Self> {
|
562
|
+
let subset = subset.as_ref().map(|v| v.as_ref());
|
563
|
+
let df = match maintain_order {
|
564
|
+
true => self.df.borrow().unique_stable(subset, keep.0),
|
565
|
+
false => self.df.borrow().unique(subset, keep.0),
|
566
|
+
}
|
567
|
+
.map_err(RbPolarsErr::from)?;
|
568
|
+
Ok(df.into())
|
569
|
+
}
|
570
|
+
|
292
571
|
/// Clones the wrapped frame and converts the clone into an `RbLazyFrame`.
pub fn lazy(&self) -> RbLazyFrame {
    let eager = self.df.borrow().clone();
    eager.lazy().into()
}
|
295
574
|
|
575
|
+
/// Returns a new frame from the wrapped frame's `max`.
pub fn max(&self) -> Self {
    let reduced = self.df.borrow().max();
    reduced.into()
}
|
578
|
+
|
579
|
+
/// Returns a new frame from the wrapped frame's `min`.
pub fn min(&self) -> Self {
    let reduced = self.df.borrow().min();
    reduced.into()
}
|
582
|
+
|
583
|
+
/// Returns a new frame from the wrapped frame's `sum`.
pub fn sum(&self) -> Self {
    let reduced = self.df.borrow().sum();
    reduced.into()
}
|
586
|
+
|
296
587
|
/// Returns a new frame from the wrapped frame's `mean`.
pub fn mean(&self) -> Self {
    let reduced = self.df.borrow().mean();
    reduced.into()
}
|
299
590
|
|
591
|
+
/// Returns a new frame from the wrapped frame's `std(ddof)`.
pub fn std(&self, ddof: u8) -> Self {
    let reduced = self.df.borrow().std(ddof);
    reduced.into()
}
|
594
|
+
|
595
|
+
/// Returns a new frame from the wrapped frame's `var(ddof)`.
pub fn var(&self, ddof: u8) -> Self {
    let reduced = self.df.borrow().var(ddof);
    reduced.into()
}
|
598
|
+
|
599
|
+
/// Returns a new frame from the wrapped frame's `median`.
pub fn median(&self) -> Self {
    let reduced = self.df.borrow().median();
    reduced.into()
}
|
602
|
+
|
603
|
+
pub fn hmean(&self, null_strategy: Wrap<NullStrategy>) -> RbResult<Option<RbSeries>> {
|
604
|
+
let s = self
|
605
|
+
.df
|
606
|
+
.borrow()
|
607
|
+
.hmean(null_strategy.0)
|
608
|
+
.map_err(RbPolarsErr::from)?;
|
609
|
+
Ok(s.map(|s| s.into()))
|
610
|
+
}
|
611
|
+
|
612
|
+
pub fn hmax(&self) -> RbResult<Option<RbSeries>> {
|
613
|
+
let s = self.df.borrow().hmax().map_err(RbPolarsErr::from)?;
|
614
|
+
Ok(s.map(|s| s.into()))
|
615
|
+
}
|
616
|
+
|
617
|
+
pub fn hmin(&self) -> RbResult<Option<RbSeries>> {
|
618
|
+
let s = self.df.borrow().hmin().map_err(RbPolarsErr::from)?;
|
619
|
+
Ok(s.map(|s| s.into()))
|
620
|
+
}
|
621
|
+
|
622
|
+
pub fn hsum(&self, null_strategy: Wrap<NullStrategy>) -> RbResult<Option<RbSeries>> {
|
623
|
+
let s = self
|
624
|
+
.df
|
625
|
+
.borrow()
|
626
|
+
.hsum(null_strategy.0)
|
627
|
+
.map_err(RbPolarsErr::from)?;
|
628
|
+
Ok(s.map(|s| s.into()))
|
629
|
+
}
|
630
|
+
|
631
|
+
pub fn quantile(
|
632
|
+
&self,
|
633
|
+
quantile: f64,
|
634
|
+
interpolation: Wrap<QuantileInterpolOptions>,
|
635
|
+
) -> RbResult<Self> {
|
636
|
+
let df = self
|
637
|
+
.df
|
638
|
+
.borrow()
|
639
|
+
.quantile(quantile, interpolation.0)
|
640
|
+
.map_err(RbPolarsErr::from)?;
|
641
|
+
Ok(df.into())
|
642
|
+
}
|
643
|
+
|
644
|
+
pub fn to_dummies(&self, columns: Option<Vec<String>>) -> RbResult<Self> {
|
645
|
+
let df = match columns {
|
646
|
+
Some(cols) => self
|
647
|
+
.df
|
648
|
+
.borrow()
|
649
|
+
.columns_to_dummies(cols.iter().map(|x| x as &str).collect()),
|
650
|
+
None => self.df.borrow().to_dummies(),
|
651
|
+
}
|
652
|
+
.map_err(RbPolarsErr::from)?;
|
653
|
+
Ok(df.into())
|
654
|
+
}
|
655
|
+
|
300
656
|
/// Returns a new frame from the wrapped frame's `null_count`.
pub fn null_count(&self) -> Self {
    self.df.borrow().null_count().into()
}
|
660
|
+
|
661
|
+
pub fn shrink_to_fit(&self) {
|
662
|
+
self.df.borrow_mut().shrink_to_fit();
|
663
|
+
}
|
664
|
+
|
665
|
+
pub fn transpose(&self, include_header: bool, names: String) -> RbResult<Self> {
|
666
|
+
let mut df = self.df.borrow().transpose().map_err(RbPolarsErr::from)?;
|
667
|
+
if include_header {
|
668
|
+
let s = Utf8Chunked::from_iter_values(
|
669
|
+
&names,
|
670
|
+
self.df.borrow().get_columns().iter().map(|s| s.name()),
|
671
|
+
)
|
672
|
+
.into_series();
|
673
|
+
df.insert_at_idx(0, s).unwrap();
|
674
|
+
}
|
675
|
+
Ok(df.into())
|
676
|
+
}
|
677
|
+
|
678
|
+
pub fn upsample(
|
679
|
+
&self,
|
680
|
+
by: Vec<String>,
|
681
|
+
index_column: String,
|
682
|
+
every: String,
|
683
|
+
offset: String,
|
684
|
+
stable: bool,
|
685
|
+
) -> RbResult<Self> {
|
686
|
+
let out = if stable {
|
687
|
+
self.df.borrow().upsample_stable(
|
688
|
+
by,
|
689
|
+
&index_column,
|
690
|
+
Duration::parse(&every),
|
691
|
+
Duration::parse(&offset),
|
692
|
+
)
|
693
|
+
} else {
|
694
|
+
self.df.borrow().upsample(
|
695
|
+
by,
|
696
|
+
&index_column,
|
697
|
+
Duration::parse(&every),
|
698
|
+
Duration::parse(&offset),
|
699
|
+
)
|
700
|
+
};
|
701
|
+
let out = out.map_err(RbPolarsErr::from)?;
|
702
|
+
Ok(out.into())
|
703
|
+
}
|
704
|
+
|
705
|
+
pub fn unnest(&self, names: Vec<String>) -> RbResult<Self> {
|
706
|
+
let df = self.df.borrow().unnest(names).map_err(RbPolarsErr::from)?;
|
707
|
+
Ok(df.into())
|
708
|
+
}
|
304
709
|
}
|
data/ext/polars/src/error.rs
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
use magnus::exception::arg_error;
|
2
2
|
use magnus::Error;
|
3
|
+
use polars::error::ArrowError;
|
3
4
|
use polars::prelude::PolarsError;
|
4
5
|
|
5
6
|
pub struct RbPolarsErr {}
|
@@ -10,6 +11,14 @@ impl RbPolarsErr {
|
|
10
11
|
Error::runtime_error(e.to_string())
|
11
12
|
}
|
12
13
|
|
14
|
+
pub fn arrow(e: ArrowError) -> Error {
|
15
|
+
Error::runtime_error(e.to_string())
|
16
|
+
}
|
17
|
+
|
18
|
+
pub fn io(e: std::io::Error) -> Error {
|
19
|
+
Error::runtime_error(e.to_string())
|
20
|
+
}
|
21
|
+
|
13
22
|
/// Builds a runtime error carrying `message` unchanged.
pub fn other(message: String) -> Error {
|
14
23
|
Error::runtime_error(message)
|
15
24
|
}
|
data/ext/polars/src/file.rs
CHANGED
@@ -1,18 +1,19 @@
|
|
1
1
|
use magnus::{Error, RString, Value};
|
2
2
|
use polars::io::mmap::MmapBytesReader;
|
3
|
-
use std::fs::
|
3
|
+
use std::fs::File;
|
4
4
|
use std::io::Cursor;
|
5
5
|
use std::path::PathBuf;
|
6
6
|
|
7
7
|
use crate::RbResult;
|
8
8
|
|
9
9
|
pub fn get_file_like(f: Value, truncate: bool) -> RbResult<File> {
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
10
|
+
let str_slice = f.try_convert::<PathBuf>()?;
|
11
|
+
let f = if truncate {
|
12
|
+
File::create(str_slice).map_err(|e| Error::runtime_error(e.to_string()))?
|
13
|
+
} else {
|
14
|
+
File::open(str_slice).map_err(|e| Error::runtime_error(e.to_string()))?
|
15
|
+
};
|
16
|
+
Ok(f)
|
16
17
|
}
|
17
18
|
|
18
19
|
pub fn get_mmap_bytes_reader(rb_f: Value) -> RbResult<Box<dyn MmapBytesReader>> {
|