polars-df 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/Cargo.lock +1946 -0
- data/Cargo.toml +5 -0
- data/ext/polars/Cargo.toml +31 -1
- data/ext/polars/src/batched_csv.rs +120 -0
- data/ext/polars/src/conversion.rs +336 -42
- data/ext/polars/src/dataframe.rs +409 -4
- data/ext/polars/src/error.rs +9 -0
- data/ext/polars/src/file.rs +8 -7
- data/ext/polars/src/lazy/apply.rs +7 -0
- data/ext/polars/src/lazy/dataframe.rs +436 -10
- data/ext/polars/src/lazy/dsl.rs +1134 -5
- data/ext/polars/src/lazy/meta.rs +41 -0
- data/ext/polars/src/lazy/mod.rs +2 -0
- data/ext/polars/src/lib.rs +390 -3
- data/ext/polars/src/series.rs +175 -13
- data/lib/polars/batched_csv_reader.rb +95 -0
- data/lib/polars/cat_expr.rb +13 -0
- data/lib/polars/data_frame.rb +892 -21
- data/lib/polars/date_time_expr.rb +143 -0
- data/lib/polars/expr.rb +503 -0
- data/lib/polars/io.rb +342 -2
- data/lib/polars/lazy_frame.rb +338 -6
- data/lib/polars/lazy_functions.rb +158 -11
- data/lib/polars/list_expr.rb +108 -0
- data/lib/polars/meta_expr.rb +33 -0
- data/lib/polars/series.rb +1304 -14
- data/lib/polars/string_expr.rb +117 -0
- data/lib/polars/struct_expr.rb +27 -0
- data/lib/polars/utils.rb +60 -0
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +15 -1
- metadata +13 -2
data/ext/polars/src/dataframe.rs
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
use magnus::{r_hash::ForEach, Error, RArray, RHash, RString, Value};
|
2
2
|
use polars::io::mmap::ReaderBytes;
|
3
|
+
use polars::io::RowCount;
|
3
4
|
use polars::prelude::*;
|
4
5
|
use std::cell::RefCell;
|
5
6
|
use std::fs::File;
|
@@ -7,8 +8,9 @@ use std::io::{BufReader, BufWriter, Cursor};
|
|
7
8
|
use std::ops::Deref;
|
8
9
|
use std::path::PathBuf;
|
9
10
|
|
10
|
-
use crate::conversion
|
11
|
+
use crate::conversion::*;
|
11
12
|
use crate::file::{get_file_like, get_mmap_bytes_reader};
|
13
|
+
use crate::series::to_rbseries_collection;
|
12
14
|
use crate::{series, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
|
13
15
|
|
14
16
|
#[magnus::wrap(class = "Polars::RbDataFrame")]
|
@@ -38,10 +40,98 @@ impl RbDataFrame {
|
|
38
40
|
Ok(RbDataFrame::new(df))
|
39
41
|
}
|
40
42
|
|
41
|
-
pub fn
|
43
|
+
pub fn estimated_size(&self) -> usize {
|
44
|
+
self.df.borrow().estimated_size()
|
45
|
+
}
|
46
|
+
|
47
|
+
pub fn read_csv(arguments: &[Value]) -> RbResult<Self> {
|
48
|
+
// start arguments
|
49
|
+
// this pattern is needed for more than 16
|
50
|
+
let rb_f: Value = arguments[0].try_convert()?;
|
51
|
+
let infer_schema_length: Option<usize> = arguments[1].try_convert()?;
|
52
|
+
let chunk_size: usize = arguments[2].try_convert()?;
|
53
|
+
let has_header: bool = arguments[3].try_convert()?;
|
54
|
+
let ignore_errors: bool = arguments[4].try_convert()?;
|
55
|
+
let n_rows: Option<usize> = arguments[5].try_convert()?;
|
56
|
+
let skip_rows: usize = arguments[6].try_convert()?;
|
57
|
+
let projection: Option<Vec<usize>> = arguments[7].try_convert()?;
|
58
|
+
let sep: String = arguments[8].try_convert()?;
|
59
|
+
let rechunk: bool = arguments[9].try_convert()?;
|
60
|
+
let columns: Option<Vec<String>> = arguments[10].try_convert()?;
|
61
|
+
let encoding: Wrap<CsvEncoding> = arguments[11].try_convert()?;
|
62
|
+
let n_threads: Option<usize> = arguments[12].try_convert()?;
|
63
|
+
let path: Option<String> = arguments[13].try_convert()?;
|
64
|
+
let overwrite_dtype: Option<Vec<(String, Wrap<DataType>)>> = arguments[14].try_convert()?;
|
65
|
+
// TODO fix
|
66
|
+
let overwrite_dtype_slice: Option<Vec<Wrap<DataType>>> = None; // arguments[15].try_convert()?;
|
67
|
+
let low_memory: bool = arguments[16].try_convert()?;
|
68
|
+
let comment_char: Option<String> = arguments[17].try_convert()?;
|
69
|
+
let quote_char: Option<String> = arguments[18].try_convert()?;
|
70
|
+
let null_values: Option<Wrap<NullValues>> = arguments[19].try_convert()?;
|
71
|
+
let parse_dates: bool = arguments[20].try_convert()?;
|
72
|
+
let skip_rows_after_header: usize = arguments[21].try_convert()?;
|
73
|
+
let row_count: Option<(String, IdxSize)> = arguments[22].try_convert()?;
|
74
|
+
let sample_size: usize = arguments[23].try_convert()?;
|
75
|
+
let eol_char: String = arguments[24].try_convert()?;
|
76
|
+
// end arguments
|
77
|
+
|
78
|
+
let null_values = null_values.map(|w| w.0);
|
79
|
+
let comment_char = comment_char.map(|s| s.as_bytes()[0]);
|
80
|
+
let eol_char = eol_char.as_bytes()[0];
|
81
|
+
|
82
|
+
let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
|
83
|
+
|
84
|
+
let quote_char = if let Some(s) = quote_char {
|
85
|
+
if s.is_empty() {
|
86
|
+
None
|
87
|
+
} else {
|
88
|
+
Some(s.as_bytes()[0])
|
89
|
+
}
|
90
|
+
} else {
|
91
|
+
None
|
92
|
+
};
|
93
|
+
|
94
|
+
let overwrite_dtype = overwrite_dtype.map(|overwrite_dtype| {
|
95
|
+
let fields = overwrite_dtype.iter().map(|(name, dtype)| {
|
96
|
+
let dtype = dtype.0.clone();
|
97
|
+
Field::new(name, dtype)
|
98
|
+
});
|
99
|
+
Schema::from(fields)
|
100
|
+
});
|
101
|
+
|
102
|
+
let overwrite_dtype_slice = overwrite_dtype_slice.map(|overwrite_dtype| {
|
103
|
+
overwrite_dtype
|
104
|
+
.iter()
|
105
|
+
.map(|dt| dt.0.clone())
|
106
|
+
.collect::<Vec<_>>()
|
107
|
+
});
|
108
|
+
|
42
109
|
let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
|
43
110
|
let df = CsvReader::new(mmap_bytes_r)
|
111
|
+
.infer_schema(infer_schema_length)
|
44
112
|
.has_header(has_header)
|
113
|
+
.with_n_rows(n_rows)
|
114
|
+
.with_delimiter(sep.as_bytes()[0])
|
115
|
+
.with_skip_rows(skip_rows)
|
116
|
+
.with_ignore_parser_errors(ignore_errors)
|
117
|
+
.with_projection(projection)
|
118
|
+
.with_rechunk(rechunk)
|
119
|
+
.with_chunk_size(chunk_size)
|
120
|
+
.with_encoding(encoding.0)
|
121
|
+
.with_columns(columns)
|
122
|
+
.with_n_threads(n_threads)
|
123
|
+
.with_path(path)
|
124
|
+
.with_dtypes(overwrite_dtype.as_ref())
|
125
|
+
.with_dtypes_slice(overwrite_dtype_slice.as_deref())
|
126
|
+
.low_memory(low_memory)
|
127
|
+
.with_comment_char(comment_char)
|
128
|
+
.with_null_values(null_values)
|
129
|
+
.with_parse_dates(parse_dates)
|
130
|
+
.with_quote_char(quote_char)
|
131
|
+
.with_end_of_line_char(eol_char)
|
132
|
+
.with_skip_rows_after_header(skip_rows_after_header)
|
133
|
+
.with_row_count(row_count)
|
134
|
+
.sample_size(sample_size)
|
45
135
|
.finish()
|
46
136
|
.map_err(RbPolarsErr::from)?;
|
47
137
|
Ok(df.into())
|
@@ -56,6 +146,27 @@ impl RbDataFrame {
|
|
56
146
|
.map(|v| v.into())
|
57
147
|
}
|
58
148
|
|
149
|
+
pub fn read_ipc(
|
150
|
+
rb_f: Value,
|
151
|
+
columns: Option<Vec<String>>,
|
152
|
+
projection: Option<Vec<usize>>,
|
153
|
+
n_rows: Option<usize>,
|
154
|
+
row_count: Option<(String, IdxSize)>,
|
155
|
+
memory_map: bool,
|
156
|
+
) -> RbResult<Self> {
|
157
|
+
let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
|
158
|
+
let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
|
159
|
+
let df = IpcReader::new(mmap_bytes_r)
|
160
|
+
.with_projection(projection)
|
161
|
+
.with_columns(columns)
|
162
|
+
.with_n_rows(n_rows)
|
163
|
+
.with_row_count(row_count)
|
164
|
+
.memory_mapped(memory_map)
|
165
|
+
.finish()
|
166
|
+
.map_err(RbPolarsErr::from)?;
|
167
|
+
Ok(RbDataFrame::new(df))
|
168
|
+
}
|
169
|
+
|
59
170
|
pub fn read_json(rb_f: Value) -> RbResult<Self> {
|
60
171
|
// memmap the file first
|
61
172
|
let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
|
@@ -180,6 +291,28 @@ impl RbDataFrame {
|
|
180
291
|
Ok(())
|
181
292
|
}
|
182
293
|
|
294
|
+
pub fn write_ipc(
|
295
|
+
&self,
|
296
|
+
rb_f: Value,
|
297
|
+
compression: Wrap<Option<IpcCompression>>,
|
298
|
+
) -> RbResult<()> {
|
299
|
+
if let Ok(s) = rb_f.try_convert::<String>() {
|
300
|
+
let f = std::fs::File::create(&s).unwrap();
|
301
|
+
IpcWriter::new(f)
|
302
|
+
.with_compression(compression.0)
|
303
|
+
.finish(&mut self.df.borrow_mut())
|
304
|
+
.map_err(RbPolarsErr::from)?;
|
305
|
+
} else {
|
306
|
+
let mut buf = get_file_like(rb_f, true)?;
|
307
|
+
|
308
|
+
IpcWriter::new(&mut buf)
|
309
|
+
.with_compression(compression.0)
|
310
|
+
.finish(&mut self.df.borrow_mut())
|
311
|
+
.map_err(RbPolarsErr::from)?;
|
312
|
+
}
|
313
|
+
Ok(())
|
314
|
+
}
|
315
|
+
|
183
316
|
pub fn write_parquet(
|
184
317
|
&self,
|
185
318
|
rb_f: Value,
|
@@ -213,6 +346,11 @@ impl RbDataFrame {
|
|
213
346
|
format!("{}", self.df.borrow())
|
214
347
|
}
|
215
348
|
|
349
|
+
pub fn get_columns(&self) -> Vec<RbSeries> {
|
350
|
+
let cols = self.df.borrow().get_columns().clone();
|
351
|
+
to_rbseries_collection(cols)
|
352
|
+
}
|
353
|
+
|
216
354
|
pub fn columns(&self) -> Vec<String> {
|
217
355
|
self.df
|
218
356
|
.borrow()
|
@@ -222,14 +360,27 @@ impl RbDataFrame {
|
|
222
360
|
.collect()
|
223
361
|
}
|
224
362
|
|
225
|
-
pub fn
|
363
|
+
pub fn set_column_names(&self, names: Vec<String>) -> RbResult<()> {
|
364
|
+
self.df
|
365
|
+
.borrow_mut()
|
366
|
+
.set_column_names(&names)
|
367
|
+
.map_err(RbPolarsErr::from)?;
|
368
|
+
Ok(())
|
369
|
+
}
|
370
|
+
|
371
|
+
pub fn dtypes(&self) -> Vec<Value> {
|
226
372
|
self.df
|
227
373
|
.borrow()
|
228
374
|
.iter()
|
229
|
-
.map(|s| s.dtype().
|
375
|
+
.map(|s| Wrap(s.dtype().clone()).into())
|
230
376
|
.collect()
|
231
377
|
}
|
232
378
|
|
379
|
+
pub fn n_chunks(&self) -> RbResult<usize> {
|
380
|
+
let n = self.df.borrow().n_chunks().map_err(RbPolarsErr::from)?;
|
381
|
+
Ok(n)
|
382
|
+
}
|
383
|
+
|
233
384
|
pub fn shape(&self) -> (usize, usize) {
|
234
385
|
self.df.borrow().shape()
|
235
386
|
}
|
@@ -258,6 +409,28 @@ impl RbDataFrame {
|
|
258
409
|
.map_err(RbPolarsErr::from)
|
259
410
|
}
|
260
411
|
|
412
|
+
pub fn select(&self, selection: Vec<String>) -> RbResult<Self> {
|
413
|
+
let df = self
|
414
|
+
.df
|
415
|
+
.borrow()
|
416
|
+
.select(selection)
|
417
|
+
.map_err(RbPolarsErr::from)?;
|
418
|
+
Ok(RbDataFrame::new(df))
|
419
|
+
}
|
420
|
+
|
421
|
+
pub fn take(&self, indices: Vec<IdxSize>) -> RbResult<Self> {
|
422
|
+
let indices = IdxCa::from_vec("", indices);
|
423
|
+
let df = self.df.borrow().take(&indices).map_err(RbPolarsErr::from)?;
|
424
|
+
Ok(RbDataFrame::new(df))
|
425
|
+
}
|
426
|
+
|
427
|
+
pub fn take_with_series(&self, indices: &RbSeries) -> RbResult<Self> {
|
428
|
+
let binding = indices.series.borrow();
|
429
|
+
let idx = binding.idx().map_err(RbPolarsErr::from)?;
|
430
|
+
let df = self.df.borrow().take(idx).map_err(RbPolarsErr::from)?;
|
431
|
+
Ok(RbDataFrame::new(df))
|
432
|
+
}
|
433
|
+
|
261
434
|
pub fn sort(&self, by_column: String, reverse: bool, nulls_last: bool) -> RbResult<Self> {
|
262
435
|
let df = self
|
263
436
|
.df
|
@@ -273,6 +446,38 @@ impl RbDataFrame {
|
|
273
446
|
Ok(RbDataFrame::new(df))
|
274
447
|
}
|
275
448
|
|
449
|
+
pub fn replace(&self, column: String, new_col: &RbSeries) -> RbResult<()> {
|
450
|
+
self.df
|
451
|
+
.borrow_mut()
|
452
|
+
.replace(&column, new_col.series.borrow().clone())
|
453
|
+
.map_err(RbPolarsErr::from)?;
|
454
|
+
Ok(())
|
455
|
+
}
|
456
|
+
|
457
|
+
pub fn replace_at_idx(&self, index: usize, new_col: &RbSeries) -> RbResult<()> {
|
458
|
+
self.df
|
459
|
+
.borrow_mut()
|
460
|
+
.replace_at_idx(index, new_col.series.borrow().clone())
|
461
|
+
.map_err(RbPolarsErr::from)?;
|
462
|
+
Ok(())
|
463
|
+
}
|
464
|
+
|
465
|
+
pub fn insert_at_idx(&self, index: usize, new_col: &RbSeries) -> RbResult<()> {
|
466
|
+
self.df
|
467
|
+
.borrow_mut()
|
468
|
+
.insert_at_idx(index, new_col.series.borrow().clone())
|
469
|
+
.map_err(RbPolarsErr::from)?;
|
470
|
+
Ok(())
|
471
|
+
}
|
472
|
+
|
473
|
+
pub fn slice(&self, offset: usize, length: Option<usize>) -> Self {
|
474
|
+
let df = self.df.borrow().slice(
|
475
|
+
offset as i64,
|
476
|
+
length.unwrap_or_else(|| self.df.borrow().height()),
|
477
|
+
);
|
478
|
+
df.into()
|
479
|
+
}
|
480
|
+
|
276
481
|
pub fn head(&self, length: Option<usize>) -> Self {
|
277
482
|
self.df.borrow().head(length).into()
|
278
483
|
}
|
@@ -281,6 +486,20 @@ impl RbDataFrame {
|
|
281
486
|
self.df.borrow().tail(length).into()
|
282
487
|
}
|
283
488
|
|
489
|
+
pub fn is_unique(&self) -> RbResult<RbSeries> {
|
490
|
+
let mask = self.df.borrow().is_unique().map_err(RbPolarsErr::from)?;
|
491
|
+
Ok(mask.into_series().into())
|
492
|
+
}
|
493
|
+
|
494
|
+
pub fn is_duplicated(&self) -> RbResult<RbSeries> {
|
495
|
+
let mask = self
|
496
|
+
.df
|
497
|
+
.borrow()
|
498
|
+
.is_duplicated()
|
499
|
+
.map_err(RbPolarsErr::from)?;
|
500
|
+
Ok(mask.into_series().into())
|
501
|
+
}
|
502
|
+
|
284
503
|
pub fn frame_equal(&self, other: &RbDataFrame, null_equal: bool) -> bool {
|
285
504
|
if null_equal {
|
286
505
|
self.df.borrow().frame_equal_missing(&other.df.borrow())
|
@@ -289,16 +508,202 @@ impl RbDataFrame {
|
|
289
508
|
}
|
290
509
|
}
|
291
510
|
|
511
|
+
pub fn with_row_count(&self, name: String, offset: Option<IdxSize>) -> RbResult<Self> {
|
512
|
+
let df = self
|
513
|
+
.df
|
514
|
+
.borrow()
|
515
|
+
.with_row_count(&name, offset)
|
516
|
+
.map_err(RbPolarsErr::from)?;
|
517
|
+
Ok(df.into())
|
518
|
+
}
|
519
|
+
|
520
|
+
pub fn clone(&self) -> Self {
|
521
|
+
RbDataFrame::new(self.df.borrow().clone())
|
522
|
+
}
|
523
|
+
|
524
|
+
pub fn melt(
|
525
|
+
&self,
|
526
|
+
id_vars: Vec<String>,
|
527
|
+
value_vars: Vec<String>,
|
528
|
+
value_name: Option<String>,
|
529
|
+
variable_name: Option<String>,
|
530
|
+
) -> RbResult<Self> {
|
531
|
+
let args = MeltArgs {
|
532
|
+
id_vars,
|
533
|
+
value_vars,
|
534
|
+
value_name,
|
535
|
+
variable_name,
|
536
|
+
};
|
537
|
+
|
538
|
+
let df = self.df.borrow().melt2(args).map_err(RbPolarsErr::from)?;
|
539
|
+
Ok(RbDataFrame::new(df))
|
540
|
+
}
|
541
|
+
|
542
|
+
pub fn partition_by(&self, groups: Vec<String>, stable: bool) -> RbResult<Vec<Self>> {
|
543
|
+
let out = if stable {
|
544
|
+
self.df.borrow().partition_by_stable(groups)
|
545
|
+
} else {
|
546
|
+
self.df.borrow().partition_by(groups)
|
547
|
+
}
|
548
|
+
.map_err(RbPolarsErr::from)?;
|
549
|
+
Ok(out.into_iter().map(RbDataFrame::new).collect())
|
550
|
+
}
|
551
|
+
|
552
|
+
pub fn shift(&self, periods: i64) -> Self {
|
553
|
+
self.df.borrow().shift(periods).into()
|
554
|
+
}
|
555
|
+
|
556
|
+
pub fn unique(
|
557
|
+
&self,
|
558
|
+
maintain_order: bool,
|
559
|
+
subset: Option<Vec<String>>,
|
560
|
+
keep: Wrap<UniqueKeepStrategy>,
|
561
|
+
) -> RbResult<Self> {
|
562
|
+
let subset = subset.as_ref().map(|v| v.as_ref());
|
563
|
+
let df = match maintain_order {
|
564
|
+
true => self.df.borrow().unique_stable(subset, keep.0),
|
565
|
+
false => self.df.borrow().unique(subset, keep.0),
|
566
|
+
}
|
567
|
+
.map_err(RbPolarsErr::from)?;
|
568
|
+
Ok(df.into())
|
569
|
+
}
|
570
|
+
|
292
571
|
pub fn lazy(&self) -> RbLazyFrame {
|
293
572
|
self.df.borrow().clone().lazy().into()
|
294
573
|
}
|
295
574
|
|
575
|
+
pub fn max(&self) -> Self {
|
576
|
+
self.df.borrow().max().into()
|
577
|
+
}
|
578
|
+
|
579
|
+
pub fn min(&self) -> Self {
|
580
|
+
self.df.borrow().min().into()
|
581
|
+
}
|
582
|
+
|
583
|
+
pub fn sum(&self) -> Self {
|
584
|
+
self.df.borrow().sum().into()
|
585
|
+
}
|
586
|
+
|
296
587
|
pub fn mean(&self) -> Self {
|
297
588
|
self.df.borrow().mean().into()
|
298
589
|
}
|
299
590
|
|
591
|
+
pub fn std(&self, ddof: u8) -> Self {
|
592
|
+
self.df.borrow().std(ddof).into()
|
593
|
+
}
|
594
|
+
|
595
|
+
pub fn var(&self, ddof: u8) -> Self {
|
596
|
+
self.df.borrow().var(ddof).into()
|
597
|
+
}
|
598
|
+
|
599
|
+
pub fn median(&self) -> Self {
|
600
|
+
self.df.borrow().median().into()
|
601
|
+
}
|
602
|
+
|
603
|
+
pub fn hmean(&self, null_strategy: Wrap<NullStrategy>) -> RbResult<Option<RbSeries>> {
|
604
|
+
let s = self
|
605
|
+
.df
|
606
|
+
.borrow()
|
607
|
+
.hmean(null_strategy.0)
|
608
|
+
.map_err(RbPolarsErr::from)?;
|
609
|
+
Ok(s.map(|s| s.into()))
|
610
|
+
}
|
611
|
+
|
612
|
+
pub fn hmax(&self) -> RbResult<Option<RbSeries>> {
|
613
|
+
let s = self.df.borrow().hmax().map_err(RbPolarsErr::from)?;
|
614
|
+
Ok(s.map(|s| s.into()))
|
615
|
+
}
|
616
|
+
|
617
|
+
pub fn hmin(&self) -> RbResult<Option<RbSeries>> {
|
618
|
+
let s = self.df.borrow().hmin().map_err(RbPolarsErr::from)?;
|
619
|
+
Ok(s.map(|s| s.into()))
|
620
|
+
}
|
621
|
+
|
622
|
+
pub fn hsum(&self, null_strategy: Wrap<NullStrategy>) -> RbResult<Option<RbSeries>> {
|
623
|
+
let s = self
|
624
|
+
.df
|
625
|
+
.borrow()
|
626
|
+
.hsum(null_strategy.0)
|
627
|
+
.map_err(RbPolarsErr::from)?;
|
628
|
+
Ok(s.map(|s| s.into()))
|
629
|
+
}
|
630
|
+
|
631
|
+
pub fn quantile(
|
632
|
+
&self,
|
633
|
+
quantile: f64,
|
634
|
+
interpolation: Wrap<QuantileInterpolOptions>,
|
635
|
+
) -> RbResult<Self> {
|
636
|
+
let df = self
|
637
|
+
.df
|
638
|
+
.borrow()
|
639
|
+
.quantile(quantile, interpolation.0)
|
640
|
+
.map_err(RbPolarsErr::from)?;
|
641
|
+
Ok(df.into())
|
642
|
+
}
|
643
|
+
|
644
|
+
pub fn to_dummies(&self, columns: Option<Vec<String>>) -> RbResult<Self> {
|
645
|
+
let df = match columns {
|
646
|
+
Some(cols) => self
|
647
|
+
.df
|
648
|
+
.borrow()
|
649
|
+
.columns_to_dummies(cols.iter().map(|x| x as &str).collect()),
|
650
|
+
None => self.df.borrow().to_dummies(),
|
651
|
+
}
|
652
|
+
.map_err(RbPolarsErr::from)?;
|
653
|
+
Ok(df.into())
|
654
|
+
}
|
655
|
+
|
300
656
|
pub fn null_count(&self) -> Self {
|
301
657
|
let df = self.df.borrow().null_count();
|
302
658
|
df.into()
|
303
659
|
}
|
660
|
+
|
661
|
+
pub fn shrink_to_fit(&self) {
|
662
|
+
self.df.borrow_mut().shrink_to_fit();
|
663
|
+
}
|
664
|
+
|
665
|
+
pub fn transpose(&self, include_header: bool, names: String) -> RbResult<Self> {
|
666
|
+
let mut df = self.df.borrow().transpose().map_err(RbPolarsErr::from)?;
|
667
|
+
if include_header {
|
668
|
+
let s = Utf8Chunked::from_iter_values(
|
669
|
+
&names,
|
670
|
+
self.df.borrow().get_columns().iter().map(|s| s.name()),
|
671
|
+
)
|
672
|
+
.into_series();
|
673
|
+
df.insert_at_idx(0, s).unwrap();
|
674
|
+
}
|
675
|
+
Ok(df.into())
|
676
|
+
}
|
677
|
+
|
678
|
+
pub fn upsample(
|
679
|
+
&self,
|
680
|
+
by: Vec<String>,
|
681
|
+
index_column: String,
|
682
|
+
every: String,
|
683
|
+
offset: String,
|
684
|
+
stable: bool,
|
685
|
+
) -> RbResult<Self> {
|
686
|
+
let out = if stable {
|
687
|
+
self.df.borrow().upsample_stable(
|
688
|
+
by,
|
689
|
+
&index_column,
|
690
|
+
Duration::parse(&every),
|
691
|
+
Duration::parse(&offset),
|
692
|
+
)
|
693
|
+
} else {
|
694
|
+
self.df.borrow().upsample(
|
695
|
+
by,
|
696
|
+
&index_column,
|
697
|
+
Duration::parse(&every),
|
698
|
+
Duration::parse(&offset),
|
699
|
+
)
|
700
|
+
};
|
701
|
+
let out = out.map_err(RbPolarsErr::from)?;
|
702
|
+
Ok(out.into())
|
703
|
+
}
|
704
|
+
|
705
|
+
pub fn unnest(&self, names: Vec<String>) -> RbResult<Self> {
|
706
|
+
let df = self.df.borrow().unnest(names).map_err(RbPolarsErr::from)?;
|
707
|
+
Ok(df.into())
|
708
|
+
}
|
304
709
|
}
|
data/ext/polars/src/error.rs
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
use magnus::exception::arg_error;
|
2
2
|
use magnus::Error;
|
3
|
+
use polars::error::ArrowError;
|
3
4
|
use polars::prelude::PolarsError;
|
4
5
|
|
5
6
|
pub struct RbPolarsErr {}
|
@@ -10,6 +11,14 @@ impl RbPolarsErr {
|
|
10
11
|
Error::runtime_error(e.to_string())
|
11
12
|
}
|
12
13
|
|
14
|
+
pub fn arrow(e: ArrowError) -> Error {
|
15
|
+
Error::runtime_error(e.to_string())
|
16
|
+
}
|
17
|
+
|
18
|
+
pub fn io(e: std::io::Error) -> Error {
|
19
|
+
Error::runtime_error(e.to_string())
|
20
|
+
}
|
21
|
+
|
13
22
|
pub fn other(message: String) -> Error {
|
14
23
|
Error::runtime_error(message)
|
15
24
|
}
|
data/ext/polars/src/file.rs
CHANGED
@@ -1,18 +1,19 @@
|
|
1
1
|
use magnus::{Error, RString, Value};
|
2
2
|
use polars::io::mmap::MmapBytesReader;
|
3
|
-
use std::fs::
|
3
|
+
use std::fs::File;
|
4
4
|
use std::io::Cursor;
|
5
5
|
use std::path::PathBuf;
|
6
6
|
|
7
7
|
use crate::RbResult;
|
8
8
|
|
9
9
|
pub fn get_file_like(f: Value, truncate: bool) -> RbResult<File> {
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
10
|
+
let str_slice = f.try_convert::<PathBuf>()?;
|
11
|
+
let f = if truncate {
|
12
|
+
File::create(str_slice).map_err(|e| Error::runtime_error(e.to_string()))?
|
13
|
+
} else {
|
14
|
+
File::open(str_slice).map_err(|e| Error::runtime_error(e.to_string()))?
|
15
|
+
};
|
16
|
+
Ok(f)
|
16
17
|
}
|
17
18
|
|
18
19
|
pub fn get_mmap_bytes_reader(rb_f: Value) -> RbResult<Box<dyn MmapBytesReader>> {
|