polars-df 0.7.0 → 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +41 -0
- data/Cargo.lock +353 -237
- data/Cargo.toml +0 -3
- data/LICENSE.txt +1 -1
- data/README.md +2 -2
- data/ext/polars/Cargo.toml +17 -6
- data/ext/polars/src/batched_csv.rs +6 -7
- data/ext/polars/src/conversion/anyvalue.rs +185 -0
- data/ext/polars/src/conversion/chunked_array.rs +140 -0
- data/ext/polars/src/{conversion.rs → conversion/mod.rs} +268 -347
- data/ext/polars/src/dataframe.rs +96 -116
- data/ext/polars/src/expr/array.rs +74 -0
- data/ext/polars/src/expr/categorical.rs +8 -1
- data/ext/polars/src/expr/datetime.rs +22 -56
- data/ext/polars/src/expr/general.rs +124 -37
- data/ext/polars/src/expr/list.rs +52 -4
- data/ext/polars/src/expr/meta.rs +48 -0
- data/ext/polars/src/expr/rolling.rs +16 -10
- data/ext/polars/src/expr/string.rs +68 -17
- data/ext/polars/src/expr/struct.rs +8 -4
- data/ext/polars/src/functions/aggregation.rs +6 -0
- data/ext/polars/src/functions/lazy.rs +103 -48
- data/ext/polars/src/functions/meta.rs +45 -1
- data/ext/polars/src/functions/range.rs +5 -10
- data/ext/polars/src/functions/string_cache.rs +14 -0
- data/ext/polars/src/{lazyframe.rs → lazyframe/mod.rs} +166 -41
- data/ext/polars/src/lib.rs +245 -187
- data/ext/polars/src/map/dataframe.rs +1 -1
- data/ext/polars/src/map/mod.rs +2 -2
- data/ext/polars/src/map/series.rs +6 -6
- data/ext/polars/src/object.rs +0 -30
- data/ext/polars/src/on_startup.rs +32 -0
- data/ext/polars/src/series/aggregation.rs +23 -0
- data/ext/polars/src/series/construction.rs +1 -1
- data/ext/polars/src/series/export.rs +2 -2
- data/ext/polars/src/{series.rs → series/mod.rs} +45 -21
- data/ext/polars/src/series/{set_at_idx.rs → scatter.rs} +18 -18
- data/ext/polars/src/utils.rs +1 -1
- data/lib/polars/array_expr.rb +449 -0
- data/lib/polars/array_name_space.rb +346 -0
- data/lib/polars/cat_expr.rb +24 -0
- data/lib/polars/cat_name_space.rb +75 -0
- data/lib/polars/config.rb +2 -2
- data/lib/polars/data_frame.rb +248 -108
- data/lib/polars/data_types.rb +195 -29
- data/lib/polars/date_time_expr.rb +41 -24
- data/lib/polars/date_time_name_space.rb +12 -12
- data/lib/polars/exceptions.rb +12 -1
- data/lib/polars/expr.rb +1080 -195
- data/lib/polars/functions/aggregation/horizontal.rb +246 -0
- data/lib/polars/functions/aggregation/vertical.rb +282 -0
- data/lib/polars/functions/as_datatype.rb +248 -0
- data/lib/polars/functions/col.rb +47 -0
- data/lib/polars/functions/eager.rb +182 -0
- data/lib/polars/functions/lazy.rb +1280 -0
- data/lib/polars/functions/len.rb +49 -0
- data/lib/polars/functions/lit.rb +35 -0
- data/lib/polars/functions/random.rb +16 -0
- data/lib/polars/functions/range/date_range.rb +103 -0
- data/lib/polars/functions/range/int_range.rb +51 -0
- data/lib/polars/functions/repeat.rb +144 -0
- data/lib/polars/functions/whenthen.rb +27 -0
- data/lib/polars/functions.rb +29 -416
- data/lib/polars/group_by.rb +3 -3
- data/lib/polars/io.rb +21 -28
- data/lib/polars/lazy_frame.rb +390 -76
- data/lib/polars/list_expr.rb +152 -6
- data/lib/polars/list_name_space.rb +102 -0
- data/lib/polars/meta_expr.rb +175 -7
- data/lib/polars/series.rb +557 -59
- data/lib/polars/sql_context.rb +1 -1
- data/lib/polars/string_cache.rb +75 -0
- data/lib/polars/string_expr.rb +412 -96
- data/lib/polars/string_name_space.rb +4 -4
- data/lib/polars/struct_expr.rb +1 -1
- data/lib/polars/struct_name_space.rb +1 -1
- data/lib/polars/testing.rb +507 -0
- data/lib/polars/utils.rb +64 -20
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +15 -2
- metadata +40 -9
- data/lib/polars/lazy_functions.rb +0 -1197
data/ext/polars/src/dataframe.rs
CHANGED
@@ -6,12 +6,13 @@ use polars::frame::row::{rows_to_schema_supertypes, Row};
|
|
6
6
|
use polars::frame::NullStrategy;
|
7
7
|
use polars::io::avro::AvroCompression;
|
8
8
|
use polars::io::mmap::ReaderBytes;
|
9
|
-
use polars::io::
|
9
|
+
use polars::io::RowIndex;
|
10
10
|
use polars::prelude::pivot::{pivot, pivot_stable};
|
11
11
|
use polars::prelude::*;
|
12
12
|
use polars_core::utils::try_get_supertype;
|
13
13
|
use std::cell::RefCell;
|
14
14
|
use std::io::{BufWriter, Cursor};
|
15
|
+
use std::num::NonZeroUsize;
|
15
16
|
use std::ops::Deref;
|
16
17
|
|
17
18
|
use crate::conversion::*;
|
@@ -45,44 +46,51 @@ impl RbDataFrame {
|
|
45
46
|
fn finish_from_rows(
|
46
47
|
rows: Vec<Row>,
|
47
48
|
infer_schema_length: Option<usize>,
|
48
|
-
|
49
|
+
schema: Option<Schema>,
|
50
|
+
schema_overrides_by_idx: Option<Vec<(usize, DataType)>>,
|
49
51
|
) -> RbResult<Self> {
|
50
|
-
//
|
51
|
-
crate::
|
52
|
+
// Object builder must be registered
|
53
|
+
crate::on_startup::register_object_builder();
|
52
54
|
|
53
|
-
let
|
55
|
+
let mut final_schema =
|
54
56
|
rows_to_schema_supertypes(&rows, infer_schema_length.map(|n| std::cmp::max(1, n)))
|
55
57
|
.map_err(RbPolarsErr::from)?;
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
}
|
62
|
-
DataType::Decimal(_, _) => {
|
63
|
-
fld.coerce(DataType::Decimal(None, None));
|
64
|
-
fld
|
58
|
+
|
59
|
+
// Erase scale from inferred decimals.
|
60
|
+
for dtype in final_schema.iter_dtypes_mut() {
|
61
|
+
if let DataType::Decimal(_, _) = dtype {
|
62
|
+
*dtype = DataType::Decimal(None, None)
|
65
63
|
}
|
66
|
-
|
67
|
-
});
|
68
|
-
let mut schema = Schema::from_iter(fields);
|
64
|
+
}
|
69
65
|
|
70
|
-
|
71
|
-
|
72
|
-
|
66
|
+
// Integrate explicit/inferred schema.
|
67
|
+
if let Some(schema) = schema {
|
68
|
+
for (i, (name, dtype)) in schema.into_iter().enumerate() {
|
69
|
+
if let Some((name_, dtype_)) = final_schema.get_at_index_mut(i) {
|
73
70
|
*name_ = name;
|
74
71
|
|
75
|
-
//
|
72
|
+
// If schema dtype is Unknown, overwrite with inferred datatype.
|
76
73
|
if !matches!(dtype, DataType::Unknown) {
|
77
74
|
*dtype_ = dtype;
|
78
75
|
}
|
79
76
|
} else {
|
80
|
-
|
77
|
+
final_schema.with_column(name, dtype);
|
81
78
|
}
|
82
79
|
}
|
83
80
|
}
|
84
81
|
|
85
|
-
|
82
|
+
// Optional per-field overrides; these supersede default/inferred dtypes.
|
83
|
+
if let Some(overrides) = schema_overrides_by_idx {
|
84
|
+
for (i, dtype) in overrides {
|
85
|
+
if let Some((_, dtype_)) = final_schema.get_at_index_mut(i) {
|
86
|
+
if !matches!(dtype, DataType::Unknown) {
|
87
|
+
*dtype_ = dtype;
|
88
|
+
}
|
89
|
+
}
|
90
|
+
}
|
91
|
+
}
|
92
|
+
let df =
|
93
|
+
DataFrame::from_rows_and_schema(&rows, &final_schema).map_err(RbPolarsErr::from)?;
|
86
94
|
Ok(df.into())
|
87
95
|
}
|
88
96
|
|
@@ -120,21 +128,20 @@ impl RbDataFrame {
|
|
120
128
|
// TODO fix
|
121
129
|
let overwrite_dtype_slice = Option::<Vec<Wrap<DataType>>>::None; // Option::<Vec<Wrap<DataType>>>::try_convert(arguments[15])?;
|
122
130
|
let low_memory = bool::try_convert(arguments[16])?;
|
123
|
-
let
|
131
|
+
let comment_prefix = Option::<String>::try_convert(arguments[17])?;
|
124
132
|
let quote_char = Option::<String>::try_convert(arguments[18])?;
|
125
133
|
let null_values = Option::<Wrap<NullValues>>::try_convert(arguments[19])?;
|
126
134
|
let try_parse_dates = bool::try_convert(arguments[20])?;
|
127
135
|
let skip_rows_after_header = usize::try_convert(arguments[21])?;
|
128
|
-
let
|
136
|
+
let row_index = Option::<(String, IdxSize)>::try_convert(arguments[22])?;
|
129
137
|
let sample_size = usize::try_convert(arguments[23])?;
|
130
138
|
let eol_char = String::try_convert(arguments[24])?;
|
131
139
|
// end arguments
|
132
140
|
|
133
141
|
let null_values = null_values.map(|w| w.0);
|
134
|
-
let comment_char = comment_char.map(|s| s.as_bytes()[0]);
|
135
142
|
let eol_char = eol_char.as_bytes()[0];
|
136
143
|
|
137
|
-
let
|
144
|
+
let row_index = row_index.map(|(name, offset)| RowIndex { name, offset });
|
138
145
|
|
139
146
|
let quote_char = if let Some(s) = quote_char {
|
140
147
|
if s.is_empty() {
|
@@ -181,13 +188,13 @@ impl RbDataFrame {
|
|
181
188
|
.with_dtypes(overwrite_dtype.map(Arc::new))
|
182
189
|
.with_dtypes_slice(overwrite_dtype_slice.as_deref())
|
183
190
|
.low_memory(low_memory)
|
184
|
-
.
|
191
|
+
.with_comment_prefix(comment_prefix.as_deref())
|
185
192
|
.with_null_values(null_values)
|
186
193
|
.with_try_parse_dates(try_parse_dates)
|
187
194
|
.with_quote_char(quote_char)
|
188
195
|
.with_end_of_line_char(eol_char)
|
189
196
|
.with_skip_rows_after_header(skip_rows_after_header)
|
190
|
-
.
|
197
|
+
.with_row_index(row_index)
|
191
198
|
.sample_size(sample_size)
|
192
199
|
.finish()
|
193
200
|
.map_err(RbPolarsErr::from)?;
|
@@ -201,19 +208,19 @@ impl RbDataFrame {
|
|
201
208
|
projection: Option<Vec<usize>>,
|
202
209
|
n_rows: Option<usize>,
|
203
210
|
parallel: Wrap<ParallelStrategy>,
|
204
|
-
|
211
|
+
row_index: Option<(String, IdxSize)>,
|
205
212
|
low_memory: bool,
|
206
213
|
use_statistics: bool,
|
207
214
|
rechunk: bool,
|
208
215
|
) -> RbResult<Self> {
|
209
|
-
let
|
216
|
+
let row_index = row_index.map(|(name, offset)| RowIndex { name, offset });
|
210
217
|
let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
|
211
218
|
let df = ParquetReader::new(mmap_bytes_r)
|
212
219
|
.with_projection(projection)
|
213
220
|
.with_columns(columns)
|
214
221
|
.read_parallel(parallel.0)
|
215
222
|
.with_n_rows(n_rows)
|
216
|
-
.
|
223
|
+
.with_row_index(row_index)
|
217
224
|
.set_low_memory(low_memory)
|
218
225
|
.use_statistics(use_statistics)
|
219
226
|
.set_rechunk(rechunk)
|
@@ -227,16 +234,16 @@ impl RbDataFrame {
|
|
227
234
|
columns: Option<Vec<String>>,
|
228
235
|
projection: Option<Vec<usize>>,
|
229
236
|
n_rows: Option<usize>,
|
230
|
-
|
237
|
+
row_index: Option<(String, IdxSize)>,
|
231
238
|
memory_map: bool,
|
232
239
|
) -> RbResult<Self> {
|
233
|
-
let
|
240
|
+
let row_index = row_index.map(|(name, offset)| RowIndex { name, offset });
|
234
241
|
let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
|
235
242
|
let df = IpcReader::new(mmap_bytes_r)
|
236
243
|
.with_projection(projection)
|
237
244
|
.with_columns(columns)
|
238
245
|
.with_n_rows(n_rows)
|
239
|
-
.
|
246
|
+
.with_row_index(row_index)
|
240
247
|
.memory_mapped(memory_map)
|
241
248
|
.finish()
|
242
249
|
.map_err(RbPolarsErr::from)?;
|
@@ -297,12 +304,18 @@ impl RbDataFrame {
|
|
297
304
|
Ok(df) => Ok(df.into()),
|
298
305
|
// try arrow json reader instead
|
299
306
|
// this is row oriented
|
300
|
-
Err(
|
301
|
-
let
|
302
|
-
|
303
|
-
.
|
304
|
-
|
305
|
-
|
307
|
+
Err(e) => {
|
308
|
+
let msg = format!("{e}");
|
309
|
+
if msg.contains("successful parse invalid data") {
|
310
|
+
let e = RbPolarsErr::from(PolarsError::ComputeError(msg.into()));
|
311
|
+
Err(e)
|
312
|
+
} else {
|
313
|
+
let out = JsonReader::new(mmap_bytes_r)
|
314
|
+
.with_json_format(JsonFormat::Json)
|
315
|
+
.finish()
|
316
|
+
.map_err(|e| RbPolarsErr::other(format!("{:?}", e)))?;
|
317
|
+
Ok(out.into())
|
318
|
+
}
|
306
319
|
}
|
307
320
|
}
|
308
321
|
}
|
@@ -347,7 +360,7 @@ impl RbDataFrame {
|
|
347
360
|
pub fn read_rows(
|
348
361
|
rb_rows: RArray,
|
349
362
|
infer_schema_length: Option<usize>,
|
350
|
-
|
363
|
+
schema: Option<Wrap<Schema>>,
|
351
364
|
) -> RbResult<Self> {
|
352
365
|
let mut rows = Vec::with_capacity(rb_rows.len());
|
353
366
|
for v in rb_rows.each() {
|
@@ -358,30 +371,34 @@ impl RbDataFrame {
|
|
358
371
|
}
|
359
372
|
rows.push(Row(row));
|
360
373
|
}
|
361
|
-
Self::finish_from_rows(
|
362
|
-
rows,
|
363
|
-
infer_schema_length,
|
364
|
-
schema_overwrite.map(|wrap| wrap.0),
|
365
|
-
)
|
374
|
+
Self::finish_from_rows(rows, infer_schema_length, schema.map(|wrap| wrap.0), None)
|
366
375
|
}
|
367
376
|
|
368
377
|
pub fn read_hashes(
|
369
378
|
dicts: Value,
|
370
379
|
infer_schema_length: Option<usize>,
|
371
|
-
|
380
|
+
schema: Option<Wrap<Schema>>,
|
381
|
+
schema_overrides: Option<Wrap<Schema>>,
|
372
382
|
) -> RbResult<Self> {
|
373
|
-
let
|
383
|
+
let mut schema_columns = PlIndexSet::new();
|
384
|
+
if let Some(s) = &schema {
|
385
|
+
schema_columns.extend(s.0.iter_names().map(|n| n.to_string()))
|
386
|
+
}
|
387
|
+
let (rows, names) = dicts_to_rows(&dicts, infer_schema_length, schema_columns)?;
|
374
388
|
|
375
|
-
|
376
|
-
if let Some(
|
377
|
-
for (
|
378
|
-
|
389
|
+
let mut schema_overrides_by_idx: Vec<(usize, DataType)> = Vec::new();
|
390
|
+
if let Some(overrides) = schema_overrides {
|
391
|
+
for (idx, name) in names.iter().enumerate() {
|
392
|
+
if let Some(dtype) = overrides.0.get(name) {
|
393
|
+
schema_overrides_by_idx.push((idx, dtype.clone()));
|
394
|
+
}
|
379
395
|
}
|
380
396
|
}
|
381
397
|
let rbdf = Self::finish_from_rows(
|
382
398
|
rows,
|
383
399
|
infer_schema_length,
|
384
|
-
|
400
|
+
schema.map(|wrap| wrap.0),
|
401
|
+
Some(schema_overrides_by_idx),
|
385
402
|
)?;
|
386
403
|
|
387
404
|
unsafe {
|
@@ -422,13 +439,14 @@ impl RbDataFrame {
|
|
422
439
|
include_header: bool,
|
423
440
|
separator: u8,
|
424
441
|
quote_char: u8,
|
425
|
-
batch_size:
|
442
|
+
batch_size: Wrap<NonZeroUsize>,
|
426
443
|
datetime_format: Option<String>,
|
427
444
|
date_format: Option<String>,
|
428
445
|
time_format: Option<String>,
|
429
446
|
float_precision: Option<usize>,
|
430
447
|
null_value: Option<String>,
|
431
448
|
) -> RbResult<()> {
|
449
|
+
let batch_size = batch_size.0;
|
432
450
|
let null = null_value.unwrap_or_default();
|
433
451
|
|
434
452
|
if let Ok(s) = String::try_convert(rb_f) {
|
@@ -504,7 +522,7 @@ impl RbDataFrame {
|
|
504
522
|
.get_columns()
|
505
523
|
.iter()
|
506
524
|
.map(|s| match s.dtype() {
|
507
|
-
DataType::Object(_) => {
|
525
|
+
DataType::Object(_, _) => {
|
508
526
|
let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
|
509
527
|
obj.unwrap().to_object()
|
510
528
|
}
|
@@ -523,7 +541,7 @@ impl RbDataFrame {
|
|
523
541
|
.get_columns()
|
524
542
|
.iter()
|
525
543
|
.map(|s| match s.dtype() {
|
526
|
-
DataType::Object(_) => {
|
544
|
+
DataType::Object(_, _) => {
|
527
545
|
let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
|
528
546
|
obj.unwrap().to_object()
|
529
547
|
}
|
@@ -785,16 +803,15 @@ impl RbDataFrame {
|
|
785
803
|
.map(|s| RbSeries::new(s.clone()))
|
786
804
|
}
|
787
805
|
|
788
|
-
pub fn
|
789
|
-
self.df.borrow().
|
806
|
+
pub fn get_column_index(&self, name: String) -> Option<usize> {
|
807
|
+
self.df.borrow().get_column_index(&name)
|
790
808
|
}
|
791
809
|
|
792
|
-
|
793
|
-
pub fn column(&self, name: String) -> RbResult<RbSeries> {
|
810
|
+
pub fn get_column(&self, name: String) -> RbResult<RbSeries> {
|
794
811
|
self.df
|
795
812
|
.borrow()
|
796
813
|
.column(&name)
|
797
|
-
.map(|
|
814
|
+
.map(|s| RbSeries::new(s.clone()))
|
798
815
|
.map_err(RbPolarsErr::from)
|
799
816
|
}
|
800
817
|
|
@@ -828,18 +845,18 @@ impl RbDataFrame {
|
|
828
845
|
Ok(())
|
829
846
|
}
|
830
847
|
|
831
|
-
pub fn
|
848
|
+
pub fn replace_column(&self, index: usize, new_col: &RbSeries) -> RbResult<()> {
|
832
849
|
self.df
|
833
850
|
.borrow_mut()
|
834
|
-
.
|
851
|
+
.replace_column(index, new_col.series.borrow().clone())
|
835
852
|
.map_err(RbPolarsErr::from)?;
|
836
853
|
Ok(())
|
837
854
|
}
|
838
855
|
|
839
|
-
pub fn
|
856
|
+
pub fn insert_column(&self, index: usize, new_col: &RbSeries) -> RbResult<()> {
|
840
857
|
self.df
|
841
858
|
.borrow_mut()
|
842
|
-
.
|
859
|
+
.insert_column(index, new_col.series.borrow().clone())
|
843
860
|
.map_err(RbPolarsErr::from)?;
|
844
861
|
Ok(())
|
845
862
|
}
|
@@ -874,19 +891,19 @@ impl RbDataFrame {
|
|
874
891
|
Ok(mask.into_series().into())
|
875
892
|
}
|
876
893
|
|
877
|
-
pub fn
|
894
|
+
pub fn equals(&self, other: &RbDataFrame, null_equal: bool) -> bool {
|
878
895
|
if null_equal {
|
879
|
-
self.df.borrow().
|
896
|
+
self.df.borrow().equals_missing(&other.df.borrow())
|
880
897
|
} else {
|
881
|
-
self.df.borrow().
|
898
|
+
self.df.borrow().equals(&other.df.borrow())
|
882
899
|
}
|
883
900
|
}
|
884
901
|
|
885
|
-
pub fn
|
902
|
+
pub fn with_row_index(&self, name: String, offset: Option<IdxSize>) -> RbResult<Self> {
|
886
903
|
let df = self
|
887
904
|
.df
|
888
905
|
.borrow()
|
889
|
-
.
|
906
|
+
.with_row_index(&name, offset)
|
890
907
|
.map_err(RbPolarsErr::from)?;
|
891
908
|
Ok(df.into())
|
892
909
|
}
|
@@ -917,9 +934,9 @@ impl RbDataFrame {
|
|
917
934
|
#[allow(clippy::too_many_arguments)]
|
918
935
|
pub fn pivot_expr(
|
919
936
|
&self,
|
920
|
-
values: Vec<String>,
|
921
937
|
index: Vec<String>,
|
922
938
|
columns: Vec<String>,
|
939
|
+
values: Option<Vec<String>>,
|
923
940
|
maintain_order: bool,
|
924
941
|
sort_columns: bool,
|
925
942
|
aggregate_expr: Option<&RbExpr>,
|
@@ -932,9 +949,9 @@ impl RbDataFrame {
|
|
932
949
|
let agg_expr = aggregate_expr.map(|aggregate_expr| aggregate_expr.inner.clone());
|
933
950
|
let df = fun(
|
934
951
|
&self.df.borrow(),
|
935
|
-
values,
|
936
952
|
index,
|
937
953
|
columns,
|
954
|
+
values,
|
938
955
|
sort_columns,
|
939
956
|
agg_expr,
|
940
957
|
separator.as_deref(),
|
@@ -966,34 +983,6 @@ impl RbDataFrame {
|
|
966
983
|
self.df.borrow().clone().lazy().into()
|
967
984
|
}
|
968
985
|
|
969
|
-
pub fn max(&self) -> Self {
|
970
|
-
self.df.borrow().max().into()
|
971
|
-
}
|
972
|
-
|
973
|
-
pub fn min(&self) -> Self {
|
974
|
-
self.df.borrow().min().into()
|
975
|
-
}
|
976
|
-
|
977
|
-
pub fn sum(&self) -> Self {
|
978
|
-
self.df.borrow().sum().into()
|
979
|
-
}
|
980
|
-
|
981
|
-
pub fn mean(&self) -> Self {
|
982
|
-
self.df.borrow().mean().into()
|
983
|
-
}
|
984
|
-
|
985
|
-
pub fn std(&self, ddof: u8) -> Self {
|
986
|
-
self.df.borrow().std(ddof).into()
|
987
|
-
}
|
988
|
-
|
989
|
-
pub fn var(&self, ddof: u8) -> Self {
|
990
|
-
self.df.borrow().var(ddof).into()
|
991
|
-
}
|
992
|
-
|
993
|
-
pub fn median(&self) -> Self {
|
994
|
-
self.df.borrow().median().into()
|
995
|
-
}
|
996
|
-
|
997
986
|
pub fn max_horizontal(&self) -> RbResult<Option<RbSeries>> {
|
998
987
|
let s = self
|
999
988
|
.df
|
@@ -1040,19 +1029,6 @@ impl RbDataFrame {
|
|
1040
1029
|
Ok(s.map(|s| s.into()))
|
1041
1030
|
}
|
1042
1031
|
|
1043
|
-
pub fn quantile(
|
1044
|
-
&self,
|
1045
|
-
quantile: f64,
|
1046
|
-
interpolation: Wrap<QuantileInterpolOptions>,
|
1047
|
-
) -> RbResult<Self> {
|
1048
|
-
let df = self
|
1049
|
-
.df
|
1050
|
-
.borrow()
|
1051
|
-
.quantile(quantile, interpolation.0)
|
1052
|
-
.map_err(RbPolarsErr::from)?;
|
1053
|
-
Ok(df.into())
|
1054
|
-
}
|
1055
|
-
|
1056
1032
|
pub fn to_dummies(
|
1057
1033
|
&self,
|
1058
1034
|
columns: Option<Vec<String>>,
|
@@ -1124,7 +1100,7 @@ impl RbDataFrame {
|
|
1124
1100
|
.into_datetime(tu, tz)
|
1125
1101
|
.into_series()
|
1126
1102
|
}
|
1127
|
-
Some(DataType::
|
1103
|
+
Some(DataType::String) => {
|
1128
1104
|
apply_lambda_with_utf8_out_type(df, lambda, 0, None).into_series()
|
1129
1105
|
}
|
1130
1106
|
_ => return apply_lambda_unknown(df, lambda, inference_size),
|
@@ -1157,7 +1133,7 @@ impl RbDataFrame {
|
|
1157
1133
|
};
|
1158
1134
|
Ok(self
|
1159
1135
|
.df
|
1160
|
-
.
|
1136
|
+
.borrow_mut()
|
1161
1137
|
.transpose(keep_names_as.as_deref(), new_col_names)
|
1162
1138
|
.map_err(RbPolarsErr::from)?
|
1163
1139
|
.into())
|
@@ -1199,4 +1175,8 @@ impl RbDataFrame {
|
|
1199
1175
|
let df = self.df.borrow().unnest(names).map_err(RbPolarsErr::from)?;
|
1200
1176
|
Ok(df.into())
|
1201
1177
|
}
|
1178
|
+
|
1179
|
+
pub fn clear(&self) -> Self {
|
1180
|
+
self.df.borrow().clear().into()
|
1181
|
+
}
|
1202
1182
|
}
|
@@ -1,3 +1,5 @@
|
|
1
|
+
use polars::prelude::*;
|
2
|
+
|
1
3
|
use crate::RbExpr;
|
2
4
|
|
3
5
|
impl RbExpr {
|
@@ -12,4 +14,76 @@ impl RbExpr {
|
|
12
14
|
pub fn array_sum(&self) -> Self {
|
13
15
|
self.inner.clone().arr().sum().into()
|
14
16
|
}
|
17
|
+
|
18
|
+
pub fn arr_unique(&self, maintain_order: bool) -> Self {
|
19
|
+
if maintain_order {
|
20
|
+
self.inner.clone().arr().unique_stable().into()
|
21
|
+
} else {
|
22
|
+
self.inner.clone().arr().unique().into()
|
23
|
+
}
|
24
|
+
}
|
25
|
+
|
26
|
+
pub fn arr_to_list(&self) -> Self {
|
27
|
+
self.inner.clone().arr().to_list().into()
|
28
|
+
}
|
29
|
+
|
30
|
+
pub fn arr_all(&self) -> Self {
|
31
|
+
self.inner.clone().arr().all().into()
|
32
|
+
}
|
33
|
+
|
34
|
+
pub fn arr_any(&self) -> Self {
|
35
|
+
self.inner.clone().arr().any().into()
|
36
|
+
}
|
37
|
+
|
38
|
+
pub fn arr_sort(&self, descending: bool, nulls_last: bool) -> Self {
|
39
|
+
self.inner
|
40
|
+
.clone()
|
41
|
+
.arr()
|
42
|
+
.sort(SortOptions {
|
43
|
+
descending,
|
44
|
+
nulls_last,
|
45
|
+
..Default::default()
|
46
|
+
})
|
47
|
+
.into()
|
48
|
+
}
|
49
|
+
|
50
|
+
pub fn arr_reverse(&self) -> Self {
|
51
|
+
self.inner.clone().arr().reverse().into()
|
52
|
+
}
|
53
|
+
|
54
|
+
pub fn arr_arg_min(&self) -> Self {
|
55
|
+
self.inner.clone().arr().arg_min().into()
|
56
|
+
}
|
57
|
+
|
58
|
+
pub fn arr_arg_max(&self) -> Self {
|
59
|
+
self.inner.clone().arr().arg_max().into()
|
60
|
+
}
|
61
|
+
|
62
|
+
pub fn arr_get(&self, index: &RbExpr) -> Self {
|
63
|
+
self.inner.clone().arr().get(index.inner.clone()).into()
|
64
|
+
}
|
65
|
+
|
66
|
+
pub fn arr_join(&self, separator: &RbExpr, ignore_nulls: bool) -> Self {
|
67
|
+
self.inner
|
68
|
+
.clone()
|
69
|
+
.arr()
|
70
|
+
.join(separator.inner.clone(), ignore_nulls)
|
71
|
+
.into()
|
72
|
+
}
|
73
|
+
|
74
|
+
pub fn arr_contains(&self, other: &RbExpr) -> Self {
|
75
|
+
self.inner
|
76
|
+
.clone()
|
77
|
+
.arr()
|
78
|
+
.contains(other.inner.clone())
|
79
|
+
.into()
|
80
|
+
}
|
81
|
+
|
82
|
+
pub fn arr_count_matches(&self, expr: &RbExpr) -> Self {
|
83
|
+
self.inner
|
84
|
+
.clone()
|
85
|
+
.arr()
|
86
|
+
.count_matches(expr.inner.clone())
|
87
|
+
.into()
|
88
|
+
}
|
15
89
|
}
|
@@ -5,6 +5,13 @@ use crate::RbExpr;
|
|
5
5
|
|
6
6
|
impl RbExpr {
|
7
7
|
pub fn cat_set_ordering(&self, ordering: Wrap<CategoricalOrdering>) -> Self {
|
8
|
-
self.inner
|
8
|
+
self.inner
|
9
|
+
.clone()
|
10
|
+
.cast(DataType::Categorical(None, ordering.0))
|
11
|
+
.into()
|
12
|
+
}
|
13
|
+
|
14
|
+
pub fn cat_get_categories(&self) -> Self {
|
15
|
+
self.inner.clone().cat().get_categories().into()
|
9
16
|
}
|
10
17
|
}
|
@@ -61,6 +61,14 @@ impl RbExpr {
|
|
61
61
|
self.inner.clone().dt().month_end().into()
|
62
62
|
}
|
63
63
|
|
64
|
+
pub fn dt_base_utc_offset(&self) -> Self {
|
65
|
+
self.inner.clone().dt().base_utc_offset().into()
|
66
|
+
}
|
67
|
+
|
68
|
+
pub fn dt_dst_offset(&self) -> Self {
|
69
|
+
self.inner.clone().dt().dst_offset().into()
|
70
|
+
}
|
71
|
+
|
64
72
|
pub fn dt_round(&self, every: String, offset: String) -> Self {
|
65
73
|
self.inner.clone().dt().round(&every, &offset).into()
|
66
74
|
}
|
@@ -149,73 +157,31 @@ impl RbExpr {
|
|
149
157
|
self.inner.clone().dt().timestamp(tu.0).into()
|
150
158
|
}
|
151
159
|
|
152
|
-
pub fn
|
153
|
-
self.inner
|
154
|
-
.clone()
|
155
|
-
.map(
|
156
|
-
|s| Ok(Some(s.duration()?.days().into_series())),
|
157
|
-
GetOutput::from_type(DataType::Int64),
|
158
|
-
)
|
159
|
-
.into()
|
160
|
+
pub fn dt_total_days(&self) -> Self {
|
161
|
+
self.inner.clone().dt().total_days().into()
|
160
162
|
}
|
161
163
|
|
162
|
-
pub fn
|
163
|
-
self.inner
|
164
|
-
.clone()
|
165
|
-
.map(
|
166
|
-
|s| Ok(Some(s.duration()?.hours().into_series())),
|
167
|
-
GetOutput::from_type(DataType::Int64),
|
168
|
-
)
|
169
|
-
.into()
|
164
|
+
pub fn dt_total_hours(&self) -> Self {
|
165
|
+
self.inner.clone().dt().total_hours().into()
|
170
166
|
}
|
171
167
|
|
172
|
-
pub fn
|
173
|
-
self.inner
|
174
|
-
.clone()
|
175
|
-
.map(
|
176
|
-
|s| Ok(Some(s.duration()?.minutes().into_series())),
|
177
|
-
GetOutput::from_type(DataType::Int64),
|
178
|
-
)
|
179
|
-
.into()
|
168
|
+
pub fn dt_total_minutes(&self) -> Self {
|
169
|
+
self.inner.clone().dt().total_minutes().into()
|
180
170
|
}
|
181
171
|
|
182
|
-
pub fn
|
183
|
-
self.inner
|
184
|
-
.clone()
|
185
|
-
.map(
|
186
|
-
|s| Ok(Some(s.duration()?.seconds().into_series())),
|
187
|
-
GetOutput::from_type(DataType::Int64),
|
188
|
-
)
|
189
|
-
.into()
|
172
|
+
pub fn dt_total_seconds(&self) -> Self {
|
173
|
+
self.inner.clone().dt().total_seconds().into()
|
190
174
|
}
|
191
175
|
|
192
|
-
pub fn
|
193
|
-
self.inner
|
194
|
-
.clone()
|
195
|
-
.map(
|
196
|
-
|s| Ok(Some(s.duration()?.milliseconds().into_series())),
|
197
|
-
GetOutput::from_type(DataType::Int64),
|
198
|
-
)
|
199
|
-
.into()
|
176
|
+
pub fn dt_total_milliseconds(&self) -> Self {
|
177
|
+
self.inner.clone().dt().total_milliseconds().into()
|
200
178
|
}
|
201
179
|
|
202
|
-
pub fn
|
203
|
-
self.inner
|
204
|
-
.clone()
|
205
|
-
.map(
|
206
|
-
|s| Ok(Some(s.duration()?.microseconds().into_series())),
|
207
|
-
GetOutput::from_type(DataType::Int64),
|
208
|
-
)
|
209
|
-
.into()
|
180
|
+
pub fn dt_total_microseconds(&self) -> Self {
|
181
|
+
self.inner.clone().dt().total_microseconds().into()
|
210
182
|
}
|
211
183
|
|
212
|
-
pub fn
|
213
|
-
self.inner
|
214
|
-
.clone()
|
215
|
-
.map(
|
216
|
-
|s| Ok(Some(s.duration()?.nanoseconds().into_series())),
|
217
|
-
GetOutput::from_type(DataType::Int64),
|
218
|
-
)
|
219
|
-
.into()
|
184
|
+
pub fn dt_total_nanoseconds(&self) -> Self {
|
185
|
+
self.inner.clone().dt().total_nanoseconds().into()
|
220
186
|
}
|
221
187
|
}
|