polars-df 0.7.0 → 0.9.0
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +41 -0
- data/Cargo.lock +353 -237
- data/Cargo.toml +0 -3
- data/LICENSE.txt +1 -1
- data/README.md +2 -2
- data/ext/polars/Cargo.toml +17 -6
- data/ext/polars/src/batched_csv.rs +6 -7
- data/ext/polars/src/conversion/anyvalue.rs +185 -0
- data/ext/polars/src/conversion/chunked_array.rs +140 -0
- data/ext/polars/src/{conversion.rs → conversion/mod.rs} +268 -347
- data/ext/polars/src/dataframe.rs +96 -116
- data/ext/polars/src/expr/array.rs +74 -0
- data/ext/polars/src/expr/categorical.rs +8 -1
- data/ext/polars/src/expr/datetime.rs +22 -56
- data/ext/polars/src/expr/general.rs +124 -37
- data/ext/polars/src/expr/list.rs +52 -4
- data/ext/polars/src/expr/meta.rs +48 -0
- data/ext/polars/src/expr/rolling.rs +16 -10
- data/ext/polars/src/expr/string.rs +68 -17
- data/ext/polars/src/expr/struct.rs +8 -4
- data/ext/polars/src/functions/aggregation.rs +6 -0
- data/ext/polars/src/functions/lazy.rs +103 -48
- data/ext/polars/src/functions/meta.rs +45 -1
- data/ext/polars/src/functions/range.rs +5 -10
- data/ext/polars/src/functions/string_cache.rs +14 -0
- data/ext/polars/src/{lazyframe.rs → lazyframe/mod.rs} +166 -41
- data/ext/polars/src/lib.rs +245 -187
- data/ext/polars/src/map/dataframe.rs +1 -1
- data/ext/polars/src/map/mod.rs +2 -2
- data/ext/polars/src/map/series.rs +6 -6
- data/ext/polars/src/object.rs +0 -30
- data/ext/polars/src/on_startup.rs +32 -0
- data/ext/polars/src/series/aggregation.rs +23 -0
- data/ext/polars/src/series/construction.rs +1 -1
- data/ext/polars/src/series/export.rs +2 -2
- data/ext/polars/src/{series.rs → series/mod.rs} +45 -21
- data/ext/polars/src/series/{set_at_idx.rs → scatter.rs} +18 -18
- data/ext/polars/src/utils.rs +1 -1
- data/lib/polars/array_expr.rb +449 -0
- data/lib/polars/array_name_space.rb +346 -0
- data/lib/polars/cat_expr.rb +24 -0
- data/lib/polars/cat_name_space.rb +75 -0
- data/lib/polars/config.rb +2 -2
- data/lib/polars/data_frame.rb +248 -108
- data/lib/polars/data_types.rb +195 -29
- data/lib/polars/date_time_expr.rb +41 -24
- data/lib/polars/date_time_name_space.rb +12 -12
- data/lib/polars/exceptions.rb +12 -1
- data/lib/polars/expr.rb +1080 -195
- data/lib/polars/functions/aggregation/horizontal.rb +246 -0
- data/lib/polars/functions/aggregation/vertical.rb +282 -0
- data/lib/polars/functions/as_datatype.rb +248 -0
- data/lib/polars/functions/col.rb +47 -0
- data/lib/polars/functions/eager.rb +182 -0
- data/lib/polars/functions/lazy.rb +1280 -0
- data/lib/polars/functions/len.rb +49 -0
- data/lib/polars/functions/lit.rb +35 -0
- data/lib/polars/functions/random.rb +16 -0
- data/lib/polars/functions/range/date_range.rb +103 -0
- data/lib/polars/functions/range/int_range.rb +51 -0
- data/lib/polars/functions/repeat.rb +144 -0
- data/lib/polars/functions/whenthen.rb +27 -0
- data/lib/polars/functions.rb +29 -416
- data/lib/polars/group_by.rb +3 -3
- data/lib/polars/io.rb +21 -28
- data/lib/polars/lazy_frame.rb +390 -76
- data/lib/polars/list_expr.rb +152 -6
- data/lib/polars/list_name_space.rb +102 -0
- data/lib/polars/meta_expr.rb +175 -7
- data/lib/polars/series.rb +557 -59
- data/lib/polars/sql_context.rb +1 -1
- data/lib/polars/string_cache.rb +75 -0
- data/lib/polars/string_expr.rb +412 -96
- data/lib/polars/string_name_space.rb +4 -4
- data/lib/polars/struct_expr.rb +1 -1
- data/lib/polars/struct_name_space.rb +1 -1
- data/lib/polars/testing.rb +507 -0
- data/lib/polars/utils.rb +64 -20
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +15 -2
- metadata +40 -9
- data/lib/polars/lazy_functions.rb +0 -1197
data/ext/polars/src/dataframe.rs
CHANGED
@@ -6,12 +6,13 @@ use polars::frame::row::{rows_to_schema_supertypes, Row};
|
|
6
6
|
use polars::frame::NullStrategy;
|
7
7
|
use polars::io::avro::AvroCompression;
|
8
8
|
use polars::io::mmap::ReaderBytes;
|
9
|
-
use polars::io::
|
9
|
+
use polars::io::RowIndex;
|
10
10
|
use polars::prelude::pivot::{pivot, pivot_stable};
|
11
11
|
use polars::prelude::*;
|
12
12
|
use polars_core::utils::try_get_supertype;
|
13
13
|
use std::cell::RefCell;
|
14
14
|
use std::io::{BufWriter, Cursor};
|
15
|
+
use std::num::NonZeroUsize;
|
15
16
|
use std::ops::Deref;
|
16
17
|
|
17
18
|
use crate::conversion::*;
|
@@ -45,44 +46,51 @@ impl RbDataFrame {
|
|
45
46
|
fn finish_from_rows(
|
46
47
|
rows: Vec<Row>,
|
47
48
|
infer_schema_length: Option<usize>,
|
48
|
-
|
49
|
+
schema: Option<Schema>,
|
50
|
+
schema_overrides_by_idx: Option<Vec<(usize, DataType)>>,
|
49
51
|
) -> RbResult<Self> {
|
50
|
-
//
|
51
|
-
crate::
|
52
|
+
// Object builder must be registered
|
53
|
+
crate::on_startup::register_object_builder();
|
52
54
|
|
53
|
-
let
|
55
|
+
let mut final_schema =
|
54
56
|
rows_to_schema_supertypes(&rows, infer_schema_length.map(|n| std::cmp::max(1, n)))
|
55
57
|
.map_err(RbPolarsErr::from)?;
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
}
|
62
|
-
DataType::Decimal(_, _) => {
|
63
|
-
fld.coerce(DataType::Decimal(None, None));
|
64
|
-
fld
|
58
|
+
|
59
|
+
// Erase scale from inferred decimals.
|
60
|
+
for dtype in final_schema.iter_dtypes_mut() {
|
61
|
+
if let DataType::Decimal(_, _) = dtype {
|
62
|
+
*dtype = DataType::Decimal(None, None)
|
65
63
|
}
|
66
|
-
|
67
|
-
});
|
68
|
-
let mut schema = Schema::from_iter(fields);
|
64
|
+
}
|
69
65
|
|
70
|
-
|
71
|
-
|
72
|
-
|
66
|
+
// Integrate explicit/inferred schema.
|
67
|
+
if let Some(schema) = schema {
|
68
|
+
for (i, (name, dtype)) in schema.into_iter().enumerate() {
|
69
|
+
if let Some((name_, dtype_)) = final_schema.get_at_index_mut(i) {
|
73
70
|
*name_ = name;
|
74
71
|
|
75
|
-
//
|
72
|
+
// If schema dtype is Unknown, overwrite with inferred datatype.
|
76
73
|
if !matches!(dtype, DataType::Unknown) {
|
77
74
|
*dtype_ = dtype;
|
78
75
|
}
|
79
76
|
} else {
|
80
|
-
|
77
|
+
final_schema.with_column(name, dtype);
|
81
78
|
}
|
82
79
|
}
|
83
80
|
}
|
84
81
|
|
85
|
-
|
82
|
+
// Optional per-field overrides; these supersede default/inferred dtypes.
|
83
|
+
if let Some(overrides) = schema_overrides_by_idx {
|
84
|
+
for (i, dtype) in overrides {
|
85
|
+
if let Some((_, dtype_)) = final_schema.get_at_index_mut(i) {
|
86
|
+
if !matches!(dtype, DataType::Unknown) {
|
87
|
+
*dtype_ = dtype;
|
88
|
+
}
|
89
|
+
}
|
90
|
+
}
|
91
|
+
}
|
92
|
+
let df =
|
93
|
+
DataFrame::from_rows_and_schema(&rows, &final_schema).map_err(RbPolarsErr::from)?;
|
86
94
|
Ok(df.into())
|
87
95
|
}
|
88
96
|
|
@@ -120,21 +128,20 @@ impl RbDataFrame {
|
|
120
128
|
// TODO fix
|
121
129
|
let overwrite_dtype_slice = Option::<Vec<Wrap<DataType>>>::None; // Option::<Vec<Wrap<DataType>>>::try_convert(arguments[15])?;
|
122
130
|
let low_memory = bool::try_convert(arguments[16])?;
|
123
|
-
let
|
131
|
+
let comment_prefix = Option::<String>::try_convert(arguments[17])?;
|
124
132
|
let quote_char = Option::<String>::try_convert(arguments[18])?;
|
125
133
|
let null_values = Option::<Wrap<NullValues>>::try_convert(arguments[19])?;
|
126
134
|
let try_parse_dates = bool::try_convert(arguments[20])?;
|
127
135
|
let skip_rows_after_header = usize::try_convert(arguments[21])?;
|
128
|
-
let
|
136
|
+
let row_index = Option::<(String, IdxSize)>::try_convert(arguments[22])?;
|
129
137
|
let sample_size = usize::try_convert(arguments[23])?;
|
130
138
|
let eol_char = String::try_convert(arguments[24])?;
|
131
139
|
// end arguments
|
132
140
|
|
133
141
|
let null_values = null_values.map(|w| w.0);
|
134
|
-
let comment_char = comment_char.map(|s| s.as_bytes()[0]);
|
135
142
|
let eol_char = eol_char.as_bytes()[0];
|
136
143
|
|
137
|
-
let
|
144
|
+
let row_index = row_index.map(|(name, offset)| RowIndex { name, offset });
|
138
145
|
|
139
146
|
let quote_char = if let Some(s) = quote_char {
|
140
147
|
if s.is_empty() {
|
@@ -181,13 +188,13 @@ impl RbDataFrame {
|
|
181
188
|
.with_dtypes(overwrite_dtype.map(Arc::new))
|
182
189
|
.with_dtypes_slice(overwrite_dtype_slice.as_deref())
|
183
190
|
.low_memory(low_memory)
|
184
|
-
.
|
191
|
+
.with_comment_prefix(comment_prefix.as_deref())
|
185
192
|
.with_null_values(null_values)
|
186
193
|
.with_try_parse_dates(try_parse_dates)
|
187
194
|
.with_quote_char(quote_char)
|
188
195
|
.with_end_of_line_char(eol_char)
|
189
196
|
.with_skip_rows_after_header(skip_rows_after_header)
|
190
|
-
.
|
197
|
+
.with_row_index(row_index)
|
191
198
|
.sample_size(sample_size)
|
192
199
|
.finish()
|
193
200
|
.map_err(RbPolarsErr::from)?;
|
@@ -201,19 +208,19 @@ impl RbDataFrame {
|
|
201
208
|
projection: Option<Vec<usize>>,
|
202
209
|
n_rows: Option<usize>,
|
203
210
|
parallel: Wrap<ParallelStrategy>,
|
204
|
-
|
211
|
+
row_index: Option<(String, IdxSize)>,
|
205
212
|
low_memory: bool,
|
206
213
|
use_statistics: bool,
|
207
214
|
rechunk: bool,
|
208
215
|
) -> RbResult<Self> {
|
209
|
-
let
|
216
|
+
let row_index = row_index.map(|(name, offset)| RowIndex { name, offset });
|
210
217
|
let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
|
211
218
|
let df = ParquetReader::new(mmap_bytes_r)
|
212
219
|
.with_projection(projection)
|
213
220
|
.with_columns(columns)
|
214
221
|
.read_parallel(parallel.0)
|
215
222
|
.with_n_rows(n_rows)
|
216
|
-
.
|
223
|
+
.with_row_index(row_index)
|
217
224
|
.set_low_memory(low_memory)
|
218
225
|
.use_statistics(use_statistics)
|
219
226
|
.set_rechunk(rechunk)
|
@@ -227,16 +234,16 @@ impl RbDataFrame {
|
|
227
234
|
columns: Option<Vec<String>>,
|
228
235
|
projection: Option<Vec<usize>>,
|
229
236
|
n_rows: Option<usize>,
|
230
|
-
|
237
|
+
row_index: Option<(String, IdxSize)>,
|
231
238
|
memory_map: bool,
|
232
239
|
) -> RbResult<Self> {
|
233
|
-
let
|
240
|
+
let row_index = row_index.map(|(name, offset)| RowIndex { name, offset });
|
234
241
|
let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
|
235
242
|
let df = IpcReader::new(mmap_bytes_r)
|
236
243
|
.with_projection(projection)
|
237
244
|
.with_columns(columns)
|
238
245
|
.with_n_rows(n_rows)
|
239
|
-
.
|
246
|
+
.with_row_index(row_index)
|
240
247
|
.memory_mapped(memory_map)
|
241
248
|
.finish()
|
242
249
|
.map_err(RbPolarsErr::from)?;
|
@@ -297,12 +304,18 @@ impl RbDataFrame {
|
|
297
304
|
Ok(df) => Ok(df.into()),
|
298
305
|
// try arrow json reader instead
|
299
306
|
// this is row oriented
|
300
|
-
Err(
|
301
|
-
let
|
302
|
-
|
303
|
-
.
|
304
|
-
|
305
|
-
|
307
|
+
Err(e) => {
|
308
|
+
let msg = format!("{e}");
|
309
|
+
if msg.contains("successful parse invalid data") {
|
310
|
+
let e = RbPolarsErr::from(PolarsError::ComputeError(msg.into()));
|
311
|
+
Err(e)
|
312
|
+
} else {
|
313
|
+
let out = JsonReader::new(mmap_bytes_r)
|
314
|
+
.with_json_format(JsonFormat::Json)
|
315
|
+
.finish()
|
316
|
+
.map_err(|e| RbPolarsErr::other(format!("{:?}", e)))?;
|
317
|
+
Ok(out.into())
|
318
|
+
}
|
306
319
|
}
|
307
320
|
}
|
308
321
|
}
|
@@ -347,7 +360,7 @@ impl RbDataFrame {
|
|
347
360
|
pub fn read_rows(
|
348
361
|
rb_rows: RArray,
|
349
362
|
infer_schema_length: Option<usize>,
|
350
|
-
|
363
|
+
schema: Option<Wrap<Schema>>,
|
351
364
|
) -> RbResult<Self> {
|
352
365
|
let mut rows = Vec::with_capacity(rb_rows.len());
|
353
366
|
for v in rb_rows.each() {
|
@@ -358,30 +371,34 @@ impl RbDataFrame {
|
|
358
371
|
}
|
359
372
|
rows.push(Row(row));
|
360
373
|
}
|
361
|
-
Self::finish_from_rows(
|
362
|
-
rows,
|
363
|
-
infer_schema_length,
|
364
|
-
schema_overwrite.map(|wrap| wrap.0),
|
365
|
-
)
|
374
|
+
Self::finish_from_rows(rows, infer_schema_length, schema.map(|wrap| wrap.0), None)
|
366
375
|
}
|
367
376
|
|
368
377
|
pub fn read_hashes(
|
369
378
|
dicts: Value,
|
370
379
|
infer_schema_length: Option<usize>,
|
371
|
-
|
380
|
+
schema: Option<Wrap<Schema>>,
|
381
|
+
schema_overrides: Option<Wrap<Schema>>,
|
372
382
|
) -> RbResult<Self> {
|
373
|
-
let
|
383
|
+
let mut schema_columns = PlIndexSet::new();
|
384
|
+
if let Some(s) = &schema {
|
385
|
+
schema_columns.extend(s.0.iter_names().map(|n| n.to_string()))
|
386
|
+
}
|
387
|
+
let (rows, names) = dicts_to_rows(&dicts, infer_schema_length, schema_columns)?;
|
374
388
|
|
375
|
-
|
376
|
-
if let Some(
|
377
|
-
for (
|
378
|
-
|
389
|
+
let mut schema_overrides_by_idx: Vec<(usize, DataType)> = Vec::new();
|
390
|
+
if let Some(overrides) = schema_overrides {
|
391
|
+
for (idx, name) in names.iter().enumerate() {
|
392
|
+
if let Some(dtype) = overrides.0.get(name) {
|
393
|
+
schema_overrides_by_idx.push((idx, dtype.clone()));
|
394
|
+
}
|
379
395
|
}
|
380
396
|
}
|
381
397
|
let rbdf = Self::finish_from_rows(
|
382
398
|
rows,
|
383
399
|
infer_schema_length,
|
384
|
-
|
400
|
+
schema.map(|wrap| wrap.0),
|
401
|
+
Some(schema_overrides_by_idx),
|
385
402
|
)?;
|
386
403
|
|
387
404
|
unsafe {
|
@@ -422,13 +439,14 @@ impl RbDataFrame {
|
|
422
439
|
include_header: bool,
|
423
440
|
separator: u8,
|
424
441
|
quote_char: u8,
|
425
|
-
batch_size:
|
442
|
+
batch_size: Wrap<NonZeroUsize>,
|
426
443
|
datetime_format: Option<String>,
|
427
444
|
date_format: Option<String>,
|
428
445
|
time_format: Option<String>,
|
429
446
|
float_precision: Option<usize>,
|
430
447
|
null_value: Option<String>,
|
431
448
|
) -> RbResult<()> {
|
449
|
+
let batch_size = batch_size.0;
|
432
450
|
let null = null_value.unwrap_or_default();
|
433
451
|
|
434
452
|
if let Ok(s) = String::try_convert(rb_f) {
|
@@ -504,7 +522,7 @@ impl RbDataFrame {
|
|
504
522
|
.get_columns()
|
505
523
|
.iter()
|
506
524
|
.map(|s| match s.dtype() {
|
507
|
-
DataType::Object(_) => {
|
525
|
+
DataType::Object(_, _) => {
|
508
526
|
let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
|
509
527
|
obj.unwrap().to_object()
|
510
528
|
}
|
@@ -523,7 +541,7 @@ impl RbDataFrame {
|
|
523
541
|
.get_columns()
|
524
542
|
.iter()
|
525
543
|
.map(|s| match s.dtype() {
|
526
|
-
DataType::Object(_) => {
|
544
|
+
DataType::Object(_, _) => {
|
527
545
|
let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
|
528
546
|
obj.unwrap().to_object()
|
529
547
|
}
|
@@ -785,16 +803,15 @@ impl RbDataFrame {
|
|
785
803
|
.map(|s| RbSeries::new(s.clone()))
|
786
804
|
}
|
787
805
|
|
788
|
-
pub fn
|
789
|
-
self.df.borrow().
|
806
|
+
pub fn get_column_index(&self, name: String) -> Option<usize> {
|
807
|
+
self.df.borrow().get_column_index(&name)
|
790
808
|
}
|
791
809
|
|
792
|
-
|
793
|
-
pub fn column(&self, name: String) -> RbResult<RbSeries> {
|
810
|
+
pub fn get_column(&self, name: String) -> RbResult<RbSeries> {
|
794
811
|
self.df
|
795
812
|
.borrow()
|
796
813
|
.column(&name)
|
797
|
-
.map(|
|
814
|
+
.map(|s| RbSeries::new(s.clone()))
|
798
815
|
.map_err(RbPolarsErr::from)
|
799
816
|
}
|
800
817
|
|
@@ -828,18 +845,18 @@ impl RbDataFrame {
|
|
828
845
|
Ok(())
|
829
846
|
}
|
830
847
|
|
831
|
-
pub fn
|
848
|
+
pub fn replace_column(&self, index: usize, new_col: &RbSeries) -> RbResult<()> {
|
832
849
|
self.df
|
833
850
|
.borrow_mut()
|
834
|
-
.
|
851
|
+
.replace_column(index, new_col.series.borrow().clone())
|
835
852
|
.map_err(RbPolarsErr::from)?;
|
836
853
|
Ok(())
|
837
854
|
}
|
838
855
|
|
839
|
-
pub fn
|
856
|
+
pub fn insert_column(&self, index: usize, new_col: &RbSeries) -> RbResult<()> {
|
840
857
|
self.df
|
841
858
|
.borrow_mut()
|
842
|
-
.
|
859
|
+
.insert_column(index, new_col.series.borrow().clone())
|
843
860
|
.map_err(RbPolarsErr::from)?;
|
844
861
|
Ok(())
|
845
862
|
}
|
@@ -874,19 +891,19 @@ impl RbDataFrame {
|
|
874
891
|
Ok(mask.into_series().into())
|
875
892
|
}
|
876
893
|
|
877
|
-
pub fn
|
894
|
+
pub fn equals(&self, other: &RbDataFrame, null_equal: bool) -> bool {
|
878
895
|
if null_equal {
|
879
|
-
self.df.borrow().
|
896
|
+
self.df.borrow().equals_missing(&other.df.borrow())
|
880
897
|
} else {
|
881
|
-
self.df.borrow().
|
898
|
+
self.df.borrow().equals(&other.df.borrow())
|
882
899
|
}
|
883
900
|
}
|
884
901
|
|
885
|
-
pub fn
|
902
|
+
pub fn with_row_index(&self, name: String, offset: Option<IdxSize>) -> RbResult<Self> {
|
886
903
|
let df = self
|
887
904
|
.df
|
888
905
|
.borrow()
|
889
|
-
.
|
906
|
+
.with_row_index(&name, offset)
|
890
907
|
.map_err(RbPolarsErr::from)?;
|
891
908
|
Ok(df.into())
|
892
909
|
}
|
@@ -917,9 +934,9 @@ impl RbDataFrame {
|
|
917
934
|
#[allow(clippy::too_many_arguments)]
|
918
935
|
pub fn pivot_expr(
|
919
936
|
&self,
|
920
|
-
values: Vec<String>,
|
921
937
|
index: Vec<String>,
|
922
938
|
columns: Vec<String>,
|
939
|
+
values: Option<Vec<String>>,
|
923
940
|
maintain_order: bool,
|
924
941
|
sort_columns: bool,
|
925
942
|
aggregate_expr: Option<&RbExpr>,
|
@@ -932,9 +949,9 @@ impl RbDataFrame {
|
|
932
949
|
let agg_expr = aggregate_expr.map(|aggregate_expr| aggregate_expr.inner.clone());
|
933
950
|
let df = fun(
|
934
951
|
&self.df.borrow(),
|
935
|
-
values,
|
936
952
|
index,
|
937
953
|
columns,
|
954
|
+
values,
|
938
955
|
sort_columns,
|
939
956
|
agg_expr,
|
940
957
|
separator.as_deref(),
|
@@ -966,34 +983,6 @@ impl RbDataFrame {
|
|
966
983
|
self.df.borrow().clone().lazy().into()
|
967
984
|
}
|
968
985
|
|
969
|
-
pub fn max(&self) -> Self {
|
970
|
-
self.df.borrow().max().into()
|
971
|
-
}
|
972
|
-
|
973
|
-
pub fn min(&self) -> Self {
|
974
|
-
self.df.borrow().min().into()
|
975
|
-
}
|
976
|
-
|
977
|
-
pub fn sum(&self) -> Self {
|
978
|
-
self.df.borrow().sum().into()
|
979
|
-
}
|
980
|
-
|
981
|
-
pub fn mean(&self) -> Self {
|
982
|
-
self.df.borrow().mean().into()
|
983
|
-
}
|
984
|
-
|
985
|
-
pub fn std(&self, ddof: u8) -> Self {
|
986
|
-
self.df.borrow().std(ddof).into()
|
987
|
-
}
|
988
|
-
|
989
|
-
pub fn var(&self, ddof: u8) -> Self {
|
990
|
-
self.df.borrow().var(ddof).into()
|
991
|
-
}
|
992
|
-
|
993
|
-
pub fn median(&self) -> Self {
|
994
|
-
self.df.borrow().median().into()
|
995
|
-
}
|
996
|
-
|
997
986
|
pub fn max_horizontal(&self) -> RbResult<Option<RbSeries>> {
|
998
987
|
let s = self
|
999
988
|
.df
|
@@ -1040,19 +1029,6 @@ impl RbDataFrame {
|
|
1040
1029
|
Ok(s.map(|s| s.into()))
|
1041
1030
|
}
|
1042
1031
|
|
1043
|
-
pub fn quantile(
|
1044
|
-
&self,
|
1045
|
-
quantile: f64,
|
1046
|
-
interpolation: Wrap<QuantileInterpolOptions>,
|
1047
|
-
) -> RbResult<Self> {
|
1048
|
-
let df = self
|
1049
|
-
.df
|
1050
|
-
.borrow()
|
1051
|
-
.quantile(quantile, interpolation.0)
|
1052
|
-
.map_err(RbPolarsErr::from)?;
|
1053
|
-
Ok(df.into())
|
1054
|
-
}
|
1055
|
-
|
1056
1032
|
pub fn to_dummies(
|
1057
1033
|
&self,
|
1058
1034
|
columns: Option<Vec<String>>,
|
@@ -1124,7 +1100,7 @@ impl RbDataFrame {
|
|
1124
1100
|
.into_datetime(tu, tz)
|
1125
1101
|
.into_series()
|
1126
1102
|
}
|
1127
|
-
Some(DataType::
|
1103
|
+
Some(DataType::String) => {
|
1128
1104
|
apply_lambda_with_utf8_out_type(df, lambda, 0, None).into_series()
|
1129
1105
|
}
|
1130
1106
|
_ => return apply_lambda_unknown(df, lambda, inference_size),
|
@@ -1157,7 +1133,7 @@ impl RbDataFrame {
|
|
1157
1133
|
};
|
1158
1134
|
Ok(self
|
1159
1135
|
.df
|
1160
|
-
.
|
1136
|
+
.borrow_mut()
|
1161
1137
|
.transpose(keep_names_as.as_deref(), new_col_names)
|
1162
1138
|
.map_err(RbPolarsErr::from)?
|
1163
1139
|
.into())
|
@@ -1199,4 +1175,8 @@ impl RbDataFrame {
|
|
1199
1175
|
let df = self.df.borrow().unnest(names).map_err(RbPolarsErr::from)?;
|
1200
1176
|
Ok(df.into())
|
1201
1177
|
}
|
1178
|
+
|
1179
|
+
pub fn clear(&self) -> Self {
|
1180
|
+
self.df.borrow().clear().into()
|
1181
|
+
}
|
1202
1182
|
}
|
@@ -1,3 +1,5 @@
|
|
1
|
+
use polars::prelude::*;
|
2
|
+
|
1
3
|
use crate::RbExpr;
|
2
4
|
|
3
5
|
impl RbExpr {
|
@@ -12,4 +14,76 @@ impl RbExpr {
|
|
12
14
|
pub fn array_sum(&self) -> Self {
|
13
15
|
self.inner.clone().arr().sum().into()
|
14
16
|
}
|
17
|
+
|
18
|
+
pub fn arr_unique(&self, maintain_order: bool) -> Self {
|
19
|
+
if maintain_order {
|
20
|
+
self.inner.clone().arr().unique_stable().into()
|
21
|
+
} else {
|
22
|
+
self.inner.clone().arr().unique().into()
|
23
|
+
}
|
24
|
+
}
|
25
|
+
|
26
|
+
pub fn arr_to_list(&self) -> Self {
|
27
|
+
self.inner.clone().arr().to_list().into()
|
28
|
+
}
|
29
|
+
|
30
|
+
pub fn arr_all(&self) -> Self {
|
31
|
+
self.inner.clone().arr().all().into()
|
32
|
+
}
|
33
|
+
|
34
|
+
pub fn arr_any(&self) -> Self {
|
35
|
+
self.inner.clone().arr().any().into()
|
36
|
+
}
|
37
|
+
|
38
|
+
pub fn arr_sort(&self, descending: bool, nulls_last: bool) -> Self {
|
39
|
+
self.inner
|
40
|
+
.clone()
|
41
|
+
.arr()
|
42
|
+
.sort(SortOptions {
|
43
|
+
descending,
|
44
|
+
nulls_last,
|
45
|
+
..Default::default()
|
46
|
+
})
|
47
|
+
.into()
|
48
|
+
}
|
49
|
+
|
50
|
+
pub fn arr_reverse(&self) -> Self {
|
51
|
+
self.inner.clone().arr().reverse().into()
|
52
|
+
}
|
53
|
+
|
54
|
+
pub fn arr_arg_min(&self) -> Self {
|
55
|
+
self.inner.clone().arr().arg_min().into()
|
56
|
+
}
|
57
|
+
|
58
|
+
pub fn arr_arg_max(&self) -> Self {
|
59
|
+
self.inner.clone().arr().arg_max().into()
|
60
|
+
}
|
61
|
+
|
62
|
+
pub fn arr_get(&self, index: &RbExpr) -> Self {
|
63
|
+
self.inner.clone().arr().get(index.inner.clone()).into()
|
64
|
+
}
|
65
|
+
|
66
|
+
pub fn arr_join(&self, separator: &RbExpr, ignore_nulls: bool) -> Self {
|
67
|
+
self.inner
|
68
|
+
.clone()
|
69
|
+
.arr()
|
70
|
+
.join(separator.inner.clone(), ignore_nulls)
|
71
|
+
.into()
|
72
|
+
}
|
73
|
+
|
74
|
+
pub fn arr_contains(&self, other: &RbExpr) -> Self {
|
75
|
+
self.inner
|
76
|
+
.clone()
|
77
|
+
.arr()
|
78
|
+
.contains(other.inner.clone())
|
79
|
+
.into()
|
80
|
+
}
|
81
|
+
|
82
|
+
pub fn arr_count_matches(&self, expr: &RbExpr) -> Self {
|
83
|
+
self.inner
|
84
|
+
.clone()
|
85
|
+
.arr()
|
86
|
+
.count_matches(expr.inner.clone())
|
87
|
+
.into()
|
88
|
+
}
|
15
89
|
}
|
@@ -5,6 +5,13 @@ use crate::RbExpr;
|
|
5
5
|
|
6
6
|
impl RbExpr {
|
7
7
|
pub fn cat_set_ordering(&self, ordering: Wrap<CategoricalOrdering>) -> Self {
|
8
|
-
self.inner
|
8
|
+
self.inner
|
9
|
+
.clone()
|
10
|
+
.cast(DataType::Categorical(None, ordering.0))
|
11
|
+
.into()
|
12
|
+
}
|
13
|
+
|
14
|
+
pub fn cat_get_categories(&self) -> Self {
|
15
|
+
self.inner.clone().cat().get_categories().into()
|
9
16
|
}
|
10
17
|
}
|
@@ -61,6 +61,14 @@ impl RbExpr {
|
|
61
61
|
self.inner.clone().dt().month_end().into()
|
62
62
|
}
|
63
63
|
|
64
|
+
pub fn dt_base_utc_offset(&self) -> Self {
|
65
|
+
self.inner.clone().dt().base_utc_offset().into()
|
66
|
+
}
|
67
|
+
|
68
|
+
pub fn dt_dst_offset(&self) -> Self {
|
69
|
+
self.inner.clone().dt().dst_offset().into()
|
70
|
+
}
|
71
|
+
|
64
72
|
pub fn dt_round(&self, every: String, offset: String) -> Self {
|
65
73
|
self.inner.clone().dt().round(&every, &offset).into()
|
66
74
|
}
|
@@ -149,73 +157,31 @@ impl RbExpr {
|
|
149
157
|
self.inner.clone().dt().timestamp(tu.0).into()
|
150
158
|
}
|
151
159
|
|
152
|
-
pub fn
|
153
|
-
self.inner
|
154
|
-
.clone()
|
155
|
-
.map(
|
156
|
-
|s| Ok(Some(s.duration()?.days().into_series())),
|
157
|
-
GetOutput::from_type(DataType::Int64),
|
158
|
-
)
|
159
|
-
.into()
|
160
|
+
pub fn dt_total_days(&self) -> Self {
|
161
|
+
self.inner.clone().dt().total_days().into()
|
160
162
|
}
|
161
163
|
|
162
|
-
pub fn
|
163
|
-
self.inner
|
164
|
-
.clone()
|
165
|
-
.map(
|
166
|
-
|s| Ok(Some(s.duration()?.hours().into_series())),
|
167
|
-
GetOutput::from_type(DataType::Int64),
|
168
|
-
)
|
169
|
-
.into()
|
164
|
+
pub fn dt_total_hours(&self) -> Self {
|
165
|
+
self.inner.clone().dt().total_hours().into()
|
170
166
|
}
|
171
167
|
|
172
|
-
pub fn
|
173
|
-
self.inner
|
174
|
-
.clone()
|
175
|
-
.map(
|
176
|
-
|s| Ok(Some(s.duration()?.minutes().into_series())),
|
177
|
-
GetOutput::from_type(DataType::Int64),
|
178
|
-
)
|
179
|
-
.into()
|
168
|
+
pub fn dt_total_minutes(&self) -> Self {
|
169
|
+
self.inner.clone().dt().total_minutes().into()
|
180
170
|
}
|
181
171
|
|
182
|
-
pub fn
|
183
|
-
self.inner
|
184
|
-
.clone()
|
185
|
-
.map(
|
186
|
-
|s| Ok(Some(s.duration()?.seconds().into_series())),
|
187
|
-
GetOutput::from_type(DataType::Int64),
|
188
|
-
)
|
189
|
-
.into()
|
172
|
+
pub fn dt_total_seconds(&self) -> Self {
|
173
|
+
self.inner.clone().dt().total_seconds().into()
|
190
174
|
}
|
191
175
|
|
192
|
-
pub fn
|
193
|
-
self.inner
|
194
|
-
.clone()
|
195
|
-
.map(
|
196
|
-
|s| Ok(Some(s.duration()?.milliseconds().into_series())),
|
197
|
-
GetOutput::from_type(DataType::Int64),
|
198
|
-
)
|
199
|
-
.into()
|
176
|
+
pub fn dt_total_milliseconds(&self) -> Self {
|
177
|
+
self.inner.clone().dt().total_milliseconds().into()
|
200
178
|
}
|
201
179
|
|
202
|
-
pub fn
|
203
|
-
self.inner
|
204
|
-
.clone()
|
205
|
-
.map(
|
206
|
-
|s| Ok(Some(s.duration()?.microseconds().into_series())),
|
207
|
-
GetOutput::from_type(DataType::Int64),
|
208
|
-
)
|
209
|
-
.into()
|
180
|
+
pub fn dt_total_microseconds(&self) -> Self {
|
181
|
+
self.inner.clone().dt().total_microseconds().into()
|
210
182
|
}
|
211
183
|
|
212
|
-
pub fn
|
213
|
-
self.inner
|
214
|
-
.clone()
|
215
|
-
.map(
|
216
|
-
|s| Ok(Some(s.duration()?.nanoseconds().into_series())),
|
217
|
-
GetOutput::from_type(DataType::Int64),
|
218
|
-
)
|
219
|
-
.into()
|
184
|
+
pub fn dt_total_nanoseconds(&self) -> Self {
|
185
|
+
self.inner.clone().dt().total_nanoseconds().into()
|
220
186
|
}
|
221
187
|
}
|