polars-df 0.5.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +26 -0
- data/Cargo.lock +595 -709
- data/Cargo.toml +1 -0
- data/README.md +11 -9
- data/ext/polars/Cargo.toml +18 -10
- data/ext/polars/src/batched_csv.rs +26 -26
- data/ext/polars/src/conversion.rs +272 -136
- data/ext/polars/src/dataframe.rs +135 -94
- data/ext/polars/src/error.rs +8 -5
- data/ext/polars/src/expr/array.rs +15 -0
- data/ext/polars/src/expr/binary.rs +18 -6
- data/ext/polars/src/expr/datetime.rs +10 -12
- data/ext/polars/src/expr/general.rs +78 -264
- data/ext/polars/src/expr/list.rs +41 -28
- data/ext/polars/src/{expr.rs → expr/mod.rs} +5 -2
- data/ext/polars/src/expr/name.rs +44 -0
- data/ext/polars/src/expr/rolling.rs +196 -0
- data/ext/polars/src/expr/string.rs +94 -66
- data/ext/polars/src/file.rs +3 -3
- data/ext/polars/src/functions/aggregation.rs +35 -0
- data/ext/polars/src/functions/eager.rs +7 -31
- data/ext/polars/src/functions/io.rs +10 -10
- data/ext/polars/src/functions/lazy.rs +119 -54
- data/ext/polars/src/functions/meta.rs +30 -0
- data/ext/polars/src/functions/misc.rs +8 -0
- data/ext/polars/src/functions/mod.rs +5 -0
- data/ext/polars/src/functions/random.rs +6 -0
- data/ext/polars/src/functions/range.rs +46 -0
- data/ext/polars/src/functions/string_cache.rs +11 -0
- data/ext/polars/src/functions/whenthen.rs +7 -7
- data/ext/polars/src/lazyframe.rs +61 -44
- data/ext/polars/src/lib.rs +173 -84
- data/ext/polars/src/{apply → map}/dataframe.rs +28 -33
- data/ext/polars/src/{apply → map}/mod.rs +10 -6
- data/ext/polars/src/{apply → map}/series.rs +12 -16
- data/ext/polars/src/object.rs +2 -2
- data/ext/polars/src/rb_modules.rs +25 -6
- data/ext/polars/src/series/construction.rs +32 -6
- data/ext/polars/src/series/export.rs +2 -2
- data/ext/polars/src/series/set_at_idx.rs +33 -17
- data/ext/polars/src/series.rs +62 -42
- data/ext/polars/src/sql.rs +46 -0
- data/lib/polars/array_expr.rb +84 -0
- data/lib/polars/array_name_space.rb +77 -0
- data/lib/polars/batched_csv_reader.rb +1 -1
- data/lib/polars/config.rb +530 -0
- data/lib/polars/data_frame.rb +206 -131
- data/lib/polars/data_types.rb +163 -29
- data/lib/polars/date_time_expr.rb +13 -18
- data/lib/polars/date_time_name_space.rb +22 -28
- data/lib/polars/dynamic_group_by.rb +2 -2
- data/lib/polars/expr.rb +241 -151
- data/lib/polars/functions.rb +29 -38
- data/lib/polars/group_by.rb +38 -76
- data/lib/polars/io.rb +37 -2
- data/lib/polars/lazy_frame.rb +174 -95
- data/lib/polars/lazy_functions.rb +87 -63
- data/lib/polars/lazy_group_by.rb +7 -8
- data/lib/polars/list_expr.rb +40 -36
- data/lib/polars/list_name_space.rb +15 -15
- data/lib/polars/name_expr.rb +198 -0
- data/lib/polars/rolling_group_by.rb +6 -4
- data/lib/polars/series.rb +95 -28
- data/lib/polars/sql_context.rb +194 -0
- data/lib/polars/string_expr.rb +249 -69
- data/lib/polars/string_name_space.rb +155 -25
- data/lib/polars/utils.rb +119 -57
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +6 -0
- metadata +21 -7
- /data/ext/polars/src/{apply → map}/lazy.rs +0 -0
data/ext/polars/src/dataframe.rs
CHANGED
@@ -1,4 +1,7 @@
|
|
1
|
-
use
|
1
|
+
use either::Either;
|
2
|
+
use magnus::{
|
3
|
+
prelude::*, r_hash::ForEach, typed_data::Obj, IntoValue, RArray, RHash, RString, Value,
|
4
|
+
};
|
2
5
|
use polars::frame::row::{rows_to_schema_supertypes, Row};
|
3
6
|
use polars::frame::NullStrategy;
|
4
7
|
use polars::io::avro::AvroCompression;
|
@@ -11,12 +14,12 @@ use std::cell::RefCell;
|
|
11
14
|
use std::io::{BufWriter, Cursor};
|
12
15
|
use std::ops::Deref;
|
13
16
|
|
14
|
-
use crate::
|
17
|
+
use crate::conversion::*;
|
18
|
+
use crate::file::{get_file_like, get_mmap_bytes_reader};
|
19
|
+
use crate::map::dataframe::{
|
15
20
|
apply_lambda_unknown, apply_lambda_with_bool_out_type, apply_lambda_with_primitive_out_type,
|
16
21
|
apply_lambda_with_utf8_out_type,
|
17
22
|
};
|
18
|
-
use crate::conversion::*;
|
19
|
-
use crate::file::{get_file_like, get_mmap_bytes_reader};
|
20
23
|
use crate::rb_modules;
|
21
24
|
use crate::series::{to_rbseries_collection, to_series_collection};
|
22
25
|
use crate::{RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
|
@@ -53,7 +56,7 @@ impl RbDataFrame {
|
|
53
56
|
// replace inferred nulls with boolean
|
54
57
|
let fields = schema.iter_fields().map(|mut fld| match fld.data_type() {
|
55
58
|
DataType::Null => {
|
56
|
-
fld.coerce(DataType::Boolean);
|
59
|
+
// fld.coerce(DataType::Boolean);
|
57
60
|
fld
|
58
61
|
}
|
59
62
|
DataType::Decimal(_, _) => {
|
@@ -86,7 +89,7 @@ impl RbDataFrame {
|
|
86
89
|
pub fn init(columns: RArray) -> RbResult<Self> {
|
87
90
|
let mut cols = Vec::new();
|
88
91
|
for i in columns.each() {
|
89
|
-
cols.push(
|
92
|
+
cols.push(<&RbSeries>::try_convert(i?)?.series.borrow().clone());
|
90
93
|
}
|
91
94
|
let df = DataFrame::new(cols).map_err(RbPolarsErr::from)?;
|
92
95
|
Ok(RbDataFrame::new(df))
|
@@ -99,32 +102,32 @@ impl RbDataFrame {
|
|
99
102
|
pub fn read_csv(arguments: &[Value]) -> RbResult<Self> {
|
100
103
|
// start arguments
|
101
104
|
// this pattern is needed for more than 16
|
102
|
-
let rb_f
|
103
|
-
let infer_schema_length
|
104
|
-
let chunk_size
|
105
|
-
let has_header
|
106
|
-
let ignore_errors
|
107
|
-
let n_rows
|
108
|
-
let skip_rows
|
109
|
-
let projection
|
110
|
-
let
|
111
|
-
let rechunk
|
112
|
-
let columns
|
113
|
-
let encoding
|
114
|
-
let n_threads
|
115
|
-
let path
|
116
|
-
let overwrite_dtype
|
105
|
+
let rb_f = arguments[0];
|
106
|
+
let infer_schema_length = Option::<usize>::try_convert(arguments[1])?;
|
107
|
+
let chunk_size = usize::try_convert(arguments[2])?;
|
108
|
+
let has_header = bool::try_convert(arguments[3])?;
|
109
|
+
let ignore_errors = bool::try_convert(arguments[4])?;
|
110
|
+
let n_rows = Option::<usize>::try_convert(arguments[5])?;
|
111
|
+
let skip_rows = usize::try_convert(arguments[6])?;
|
112
|
+
let projection = Option::<Vec<usize>>::try_convert(arguments[7])?;
|
113
|
+
let separator = String::try_convert(arguments[8])?;
|
114
|
+
let rechunk = bool::try_convert(arguments[9])?;
|
115
|
+
let columns = Option::<Vec<String>>::try_convert(arguments[10])?;
|
116
|
+
let encoding = Wrap::<CsvEncoding>::try_convert(arguments[11])?;
|
117
|
+
let n_threads = Option::<usize>::try_convert(arguments[12])?;
|
118
|
+
let path = Option::<String>::try_convert(arguments[13])?;
|
119
|
+
let overwrite_dtype = Option::<Vec<(String, Wrap<DataType>)>>::try_convert(arguments[14])?;
|
117
120
|
// TODO fix
|
118
|
-
let overwrite_dtype_slice
|
119
|
-
let low_memory
|
120
|
-
let comment_char
|
121
|
-
let quote_char
|
122
|
-
let null_values
|
123
|
-
let try_parse_dates
|
124
|
-
let skip_rows_after_header
|
125
|
-
let row_count
|
126
|
-
let sample_size
|
127
|
-
let eol_char
|
121
|
+
let overwrite_dtype_slice = Option::<Vec<Wrap<DataType>>>::None; // Option::<Vec<Wrap<DataType>>>::try_convert(arguments[15])?;
|
122
|
+
let low_memory = bool::try_convert(arguments[16])?;
|
123
|
+
let comment_char = Option::<String>::try_convert(arguments[17])?;
|
124
|
+
let quote_char = Option::<String>::try_convert(arguments[18])?;
|
125
|
+
let null_values = Option::<Wrap<NullValues>>::try_convert(arguments[19])?;
|
126
|
+
let try_parse_dates = bool::try_convert(arguments[20])?;
|
127
|
+
let skip_rows_after_header = usize::try_convert(arguments[21])?;
|
128
|
+
let row_count = Option::<(String, IdxSize)>::try_convert(arguments[22])?;
|
129
|
+
let sample_size = usize::try_convert(arguments[23])?;
|
130
|
+
let eol_char = String::try_convert(arguments[24])?;
|
128
131
|
// end arguments
|
129
132
|
|
130
133
|
let null_values = null_values.map(|w| w.0);
|
@@ -165,7 +168,7 @@ impl RbDataFrame {
|
|
165
168
|
.infer_schema(infer_schema_length)
|
166
169
|
.has_header(has_header)
|
167
170
|
.with_n_rows(n_rows)
|
168
|
-
.
|
171
|
+
.with_separator(separator.as_bytes()[0])
|
169
172
|
.with_skip_rows(skip_rows)
|
170
173
|
.with_ignore_errors(ignore_errors)
|
171
174
|
.with_projection(projection)
|
@@ -265,7 +268,7 @@ impl RbDataFrame {
|
|
265
268
|
) -> RbResult<()> {
|
266
269
|
use polars::io::avro::AvroWriter;
|
267
270
|
|
268
|
-
if let Ok(s) =
|
271
|
+
if let Ok(s) = String::try_convert(rb_f) {
|
269
272
|
let f = std::fs::File::create(s).unwrap();
|
270
273
|
AvroWriter::new(f)
|
271
274
|
.with_compression(compression.0)
|
@@ -341,6 +344,27 @@ impl RbDataFrame {
|
|
341
344
|
Ok(())
|
342
345
|
}
|
343
346
|
|
347
|
+
pub fn read_rows(
|
348
|
+
rb_rows: RArray,
|
349
|
+
infer_schema_length: Option<usize>,
|
350
|
+
schema_overwrite: Option<Wrap<Schema>>,
|
351
|
+
) -> RbResult<Self> {
|
352
|
+
let mut rows = Vec::with_capacity(rb_rows.len());
|
353
|
+
for v in rb_rows.each() {
|
354
|
+
let rb_row = RArray::try_convert(v?)?;
|
355
|
+
let mut row = Vec::with_capacity(rb_row.len());
|
356
|
+
for val in rb_row.each() {
|
357
|
+
row.push(Wrap::<AnyValue>::try_convert(val?)?.0);
|
358
|
+
}
|
359
|
+
rows.push(Row(row));
|
360
|
+
}
|
361
|
+
Self::finish_from_rows(
|
362
|
+
rows,
|
363
|
+
infer_schema_length,
|
364
|
+
schema_overwrite.map(|wrap| wrap.0),
|
365
|
+
)
|
366
|
+
}
|
367
|
+
|
344
368
|
pub fn read_hashes(
|
345
369
|
dicts: Value,
|
346
370
|
infer_schema_length: Option<usize>,
|
@@ -395,9 +419,9 @@ impl RbDataFrame {
|
|
395
419
|
pub fn write_csv(
|
396
420
|
&self,
|
397
421
|
rb_f: Value,
|
398
|
-
|
399
|
-
|
400
|
-
|
422
|
+
include_header: bool,
|
423
|
+
separator: u8,
|
424
|
+
quote_char: u8,
|
401
425
|
batch_size: usize,
|
402
426
|
datetime_format: Option<String>,
|
403
427
|
date_format: Option<String>,
|
@@ -407,13 +431,13 @@ impl RbDataFrame {
|
|
407
431
|
) -> RbResult<()> {
|
408
432
|
let null = null_value.unwrap_or_default();
|
409
433
|
|
410
|
-
if let Ok(s) =
|
434
|
+
if let Ok(s) = String::try_convert(rb_f) {
|
411
435
|
let f = std::fs::File::create(s).unwrap();
|
412
436
|
// no need for a buffered writer, because the csv writer does internal buffering
|
413
437
|
CsvWriter::new(f)
|
414
|
-
.
|
415
|
-
.
|
416
|
-
.
|
438
|
+
.include_header(include_header)
|
439
|
+
.with_separator(separator)
|
440
|
+
.with_quote_char(quote_char)
|
417
441
|
.with_batch_size(batch_size)
|
418
442
|
.with_datetime_format(datetime_format)
|
419
443
|
.with_date_format(date_format)
|
@@ -425,9 +449,9 @@ impl RbDataFrame {
|
|
425
449
|
} else {
|
426
450
|
let mut buf = Cursor::new(Vec::new());
|
427
451
|
CsvWriter::new(&mut buf)
|
428
|
-
.
|
429
|
-
.
|
430
|
-
.
|
452
|
+
.include_header(include_header)
|
453
|
+
.with_separator(separator)
|
454
|
+
.with_quote_char(quote_char)
|
431
455
|
.with_batch_size(batch_size)
|
432
456
|
.with_datetime_format(datetime_format)
|
433
457
|
.with_date_format(date_format)
|
@@ -449,19 +473,21 @@ impl RbDataFrame {
|
|
449
473
|
rb_f: Value,
|
450
474
|
compression: Wrap<Option<IpcCompression>>,
|
451
475
|
) -> RbResult<()> {
|
452
|
-
if let Ok(s) =
|
476
|
+
if let Ok(s) = String::try_convert(rb_f) {
|
453
477
|
let f = std::fs::File::create(s).unwrap();
|
454
478
|
IpcWriter::new(f)
|
455
479
|
.with_compression(compression.0)
|
456
480
|
.finish(&mut self.df.borrow_mut())
|
457
481
|
.map_err(RbPolarsErr::from)?;
|
458
482
|
} else {
|
459
|
-
let mut buf =
|
460
|
-
|
483
|
+
let mut buf = Cursor::new(Vec::new());
|
461
484
|
IpcWriter::new(&mut buf)
|
462
485
|
.with_compression(compression.0)
|
463
486
|
.finish(&mut self.df.borrow_mut())
|
464
487
|
.map_err(RbPolarsErr::from)?;
|
488
|
+
// TODO less copying
|
489
|
+
let rb_str = RString::from_slice(&buf.into_inner());
|
490
|
+
rb_f.funcall::<_, _, Value>("write", (rb_str,))?;
|
465
491
|
}
|
466
492
|
Ok(())
|
467
493
|
}
|
@@ -485,7 +511,7 @@ impl RbDataFrame {
|
|
485
511
|
_ => Wrap(s.get(idx).unwrap()).into_value(),
|
486
512
|
}),
|
487
513
|
)
|
488
|
-
.
|
514
|
+
.as_value()
|
489
515
|
}
|
490
516
|
|
491
517
|
pub fn row_tuples(&self) -> Value {
|
@@ -505,7 +531,7 @@ impl RbDataFrame {
|
|
505
531
|
}),
|
506
532
|
)
|
507
533
|
}))
|
508
|
-
.
|
534
|
+
.as_value()
|
509
535
|
}
|
510
536
|
|
511
537
|
pub fn to_numo(&self) -> Option<Value> {
|
@@ -535,7 +561,7 @@ impl RbDataFrame {
|
|
535
561
|
) -> RbResult<()> {
|
536
562
|
let compression = parse_parquet_compression(&compression, compression_level)?;
|
537
563
|
|
538
|
-
if let Ok(s) =
|
564
|
+
if let Ok(s) = String::try_convert(rb_f) {
|
539
565
|
let f = std::fs::File::create(s).unwrap();
|
540
566
|
ParquetWriter::new(f)
|
541
567
|
.with_compression(compression)
|
@@ -602,7 +628,7 @@ impl RbDataFrame {
|
|
602
628
|
|
603
629
|
pub fn sample_n(
|
604
630
|
&self,
|
605
|
-
n:
|
631
|
+
n: &RbSeries,
|
606
632
|
with_replacement: bool,
|
607
633
|
shuffle: bool,
|
608
634
|
seed: Option<u64>,
|
@@ -610,14 +636,14 @@ impl RbDataFrame {
|
|
610
636
|
let df = self
|
611
637
|
.df
|
612
638
|
.borrow()
|
613
|
-
.sample_n(n, with_replacement, shuffle, seed)
|
639
|
+
.sample_n(&n.series.borrow(), with_replacement, shuffle, seed)
|
614
640
|
.map_err(RbPolarsErr::from)?;
|
615
641
|
Ok(df.into())
|
616
642
|
}
|
617
643
|
|
618
644
|
pub fn sample_frac(
|
619
645
|
&self,
|
620
|
-
frac:
|
646
|
+
frac: &RbSeries,
|
621
647
|
with_replacement: bool,
|
622
648
|
shuffle: bool,
|
623
649
|
seed: Option<u64>,
|
@@ -625,7 +651,7 @@ impl RbDataFrame {
|
|
625
651
|
let df = self
|
626
652
|
.df
|
627
653
|
.borrow()
|
628
|
-
.sample_frac(frac, with_replacement, shuffle, seed)
|
654
|
+
.sample_frac(&frac.series.borrow(), with_replacement, shuffle, seed)
|
629
655
|
.map_err(RbPolarsErr::from)?;
|
630
656
|
Ok(df.into())
|
631
657
|
}
|
@@ -794,22 +820,6 @@ impl RbDataFrame {
|
|
794
820
|
Ok(RbDataFrame::new(df))
|
795
821
|
}
|
796
822
|
|
797
|
-
pub fn sort(&self, by_column: String, reverse: bool, nulls_last: bool) -> RbResult<Self> {
|
798
|
-
let df = self
|
799
|
-
.df
|
800
|
-
.borrow()
|
801
|
-
.sort_with_options(
|
802
|
-
&by_column,
|
803
|
-
SortOptions {
|
804
|
-
descending: reverse,
|
805
|
-
nulls_last,
|
806
|
-
multithreaded: true,
|
807
|
-
},
|
808
|
-
)
|
809
|
-
.map_err(RbPolarsErr::from)?;
|
810
|
-
Ok(RbDataFrame::new(df))
|
811
|
-
}
|
812
|
-
|
813
823
|
pub fn replace(&self, column: String, new_col: &RbSeries) -> RbResult<()> {
|
814
824
|
self.df
|
815
825
|
.borrow_mut()
|
@@ -933,11 +943,16 @@ impl RbDataFrame {
|
|
933
943
|
Ok(RbDataFrame::new(df))
|
934
944
|
}
|
935
945
|
|
936
|
-
pub fn partition_by(
|
937
|
-
|
938
|
-
|
946
|
+
pub fn partition_by(
|
947
|
+
&self,
|
948
|
+
by: Vec<String>,
|
949
|
+
maintain_order: bool,
|
950
|
+
include_key: bool,
|
951
|
+
) -> RbResult<RArray> {
|
952
|
+
let out = if maintain_order {
|
953
|
+
self.df.borrow().partition_by_stable(by, include_key)
|
939
954
|
} else {
|
940
|
-
self.df.borrow().partition_by(
|
955
|
+
self.df.borrow().partition_by(by, include_key)
|
941
956
|
}
|
942
957
|
.map_err(RbPolarsErr::from)?;
|
943
958
|
Ok(RArray::from_iter(out.into_iter().map(RbDataFrame::new)))
|
@@ -979,30 +994,48 @@ impl RbDataFrame {
|
|
979
994
|
self.df.borrow().median().into()
|
980
995
|
}
|
981
996
|
|
982
|
-
pub fn
|
997
|
+
pub fn max_horizontal(&self) -> RbResult<Option<RbSeries>> {
|
983
998
|
let s = self
|
984
999
|
.df
|
985
1000
|
.borrow()
|
986
|
-
.
|
1001
|
+
.max_horizontal()
|
987
1002
|
.map_err(RbPolarsErr::from)?;
|
988
1003
|
Ok(s.map(|s| s.into()))
|
989
1004
|
}
|
990
1005
|
|
991
|
-
pub fn
|
992
|
-
let s = self
|
1006
|
+
pub fn min_horizontal(&self) -> RbResult<Option<RbSeries>> {
|
1007
|
+
let s = self
|
1008
|
+
.df
|
1009
|
+
.borrow()
|
1010
|
+
.min_horizontal()
|
1011
|
+
.map_err(RbPolarsErr::from)?;
|
993
1012
|
Ok(s.map(|s| s.into()))
|
994
1013
|
}
|
995
1014
|
|
996
|
-
pub fn
|
997
|
-
let
|
1015
|
+
pub fn sum_horizontal(&self, ignore_nulls: bool) -> RbResult<Option<RbSeries>> {
|
1016
|
+
let null_strategy = if ignore_nulls {
|
1017
|
+
NullStrategy::Ignore
|
1018
|
+
} else {
|
1019
|
+
NullStrategy::Propagate
|
1020
|
+
};
|
1021
|
+
let s = self
|
1022
|
+
.df
|
1023
|
+
.borrow()
|
1024
|
+
.sum_horizontal(null_strategy)
|
1025
|
+
.map_err(RbPolarsErr::from)?;
|
998
1026
|
Ok(s.map(|s| s.into()))
|
999
1027
|
}
|
1000
1028
|
|
1001
|
-
pub fn
|
1029
|
+
pub fn mean_horizontal(&self, ignore_nulls: bool) -> RbResult<Option<RbSeries>> {
|
1030
|
+
let null_strategy = if ignore_nulls {
|
1031
|
+
NullStrategy::Ignore
|
1032
|
+
} else {
|
1033
|
+
NullStrategy::Propagate
|
1034
|
+
};
|
1002
1035
|
let s = self
|
1003
1036
|
.df
|
1004
1037
|
.borrow()
|
1005
|
-
.
|
1038
|
+
.mean_horizontal(null_strategy)
|
1006
1039
|
.map_err(RbPolarsErr::from)?;
|
1007
1040
|
Ok(s.map(|s| s.into()))
|
1008
1041
|
}
|
@@ -1024,13 +1057,18 @@ impl RbDataFrame {
|
|
1024
1057
|
&self,
|
1025
1058
|
columns: Option<Vec<String>>,
|
1026
1059
|
separator: Option<String>,
|
1060
|
+
drop_first: bool,
|
1027
1061
|
) -> RbResult<Self> {
|
1028
1062
|
let df = match columns {
|
1029
1063
|
Some(cols) => self.df.borrow().columns_to_dummies(
|
1030
1064
|
cols.iter().map(|x| x as &str).collect(),
|
1031
1065
|
separator.as_deref(),
|
1066
|
+
drop_first,
|
1032
1067
|
),
|
1033
|
-
None => self
|
1068
|
+
None => self
|
1069
|
+
.df
|
1070
|
+
.borrow()
|
1071
|
+
.to_dummies(separator.as_deref(), drop_first),
|
1034
1072
|
}
|
1035
1073
|
.map_err(RbPolarsErr::from)?;
|
1036
1074
|
Ok(df.into())
|
@@ -1092,7 +1130,7 @@ impl RbDataFrame {
|
|
1092
1130
|
_ => return apply_lambda_unknown(df, lambda, inference_size),
|
1093
1131
|
};
|
1094
1132
|
|
1095
|
-
Ok((RbSeries::from(out).
|
1133
|
+
Ok((Obj::wrap(RbSeries::from(out)).as_value(), false))
|
1096
1134
|
}
|
1097
1135
|
|
1098
1136
|
pub fn shrink_to_fit(&self) {
|
@@ -1109,17 +1147,20 @@ impl RbDataFrame {
|
|
1109
1147
|
Ok(hash.into_series().into())
|
1110
1148
|
}
|
1111
1149
|
|
1112
|
-
pub fn transpose(&self,
|
1113
|
-
let
|
1114
|
-
|
1115
|
-
|
1116
|
-
|
1117
|
-
|
1118
|
-
|
1119
|
-
|
1120
|
-
|
1121
|
-
|
1122
|
-
|
1150
|
+
pub fn transpose(&self, keep_names_as: Option<String>, column_names: Value) -> RbResult<Self> {
|
1151
|
+
let new_col_names = if let Ok(name) = <Vec<String>>::try_convert(column_names) {
|
1152
|
+
Some(Either::Right(name))
|
1153
|
+
} else if let Ok(name) = String::try_convert(column_names) {
|
1154
|
+
Some(Either::Left(name))
|
1155
|
+
} else {
|
1156
|
+
None
|
1157
|
+
};
|
1158
|
+
Ok(self
|
1159
|
+
.df
|
1160
|
+
.borrow()
|
1161
|
+
.transpose(keep_names_as.as_deref(), new_col_names)
|
1162
|
+
.map_err(RbPolarsErr::from)?
|
1163
|
+
.into())
|
1123
1164
|
}
|
1124
1165
|
|
1125
1166
|
pub fn upsample(
|
data/ext/polars/src/error.rs
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
use magnus::exception;
|
2
2
|
use magnus::Error;
|
3
|
-
use polars::error::ArrowError;
|
4
3
|
use polars::prelude::PolarsError;
|
5
4
|
|
6
5
|
pub struct RbPolarsErr {}
|
@@ -11,10 +10,6 @@ impl RbPolarsErr {
|
|
11
10
|
Error::new(exception::runtime_error(), e.to_string())
|
12
11
|
}
|
13
12
|
|
14
|
-
pub fn arrow(e: ArrowError) -> Error {
|
15
|
-
Error::new(exception::runtime_error(), e.to_string())
|
16
|
-
}
|
17
|
-
|
18
13
|
pub fn io(e: std::io::Error) -> Error {
|
19
14
|
Error::new(exception::runtime_error(), e.to_string())
|
20
15
|
}
|
@@ -24,6 +19,14 @@ impl RbPolarsErr {
|
|
24
19
|
}
|
25
20
|
}
|
26
21
|
|
22
|
+
pub struct RbTypeError {}
|
23
|
+
|
24
|
+
impl RbTypeError {
|
25
|
+
pub fn new_err(message: String) -> Error {
|
26
|
+
Error::new(exception::type_error(), message)
|
27
|
+
}
|
28
|
+
}
|
29
|
+
|
27
30
|
pub struct RbValueError {}
|
28
31
|
|
29
32
|
impl RbValueError {
|
@@ -0,0 +1,15 @@
|
|
1
|
+
use crate::RbExpr;
|
2
|
+
|
3
|
+
impl RbExpr {
|
4
|
+
pub fn array_max(&self) -> Self {
|
5
|
+
self.inner.clone().arr().max().into()
|
6
|
+
}
|
7
|
+
|
8
|
+
pub fn array_min(&self) -> Self {
|
9
|
+
self.inner.clone().arr().min().into()
|
10
|
+
}
|
11
|
+
|
12
|
+
pub fn array_sum(&self) -> Self {
|
13
|
+
self.inner.clone().arr().sum().into()
|
14
|
+
}
|
15
|
+
}
|
@@ -3,16 +3,28 @@ use polars::prelude::*;
|
|
3
3
|
use crate::RbExpr;
|
4
4
|
|
5
5
|
impl RbExpr {
|
6
|
-
pub fn bin_contains(&self, lit:
|
7
|
-
self.inner
|
6
|
+
pub fn bin_contains(&self, lit: &RbExpr) -> Self {
|
7
|
+
self.inner
|
8
|
+
.clone()
|
9
|
+
.binary()
|
10
|
+
.contains_literal(lit.inner.clone())
|
11
|
+
.into()
|
8
12
|
}
|
9
13
|
|
10
|
-
pub fn bin_ends_with(&self, sub:
|
11
|
-
self.inner
|
14
|
+
pub fn bin_ends_with(&self, sub: &RbExpr) -> Self {
|
15
|
+
self.inner
|
16
|
+
.clone()
|
17
|
+
.binary()
|
18
|
+
.ends_with(sub.inner.clone())
|
19
|
+
.into()
|
12
20
|
}
|
13
21
|
|
14
|
-
pub fn bin_starts_with(&self, sub:
|
15
|
-
self.inner
|
22
|
+
pub fn bin_starts_with(&self, sub: &RbExpr) -> Self {
|
23
|
+
self.inner
|
24
|
+
.clone()
|
25
|
+
.binary()
|
26
|
+
.starts_with(sub.inner.clone())
|
27
|
+
.into()
|
16
28
|
}
|
17
29
|
|
18
30
|
pub fn bin_hex_decode(&self, strict: bool) -> Self {
|
@@ -8,9 +8,8 @@ impl RbExpr {
|
|
8
8
|
self.inner.clone().dt().to_string(&format).into()
|
9
9
|
}
|
10
10
|
|
11
|
-
pub fn dt_offset_by(&self, by:
|
12
|
-
|
13
|
-
self.inner.clone().dt().offset_by(by).into()
|
11
|
+
pub fn dt_offset_by(&self, by: &RbExpr) -> Self {
|
12
|
+
self.inner.clone().dt().offset_by(by.inner.clone()).into()
|
14
13
|
}
|
15
14
|
|
16
15
|
pub fn dt_epoch_seconds(&self) -> Self {
|
@@ -38,21 +37,20 @@ impl RbExpr {
|
|
38
37
|
self.inner.clone().dt().cast_time_unit(tu.0).into()
|
39
38
|
}
|
40
39
|
|
41
|
-
pub fn dt_replace_time_zone(&self,
|
40
|
+
pub fn dt_replace_time_zone(&self, time_zone: Option<String>, ambiguous: &Self) -> Self {
|
42
41
|
self.inner
|
43
42
|
.clone()
|
44
43
|
.dt()
|
45
|
-
.replace_time_zone(
|
44
|
+
.replace_time_zone(time_zone, ambiguous.inner.clone())
|
46
45
|
.into()
|
47
46
|
}
|
48
47
|
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
self.inner.clone().dt().truncate(&every, &offset).into()
|
48
|
+
pub fn dt_truncate(&self, every: &Self, offset: String) -> Self {
|
49
|
+
self.inner
|
50
|
+
.clone()
|
51
|
+
.dt()
|
52
|
+
.truncate(every.inner.clone(), offset)
|
53
|
+
.into()
|
56
54
|
}
|
57
55
|
|
58
56
|
pub fn dt_month_start(&self) -> Self {
|