polars-df 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.yardopts +3 -0
- data/CHANGELOG.md +4 -0
- data/Cargo.lock +2 -1
- data/README.md +1 -1
- data/ext/polars/Cargo.toml +7 -1
- data/ext/polars/src/conversion.rs +35 -2
- data/ext/polars/src/dataframe.rs +228 -11
- data/ext/polars/src/lazy/dataframe.rs +3 -3
- data/ext/polars/src/lazy/dsl.rs +59 -2
- data/ext/polars/src/lib.rs +151 -10
- data/ext/polars/src/series.rs +182 -29
- data/ext/polars/src/set.rs +91 -0
- data/ext/polars/src/utils.rs +19 -0
- data/lib/polars/batched_csv_reader.rb +1 -0
- data/lib/polars/cat_expr.rb +39 -0
- data/lib/polars/data_frame.rb +2284 -137
- data/lib/polars/date_time_expr.rb +1282 -7
- data/lib/polars/exceptions.rb +20 -0
- data/lib/polars/expr.rb +612 -7
- data/lib/polars/expr_dispatch.rb +14 -0
- data/lib/polars/functions.rb +219 -0
- data/lib/polars/group_by.rb +517 -0
- data/lib/polars/io.rb +421 -2
- data/lib/polars/lazy_frame.rb +1261 -67
- data/lib/polars/lazy_functions.rb +288 -10
- data/lib/polars/lazy_group_by.rb +79 -0
- data/lib/polars/list_expr.rb +5 -0
- data/lib/polars/meta_expr.rb +21 -0
- data/lib/polars/series.rb +1476 -212
- data/lib/polars/slice.rb +104 -0
- data/lib/polars/string_expr.rb +663 -2
- data/lib/polars/struct_expr.rb +73 -0
- data/lib/polars/utils.rb +43 -3
- data/lib/polars/version.rb +2 -1
- data/lib/polars/when.rb +1 -0
- data/lib/polars/when_then.rb +1 -0
- data/lib/polars.rb +7 -10
- metadata +9 -2
data/ext/polars/src/lib.rs
CHANGED
@@ -5,6 +5,8 @@ mod error;
|
|
5
5
|
mod file;
|
6
6
|
mod lazy;
|
7
7
|
mod series;
|
8
|
+
mod set;
|
9
|
+
mod utils;
|
8
10
|
|
9
11
|
use batched_csv::RbBatchedCsv;
|
10
12
|
use conversion::*;
|
@@ -13,14 +15,16 @@ use error::{RbPolarsErr, RbValueError};
|
|
13
15
|
use file::get_file_like;
|
14
16
|
use lazy::dataframe::{RbLazyFrame, RbLazyGroupBy};
|
15
17
|
use lazy::dsl::{RbExpr, RbWhen, RbWhenThen};
|
18
|
+
use lazy::utils::rb_exprs_to_exprs;
|
16
19
|
use magnus::{
|
17
20
|
define_module, function, memoize, method, prelude::*, Error, RArray, RClass, RHash, RModule,
|
18
21
|
Value,
|
19
22
|
};
|
20
|
-
use polars::datatypes::DataType;
|
23
|
+
use polars::datatypes::{DataType, TimeUnit};
|
21
24
|
use polars::error::PolarsResult;
|
22
25
|
use polars::frame::DataFrame;
|
23
26
|
use polars::functions::{diag_concat_df, hor_concat_df};
|
27
|
+
use polars::prelude::{ClosedWindow, Duration, IntoSeries, TimeZone};
|
24
28
|
use series::RbSeries;
|
25
29
|
|
26
30
|
type RbResult<T> = Result<T, Error>;
|
@@ -42,6 +46,9 @@ fn init() -> RbResult<()> {
|
|
42
46
|
module.define_singleton_method("_concat_series", function!(concat_series, 1))?;
|
43
47
|
module.define_singleton_method("_ipc_schema", function!(ipc_schema, 1))?;
|
44
48
|
module.define_singleton_method("_parquet_schema", function!(parquet_schema, 1))?;
|
49
|
+
module.define_singleton_method("_rb_date_range", function!(rb_date_range, 7))?;
|
50
|
+
module.define_singleton_method("_as_struct", function!(as_struct, 1))?;
|
51
|
+
module.define_singleton_method("_arg_where", function!(arg_where, 1))?;
|
45
52
|
|
46
53
|
let class = module.define_class("RbBatchedCsv", Default::default())?;
|
47
54
|
class.define_singleton_method("new", function!(RbBatchedCsv::new, -1))?;
|
@@ -50,7 +57,7 @@ fn init() -> RbResult<()> {
|
|
50
57
|
let class = module.define_class("RbDataFrame", Default::default())?;
|
51
58
|
class.define_singleton_method("new", function!(RbDataFrame::init, 1))?;
|
52
59
|
class.define_singleton_method("read_csv", function!(RbDataFrame::read_csv, -1))?;
|
53
|
-
class.define_singleton_method("read_parquet", function!(RbDataFrame::read_parquet,
|
60
|
+
class.define_singleton_method("read_parquet", function!(RbDataFrame::read_parquet, 7))?;
|
54
61
|
class.define_singleton_method("read_ipc", function!(RbDataFrame::read_ipc, 6))?;
|
55
62
|
class.define_singleton_method("read_hash", function!(RbDataFrame::read_hash, 1))?;
|
56
63
|
class.define_singleton_method("read_json", function!(RbDataFrame::read_json, 1))?;
|
@@ -60,7 +67,21 @@ fn init() -> RbResult<()> {
|
|
60
67
|
class.define_method("write_ndjson", method!(RbDataFrame::write_ndjson, 1))?;
|
61
68
|
class.define_method("write_csv", method!(RbDataFrame::write_csv, 10))?;
|
62
69
|
class.define_method("write_ipc", method!(RbDataFrame::write_ipc, 2))?;
|
70
|
+
class.define_method("row_tuple", method!(RbDataFrame::row_tuple, 1))?;
|
71
|
+
class.define_method("row_tuples", method!(RbDataFrame::row_tuples, 0))?;
|
63
72
|
class.define_method("write_parquet", method!(RbDataFrame::write_parquet, 5))?;
|
73
|
+
class.define_method("add", method!(RbDataFrame::add, 1))?;
|
74
|
+
class.define_method("sub", method!(RbDataFrame::sub, 1))?;
|
75
|
+
class.define_method("div", method!(RbDataFrame::div, 1))?;
|
76
|
+
class.define_method("mul", method!(RbDataFrame::mul, 1))?;
|
77
|
+
class.define_method("rem", method!(RbDataFrame::rem, 1))?;
|
78
|
+
class.define_method("add_df", method!(RbDataFrame::add_df, 1))?;
|
79
|
+
class.define_method("sub_df", method!(RbDataFrame::sub_df, 1))?;
|
80
|
+
class.define_method("div_df", method!(RbDataFrame::div_df, 1))?;
|
81
|
+
class.define_method("mul_df", method!(RbDataFrame::mul_df, 1))?;
|
82
|
+
class.define_method("rem_df", method!(RbDataFrame::rem_df, 1))?;
|
83
|
+
class.define_method("sample_n", method!(RbDataFrame::sample_n, 4))?;
|
84
|
+
class.define_method("sample_frac", method!(RbDataFrame::sample_frac, 4))?;
|
64
85
|
class.define_method("rechunk", method!(RbDataFrame::rechunk, 0))?;
|
65
86
|
class.define_method("to_s", method!(RbDataFrame::to_s, 0))?;
|
66
87
|
class.define_method("get_columns", method!(RbDataFrame::get_columns, 0))?;
|
@@ -74,7 +95,19 @@ fn init() -> RbResult<()> {
|
|
74
95
|
class.define_method("shape", method!(RbDataFrame::shape, 0))?;
|
75
96
|
class.define_method("height", method!(RbDataFrame::height, 0))?;
|
76
97
|
class.define_method("width", method!(RbDataFrame::width, 0))?;
|
98
|
+
class.define_method("hstack_mut", method!(RbDataFrame::hstack_mut, 1))?;
|
99
|
+
class.define_method("hstack", method!(RbDataFrame::hstack, 1))?;
|
100
|
+
class.define_method("extend", method!(RbDataFrame::extend, 1))?;
|
101
|
+
class.define_method("vstack_mut", method!(RbDataFrame::vstack_mut, 1))?;
|
102
|
+
class.define_method("vstack", method!(RbDataFrame::vstack, 1))?;
|
103
|
+
class.define_method("drop_in_place", method!(RbDataFrame::drop_in_place, 1))?;
|
104
|
+
class.define_method("drop_nulls", method!(RbDataFrame::drop_nulls, 1))?;
|
105
|
+
class.define_method("drop", method!(RbDataFrame::drop, 1))?;
|
77
106
|
class.define_method("select_at_idx", method!(RbDataFrame::select_at_idx, 1))?;
|
107
|
+
class.define_method(
|
108
|
+
"find_idx_by_name",
|
109
|
+
method!(RbDataFrame::find_idx_by_name, 1),
|
110
|
+
)?;
|
78
111
|
class.define_method("column", method!(RbDataFrame::column, 1))?;
|
79
112
|
class.define_method("select", method!(RbDataFrame::select, 1))?;
|
80
113
|
class.define_method("take", method!(RbDataFrame::take, 1))?;
|
@@ -116,6 +149,7 @@ fn init() -> RbResult<()> {
|
|
116
149
|
class.define_method("shrink_to_fit", method!(RbDataFrame::shrink_to_fit, 0))?;
|
117
150
|
class.define_method("transpose", method!(RbDataFrame::transpose, 2))?;
|
118
151
|
class.define_method("upsample", method!(RbDataFrame::upsample, 5))?;
|
152
|
+
class.define_method("to_struct", method!(RbDataFrame::to_struct, 1))?;
|
119
153
|
class.define_method("unnest", method!(RbDataFrame::unnest, 1))?;
|
120
154
|
|
121
155
|
let class = module.define_class("RbExpr", Default::default())?;
|
@@ -304,6 +338,7 @@ fn init() -> RbResult<()> {
|
|
304
338
|
class.define_method("dt_tz_localize", method!(RbExpr::dt_tz_localize, 1))?;
|
305
339
|
class.define_method("dt_truncate", method!(RbExpr::dt_truncate, 2))?;
|
306
340
|
class.define_method("dt_round", method!(RbExpr::dt_round, 2))?;
|
341
|
+
class.define_method("reinterpret", method!(RbExpr::reinterpret, 1))?;
|
307
342
|
class.define_method("mode", method!(RbExpr::mode, 0))?;
|
308
343
|
class.define_method("keep_name", method!(RbExpr::keep_name, 0))?;
|
309
344
|
class.define_method("prefix", method!(RbExpr::prefix, 1))?;
|
@@ -353,6 +388,7 @@ fn init() -> RbResult<()> {
|
|
353
388
|
class.define_method("ewm_mean", method!(RbExpr::ewm_mean, 3))?;
|
354
389
|
class.define_method("ewm_std", method!(RbExpr::ewm_std, 4))?;
|
355
390
|
class.define_method("ewm_var", method!(RbExpr::ewm_var, 4))?;
|
391
|
+
class.define_method("extend_constant", method!(RbExpr::extend_constant, 2))?;
|
356
392
|
class.define_method("any", method!(RbExpr::any, 0))?;
|
357
393
|
class.define_method("all", method!(RbExpr::all, 0))?;
|
358
394
|
class.define_method(
|
@@ -369,6 +405,7 @@ fn init() -> RbResult<()> {
|
|
369
405
|
)?;
|
370
406
|
class.define_method("log", method!(RbExpr::log, 1))?;
|
371
407
|
class.define_method("exp", method!(RbExpr::exp, 0))?;
|
408
|
+
class.define_method("entropy", method!(RbExpr::entropy, 2))?;
|
372
409
|
|
373
410
|
// meta
|
374
411
|
class.define_method("meta_pop", method!(RbExpr::meta_pop, 0))?;
|
@@ -386,6 +423,7 @@ fn init() -> RbResult<()> {
|
|
386
423
|
class.define_singleton_method("fold", function!(crate::lazy::dsl::fold, 3))?;
|
387
424
|
class.define_singleton_method("lit", function!(crate::lazy::dsl::lit, 1))?;
|
388
425
|
class.define_singleton_method("arange", function!(crate::lazy::dsl::arange, 3))?;
|
426
|
+
class.define_singleton_method("repeat", function!(crate::lazy::dsl::repeat, 2))?;
|
389
427
|
class.define_singleton_method("when", function!(crate::lazy::dsl::when, 1))?;
|
390
428
|
class.define_singleton_method("concat_str", function!(crate::lazy::dsl::concat_str, 2))?;
|
391
429
|
class.define_singleton_method("concat_lst", function!(crate::lazy::dsl::concat_lst, 1))?;
|
@@ -469,6 +507,7 @@ fn init() -> RbResult<()> {
|
|
469
507
|
class.define_singleton_method("new_opt_f32", function!(RbSeries::new_opt_f32, 3))?;
|
470
508
|
class.define_singleton_method("new_opt_f64", function!(RbSeries::new_opt_f64, 3))?;
|
471
509
|
class.define_singleton_method("new_str", function!(RbSeries::new_str, 3))?;
|
510
|
+
class.define_singleton_method("new_opt_date", function!(RbSeries::new_opt_date, 3))?;
|
472
511
|
class.define_method("is_sorted_flag", method!(RbSeries::is_sorted_flag, 0))?;
|
473
512
|
class.define_method(
|
474
513
|
"is_sorted_reverse_flag",
|
@@ -536,14 +575,85 @@ fn init() -> RbResult<()> {
|
|
536
575
|
class.define_method("kurtosis", method!(RbSeries::kurtosis, 2))?;
|
537
576
|
class.define_method("cast", method!(RbSeries::cast, 2))?;
|
538
577
|
class.define_method("time_unit", method!(RbSeries::time_unit, 0))?;
|
539
|
-
|
540
|
-
|
541
|
-
|
542
|
-
class.define_method("
|
543
|
-
class.define_method("
|
544
|
-
class.define_method("
|
545
|
-
class.define_method("
|
546
|
-
class.define_method("
|
578
|
+
class.define_method("set_at_idx", method!(RbSeries::set_at_idx, 2))?;
|
579
|
+
|
580
|
+
// eq
|
581
|
+
class.define_method("eq_u8", method!(RbSeries::eq_u8, 1))?;
|
582
|
+
class.define_method("eq_u16", method!(RbSeries::eq_u16, 1))?;
|
583
|
+
class.define_method("eq_u32", method!(RbSeries::eq_u32, 1))?;
|
584
|
+
class.define_method("eq_u64", method!(RbSeries::eq_u64, 1))?;
|
585
|
+
class.define_method("eq_i8", method!(RbSeries::eq_i8, 1))?;
|
586
|
+
class.define_method("eq_i16", method!(RbSeries::eq_i16, 1))?;
|
587
|
+
class.define_method("eq_i32", method!(RbSeries::eq_i32, 1))?;
|
588
|
+
class.define_method("eq_i64", method!(RbSeries::eq_i64, 1))?;
|
589
|
+
class.define_method("eq_f32", method!(RbSeries::eq_f32, 1))?;
|
590
|
+
class.define_method("eq_f64", method!(RbSeries::eq_f64, 1))?;
|
591
|
+
// class.define_method("eq_str", method!(RbSeries::eq_str, 1))?;
|
592
|
+
|
593
|
+
// neq
|
594
|
+
class.define_method("neq_u8", method!(RbSeries::neq_u8, 1))?;
|
595
|
+
class.define_method("neq_u16", method!(RbSeries::neq_u16, 1))?;
|
596
|
+
class.define_method("neq_u32", method!(RbSeries::neq_u32, 1))?;
|
597
|
+
class.define_method("neq_u64", method!(RbSeries::neq_u64, 1))?;
|
598
|
+
class.define_method("neq_i8", method!(RbSeries::neq_i8, 1))?;
|
599
|
+
class.define_method("neq_i16", method!(RbSeries::neq_i16, 1))?;
|
600
|
+
class.define_method("neq_i32", method!(RbSeries::neq_i32, 1))?;
|
601
|
+
class.define_method("neq_i64", method!(RbSeries::neq_i64, 1))?;
|
602
|
+
class.define_method("neq_f32", method!(RbSeries::neq_f32, 1))?;
|
603
|
+
class.define_method("neq_f64", method!(RbSeries::neq_f64, 1))?;
|
604
|
+
// class.define_method("neq_str", method!(RbSeries::neq_str, 1))?;
|
605
|
+
|
606
|
+
// gt
|
607
|
+
class.define_method("gt_u8", method!(RbSeries::gt_u8, 1))?;
|
608
|
+
class.define_method("gt_u16", method!(RbSeries::gt_u16, 1))?;
|
609
|
+
class.define_method("gt_u32", method!(RbSeries::gt_u32, 1))?;
|
610
|
+
class.define_method("gt_u64", method!(RbSeries::gt_u64, 1))?;
|
611
|
+
class.define_method("gt_i8", method!(RbSeries::gt_i8, 1))?;
|
612
|
+
class.define_method("gt_i16", method!(RbSeries::gt_i16, 1))?;
|
613
|
+
class.define_method("gt_i32", method!(RbSeries::gt_i32, 1))?;
|
614
|
+
class.define_method("gt_i64", method!(RbSeries::gt_i64, 1))?;
|
615
|
+
class.define_method("gt_f32", method!(RbSeries::gt_f32, 1))?;
|
616
|
+
class.define_method("gt_f64", method!(RbSeries::gt_f64, 1))?;
|
617
|
+
// class.define_method("gt_str", method!(RbSeries::gt_str, 1))?;
|
618
|
+
|
619
|
+
// gt_eq
|
620
|
+
class.define_method("gt_eq_u8", method!(RbSeries::gt_eq_u8, 1))?;
|
621
|
+
class.define_method("gt_eq_u16", method!(RbSeries::gt_eq_u16, 1))?;
|
622
|
+
class.define_method("gt_eq_u32", method!(RbSeries::gt_eq_u32, 1))?;
|
623
|
+
class.define_method("gt_eq_u64", method!(RbSeries::gt_eq_u64, 1))?;
|
624
|
+
class.define_method("gt_eq_i8", method!(RbSeries::gt_eq_i8, 1))?;
|
625
|
+
class.define_method("gt_eq_i16", method!(RbSeries::gt_eq_i16, 1))?;
|
626
|
+
class.define_method("gt_eq_i32", method!(RbSeries::gt_eq_i32, 1))?;
|
627
|
+
class.define_method("gt_eq_i64", method!(RbSeries::gt_eq_i64, 1))?;
|
628
|
+
class.define_method("gt_eq_f32", method!(RbSeries::gt_eq_f32, 1))?;
|
629
|
+
class.define_method("gt_eq_f64", method!(RbSeries::gt_eq_f64, 1))?;
|
630
|
+
// class.define_method("gt_eq_str", method!(RbSeries::gt_eq_str, 1))?;
|
631
|
+
|
632
|
+
// lt
|
633
|
+
class.define_method("lt_u8", method!(RbSeries::lt_u8, 1))?;
|
634
|
+
class.define_method("lt_u16", method!(RbSeries::lt_u16, 1))?;
|
635
|
+
class.define_method("lt_u32", method!(RbSeries::lt_u32, 1))?;
|
636
|
+
class.define_method("lt_u64", method!(RbSeries::lt_u64, 1))?;
|
637
|
+
class.define_method("lt_i8", method!(RbSeries::lt_i8, 1))?;
|
638
|
+
class.define_method("lt_i16", method!(RbSeries::lt_i16, 1))?;
|
639
|
+
class.define_method("lt_i32", method!(RbSeries::lt_i32, 1))?;
|
640
|
+
class.define_method("lt_i64", method!(RbSeries::lt_i64, 1))?;
|
641
|
+
class.define_method("lt_f32", method!(RbSeries::lt_f32, 1))?;
|
642
|
+
class.define_method("lt_f64", method!(RbSeries::lt_f64, 1))?;
|
643
|
+
// class.define_method("lt_str", method!(RbSeries::lt_str, 1))?;
|
644
|
+
|
645
|
+
// lt_eq
|
646
|
+
class.define_method("lt_eq_u8", method!(RbSeries::lt_eq_u8, 1))?;
|
647
|
+
class.define_method("lt_eq_u16", method!(RbSeries::lt_eq_u16, 1))?;
|
648
|
+
class.define_method("lt_eq_u32", method!(RbSeries::lt_eq_u32, 1))?;
|
649
|
+
class.define_method("lt_eq_u64", method!(RbSeries::lt_eq_u64, 1))?;
|
650
|
+
class.define_method("lt_eq_i8", method!(RbSeries::lt_eq_i8, 1))?;
|
651
|
+
class.define_method("lt_eq_i16", method!(RbSeries::lt_eq_i16, 1))?;
|
652
|
+
class.define_method("lt_eq_i32", method!(RbSeries::lt_eq_i32, 1))?;
|
653
|
+
class.define_method("lt_eq_i64", method!(RbSeries::lt_eq_i64, 1))?;
|
654
|
+
class.define_method("lt_eq_f32", method!(RbSeries::lt_eq_f32, 1))?;
|
655
|
+
class.define_method("lt_eq_f64", method!(RbSeries::lt_eq_f64, 1))?;
|
656
|
+
// class.define_method("lt_eq_str", method!(RbSeries::lt_eq_str, 1))?;
|
547
657
|
|
548
658
|
let class = module.define_class("RbWhen", Default::default())?;
|
549
659
|
class.define_method("_then", method!(RbWhen::then, 1))?;
|
@@ -641,3 +751,34 @@ fn parquet_schema(rb_f: Value) -> RbResult<Value> {
|
|
641
751
|
}
|
642
752
|
Ok(dict.into())
|
643
753
|
}
|
754
|
+
|
755
|
+
fn rb_date_range(
|
756
|
+
start: i64,
|
757
|
+
stop: i64,
|
758
|
+
every: String,
|
759
|
+
closed: Wrap<ClosedWindow>,
|
760
|
+
name: String,
|
761
|
+
tu: Wrap<TimeUnit>,
|
762
|
+
tz: Option<TimeZone>,
|
763
|
+
) -> RbSeries {
|
764
|
+
polars::time::date_range_impl(
|
765
|
+
&name,
|
766
|
+
start,
|
767
|
+
stop,
|
768
|
+
Duration::parse(&every),
|
769
|
+
closed.0,
|
770
|
+
tu.0,
|
771
|
+
tz,
|
772
|
+
)
|
773
|
+
.into_series()
|
774
|
+
.into()
|
775
|
+
}
|
776
|
+
|
777
|
+
fn as_struct(exprs: RArray) -> RbResult<RbExpr> {
|
778
|
+
let exprs = rb_exprs_to_exprs(exprs)?;
|
779
|
+
Ok(polars::lazy::dsl::as_struct(&exprs).into())
|
780
|
+
}
|
781
|
+
|
782
|
+
fn arg_where(condition: &RbExpr) -> RbExpr {
|
783
|
+
polars::lazy::dsl::arg_where(condition.inner.clone()).into()
|
784
|
+
}
|
data/ext/polars/src/series.rs
CHANGED
@@ -5,6 +5,7 @@ use polars::series::IsSorted;
|
|
5
5
|
use std::cell::RefCell;
|
6
6
|
|
7
7
|
use crate::conversion::*;
|
8
|
+
use crate::set::set_at_idx;
|
8
9
|
use crate::{RbDataFrame, RbPolarsErr, RbResult, RbValueError};
|
9
10
|
|
10
11
|
#[magnus::wrap(class = "Polars::RbSeries")]
|
@@ -474,6 +475,12 @@ impl RbSeries {
|
|
474
475
|
s.into_iter().collect()
|
475
476
|
} else if let Ok(s) = series.utf8() {
|
476
477
|
s.into_iter().collect()
|
478
|
+
} else if let Ok(_s) = series.date() {
|
479
|
+
let a = RArray::with_capacity(series.len());
|
480
|
+
for v in series.iter() {
|
481
|
+
a.push::<Value>(Wrap(v).into()).unwrap();
|
482
|
+
}
|
483
|
+
a
|
477
484
|
} else {
|
478
485
|
unimplemented!();
|
479
486
|
}
|
@@ -594,44 +601,190 @@ impl RbSeries {
|
|
594
601
|
}
|
595
602
|
}
|
596
603
|
|
597
|
-
|
598
|
-
|
599
|
-
|
600
|
-
|
604
|
+
pub fn set_at_idx(&self, idx: &RbSeries, values: &RbSeries) -> RbResult<()> {
|
605
|
+
let mut s = self.series.borrow_mut();
|
606
|
+
match set_at_idx(s.clone(), &idx.series.borrow(), &values.series.borrow()) {
|
607
|
+
Ok(out) => {
|
608
|
+
*s = out;
|
609
|
+
Ok(())
|
610
|
+
}
|
611
|
+
Err(e) => Err(RbPolarsErr::from(e)),
|
612
|
+
}
|
601
613
|
}
|
614
|
+
}
|
602
615
|
|
603
|
-
|
604
|
-
|
605
|
-
|
616
|
+
macro_rules! impl_eq_num {
|
617
|
+
($name:ident, $type:ty) => {
|
618
|
+
impl RbSeries {
|
619
|
+
pub fn $name(&self, rhs: $type) -> RbResult<Self> {
|
620
|
+
let s = self.series.borrow().equal(rhs).map_err(RbPolarsErr::from)?;
|
621
|
+
Ok(RbSeries::new(s.into_series()))
|
622
|
+
}
|
623
|
+
}
|
624
|
+
};
|
625
|
+
}
|
606
626
|
|
607
|
-
|
608
|
-
|
609
|
-
|
627
|
+
impl_eq_num!(eq_u8, u8);
|
628
|
+
impl_eq_num!(eq_u16, u16);
|
629
|
+
impl_eq_num!(eq_u32, u32);
|
630
|
+
impl_eq_num!(eq_u64, u64);
|
631
|
+
impl_eq_num!(eq_i8, i8);
|
632
|
+
impl_eq_num!(eq_i16, i16);
|
633
|
+
impl_eq_num!(eq_i32, i32);
|
634
|
+
impl_eq_num!(eq_i64, i64);
|
635
|
+
impl_eq_num!(eq_f32, f32);
|
636
|
+
impl_eq_num!(eq_f64, f64);
|
637
|
+
// impl_eq_num!(eq_str, &str);
|
638
|
+
|
639
|
+
macro_rules! impl_neq_num {
|
640
|
+
($name:ident, $type:ty) => {
|
641
|
+
impl RbSeries {
|
642
|
+
pub fn $name(&self, rhs: $type) -> RbResult<Self> {
|
643
|
+
let s = self
|
644
|
+
.series
|
645
|
+
.borrow()
|
646
|
+
.not_equal(rhs)
|
647
|
+
.map_err(RbPolarsErr::from)?;
|
648
|
+
Ok(RbSeries::new(s.into_series()))
|
649
|
+
}
|
650
|
+
}
|
651
|
+
};
|
652
|
+
}
|
610
653
|
|
611
|
-
|
612
|
-
|
613
|
-
|
654
|
+
impl_neq_num!(neq_u8, u8);
|
655
|
+
impl_neq_num!(neq_u16, u16);
|
656
|
+
impl_neq_num!(neq_u32, u32);
|
657
|
+
impl_neq_num!(neq_u64, u64);
|
658
|
+
impl_neq_num!(neq_i8, i8);
|
659
|
+
impl_neq_num!(neq_i16, i16);
|
660
|
+
impl_neq_num!(neq_i32, i32);
|
661
|
+
impl_neq_num!(neq_i64, i64);
|
662
|
+
impl_neq_num!(neq_f32, f32);
|
663
|
+
impl_neq_num!(neq_f64, f64);
|
664
|
+
// impl_neq_num!(neq_str, &str);
|
665
|
+
|
666
|
+
macro_rules! impl_gt_num {
|
667
|
+
($name:ident, $type:ty) => {
|
668
|
+
impl RbSeries {
|
669
|
+
pub fn $name(&self, rhs: $type) -> RbResult<Self> {
|
670
|
+
let s = self.series.borrow().gt(rhs).map_err(RbPolarsErr::from)?;
|
671
|
+
Ok(RbSeries::new(s.into_series()))
|
672
|
+
}
|
673
|
+
}
|
674
|
+
};
|
675
|
+
}
|
614
676
|
|
615
|
-
|
616
|
-
|
617
|
-
|
618
|
-
|
677
|
+
impl_gt_num!(gt_u8, u8);
|
678
|
+
impl_gt_num!(gt_u16, u16);
|
679
|
+
impl_gt_num!(gt_u32, u32);
|
680
|
+
impl_gt_num!(gt_u64, u64);
|
681
|
+
impl_gt_num!(gt_i8, i8);
|
682
|
+
impl_gt_num!(gt_i16, i16);
|
683
|
+
impl_gt_num!(gt_i32, i32);
|
684
|
+
impl_gt_num!(gt_i64, i64);
|
685
|
+
impl_gt_num!(gt_f32, f32);
|
686
|
+
impl_gt_num!(gt_f64, f64);
|
687
|
+
// impl_gt_num!(gt_str, &str);
|
688
|
+
|
689
|
+
macro_rules! impl_gt_eq_num {
|
690
|
+
($name:ident, $type:ty) => {
|
691
|
+
impl RbSeries {
|
692
|
+
pub fn $name(&self, rhs: $type) -> RbResult<Self> {
|
693
|
+
let s = self.series.borrow().gt_eq(rhs).map_err(RbPolarsErr::from)?;
|
694
|
+
Ok(RbSeries::new(s.into_series()))
|
695
|
+
}
|
696
|
+
}
|
697
|
+
};
|
698
|
+
}
|
619
699
|
|
620
|
-
|
621
|
-
|
622
|
-
|
623
|
-
|
700
|
+
impl_gt_eq_num!(gt_eq_u8, u8);
|
701
|
+
impl_gt_eq_num!(gt_eq_u16, u16);
|
702
|
+
impl_gt_eq_num!(gt_eq_u32, u32);
|
703
|
+
impl_gt_eq_num!(gt_eq_u64, u64);
|
704
|
+
impl_gt_eq_num!(gt_eq_i8, i8);
|
705
|
+
impl_gt_eq_num!(gt_eq_i16, i16);
|
706
|
+
impl_gt_eq_num!(gt_eq_i32, i32);
|
707
|
+
impl_gt_eq_num!(gt_eq_i64, i64);
|
708
|
+
impl_gt_eq_num!(gt_eq_f32, f32);
|
709
|
+
impl_gt_eq_num!(gt_eq_f64, f64);
|
710
|
+
// impl_gt_eq_num!(gt_eq_str, &str);
|
711
|
+
|
712
|
+
macro_rules! impl_lt_num {
|
713
|
+
($name:ident, $type:ty) => {
|
714
|
+
impl RbSeries {
|
715
|
+
pub fn $name(&self, rhs: $type) -> RbResult<RbSeries> {
|
716
|
+
let s = self.series.borrow().lt(rhs).map_err(RbPolarsErr::from)?;
|
717
|
+
Ok(RbSeries::new(s.into_series()))
|
718
|
+
}
|
719
|
+
}
|
720
|
+
};
|
721
|
+
}
|
624
722
|
|
625
|
-
|
626
|
-
|
627
|
-
|
628
|
-
|
629
|
-
|
630
|
-
|
631
|
-
|
632
|
-
|
723
|
+
impl_lt_num!(lt_u8, u8);
|
724
|
+
impl_lt_num!(lt_u16, u16);
|
725
|
+
impl_lt_num!(lt_u32, u32);
|
726
|
+
impl_lt_num!(lt_u64, u64);
|
727
|
+
impl_lt_num!(lt_i8, i8);
|
728
|
+
impl_lt_num!(lt_i16, i16);
|
729
|
+
impl_lt_num!(lt_i32, i32);
|
730
|
+
impl_lt_num!(lt_i64, i64);
|
731
|
+
impl_lt_num!(lt_f32, f32);
|
732
|
+
impl_lt_num!(lt_f64, f64);
|
733
|
+
// impl_lt_num!(lt_str, &str);
|
734
|
+
|
735
|
+
macro_rules! impl_lt_eq_num {
|
736
|
+
($name:ident, $type:ty) => {
|
737
|
+
impl RbSeries {
|
738
|
+
pub fn $name(&self, rhs: $type) -> RbResult<Self> {
|
739
|
+
let s = self.series.borrow().lt_eq(rhs).map_err(RbPolarsErr::from)?;
|
740
|
+
Ok(RbSeries::new(s.into_series()))
|
741
|
+
}
|
742
|
+
}
|
743
|
+
};
|
744
|
+
}
|
745
|
+
|
746
|
+
impl_lt_eq_num!(lt_eq_u8, u8);
|
747
|
+
impl_lt_eq_num!(lt_eq_u16, u16);
|
748
|
+
impl_lt_eq_num!(lt_eq_u32, u32);
|
749
|
+
impl_lt_eq_num!(lt_eq_u64, u64);
|
750
|
+
impl_lt_eq_num!(lt_eq_i8, i8);
|
751
|
+
impl_lt_eq_num!(lt_eq_i16, i16);
|
752
|
+
impl_lt_eq_num!(lt_eq_i32, i32);
|
753
|
+
impl_lt_eq_num!(lt_eq_i64, i64);
|
754
|
+
impl_lt_eq_num!(lt_eq_f32, f32);
|
755
|
+
impl_lt_eq_num!(lt_eq_f64, f64);
|
756
|
+
// impl_lt_eq_num!(lt_eq_str, &str);
|
757
|
+
|
758
|
+
pub fn to_series_collection(rs: RArray) -> RbResult<Vec<Series>> {
|
759
|
+
let mut series = Vec::new();
|
760
|
+
for item in rs.each() {
|
761
|
+
series.push(item?.try_convert::<&RbSeries>()?.series.borrow().clone());
|
762
|
+
}
|
763
|
+
Ok(series)
|
633
764
|
}
|
634
765
|
|
635
766
|
pub fn to_rbseries_collection(s: Vec<Series>) -> Vec<RbSeries> {
|
636
767
|
s.into_iter().map(RbSeries::new).collect()
|
637
768
|
}
|
769
|
+
|
770
|
+
impl RbSeries {
|
771
|
+
pub fn new_opt_date(name: String, values: RArray, _strict: Option<bool>) -> RbResult<Self> {
|
772
|
+
let len = values.len();
|
773
|
+
let mut builder = PrimitiveChunkedBuilder::<Int32Type>::new(&name, len);
|
774
|
+
for item in values.each() {
|
775
|
+
let v = item?;
|
776
|
+
if v.is_nil() {
|
777
|
+
builder.append_null();
|
778
|
+
} else {
|
779
|
+
// convert to DateTime for UTC
|
780
|
+
let v: Value = v.funcall("to_datetime", ())?;
|
781
|
+
let v: Value = v.funcall("to_time", ())?;
|
782
|
+
let v: Value = v.funcall("to_i", ())?;
|
783
|
+
// TODO use strict
|
784
|
+
builder.append_value(v.try_convert::<i32>()? / 86400);
|
785
|
+
}
|
786
|
+
}
|
787
|
+
let ca: ChunkedArray<Int32Type> = builder.finish();
|
788
|
+
Ok(ca.into_date().into_series().into())
|
789
|
+
}
|
790
|
+
}
|
@@ -0,0 +1,91 @@
|
|
1
|
+
// use polars::export::arrow2::array::Array;
|
2
|
+
use polars::prelude::*;
|
3
|
+
|
4
|
+
pub fn set_at_idx(mut s: Series, idx: &Series, values: &Series) -> PolarsResult<Series> {
|
5
|
+
let logical_dtype = s.dtype().clone();
|
6
|
+
let idx = idx.cast(&IDX_DTYPE)?;
|
7
|
+
let idx = idx.rechunk();
|
8
|
+
let idx = idx.idx().unwrap();
|
9
|
+
let idx = idx.downcast_iter().next().unwrap();
|
10
|
+
|
11
|
+
// if idx.null_count() > 0 {
|
12
|
+
// return Err(PolarsError::ComputeError(
|
13
|
+
// "index values should not be null".into(),
|
14
|
+
// ));
|
15
|
+
// }
|
16
|
+
|
17
|
+
let idx = idx.values().as_slice();
|
18
|
+
|
19
|
+
let values = values.to_physical_repr().cast(&s.dtype().to_physical())?;
|
20
|
+
|
21
|
+
// do not shadow, otherwise s is not dropped immediately
|
22
|
+
// and we want to have mutable access
|
23
|
+
s = s.to_physical_repr().into_owned();
|
24
|
+
let mutable_s = s._get_inner_mut();
|
25
|
+
|
26
|
+
let s = match logical_dtype.to_physical() {
|
27
|
+
DataType::Int8 => {
|
28
|
+
let ca: &mut ChunkedArray<Int8Type> = mutable_s.as_mut();
|
29
|
+
let values = values.i8()?;
|
30
|
+
std::mem::take(ca).set_at_idx2(idx, values.into_iter())
|
31
|
+
}
|
32
|
+
DataType::Int16 => {
|
33
|
+
let ca: &mut ChunkedArray<Int16Type> = mutable_s.as_mut();
|
34
|
+
let values = values.i16()?;
|
35
|
+
std::mem::take(ca).set_at_idx2(idx, values.into_iter())
|
36
|
+
}
|
37
|
+
DataType::Int32 => {
|
38
|
+
let ca: &mut ChunkedArray<Int32Type> = mutable_s.as_mut();
|
39
|
+
let values = values.i32()?;
|
40
|
+
std::mem::take(ca).set_at_idx2(idx, values.into_iter())
|
41
|
+
}
|
42
|
+
DataType::Int64 => {
|
43
|
+
let ca: &mut ChunkedArray<Int64Type> = mutable_s.as_mut();
|
44
|
+
let values = values.i64()?;
|
45
|
+
std::mem::take(ca).set_at_idx2(idx, values.into_iter())
|
46
|
+
}
|
47
|
+
DataType::UInt8 => {
|
48
|
+
let ca: &mut ChunkedArray<UInt8Type> = mutable_s.as_mut();
|
49
|
+
let values = values.u8()?;
|
50
|
+
std::mem::take(ca).set_at_idx2(idx, values.into_iter())
|
51
|
+
}
|
52
|
+
DataType::UInt16 => {
|
53
|
+
let ca: &mut ChunkedArray<UInt16Type> = mutable_s.as_mut();
|
54
|
+
let values = values.u16()?;
|
55
|
+
std::mem::take(ca).set_at_idx2(idx, values.into_iter())
|
56
|
+
}
|
57
|
+
DataType::UInt32 => {
|
58
|
+
let ca: &mut ChunkedArray<UInt32Type> = mutable_s.as_mut();
|
59
|
+
let values = values.u32()?;
|
60
|
+
std::mem::take(ca).set_at_idx2(idx, values.into_iter())
|
61
|
+
}
|
62
|
+
DataType::UInt64 => {
|
63
|
+
let ca: &mut ChunkedArray<UInt64Type> = mutable_s.as_mut();
|
64
|
+
let values = values.u64()?;
|
65
|
+
std::mem::take(ca).set_at_idx2(idx, values.into_iter())
|
66
|
+
}
|
67
|
+
DataType::Float32 => {
|
68
|
+
let ca: &mut ChunkedArray<Float32Type> = mutable_s.as_mut();
|
69
|
+
let values = values.f32()?;
|
70
|
+
std::mem::take(ca).set_at_idx2(idx, values.into_iter())
|
71
|
+
}
|
72
|
+
DataType::Float64 => {
|
73
|
+
let ca: &mut ChunkedArray<Float64Type> = mutable_s.as_mut();
|
74
|
+
let values = values.f64()?;
|
75
|
+
std::mem::take(ca).set_at_idx2(idx, values.into_iter())
|
76
|
+
}
|
77
|
+
DataType::Boolean => {
|
78
|
+
let ca = s.bool()?;
|
79
|
+
let values = values.bool()?;
|
80
|
+
ca.set_at_idx2(idx, values)
|
81
|
+
}
|
82
|
+
DataType::Utf8 => {
|
83
|
+
let ca = s.utf8()?;
|
84
|
+
let values = values.utf8()?;
|
85
|
+
ca.set_at_idx2(idx, values)
|
86
|
+
}
|
87
|
+
_ => panic!("not yet implemented for dtype: {}", logical_dtype),
|
88
|
+
};
|
89
|
+
|
90
|
+
s.and_then(|s| s.cast(&logical_dtype))
|
91
|
+
}
|
@@ -0,0 +1,19 @@
|
|
1
|
+
use polars::prelude::*;
|
2
|
+
|
3
|
+
pub fn reinterpret(s: &Series, signed: bool) -> polars::prelude::PolarsResult<Series> {
|
4
|
+
match (s.dtype(), signed) {
|
5
|
+
(DataType::UInt64, true) => {
|
6
|
+
let ca = s.u64().unwrap();
|
7
|
+
Ok(ca.reinterpret_signed().into_series())
|
8
|
+
}
|
9
|
+
(DataType::UInt64, false) => Ok(s.clone()),
|
10
|
+
(DataType::Int64, false) => {
|
11
|
+
let ca = s.i64().unwrap();
|
12
|
+
Ok(ca.reinterpret_unsigned().into_series())
|
13
|
+
}
|
14
|
+
(DataType::Int64, true) => Ok(s.clone()),
|
15
|
+
_ => Err(PolarsError::ComputeError(
|
16
|
+
"reinterpret is only allowed for 64bit integers dtype, use cast otherwise".into(),
|
17
|
+
)),
|
18
|
+
}
|
19
|
+
}
|
data/lib/polars/cat_expr.rb
CHANGED
@@ -1,11 +1,50 @@
|
|
1
1
|
module Polars
|
2
|
+
# Namespace for categorical related expressions.
|
2
3
|
class CatExpr
|
4
|
+
# @private
|
3
5
|
attr_accessor :_rbexpr
|
4
6
|
|
7
|
+
# @private
|
5
8
|
def initialize(expr)
  # Share the native expression handle of the wrapped expression.
  rbexpr = expr._rbexpr
  self._rbexpr = rbexpr
end
|
8
11
|
|
12
|
+
# Determine how this categorical series should be sorted.
|
13
|
+
#
|
14
|
+
# @param ordering ["physical", "lexical"]
|
15
|
+
# Ordering type:
|
16
|
+
#
|
17
|
+
# - 'physical' -> Use the physical representation of the categories to determine the order (default).
|
18
|
+
# - 'lexical' -> Use the string values to determine the ordering.
|
19
|
+
#
|
20
|
+
# @return [Expr]
|
21
|
+
#
|
22
|
+
# @example
|
23
|
+
# df = Polars::DataFrame.new(
|
24
|
+
# {"cats" => ["z", "z", "k", "a", "b"], "vals" => [3, 1, 2, 2, 3]}
|
25
|
+
# ).with_columns(
|
26
|
+
# [
|
27
|
+
# Polars.col("cats").cast(:cat).cat.set_ordering("lexical")
|
28
|
+
# ]
|
29
|
+
# )
|
30
|
+
# df.sort(["cats", "vals"])
|
31
|
+
# # =>
|
32
|
+
# # shape: (5, 2)
|
33
|
+
# # ┌──────┬──────┐
|
34
|
+
# # │ cats ┆ vals │
|
35
|
+
# # │ --- ┆ --- │
|
36
|
+
# # │ cat ┆ i64 │
|
37
|
+
# # ╞══════╪══════╡
|
38
|
+
# # │ a ┆ 2 │
|
39
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
40
|
+
# # │ b ┆ 3 │
|
41
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
42
|
+
# # │ k ┆ 2 │
|
43
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
44
|
+
# # │ z ┆ 1 │
|
45
|
+
# # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
|
46
|
+
# # │ z ┆ 3 │
|
47
|
+
# # └──────┴──────┘
|
9
48
|
def set_ordering(ordering)
  # Delegate to the native expression, then wrap the result as an expression.
  ordered = _rbexpr.cat_set_ordering(ordering)
  Utils.wrap_expr(ordered)
end
|