polars-df 0.14.0 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +35 -0
- data/Cargo.lock +1523 -378
- data/LICENSE.txt +1 -0
- data/README.md +38 -4
- data/ext/polars/Cargo.toml +15 -5
- data/ext/polars/src/batched_csv.rs +7 -10
- data/ext/polars/src/conversion/any_value.rs +31 -21
- data/ext/polars/src/conversion/mod.rs +155 -48
- data/ext/polars/src/dataframe/construction.rs +0 -3
- data/ext/polars/src/dataframe/export.rs +9 -2
- data/ext/polars/src/dataframe/general.rs +15 -57
- data/ext/polars/src/dataframe/io.rs +77 -169
- data/ext/polars/src/dataframe/mod.rs +1 -0
- data/ext/polars/src/dataframe/serde.rs +15 -0
- data/ext/polars/src/error.rs +31 -48
- data/ext/polars/src/exceptions.rs +24 -0
- data/ext/polars/src/expr/binary.rs +4 -42
- data/ext/polars/src/expr/datetime.rs +5 -4
- data/ext/polars/src/expr/general.rs +16 -22
- data/ext/polars/src/expr/list.rs +18 -11
- data/ext/polars/src/expr/meta.rs +6 -2
- data/ext/polars/src/expr/rolling.rs +6 -7
- data/ext/polars/src/expr/string.rs +9 -36
- data/ext/polars/src/file.rs +78 -23
- data/ext/polars/src/functions/aggregation.rs +4 -4
- data/ext/polars/src/functions/business.rs +15 -0
- data/ext/polars/src/functions/io.rs +34 -13
- data/ext/polars/src/functions/lazy.rs +22 -12
- data/ext/polars/src/functions/meta.rs +1 -1
- data/ext/polars/src/functions/mod.rs +1 -0
- data/ext/polars/src/interop/arrow/mod.rs +1 -0
- data/ext/polars/src/interop/arrow/to_ruby.rs +83 -0
- data/ext/polars/src/interop/mod.rs +1 -0
- data/ext/polars/src/lazyframe/general.rs +920 -0
- data/ext/polars/src/lazyframe/mod.rs +3 -827
- data/ext/polars/src/lazyframe/serde.rs +31 -0
- data/ext/polars/src/lib.rs +54 -27
- data/ext/polars/src/map/dataframe.rs +10 -6
- data/ext/polars/src/map/lazy.rs +65 -4
- data/ext/polars/src/map/mod.rs +9 -8
- data/ext/polars/src/on_startup.rs +1 -1
- data/ext/polars/src/series/aggregation.rs +1 -5
- data/ext/polars/src/series/arithmetic.rs +10 -10
- data/ext/polars/src/series/construction.rs +2 -2
- data/ext/polars/src/series/export.rs +1 -1
- data/ext/polars/src/series/general.rs +631 -0
- data/ext/polars/src/series/import.rs +55 -0
- data/ext/polars/src/series/mod.rs +11 -638
- data/ext/polars/src/series/scatter.rs +2 -2
- data/ext/polars/src/utils.rs +0 -20
- data/lib/polars/batched_csv_reader.rb +0 -2
- data/lib/polars/binary_expr.rb +133 -9
- data/lib/polars/binary_name_space.rb +101 -6
- data/lib/polars/config.rb +4 -0
- data/lib/polars/data_frame.rb +452 -101
- data/lib/polars/data_type_group.rb +28 -0
- data/lib/polars/data_types.rb +3 -1
- data/lib/polars/date_time_expr.rb +244 -0
- data/lib/polars/date_time_name_space.rb +87 -0
- data/lib/polars/expr.rb +103 -2
- data/lib/polars/functions/aggregation/horizontal.rb +10 -4
- data/lib/polars/functions/as_datatype.rb +51 -2
- data/lib/polars/functions/col.rb +1 -1
- data/lib/polars/functions/eager.rb +1 -3
- data/lib/polars/functions/lazy.rb +95 -13
- data/lib/polars/functions/range/time_range.rb +21 -21
- data/lib/polars/io/csv.rb +14 -16
- data/lib/polars/io/database.rb +2 -2
- data/lib/polars/io/delta.rb +126 -0
- data/lib/polars/io/ipc.rb +14 -4
- data/lib/polars/io/ndjson.rb +10 -0
- data/lib/polars/io/parquet.rb +168 -111
- data/lib/polars/lazy_frame.rb +684 -20
- data/lib/polars/list_name_space.rb +169 -0
- data/lib/polars/selectors.rb +1226 -0
- data/lib/polars/series.rb +465 -35
- data/lib/polars/string_cache.rb +27 -1
- data/lib/polars/string_expr.rb +0 -1
- data/lib/polars/string_name_space.rb +73 -3
- data/lib/polars/struct_name_space.rb +31 -7
- data/lib/polars/utils/various.rb +5 -1
- data/lib/polars/utils.rb +45 -10
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +17 -1
- metadata +16 -9
- data/lib/polars/functions.rb +0 -57
@@ -0,0 +1,31 @@
|
|
1
|
+
use magnus::Value;
|
2
|
+
use polars::lazy::frame::LazyFrame;
|
3
|
+
use polars::prelude::*;
|
4
|
+
use std::io::Read;
|
5
|
+
|
6
|
+
use crate::file::get_file_like;
|
7
|
+
use crate::{RbLazyFrame, RbResult, RbValueError};
|
8
|
+
|
9
|
+
impl RbLazyFrame {
|
10
|
+
// TODO change to serialize_json
|
11
|
+
pub fn read_json(rb_f: Value) -> RbResult<Self> {
|
12
|
+
// it is faster to first read to memory and then parse: https://github.com/serde-rs/json/issues/160
|
13
|
+
// so don't bother with files.
|
14
|
+
let mut json = String::new();
|
15
|
+
let _ = get_file_like(rb_f, false)?
|
16
|
+
.read_to_string(&mut json)
|
17
|
+
.unwrap();
|
18
|
+
|
19
|
+
// Safety
|
20
|
+
// we skipped the serializing/deserializing of the static in lifetime in `DataType`
|
21
|
+
// so we actually don't have a lifetime at all when serializing.
|
22
|
+
|
23
|
+
// &str still has a lifetime. Bit its ok, because we drop it immediately
|
24
|
+
// in this scope
|
25
|
+
let json = unsafe { std::mem::transmute::<&'_ str, &'static str>(json.as_str()) };
|
26
|
+
|
27
|
+
let lp = serde_json::from_str::<DslPlan>(json)
|
28
|
+
.map_err(|err| RbValueError::new_err(format!("{:?}", err)))?;
|
29
|
+
Ok(LazyFrame::from(lp).into())
|
30
|
+
}
|
31
|
+
}
|
data/ext/polars/src/lib.rs
CHANGED
@@ -3,6 +3,7 @@ mod batched_csv;
|
|
3
3
|
mod conversion;
|
4
4
|
mod dataframe;
|
5
5
|
mod error;
|
6
|
+
mod exceptions;
|
6
7
|
mod expr;
|
7
8
|
mod file;
|
8
9
|
mod functions;
|
@@ -21,21 +22,31 @@ mod utils;
|
|
21
22
|
use batched_csv::RbBatchedCsv;
|
22
23
|
use conversion::*;
|
23
24
|
use dataframe::RbDataFrame;
|
24
|
-
use error::
|
25
|
+
use error::RbPolarsErr;
|
26
|
+
use exceptions::{RbTypeError, RbValueError};
|
25
27
|
use expr::rb_exprs_to_exprs;
|
26
28
|
use expr::RbExpr;
|
27
29
|
use functions::string_cache::RbStringCacheHolder;
|
28
30
|
use functions::whenthen::{RbChainedThen, RbChainedWhen, RbThen, RbWhen};
|
31
|
+
use interop::arrow::to_ruby::RbArrowArrayStream;
|
29
32
|
use lazyframe::RbLazyFrame;
|
30
33
|
use lazygroupby::RbLazyGroupBy;
|
31
|
-
use magnus::{define_module, function, method, prelude::*,
|
34
|
+
use magnus::{define_module, function, method, prelude::*, Ruby};
|
32
35
|
use series::RbSeries;
|
33
36
|
use sql::RbSQLContext;
|
34
37
|
|
35
|
-
|
38
|
+
use magnus::error::Result as RbResult;
|
39
|
+
use magnus::Error as RbErr;
|
40
|
+
|
41
|
+
// TODO move
|
42
|
+
fn re_escape(pattern: String) -> String {
|
43
|
+
regex::escape(&pattern)
|
44
|
+
}
|
36
45
|
|
37
46
|
#[magnus::init]
|
38
47
|
fn init(ruby: &Ruby) -> RbResult<()> {
|
48
|
+
crate::on_startup::register_startup_deps();
|
49
|
+
|
39
50
|
let module = define_module("Polars")?;
|
40
51
|
|
41
52
|
let class = module.define_class("RbBatchedCsv", ruby.class_object())?;
|
@@ -45,7 +56,6 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
45
56
|
let class = module.define_class("RbDataFrame", ruby.class_object())?;
|
46
57
|
class.define_singleton_method("new", function!(RbDataFrame::init, 1))?;
|
47
58
|
class.define_singleton_method("read_csv", function!(RbDataFrame::read_csv, -1))?;
|
48
|
-
class.define_singleton_method("read_parquet", function!(RbDataFrame::read_parquet, 9))?;
|
49
59
|
class.define_singleton_method("read_ipc", function!(RbDataFrame::read_ipc, 6))?;
|
50
60
|
class.define_singleton_method(
|
51
61
|
"read_ipc_stream",
|
@@ -58,17 +68,21 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
58
68
|
class.define_singleton_method("read_ndjson", function!(RbDataFrame::read_ndjson, 4))?;
|
59
69
|
class.define_method("estimated_size", method!(RbDataFrame::estimated_size, 0))?;
|
60
70
|
class.define_method("dtype_strings", method!(RbDataFrame::dtype_strings, 0))?;
|
61
|
-
class.define_method("write_avro", method!(RbDataFrame::write_avro,
|
71
|
+
class.define_method("write_avro", method!(RbDataFrame::write_avro, 3))?;
|
62
72
|
class.define_method("write_json", method!(RbDataFrame::write_json, 3))?;
|
63
73
|
class.define_method("write_ndjson", method!(RbDataFrame::write_ndjson, 1))?;
|
64
74
|
class.define_method("write_csv", method!(RbDataFrame::write_csv, 10))?;
|
65
|
-
class.define_method("write_ipc", method!(RbDataFrame::write_ipc,
|
75
|
+
class.define_method("write_ipc", method!(RbDataFrame::write_ipc, 5))?;
|
66
76
|
class.define_method(
|
67
77
|
"write_ipc_stream",
|
68
|
-
method!(RbDataFrame::write_ipc_stream,
|
78
|
+
method!(RbDataFrame::write_ipc_stream, 3),
|
69
79
|
)?;
|
70
80
|
class.define_method("row_tuple", method!(RbDataFrame::row_tuple, 1))?;
|
71
81
|
class.define_method("row_tuples", method!(RbDataFrame::row_tuples, 0))?;
|
82
|
+
class.define_method(
|
83
|
+
"arrow_c_stream",
|
84
|
+
method!(RbDataFrame::__arrow_c_stream__, 0),
|
85
|
+
)?;
|
72
86
|
class.define_method("to_numo", method!(RbDataFrame::to_numo, 0))?;
|
73
87
|
class.define_method("write_parquet", method!(RbDataFrame::write_parquet, 6))?;
|
74
88
|
class.define_method("add", method!(RbDataFrame::add, 1))?;
|
@@ -129,10 +143,6 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
129
143
|
class.define_method("pivot_expr", method!(RbDataFrame::pivot_expr, 7))?;
|
130
144
|
class.define_method("partition_by", method!(RbDataFrame::partition_by, 3))?;
|
131
145
|
class.define_method("lazy", method!(RbDataFrame::lazy, 0))?;
|
132
|
-
class.define_method("mean_horizontal", method!(RbDataFrame::mean_horizontal, 1))?;
|
133
|
-
class.define_method("max_horizontal", method!(RbDataFrame::max_horizontal, 0))?;
|
134
|
-
class.define_method("min_horizontal", method!(RbDataFrame::min_horizontal, 0))?;
|
135
|
-
class.define_method("sum_horizontal", method!(RbDataFrame::sum_horizontal, 1))?;
|
136
146
|
class.define_method("to_dummies", method!(RbDataFrame::to_dummies, 3))?;
|
137
147
|
class.define_method("null_count", method!(RbDataFrame::null_count, 0))?;
|
138
148
|
class.define_method("map_rows", method!(RbDataFrame::map_rows, 3))?;
|
@@ -143,6 +153,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
143
153
|
class.define_method("to_struct", method!(RbDataFrame::to_struct, 1))?;
|
144
154
|
class.define_method("unnest", method!(RbDataFrame::unnest, 1))?;
|
145
155
|
class.define_method("clear", method!(RbDataFrame::clear, 0))?;
|
156
|
+
class.define_method("serialize_json", method!(RbDataFrame::serialize_json, 1))?;
|
146
157
|
|
147
158
|
let class = module.define_class("RbExpr", ruby.class_object())?;
|
148
159
|
class.define_method("+", method!(RbExpr::add, 1))?;
|
@@ -286,6 +297,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
286
297
|
class.define_method("str_slice", method!(RbExpr::str_slice, 2))?;
|
287
298
|
class.define_method("str_to_uppercase", method!(RbExpr::str_to_uppercase, 0))?;
|
288
299
|
class.define_method("str_to_lowercase", method!(RbExpr::str_to_lowercase, 0))?;
|
300
|
+
// class.define_method("str_to_titlecase", method!(RbExpr::str_to_titlecase, 0))?;
|
289
301
|
class.define_method("str_len_bytes", method!(RbExpr::str_len_bytes, 0))?;
|
290
302
|
class.define_method("str_len_chars", method!(RbExpr::str_len_chars, 0))?;
|
291
303
|
class.define_method("str_replace_n", method!(RbExpr::str_replace_n, 4))?;
|
@@ -412,7 +424,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
412
424
|
class.define_method("dt_dst_offset", method!(RbExpr::dt_dst_offset, 0))?;
|
413
425
|
class.define_method("dt_round", method!(RbExpr::dt_round, 1))?;
|
414
426
|
class.define_method("dt_combine", method!(RbExpr::dt_combine, 2))?;
|
415
|
-
class.define_method("map_batches", method!(RbExpr::map_batches,
|
427
|
+
class.define_method("map_batches", method!(RbExpr::map_batches, 5))?;
|
416
428
|
class.define_method("dot", method!(RbExpr::dot, 1))?;
|
417
429
|
class.define_method("reinterpret", method!(RbExpr::reinterpret, 1))?;
|
418
430
|
class.define_method("mode", method!(RbExpr::mode, 0))?;
|
@@ -552,10 +564,10 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
552
564
|
class.define_singleton_method("int_range", function!(functions::range::int_range, 4))?;
|
553
565
|
class.define_singleton_method("int_ranges", function!(functions::range::int_ranges, 4))?;
|
554
566
|
class.define_singleton_method("repeat", function!(functions::lazy::repeat, 3))?;
|
555
|
-
class.define_singleton_method("pearson_corr", function!(functions::lazy::pearson_corr,
|
567
|
+
class.define_singleton_method("pearson_corr", function!(functions::lazy::pearson_corr, 2))?;
|
556
568
|
class.define_singleton_method(
|
557
569
|
"spearman_rank_corr",
|
558
|
-
function!(functions::lazy::spearman_rank_corr,
|
570
|
+
function!(functions::lazy::spearman_rank_corr, 3),
|
559
571
|
)?;
|
560
572
|
class.define_singleton_method("sql_expr", function!(functions::lazy::sql_expr, 1))?;
|
561
573
|
class.define_singleton_method("cov", function!(functions::lazy::cov, 3))?;
|
@@ -567,6 +579,10 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
567
579
|
class.define_singleton_method("when", function!(functions::whenthen::when, 1))?;
|
568
580
|
class.define_singleton_method("concat_str", function!(functions::lazy::concat_str, 3))?;
|
569
581
|
class.define_singleton_method("concat_list", function!(functions::lazy::concat_list, 1))?;
|
582
|
+
class.define_singleton_method(
|
583
|
+
"business_day_count",
|
584
|
+
function!(functions::business::business_day_count, 4),
|
585
|
+
)?;
|
570
586
|
class.define_singleton_method(
|
571
587
|
"all_horizontal",
|
572
588
|
function!(functions::aggregation::all_horizontal, 1),
|
@@ -585,11 +601,11 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
585
601
|
)?;
|
586
602
|
class.define_singleton_method(
|
587
603
|
"sum_horizontal",
|
588
|
-
function!(functions::aggregation::sum_horizontal,
|
604
|
+
function!(functions::aggregation::sum_horizontal, 2),
|
589
605
|
)?;
|
590
606
|
class.define_singleton_method(
|
591
607
|
"mean_horizontal",
|
592
|
-
function!(functions::aggregation::mean_horizontal,
|
608
|
+
function!(functions::aggregation::mean_horizontal, 2),
|
593
609
|
)?;
|
594
610
|
class.define_singleton_method("as_struct", function!(functions::lazy::as_struct, 1))?;
|
595
611
|
class.define_singleton_method("coalesce", function!(functions::lazy::coalesce, 1))?;
|
@@ -640,8 +656,8 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
640
656
|
function!(functions::meta::get_index_type, 0),
|
641
657
|
)?;
|
642
658
|
class.define_singleton_method(
|
643
|
-
"
|
644
|
-
function!(functions::meta::
|
659
|
+
"thread_pool_size",
|
660
|
+
function!(functions::meta::thread_pool_size, 0),
|
645
661
|
)?;
|
646
662
|
class.define_singleton_method(
|
647
663
|
"enable_string_cache",
|
@@ -699,19 +715,20 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
699
715
|
"set_random_seed",
|
700
716
|
function!(functions::random::set_random_seed, 1),
|
701
717
|
)?;
|
718
|
+
class.define_singleton_method("re_escape", function!(re_escape, 1))?;
|
702
719
|
|
703
720
|
let class = module.define_class("RbLazyFrame", ruby.class_object())?;
|
704
721
|
class.define_singleton_method("read_json", function!(RbLazyFrame::read_json, 1))?;
|
705
722
|
class.define_singleton_method(
|
706
723
|
"new_from_ndjson",
|
707
|
-
function!(RbLazyFrame::new_from_ndjson,
|
724
|
+
function!(RbLazyFrame::new_from_ndjson, 8),
|
708
725
|
)?;
|
709
726
|
class.define_singleton_method("new_from_csv", function!(RbLazyFrame::new_from_csv, -1))?;
|
710
727
|
class.define_singleton_method(
|
711
728
|
"new_from_parquet",
|
712
|
-
function!(RbLazyFrame::new_from_parquet,
|
729
|
+
function!(RbLazyFrame::new_from_parquet, -1),
|
713
730
|
)?;
|
714
|
-
class.define_singleton_method("new_from_ipc", function!(RbLazyFrame::new_from_ipc,
|
731
|
+
class.define_singleton_method("new_from_ipc", function!(RbLazyFrame::new_from_ipc, 10))?;
|
715
732
|
class.define_method("write_json", method!(RbLazyFrame::write_json, 1))?;
|
716
733
|
class.define_method("describe_plan", method!(RbLazyFrame::describe_plan, 0))?;
|
717
734
|
class.define_method(
|
@@ -726,10 +743,10 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
726
743
|
class.define_method("sort_by_exprs", method!(RbLazyFrame::sort_by_exprs, 5))?;
|
727
744
|
class.define_method("cache", method!(RbLazyFrame::cache, 0))?;
|
728
745
|
class.define_method("collect", method!(RbLazyFrame::collect, 0))?;
|
729
|
-
class.define_method("sink_parquet", method!(RbLazyFrame::sink_parquet,
|
730
|
-
class.define_method("sink_ipc", method!(RbLazyFrame::sink_ipc,
|
746
|
+
class.define_method("sink_parquet", method!(RbLazyFrame::sink_parquet, 9))?;
|
747
|
+
class.define_method("sink_ipc", method!(RbLazyFrame::sink_ipc, 5))?;
|
731
748
|
class.define_method("sink_csv", method!(RbLazyFrame::sink_csv, 15))?;
|
732
|
-
class.define_method("sink_json", method!(RbLazyFrame::sink_json,
|
749
|
+
class.define_method("sink_json", method!(RbLazyFrame::sink_json, 4))?;
|
733
750
|
class.define_method("fetch", method!(RbLazyFrame::fetch, 1))?;
|
734
751
|
class.define_method("filter", method!(RbLazyFrame::filter, 1))?;
|
735
752
|
class.define_method("select", method!(RbLazyFrame::select, 1))?;
|
@@ -741,15 +758,15 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
741
758
|
method!(RbLazyFrame::group_by_dynamic, 9),
|
742
759
|
)?;
|
743
760
|
class.define_method("with_context", method!(RbLazyFrame::with_context, 1))?;
|
744
|
-
class.define_method("join_asof", method!(RbLazyFrame::join_asof,
|
745
|
-
class.define_method("join", method!(RbLazyFrame::join,
|
761
|
+
class.define_method("join_asof", method!(RbLazyFrame::join_asof, 12))?;
|
762
|
+
class.define_method("join", method!(RbLazyFrame::join, 10))?;
|
746
763
|
class.define_method("with_column", method!(RbLazyFrame::with_column, 1))?;
|
747
764
|
class.define_method("with_columns", method!(RbLazyFrame::with_columns, 1))?;
|
748
765
|
class.define_method(
|
749
766
|
"with_columns_seq",
|
750
767
|
method!(RbLazyFrame::with_columns_seq, 1),
|
751
768
|
)?;
|
752
|
-
class.define_method("rename", method!(RbLazyFrame::rename,
|
769
|
+
class.define_method("rename", method!(RbLazyFrame::rename, 3))?;
|
753
770
|
class.define_method("reverse", method!(RbLazyFrame::reverse, 0))?;
|
754
771
|
class.define_method("shift", method!(RbLazyFrame::shift, 2))?;
|
755
772
|
class.define_method("fill_nan", method!(RbLazyFrame::fill_nan, 1))?;
|
@@ -770,6 +787,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
770
787
|
class.define_method("unpivot", method!(RbLazyFrame::unpivot, 4))?;
|
771
788
|
class.define_method("with_row_index", method!(RbLazyFrame::with_row_index, 2))?;
|
772
789
|
class.define_method("drop", method!(RbLazyFrame::drop, 1))?;
|
790
|
+
class.define_method("cast", method!(RbLazyFrame::cast, 2))?;
|
773
791
|
class.define_method("cast_all", method!(RbLazyFrame::cast_all, 2))?;
|
774
792
|
class.define_method("_clone", method!(RbLazyFrame::clone, 0))?;
|
775
793
|
class.define_method("collect_schema", method!(RbLazyFrame::collect_schema, 0))?;
|
@@ -810,7 +828,12 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
810
828
|
class.define_singleton_method("new_array", function!(RbSeries::new_array, 5))?;
|
811
829
|
class.define_singleton_method("new_decimal", function!(RbSeries::new_decimal, 3))?;
|
812
830
|
class.define_singleton_method("repeat", function!(RbSeries::repeat, 4))?;
|
831
|
+
class.define_singleton_method(
|
832
|
+
"from_arrow_c_stream",
|
833
|
+
function!(RbSeries::from_arrow_c_stream, 1),
|
834
|
+
)?;
|
813
835
|
class.define_method("struct_unnest", method!(RbSeries::struct_unnest, 0))?;
|
836
|
+
class.define_method("struct_fields", method!(RbSeries::struct_fields, 0))?;
|
814
837
|
class.define_method(
|
815
838
|
"is_sorted_flag",
|
816
839
|
method!(RbSeries::is_sorted_ascending_flag, 0),
|
@@ -1081,5 +1104,9 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
1081
1104
|
let class = module.define_class("RbStringCacheHolder", ruby.class_object())?;
|
1082
1105
|
class.define_singleton_method("hold", function!(RbStringCacheHolder::hold, 0))?;
|
1083
1106
|
|
1107
|
+
// arrow array stream
|
1108
|
+
let class = module.define_class("ArrowArrayStream", ruby.class_object())?;
|
1109
|
+
class.define_method("to_i", method!(RbArrowArrayStream::to_i, 0))?;
|
1110
|
+
|
1084
1111
|
Ok(())
|
1085
1112
|
}
|
@@ -7,13 +7,16 @@ use super::*;
|
|
7
7
|
use crate::{RbDataFrame, RbPolarsErr, RbSeries, Wrap};
|
8
8
|
|
9
9
|
/// Builds one `SeriesIter` per column of `df`, in column order.
///
/// Each column is viewed through `as_materialized_series()` before the
/// iterator is created.
fn get_iters(df: &DataFrame) -> Vec<SeriesIter> {
    let columns = df.get_columns();
    let mut iters = Vec::with_capacity(columns.len());
    for column in columns {
        iters.push(column.as_materialized_series().iter());
    }
    iters
}
|
12
15
|
|
13
16
|
/// Builds one iterator per column of `df`, each already advanced past the
/// first `skip` values.
///
/// Each column is viewed through `as_materialized_series()` before the
/// iterator is created.
fn get_iters_skip(df: &DataFrame, skip: usize) -> Vec<std::iter::Skip<SeriesIter>> {
    let columns = df.get_columns();
    let mut iters = Vec::with_capacity(columns.len());
    for column in columns {
        iters.push(column.as_materialized_series().iter().skip(skip));
    }
    iters
}
|
19
22
|
|
@@ -113,16 +116,17 @@ pub fn apply_lambda_unknown<'a>(
|
|
113
116
|
true,
|
114
117
|
));
|
115
118
|
} else if out.is_kind_of(class::array()) {
|
116
|
-
return Err(RbPolarsErr::
|
119
|
+
return Err(RbPolarsErr::Other(
|
117
120
|
"A list output type is invalid. Do you mean to create polars List Series?\
|
118
121
|
Then return a Series object."
|
119
122
|
.into(),
|
120
|
-
)
|
123
|
+
)
|
124
|
+
.into());
|
121
125
|
} else {
|
122
|
-
return Err(RbPolarsErr::
|
126
|
+
return Err(RbPolarsErr::Other("Could not determine output type".into()).into());
|
123
127
|
}
|
124
128
|
}
|
125
|
-
Err(RbPolarsErr::
|
129
|
+
Err(RbPolarsErr::Other("Could not determine output type".into()).into())
|
126
130
|
}
|
127
131
|
|
128
132
|
fn apply_iter<T>(
|
data/ext/polars/src/map/lazy.rs
CHANGED
@@ -1,10 +1,70 @@
|
|
1
|
-
use magnus::Value;
|
1
|
+
use magnus::{prelude::*, RArray, Value};
|
2
2
|
use polars::prelude::*;
|
3
3
|
|
4
|
-
use crate::
|
4
|
+
use crate::rb_modules::*;
|
5
|
+
use crate::{RbExpr, RbSeries, Wrap};
|
5
6
|
|
6
|
-
|
7
|
-
|
7
|
+
fn to_series(v: Value, name: &str) -> PolarsResult<Series> {
|
8
|
+
let rb_rbseries = match v.funcall("_s", ()) {
|
9
|
+
Ok(s) => s,
|
10
|
+
// the lambda did not return a series, we try to create a new Ruby Series
|
11
|
+
_ => {
|
12
|
+
let data = RArray::new();
|
13
|
+
data.push(v).unwrap();
|
14
|
+
let res = series().funcall::<_, _, Value>("new", (name.to_string(), data));
|
15
|
+
|
16
|
+
match res {
|
17
|
+
Ok(ruby_s) => ruby_s.funcall::<_, _, &RbSeries>("_s", ()).unwrap(),
|
18
|
+
Err(_) => {
|
19
|
+
polars_bail!(ComputeError:
|
20
|
+
"expected a something that could convert to a `Series` but got: {}",
|
21
|
+
unsafe { v.classname() }
|
22
|
+
)
|
23
|
+
}
|
24
|
+
}
|
25
|
+
}
|
26
|
+
};
|
27
|
+
// Finally get the actual Series
|
28
|
+
Ok(rb_rbseries.series.borrow().clone())
|
29
|
+
}
|
30
|
+
|
31
|
+
pub fn binary_lambda(lambda: Value, a: Series, b: Series) -> PolarsResult<Option<Series>> {
|
32
|
+
// create a RbSeries struct/object for Ruby
|
33
|
+
let rbseries_a = RbSeries::new(a);
|
34
|
+
let rbseries_b = RbSeries::new(b);
|
35
|
+
|
36
|
+
// Wrap this RbSeries object in the Ruby side Series wrapper
|
37
|
+
let ruby_series_wrapper_a: Value = utils().funcall("wrap_s", (rbseries_a,)).unwrap();
|
38
|
+
let ruby_series_wrapper_b: Value = utils().funcall("wrap_s", (rbseries_b,)).unwrap();
|
39
|
+
|
40
|
+
// call the lambda and get a Ruby side Series wrapper
|
41
|
+
let result_series_wrapper: Value =
|
42
|
+
match lambda.funcall("call", (ruby_series_wrapper_a, ruby_series_wrapper_b)) {
|
43
|
+
Ok(rbobj) => rbobj,
|
44
|
+
Err(e) => polars_bail!(
|
45
|
+
ComputeError: "custom Ruby function failed: {}", e,
|
46
|
+
),
|
47
|
+
};
|
48
|
+
let rbseries = if let Ok(rbexpr) = result_series_wrapper.funcall::<_, _, &RbExpr>("_rbexpr", ())
|
49
|
+
{
|
50
|
+
let expr = rbexpr.inner.clone();
|
51
|
+
let df = DataFrame::empty();
|
52
|
+
let out = df
|
53
|
+
.lazy()
|
54
|
+
.select([expr])
|
55
|
+
.with_predicate_pushdown(false)
|
56
|
+
.with_projection_pushdown(false)
|
57
|
+
.collect()?;
|
58
|
+
|
59
|
+
let s = out.select_at_idx(0).unwrap().clone();
|
60
|
+
RbSeries::new(s.take_materialized_series())
|
61
|
+
} else {
|
62
|
+
return Some(to_series(result_series_wrapper, "")).transpose();
|
63
|
+
};
|
64
|
+
|
65
|
+
// Finally get the actual Series
|
66
|
+
let binding = rbseries.series.borrow();
|
67
|
+
Ok(Some(binding.clone()))
|
8
68
|
}
|
9
69
|
|
10
70
|
pub fn map_single(
|
@@ -13,6 +73,7 @@ pub fn map_single(
|
|
13
73
|
_output_type: Option<Wrap<DataType>>,
|
14
74
|
_agg_list: bool,
|
15
75
|
_is_elementwise: bool,
|
76
|
+
_returns_scalar: bool,
|
16
77
|
) -> RbExpr {
|
17
78
|
todo!();
|
18
79
|
}
|
data/ext/polars/src/map/mod.rs
CHANGED
@@ -35,7 +35,7 @@ fn iterator_to_struct(
|
|
35
35
|
av @ AnyValue::Struct(_, _, flds) => (av._iter_struct_av().collect::<Vec<_>>(), &**flds),
|
36
36
|
AnyValue::StructOwned(payload) => (payload.0.clone(), &*payload.1),
|
37
37
|
_ => {
|
38
|
-
return Err(crate::
|
38
|
+
return Err(crate::exceptions::ComputeError::new_err(format!(
|
39
39
|
"expected struct got {first_value:?}",
|
40
40
|
)))
|
41
41
|
}
|
@@ -70,7 +70,7 @@ fn iterator_to_struct(
|
|
70
70
|
Some(dict) => {
|
71
71
|
let dict = RHash::try_convert(dict)?;
|
72
72
|
if dict.len() != struct_width {
|
73
|
-
return Err(crate::
|
73
|
+
return Err(crate::exceptions::ComputeError::new_err(
|
74
74
|
format!("Cannot create struct type.\n> The struct dtype expects {} fields, but it got a dict with {} fields.", struct_width, dict.len())
|
75
75
|
));
|
76
76
|
}
|
@@ -93,10 +93,12 @@ fn iterator_to_struct(
|
|
93
93
|
.collect::<Vec<_>>()
|
94
94
|
});
|
95
95
|
|
96
|
-
Ok(
|
97
|
-
.
|
98
|
-
|
99
|
-
|
96
|
+
Ok(
|
97
|
+
StructChunked::from_series(name, fields[0].len(), fields.iter())
|
98
|
+
.unwrap()
|
99
|
+
.into_series()
|
100
|
+
.into(),
|
101
|
+
)
|
100
102
|
}
|
101
103
|
|
102
104
|
fn iterator_to_primitive<T>(
|
@@ -232,8 +234,7 @@ fn iterator_to_list(
|
|
232
234
|
name: PlSmallStr,
|
233
235
|
capacity: usize,
|
234
236
|
) -> RbResult<ListChunked> {
|
235
|
-
let mut builder =
|
236
|
-
get_list_builder(dt, capacity * 5, capacity, name).map_err(RbPolarsErr::from)?;
|
237
|
+
let mut builder = get_list_builder(dt, capacity * 5, capacity, name);
|
237
238
|
for _ in 0..init_null_count {
|
238
239
|
builder.append_null()
|
239
240
|
}
|
@@ -11,7 +11,7 @@ use polars_core::prelude::AnyValue;
|
|
11
11
|
use crate::prelude::ObjectValue;
|
12
12
|
use crate::Wrap;
|
13
13
|
|
14
|
-
pub(crate) fn
|
14
|
+
pub(crate) fn register_startup_deps() {
|
15
15
|
if !registry::is_object_builder_registered() {
|
16
16
|
let object_builder = Box::new(|name: PlSmallStr, capacity: usize| {
|
17
17
|
Box::new(ObjectChunkedBuilder::<ObjectValue>::new(name, capacity))
|
@@ -98,11 +98,7 @@ impl RbSeries {
|
|
98
98
|
.into_value())
|
99
99
|
}
|
100
100
|
|
101
|
-
pub fn quantile(
|
102
|
-
&self,
|
103
|
-
quantile: f64,
|
104
|
-
interpolation: Wrap<QuantileInterpolOptions>,
|
105
|
-
) -> RbResult<Value> {
|
101
|
+
pub fn quantile(&self, quantile: f64, interpolation: Wrap<QuantileMethod>) -> RbResult<Value> {
|
106
102
|
let bind = self
|
107
103
|
.series
|
108
104
|
.borrow()
|
@@ -2,33 +2,33 @@ use crate::{RbPolarsErr, RbResult, RbSeries};
|
|
2
2
|
|
3
3
|
impl RbSeries {
|
4
4
|
pub fn add(&self, other: &RbSeries) -> RbResult<Self> {
|
5
|
-
(&*self.series.borrow() + &*other.series.borrow())
|
5
|
+
Ok((&*self.series.borrow() + &*other.series.borrow())
|
6
6
|
.map(Into::into)
|
7
|
-
.map_err(RbPolarsErr::from)
|
7
|
+
.map_err(RbPolarsErr::from)?)
|
8
8
|
}
|
9
9
|
|
10
10
|
pub fn sub(&self, other: &RbSeries) -> RbResult<Self> {
|
11
|
-
(&*self.series.borrow() - &*other.series.borrow())
|
11
|
+
Ok((&*self.series.borrow() - &*other.series.borrow())
|
12
12
|
.map(Into::into)
|
13
|
-
.map_err(RbPolarsErr::from)
|
13
|
+
.map_err(RbPolarsErr::from)?)
|
14
14
|
}
|
15
15
|
|
16
16
|
pub fn mul(&self, other: &RbSeries) -> RbResult<Self> {
|
17
|
-
(&*self.series.borrow() * &*other.series.borrow())
|
17
|
+
Ok((&*self.series.borrow() * &*other.series.borrow())
|
18
18
|
.map(Into::into)
|
19
|
-
.map_err(RbPolarsErr::from)
|
19
|
+
.map_err(RbPolarsErr::from)?)
|
20
20
|
}
|
21
21
|
|
22
22
|
pub fn div(&self, other: &RbSeries) -> RbResult<Self> {
|
23
|
-
(&*self.series.borrow() / &*other.series.borrow())
|
23
|
+
Ok((&*self.series.borrow() / &*other.series.borrow())
|
24
24
|
.map(Into::into)
|
25
|
-
.map_err(RbPolarsErr::from)
|
25
|
+
.map_err(RbPolarsErr::from)?)
|
26
26
|
}
|
27
27
|
|
28
28
|
pub fn rem(&self, other: &RbSeries) -> RbResult<Self> {
|
29
|
-
(&*self.series.borrow() % &*other.series.borrow())
|
29
|
+
Ok((&*self.series.borrow() % &*other.series.borrow())
|
30
30
|
.map(Into::into)
|
31
|
-
.map_err(RbPolarsErr::from)
|
31
|
+
.map_err(RbPolarsErr::from)?)
|
32
32
|
}
|
33
33
|
}
|
34
34
|
|
@@ -4,7 +4,7 @@ use polars_core::prelude::*;
|
|
4
4
|
use crate::any_value::rb_object_to_any_value;
|
5
5
|
use crate::conversion::{slice_extract_wrapped, vec_extract_wrapped, Wrap};
|
6
6
|
use crate::prelude::ObjectValue;
|
7
|
-
use crate::series::
|
7
|
+
use crate::series::to_series;
|
8
8
|
use crate::{RbPolarsErr, RbResult, RbSeries, RbTypeError, RbValueError};
|
9
9
|
|
10
10
|
impl RbSeries {
|
@@ -185,7 +185,7 @@ impl RbSeries {
|
|
185
185
|
}
|
186
186
|
|
187
187
|
pub fn new_series_list(name: String, val: RArray, _strict: bool) -> RbResult<Self> {
|
188
|
-
let series_vec =
|
188
|
+
let series_vec = to_series(val)?;
|
189
189
|
Ok(Series::new(name.into(), &series_vec).into())
|
190
190
|
}
|
191
191
|
|
@@ -31,7 +31,7 @@ impl RbSeries {
|
|
31
31
|
for i in 0..series.len() {
|
32
32
|
let obj: Option<&ObjectValue> = series.get_object(i).map(|any| any.into());
|
33
33
|
match obj {
|
34
|
-
Some(val) => v.push(val.
|
34
|
+
Some(val) => v.push(val.to_value()).unwrap(),
|
35
35
|
None => v.push(qnil()).unwrap(),
|
36
36
|
};
|
37
37
|
}
|