polars-df 0.14.0 → 0.16.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +35 -0
- data/Cargo.lock +1523 -378
- data/LICENSE.txt +1 -0
- data/README.md +38 -4
- data/ext/polars/Cargo.toml +15 -5
- data/ext/polars/src/batched_csv.rs +7 -10
- data/ext/polars/src/conversion/any_value.rs +31 -21
- data/ext/polars/src/conversion/mod.rs +155 -48
- data/ext/polars/src/dataframe/construction.rs +0 -3
- data/ext/polars/src/dataframe/export.rs +9 -2
- data/ext/polars/src/dataframe/general.rs +15 -57
- data/ext/polars/src/dataframe/io.rs +77 -169
- data/ext/polars/src/dataframe/mod.rs +1 -0
- data/ext/polars/src/dataframe/serde.rs +15 -0
- data/ext/polars/src/error.rs +31 -48
- data/ext/polars/src/exceptions.rs +24 -0
- data/ext/polars/src/expr/binary.rs +4 -42
- data/ext/polars/src/expr/datetime.rs +5 -4
- data/ext/polars/src/expr/general.rs +16 -22
- data/ext/polars/src/expr/list.rs +18 -11
- data/ext/polars/src/expr/meta.rs +6 -2
- data/ext/polars/src/expr/rolling.rs +6 -7
- data/ext/polars/src/expr/string.rs +9 -36
- data/ext/polars/src/file.rs +78 -23
- data/ext/polars/src/functions/aggregation.rs +4 -4
- data/ext/polars/src/functions/business.rs +15 -0
- data/ext/polars/src/functions/io.rs +34 -13
- data/ext/polars/src/functions/lazy.rs +22 -12
- data/ext/polars/src/functions/meta.rs +1 -1
- data/ext/polars/src/functions/mod.rs +1 -0
- data/ext/polars/src/interop/arrow/mod.rs +1 -0
- data/ext/polars/src/interop/arrow/to_ruby.rs +83 -0
- data/ext/polars/src/interop/mod.rs +1 -0
- data/ext/polars/src/lazyframe/general.rs +920 -0
- data/ext/polars/src/lazyframe/mod.rs +3 -827
- data/ext/polars/src/lazyframe/serde.rs +31 -0
- data/ext/polars/src/lib.rs +54 -27
- data/ext/polars/src/map/dataframe.rs +10 -6
- data/ext/polars/src/map/lazy.rs +65 -4
- data/ext/polars/src/map/mod.rs +9 -8
- data/ext/polars/src/on_startup.rs +1 -1
- data/ext/polars/src/series/aggregation.rs +1 -5
- data/ext/polars/src/series/arithmetic.rs +10 -10
- data/ext/polars/src/series/construction.rs +2 -2
- data/ext/polars/src/series/export.rs +1 -1
- data/ext/polars/src/series/general.rs +631 -0
- data/ext/polars/src/series/import.rs +55 -0
- data/ext/polars/src/series/mod.rs +11 -638
- data/ext/polars/src/series/scatter.rs +2 -2
- data/ext/polars/src/utils.rs +0 -20
- data/lib/polars/batched_csv_reader.rb +0 -2
- data/lib/polars/binary_expr.rb +133 -9
- data/lib/polars/binary_name_space.rb +101 -6
- data/lib/polars/config.rb +4 -0
- data/lib/polars/data_frame.rb +452 -101
- data/lib/polars/data_type_group.rb +28 -0
- data/lib/polars/data_types.rb +3 -1
- data/lib/polars/date_time_expr.rb +244 -0
- data/lib/polars/date_time_name_space.rb +87 -0
- data/lib/polars/expr.rb +103 -2
- data/lib/polars/functions/aggregation/horizontal.rb +10 -4
- data/lib/polars/functions/as_datatype.rb +51 -2
- data/lib/polars/functions/col.rb +1 -1
- data/lib/polars/functions/eager.rb +1 -3
- data/lib/polars/functions/lazy.rb +95 -13
- data/lib/polars/functions/range/time_range.rb +21 -21
- data/lib/polars/io/csv.rb +14 -16
- data/lib/polars/io/database.rb +2 -2
- data/lib/polars/io/delta.rb +126 -0
- data/lib/polars/io/ipc.rb +14 -4
- data/lib/polars/io/ndjson.rb +10 -0
- data/lib/polars/io/parquet.rb +168 -111
- data/lib/polars/lazy_frame.rb +684 -20
- data/lib/polars/list_name_space.rb +169 -0
- data/lib/polars/selectors.rb +1226 -0
- data/lib/polars/series.rb +465 -35
- data/lib/polars/string_cache.rb +27 -1
- data/lib/polars/string_expr.rb +0 -1
- data/lib/polars/string_name_space.rb +73 -3
- data/lib/polars/struct_name_space.rb +31 -7
- data/lib/polars/utils/various.rb +5 -1
- data/lib/polars/utils.rb +45 -10
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +17 -1
- metadata +16 -9
- data/lib/polars/functions.rb +0 -57
@@ -0,0 +1,31 @@
|
|
1
|
+
use magnus::Value;
|
2
|
+
use polars::lazy::frame::LazyFrame;
|
3
|
+
use polars::prelude::*;
|
4
|
+
use std::io::Read;
|
5
|
+
|
6
|
+
use crate::file::get_file_like;
|
7
|
+
use crate::{RbLazyFrame, RbResult, RbValueError};
|
8
|
+
|
9
|
+
impl RbLazyFrame {
|
10
|
+
// TODO change to serialize_json
|
11
|
+
pub fn read_json(rb_f: Value) -> RbResult<Self> {
|
12
|
+
// it is faster to first read to memory and then parse: https://github.com/serde-rs/json/issues/160
|
13
|
+
// so don't bother with files.
|
14
|
+
let mut json = String::new();
|
15
|
+
let _ = get_file_like(rb_f, false)?
|
16
|
+
.read_to_string(&mut json)
|
17
|
+
.unwrap();
|
18
|
+
|
19
|
+
// Safety
|
20
|
+
// we skipped the serializing/deserializing of the `'static` lifetime in `DataType`
|
21
|
+
// so we actually don't have a lifetime at all when serializing.
|
22
|
+
|
23
|
+
// &str still has a lifetime. But it's ok, because we drop it immediately
|
24
|
+
// in this scope
|
25
|
+
let json = unsafe { std::mem::transmute::<&'_ str, &'static str>(json.as_str()) };
|
26
|
+
|
27
|
+
let lp = serde_json::from_str::<DslPlan>(json)
|
28
|
+
.map_err(|err| RbValueError::new_err(format!("{:?}", err)))?;
|
29
|
+
Ok(LazyFrame::from(lp).into())
|
30
|
+
}
|
31
|
+
}
|
data/ext/polars/src/lib.rs
CHANGED
@@ -3,6 +3,7 @@ mod batched_csv;
|
|
3
3
|
mod conversion;
|
4
4
|
mod dataframe;
|
5
5
|
mod error;
|
6
|
+
mod exceptions;
|
6
7
|
mod expr;
|
7
8
|
mod file;
|
8
9
|
mod functions;
|
@@ -21,21 +22,31 @@ mod utils;
|
|
21
22
|
use batched_csv::RbBatchedCsv;
|
22
23
|
use conversion::*;
|
23
24
|
use dataframe::RbDataFrame;
|
24
|
-
use error::
|
25
|
+
use error::RbPolarsErr;
|
26
|
+
use exceptions::{RbTypeError, RbValueError};
|
25
27
|
use expr::rb_exprs_to_exprs;
|
26
28
|
use expr::RbExpr;
|
27
29
|
use functions::string_cache::RbStringCacheHolder;
|
28
30
|
use functions::whenthen::{RbChainedThen, RbChainedWhen, RbThen, RbWhen};
|
31
|
+
use interop::arrow::to_ruby::RbArrowArrayStream;
|
29
32
|
use lazyframe::RbLazyFrame;
|
30
33
|
use lazygroupby::RbLazyGroupBy;
|
31
|
-
use magnus::{define_module, function, method, prelude::*,
|
34
|
+
use magnus::{define_module, function, method, prelude::*, Ruby};
|
32
35
|
use series::RbSeries;
|
33
36
|
use sql::RbSQLContext;
|
34
37
|
|
35
|
-
|
38
|
+
use magnus::error::Result as RbResult;
|
39
|
+
use magnus::Error as RbErr;
|
40
|
+
|
41
|
+
// TODO move
|
42
|
+
fn re_escape(pattern: String) -> String {
|
43
|
+
regex::escape(&pattern)
|
44
|
+
}
|
36
45
|
|
37
46
|
#[magnus::init]
|
38
47
|
fn init(ruby: &Ruby) -> RbResult<()> {
|
48
|
+
crate::on_startup::register_startup_deps();
|
49
|
+
|
39
50
|
let module = define_module("Polars")?;
|
40
51
|
|
41
52
|
let class = module.define_class("RbBatchedCsv", ruby.class_object())?;
|
@@ -45,7 +56,6 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
45
56
|
let class = module.define_class("RbDataFrame", ruby.class_object())?;
|
46
57
|
class.define_singleton_method("new", function!(RbDataFrame::init, 1))?;
|
47
58
|
class.define_singleton_method("read_csv", function!(RbDataFrame::read_csv, -1))?;
|
48
|
-
class.define_singleton_method("read_parquet", function!(RbDataFrame::read_parquet, 9))?;
|
49
59
|
class.define_singleton_method("read_ipc", function!(RbDataFrame::read_ipc, 6))?;
|
50
60
|
class.define_singleton_method(
|
51
61
|
"read_ipc_stream",
|
@@ -58,17 +68,21 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
58
68
|
class.define_singleton_method("read_ndjson", function!(RbDataFrame::read_ndjson, 4))?;
|
59
69
|
class.define_method("estimated_size", method!(RbDataFrame::estimated_size, 0))?;
|
60
70
|
class.define_method("dtype_strings", method!(RbDataFrame::dtype_strings, 0))?;
|
61
|
-
class.define_method("write_avro", method!(RbDataFrame::write_avro,
|
71
|
+
class.define_method("write_avro", method!(RbDataFrame::write_avro, 3))?;
|
62
72
|
class.define_method("write_json", method!(RbDataFrame::write_json, 3))?;
|
63
73
|
class.define_method("write_ndjson", method!(RbDataFrame::write_ndjson, 1))?;
|
64
74
|
class.define_method("write_csv", method!(RbDataFrame::write_csv, 10))?;
|
65
|
-
class.define_method("write_ipc", method!(RbDataFrame::write_ipc,
|
75
|
+
class.define_method("write_ipc", method!(RbDataFrame::write_ipc, 5))?;
|
66
76
|
class.define_method(
|
67
77
|
"write_ipc_stream",
|
68
|
-
method!(RbDataFrame::write_ipc_stream,
|
78
|
+
method!(RbDataFrame::write_ipc_stream, 3),
|
69
79
|
)?;
|
70
80
|
class.define_method("row_tuple", method!(RbDataFrame::row_tuple, 1))?;
|
71
81
|
class.define_method("row_tuples", method!(RbDataFrame::row_tuples, 0))?;
|
82
|
+
class.define_method(
|
83
|
+
"arrow_c_stream",
|
84
|
+
method!(RbDataFrame::__arrow_c_stream__, 0),
|
85
|
+
)?;
|
72
86
|
class.define_method("to_numo", method!(RbDataFrame::to_numo, 0))?;
|
73
87
|
class.define_method("write_parquet", method!(RbDataFrame::write_parquet, 6))?;
|
74
88
|
class.define_method("add", method!(RbDataFrame::add, 1))?;
|
@@ -129,10 +143,6 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
129
143
|
class.define_method("pivot_expr", method!(RbDataFrame::pivot_expr, 7))?;
|
130
144
|
class.define_method("partition_by", method!(RbDataFrame::partition_by, 3))?;
|
131
145
|
class.define_method("lazy", method!(RbDataFrame::lazy, 0))?;
|
132
|
-
class.define_method("mean_horizontal", method!(RbDataFrame::mean_horizontal, 1))?;
|
133
|
-
class.define_method("max_horizontal", method!(RbDataFrame::max_horizontal, 0))?;
|
134
|
-
class.define_method("min_horizontal", method!(RbDataFrame::min_horizontal, 0))?;
|
135
|
-
class.define_method("sum_horizontal", method!(RbDataFrame::sum_horizontal, 1))?;
|
136
146
|
class.define_method("to_dummies", method!(RbDataFrame::to_dummies, 3))?;
|
137
147
|
class.define_method("null_count", method!(RbDataFrame::null_count, 0))?;
|
138
148
|
class.define_method("map_rows", method!(RbDataFrame::map_rows, 3))?;
|
@@ -143,6 +153,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
143
153
|
class.define_method("to_struct", method!(RbDataFrame::to_struct, 1))?;
|
144
154
|
class.define_method("unnest", method!(RbDataFrame::unnest, 1))?;
|
145
155
|
class.define_method("clear", method!(RbDataFrame::clear, 0))?;
|
156
|
+
class.define_method("serialize_json", method!(RbDataFrame::serialize_json, 1))?;
|
146
157
|
|
147
158
|
let class = module.define_class("RbExpr", ruby.class_object())?;
|
148
159
|
class.define_method("+", method!(RbExpr::add, 1))?;
|
@@ -286,6 +297,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
286
297
|
class.define_method("str_slice", method!(RbExpr::str_slice, 2))?;
|
287
298
|
class.define_method("str_to_uppercase", method!(RbExpr::str_to_uppercase, 0))?;
|
288
299
|
class.define_method("str_to_lowercase", method!(RbExpr::str_to_lowercase, 0))?;
|
300
|
+
// class.define_method("str_to_titlecase", method!(RbExpr::str_to_titlecase, 0))?;
|
289
301
|
class.define_method("str_len_bytes", method!(RbExpr::str_len_bytes, 0))?;
|
290
302
|
class.define_method("str_len_chars", method!(RbExpr::str_len_chars, 0))?;
|
291
303
|
class.define_method("str_replace_n", method!(RbExpr::str_replace_n, 4))?;
|
@@ -412,7 +424,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
412
424
|
class.define_method("dt_dst_offset", method!(RbExpr::dt_dst_offset, 0))?;
|
413
425
|
class.define_method("dt_round", method!(RbExpr::dt_round, 1))?;
|
414
426
|
class.define_method("dt_combine", method!(RbExpr::dt_combine, 2))?;
|
415
|
-
class.define_method("map_batches", method!(RbExpr::map_batches,
|
427
|
+
class.define_method("map_batches", method!(RbExpr::map_batches, 5))?;
|
416
428
|
class.define_method("dot", method!(RbExpr::dot, 1))?;
|
417
429
|
class.define_method("reinterpret", method!(RbExpr::reinterpret, 1))?;
|
418
430
|
class.define_method("mode", method!(RbExpr::mode, 0))?;
|
@@ -552,10 +564,10 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
552
564
|
class.define_singleton_method("int_range", function!(functions::range::int_range, 4))?;
|
553
565
|
class.define_singleton_method("int_ranges", function!(functions::range::int_ranges, 4))?;
|
554
566
|
class.define_singleton_method("repeat", function!(functions::lazy::repeat, 3))?;
|
555
|
-
class.define_singleton_method("pearson_corr", function!(functions::lazy::pearson_corr,
|
567
|
+
class.define_singleton_method("pearson_corr", function!(functions::lazy::pearson_corr, 2))?;
|
556
568
|
class.define_singleton_method(
|
557
569
|
"spearman_rank_corr",
|
558
|
-
function!(functions::lazy::spearman_rank_corr,
|
570
|
+
function!(functions::lazy::spearman_rank_corr, 3),
|
559
571
|
)?;
|
560
572
|
class.define_singleton_method("sql_expr", function!(functions::lazy::sql_expr, 1))?;
|
561
573
|
class.define_singleton_method("cov", function!(functions::lazy::cov, 3))?;
|
@@ -567,6 +579,10 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
567
579
|
class.define_singleton_method("when", function!(functions::whenthen::when, 1))?;
|
568
580
|
class.define_singleton_method("concat_str", function!(functions::lazy::concat_str, 3))?;
|
569
581
|
class.define_singleton_method("concat_list", function!(functions::lazy::concat_list, 1))?;
|
582
|
+
class.define_singleton_method(
|
583
|
+
"business_day_count",
|
584
|
+
function!(functions::business::business_day_count, 4),
|
585
|
+
)?;
|
570
586
|
class.define_singleton_method(
|
571
587
|
"all_horizontal",
|
572
588
|
function!(functions::aggregation::all_horizontal, 1),
|
@@ -585,11 +601,11 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
585
601
|
)?;
|
586
602
|
class.define_singleton_method(
|
587
603
|
"sum_horizontal",
|
588
|
-
function!(functions::aggregation::sum_horizontal,
|
604
|
+
function!(functions::aggregation::sum_horizontal, 2),
|
589
605
|
)?;
|
590
606
|
class.define_singleton_method(
|
591
607
|
"mean_horizontal",
|
592
|
-
function!(functions::aggregation::mean_horizontal,
|
608
|
+
function!(functions::aggregation::mean_horizontal, 2),
|
593
609
|
)?;
|
594
610
|
class.define_singleton_method("as_struct", function!(functions::lazy::as_struct, 1))?;
|
595
611
|
class.define_singleton_method("coalesce", function!(functions::lazy::coalesce, 1))?;
|
@@ -640,8 +656,8 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
640
656
|
function!(functions::meta::get_index_type, 0),
|
641
657
|
)?;
|
642
658
|
class.define_singleton_method(
|
643
|
-
"
|
644
|
-
function!(functions::meta::
|
659
|
+
"thread_pool_size",
|
660
|
+
function!(functions::meta::thread_pool_size, 0),
|
645
661
|
)?;
|
646
662
|
class.define_singleton_method(
|
647
663
|
"enable_string_cache",
|
@@ -699,19 +715,20 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
699
715
|
"set_random_seed",
|
700
716
|
function!(functions::random::set_random_seed, 1),
|
701
717
|
)?;
|
718
|
+
class.define_singleton_method("re_escape", function!(re_escape, 1))?;
|
702
719
|
|
703
720
|
let class = module.define_class("RbLazyFrame", ruby.class_object())?;
|
704
721
|
class.define_singleton_method("read_json", function!(RbLazyFrame::read_json, 1))?;
|
705
722
|
class.define_singleton_method(
|
706
723
|
"new_from_ndjson",
|
707
|
-
function!(RbLazyFrame::new_from_ndjson,
|
724
|
+
function!(RbLazyFrame::new_from_ndjson, 8),
|
708
725
|
)?;
|
709
726
|
class.define_singleton_method("new_from_csv", function!(RbLazyFrame::new_from_csv, -1))?;
|
710
727
|
class.define_singleton_method(
|
711
728
|
"new_from_parquet",
|
712
|
-
function!(RbLazyFrame::new_from_parquet,
|
729
|
+
function!(RbLazyFrame::new_from_parquet, -1),
|
713
730
|
)?;
|
714
|
-
class.define_singleton_method("new_from_ipc", function!(RbLazyFrame::new_from_ipc,
|
731
|
+
class.define_singleton_method("new_from_ipc", function!(RbLazyFrame::new_from_ipc, 10))?;
|
715
732
|
class.define_method("write_json", method!(RbLazyFrame::write_json, 1))?;
|
716
733
|
class.define_method("describe_plan", method!(RbLazyFrame::describe_plan, 0))?;
|
717
734
|
class.define_method(
|
@@ -726,10 +743,10 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
726
743
|
class.define_method("sort_by_exprs", method!(RbLazyFrame::sort_by_exprs, 5))?;
|
727
744
|
class.define_method("cache", method!(RbLazyFrame::cache, 0))?;
|
728
745
|
class.define_method("collect", method!(RbLazyFrame::collect, 0))?;
|
729
|
-
class.define_method("sink_parquet", method!(RbLazyFrame::sink_parquet,
|
730
|
-
class.define_method("sink_ipc", method!(RbLazyFrame::sink_ipc,
|
746
|
+
class.define_method("sink_parquet", method!(RbLazyFrame::sink_parquet, 9))?;
|
747
|
+
class.define_method("sink_ipc", method!(RbLazyFrame::sink_ipc, 5))?;
|
731
748
|
class.define_method("sink_csv", method!(RbLazyFrame::sink_csv, 15))?;
|
732
|
-
class.define_method("sink_json", method!(RbLazyFrame::sink_json,
|
749
|
+
class.define_method("sink_json", method!(RbLazyFrame::sink_json, 4))?;
|
733
750
|
class.define_method("fetch", method!(RbLazyFrame::fetch, 1))?;
|
734
751
|
class.define_method("filter", method!(RbLazyFrame::filter, 1))?;
|
735
752
|
class.define_method("select", method!(RbLazyFrame::select, 1))?;
|
@@ -741,15 +758,15 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
741
758
|
method!(RbLazyFrame::group_by_dynamic, 9),
|
742
759
|
)?;
|
743
760
|
class.define_method("with_context", method!(RbLazyFrame::with_context, 1))?;
|
744
|
-
class.define_method("join_asof", method!(RbLazyFrame::join_asof,
|
745
|
-
class.define_method("join", method!(RbLazyFrame::join,
|
761
|
+
class.define_method("join_asof", method!(RbLazyFrame::join_asof, 12))?;
|
762
|
+
class.define_method("join", method!(RbLazyFrame::join, 10))?;
|
746
763
|
class.define_method("with_column", method!(RbLazyFrame::with_column, 1))?;
|
747
764
|
class.define_method("with_columns", method!(RbLazyFrame::with_columns, 1))?;
|
748
765
|
class.define_method(
|
749
766
|
"with_columns_seq",
|
750
767
|
method!(RbLazyFrame::with_columns_seq, 1),
|
751
768
|
)?;
|
752
|
-
class.define_method("rename", method!(RbLazyFrame::rename,
|
769
|
+
class.define_method("rename", method!(RbLazyFrame::rename, 3))?;
|
753
770
|
class.define_method("reverse", method!(RbLazyFrame::reverse, 0))?;
|
754
771
|
class.define_method("shift", method!(RbLazyFrame::shift, 2))?;
|
755
772
|
class.define_method("fill_nan", method!(RbLazyFrame::fill_nan, 1))?;
|
@@ -770,6 +787,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
770
787
|
class.define_method("unpivot", method!(RbLazyFrame::unpivot, 4))?;
|
771
788
|
class.define_method("with_row_index", method!(RbLazyFrame::with_row_index, 2))?;
|
772
789
|
class.define_method("drop", method!(RbLazyFrame::drop, 1))?;
|
790
|
+
class.define_method("cast", method!(RbLazyFrame::cast, 2))?;
|
773
791
|
class.define_method("cast_all", method!(RbLazyFrame::cast_all, 2))?;
|
774
792
|
class.define_method("_clone", method!(RbLazyFrame::clone, 0))?;
|
775
793
|
class.define_method("collect_schema", method!(RbLazyFrame::collect_schema, 0))?;
|
@@ -810,7 +828,12 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
810
828
|
class.define_singleton_method("new_array", function!(RbSeries::new_array, 5))?;
|
811
829
|
class.define_singleton_method("new_decimal", function!(RbSeries::new_decimal, 3))?;
|
812
830
|
class.define_singleton_method("repeat", function!(RbSeries::repeat, 4))?;
|
831
|
+
class.define_singleton_method(
|
832
|
+
"from_arrow_c_stream",
|
833
|
+
function!(RbSeries::from_arrow_c_stream, 1),
|
834
|
+
)?;
|
813
835
|
class.define_method("struct_unnest", method!(RbSeries::struct_unnest, 0))?;
|
836
|
+
class.define_method("struct_fields", method!(RbSeries::struct_fields, 0))?;
|
814
837
|
class.define_method(
|
815
838
|
"is_sorted_flag",
|
816
839
|
method!(RbSeries::is_sorted_ascending_flag, 0),
|
@@ -1081,5 +1104,9 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
1081
1104
|
let class = module.define_class("RbStringCacheHolder", ruby.class_object())?;
|
1082
1105
|
class.define_singleton_method("hold", function!(RbStringCacheHolder::hold, 0))?;
|
1083
1106
|
|
1107
|
+
// arrow array stream
|
1108
|
+
let class = module.define_class("ArrowArrayStream", ruby.class_object())?;
|
1109
|
+
class.define_method("to_i", method!(RbArrowArrayStream::to_i, 0))?;
|
1110
|
+
|
1084
1111
|
Ok(())
|
1085
1112
|
}
|
@@ -7,13 +7,16 @@ use super::*;
|
|
7
7
|
use crate::{RbDataFrame, RbPolarsErr, RbSeries, Wrap};
|
8
8
|
|
9
9
|
fn get_iters(df: &DataFrame) -> Vec<SeriesIter> {
|
10
|
-
df.get_columns()
|
10
|
+
df.get_columns()
|
11
|
+
.iter()
|
12
|
+
.map(|s| s.as_materialized_series().iter())
|
13
|
+
.collect()
|
11
14
|
}
|
12
15
|
|
13
16
|
fn get_iters_skip(df: &DataFrame, skip: usize) -> Vec<std::iter::Skip<SeriesIter>> {
|
14
17
|
df.get_columns()
|
15
18
|
.iter()
|
16
|
-
.map(|s| s.iter().skip(skip))
|
19
|
+
.map(|s| s.as_materialized_series().iter().skip(skip))
|
17
20
|
.collect()
|
18
21
|
}
|
19
22
|
|
@@ -113,16 +116,17 @@ pub fn apply_lambda_unknown<'a>(
|
|
113
116
|
true,
|
114
117
|
));
|
115
118
|
} else if out.is_kind_of(class::array()) {
|
116
|
-
return Err(RbPolarsErr::
|
119
|
+
return Err(RbPolarsErr::Other(
|
117
120
|
"A list output type is invalid. Do you mean to create polars List Series?\
|
118
121
|
Then return a Series object."
|
119
122
|
.into(),
|
120
|
-
)
|
123
|
+
)
|
124
|
+
.into());
|
121
125
|
} else {
|
122
|
-
return Err(RbPolarsErr::
|
126
|
+
return Err(RbPolarsErr::Other("Could not determine output type".into()).into());
|
123
127
|
}
|
124
128
|
}
|
125
|
-
Err(RbPolarsErr::
|
129
|
+
Err(RbPolarsErr::Other("Could not determine output type".into()).into())
|
126
130
|
}
|
127
131
|
|
128
132
|
fn apply_iter<T>(
|
data/ext/polars/src/map/lazy.rs
CHANGED
@@ -1,10 +1,70 @@
|
|
1
|
-
use magnus::Value;
|
1
|
+
use magnus::{prelude::*, RArray, Value};
|
2
2
|
use polars::prelude::*;
|
3
3
|
|
4
|
-
use crate::
|
4
|
+
use crate::rb_modules::*;
|
5
|
+
use crate::{RbExpr, RbSeries, Wrap};
|
5
6
|
|
6
|
-
|
7
|
-
|
7
|
+
fn to_series(v: Value, name: &str) -> PolarsResult<Series> {
|
8
|
+
let rb_rbseries = match v.funcall("_s", ()) {
|
9
|
+
Ok(s) => s,
|
10
|
+
// the lambda did not return a series, we try to create a new Ruby Series
|
11
|
+
_ => {
|
12
|
+
let data = RArray::new();
|
13
|
+
data.push(v).unwrap();
|
14
|
+
let res = series().funcall::<_, _, Value>("new", (name.to_string(), data));
|
15
|
+
|
16
|
+
match res {
|
17
|
+
Ok(ruby_s) => ruby_s.funcall::<_, _, &RbSeries>("_s", ()).unwrap(),
|
18
|
+
Err(_) => {
|
19
|
+
polars_bail!(ComputeError:
|
20
|
+
"expected a something that could convert to a `Series` but got: {}",
|
21
|
+
unsafe { v.classname() }
|
22
|
+
)
|
23
|
+
}
|
24
|
+
}
|
25
|
+
}
|
26
|
+
};
|
27
|
+
// Finally get the actual Series
|
28
|
+
Ok(rb_rbseries.series.borrow().clone())
|
29
|
+
}
|
30
|
+
|
31
|
+
pub fn binary_lambda(lambda: Value, a: Series, b: Series) -> PolarsResult<Option<Series>> {
|
32
|
+
// create a RbSeries struct/object for Ruby
|
33
|
+
let rbseries_a = RbSeries::new(a);
|
34
|
+
let rbseries_b = RbSeries::new(b);
|
35
|
+
|
36
|
+
// Wrap this RbSeries object in the Ruby side Series wrapper
|
37
|
+
let ruby_series_wrapper_a: Value = utils().funcall("wrap_s", (rbseries_a,)).unwrap();
|
38
|
+
let ruby_series_wrapper_b: Value = utils().funcall("wrap_s", (rbseries_b,)).unwrap();
|
39
|
+
|
40
|
+
// call the lambda and get a Ruby side Series wrapper
|
41
|
+
let result_series_wrapper: Value =
|
42
|
+
match lambda.funcall("call", (ruby_series_wrapper_a, ruby_series_wrapper_b)) {
|
43
|
+
Ok(rbobj) => rbobj,
|
44
|
+
Err(e) => polars_bail!(
|
45
|
+
ComputeError: "custom Ruby function failed: {}", e,
|
46
|
+
),
|
47
|
+
};
|
48
|
+
let rbseries = if let Ok(rbexpr) = result_series_wrapper.funcall::<_, _, &RbExpr>("_rbexpr", ())
|
49
|
+
{
|
50
|
+
let expr = rbexpr.inner.clone();
|
51
|
+
let df = DataFrame::empty();
|
52
|
+
let out = df
|
53
|
+
.lazy()
|
54
|
+
.select([expr])
|
55
|
+
.with_predicate_pushdown(false)
|
56
|
+
.with_projection_pushdown(false)
|
57
|
+
.collect()?;
|
58
|
+
|
59
|
+
let s = out.select_at_idx(0).unwrap().clone();
|
60
|
+
RbSeries::new(s.take_materialized_series())
|
61
|
+
} else {
|
62
|
+
return Some(to_series(result_series_wrapper, "")).transpose();
|
63
|
+
};
|
64
|
+
|
65
|
+
// Finally get the actual Series
|
66
|
+
let binding = rbseries.series.borrow();
|
67
|
+
Ok(Some(binding.clone()))
|
8
68
|
}
|
9
69
|
|
10
70
|
pub fn map_single(
|
@@ -13,6 +73,7 @@ pub fn map_single(
|
|
13
73
|
_output_type: Option<Wrap<DataType>>,
|
14
74
|
_agg_list: bool,
|
15
75
|
_is_elementwise: bool,
|
76
|
+
_returns_scalar: bool,
|
16
77
|
) -> RbExpr {
|
17
78
|
todo!();
|
18
79
|
}
|
data/ext/polars/src/map/mod.rs
CHANGED
@@ -35,7 +35,7 @@ fn iterator_to_struct(
|
|
35
35
|
av @ AnyValue::Struct(_, _, flds) => (av._iter_struct_av().collect::<Vec<_>>(), &**flds),
|
36
36
|
AnyValue::StructOwned(payload) => (payload.0.clone(), &*payload.1),
|
37
37
|
_ => {
|
38
|
-
return Err(crate::
|
38
|
+
return Err(crate::exceptions::ComputeError::new_err(format!(
|
39
39
|
"expected struct got {first_value:?}",
|
40
40
|
)))
|
41
41
|
}
|
@@ -70,7 +70,7 @@ fn iterator_to_struct(
|
|
70
70
|
Some(dict) => {
|
71
71
|
let dict = RHash::try_convert(dict)?;
|
72
72
|
if dict.len() != struct_width {
|
73
|
-
return Err(crate::
|
73
|
+
return Err(crate::exceptions::ComputeError::new_err(
|
74
74
|
format!("Cannot create struct type.\n> The struct dtype expects {} fields, but it got a dict with {} fields.", struct_width, dict.len())
|
75
75
|
));
|
76
76
|
}
|
@@ -93,10 +93,12 @@ fn iterator_to_struct(
|
|
93
93
|
.collect::<Vec<_>>()
|
94
94
|
});
|
95
95
|
|
96
|
-
Ok(
|
97
|
-
.
|
98
|
-
|
99
|
-
|
96
|
+
Ok(
|
97
|
+
StructChunked::from_series(name, fields[0].len(), fields.iter())
|
98
|
+
.unwrap()
|
99
|
+
.into_series()
|
100
|
+
.into(),
|
101
|
+
)
|
100
102
|
}
|
101
103
|
|
102
104
|
fn iterator_to_primitive<T>(
|
@@ -232,8 +234,7 @@ fn iterator_to_list(
|
|
232
234
|
name: PlSmallStr,
|
233
235
|
capacity: usize,
|
234
236
|
) -> RbResult<ListChunked> {
|
235
|
-
let mut builder =
|
236
|
-
get_list_builder(dt, capacity * 5, capacity, name).map_err(RbPolarsErr::from)?;
|
237
|
+
let mut builder = get_list_builder(dt, capacity * 5, capacity, name);
|
237
238
|
for _ in 0..init_null_count {
|
238
239
|
builder.append_null()
|
239
240
|
}
|
@@ -11,7 +11,7 @@ use polars_core::prelude::AnyValue;
|
|
11
11
|
use crate::prelude::ObjectValue;
|
12
12
|
use crate::Wrap;
|
13
13
|
|
14
|
-
pub(crate) fn
|
14
|
+
pub(crate) fn register_startup_deps() {
|
15
15
|
if !registry::is_object_builder_registered() {
|
16
16
|
let object_builder = Box::new(|name: PlSmallStr, capacity: usize| {
|
17
17
|
Box::new(ObjectChunkedBuilder::<ObjectValue>::new(name, capacity))
|
@@ -98,11 +98,7 @@ impl RbSeries {
|
|
98
98
|
.into_value())
|
99
99
|
}
|
100
100
|
|
101
|
-
pub fn quantile(
|
102
|
-
&self,
|
103
|
-
quantile: f64,
|
104
|
-
interpolation: Wrap<QuantileInterpolOptions>,
|
105
|
-
) -> RbResult<Value> {
|
101
|
+
pub fn quantile(&self, quantile: f64, interpolation: Wrap<QuantileMethod>) -> RbResult<Value> {
|
106
102
|
let bind = self
|
107
103
|
.series
|
108
104
|
.borrow()
|
@@ -2,33 +2,33 @@ use crate::{RbPolarsErr, RbResult, RbSeries};
|
|
2
2
|
|
3
3
|
impl RbSeries {
|
4
4
|
pub fn add(&self, other: &RbSeries) -> RbResult<Self> {
|
5
|
-
(&*self.series.borrow() + &*other.series.borrow())
|
5
|
+
Ok((&*self.series.borrow() + &*other.series.borrow())
|
6
6
|
.map(Into::into)
|
7
|
-
.map_err(RbPolarsErr::from)
|
7
|
+
.map_err(RbPolarsErr::from)?)
|
8
8
|
}
|
9
9
|
|
10
10
|
pub fn sub(&self, other: &RbSeries) -> RbResult<Self> {
|
11
|
-
(&*self.series.borrow() - &*other.series.borrow())
|
11
|
+
Ok((&*self.series.borrow() - &*other.series.borrow())
|
12
12
|
.map(Into::into)
|
13
|
-
.map_err(RbPolarsErr::from)
|
13
|
+
.map_err(RbPolarsErr::from)?)
|
14
14
|
}
|
15
15
|
|
16
16
|
pub fn mul(&self, other: &RbSeries) -> RbResult<Self> {
|
17
|
-
(&*self.series.borrow() * &*other.series.borrow())
|
17
|
+
Ok((&*self.series.borrow() * &*other.series.borrow())
|
18
18
|
.map(Into::into)
|
19
|
-
.map_err(RbPolarsErr::from)
|
19
|
+
.map_err(RbPolarsErr::from)?)
|
20
20
|
}
|
21
21
|
|
22
22
|
pub fn div(&self, other: &RbSeries) -> RbResult<Self> {
|
23
|
-
(&*self.series.borrow() / &*other.series.borrow())
|
23
|
+
Ok((&*self.series.borrow() / &*other.series.borrow())
|
24
24
|
.map(Into::into)
|
25
|
-
.map_err(RbPolarsErr::from)
|
25
|
+
.map_err(RbPolarsErr::from)?)
|
26
26
|
}
|
27
27
|
|
28
28
|
pub fn rem(&self, other: &RbSeries) -> RbResult<Self> {
|
29
|
-
(&*self.series.borrow() % &*other.series.borrow())
|
29
|
+
Ok((&*self.series.borrow() % &*other.series.borrow())
|
30
30
|
.map(Into::into)
|
31
|
-
.map_err(RbPolarsErr::from)
|
31
|
+
.map_err(RbPolarsErr::from)?)
|
32
32
|
}
|
33
33
|
}
|
34
34
|
|
@@ -4,7 +4,7 @@ use polars_core::prelude::*;
|
|
4
4
|
use crate::any_value::rb_object_to_any_value;
|
5
5
|
use crate::conversion::{slice_extract_wrapped, vec_extract_wrapped, Wrap};
|
6
6
|
use crate::prelude::ObjectValue;
|
7
|
-
use crate::series::
|
7
|
+
use crate::series::to_series;
|
8
8
|
use crate::{RbPolarsErr, RbResult, RbSeries, RbTypeError, RbValueError};
|
9
9
|
|
10
10
|
impl RbSeries {
|
@@ -185,7 +185,7 @@ impl RbSeries {
|
|
185
185
|
}
|
186
186
|
|
187
187
|
pub fn new_series_list(name: String, val: RArray, _strict: bool) -> RbResult<Self> {
|
188
|
-
let series_vec =
|
188
|
+
let series_vec = to_series(val)?;
|
189
189
|
Ok(Series::new(name.into(), &series_vec).into())
|
190
190
|
}
|
191
191
|
|
@@ -31,7 +31,7 @@ impl RbSeries {
|
|
31
31
|
for i in 0..series.len() {
|
32
32
|
let obj: Option<&ObjectValue> = series.get_object(i).map(|any| any.into());
|
33
33
|
match obj {
|
34
|
-
Some(val) => v.push(val.
|
34
|
+
Some(val) => v.push(val.to_value()).unwrap(),
|
35
35
|
None => v.push(qnil()).unwrap(),
|
36
36
|
};
|
37
37
|
}
|