polars-df 0.13.0 → 0.15.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +30 -0
- data/Cargo.lock +1368 -319
- data/LICENSE.txt +1 -0
- data/README.md +1 -2
- data/ext/polars/Cargo.toml +15 -6
- data/ext/polars/src/batched_csv.rs +10 -13
- data/ext/polars/src/conversion/any_value.rs +37 -21
- data/ext/polars/src/conversion/chunked_array.rs +3 -3
- data/ext/polars/src/conversion/mod.rs +159 -46
- data/ext/polars/src/dataframe/construction.rs +4 -7
- data/ext/polars/src/dataframe/export.rs +9 -2
- data/ext/polars/src/dataframe/general.rs +22 -16
- data/ext/polars/src/dataframe/io.rs +78 -174
- data/ext/polars/src/dataframe/mod.rs +1 -0
- data/ext/polars/src/dataframe/serde.rs +15 -0
- data/ext/polars/src/error.rs +31 -48
- data/ext/polars/src/exceptions.rs +24 -0
- data/ext/polars/src/expr/binary.rs +4 -42
- data/ext/polars/src/expr/datetime.rs +16 -7
- data/ext/polars/src/expr/general.rs +14 -23
- data/ext/polars/src/expr/list.rs +18 -11
- data/ext/polars/src/expr/name.rs +3 -2
- data/ext/polars/src/expr/rolling.rs +6 -7
- data/ext/polars/src/expr/string.rs +17 -37
- data/ext/polars/src/file.rs +59 -22
- data/ext/polars/src/functions/business.rs +15 -0
- data/ext/polars/src/functions/io.rs +6 -6
- data/ext/polars/src/functions/lazy.rs +17 -8
- data/ext/polars/src/functions/mod.rs +1 -0
- data/ext/polars/src/functions/range.rs +4 -2
- data/ext/polars/src/interop/arrow/mod.rs +1 -0
- data/ext/polars/src/interop/arrow/to_ruby.rs +83 -0
- data/ext/polars/src/interop/mod.rs +1 -0
- data/ext/polars/src/lazyframe/general.rs +877 -0
- data/ext/polars/src/lazyframe/mod.rs +3 -825
- data/ext/polars/src/lazyframe/serde.rs +31 -0
- data/ext/polars/src/lib.rs +44 -13
- data/ext/polars/src/map/dataframe.rs +46 -14
- data/ext/polars/src/map/lazy.rs +65 -4
- data/ext/polars/src/map/mod.rs +17 -16
- data/ext/polars/src/map/series.rs +106 -64
- data/ext/polars/src/on_startup.rs +2 -2
- data/ext/polars/src/series/aggregation.rs +1 -5
- data/ext/polars/src/series/arithmetic.rs +10 -10
- data/ext/polars/src/series/construction.rs +52 -25
- data/ext/polars/src/series/export.rs +1 -1
- data/ext/polars/src/series/general.rs +643 -0
- data/ext/polars/src/series/import.rs +55 -0
- data/ext/polars/src/series/mod.rs +11 -638
- data/ext/polars/src/series/scatter.rs +2 -2
- data/ext/polars/src/utils.rs +0 -20
- data/lib/polars/batched_csv_reader.rb +0 -2
- data/lib/polars/binary_expr.rb +133 -9
- data/lib/polars/binary_name_space.rb +101 -6
- data/lib/polars/config.rb +4 -0
- data/lib/polars/data_frame.rb +285 -62
- data/lib/polars/data_type_group.rb +28 -0
- data/lib/polars/data_types.rb +2 -0
- data/lib/polars/date_time_expr.rb +244 -0
- data/lib/polars/date_time_name_space.rb +87 -0
- data/lib/polars/expr.rb +109 -8
- data/lib/polars/functions/as_datatype.rb +51 -2
- data/lib/polars/functions/col.rb +1 -1
- data/lib/polars/functions/eager.rb +1 -3
- data/lib/polars/functions/lazy.rb +88 -10
- data/lib/polars/functions/range/time_range.rb +21 -21
- data/lib/polars/io/csv.rb +14 -16
- data/lib/polars/io/database.rb +2 -2
- data/lib/polars/io/ipc.rb +14 -12
- data/lib/polars/io/ndjson.rb +10 -0
- data/lib/polars/io/parquet.rb +168 -111
- data/lib/polars/lazy_frame.rb +649 -15
- data/lib/polars/list_name_space.rb +169 -0
- data/lib/polars/selectors.rb +1144 -0
- data/lib/polars/series.rb +470 -40
- data/lib/polars/string_cache.rb +27 -1
- data/lib/polars/string_expr.rb +0 -1
- data/lib/polars/string_name_space.rb +73 -3
- data/lib/polars/struct_name_space.rb +31 -7
- data/lib/polars/utils/various.rb +5 -1
- data/lib/polars/utils.rb +45 -10
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +2 -1
- metadata +14 -4
- data/lib/polars/functions.rb +0 -57
@@ -0,0 +1,31 @@
|
|
1
|
+
use magnus::Value;
|
2
|
+
use polars::lazy::frame::LazyFrame;
|
3
|
+
use polars::prelude::*;
|
4
|
+
use std::io::Read;
|
5
|
+
|
6
|
+
use crate::file::get_file_like;
|
7
|
+
use crate::{RbLazyFrame, RbResult, RbValueError};
|
8
|
+
|
9
|
+
impl RbLazyFrame {
|
10
|
+
// TODO change to serialize_json
|
11
|
+
pub fn read_json(rb_f: Value) -> RbResult<Self> {
|
12
|
+
// it is faster to first read to memory and then parse: https://github.com/serde-rs/json/issues/160
|
13
|
+
// so don't bother with files.
|
14
|
+
let mut json = String::new();
|
15
|
+
let _ = get_file_like(rb_f, false)?
|
16
|
+
.read_to_string(&mut json)
|
17
|
+
.unwrap();
|
18
|
+
|
19
|
+
// Safety
|
20
|
+
// we skipped the serializing/deserializing of the static in lifetime in `DataType`
|
21
|
+
// so we actually don't have a lifetime at all when serializing.
|
22
|
+
|
23
|
+
// &str still has a lifetime. Bit its ok, because we drop it immediately
|
24
|
+
// in this scope
|
25
|
+
let json = unsafe { std::mem::transmute::<&'_ str, &'static str>(json.as_str()) };
|
26
|
+
|
27
|
+
let lp = serde_json::from_str::<DslPlan>(json)
|
28
|
+
.map_err(|err| RbValueError::new_err(format!("{:?}", err)))?;
|
29
|
+
Ok(LazyFrame::from(lp).into())
|
30
|
+
}
|
31
|
+
}
|
data/ext/polars/src/lib.rs
CHANGED
@@ -3,6 +3,7 @@ mod batched_csv;
|
|
3
3
|
mod conversion;
|
4
4
|
mod dataframe;
|
5
5
|
mod error;
|
6
|
+
mod exceptions;
|
6
7
|
mod expr;
|
7
8
|
mod file;
|
8
9
|
mod functions;
|
@@ -21,21 +22,31 @@ mod utils;
|
|
21
22
|
use batched_csv::RbBatchedCsv;
|
22
23
|
use conversion::*;
|
23
24
|
use dataframe::RbDataFrame;
|
24
|
-
use error::
|
25
|
+
use error::RbPolarsErr;
|
26
|
+
use exceptions::{RbTypeError, RbValueError};
|
25
27
|
use expr::rb_exprs_to_exprs;
|
26
28
|
use expr::RbExpr;
|
27
29
|
use functions::string_cache::RbStringCacheHolder;
|
28
30
|
use functions::whenthen::{RbChainedThen, RbChainedWhen, RbThen, RbWhen};
|
31
|
+
use interop::arrow::to_ruby::RbArrowArrayStream;
|
29
32
|
use lazyframe::RbLazyFrame;
|
30
33
|
use lazygroupby::RbLazyGroupBy;
|
31
|
-
use magnus::{define_module, function, method, prelude::*,
|
34
|
+
use magnus::{define_module, function, method, prelude::*, Ruby};
|
32
35
|
use series::RbSeries;
|
33
36
|
use sql::RbSQLContext;
|
34
37
|
|
35
|
-
|
38
|
+
use magnus::error::Result as RbResult;
|
39
|
+
use magnus::Error as RbErr;
|
40
|
+
|
41
|
+
// TODO move
|
42
|
+
fn re_escape(pattern: String) -> String {
|
43
|
+
regex::escape(&pattern)
|
44
|
+
}
|
36
45
|
|
37
46
|
#[magnus::init]
|
38
47
|
fn init(ruby: &Ruby) -> RbResult<()> {
|
48
|
+
crate::on_startup::register_startup_deps();
|
49
|
+
|
39
50
|
let module = define_module("Polars")?;
|
40
51
|
|
41
52
|
let class = module.define_class("RbBatchedCsv", ruby.class_object())?;
|
@@ -45,7 +56,6 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
45
56
|
let class = module.define_class("RbDataFrame", ruby.class_object())?;
|
46
57
|
class.define_singleton_method("new", function!(RbDataFrame::init, 1))?;
|
47
58
|
class.define_singleton_method("read_csv", function!(RbDataFrame::read_csv, -1))?;
|
48
|
-
class.define_singleton_method("read_parquet", function!(RbDataFrame::read_parquet, 9))?;
|
49
59
|
class.define_singleton_method("read_ipc", function!(RbDataFrame::read_ipc, 6))?;
|
50
60
|
class.define_singleton_method(
|
51
61
|
"read_ipc_stream",
|
@@ -58,17 +68,21 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
58
68
|
class.define_singleton_method("read_ndjson", function!(RbDataFrame::read_ndjson, 4))?;
|
59
69
|
class.define_method("estimated_size", method!(RbDataFrame::estimated_size, 0))?;
|
60
70
|
class.define_method("dtype_strings", method!(RbDataFrame::dtype_strings, 0))?;
|
61
|
-
class.define_method("write_avro", method!(RbDataFrame::write_avro,
|
71
|
+
class.define_method("write_avro", method!(RbDataFrame::write_avro, 3))?;
|
62
72
|
class.define_method("write_json", method!(RbDataFrame::write_json, 3))?;
|
63
73
|
class.define_method("write_ndjson", method!(RbDataFrame::write_ndjson, 1))?;
|
64
74
|
class.define_method("write_csv", method!(RbDataFrame::write_csv, 10))?;
|
65
|
-
class.define_method("write_ipc", method!(RbDataFrame::write_ipc,
|
75
|
+
class.define_method("write_ipc", method!(RbDataFrame::write_ipc, 3))?;
|
66
76
|
class.define_method(
|
67
77
|
"write_ipc_stream",
|
68
|
-
method!(RbDataFrame::write_ipc_stream,
|
78
|
+
method!(RbDataFrame::write_ipc_stream, 3),
|
69
79
|
)?;
|
70
80
|
class.define_method("row_tuple", method!(RbDataFrame::row_tuple, 1))?;
|
71
81
|
class.define_method("row_tuples", method!(RbDataFrame::row_tuples, 0))?;
|
82
|
+
class.define_method(
|
83
|
+
"arrow_c_stream",
|
84
|
+
method!(RbDataFrame::__arrow_c_stream__, 0),
|
85
|
+
)?;
|
72
86
|
class.define_method("to_numo", method!(RbDataFrame::to_numo, 0))?;
|
73
87
|
class.define_method("write_parquet", method!(RbDataFrame::write_parquet, 6))?;
|
74
88
|
class.define_method("add", method!(RbDataFrame::add, 1))?;
|
@@ -143,6 +157,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
143
157
|
class.define_method("to_struct", method!(RbDataFrame::to_struct, 1))?;
|
144
158
|
class.define_method("unnest", method!(RbDataFrame::unnest, 1))?;
|
145
159
|
class.define_method("clear", method!(RbDataFrame::clear, 0))?;
|
160
|
+
class.define_method("serialize_json", method!(RbDataFrame::serialize_json, 1))?;
|
146
161
|
|
147
162
|
let class = module.define_class("RbExpr", ruby.class_object())?;
|
148
163
|
class.define_method("+", method!(RbExpr::add, 1))?;
|
@@ -286,6 +301,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
286
301
|
class.define_method("str_slice", method!(RbExpr::str_slice, 2))?;
|
287
302
|
class.define_method("str_to_uppercase", method!(RbExpr::str_to_uppercase, 0))?;
|
288
303
|
class.define_method("str_to_lowercase", method!(RbExpr::str_to_lowercase, 0))?;
|
304
|
+
// class.define_method("str_to_titlecase", method!(RbExpr::str_to_titlecase, 0))?;
|
289
305
|
class.define_method("str_len_bytes", method!(RbExpr::str_len_bytes, 0))?;
|
290
306
|
class.define_method("str_len_chars", method!(RbExpr::str_len_chars, 0))?;
|
291
307
|
class.define_method("str_replace_n", method!(RbExpr::str_replace_n, 4))?;
|
@@ -412,7 +428,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
412
428
|
class.define_method("dt_dst_offset", method!(RbExpr::dt_dst_offset, 0))?;
|
413
429
|
class.define_method("dt_round", method!(RbExpr::dt_round, 1))?;
|
414
430
|
class.define_method("dt_combine", method!(RbExpr::dt_combine, 2))?;
|
415
|
-
class.define_method("map_batches", method!(RbExpr::map_batches,
|
431
|
+
class.define_method("map_batches", method!(RbExpr::map_batches, 5))?;
|
416
432
|
class.define_method("dot", method!(RbExpr::dot, 1))?;
|
417
433
|
class.define_method("reinterpret", method!(RbExpr::reinterpret, 1))?;
|
418
434
|
class.define_method("mode", method!(RbExpr::mode, 0))?;
|
@@ -567,6 +583,10 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
567
583
|
class.define_singleton_method("when", function!(functions::whenthen::when, 1))?;
|
568
584
|
class.define_singleton_method("concat_str", function!(functions::lazy::concat_str, 3))?;
|
569
585
|
class.define_singleton_method("concat_list", function!(functions::lazy::concat_list, 1))?;
|
586
|
+
class.define_singleton_method(
|
587
|
+
"business_day_count",
|
588
|
+
function!(functions::business::business_day_count, 4),
|
589
|
+
)?;
|
570
590
|
class.define_singleton_method(
|
571
591
|
"all_horizontal",
|
572
592
|
function!(functions::aggregation::all_horizontal, 1),
|
@@ -699,17 +719,18 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
699
719
|
"set_random_seed",
|
700
720
|
function!(functions::random::set_random_seed, 1),
|
701
721
|
)?;
|
722
|
+
class.define_singleton_method("re_escape", function!(re_escape, 1))?;
|
702
723
|
|
703
724
|
let class = module.define_class("RbLazyFrame", ruby.class_object())?;
|
704
725
|
class.define_singleton_method("read_json", function!(RbLazyFrame::read_json, 1))?;
|
705
726
|
class.define_singleton_method(
|
706
727
|
"new_from_ndjson",
|
707
|
-
function!(RbLazyFrame::new_from_ndjson,
|
728
|
+
function!(RbLazyFrame::new_from_ndjson, 8),
|
708
729
|
)?;
|
709
730
|
class.define_singleton_method("new_from_csv", function!(RbLazyFrame::new_from_csv, -1))?;
|
710
731
|
class.define_singleton_method(
|
711
732
|
"new_from_parquet",
|
712
|
-
function!(RbLazyFrame::new_from_parquet,
|
733
|
+
function!(RbLazyFrame::new_from_parquet, -1),
|
713
734
|
)?;
|
714
735
|
class.define_singleton_method("new_from_ipc", function!(RbLazyFrame::new_from_ipc, 10))?;
|
715
736
|
class.define_method("write_json", method!(RbLazyFrame::write_json, 1))?;
|
@@ -741,15 +762,15 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
741
762
|
method!(RbLazyFrame::group_by_dynamic, 9),
|
742
763
|
)?;
|
743
764
|
class.define_method("with_context", method!(RbLazyFrame::with_context, 1))?;
|
744
|
-
class.define_method("join_asof", method!(RbLazyFrame::join_asof,
|
745
|
-
class.define_method("join", method!(RbLazyFrame::join,
|
765
|
+
class.define_method("join_asof", method!(RbLazyFrame::join_asof, 12))?;
|
766
|
+
class.define_method("join", method!(RbLazyFrame::join, 10))?;
|
746
767
|
class.define_method("with_column", method!(RbLazyFrame::with_column, 1))?;
|
747
768
|
class.define_method("with_columns", method!(RbLazyFrame::with_columns, 1))?;
|
748
769
|
class.define_method(
|
749
770
|
"with_columns_seq",
|
750
771
|
method!(RbLazyFrame::with_columns_seq, 1),
|
751
772
|
)?;
|
752
|
-
class.define_method("rename", method!(RbLazyFrame::rename,
|
773
|
+
class.define_method("rename", method!(RbLazyFrame::rename, 3))?;
|
753
774
|
class.define_method("reverse", method!(RbLazyFrame::reverse, 0))?;
|
754
775
|
class.define_method("shift", method!(RbLazyFrame::shift, 2))?;
|
755
776
|
class.define_method("fill_nan", method!(RbLazyFrame::fill_nan, 1))?;
|
@@ -770,6 +791,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
770
791
|
class.define_method("unpivot", method!(RbLazyFrame::unpivot, 4))?;
|
771
792
|
class.define_method("with_row_index", method!(RbLazyFrame::with_row_index, 2))?;
|
772
793
|
class.define_method("drop", method!(RbLazyFrame::drop, 1))?;
|
794
|
+
class.define_method("cast", method!(RbLazyFrame::cast, 2))?;
|
773
795
|
class.define_method("cast_all", method!(RbLazyFrame::cast_all, 2))?;
|
774
796
|
class.define_method("_clone", method!(RbLazyFrame::clone, 0))?;
|
775
797
|
class.define_method("collect_schema", method!(RbLazyFrame::collect_schema, 0))?;
|
@@ -810,7 +832,12 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
810
832
|
class.define_singleton_method("new_array", function!(RbSeries::new_array, 5))?;
|
811
833
|
class.define_singleton_method("new_decimal", function!(RbSeries::new_decimal, 3))?;
|
812
834
|
class.define_singleton_method("repeat", function!(RbSeries::repeat, 4))?;
|
835
|
+
class.define_singleton_method(
|
836
|
+
"from_arrow_c_stream",
|
837
|
+
function!(RbSeries::from_arrow_c_stream, 1),
|
838
|
+
)?;
|
813
839
|
class.define_method("struct_unnest", method!(RbSeries::struct_unnest, 0))?;
|
840
|
+
class.define_method("struct_fields", method!(RbSeries::struct_fields, 0))?;
|
814
841
|
class.define_method(
|
815
842
|
"is_sorted_flag",
|
816
843
|
method!(RbSeries::is_sorted_ascending_flag, 0),
|
@@ -1081,5 +1108,9 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
1081
1108
|
let class = module.define_class("RbStringCacheHolder", ruby.class_object())?;
|
1082
1109
|
class.define_singleton_method("hold", function!(RbStringCacheHolder::hold, 0))?;
|
1083
1110
|
|
1111
|
+
// arrow array stream
|
1112
|
+
let class = module.define_class("RbArrowArrayStream", ruby.class_object())?;
|
1113
|
+
class.define_method("to_i", method!(RbArrowArrayStream::to_i, 0))?;
|
1114
|
+
|
1084
1115
|
Ok(())
|
1085
1116
|
}
|
@@ -7,13 +7,16 @@ use super::*;
|
|
7
7
|
use crate::{RbDataFrame, RbPolarsErr, RbSeries, Wrap};
|
8
8
|
|
9
9
|
fn get_iters(df: &DataFrame) -> Vec<SeriesIter> {
|
10
|
-
df.get_columns()
|
10
|
+
df.get_columns()
|
11
|
+
.iter()
|
12
|
+
.map(|s| s.as_materialized_series().iter())
|
13
|
+
.collect()
|
11
14
|
}
|
12
15
|
|
13
16
|
fn get_iters_skip(df: &DataFrame, skip: usize) -> Vec<std::iter::Skip<SeriesIter>> {
|
14
17
|
df.get_columns()
|
15
18
|
.iter()
|
16
|
-
.map(|s| s.iter().skip(skip))
|
19
|
+
.map(|s| s.as_materialized_series().iter().skip(skip))
|
17
20
|
.collect()
|
18
21
|
}
|
19
22
|
|
@@ -113,16 +116,17 @@ pub fn apply_lambda_unknown<'a>(
|
|
113
116
|
true,
|
114
117
|
));
|
115
118
|
} else if out.is_kind_of(class::array()) {
|
116
|
-
return Err(RbPolarsErr::
|
119
|
+
return Err(RbPolarsErr::Other(
|
117
120
|
"A list output type is invalid. Do you mean to create polars List Series?\
|
118
121
|
Then return a Series object."
|
119
122
|
.into(),
|
120
|
-
)
|
123
|
+
)
|
124
|
+
.into());
|
121
125
|
} else {
|
122
|
-
return Err(RbPolarsErr::
|
126
|
+
return Err(RbPolarsErr::Other("Could not determine output type".into()).into());
|
123
127
|
}
|
124
128
|
}
|
125
|
-
Err(RbPolarsErr::
|
129
|
+
Err(RbPolarsErr::Other("Could not determine output type".into()).into())
|
126
130
|
}
|
127
131
|
|
128
132
|
fn apply_iter<T>(
|
@@ -158,10 +162,16 @@ where
|
|
158
162
|
{
|
159
163
|
let skip = usize::from(first_value.is_some());
|
160
164
|
if init_null_count == df.height() {
|
161
|
-
ChunkedArray::full_null("
|
165
|
+
ChunkedArray::full_null(PlSmallStr::from_static("map"), df.height())
|
162
166
|
} else {
|
163
167
|
let iter = apply_iter(df, lambda, init_null_count, skip);
|
164
|
-
iterator_to_primitive(
|
168
|
+
iterator_to_primitive(
|
169
|
+
iter,
|
170
|
+
init_null_count,
|
171
|
+
first_value,
|
172
|
+
PlSmallStr::from_static("map"),
|
173
|
+
df.height(),
|
174
|
+
)
|
165
175
|
}
|
166
176
|
}
|
167
177
|
|
@@ -174,10 +184,16 @@ pub fn apply_lambda_with_bool_out_type(
|
|
174
184
|
) -> ChunkedArray<BooleanType> {
|
175
185
|
let skip = usize::from(first_value.is_some());
|
176
186
|
if init_null_count == df.height() {
|
177
|
-
ChunkedArray::full_null("
|
187
|
+
ChunkedArray::full_null(PlSmallStr::from_static("map"), df.height())
|
178
188
|
} else {
|
179
189
|
let iter = apply_iter(df, lambda, init_null_count, skip);
|
180
|
-
iterator_to_bool(
|
190
|
+
iterator_to_bool(
|
191
|
+
iter,
|
192
|
+
init_null_count,
|
193
|
+
first_value,
|
194
|
+
PlSmallStr::from_static("map"),
|
195
|
+
df.height(),
|
196
|
+
)
|
181
197
|
}
|
182
198
|
}
|
183
199
|
|
@@ -190,10 +206,16 @@ pub fn apply_lambda_with_utf8_out_type(
|
|
190
206
|
) -> StringChunked {
|
191
207
|
let skip = usize::from(first_value.is_some());
|
192
208
|
if init_null_count == df.height() {
|
193
|
-
ChunkedArray::full_null("
|
209
|
+
ChunkedArray::full_null(PlSmallStr::from_static("map"), df.height())
|
194
210
|
} else {
|
195
211
|
let iter = apply_iter::<String>(df, lambda, init_null_count, skip);
|
196
|
-
iterator_to_utf8(
|
212
|
+
iterator_to_utf8(
|
213
|
+
iter,
|
214
|
+
init_null_count,
|
215
|
+
first_value,
|
216
|
+
PlSmallStr::from_static("map"),
|
217
|
+
df.height(),
|
218
|
+
)
|
197
219
|
}
|
198
220
|
}
|
199
221
|
|
@@ -207,7 +229,10 @@ pub fn apply_lambda_with_list_out_type(
|
|
207
229
|
) -> RbResult<ListChunked> {
|
208
230
|
let skip = usize::from(first_value.is_some());
|
209
231
|
if init_null_count == df.height() {
|
210
|
-
Ok(ChunkedArray::full_null(
|
232
|
+
Ok(ChunkedArray::full_null(
|
233
|
+
PlSmallStr::from_static("map"),
|
234
|
+
df.height(),
|
235
|
+
))
|
211
236
|
} else {
|
212
237
|
let mut iters = get_iters_skip(df, init_null_count + skip);
|
213
238
|
let iter = ((init_null_count + skip)..df.height()).map(|_| {
|
@@ -229,7 +254,14 @@ pub fn apply_lambda_with_list_out_type(
|
|
229
254
|
Err(e) => panic!("ruby function failed {}", e),
|
230
255
|
}
|
231
256
|
});
|
232
|
-
iterator_to_list(
|
257
|
+
iterator_to_list(
|
258
|
+
dt,
|
259
|
+
iter,
|
260
|
+
init_null_count,
|
261
|
+
first_value,
|
262
|
+
PlSmallStr::from_static("map"),
|
263
|
+
df.height(),
|
264
|
+
)
|
233
265
|
}
|
234
266
|
}
|
235
267
|
|
data/ext/polars/src/map/lazy.rs
CHANGED
@@ -1,10 +1,70 @@
|
|
1
|
-
use magnus::Value;
|
1
|
+
use magnus::{prelude::*, RArray, Value};
|
2
2
|
use polars::prelude::*;
|
3
3
|
|
4
|
-
use crate::
|
4
|
+
use crate::rb_modules::*;
|
5
|
+
use crate::{RbExpr, RbSeries, Wrap};
|
5
6
|
|
6
|
-
|
7
|
-
|
7
|
+
fn to_series(v: Value, name: &str) -> PolarsResult<Series> {
|
8
|
+
let rb_rbseries = match v.funcall("_s", ()) {
|
9
|
+
Ok(s) => s,
|
10
|
+
// the lambda did not return a series, we try to create a new Ruby Series
|
11
|
+
_ => {
|
12
|
+
let data = RArray::new();
|
13
|
+
data.push(v).unwrap();
|
14
|
+
let res = series().funcall::<_, _, Value>("new", (name.to_string(), data));
|
15
|
+
|
16
|
+
match res {
|
17
|
+
Ok(ruby_s) => ruby_s.funcall::<_, _, &RbSeries>("_s", ()).unwrap(),
|
18
|
+
Err(_) => {
|
19
|
+
polars_bail!(ComputeError:
|
20
|
+
"expected a something that could convert to a `Series` but got: {}",
|
21
|
+
unsafe { v.classname() }
|
22
|
+
)
|
23
|
+
}
|
24
|
+
}
|
25
|
+
}
|
26
|
+
};
|
27
|
+
// Finally get the actual Series
|
28
|
+
Ok(rb_rbseries.series.borrow().clone())
|
29
|
+
}
|
30
|
+
|
31
|
+
pub fn binary_lambda(lambda: Value, a: Series, b: Series) -> PolarsResult<Option<Series>> {
|
32
|
+
// create a RbSeries struct/object for Ruby
|
33
|
+
let rbseries_a = RbSeries::new(a);
|
34
|
+
let rbseries_b = RbSeries::new(b);
|
35
|
+
|
36
|
+
// Wrap this RbSeries object in the Ruby side Series wrapper
|
37
|
+
let ruby_series_wrapper_a: Value = utils().funcall("wrap_s", (rbseries_a,)).unwrap();
|
38
|
+
let ruby_series_wrapper_b: Value = utils().funcall("wrap_s", (rbseries_b,)).unwrap();
|
39
|
+
|
40
|
+
// call the lambda and get a Ruby side Series wrapper
|
41
|
+
let result_series_wrapper: Value =
|
42
|
+
match lambda.funcall("call", (ruby_series_wrapper_a, ruby_series_wrapper_b)) {
|
43
|
+
Ok(rbobj) => rbobj,
|
44
|
+
Err(e) => polars_bail!(
|
45
|
+
ComputeError: "custom Ruby function failed: {}", e,
|
46
|
+
),
|
47
|
+
};
|
48
|
+
let rbseries = if let Ok(rbexpr) = result_series_wrapper.funcall::<_, _, &RbExpr>("_rbexpr", ())
|
49
|
+
{
|
50
|
+
let expr = rbexpr.inner.clone();
|
51
|
+
let df = DataFrame::empty();
|
52
|
+
let out = df
|
53
|
+
.lazy()
|
54
|
+
.select([expr])
|
55
|
+
.with_predicate_pushdown(false)
|
56
|
+
.with_projection_pushdown(false)
|
57
|
+
.collect()?;
|
58
|
+
|
59
|
+
let s = out.select_at_idx(0).unwrap().clone();
|
60
|
+
RbSeries::new(s.take_materialized_series())
|
61
|
+
} else {
|
62
|
+
return Some(to_series(result_series_wrapper, "")).transpose();
|
63
|
+
};
|
64
|
+
|
65
|
+
// Finally get the actual Series
|
66
|
+
let binding = rbseries.series.borrow();
|
67
|
+
Ok(Some(binding.clone()))
|
8
68
|
}
|
9
69
|
|
10
70
|
pub fn map_single(
|
@@ -13,6 +73,7 @@ pub fn map_single(
|
|
13
73
|
_output_type: Option<Wrap<DataType>>,
|
14
74
|
_agg_list: bool,
|
15
75
|
_is_elementwise: bool,
|
76
|
+
_returns_scalar: bool,
|
16
77
|
) -> RbExpr {
|
17
78
|
todo!();
|
18
79
|
}
|
data/ext/polars/src/map/mod.rs
CHANGED
@@ -28,14 +28,14 @@ fn iterator_to_struct(
|
|
28
28
|
it: impl Iterator<Item = Option<Value>>,
|
29
29
|
init_null_count: usize,
|
30
30
|
first_value: AnyValue,
|
31
|
-
name:
|
31
|
+
name: PlSmallStr,
|
32
32
|
capacity: usize,
|
33
33
|
) -> RbResult<RbSeries> {
|
34
34
|
let (vals, flds) = match &first_value {
|
35
35
|
av @ AnyValue::Struct(_, _, flds) => (av._iter_struct_av().collect::<Vec<_>>(), &**flds),
|
36
36
|
AnyValue::StructOwned(payload) => (payload.0.clone(), &*payload.1),
|
37
37
|
_ => {
|
38
|
-
return Err(crate::
|
38
|
+
return Err(crate::exceptions::ComputeError::new_err(format!(
|
39
39
|
"expected struct got {first_value:?}",
|
40
40
|
)))
|
41
41
|
}
|
@@ -70,7 +70,7 @@ fn iterator_to_struct(
|
|
70
70
|
Some(dict) => {
|
71
71
|
let dict = RHash::try_convert(dict)?;
|
72
72
|
if dict.len() != struct_width {
|
73
|
-
return Err(crate::
|
73
|
+
return Err(crate::exceptions::ComputeError::new_err(
|
74
74
|
format!("Cannot create struct type.\n> The struct dtype expects {} fields, but it got a dict with {} fields.", struct_width, dict.len())
|
75
75
|
));
|
76
76
|
}
|
@@ -89,21 +89,23 @@ fn iterator_to_struct(
|
|
89
89
|
items
|
90
90
|
.par_iter()
|
91
91
|
.zip(flds)
|
92
|
-
.map(|(av, fld)| Series::new(fld.name(), av))
|
92
|
+
.map(|(av, fld)| Series::new(fld.name().clone(), av))
|
93
93
|
.collect::<Vec<_>>()
|
94
94
|
});
|
95
95
|
|
96
|
-
Ok(
|
97
|
-
.
|
98
|
-
|
99
|
-
|
96
|
+
Ok(
|
97
|
+
StructChunked::from_series(name, fields[0].len(), fields.iter())
|
98
|
+
.unwrap()
|
99
|
+
.into_series()
|
100
|
+
.into(),
|
101
|
+
)
|
100
102
|
}
|
101
103
|
|
102
104
|
fn iterator_to_primitive<T>(
|
103
105
|
it: impl Iterator<Item = Option<T::Native>>,
|
104
106
|
init_null_count: usize,
|
105
107
|
first_value: Option<T::Native>,
|
106
|
-
name:
|
108
|
+
name: PlSmallStr,
|
107
109
|
capacity: usize,
|
108
110
|
) -> ChunkedArray<T>
|
109
111
|
where
|
@@ -136,7 +138,7 @@ fn iterator_to_bool(
|
|
136
138
|
it: impl Iterator<Item = Option<bool>>,
|
137
139
|
init_null_count: usize,
|
138
140
|
first_value: Option<bool>,
|
139
|
-
name:
|
141
|
+
name: PlSmallStr,
|
140
142
|
capacity: usize,
|
141
143
|
) -> ChunkedArray<BooleanType> {
|
142
144
|
// safety: we know the iterators len
|
@@ -166,7 +168,7 @@ fn iterator_to_object(
|
|
166
168
|
it: impl Iterator<Item = Option<ObjectValue>>,
|
167
169
|
init_null_count: usize,
|
168
170
|
first_value: Option<ObjectValue>,
|
169
|
-
name:
|
171
|
+
name: PlSmallStr,
|
170
172
|
capacity: usize,
|
171
173
|
) -> ObjectChunked<ObjectValue> {
|
172
174
|
// safety: we know the iterators len
|
@@ -196,7 +198,7 @@ fn iterator_to_utf8(
|
|
196
198
|
it: impl Iterator<Item = Option<String>>,
|
197
199
|
init_null_count: usize,
|
198
200
|
first_value: Option<&str>,
|
199
|
-
name:
|
201
|
+
name: PlSmallStr,
|
200
202
|
capacity: usize,
|
201
203
|
) -> StringChunked {
|
202
204
|
let first_value = first_value.map(|v| v.to_string());
|
@@ -229,11 +231,10 @@ fn iterator_to_list(
|
|
229
231
|
it: impl Iterator<Item = Option<Series>>,
|
230
232
|
init_null_count: usize,
|
231
233
|
first_value: Option<&Series>,
|
232
|
-
name:
|
234
|
+
name: PlSmallStr,
|
233
235
|
capacity: usize,
|
234
236
|
) -> RbResult<ListChunked> {
|
235
|
-
let mut builder =
|
236
|
-
get_list_builder(dt, capacity * 5, capacity, name).map_err(RbPolarsErr::from)?;
|
237
|
+
let mut builder = get_list_builder(dt, capacity * 5, capacity, name);
|
237
238
|
for _ in 0..init_null_count {
|
238
239
|
builder.append_null()
|
239
240
|
}
|
@@ -246,7 +247,7 @@ fn iterator_to_list(
|
|
246
247
|
Some(s) => {
|
247
248
|
if s.len() == 0 && s.dtype() != dt {
|
248
249
|
builder
|
249
|
-
.append_series(&Series::full_null(
|
250
|
+
.append_series(&Series::full_null(PlSmallStr::EMPTY, 0, dt))
|
250
251
|
.unwrap()
|
251
252
|
} else {
|
252
253
|
builder.append_series(&s).map_err(RbPolarsErr::from)?
|