polars-df 0.13.0 → 0.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +30 -0
- data/Cargo.lock +1368 -319
- data/LICENSE.txt +1 -0
- data/README.md +1 -2
- data/ext/polars/Cargo.toml +15 -6
- data/ext/polars/src/batched_csv.rs +10 -13
- data/ext/polars/src/conversion/any_value.rs +37 -21
- data/ext/polars/src/conversion/chunked_array.rs +3 -3
- data/ext/polars/src/conversion/mod.rs +159 -46
- data/ext/polars/src/dataframe/construction.rs +4 -7
- data/ext/polars/src/dataframe/export.rs +9 -2
- data/ext/polars/src/dataframe/general.rs +22 -16
- data/ext/polars/src/dataframe/io.rs +78 -174
- data/ext/polars/src/dataframe/mod.rs +1 -0
- data/ext/polars/src/dataframe/serde.rs +15 -0
- data/ext/polars/src/error.rs +31 -48
- data/ext/polars/src/exceptions.rs +24 -0
- data/ext/polars/src/expr/binary.rs +4 -42
- data/ext/polars/src/expr/datetime.rs +16 -7
- data/ext/polars/src/expr/general.rs +14 -23
- data/ext/polars/src/expr/list.rs +18 -11
- data/ext/polars/src/expr/name.rs +3 -2
- data/ext/polars/src/expr/rolling.rs +6 -7
- data/ext/polars/src/expr/string.rs +17 -37
- data/ext/polars/src/file.rs +59 -22
- data/ext/polars/src/functions/business.rs +15 -0
- data/ext/polars/src/functions/io.rs +6 -6
- data/ext/polars/src/functions/lazy.rs +17 -8
- data/ext/polars/src/functions/mod.rs +1 -0
- data/ext/polars/src/functions/range.rs +4 -2
- data/ext/polars/src/interop/arrow/mod.rs +1 -0
- data/ext/polars/src/interop/arrow/to_ruby.rs +83 -0
- data/ext/polars/src/interop/mod.rs +1 -0
- data/ext/polars/src/lazyframe/general.rs +877 -0
- data/ext/polars/src/lazyframe/mod.rs +3 -825
- data/ext/polars/src/lazyframe/serde.rs +31 -0
- data/ext/polars/src/lib.rs +44 -13
- data/ext/polars/src/map/dataframe.rs +46 -14
- data/ext/polars/src/map/lazy.rs +65 -4
- data/ext/polars/src/map/mod.rs +17 -16
- data/ext/polars/src/map/series.rs +106 -64
- data/ext/polars/src/on_startup.rs +2 -2
- data/ext/polars/src/series/aggregation.rs +1 -5
- data/ext/polars/src/series/arithmetic.rs +10 -10
- data/ext/polars/src/series/construction.rs +52 -25
- data/ext/polars/src/series/export.rs +1 -1
- data/ext/polars/src/series/general.rs +643 -0
- data/ext/polars/src/series/import.rs +55 -0
- data/ext/polars/src/series/mod.rs +11 -638
- data/ext/polars/src/series/scatter.rs +2 -2
- data/ext/polars/src/utils.rs +0 -20
- data/lib/polars/batched_csv_reader.rb +0 -2
- data/lib/polars/binary_expr.rb +133 -9
- data/lib/polars/binary_name_space.rb +101 -6
- data/lib/polars/config.rb +4 -0
- data/lib/polars/data_frame.rb +285 -62
- data/lib/polars/data_type_group.rb +28 -0
- data/lib/polars/data_types.rb +2 -0
- data/lib/polars/date_time_expr.rb +244 -0
- data/lib/polars/date_time_name_space.rb +87 -0
- data/lib/polars/expr.rb +109 -8
- data/lib/polars/functions/as_datatype.rb +51 -2
- data/lib/polars/functions/col.rb +1 -1
- data/lib/polars/functions/eager.rb +1 -3
- data/lib/polars/functions/lazy.rb +88 -10
- data/lib/polars/functions/range/time_range.rb +21 -21
- data/lib/polars/io/csv.rb +14 -16
- data/lib/polars/io/database.rb +2 -2
- data/lib/polars/io/ipc.rb +14 -12
- data/lib/polars/io/ndjson.rb +10 -0
- data/lib/polars/io/parquet.rb +168 -111
- data/lib/polars/lazy_frame.rb +649 -15
- data/lib/polars/list_name_space.rb +169 -0
- data/lib/polars/selectors.rb +1144 -0
- data/lib/polars/series.rb +470 -40
- data/lib/polars/string_cache.rb +27 -1
- data/lib/polars/string_expr.rb +0 -1
- data/lib/polars/string_name_space.rb +73 -3
- data/lib/polars/struct_name_space.rb +31 -7
- data/lib/polars/utils/various.rb +5 -1
- data/lib/polars/utils.rb +45 -10
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +2 -1
- metadata +14 -4
- data/lib/polars/functions.rb +0 -57
@@ -0,0 +1,31 @@
|
|
1
|
+
use magnus::Value;
|
2
|
+
use polars::lazy::frame::LazyFrame;
|
3
|
+
use polars::prelude::*;
|
4
|
+
use std::io::Read;
|
5
|
+
|
6
|
+
use crate::file::get_file_like;
|
7
|
+
use crate::{RbLazyFrame, RbResult, RbValueError};
|
8
|
+
|
9
|
+
impl RbLazyFrame {
|
10
|
+
// TODO change to serialize_json
|
11
|
+
pub fn read_json(rb_f: Value) -> RbResult<Self> {
|
12
|
+
// it is faster to first read to memory and then parse: https://github.com/serde-rs/json/issues/160
|
13
|
+
// so don't bother with files.
|
14
|
+
let mut json = String::new();
|
15
|
+
let _ = get_file_like(rb_f, false)?
|
16
|
+
.read_to_string(&mut json)
|
17
|
+
.unwrap();
|
18
|
+
|
19
|
+
// Safety
|
20
|
+
// we skipped the serializing/deserializing of the static in lifetime in `DataType`
|
21
|
+
// so we actually don't have a lifetime at all when serializing.
|
22
|
+
|
23
|
+
// &str still has a lifetime. Bit its ok, because we drop it immediately
|
24
|
+
// in this scope
|
25
|
+
let json = unsafe { std::mem::transmute::<&'_ str, &'static str>(json.as_str()) };
|
26
|
+
|
27
|
+
let lp = serde_json::from_str::<DslPlan>(json)
|
28
|
+
.map_err(|err| RbValueError::new_err(format!("{:?}", err)))?;
|
29
|
+
Ok(LazyFrame::from(lp).into())
|
30
|
+
}
|
31
|
+
}
|
data/ext/polars/src/lib.rs
CHANGED
@@ -3,6 +3,7 @@ mod batched_csv;
|
|
3
3
|
mod conversion;
|
4
4
|
mod dataframe;
|
5
5
|
mod error;
|
6
|
+
mod exceptions;
|
6
7
|
mod expr;
|
7
8
|
mod file;
|
8
9
|
mod functions;
|
@@ -21,21 +22,31 @@ mod utils;
|
|
21
22
|
use batched_csv::RbBatchedCsv;
|
22
23
|
use conversion::*;
|
23
24
|
use dataframe::RbDataFrame;
|
24
|
-
use error::
|
25
|
+
use error::RbPolarsErr;
|
26
|
+
use exceptions::{RbTypeError, RbValueError};
|
25
27
|
use expr::rb_exprs_to_exprs;
|
26
28
|
use expr::RbExpr;
|
27
29
|
use functions::string_cache::RbStringCacheHolder;
|
28
30
|
use functions::whenthen::{RbChainedThen, RbChainedWhen, RbThen, RbWhen};
|
31
|
+
use interop::arrow::to_ruby::RbArrowArrayStream;
|
29
32
|
use lazyframe::RbLazyFrame;
|
30
33
|
use lazygroupby::RbLazyGroupBy;
|
31
|
-
use magnus::{define_module, function, method, prelude::*,
|
34
|
+
use magnus::{define_module, function, method, prelude::*, Ruby};
|
32
35
|
use series::RbSeries;
|
33
36
|
use sql::RbSQLContext;
|
34
37
|
|
35
|
-
|
38
|
+
use magnus::error::Result as RbResult;
|
39
|
+
use magnus::Error as RbErr;
|
40
|
+
|
41
|
+
// TODO move
|
42
|
+
fn re_escape(pattern: String) -> String {
|
43
|
+
regex::escape(&pattern)
|
44
|
+
}
|
36
45
|
|
37
46
|
#[magnus::init]
|
38
47
|
fn init(ruby: &Ruby) -> RbResult<()> {
|
48
|
+
crate::on_startup::register_startup_deps();
|
49
|
+
|
39
50
|
let module = define_module("Polars")?;
|
40
51
|
|
41
52
|
let class = module.define_class("RbBatchedCsv", ruby.class_object())?;
|
@@ -45,7 +56,6 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
45
56
|
let class = module.define_class("RbDataFrame", ruby.class_object())?;
|
46
57
|
class.define_singleton_method("new", function!(RbDataFrame::init, 1))?;
|
47
58
|
class.define_singleton_method("read_csv", function!(RbDataFrame::read_csv, -1))?;
|
48
|
-
class.define_singleton_method("read_parquet", function!(RbDataFrame::read_parquet, 9))?;
|
49
59
|
class.define_singleton_method("read_ipc", function!(RbDataFrame::read_ipc, 6))?;
|
50
60
|
class.define_singleton_method(
|
51
61
|
"read_ipc_stream",
|
@@ -58,17 +68,21 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
58
68
|
class.define_singleton_method("read_ndjson", function!(RbDataFrame::read_ndjson, 4))?;
|
59
69
|
class.define_method("estimated_size", method!(RbDataFrame::estimated_size, 0))?;
|
60
70
|
class.define_method("dtype_strings", method!(RbDataFrame::dtype_strings, 0))?;
|
61
|
-
class.define_method("write_avro", method!(RbDataFrame::write_avro,
|
71
|
+
class.define_method("write_avro", method!(RbDataFrame::write_avro, 3))?;
|
62
72
|
class.define_method("write_json", method!(RbDataFrame::write_json, 3))?;
|
63
73
|
class.define_method("write_ndjson", method!(RbDataFrame::write_ndjson, 1))?;
|
64
74
|
class.define_method("write_csv", method!(RbDataFrame::write_csv, 10))?;
|
65
|
-
class.define_method("write_ipc", method!(RbDataFrame::write_ipc,
|
75
|
+
class.define_method("write_ipc", method!(RbDataFrame::write_ipc, 3))?;
|
66
76
|
class.define_method(
|
67
77
|
"write_ipc_stream",
|
68
|
-
method!(RbDataFrame::write_ipc_stream,
|
78
|
+
method!(RbDataFrame::write_ipc_stream, 3),
|
69
79
|
)?;
|
70
80
|
class.define_method("row_tuple", method!(RbDataFrame::row_tuple, 1))?;
|
71
81
|
class.define_method("row_tuples", method!(RbDataFrame::row_tuples, 0))?;
|
82
|
+
class.define_method(
|
83
|
+
"arrow_c_stream",
|
84
|
+
method!(RbDataFrame::__arrow_c_stream__, 0),
|
85
|
+
)?;
|
72
86
|
class.define_method("to_numo", method!(RbDataFrame::to_numo, 0))?;
|
73
87
|
class.define_method("write_parquet", method!(RbDataFrame::write_parquet, 6))?;
|
74
88
|
class.define_method("add", method!(RbDataFrame::add, 1))?;
|
@@ -143,6 +157,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
143
157
|
class.define_method("to_struct", method!(RbDataFrame::to_struct, 1))?;
|
144
158
|
class.define_method("unnest", method!(RbDataFrame::unnest, 1))?;
|
145
159
|
class.define_method("clear", method!(RbDataFrame::clear, 0))?;
|
160
|
+
class.define_method("serialize_json", method!(RbDataFrame::serialize_json, 1))?;
|
146
161
|
|
147
162
|
let class = module.define_class("RbExpr", ruby.class_object())?;
|
148
163
|
class.define_method("+", method!(RbExpr::add, 1))?;
|
@@ -286,6 +301,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
286
301
|
class.define_method("str_slice", method!(RbExpr::str_slice, 2))?;
|
287
302
|
class.define_method("str_to_uppercase", method!(RbExpr::str_to_uppercase, 0))?;
|
288
303
|
class.define_method("str_to_lowercase", method!(RbExpr::str_to_lowercase, 0))?;
|
304
|
+
// class.define_method("str_to_titlecase", method!(RbExpr::str_to_titlecase, 0))?;
|
289
305
|
class.define_method("str_len_bytes", method!(RbExpr::str_len_bytes, 0))?;
|
290
306
|
class.define_method("str_len_chars", method!(RbExpr::str_len_chars, 0))?;
|
291
307
|
class.define_method("str_replace_n", method!(RbExpr::str_replace_n, 4))?;
|
@@ -412,7 +428,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
412
428
|
class.define_method("dt_dst_offset", method!(RbExpr::dt_dst_offset, 0))?;
|
413
429
|
class.define_method("dt_round", method!(RbExpr::dt_round, 1))?;
|
414
430
|
class.define_method("dt_combine", method!(RbExpr::dt_combine, 2))?;
|
415
|
-
class.define_method("map_batches", method!(RbExpr::map_batches,
|
431
|
+
class.define_method("map_batches", method!(RbExpr::map_batches, 5))?;
|
416
432
|
class.define_method("dot", method!(RbExpr::dot, 1))?;
|
417
433
|
class.define_method("reinterpret", method!(RbExpr::reinterpret, 1))?;
|
418
434
|
class.define_method("mode", method!(RbExpr::mode, 0))?;
|
@@ -567,6 +583,10 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
567
583
|
class.define_singleton_method("when", function!(functions::whenthen::when, 1))?;
|
568
584
|
class.define_singleton_method("concat_str", function!(functions::lazy::concat_str, 3))?;
|
569
585
|
class.define_singleton_method("concat_list", function!(functions::lazy::concat_list, 1))?;
|
586
|
+
class.define_singleton_method(
|
587
|
+
"business_day_count",
|
588
|
+
function!(functions::business::business_day_count, 4),
|
589
|
+
)?;
|
570
590
|
class.define_singleton_method(
|
571
591
|
"all_horizontal",
|
572
592
|
function!(functions::aggregation::all_horizontal, 1),
|
@@ -699,17 +719,18 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
699
719
|
"set_random_seed",
|
700
720
|
function!(functions::random::set_random_seed, 1),
|
701
721
|
)?;
|
722
|
+
class.define_singleton_method("re_escape", function!(re_escape, 1))?;
|
702
723
|
|
703
724
|
let class = module.define_class("RbLazyFrame", ruby.class_object())?;
|
704
725
|
class.define_singleton_method("read_json", function!(RbLazyFrame::read_json, 1))?;
|
705
726
|
class.define_singleton_method(
|
706
727
|
"new_from_ndjson",
|
707
|
-
function!(RbLazyFrame::new_from_ndjson,
|
728
|
+
function!(RbLazyFrame::new_from_ndjson, 8),
|
708
729
|
)?;
|
709
730
|
class.define_singleton_method("new_from_csv", function!(RbLazyFrame::new_from_csv, -1))?;
|
710
731
|
class.define_singleton_method(
|
711
732
|
"new_from_parquet",
|
712
|
-
function!(RbLazyFrame::new_from_parquet,
|
733
|
+
function!(RbLazyFrame::new_from_parquet, -1),
|
713
734
|
)?;
|
714
735
|
class.define_singleton_method("new_from_ipc", function!(RbLazyFrame::new_from_ipc, 10))?;
|
715
736
|
class.define_method("write_json", method!(RbLazyFrame::write_json, 1))?;
|
@@ -741,15 +762,15 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
741
762
|
method!(RbLazyFrame::group_by_dynamic, 9),
|
742
763
|
)?;
|
743
764
|
class.define_method("with_context", method!(RbLazyFrame::with_context, 1))?;
|
744
|
-
class.define_method("join_asof", method!(RbLazyFrame::join_asof,
|
745
|
-
class.define_method("join", method!(RbLazyFrame::join,
|
765
|
+
class.define_method("join_asof", method!(RbLazyFrame::join_asof, 12))?;
|
766
|
+
class.define_method("join", method!(RbLazyFrame::join, 10))?;
|
746
767
|
class.define_method("with_column", method!(RbLazyFrame::with_column, 1))?;
|
747
768
|
class.define_method("with_columns", method!(RbLazyFrame::with_columns, 1))?;
|
748
769
|
class.define_method(
|
749
770
|
"with_columns_seq",
|
750
771
|
method!(RbLazyFrame::with_columns_seq, 1),
|
751
772
|
)?;
|
752
|
-
class.define_method("rename", method!(RbLazyFrame::rename,
|
773
|
+
class.define_method("rename", method!(RbLazyFrame::rename, 3))?;
|
753
774
|
class.define_method("reverse", method!(RbLazyFrame::reverse, 0))?;
|
754
775
|
class.define_method("shift", method!(RbLazyFrame::shift, 2))?;
|
755
776
|
class.define_method("fill_nan", method!(RbLazyFrame::fill_nan, 1))?;
|
@@ -770,6 +791,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
770
791
|
class.define_method("unpivot", method!(RbLazyFrame::unpivot, 4))?;
|
771
792
|
class.define_method("with_row_index", method!(RbLazyFrame::with_row_index, 2))?;
|
772
793
|
class.define_method("drop", method!(RbLazyFrame::drop, 1))?;
|
794
|
+
class.define_method("cast", method!(RbLazyFrame::cast, 2))?;
|
773
795
|
class.define_method("cast_all", method!(RbLazyFrame::cast_all, 2))?;
|
774
796
|
class.define_method("_clone", method!(RbLazyFrame::clone, 0))?;
|
775
797
|
class.define_method("collect_schema", method!(RbLazyFrame::collect_schema, 0))?;
|
@@ -810,7 +832,12 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
810
832
|
class.define_singleton_method("new_array", function!(RbSeries::new_array, 5))?;
|
811
833
|
class.define_singleton_method("new_decimal", function!(RbSeries::new_decimal, 3))?;
|
812
834
|
class.define_singleton_method("repeat", function!(RbSeries::repeat, 4))?;
|
835
|
+
class.define_singleton_method(
|
836
|
+
"from_arrow_c_stream",
|
837
|
+
function!(RbSeries::from_arrow_c_stream, 1),
|
838
|
+
)?;
|
813
839
|
class.define_method("struct_unnest", method!(RbSeries::struct_unnest, 0))?;
|
840
|
+
class.define_method("struct_fields", method!(RbSeries::struct_fields, 0))?;
|
814
841
|
class.define_method(
|
815
842
|
"is_sorted_flag",
|
816
843
|
method!(RbSeries::is_sorted_ascending_flag, 0),
|
@@ -1081,5 +1108,9 @@ fn init(ruby: &Ruby) -> RbResult<()> {
|
|
1081
1108
|
let class = module.define_class("RbStringCacheHolder", ruby.class_object())?;
|
1082
1109
|
class.define_singleton_method("hold", function!(RbStringCacheHolder::hold, 0))?;
|
1083
1110
|
|
1111
|
+
// arrow array stream
|
1112
|
+
let class = module.define_class("RbArrowArrayStream", ruby.class_object())?;
|
1113
|
+
class.define_method("to_i", method!(RbArrowArrayStream::to_i, 0))?;
|
1114
|
+
|
1084
1115
|
Ok(())
|
1085
1116
|
}
|
@@ -7,13 +7,16 @@ use super::*;
|
|
7
7
|
use crate::{RbDataFrame, RbPolarsErr, RbSeries, Wrap};
|
8
8
|
|
9
9
|
fn get_iters(df: &DataFrame) -> Vec<SeriesIter> {
|
10
|
-
df.get_columns()
|
10
|
+
df.get_columns()
|
11
|
+
.iter()
|
12
|
+
.map(|s| s.as_materialized_series().iter())
|
13
|
+
.collect()
|
11
14
|
}
|
12
15
|
|
13
16
|
fn get_iters_skip(df: &DataFrame, skip: usize) -> Vec<std::iter::Skip<SeriesIter>> {
|
14
17
|
df.get_columns()
|
15
18
|
.iter()
|
16
|
-
.map(|s| s.iter().skip(skip))
|
19
|
+
.map(|s| s.as_materialized_series().iter().skip(skip))
|
17
20
|
.collect()
|
18
21
|
}
|
19
22
|
|
@@ -113,16 +116,17 @@ pub fn apply_lambda_unknown<'a>(
|
|
113
116
|
true,
|
114
117
|
));
|
115
118
|
} else if out.is_kind_of(class::array()) {
|
116
|
-
return Err(RbPolarsErr::
|
119
|
+
return Err(RbPolarsErr::Other(
|
117
120
|
"A list output type is invalid. Do you mean to create polars List Series?\
|
118
121
|
Then return a Series object."
|
119
122
|
.into(),
|
120
|
-
)
|
123
|
+
)
|
124
|
+
.into());
|
121
125
|
} else {
|
122
|
-
return Err(RbPolarsErr::
|
126
|
+
return Err(RbPolarsErr::Other("Could not determine output type".into()).into());
|
123
127
|
}
|
124
128
|
}
|
125
|
-
Err(RbPolarsErr::
|
129
|
+
Err(RbPolarsErr::Other("Could not determine output type".into()).into())
|
126
130
|
}
|
127
131
|
|
128
132
|
fn apply_iter<T>(
|
@@ -158,10 +162,16 @@ where
|
|
158
162
|
{
|
159
163
|
let skip = usize::from(first_value.is_some());
|
160
164
|
if init_null_count == df.height() {
|
161
|
-
ChunkedArray::full_null("
|
165
|
+
ChunkedArray::full_null(PlSmallStr::from_static("map"), df.height())
|
162
166
|
} else {
|
163
167
|
let iter = apply_iter(df, lambda, init_null_count, skip);
|
164
|
-
iterator_to_primitive(
|
168
|
+
iterator_to_primitive(
|
169
|
+
iter,
|
170
|
+
init_null_count,
|
171
|
+
first_value,
|
172
|
+
PlSmallStr::from_static("map"),
|
173
|
+
df.height(),
|
174
|
+
)
|
165
175
|
}
|
166
176
|
}
|
167
177
|
|
@@ -174,10 +184,16 @@ pub fn apply_lambda_with_bool_out_type(
|
|
174
184
|
) -> ChunkedArray<BooleanType> {
|
175
185
|
let skip = usize::from(first_value.is_some());
|
176
186
|
if init_null_count == df.height() {
|
177
|
-
ChunkedArray::full_null("
|
187
|
+
ChunkedArray::full_null(PlSmallStr::from_static("map"), df.height())
|
178
188
|
} else {
|
179
189
|
let iter = apply_iter(df, lambda, init_null_count, skip);
|
180
|
-
iterator_to_bool(
|
190
|
+
iterator_to_bool(
|
191
|
+
iter,
|
192
|
+
init_null_count,
|
193
|
+
first_value,
|
194
|
+
PlSmallStr::from_static("map"),
|
195
|
+
df.height(),
|
196
|
+
)
|
181
197
|
}
|
182
198
|
}
|
183
199
|
|
@@ -190,10 +206,16 @@ pub fn apply_lambda_with_utf8_out_type(
|
|
190
206
|
) -> StringChunked {
|
191
207
|
let skip = usize::from(first_value.is_some());
|
192
208
|
if init_null_count == df.height() {
|
193
|
-
ChunkedArray::full_null("
|
209
|
+
ChunkedArray::full_null(PlSmallStr::from_static("map"), df.height())
|
194
210
|
} else {
|
195
211
|
let iter = apply_iter::<String>(df, lambda, init_null_count, skip);
|
196
|
-
iterator_to_utf8(
|
212
|
+
iterator_to_utf8(
|
213
|
+
iter,
|
214
|
+
init_null_count,
|
215
|
+
first_value,
|
216
|
+
PlSmallStr::from_static("map"),
|
217
|
+
df.height(),
|
218
|
+
)
|
197
219
|
}
|
198
220
|
}
|
199
221
|
|
@@ -207,7 +229,10 @@ pub fn apply_lambda_with_list_out_type(
|
|
207
229
|
) -> RbResult<ListChunked> {
|
208
230
|
let skip = usize::from(first_value.is_some());
|
209
231
|
if init_null_count == df.height() {
|
210
|
-
Ok(ChunkedArray::full_null(
|
232
|
+
Ok(ChunkedArray::full_null(
|
233
|
+
PlSmallStr::from_static("map"),
|
234
|
+
df.height(),
|
235
|
+
))
|
211
236
|
} else {
|
212
237
|
let mut iters = get_iters_skip(df, init_null_count + skip);
|
213
238
|
let iter = ((init_null_count + skip)..df.height()).map(|_| {
|
@@ -229,7 +254,14 @@ pub fn apply_lambda_with_list_out_type(
|
|
229
254
|
Err(e) => panic!("ruby function failed {}", e),
|
230
255
|
}
|
231
256
|
});
|
232
|
-
iterator_to_list(
|
257
|
+
iterator_to_list(
|
258
|
+
dt,
|
259
|
+
iter,
|
260
|
+
init_null_count,
|
261
|
+
first_value,
|
262
|
+
PlSmallStr::from_static("map"),
|
263
|
+
df.height(),
|
264
|
+
)
|
233
265
|
}
|
234
266
|
}
|
235
267
|
|
data/ext/polars/src/map/lazy.rs
CHANGED
@@ -1,10 +1,70 @@
|
|
1
|
-
use magnus::Value;
|
1
|
+
use magnus::{prelude::*, RArray, Value};
|
2
2
|
use polars::prelude::*;
|
3
3
|
|
4
|
-
use crate::
|
4
|
+
use crate::rb_modules::*;
|
5
|
+
use crate::{RbExpr, RbSeries, Wrap};
|
5
6
|
|
6
|
-
|
7
|
-
|
7
|
+
fn to_series(v: Value, name: &str) -> PolarsResult<Series> {
|
8
|
+
let rb_rbseries = match v.funcall("_s", ()) {
|
9
|
+
Ok(s) => s,
|
10
|
+
// the lambda did not return a series, we try to create a new Ruby Series
|
11
|
+
_ => {
|
12
|
+
let data = RArray::new();
|
13
|
+
data.push(v).unwrap();
|
14
|
+
let res = series().funcall::<_, _, Value>("new", (name.to_string(), data));
|
15
|
+
|
16
|
+
match res {
|
17
|
+
Ok(ruby_s) => ruby_s.funcall::<_, _, &RbSeries>("_s", ()).unwrap(),
|
18
|
+
Err(_) => {
|
19
|
+
polars_bail!(ComputeError:
|
20
|
+
"expected a something that could convert to a `Series` but got: {}",
|
21
|
+
unsafe { v.classname() }
|
22
|
+
)
|
23
|
+
}
|
24
|
+
}
|
25
|
+
}
|
26
|
+
};
|
27
|
+
// Finally get the actual Series
|
28
|
+
Ok(rb_rbseries.series.borrow().clone())
|
29
|
+
}
|
30
|
+
|
31
|
+
pub fn binary_lambda(lambda: Value, a: Series, b: Series) -> PolarsResult<Option<Series>> {
|
32
|
+
// create a RbSeries struct/object for Ruby
|
33
|
+
let rbseries_a = RbSeries::new(a);
|
34
|
+
let rbseries_b = RbSeries::new(b);
|
35
|
+
|
36
|
+
// Wrap this RbSeries object in the Ruby side Series wrapper
|
37
|
+
let ruby_series_wrapper_a: Value = utils().funcall("wrap_s", (rbseries_a,)).unwrap();
|
38
|
+
let ruby_series_wrapper_b: Value = utils().funcall("wrap_s", (rbseries_b,)).unwrap();
|
39
|
+
|
40
|
+
// call the lambda and get a Ruby side Series wrapper
|
41
|
+
let result_series_wrapper: Value =
|
42
|
+
match lambda.funcall("call", (ruby_series_wrapper_a, ruby_series_wrapper_b)) {
|
43
|
+
Ok(rbobj) => rbobj,
|
44
|
+
Err(e) => polars_bail!(
|
45
|
+
ComputeError: "custom Ruby function failed: {}", e,
|
46
|
+
),
|
47
|
+
};
|
48
|
+
let rbseries = if let Ok(rbexpr) = result_series_wrapper.funcall::<_, _, &RbExpr>("_rbexpr", ())
|
49
|
+
{
|
50
|
+
let expr = rbexpr.inner.clone();
|
51
|
+
let df = DataFrame::empty();
|
52
|
+
let out = df
|
53
|
+
.lazy()
|
54
|
+
.select([expr])
|
55
|
+
.with_predicate_pushdown(false)
|
56
|
+
.with_projection_pushdown(false)
|
57
|
+
.collect()?;
|
58
|
+
|
59
|
+
let s = out.select_at_idx(0).unwrap().clone();
|
60
|
+
RbSeries::new(s.take_materialized_series())
|
61
|
+
} else {
|
62
|
+
return Some(to_series(result_series_wrapper, "")).transpose();
|
63
|
+
};
|
64
|
+
|
65
|
+
// Finally get the actual Series
|
66
|
+
let binding = rbseries.series.borrow();
|
67
|
+
Ok(Some(binding.clone()))
|
8
68
|
}
|
9
69
|
|
10
70
|
pub fn map_single(
|
@@ -13,6 +73,7 @@ pub fn map_single(
|
|
13
73
|
_output_type: Option<Wrap<DataType>>,
|
14
74
|
_agg_list: bool,
|
15
75
|
_is_elementwise: bool,
|
76
|
+
_returns_scalar: bool,
|
16
77
|
) -> RbExpr {
|
17
78
|
todo!();
|
18
79
|
}
|
data/ext/polars/src/map/mod.rs
CHANGED
@@ -28,14 +28,14 @@ fn iterator_to_struct(
|
|
28
28
|
it: impl Iterator<Item = Option<Value>>,
|
29
29
|
init_null_count: usize,
|
30
30
|
first_value: AnyValue,
|
31
|
-
name:
|
31
|
+
name: PlSmallStr,
|
32
32
|
capacity: usize,
|
33
33
|
) -> RbResult<RbSeries> {
|
34
34
|
let (vals, flds) = match &first_value {
|
35
35
|
av @ AnyValue::Struct(_, _, flds) => (av._iter_struct_av().collect::<Vec<_>>(), &**flds),
|
36
36
|
AnyValue::StructOwned(payload) => (payload.0.clone(), &*payload.1),
|
37
37
|
_ => {
|
38
|
-
return Err(crate::
|
38
|
+
return Err(crate::exceptions::ComputeError::new_err(format!(
|
39
39
|
"expected struct got {first_value:?}",
|
40
40
|
)))
|
41
41
|
}
|
@@ -70,7 +70,7 @@ fn iterator_to_struct(
|
|
70
70
|
Some(dict) => {
|
71
71
|
let dict = RHash::try_convert(dict)?;
|
72
72
|
if dict.len() != struct_width {
|
73
|
-
return Err(crate::
|
73
|
+
return Err(crate::exceptions::ComputeError::new_err(
|
74
74
|
format!("Cannot create struct type.\n> The struct dtype expects {} fields, but it got a dict with {} fields.", struct_width, dict.len())
|
75
75
|
));
|
76
76
|
}
|
@@ -89,21 +89,23 @@ fn iterator_to_struct(
|
|
89
89
|
items
|
90
90
|
.par_iter()
|
91
91
|
.zip(flds)
|
92
|
-
.map(|(av, fld)| Series::new(fld.name(), av))
|
92
|
+
.map(|(av, fld)| Series::new(fld.name().clone(), av))
|
93
93
|
.collect::<Vec<_>>()
|
94
94
|
});
|
95
95
|
|
96
|
-
Ok(
|
97
|
-
.
|
98
|
-
|
99
|
-
|
96
|
+
Ok(
|
97
|
+
StructChunked::from_series(name, fields[0].len(), fields.iter())
|
98
|
+
.unwrap()
|
99
|
+
.into_series()
|
100
|
+
.into(),
|
101
|
+
)
|
100
102
|
}
|
101
103
|
|
102
104
|
fn iterator_to_primitive<T>(
|
103
105
|
it: impl Iterator<Item = Option<T::Native>>,
|
104
106
|
init_null_count: usize,
|
105
107
|
first_value: Option<T::Native>,
|
106
|
-
name:
|
108
|
+
name: PlSmallStr,
|
107
109
|
capacity: usize,
|
108
110
|
) -> ChunkedArray<T>
|
109
111
|
where
|
@@ -136,7 +138,7 @@ fn iterator_to_bool(
|
|
136
138
|
it: impl Iterator<Item = Option<bool>>,
|
137
139
|
init_null_count: usize,
|
138
140
|
first_value: Option<bool>,
|
139
|
-
name:
|
141
|
+
name: PlSmallStr,
|
140
142
|
capacity: usize,
|
141
143
|
) -> ChunkedArray<BooleanType> {
|
142
144
|
// safety: we know the iterators len
|
@@ -166,7 +168,7 @@ fn iterator_to_object(
|
|
166
168
|
it: impl Iterator<Item = Option<ObjectValue>>,
|
167
169
|
init_null_count: usize,
|
168
170
|
first_value: Option<ObjectValue>,
|
169
|
-
name:
|
171
|
+
name: PlSmallStr,
|
170
172
|
capacity: usize,
|
171
173
|
) -> ObjectChunked<ObjectValue> {
|
172
174
|
// safety: we know the iterators len
|
@@ -196,7 +198,7 @@ fn iterator_to_utf8(
|
|
196
198
|
it: impl Iterator<Item = Option<String>>,
|
197
199
|
init_null_count: usize,
|
198
200
|
first_value: Option<&str>,
|
199
|
-
name:
|
201
|
+
name: PlSmallStr,
|
200
202
|
capacity: usize,
|
201
203
|
) -> StringChunked {
|
202
204
|
let first_value = first_value.map(|v| v.to_string());
|
@@ -229,11 +231,10 @@ fn iterator_to_list(
|
|
229
231
|
it: impl Iterator<Item = Option<Series>>,
|
230
232
|
init_null_count: usize,
|
231
233
|
first_value: Option<&Series>,
|
232
|
-
name:
|
234
|
+
name: PlSmallStr,
|
233
235
|
capacity: usize,
|
234
236
|
) -> RbResult<ListChunked> {
|
235
|
-
let mut builder =
|
236
|
-
get_list_builder(dt, capacity * 5, capacity, name).map_err(RbPolarsErr::from)?;
|
237
|
+
let mut builder = get_list_builder(dt, capacity * 5, capacity, name);
|
237
238
|
for _ in 0..init_null_count {
|
238
239
|
builder.append_null()
|
239
240
|
}
|
@@ -246,7 +247,7 @@ fn iterator_to_list(
|
|
246
247
|
Some(s) => {
|
247
248
|
if s.len() == 0 && s.dtype() != dt {
|
248
249
|
builder
|
249
|
-
.append_series(&Series::full_null(
|
250
|
+
.append_series(&Series::full_null(PlSmallStr::EMPTY, 0, dt))
|
250
251
|
.unwrap()
|
251
252
|
} else {
|
252
253
|
builder.append_series(&s).map_err(RbPolarsErr::from)?
|