polars-df 0.23.0 → 0.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +127 -1
- data/Cargo.lock +72 -58
- data/README.md +31 -27
- data/ext/polars/Cargo.toml +15 -6
- data/ext/polars/src/batched_csv.rs +35 -39
- data/ext/polars/src/c_api/allocator.rs +7 -0
- data/ext/polars/src/c_api/mod.rs +1 -0
- data/ext/polars/src/catalog/unity.rs +123 -101
- data/ext/polars/src/conversion/any_value.rs +13 -17
- data/ext/polars/src/conversion/chunked_array.rs +5 -5
- data/ext/polars/src/conversion/datetime.rs +3 -2
- data/ext/polars/src/conversion/mod.rs +50 -45
- data/ext/polars/src/dataframe/export.rs +13 -13
- data/ext/polars/src/dataframe/general.rs +223 -223
- data/ext/polars/src/dataframe/io.rs +27 -141
- data/ext/polars/src/dataframe/mod.rs +13 -5
- data/ext/polars/src/dataframe/serde.rs +1 -1
- data/ext/polars/src/error.rs +44 -7
- data/ext/polars/src/exceptions.rs +45 -12
- data/ext/polars/src/expr/array.rs +12 -0
- data/ext/polars/src/expr/datatype.rs +2 -2
- data/ext/polars/src/expr/datetime.rs +4 -5
- data/ext/polars/src/expr/general.rs +49 -13
- data/ext/polars/src/expr/list.rs +4 -0
- data/ext/polars/src/expr/meta.rs +8 -3
- data/ext/polars/src/expr/mod.rs +22 -6
- data/ext/polars/src/expr/name.rs +19 -8
- data/ext/polars/src/expr/rolling.rs +50 -1
- data/ext/polars/src/expr/string.rs +0 -1
- data/ext/polars/src/expr/struct.rs +7 -2
- data/ext/polars/src/file.rs +136 -103
- data/ext/polars/src/functions/aggregation.rs +9 -8
- data/ext/polars/src/functions/io.rs +81 -10
- data/ext/polars/src/functions/lazy.rs +95 -21
- data/ext/polars/src/functions/mod.rs +2 -0
- data/ext/polars/src/functions/range.rs +19 -3
- data/ext/polars/src/functions/strings.rs +6 -0
- data/ext/polars/src/functions/utils.rs +6 -0
- data/ext/polars/src/interop/arrow/mod.rs +50 -1
- data/ext/polars/src/interop/arrow/{to_ruby.rs → to_rb.rs} +30 -0
- data/ext/polars/src/interop/arrow/to_rust.rs +43 -0
- data/ext/polars/src/interop/numo/to_numo_df.rs +1 -1
- data/ext/polars/src/interop/numo/to_numo_series.rs +1 -1
- data/ext/polars/src/lazyframe/exitable.rs +39 -0
- data/ext/polars/src/lazyframe/general.rs +340 -236
- data/ext/polars/src/lazyframe/mod.rs +46 -10
- data/ext/polars/src/lazyframe/optflags.rs +5 -4
- data/ext/polars/src/lazyframe/serde.rs +11 -3
- data/ext/polars/src/lazyframe/sink.rs +10 -5
- data/ext/polars/src/lazygroupby.rs +6 -7
- data/ext/polars/src/lib.rs +141 -76
- data/ext/polars/src/map/dataframe.rs +12 -12
- data/ext/polars/src/map/lazy.rs +7 -5
- data/ext/polars/src/map/mod.rs +15 -8
- data/ext/polars/src/map/series.rs +3 -3
- data/ext/polars/src/on_startup.rs +16 -8
- data/ext/polars/src/prelude.rs +1 -0
- data/ext/polars/src/rb_modules.rs +19 -49
- data/ext/polars/src/series/aggregation.rs +79 -140
- data/ext/polars/src/series/arithmetic.rs +16 -22
- data/ext/polars/src/series/comparison.rs +101 -222
- data/ext/polars/src/series/construction.rs +17 -18
- data/ext/polars/src/series/export.rs +1 -1
- data/ext/polars/src/series/general.rs +254 -289
- data/ext/polars/src/series/import.rs +17 -0
- data/ext/polars/src/series/map.rs +178 -160
- data/ext/polars/src/series/mod.rs +28 -12
- data/ext/polars/src/series/scatter.rs +12 -9
- data/ext/polars/src/sql.rs +16 -9
- data/ext/polars/src/testing/frame.rs +31 -0
- data/ext/polars/src/testing/mod.rs +5 -0
- data/ext/polars/src/testing/series.rs +31 -0
- data/ext/polars/src/timeout.rs +105 -0
- data/ext/polars/src/utils.rs +159 -1
- data/lib/polars/array_expr.rb +81 -12
- data/lib/polars/array_name_space.rb +74 -7
- data/lib/polars/batched_csv_reader.rb +21 -21
- data/lib/polars/binary_name_space.rb +1 -1
- data/lib/polars/cat_expr.rb +7 -7
- data/lib/polars/config.rb +1 -1
- data/lib/polars/convert.rb +189 -34
- data/lib/polars/data_frame.rb +1066 -831
- data/lib/polars/data_frame_plot.rb +173 -0
- data/lib/polars/data_type_group.rb +1 -0
- data/lib/polars/data_types.rb +31 -12
- data/lib/polars/date_time_expr.rb +51 -69
- data/lib/polars/date_time_name_space.rb +80 -112
- data/lib/polars/dynamic_group_by.rb +7 -7
- data/lib/polars/exceptions.rb +50 -10
- data/lib/polars/expr.rb +470 -517
- data/lib/polars/functions/aggregation/horizontal.rb +0 -1
- data/lib/polars/functions/aggregation/vertical.rb +2 -3
- data/lib/polars/functions/as_datatype.rb +290 -8
- data/lib/polars/functions/eager.rb +204 -10
- data/lib/polars/functions/escape_regex.rb +21 -0
- data/lib/polars/functions/lazy.rb +409 -169
- data/lib/polars/functions/lit.rb +17 -1
- data/lib/polars/functions/range/int_range.rb +74 -2
- data/lib/polars/functions/range/linear_space.rb +77 -0
- data/lib/polars/functions/range/time_range.rb +1 -1
- data/lib/polars/functions/repeat.rb +3 -12
- data/lib/polars/functions/whenthen.rb +2 -2
- data/lib/polars/group_by.rb +72 -20
- data/lib/polars/iceberg_dataset.rb +1 -6
- data/lib/polars/in_process_query.rb +37 -0
- data/lib/polars/io/cloud.rb +18 -0
- data/lib/polars/io/csv.rb +265 -126
- data/lib/polars/io/database.rb +0 -1
- data/lib/polars/io/delta.rb +15 -7
- data/lib/polars/io/ipc.rb +24 -17
- data/lib/polars/io/ndjson.rb +161 -24
- data/lib/polars/io/parquet.rb +101 -38
- data/lib/polars/lazy_frame.rb +849 -558
- data/lib/polars/lazy_group_by.rb +327 -2
- data/lib/polars/list_expr.rb +94 -16
- data/lib/polars/list_name_space.rb +88 -24
- data/lib/polars/meta_expr.rb +42 -1
- data/lib/polars/name_expr.rb +41 -4
- data/lib/polars/query_opt_flags.rb +198 -2
- data/lib/polars/rolling_group_by.rb +3 -3
- data/lib/polars/schema.rb +21 -3
- data/lib/polars/selector.rb +37 -2
- data/lib/polars/selectors.rb +45 -9
- data/lib/polars/series.rb +1156 -728
- data/lib/polars/series_plot.rb +72 -0
- data/lib/polars/slice.rb +1 -1
- data/lib/polars/sql_context.rb +11 -4
- data/lib/polars/string_expr.rb +59 -68
- data/lib/polars/string_name_space.rb +51 -87
- data/lib/polars/struct_expr.rb +36 -18
- data/lib/polars/testing.rb +24 -273
- data/lib/polars/utils/constants.rb +2 -0
- data/lib/polars/utils/construction/data_frame.rb +410 -0
- data/lib/polars/utils/construction/series.rb +364 -0
- data/lib/polars/utils/construction/utils.rb +9 -0
- data/lib/polars/utils/deprecation.rb +11 -0
- data/lib/polars/utils/serde.rb +8 -3
- data/lib/polars/utils/unstable.rb +19 -0
- data/lib/polars/utils/various.rb +59 -0
- data/lib/polars/utils.rb +46 -47
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +47 -1
- metadata +25 -6
- data/ext/polars/src/allocator.rs +0 -13
- data/lib/polars/plot.rb +0 -109
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
//! A global process-aborting timeout system, mainly intended for testing.
|
|
2
|
+
|
|
3
|
+
use std::cmp::Reverse;
|
|
4
|
+
use std::collections::BinaryHeap;
|
|
5
|
+
use std::sync::LazyLock;
|
|
6
|
+
use std::sync::mpsc::{Receiver, RecvTimeoutError, Sender, channel};
|
|
7
|
+
use std::time::Duration;
|
|
8
|
+
|
|
9
|
+
use polars::prelude::{InitHashMaps, PlHashSet};
|
|
10
|
+
use polars_utils::priority::Priority;
|
|
11
|
+
use polars_utils::relaxed_cell::RelaxedCell;
|
|
12
|
+
|
|
13
|
+
static TIMEOUT_REQUEST_HANDLER: LazyLock<Sender<TimeoutRequest>> = LazyLock::new(|| {
|
|
14
|
+
let (send, recv) = channel();
|
|
15
|
+
std::thread::Builder::new()
|
|
16
|
+
.name("polars-timeout".to_string())
|
|
17
|
+
.spawn(move || timeout_thread(recv))
|
|
18
|
+
.unwrap();
|
|
19
|
+
send
|
|
20
|
+
});
|
|
21
|
+
|
|
22
|
+
/// Message understood by the timeout thread.
enum TimeoutRequest {
    /// Register a timeout of the given duration under the given id.
    Start(Duration, u64),
    /// Mark the timeout registered under this id as no longer active.
    Cancel(u64),
}
|
|
26
|
+
|
|
27
|
+
pub fn get_timeout() -> Option<Duration> {
|
|
28
|
+
static TIMEOUT_DISABLED: RelaxedCell<bool> = RelaxedCell::new_bool(false);
|
|
29
|
+
|
|
30
|
+
// Fast path so we don't have to keep checking environment variables. Make
|
|
31
|
+
// sure that if you want to use POLARS_TIMEOUT_MS it is set before the first
|
|
32
|
+
// polars call.
|
|
33
|
+
if TIMEOUT_DISABLED.load() {
|
|
34
|
+
return None;
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
let Ok(timeout) = std::env::var("POLARS_TIMEOUT_MS") else {
|
|
38
|
+
TIMEOUT_DISABLED.store(true);
|
|
39
|
+
return None;
|
|
40
|
+
};
|
|
41
|
+
|
|
42
|
+
match timeout.parse() {
|
|
43
|
+
Ok(ms) => Some(Duration::from_millis(ms)),
|
|
44
|
+
Err(e) => {
|
|
45
|
+
eprintln!("failed to parse POLARS_TIMEOUT_MS: {e:?}");
|
|
46
|
+
None
|
|
47
|
+
}
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
fn timeout_thread(recv: Receiver<TimeoutRequest>) {
|
|
52
|
+
let mut active_timeouts: PlHashSet<u64> = PlHashSet::new();
|
|
53
|
+
let mut shortest_timeout: BinaryHeap<Priority<Reverse<Duration>, u64>> = BinaryHeap::new();
|
|
54
|
+
loop {
|
|
55
|
+
// Remove cancelled requests.
|
|
56
|
+
while let Some(Priority(_, id)) = shortest_timeout.peek() {
|
|
57
|
+
if active_timeouts.contains(id) {
|
|
58
|
+
break;
|
|
59
|
+
}
|
|
60
|
+
shortest_timeout.pop();
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
let request = if let Some(Priority(timeout, _)) = shortest_timeout.peek() {
|
|
64
|
+
match recv.recv_timeout(timeout.0) {
|
|
65
|
+
Err(RecvTimeoutError::Timeout) => {
|
|
66
|
+
eprintln!("exiting the process, POLARS_TIMEOUT_MS exceeded");
|
|
67
|
+
std::thread::sleep(Duration::from_secs_f64(1.0));
|
|
68
|
+
std::process::exit(1);
|
|
69
|
+
}
|
|
70
|
+
r => r.unwrap(),
|
|
71
|
+
}
|
|
72
|
+
} else {
|
|
73
|
+
recv.recv().unwrap()
|
|
74
|
+
};
|
|
75
|
+
|
|
76
|
+
match request {
|
|
77
|
+
TimeoutRequest::Start(duration, id) => {
|
|
78
|
+
shortest_timeout.push(Priority(Reverse(duration), id));
|
|
79
|
+
active_timeouts.insert(id);
|
|
80
|
+
}
|
|
81
|
+
TimeoutRequest::Cancel(id) => {
|
|
82
|
+
active_timeouts.remove(&id);
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
pub fn schedule_polars_timeout() -> Option<u64> {
|
|
89
|
+
static TIMEOUT_ID: RelaxedCell<u64> = RelaxedCell::new_u64(0);
|
|
90
|
+
|
|
91
|
+
let timeout = get_timeout()?;
|
|
92
|
+
let id = TIMEOUT_ID.fetch_add(1);
|
|
93
|
+
TIMEOUT_REQUEST_HANDLER
|
|
94
|
+
.send(TimeoutRequest::Start(timeout, id))
|
|
95
|
+
.unwrap();
|
|
96
|
+
Some(id)
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
pub fn cancel_polars_timeout(opt_id: Option<u64>) {
|
|
100
|
+
if let Some(id) = opt_id {
|
|
101
|
+
TIMEOUT_REQUEST_HANDLER
|
|
102
|
+
.send(TimeoutRequest::Cancel(id))
|
|
103
|
+
.unwrap();
|
|
104
|
+
}
|
|
105
|
+
}
|
data/ext/polars/src/utils.rs
CHANGED
|
@@ -1,4 +1,16 @@
|
|
|
1
|
-
use
|
|
1
|
+
use std::os::raw::c_void;
|
|
2
|
+
use std::panic::AssertUnwindSafe;
|
|
3
|
+
|
|
4
|
+
use magnus::Ruby;
|
|
5
|
+
use polars::frame::DataFrame;
|
|
6
|
+
use polars::series::IntoSeries;
|
|
7
|
+
use polars_error::PolarsResult;
|
|
8
|
+
use polars_error::signals::{KeyboardInterrupt, catch_keyboard_interrupt};
|
|
9
|
+
use rb_sys::{rb_thread_call_with_gvl, rb_thread_call_without_gvl};
|
|
10
|
+
|
|
11
|
+
use crate::exceptions::RbKeyboardInterrupt;
|
|
12
|
+
use crate::timeout::{cancel_polars_timeout, schedule_polars_timeout};
|
|
13
|
+
use crate::{RbDataFrame, RbErr, RbPolarsErr, RbResult, RbSeries};
|
|
2
14
|
|
|
3
15
|
#[macro_export]
|
|
4
16
|
macro_rules! apply_method_all_arrow_series2 {
|
|
@@ -10,14 +22,17 @@ macro_rules! apply_method_all_arrow_series2 {
|
|
|
10
22
|
DataType::UInt16 => $self.u16().unwrap().$method($($args),*),
|
|
11
23
|
DataType::UInt32 => $self.u32().unwrap().$method($($args),*),
|
|
12
24
|
DataType::UInt64 => $self.u64().unwrap().$method($($args),*),
|
|
25
|
+
DataType::UInt128 => $self.u128().unwrap().$method($($args),*),
|
|
13
26
|
DataType::Int8 => $self.i8().unwrap().$method($($args),*),
|
|
14
27
|
DataType::Int16 => $self.i16().unwrap().$method($($args),*),
|
|
15
28
|
DataType::Int32 => $self.i32().unwrap().$method($($args),*),
|
|
16
29
|
DataType::Int64 => $self.i64().unwrap().$method($($args),*),
|
|
30
|
+
DataType::Int128 => $self.i128().unwrap().$method($($args),*),
|
|
17
31
|
DataType::Float32 => $self.f32().unwrap().$method($($args),*),
|
|
18
32
|
DataType::Float64 => $self.f64().unwrap().$method($($args),*),
|
|
19
33
|
DataType::Date => $self.date().unwrap().physical().$method($($args),*),
|
|
20
34
|
DataType::Datetime(_, _) => $self.datetime().unwrap().physical().$method($($args),*),
|
|
35
|
+
// TODO implement
|
|
21
36
|
// DataType::List(_) => $self.list().unwrap().$method($($args),*),
|
|
22
37
|
DataType::Struct(_) => $self.struct_().unwrap().$method($($args),*),
|
|
23
38
|
dt => panic!("dtype {:?} not supported", dt)
|
|
@@ -30,3 +45,146 @@ macro_rules! apply_method_all_arrow_series2 {
|
|
|
30
45
|
pub(crate) fn to_rb_err<E: Into<RbPolarsErr>>(e: E) -> RbErr {
|
|
31
46
|
e.into().into()
|
|
32
47
|
}
|
|
48
|
+
|
|
49
|
+
pub trait EnterPolarsExt {
|
|
50
|
+
fn enter_polars<T, E, F>(self, f: F) -> RbResult<T>
|
|
51
|
+
where
|
|
52
|
+
F: FnOnce() -> Result<T, E>,
|
|
53
|
+
E: Into<RbPolarsErr>;
|
|
54
|
+
|
|
55
|
+
#[inline(always)]
|
|
56
|
+
fn enter_polars_ok<T, F>(self, f: F) -> RbResult<T>
|
|
57
|
+
where
|
|
58
|
+
Self: Sized,
|
|
59
|
+
F: FnOnce() -> T,
|
|
60
|
+
{
|
|
61
|
+
self.enter_polars(move || RbResult::Ok(f()))
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
#[inline(always)]
|
|
65
|
+
fn enter_polars_df<F>(self, f: F) -> RbResult<RbDataFrame>
|
|
66
|
+
where
|
|
67
|
+
Self: Sized,
|
|
68
|
+
F: FnOnce() -> PolarsResult<DataFrame>,
|
|
69
|
+
{
|
|
70
|
+
self.enter_polars(f).map(RbDataFrame::new)
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
#[inline(always)]
|
|
74
|
+
fn enter_polars_series<T, F>(self, f: F) -> RbResult<RbSeries>
|
|
75
|
+
where
|
|
76
|
+
Self: Sized,
|
|
77
|
+
T: IntoSeries,
|
|
78
|
+
F: FnOnce() -> PolarsResult<T>,
|
|
79
|
+
{
|
|
80
|
+
self.enter_polars(f).map(|s| RbSeries::new(s.into_series()))
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
fn detach<T, F>(self, f: F) -> T
|
|
84
|
+
where
|
|
85
|
+
Self: Sized,
|
|
86
|
+
F: FnOnce() -> T,
|
|
87
|
+
{
|
|
88
|
+
if std::env::var("POLARS_GVL").is_ok() {
|
|
89
|
+
f()
|
|
90
|
+
} else {
|
|
91
|
+
let mut data = CallbackData {
|
|
92
|
+
func: Some(f),
|
|
93
|
+
result: None,
|
|
94
|
+
};
|
|
95
|
+
|
|
96
|
+
unsafe {
|
|
97
|
+
rb_thread_call_without_gvl(
|
|
98
|
+
Some(call_without_gvl::<F, T>),
|
|
99
|
+
&mut data as *mut _ as *mut c_void,
|
|
100
|
+
None,
|
|
101
|
+
std::ptr::null_mut(),
|
|
102
|
+
);
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
data.result.unwrap()
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
impl EnterPolarsExt for &Ruby {
|
|
111
|
+
fn enter_polars<T, E, F>(self, f: F) -> RbResult<T>
|
|
112
|
+
where
|
|
113
|
+
F: FnOnce() -> Result<T, E>,
|
|
114
|
+
E: Into<RbPolarsErr>,
|
|
115
|
+
{
|
|
116
|
+
let timeout = schedule_polars_timeout();
|
|
117
|
+
let ret = self.detach(|| catch_keyboard_interrupt(AssertUnwindSafe(f)));
|
|
118
|
+
cancel_polars_timeout(timeout);
|
|
119
|
+
match ret {
|
|
120
|
+
Ok(Ok(ret)) => Ok(ret),
|
|
121
|
+
Ok(Err(err)) => Err(RbErr::from(err.into())),
|
|
122
|
+
Err(KeyboardInterrupt) => Err(RbKeyboardInterrupt::new_err("")),
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
pub trait RubyAttach {
|
|
128
|
+
fn attach<T, F>(f: F) -> T
|
|
129
|
+
where
|
|
130
|
+
F: FnOnce(&Ruby) -> T;
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
unsafe extern "C" {
    // Declared here for direct use; `RubyAttach::attach` treats a non-zero
    // return as "the current thread holds the GVL".
    fn ruby_thread_has_gvl_p() -> std::ffi::c_int;
}
|
|
136
|
+
|
|
137
|
+
impl RubyAttach for Ruby {
|
|
138
|
+
fn attach<T, F>(f: F) -> T
|
|
139
|
+
where
|
|
140
|
+
F: FnOnce(&Ruby) -> T,
|
|
141
|
+
{
|
|
142
|
+
// recheck GVL state since cached value can be incorrect
|
|
143
|
+
// https://github.com/matsadler/magnus/pull/161
|
|
144
|
+
if let Ok(rb) = Ruby::get()
|
|
145
|
+
&& unsafe { ruby_thread_has_gvl_p() } != 0
|
|
146
|
+
{
|
|
147
|
+
f(&rb)
|
|
148
|
+
} else {
|
|
149
|
+
let mut data = CallbackData {
|
|
150
|
+
func: Some(f),
|
|
151
|
+
result: None,
|
|
152
|
+
};
|
|
153
|
+
|
|
154
|
+
unsafe {
|
|
155
|
+
rb_thread_call_with_gvl(
|
|
156
|
+
Some(call_with_gvl::<F, T>),
|
|
157
|
+
&mut data as *mut _ as *mut c_void,
|
|
158
|
+
);
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
data.result.unwrap()
|
|
162
|
+
}
|
|
163
|
+
}
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
/// Slot passed by pointer to the `call_without_gvl` / `call_with_gvl`
/// trampolines.
struct CallbackData<F, T> {
    /// Closure to run; taken (left as `None`) by the trampoline.
    func: Option<F>,
    /// Return value of the closure, stored by the trampoline.
    result: Option<T>,
}
|
|
170
|
+
|
|
171
|
+
extern "C" fn call_without_gvl<F, T>(data: *mut c_void) -> *mut c_void
|
|
172
|
+
where
|
|
173
|
+
F: FnOnce() -> T,
|
|
174
|
+
{
|
|
175
|
+
let data = unsafe { &mut *(data as *mut CallbackData<F, T>) };
|
|
176
|
+
let func = data.func.take().unwrap();
|
|
177
|
+
data.result = Some(func());
|
|
178
|
+
std::ptr::null_mut()
|
|
179
|
+
}
|
|
180
|
+
|
|
181
|
+
extern "C" fn call_with_gvl<F, T>(data: *mut c_void) -> *mut c_void
|
|
182
|
+
where
|
|
183
|
+
F: FnOnce(&Ruby) -> T,
|
|
184
|
+
{
|
|
185
|
+
let rb = Ruby::get().unwrap();
|
|
186
|
+
let data = unsafe { &mut *(data as *mut CallbackData<F, T>) };
|
|
187
|
+
let func = data.func.take().unwrap();
|
|
188
|
+
data.result = Some(func(&rb));
|
|
189
|
+
std::ptr::null_mut()
|
|
190
|
+
}
|
data/lib/polars/array_expr.rb
CHANGED
|
@@ -191,7 +191,7 @@ module Polars
|
|
|
191
191
|
# @example
|
|
192
192
|
# df = Polars::DataFrame.new(
|
|
193
193
|
# {"a" => [[1, 2], [4, 3]]},
|
|
194
|
-
# schema: {"a" => Polars::Array.new(
|
|
194
|
+
# schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
|
|
195
195
|
# )
|
|
196
196
|
# df.select(Polars.col("a").arr.min)
|
|
197
197
|
# # =>
|
|
@@ -215,7 +215,7 @@ module Polars
|
|
|
215
215
|
# @example
|
|
216
216
|
# df = Polars::DataFrame.new(
|
|
217
217
|
# {"a" => [[1, 2], [4, 3]]},
|
|
218
|
-
# schema: {"a" => Polars::Array.new(
|
|
218
|
+
# schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
|
|
219
219
|
# )
|
|
220
220
|
# df.select(Polars.col("a").arr.max)
|
|
221
221
|
# # =>
|
|
@@ -239,7 +239,7 @@ module Polars
|
|
|
239
239
|
# @example
|
|
240
240
|
# df = Polars::DataFrame.new(
|
|
241
241
|
# {"a" => [[1, 2], [4, 3]]},
|
|
242
|
-
# schema: {"a" => Polars::Array.new(
|
|
242
|
+
# schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
|
|
243
243
|
# )
|
|
244
244
|
# df.select(Polars.col("a").arr.sum)
|
|
245
245
|
# # =>
|
|
@@ -263,7 +263,7 @@ module Polars
|
|
|
263
263
|
# @example
|
|
264
264
|
# df = Polars::DataFrame.new(
|
|
265
265
|
# {"a" => [[1, 2], [4, 3]]},
|
|
266
|
-
# schema: {"a" => Polars::Array.new(
|
|
266
|
+
# schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
|
|
267
267
|
# )
|
|
268
268
|
# df.select(Polars.col("a").arr.std)
|
|
269
269
|
# # =>
|
|
@@ -287,7 +287,7 @@ module Polars
|
|
|
287
287
|
# @example
|
|
288
288
|
# df = Polars::DataFrame.new(
|
|
289
289
|
# {"a" => [[1, 2], [4, 3]]},
|
|
290
|
-
# schema: {"a" => Polars::Array.new(
|
|
290
|
+
# schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
|
|
291
291
|
# )
|
|
292
292
|
# df.select(Polars.col("a").arr.var)
|
|
293
293
|
# # =>
|
|
@@ -335,7 +335,7 @@ module Polars
|
|
|
335
335
|
# @example
|
|
336
336
|
# df = Polars::DataFrame.new(
|
|
337
337
|
# {"a" => [[1, 2], [4, 3]]},
|
|
338
|
-
# schema: {"a" => Polars::Array.new(
|
|
338
|
+
# schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
|
|
339
339
|
# )
|
|
340
340
|
# df.select(Polars.col("a").arr.median)
|
|
341
341
|
# # =>
|
|
@@ -437,7 +437,7 @@ module Polars
|
|
|
437
437
|
# @example
|
|
438
438
|
# df = Polars::DataFrame.new(
|
|
439
439
|
# {
|
|
440
|
-
# "a"
|
|
440
|
+
# "a" => [
|
|
441
441
|
# [true, true],
|
|
442
442
|
# [false, true],
|
|
443
443
|
# [false, false],
|
|
@@ -472,7 +472,7 @@ module Polars
|
|
|
472
472
|
# @example
|
|
473
473
|
# df = Polars::DataFrame.new(
|
|
474
474
|
# {
|
|
475
|
-
# "a"
|
|
475
|
+
# "a" => [
|
|
476
476
|
# [true, true],
|
|
477
477
|
# [false, true],
|
|
478
478
|
# [false, false],
|
|
@@ -642,7 +642,7 @@ module Polars
|
|
|
642
642
|
# {"arr" => [[1, 2, 3], [4, 5, 6], [7, 8, 9]], "idx" => [1, -2, 4]},
|
|
643
643
|
# schema: {"arr" => Polars::Array.new(Polars::Int32, 3), "idx" => Polars::Int32}
|
|
644
644
|
# )
|
|
645
|
-
# df.with_columns(get: Polars.col("arr").arr.get("idx"))
|
|
645
|
+
# df.with_columns(get: Polars.col("arr").arr.get("idx", null_on_oob: true))
|
|
646
646
|
# # =>
|
|
647
647
|
# # shape: (3, 3)
|
|
648
648
|
# # ┌───────────────┬─────┬──────┐
|
|
@@ -654,7 +654,7 @@ module Polars
|
|
|
654
654
|
# # │ [4, 5, 6] ┆ -2 ┆ 5 │
|
|
655
655
|
# # │ [7, 8, 9] ┆ 4 ┆ null │
|
|
656
656
|
# # └───────────────┴─────┴──────┘
|
|
657
|
-
def get(index, null_on_oob:
|
|
657
|
+
def get(index, null_on_oob: false)
|
|
658
658
|
index = Utils.parse_into_expression(index)
|
|
659
659
|
Utils.wrap_expr(_rbexpr.arr_get(index, null_on_oob))
|
|
660
660
|
end
|
|
@@ -681,7 +681,7 @@ module Polars
|
|
|
681
681
|
# # │ [7, 8, 9] ┆ 7 │
|
|
682
682
|
# # └───────────────┴───────┘
|
|
683
683
|
def first
|
|
684
|
-
get(0)
|
|
684
|
+
get(0, null_on_oob: true)
|
|
685
685
|
end
|
|
686
686
|
|
|
687
687
|
# Get the last value of the sub-arrays.
|
|
@@ -706,7 +706,7 @@ module Polars
|
|
|
706
706
|
# # │ [7, 8, 9] ┆ 9 │
|
|
707
707
|
# # └───────────────┴──────┘
|
|
708
708
|
def last
|
|
709
|
-
get(-1)
|
|
709
|
+
get(-1, null_on_oob: true)
|
|
710
710
|
end
|
|
711
711
|
|
|
712
712
|
# Join all string items in a sub-array and place a separator between them.
|
|
@@ -914,5 +914,74 @@ module Polars
|
|
|
914
914
|
n = Utils.parse_into_expression(n)
|
|
915
915
|
Utils.wrap_expr(_rbexpr.arr_shift(n))
|
|
916
916
|
end
|
|
917
|
+
|
|
918
|
+
# Evaluate an arbitrary polars expression against the elements of each array.
#
# @param expr [Expr]
#   Expression to run. Note that you can select an element with `Polars.element`.
# @param as_list [Boolean]
#   Collect the resulting data as a list. This allows for expressions which
#   output a variable amount of data.
#
# @return [Expr]
#
# @example
#   df = Polars::DataFrame.new({"a" => [1, 8, 3], "b" => [4, 5, 2]})
#   df.with_columns(rank: Polars.concat_arr("a", "b").arr.eval(Polars.element.rank))
#   # =>
#   # shape: (3, 3)
#   # ┌─────┬─────┬───────────────┐
#   # │ a   ┆ b   ┆ rank          │
#   # │ --- ┆ --- ┆ ---           │
#   # │ i64 ┆ i64 ┆ array[f64, 2] │
#   # ╞═════╪═════╪═══════════════╡
#   # │ 1   ┆ 4   ┆ [1.0, 2.0]    │
#   # │ 8   ┆ 5   ┆ [2.0, 1.0]    │
#   # │ 3   ┆ 2   ┆ [2.0, 1.0]    │
#   # └─────┴─────┴───────────────┘
def eval(expr, as_list: false)
  evaluated = _rbexpr.arr_eval(expr._rbexpr, as_list)
  Utils.wrap_expr(evaluated)
end
|
|
945
|
+
|
|
946
|
+
# Apply a polars aggregation expression to the elements of each array.
#
# @param expr [Expr]
#   Expression to run. Note that you can select an element with `Polars.element`.
#
# @return [Expr]
#
# @example
#   df = Polars::Series.new(
#     "a", [[1, nil], [42, 13], [nil, nil]], dtype: Polars::Array.new(Polars::Int64, 2)
#   ).to_frame
#   df.with_columns(null_count: Polars.col("a").arr.agg(Polars.element.null_count))
#   # =>
#   # shape: (3, 2)
#   # ┌───────────────┬────────────┐
#   # │ a             ┆ null_count │
#   # │ ---           ┆ ---        │
#   # │ array[i64, 2] ┆ u32        │
#   # ╞═══════════════╪════════════╡
#   # │ [1, null]     ┆ 1          │
#   # │ [42, 13]      ┆ 0          │
#   # │ [null, null]  ┆ 2          │
#   # └───────────────┴────────────┘
#
# @example
#   df.with_columns(no_nulls: Polars.col("a").arr.agg(Polars.element.drop_nulls))
#   # =>
#   # shape: (3, 2)
#   # ┌───────────────┬───────────┐
#   # │ a             ┆ no_nulls  │
#   # │ ---           ┆ ---       │
#   # │ array[i64, 2] ┆ list[i64] │
#   # ╞═══════════════╪═══════════╡
#   # │ [1, null]     ┆ [1]       │
#   # │ [42, 13]      ┆ [42, 13]  │
#   # │ [null, null]  ┆ []        │
#   # └───────────────┴───────────┘
def agg(expr)
  aggregated = _rbexpr.arr_agg(expr._rbexpr)
  Utils.wrap_expr(aggregated)
end
|
|
917
986
|
end
|
|
918
987
|
end
|
|
@@ -16,7 +16,7 @@ module Polars
|
|
|
16
16
|
#
|
|
17
17
|
# @example
|
|
18
18
|
# s = Polars::Series.new(
|
|
19
|
-
# "a", [[1, 2], [4, 3]], dtype: Polars::Array.new(
|
|
19
|
+
# "a", [[1, 2], [4, 3]], dtype: Polars::Array.new(Polars::Int64, 2)
|
|
20
20
|
# )
|
|
21
21
|
# s.arr.min
|
|
22
22
|
# # =>
|
|
@@ -36,7 +36,7 @@ module Polars
|
|
|
36
36
|
#
|
|
37
37
|
# @example
|
|
38
38
|
# s = Polars::Series.new(
|
|
39
|
-
# "a", [[1, 2], [4, 3]], dtype: Polars::Array.new(
|
|
39
|
+
# "a", [[1, 2], [4, 3]], dtype: Polars::Array.new(Polars::Int64, 2)
|
|
40
40
|
# )
|
|
41
41
|
# s.arr.max
|
|
42
42
|
# # =>
|
|
@@ -57,7 +57,7 @@ module Polars
|
|
|
57
57
|
# @example
|
|
58
58
|
# df = Polars::DataFrame.new(
|
|
59
59
|
# {"a" => [[1, 2], [4, 3]]},
|
|
60
|
-
# schema: {"a" => Polars::Array.new(
|
|
60
|
+
# schema: {"a" => Polars::Array.new(Polars::Int64, 2)}
|
|
61
61
|
# )
|
|
62
62
|
# df.select(Polars.col("a").arr.sum)
|
|
63
63
|
# # =>
|
|
@@ -477,6 +477,10 @@ module Polars
|
|
|
477
477
|
#
|
|
478
478
|
# @param index [Integer]
|
|
479
479
|
# Index to return per sublist
|
|
480
|
+
# @param null_on_oob [Boolean]
|
|
481
|
+
# Behavior if an index is out of bounds:
|
|
482
|
+
# true -> set as null
|
|
483
|
+
# false -> raise an error
|
|
480
484
|
#
|
|
481
485
|
# @return [Series]
|
|
482
486
|
#
|
|
@@ -484,7 +488,7 @@ module Polars
|
|
|
484
488
|
# s = Polars::Series.new(
|
|
485
489
|
# "a", [[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype: Polars::Array.new(Polars::Int32, 3)
|
|
486
490
|
# )
|
|
487
|
-
# s.arr.get(Polars::Series.new([1, -2, 4]))
|
|
491
|
+
# s.arr.get(Polars::Series.new([1, -2, 4]), null_on_oob: true)
|
|
488
492
|
# # =>
|
|
489
493
|
# # shape: (3,)
|
|
490
494
|
# # Series: 'a' [i32]
|
|
@@ -493,7 +497,7 @@ module Polars
|
|
|
493
497
|
# # 5
|
|
494
498
|
# # null
|
|
495
499
|
# # ]
|
|
496
|
-
def get(index)
|
|
500
|
+
def get(index, null_on_oob: false)
|
|
497
501
|
super
|
|
498
502
|
end
|
|
499
503
|
|
|
@@ -548,7 +552,7 @@ module Polars
|
|
|
548
552
|
# @param ignore_nulls [Boolean]
|
|
549
553
|
# Ignore null values (default).
|
|
550
554
|
#
|
|
551
|
-
# If set to `
|
|
555
|
+
# If set to `false`, null values will be propagated.
|
|
552
556
|
# If the sub-list contains any null values, the output is `nil`.
|
|
553
557
|
#
|
|
554
558
|
# @return [Series]
|
|
@@ -593,6 +597,8 @@ module Polars
|
|
|
593
597
|
#
|
|
594
598
|
# @param item [Object]
|
|
595
599
|
# Item that will be checked for membership
|
|
600
|
+
# @param nulls_equal [Boolean]
|
|
601
|
+
# If true, treat null as a distinct value. Null values will not propagate.
|
|
596
602
|
#
|
|
597
603
|
# @return [Series]
|
|
598
604
|
#
|
|
@@ -609,7 +615,7 @@ module Polars
|
|
|
609
615
|
# # true
|
|
610
616
|
# # false
|
|
611
617
|
# # ]
|
|
612
|
-
def contains(item)
|
|
618
|
+
def contains(item, nulls_equal: true)
|
|
613
619
|
super
|
|
614
620
|
end
|
|
615
621
|
|
|
@@ -700,5 +706,66 @@ module Polars
|
|
|
700
706
|
def shift(n = 1)
|
|
701
707
|
super
|
|
702
708
|
end
|
|
709
|
+
|
|
710
|
+
# Evaluate an arbitrary polars expression against the elements of each array.
#
# @param expr [Expr]
#   Expression to run. Note that you can select an element with `Polars.element`.
# @param as_list [Boolean]
#   Collect the resulting data as a list. This allows for expressions which
#   output a variable amount of data.
#
# @return [Series]
#
# @example
#   s = Polars::Series.new("a", [[1, 4], [8, 5], [3, 2]], dtype: Polars::Array.new(Polars::Int64, 2))
#   s.arr.eval(Polars.element.rank)
#   # =>
#   # shape: (3,)
#   # Series: 'a' [array[f64, 2]]
#   # [
#   #         [1.0, 2.0]
#   #         [2.0, 1.0]
#   #         [2.0, 1.0]
#   # ]
def eval(expr, as_list: false)
  # Run the expression on a one-column frame built from this series, then
  # turn the result back into a series.
  series = Utils.wrap_s(_s)
  column = F.col(series.name)
  series.to_frame.select(column.arr.eval(expr, as_list: as_list)).to_series
end
|
|
735
|
+
|
|
736
|
+
# Apply a polars aggregation expression to the elements of each array.
#
# @param expr [Expr]
#   Expression to run. Note that you can select an element with `Polars.element`.
#
# @return [Series]
#
# @example
#   s = Polars::Series.new(
#     "a", [[1, nil], [42, 13], [nil, nil]], dtype: Polars::Array.new(Polars::Int64, 2)
#   )
#   s.arr.agg(Polars.element.null_count)
#   # =>
#   # shape: (3,)
#   # Series: 'a' [u32]
#   # [
#   #         1
#   #         0
#   #         2
#   # ]
#
# @example
#   s.arr.agg(Polars.element.drop_nulls)
#   # =>
#   # shape: (3,)
#   # Series: 'a' [list[i64]]
#   # [
#   #         [1]
#   #         [42, 13]
#   #         []
#   # ]
def agg(expr)
  super(expr)
end
|
|
703
770
|
end
|
|
704
771
|
end
|
|
@@ -4,18 +4,19 @@ module Polars
|
|
|
4
4
|
attr_accessor :_reader, :new_columns
|
|
5
5
|
|
|
6
6
|
def initialize(
|
|
7
|
-
|
|
7
|
+
source,
|
|
8
8
|
has_header: true,
|
|
9
9
|
columns: nil,
|
|
10
|
-
|
|
11
|
-
|
|
10
|
+
separator: ",",
|
|
11
|
+
comment_prefix: nil,
|
|
12
12
|
quote_char: '"',
|
|
13
13
|
skip_rows: 0,
|
|
14
|
-
|
|
14
|
+
skip_lines: 0,
|
|
15
|
+
schema_overrides: nil,
|
|
15
16
|
null_values: nil,
|
|
16
17
|
missing_utf8_is_empty_string: false,
|
|
17
18
|
ignore_errors: false,
|
|
18
|
-
|
|
19
|
+
try_parse_dates: false,
|
|
19
20
|
n_threads: nil,
|
|
20
21
|
infer_schema_length: 100,
|
|
21
22
|
batch_size: 50_000,
|
|
@@ -24,30 +25,28 @@ module Polars
|
|
|
24
25
|
low_memory: false,
|
|
25
26
|
rechunk: true,
|
|
26
27
|
skip_rows_after_header: 0,
|
|
27
|
-
|
|
28
|
-
|
|
28
|
+
row_index_name: nil,
|
|
29
|
+
row_index_offset: 0,
|
|
29
30
|
eol_char: "\n",
|
|
30
31
|
new_columns: nil,
|
|
31
32
|
raise_if_empty: true,
|
|
32
33
|
truncate_ragged_lines: false,
|
|
33
34
|
decimal_comma: false
|
|
34
35
|
)
|
|
35
|
-
|
|
36
|
-
path = Utils.normalize_filepath(file)
|
|
37
|
-
end
|
|
36
|
+
path = Utils.normalize_filepath(source)
|
|
38
37
|
|
|
39
38
|
dtype_list = nil
|
|
40
39
|
dtype_slice = nil
|
|
41
|
-
if !
|
|
42
|
-
if
|
|
40
|
+
if !schema_overrides.nil?
|
|
41
|
+
if schema_overrides.is_a?(Hash)
|
|
43
42
|
dtype_list = []
|
|
44
|
-
|
|
45
|
-
dtype_list << [k, Utils.
|
|
43
|
+
schema_overrides.each do |k, v|
|
|
44
|
+
dtype_list << [k, Utils.parse_into_dtype(v)]
|
|
46
45
|
end
|
|
47
|
-
elsif
|
|
48
|
-
dtype_slice =
|
|
46
|
+
elsif schema_overrides.is_a?(::Array)
|
|
47
|
+
dtype_slice = schema_overrides
|
|
49
48
|
else
|
|
50
|
-
raise
|
|
49
|
+
raise TypeError, "dtype arg should be array or hash"
|
|
51
50
|
end
|
|
52
51
|
end
|
|
53
52
|
|
|
@@ -61,8 +60,9 @@ module Polars
|
|
|
61
60
|
ignore_errors,
|
|
62
61
|
n_rows,
|
|
63
62
|
skip_rows,
|
|
63
|
+
skip_lines,
|
|
64
64
|
projection,
|
|
65
|
-
|
|
65
|
+
separator,
|
|
66
66
|
rechunk,
|
|
67
67
|
columns,
|
|
68
68
|
encoding,
|
|
@@ -71,13 +71,13 @@ module Polars
|
|
|
71
71
|
dtype_list,
|
|
72
72
|
dtype_slice,
|
|
73
73
|
low_memory,
|
|
74
|
-
|
|
74
|
+
comment_prefix,
|
|
75
75
|
quote_char,
|
|
76
76
|
processed_null_values,
|
|
77
77
|
missing_utf8_is_empty_string,
|
|
78
|
-
|
|
78
|
+
try_parse_dates,
|
|
79
79
|
skip_rows_after_header,
|
|
80
|
-
Utils.parse_row_index_args(
|
|
80
|
+
Utils.parse_row_index_args(row_index_name, row_index_offset),
|
|
81
81
|
eol_char,
|
|
82
82
|
raise_if_empty,
|
|
83
83
|
truncate_ragged_lines,
|