polars-df 0.23.0 → 0.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +127 -1
  3. data/Cargo.lock +72 -58
  4. data/README.md +31 -27
  5. data/ext/polars/Cargo.toml +15 -6
  6. data/ext/polars/src/batched_csv.rs +35 -39
  7. data/ext/polars/src/c_api/allocator.rs +7 -0
  8. data/ext/polars/src/c_api/mod.rs +1 -0
  9. data/ext/polars/src/catalog/unity.rs +123 -101
  10. data/ext/polars/src/conversion/any_value.rs +13 -17
  11. data/ext/polars/src/conversion/chunked_array.rs +5 -5
  12. data/ext/polars/src/conversion/datetime.rs +3 -2
  13. data/ext/polars/src/conversion/mod.rs +50 -45
  14. data/ext/polars/src/dataframe/export.rs +13 -13
  15. data/ext/polars/src/dataframe/general.rs +223 -223
  16. data/ext/polars/src/dataframe/io.rs +27 -141
  17. data/ext/polars/src/dataframe/mod.rs +13 -5
  18. data/ext/polars/src/dataframe/serde.rs +1 -1
  19. data/ext/polars/src/error.rs +44 -7
  20. data/ext/polars/src/exceptions.rs +45 -12
  21. data/ext/polars/src/expr/array.rs +12 -0
  22. data/ext/polars/src/expr/datatype.rs +2 -2
  23. data/ext/polars/src/expr/datetime.rs +4 -5
  24. data/ext/polars/src/expr/general.rs +49 -13
  25. data/ext/polars/src/expr/list.rs +4 -0
  26. data/ext/polars/src/expr/meta.rs +8 -3
  27. data/ext/polars/src/expr/mod.rs +22 -6
  28. data/ext/polars/src/expr/name.rs +19 -8
  29. data/ext/polars/src/expr/rolling.rs +50 -1
  30. data/ext/polars/src/expr/string.rs +0 -1
  31. data/ext/polars/src/expr/struct.rs +7 -2
  32. data/ext/polars/src/file.rs +136 -103
  33. data/ext/polars/src/functions/aggregation.rs +9 -8
  34. data/ext/polars/src/functions/io.rs +81 -10
  35. data/ext/polars/src/functions/lazy.rs +95 -21
  36. data/ext/polars/src/functions/mod.rs +2 -0
  37. data/ext/polars/src/functions/range.rs +19 -3
  38. data/ext/polars/src/functions/strings.rs +6 -0
  39. data/ext/polars/src/functions/utils.rs +6 -0
  40. data/ext/polars/src/interop/arrow/mod.rs +50 -1
  41. data/ext/polars/src/interop/arrow/{to_ruby.rs → to_rb.rs} +30 -0
  42. data/ext/polars/src/interop/arrow/to_rust.rs +43 -0
  43. data/ext/polars/src/interop/numo/to_numo_df.rs +1 -1
  44. data/ext/polars/src/interop/numo/to_numo_series.rs +1 -1
  45. data/ext/polars/src/lazyframe/exitable.rs +39 -0
  46. data/ext/polars/src/lazyframe/general.rs +340 -236
  47. data/ext/polars/src/lazyframe/mod.rs +46 -10
  48. data/ext/polars/src/lazyframe/optflags.rs +5 -4
  49. data/ext/polars/src/lazyframe/serde.rs +11 -3
  50. data/ext/polars/src/lazyframe/sink.rs +10 -5
  51. data/ext/polars/src/lazygroupby.rs +6 -7
  52. data/ext/polars/src/lib.rs +141 -76
  53. data/ext/polars/src/map/dataframe.rs +12 -12
  54. data/ext/polars/src/map/lazy.rs +7 -5
  55. data/ext/polars/src/map/mod.rs +15 -8
  56. data/ext/polars/src/map/series.rs +3 -3
  57. data/ext/polars/src/on_startup.rs +16 -8
  58. data/ext/polars/src/prelude.rs +1 -0
  59. data/ext/polars/src/rb_modules.rs +19 -49
  60. data/ext/polars/src/series/aggregation.rs +79 -140
  61. data/ext/polars/src/series/arithmetic.rs +16 -22
  62. data/ext/polars/src/series/comparison.rs +101 -222
  63. data/ext/polars/src/series/construction.rs +17 -18
  64. data/ext/polars/src/series/export.rs +1 -1
  65. data/ext/polars/src/series/general.rs +254 -289
  66. data/ext/polars/src/series/import.rs +17 -0
  67. data/ext/polars/src/series/map.rs +178 -160
  68. data/ext/polars/src/series/mod.rs +28 -12
  69. data/ext/polars/src/series/scatter.rs +12 -9
  70. data/ext/polars/src/sql.rs +16 -9
  71. data/ext/polars/src/testing/frame.rs +31 -0
  72. data/ext/polars/src/testing/mod.rs +5 -0
  73. data/ext/polars/src/testing/series.rs +31 -0
  74. data/ext/polars/src/timeout.rs +105 -0
  75. data/ext/polars/src/utils.rs +159 -1
  76. data/lib/polars/array_expr.rb +81 -12
  77. data/lib/polars/array_name_space.rb +74 -7
  78. data/lib/polars/batched_csv_reader.rb +21 -21
  79. data/lib/polars/binary_name_space.rb +1 -1
  80. data/lib/polars/cat_expr.rb +7 -7
  81. data/lib/polars/config.rb +1 -1
  82. data/lib/polars/convert.rb +189 -34
  83. data/lib/polars/data_frame.rb +1066 -831
  84. data/lib/polars/data_frame_plot.rb +173 -0
  85. data/lib/polars/data_type_group.rb +1 -0
  86. data/lib/polars/data_types.rb +31 -12
  87. data/lib/polars/date_time_expr.rb +51 -69
  88. data/lib/polars/date_time_name_space.rb +80 -112
  89. data/lib/polars/dynamic_group_by.rb +7 -7
  90. data/lib/polars/exceptions.rb +50 -10
  91. data/lib/polars/expr.rb +470 -517
  92. data/lib/polars/functions/aggregation/horizontal.rb +0 -1
  93. data/lib/polars/functions/aggregation/vertical.rb +2 -3
  94. data/lib/polars/functions/as_datatype.rb +290 -8
  95. data/lib/polars/functions/eager.rb +204 -10
  96. data/lib/polars/functions/escape_regex.rb +21 -0
  97. data/lib/polars/functions/lazy.rb +409 -169
  98. data/lib/polars/functions/lit.rb +17 -1
  99. data/lib/polars/functions/range/int_range.rb +74 -2
  100. data/lib/polars/functions/range/linear_space.rb +77 -0
  101. data/lib/polars/functions/range/time_range.rb +1 -1
  102. data/lib/polars/functions/repeat.rb +3 -12
  103. data/lib/polars/functions/whenthen.rb +2 -2
  104. data/lib/polars/group_by.rb +72 -20
  105. data/lib/polars/iceberg_dataset.rb +1 -6
  106. data/lib/polars/in_process_query.rb +37 -0
  107. data/lib/polars/io/cloud.rb +18 -0
  108. data/lib/polars/io/csv.rb +265 -126
  109. data/lib/polars/io/database.rb +0 -1
  110. data/lib/polars/io/delta.rb +15 -7
  111. data/lib/polars/io/ipc.rb +24 -17
  112. data/lib/polars/io/ndjson.rb +161 -24
  113. data/lib/polars/io/parquet.rb +101 -38
  114. data/lib/polars/lazy_frame.rb +849 -558
  115. data/lib/polars/lazy_group_by.rb +327 -2
  116. data/lib/polars/list_expr.rb +94 -16
  117. data/lib/polars/list_name_space.rb +88 -24
  118. data/lib/polars/meta_expr.rb +42 -1
  119. data/lib/polars/name_expr.rb +41 -4
  120. data/lib/polars/query_opt_flags.rb +198 -2
  121. data/lib/polars/rolling_group_by.rb +3 -3
  122. data/lib/polars/schema.rb +21 -3
  123. data/lib/polars/selector.rb +37 -2
  124. data/lib/polars/selectors.rb +45 -9
  125. data/lib/polars/series.rb +1156 -728
  126. data/lib/polars/series_plot.rb +72 -0
  127. data/lib/polars/slice.rb +1 -1
  128. data/lib/polars/sql_context.rb +11 -4
  129. data/lib/polars/string_expr.rb +59 -68
  130. data/lib/polars/string_name_space.rb +51 -87
  131. data/lib/polars/struct_expr.rb +36 -18
  132. data/lib/polars/testing.rb +24 -273
  133. data/lib/polars/utils/constants.rb +2 -0
  134. data/lib/polars/utils/construction/data_frame.rb +410 -0
  135. data/lib/polars/utils/construction/series.rb +364 -0
  136. data/lib/polars/utils/construction/utils.rb +9 -0
  137. data/lib/polars/utils/deprecation.rb +11 -0
  138. data/lib/polars/utils/serde.rb +8 -3
  139. data/lib/polars/utils/unstable.rb +19 -0
  140. data/lib/polars/utils/various.rb +59 -0
  141. data/lib/polars/utils.rb +46 -47
  142. data/lib/polars/version.rb +1 -1
  143. data/lib/polars.rb +47 -1
  144. metadata +25 -6
  145. data/ext/polars/src/allocator.rs +0 -13
  146. data/lib/polars/plot.rb +0 -109
@@ -6,135 +6,132 @@ use polars::prelude::pivot::{pivot, pivot_stable};
6
6
  use polars::prelude::*;
7
7
 
8
8
  use crate::conversion::*;
9
+ use crate::exceptions::RbIndexError;
9
10
  use crate::map::dataframe::{
10
11
  apply_lambda_unknown, apply_lambda_with_bool_out_type, apply_lambda_with_primitive_out_type,
11
12
  apply_lambda_with_utf8_out_type,
12
13
  };
13
14
  use crate::prelude::strings_to_pl_smallstr;
14
- use crate::series::{to_rbseries, to_series};
15
+ use crate::series::ToRbSeries;
16
+ use crate::series::to_series;
17
+ use crate::utils::EnterPolarsExt;
15
18
  use crate::{RbDataFrame, RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
16
19
 
17
20
  impl RbDataFrame {
18
21
  pub fn init(columns: RArray) -> RbResult<Self> {
19
22
  let mut cols = Vec::new();
20
23
  for i in columns.into_iter() {
21
- cols.push(<&RbSeries>::try_convert(i)?.series.borrow().clone().into());
24
+ cols.push(<&RbSeries>::try_convert(i)?.series.read().clone().into());
22
25
  }
23
26
  let df = DataFrame::new(cols).map_err(RbPolarsErr::from)?;
24
27
  Ok(RbDataFrame::new(df))
25
28
  }
26
29
 
27
30
  pub fn estimated_size(&self) -> usize {
28
- self.df.borrow().estimated_size()
31
+ self.df.read().estimated_size()
29
32
  }
30
33
 
31
34
  pub fn dtype_strings(&self) -> Vec<String> {
32
35
  self.df
33
- .borrow()
36
+ .read()
34
37
  .get_columns()
35
38
  .iter()
36
39
  .map(|s| format!("{}", s.dtype()))
37
40
  .collect()
38
41
  }
39
42
 
40
- pub fn add(&self, s: &RbSeries) -> RbResult<Self> {
41
- let df = (&*self.df.borrow() + &*s.series.borrow()).map_err(RbPolarsErr::from)?;
42
- Ok(df.into())
43
+ pub fn add(rb: &Ruby, self_: &Self, s: &RbSeries) -> RbResult<Self> {
44
+ rb.enter_polars_df(|| &*self_.df.read() + &*s.series.read())
43
45
  }
44
46
 
45
- pub fn sub(&self, s: &RbSeries) -> RbResult<Self> {
46
- let df = (&*self.df.borrow() - &*s.series.borrow()).map_err(RbPolarsErr::from)?;
47
- Ok(df.into())
47
+ pub fn sub(rb: &Ruby, self_: &Self, s: &RbSeries) -> RbResult<Self> {
48
+ rb.enter_polars_df(|| &*self_.df.read() - &*s.series.read())
48
49
  }
49
50
 
50
- pub fn div(&self, s: &RbSeries) -> RbResult<Self> {
51
- let df = (&*self.df.borrow() / &*s.series.borrow()).map_err(RbPolarsErr::from)?;
52
- Ok(df.into())
51
+ pub fn div(rb: &Ruby, self_: &Self, s: &RbSeries) -> RbResult<Self> {
52
+ rb.enter_polars_df(|| &*self_.df.read() / &*s.series.read())
53
53
  }
54
54
 
55
- pub fn mul(&self, s: &RbSeries) -> RbResult<Self> {
56
- let df = (&*self.df.borrow() * &*s.series.borrow()).map_err(RbPolarsErr::from)?;
57
- Ok(df.into())
55
+ pub fn mul(rb: &Ruby, self_: &Self, s: &RbSeries) -> RbResult<Self> {
56
+ rb.enter_polars_df(|| &*self_.df.read() * &*s.series.read())
58
57
  }
59
58
 
60
- pub fn rem(&self, s: &RbSeries) -> RbResult<Self> {
61
- let df = (&*self.df.borrow() % &*s.series.borrow()).map_err(RbPolarsErr::from)?;
62
- Ok(df.into())
59
+ pub fn rem(rb: &Ruby, self_: &Self, s: &RbSeries) -> RbResult<Self> {
60
+ rb.enter_polars_df(|| &*self_.df.read() % &*s.series.read())
63
61
  }
64
62
 
65
- pub fn add_df(&self, s: &Self) -> RbResult<Self> {
66
- let df = (&*self.df.borrow() + &*s.df.borrow()).map_err(RbPolarsErr::from)?;
67
- Ok(df.into())
63
+ pub fn add_df(rb: &Ruby, self_: &Self, s: &Self) -> RbResult<Self> {
64
+ rb.enter_polars_df(|| &*self_.df.read() + &*s.df.read())
68
65
  }
69
66
 
70
- pub fn sub_df(&self, s: &Self) -> RbResult<Self> {
71
- let df = (&*self.df.borrow() - &*s.df.borrow()).map_err(RbPolarsErr::from)?;
72
- Ok(df.into())
67
+ pub fn sub_df(rb: &Ruby, self_: &Self, s: &Self) -> RbResult<Self> {
68
+ rb.enter_polars_df(|| &*self_.df.read() - &*s.df.read())
73
69
  }
74
70
 
75
- pub fn div_df(&self, s: &Self) -> RbResult<Self> {
76
- let df = (&*self.df.borrow() / &*s.df.borrow()).map_err(RbPolarsErr::from)?;
77
- Ok(df.into())
71
+ pub fn div_df(rb: &Ruby, self_: &Self, s: &Self) -> RbResult<Self> {
72
+ rb.enter_polars_df(|| &*self_.df.read() / &*s.df.read())
78
73
  }
79
74
 
80
- pub fn mul_df(&self, s: &Self) -> RbResult<Self> {
81
- let df = (&*self.df.borrow() * &*s.df.borrow()).map_err(RbPolarsErr::from)?;
82
- Ok(df.into())
75
+ pub fn mul_df(rb: &Ruby, self_: &Self, s: &Self) -> RbResult<Self> {
76
+ rb.enter_polars_df(|| &*self_.df.read() * &*s.df.read())
83
77
  }
84
78
 
85
- pub fn rem_df(&self, s: &Self) -> RbResult<Self> {
86
- let df = (&*self.df.borrow() % &*s.df.borrow()).map_err(RbPolarsErr::from)?;
87
- Ok(df.into())
79
+ pub fn rem_df(rb: &Ruby, self_: &Self, s: &Self) -> RbResult<Self> {
80
+ rb.enter_polars_df(|| &*self_.df.read() % &*s.df.read())
88
81
  }
89
82
 
90
83
  pub fn sample_n(
91
- &self,
84
+ rb: &Ruby,
85
+ self_: &Self,
92
86
  n: &RbSeries,
93
87
  with_replacement: bool,
94
88
  shuffle: bool,
95
89
  seed: Option<u64>,
96
90
  ) -> RbResult<Self> {
97
- let df = self
98
- .df
99
- .borrow()
100
- .sample_n(&n.series.borrow(), with_replacement, shuffle, seed)
101
- .map_err(RbPolarsErr::from)?;
102
- Ok(df.into())
91
+ rb.enter_polars_df(|| {
92
+ self_
93
+ .df
94
+ .read()
95
+ .sample_n(&n.series.read(), with_replacement, shuffle, seed)
96
+ })
103
97
  }
104
98
 
105
99
  pub fn sample_frac(
106
- &self,
100
+ rb: &Ruby,
101
+ self_: &Self,
107
102
  frac: &RbSeries,
108
103
  with_replacement: bool,
109
104
  shuffle: bool,
110
105
  seed: Option<u64>,
111
106
  ) -> RbResult<Self> {
112
- let df = self
113
- .df
114
- .borrow()
115
- .sample_frac(&frac.series.borrow(), with_replacement, shuffle, seed)
116
- .map_err(RbPolarsErr::from)?;
117
- Ok(df.into())
107
+ rb.enter_polars_df(|| {
108
+ self_
109
+ .df
110
+ .read()
111
+ .sample_frac(&frac.series.read(), with_replacement, shuffle, seed)
112
+ })
118
113
  }
119
114
 
120
- pub fn rechunk(&self) -> Self {
121
- let mut df = self.df.borrow_mut().clone();
122
- df.as_single_chunk_par();
123
- df.into()
115
+ pub fn rechunk(rb: &Ruby, self_: &Self) -> RbResult<Self> {
116
+ rb.enter_polars_df(|| {
117
+ let mut df = self_.df.write().clone();
118
+ df.as_single_chunk_par();
119
+ Ok(df)
120
+ })
124
121
  }
125
122
 
126
123
  pub fn as_str(&self) -> String {
127
- format!("{}", self.df.borrow())
124
+ format!("{}", self.df.read())
128
125
  }
129
126
 
130
- pub fn get_columns(&self) -> RArray {
131
- let cols = self.df.borrow().get_columns().to_vec();
132
- to_rbseries(cols)
127
+ pub fn get_columns(rb: &Ruby, self_: &Self) -> RArray {
128
+ let cols = self_.df.read().get_columns().to_vec();
129
+ cols.to_rbseries(rb)
133
130
  }
134
131
 
135
132
  pub fn columns(&self) -> Vec<String> {
136
133
  self.df
137
- .borrow()
134
+ .read()
138
135
  .get_column_names()
139
136
  .iter()
140
137
  .map(|v| v.to_string())
@@ -143,214 +140,207 @@ impl RbDataFrame {
143
140
 
144
141
  pub fn set_column_names(&self, names: Vec<String>) -> RbResult<()> {
145
142
  self.df
146
- .borrow_mut()
143
+ .write()
147
144
  .set_column_names(&names)
148
145
  .map_err(RbPolarsErr::from)?;
149
146
  Ok(())
150
147
  }
151
148
 
152
- pub fn dtypes(ruby: &Ruby, rb_self: &Self) -> RArray {
149
+ pub fn dtypes(ruby: &Ruby, self_: &Self) -> RArray {
153
150
  ruby.ary_from_iter(
154
- rb_self
151
+ self_
155
152
  .df
156
- .borrow()
153
+ .read()
157
154
  .iter()
158
155
  .map(|s| Wrap(s.dtype().clone()).into_value_with(ruby)),
159
156
  )
160
157
  }
161
158
 
162
159
  pub fn n_chunks(&self) -> usize {
163
- self.df.borrow().first_col_n_chunks()
160
+ self.df.read().first_col_n_chunks()
164
161
  }
165
162
 
166
163
  pub fn shape(&self) -> (usize, usize) {
167
- self.df.borrow().shape()
164
+ self.df.read().shape()
168
165
  }
169
166
 
170
167
  pub fn height(&self) -> usize {
171
- self.df.borrow().height()
168
+ self.df.read().height()
172
169
  }
173
170
 
174
171
  pub fn width(&self) -> usize {
175
- self.df.borrow().width()
172
+ self.df.read().width()
176
173
  }
177
174
 
178
- pub fn hstack(&self, columns: RArray) -> RbResult<Self> {
175
+ pub fn hstack(rb: &Ruby, self_: &Self, columns: RArray) -> RbResult<Self> {
179
176
  let columns = to_series(columns)?;
180
177
  let columns = columns.into_iter().map(Into::into).collect::<Vec<_>>();
181
- let df = self
182
- .df
183
- .borrow()
184
- .hstack(&columns)
185
- .map_err(RbPolarsErr::from)?;
186
- Ok(df.into())
178
+ rb.enter_polars_df(|| self_.df.read().hstack(&columns))
187
179
  }
188
180
 
189
- pub fn hstack_mut(&self, columns: RArray) -> RbResult<()> {
181
+ pub fn hstack_mut(rb: &Ruby, self_: &Self, columns: RArray) -> RbResult<()> {
190
182
  let columns = to_series(columns)?;
191
183
  let columns = columns.into_iter().map(Into::into).collect::<Vec<_>>();
192
- self.df
193
- .borrow_mut()
194
- .hstack_mut(&columns)
195
- .map_err(RbPolarsErr::from)?;
184
+ rb.enter_polars(|| self_.df.write().hstack_mut(&columns).map(drop))?;
196
185
  Ok(())
197
186
  }
198
187
 
199
- pub fn vstack(&self, df: &RbDataFrame) -> RbResult<Self> {
200
- let df = self
201
- .df
202
- .borrow()
203
- .vstack(&df.df.borrow())
204
- .map_err(RbPolarsErr::from)?;
205
- Ok(df.into())
188
+ pub fn vstack(rb: &Ruby, self_: &Self, other: &RbDataFrame) -> RbResult<Self> {
189
+ rb.enter_polars_df(|| self_.df.read().vstack(&other.df.read()))
206
190
  }
207
191
 
208
- pub fn vstack_mut(&self, df: &RbDataFrame) -> RbResult<()> {
209
- self.df
210
- .borrow_mut()
211
- .vstack_mut(&df.df.borrow())
212
- .map_err(RbPolarsErr::from)?;
192
+ pub fn vstack_mut(rb: &Ruby, self_: &Self, other: &RbDataFrame) -> RbResult<()> {
193
+ rb.enter_polars(|| {
194
+ // Prevent self-vstack deadlocks.
195
+ let other = other.df.read().clone();
196
+ self_.df.write().vstack_mut(&other)?;
197
+ PolarsResult::Ok(())
198
+ })?;
213
199
  Ok(())
214
200
  }
215
201
 
216
- pub fn extend(&self, df: &RbDataFrame) -> RbResult<()> {
217
- self.df
218
- .borrow_mut()
219
- .extend(&df.df.borrow())
220
- .map_err(RbPolarsErr::from)?;
202
+ pub fn extend(rb: &Ruby, self_: &Self, other: &RbDataFrame) -> RbResult<()> {
203
+ rb.enter_polars(|| {
204
+ // Prevent self-extend deadlocks.
205
+ let other = other.df.read().clone();
206
+ self_.df.write().extend(&other)
207
+ })?;
221
208
  Ok(())
222
209
  }
223
210
 
224
211
  pub fn drop_in_place(&self, name: String) -> RbResult<RbSeries> {
225
212
  let s = self
226
213
  .df
227
- .borrow_mut()
214
+ .write()
228
215
  .drop_in_place(&name)
229
216
  .map_err(RbPolarsErr::from)?;
230
217
  let s = s.take_materialized_series();
231
218
  Ok(RbSeries::new(s))
232
219
  }
233
220
 
234
- pub fn select_at_idx(&self, idx: usize) -> Option<RbSeries> {
235
- self.df
236
- .borrow()
237
- .select_at_idx(idx)
238
- .map(|s| RbSeries::new(s.as_materialized_series().clone()))
221
+ pub fn to_series(&self, index: isize) -> RbResult<RbSeries> {
222
+ let df = &self.df.read();
223
+
224
+ let index_adjusted = if index < 0 {
225
+ df.width().checked_sub(index.unsigned_abs())
226
+ } else {
227
+ Some(usize::try_from(index).unwrap())
228
+ };
229
+
230
+ let s = index_adjusted.and_then(|i| df.select_at_idx(i));
231
+ match s {
232
+ Some(s) => Ok(RbSeries::new(s.as_materialized_series().clone())),
233
+ None => Err(RbIndexError::new_err(
234
+ polars_err!(oob = index, df.width()).to_string(),
235
+ )),
236
+ }
239
237
  }
240
238
 
241
239
  pub fn get_column_index(&self, name: String) -> Option<usize> {
242
- self.df.borrow().get_column_index(&name)
240
+ self.df.read().get_column_index(&name)
243
241
  }
244
242
 
245
243
  pub fn get_column(&self, name: String) -> RbResult<RbSeries> {
246
244
  let series = self
247
245
  .df
248
- .borrow()
246
+ .read()
249
247
  .column(&name)
250
248
  .map(|s| RbSeries::new(s.as_materialized_series().clone()))
251
249
  .map_err(RbPolarsErr::from)?;
252
250
  Ok(series)
253
251
  }
254
252
 
255
- pub fn select(&self, selection: Vec<String>) -> RbResult<Self> {
256
- let df = self
257
- .df
258
- .borrow()
259
- .select(selection)
260
- .map_err(RbPolarsErr::from)?;
261
- Ok(RbDataFrame::new(df))
253
+ pub fn select(rb: &Ruby, self_: &Self, columns: Vec<String>) -> RbResult<Self> {
254
+ rb.enter_polars_df(|| self_.df.read().select(columns.iter().map(|x| &**x)))
262
255
  }
263
256
 
264
- pub fn gather(&self, indices: Vec<IdxSize>) -> RbResult<Self> {
257
+ pub fn gather(rb: &Ruby, self_: &Self, indices: Vec<IdxSize>) -> RbResult<Self> {
265
258
  let indices = IdxCa::from_vec("".into(), indices);
266
- let df = self.df.borrow().take(&indices).map_err(RbPolarsErr::from)?;
267
- Ok(RbDataFrame::new(df))
259
+ rb.enter_polars_df(|| self_.df.read().take(&indices))
268
260
  }
269
261
 
270
- pub fn take_with_series(&self, indices: &RbSeries) -> RbResult<Self> {
271
- let binding = indices.series.borrow();
272
- let idx = binding.idx().map_err(RbPolarsErr::from)?;
273
- let df = self.df.borrow().take(idx).map_err(RbPolarsErr::from)?;
274
- Ok(RbDataFrame::new(df))
262
+ pub fn gather_with_series(rb: &Ruby, self_: &Self, indices: &RbSeries) -> RbResult<Self> {
263
+ let idx_s = indices.series.read();
264
+ let indices = idx_s.idx().map_err(RbPolarsErr::from)?;
265
+ rb.enter_polars_df(|| self_.df.read().take(indices))
275
266
  }
276
267
 
277
268
  pub fn replace(&self, column: String, new_col: &RbSeries) -> RbResult<()> {
278
269
  self.df
279
- .borrow_mut()
280
- .replace(&column, new_col.series.borrow().clone())
270
+ .write()
271
+ .replace(&column, new_col.series.read().clone())
281
272
  .map_err(RbPolarsErr::from)?;
282
273
  Ok(())
283
274
  }
284
275
 
285
276
  pub fn replace_column(&self, index: usize, new_col: &RbSeries) -> RbResult<()> {
286
277
  self.df
287
- .borrow_mut()
288
- .replace_column(index, new_col.series.borrow().clone())
278
+ .write()
279
+ .replace_column(index, new_col.series.read().clone())
289
280
  .map_err(RbPolarsErr::from)?;
290
281
  Ok(())
291
282
  }
292
283
 
293
284
  pub fn insert_column(&self, index: usize, new_col: &RbSeries) -> RbResult<()> {
294
285
  self.df
295
- .borrow_mut()
296
- .insert_column(index, new_col.series.borrow().clone())
286
+ .write()
287
+ .insert_column(index, new_col.series.read().clone())
297
288
  .map_err(RbPolarsErr::from)?;
298
289
  Ok(())
299
290
  }
300
291
 
301
- pub fn slice(&self, offset: i64, length: Option<usize>) -> Self {
302
- let df = self
303
- .df
304
- .borrow()
305
- .slice(offset, length.unwrap_or_else(|| self.df.borrow().height()));
306
- df.into()
292
+ pub fn slice(rb: &Ruby, self_: &Self, offset: i64, length: Option<usize>) -> RbResult<Self> {
293
+ rb.enter_polars_df(|| {
294
+ let df = self_.df.read();
295
+ Ok(df.slice(offset, length.unwrap_or_else(|| df.height())))
296
+ })
307
297
  }
308
298
 
309
- pub fn head(&self, length: Option<usize>) -> Self {
310
- self.df.borrow().head(length).into()
299
+ pub fn head(rb: &Ruby, self_: &Self, n: usize) -> RbResult<Self> {
300
+ rb.enter_polars_df(|| Ok(self_.df.read().head(Some(n))))
311
301
  }
312
302
 
313
- pub fn tail(&self, length: Option<usize>) -> Self {
314
- self.df.borrow().tail(length).into()
303
+ pub fn tail(rb: &Ruby, self_: &Self, n: usize) -> RbResult<Self> {
304
+ rb.enter_polars_df(|| Ok(self_.df.read().tail(Some(n))))
315
305
  }
316
306
 
317
- pub fn is_unique(&self) -> RbResult<RbSeries> {
318
- let mask = self.df.borrow().is_unique().map_err(RbPolarsErr::from)?;
319
- Ok(mask.into_series().into())
307
+ pub fn is_unique(rb: &Ruby, self_: &Self) -> RbResult<RbSeries> {
308
+ rb.enter_polars_series(|| self_.df.read().is_unique())
320
309
  }
321
310
 
322
- pub fn is_duplicated(&self) -> RbResult<RbSeries> {
323
- let mask = self
324
- .df
325
- .borrow()
326
- .is_duplicated()
327
- .map_err(RbPolarsErr::from)?;
328
- Ok(mask.into_series().into())
311
+ pub fn is_duplicated(rb: &Ruby, self_: &Self) -> RbResult<RbSeries> {
312
+ rb.enter_polars_series(|| self_.df.read().is_duplicated())
329
313
  }
330
314
 
331
- pub fn equals(&self, other: &RbDataFrame, null_equal: bool) -> bool {
315
+ pub fn equals(
316
+ rb: &Ruby,
317
+ self_: &Self,
318
+ other: &RbDataFrame,
319
+ null_equal: bool,
320
+ ) -> RbResult<bool> {
332
321
  if null_equal {
333
- self.df.borrow().equals_missing(&other.df.borrow())
322
+ rb.enter_polars_ok(|| self_.df.read().equals_missing(&other.df.read()))
334
323
  } else {
335
- self.df.borrow().equals(&other.df.borrow())
324
+ rb.enter_polars_ok(|| self_.df.read().equals(&other.df.read()))
336
325
  }
337
326
  }
338
327
 
339
- pub fn with_row_index(&self, name: String, offset: Option<IdxSize>) -> RbResult<Self> {
340
- let df = self
341
- .df
342
- .borrow()
343
- .with_row_index(name.into(), offset)
344
- .map_err(RbPolarsErr::from)?;
345
- Ok(df.into())
328
+ pub fn with_row_index(
329
+ rb: &Ruby,
330
+ self_: &Self,
331
+ name: String,
332
+ offset: Option<IdxSize>,
333
+ ) -> RbResult<Self> {
334
+ rb.enter_polars_df(|| self_.df.read().with_row_index(name.into(), offset))
346
335
  }
347
336
 
348
337
  pub fn clone(&self) -> Self {
349
- RbDataFrame::new(self.df.borrow().clone())
338
+ Clone::clone(self)
350
339
  }
351
340
 
352
341
  pub fn unpivot(
353
- &self,
342
+ rb: &Ruby,
343
+ self_: &Self,
354
344
  on: Vec<String>,
355
345
  index: Vec<String>,
356
346
  value_name: Option<String>,
@@ -363,13 +353,12 @@ impl RbDataFrame {
363
353
  variable_name: variable_name.map(|s| s.into()),
364
354
  };
365
355
 
366
- let df = self.df.borrow().unpivot2(args).map_err(RbPolarsErr::from)?;
367
- Ok(RbDataFrame::new(df))
356
+ rb.enter_polars_df(|| self_.df.read().unpivot2(args))
368
357
  }
369
358
 
370
- #[allow(clippy::too_many_arguments)]
371
359
  pub fn pivot_expr(
372
- &self,
360
+ rb: &Ruby,
361
+ self_: &Self,
373
362
  on: Vec<String>,
374
363
  index: Option<Vec<String>>,
375
364
  values: Option<Vec<String>>,
@@ -378,77 +367,76 @@ impl RbDataFrame {
378
367
  aggregate_expr: Option<&RbExpr>,
379
368
  separator: Option<String>,
380
369
  ) -> RbResult<Self> {
370
+ let df = self_.df.read().clone(); // Clone to avoid dead lock on re-entrance in aggregate_expr.
381
371
  let fun = if maintain_order { pivot_stable } else { pivot };
382
- let agg_expr = aggregate_expr.map(|aggregate_expr| aggregate_expr.inner.clone());
383
- let df = fun(
384
- &self.df.borrow(),
385
- on,
386
- index,
387
- values,
388
- sort_columns,
389
- agg_expr,
390
- separator.as_deref(),
391
- )
392
- .map_err(RbPolarsErr::from)?;
393
- Ok(RbDataFrame::new(df))
372
+ let agg_expr = aggregate_expr.map(|expr| expr.inner.clone());
373
+ rb.enter_polars_df(|| {
374
+ fun(
375
+ &df,
376
+ on,
377
+ index,
378
+ values,
379
+ sort_columns,
380
+ agg_expr,
381
+ separator.as_deref(),
382
+ )
383
+ })
394
384
  }
395
385
 
396
386
  pub fn partition_by(
397
- ruby: &Ruby,
398
- rb_self: &Self,
387
+ rb: &Ruby,
388
+ self_: &Self,
399
389
  by: Vec<String>,
400
390
  maintain_order: bool,
401
391
  include_key: bool,
402
392
  ) -> RbResult<RArray> {
403
393
  let out = if maintain_order {
404
- rb_self.df.borrow().partition_by_stable(by, include_key)
394
+ self_.df.read().partition_by_stable(by, include_key)
405
395
  } else {
406
- rb_self.df.borrow().partition_by(by, include_key)
396
+ self_.df.read().partition_by(by, include_key)
407
397
  }
408
398
  .map_err(RbPolarsErr::from)?;
409
- Ok(ruby.ary_from_iter(out.into_iter().map(RbDataFrame::new)))
399
+ Ok(rb.ary_from_iter(out.into_iter().map(RbDataFrame::new)))
410
400
  }
411
401
 
412
402
  pub fn lazy(&self) -> RbLazyFrame {
413
- self.df.borrow().clone().lazy().into()
403
+ self.df.read().clone().lazy().into()
414
404
  }
415
405
 
416
406
  pub fn to_dummies(
417
- &self,
407
+ rb: &Ruby,
408
+ self_: &Self,
418
409
  columns: Option<Vec<String>>,
419
410
  separator: Option<String>,
420
411
  drop_first: bool,
421
412
  drop_nulls: bool,
422
413
  ) -> RbResult<Self> {
423
- let df = match columns {
424
- Some(cols) => self.df.borrow().columns_to_dummies(
414
+ rb.enter_polars_df(|| match columns {
415
+ Some(cols) => self_.df.read().columns_to_dummies(
425
416
  cols.iter().map(|x| x as &str).collect(),
426
417
  separator.as_deref(),
427
418
  drop_first,
428
419
  drop_nulls,
429
420
  ),
430
- None => self
421
+ None => self_
431
422
  .df
432
- .borrow()
423
+ .read()
433
424
  .to_dummies(separator.as_deref(), drop_first, drop_nulls),
434
- }
435
- .map_err(RbPolarsErr::from)?;
436
- Ok(df.into())
425
+ })
437
426
  }
438
427
 
439
- pub fn null_count(&self) -> Self {
440
- let df = self.df.borrow().null_count();
441
- df.into()
428
+ pub fn null_count(rb: &Ruby, self_: &Self) -> RbResult<Self> {
429
+ rb.enter_polars_df(|| Ok(self_.df.read().null_count()))
442
430
  }
443
431
 
444
432
  pub fn map_rows(
445
433
  ruby: &Ruby,
446
- rb_self: &Self,
434
+ self_: &Self,
447
435
  lambda: Value,
448
436
  output_type: Option<Wrap<DataType>>,
449
437
  inference_size: usize,
450
438
  ) -> RbResult<(Value, bool)> {
451
- let df = &rb_self.df.borrow();
439
+ let df = &self_.df.read();
452
440
 
453
441
  let output_type = output_type.map(|dt| dt.0);
454
442
  let out = match output_type {
@@ -496,22 +484,29 @@ impl RbDataFrame {
496
484
  Ok((ruby.obj_wrap(RbSeries::from(out)).as_value(), false))
497
485
  }
498
486
 
499
- pub fn shrink_to_fit(&self) {
500
- self.df.borrow_mut().shrink_to_fit();
487
+ pub fn shrink_to_fit(rb: &Ruby, self_: &Self) -> RbResult<()> {
488
+ rb.enter_polars_ok(|| self_.df.write().shrink_to_fit())
501
489
  }
502
490
 
503
- pub fn hash_rows(&self, k0: u64, k1: u64, k2: u64, k3: u64) -> RbResult<RbSeries> {
491
+ pub fn hash_rows(
492
+ rb: &Ruby,
493
+ self_: &Self,
494
+ k0: u64,
495
+ k1: u64,
496
+ k2: u64,
497
+ k3: u64,
498
+ ) -> RbResult<RbSeries> {
504
499
  let seed = PlFixedStateQuality::default().hash_one((k0, k1, k2, k3));
505
500
  let hb = PlSeedableRandomStateQuality::seed_from_u64(seed);
506
- let hash = self
507
- .df
508
- .borrow_mut()
509
- .hash_rows(Some(hb))
510
- .map_err(RbPolarsErr::from)?;
511
- Ok(hash.into_series().into())
501
+ rb.enter_polars_series(|| self_.df.write().hash_rows(Some(hb)))
512
502
  }
513
503
 
514
- pub fn transpose(&self, keep_names_as: Option<String>, column_names: Value) -> RbResult<Self> {
504
+ pub fn transpose(
505
+ rb: &Ruby,
506
+ self_: &Self,
507
+ keep_names_as: Option<String>,
508
+ column_names: Value,
509
+ ) -> RbResult<Self> {
515
510
  let new_col_names = if let Ok(name) = <Vec<String>>::try_convert(column_names) {
516
511
  Some(Either::Right(name))
517
512
  } else if let Ok(name) = String::try_convert(column_names) {
@@ -519,40 +514,45 @@ impl RbDataFrame {
519
514
  } else {
520
515
  None
521
516
  };
522
- Ok(self
523
- .df
524
- .borrow_mut()
525
- .transpose(keep_names_as.as_deref(), new_col_names)
526
- .map_err(RbPolarsErr::from)?
527
- .into())
517
+ rb.enter_polars_df(|| {
518
+ self_
519
+ .df
520
+ .write()
521
+ .transpose(keep_names_as.as_deref(), new_col_names)
522
+ })
528
523
  }
529
524
 
530
525
  pub fn upsample(
531
- &self,
526
+ rb: &Ruby,
527
+ self_: &Self,
532
528
  by: Vec<String>,
533
529
  index_column: String,
534
530
  every: String,
535
531
  stable: bool,
536
532
  ) -> RbResult<Self> {
537
- let out = if stable {
538
- self.df
539
- .borrow()
540
- .upsample_stable(by, &index_column, Duration::parse(&every))
541
- } else {
542
- self.df
543
- .borrow()
544
- .upsample(by, &index_column, Duration::parse(&every))
545
- };
546
- let out = out.map_err(RbPolarsErr::from)?;
547
- Ok(out.into())
533
+ rb.enter_polars_df(|| {
534
+ if stable {
535
+ self_
536
+ .df
537
+ .read()
538
+ .upsample_stable(by, &index_column, Duration::parse(&every))
539
+ } else {
540
+ self_
541
+ .df
542
+ .read()
543
+ .upsample(by, &index_column, Duration::parse(&every))
544
+ }
545
+ })
548
546
  }
549
547
 
550
- pub fn to_struct(&self, name: String) -> RbSeries {
551
- let s = self.df.borrow().clone().into_struct(name.into());
552
- s.into_series().into()
548
+ pub fn to_struct(rb: &Ruby, self_: &Self, name: String) -> RbResult<RbSeries> {
549
+ rb.enter_polars_series(|| {
550
+ let ca = self_.df.read().clone().into_struct(name.into());
551
+ Ok(ca)
552
+ })
553
553
  }
554
554
 
555
- pub fn clear(&self) -> Self {
556
- self.df.borrow().clear().into()
555
+ pub fn clear(rb: &Ruby, self_: &Self) -> RbResult<Self> {
556
+ rb.enter_polars_df(|| Ok(self_.df.read().clear()))
557
557
  }
558
558
  }