polars-df 0.23.0 → 0.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146):
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +127 -1
  3. data/Cargo.lock +72 -58
  4. data/README.md +31 -27
  5. data/ext/polars/Cargo.toml +15 -6
  6. data/ext/polars/src/batched_csv.rs +35 -39
  7. data/ext/polars/src/c_api/allocator.rs +7 -0
  8. data/ext/polars/src/c_api/mod.rs +1 -0
  9. data/ext/polars/src/catalog/unity.rs +123 -101
  10. data/ext/polars/src/conversion/any_value.rs +13 -17
  11. data/ext/polars/src/conversion/chunked_array.rs +5 -5
  12. data/ext/polars/src/conversion/datetime.rs +3 -2
  13. data/ext/polars/src/conversion/mod.rs +50 -45
  14. data/ext/polars/src/dataframe/export.rs +13 -13
  15. data/ext/polars/src/dataframe/general.rs +223 -223
  16. data/ext/polars/src/dataframe/io.rs +27 -141
  17. data/ext/polars/src/dataframe/mod.rs +13 -5
  18. data/ext/polars/src/dataframe/serde.rs +1 -1
  19. data/ext/polars/src/error.rs +44 -7
  20. data/ext/polars/src/exceptions.rs +45 -12
  21. data/ext/polars/src/expr/array.rs +12 -0
  22. data/ext/polars/src/expr/datatype.rs +2 -2
  23. data/ext/polars/src/expr/datetime.rs +4 -5
  24. data/ext/polars/src/expr/general.rs +49 -13
  25. data/ext/polars/src/expr/list.rs +4 -0
  26. data/ext/polars/src/expr/meta.rs +8 -3
  27. data/ext/polars/src/expr/mod.rs +22 -6
  28. data/ext/polars/src/expr/name.rs +19 -8
  29. data/ext/polars/src/expr/rolling.rs +50 -1
  30. data/ext/polars/src/expr/string.rs +0 -1
  31. data/ext/polars/src/expr/struct.rs +7 -2
  32. data/ext/polars/src/file.rs +136 -103
  33. data/ext/polars/src/functions/aggregation.rs +9 -8
  34. data/ext/polars/src/functions/io.rs +81 -10
  35. data/ext/polars/src/functions/lazy.rs +95 -21
  36. data/ext/polars/src/functions/mod.rs +2 -0
  37. data/ext/polars/src/functions/range.rs +19 -3
  38. data/ext/polars/src/functions/strings.rs +6 -0
  39. data/ext/polars/src/functions/utils.rs +6 -0
  40. data/ext/polars/src/interop/arrow/mod.rs +50 -1
  41. data/ext/polars/src/interop/arrow/{to_ruby.rs → to_rb.rs} +30 -0
  42. data/ext/polars/src/interop/arrow/to_rust.rs +43 -0
  43. data/ext/polars/src/interop/numo/to_numo_df.rs +1 -1
  44. data/ext/polars/src/interop/numo/to_numo_series.rs +1 -1
  45. data/ext/polars/src/lazyframe/exitable.rs +39 -0
  46. data/ext/polars/src/lazyframe/general.rs +340 -236
  47. data/ext/polars/src/lazyframe/mod.rs +46 -10
  48. data/ext/polars/src/lazyframe/optflags.rs +5 -4
  49. data/ext/polars/src/lazyframe/serde.rs +11 -3
  50. data/ext/polars/src/lazyframe/sink.rs +10 -5
  51. data/ext/polars/src/lazygroupby.rs +6 -7
  52. data/ext/polars/src/lib.rs +141 -76
  53. data/ext/polars/src/map/dataframe.rs +12 -12
  54. data/ext/polars/src/map/lazy.rs +7 -5
  55. data/ext/polars/src/map/mod.rs +15 -8
  56. data/ext/polars/src/map/series.rs +3 -3
  57. data/ext/polars/src/on_startup.rs +16 -8
  58. data/ext/polars/src/prelude.rs +1 -0
  59. data/ext/polars/src/rb_modules.rs +19 -49
  60. data/ext/polars/src/series/aggregation.rs +79 -140
  61. data/ext/polars/src/series/arithmetic.rs +16 -22
  62. data/ext/polars/src/series/comparison.rs +101 -222
  63. data/ext/polars/src/series/construction.rs +17 -18
  64. data/ext/polars/src/series/export.rs +1 -1
  65. data/ext/polars/src/series/general.rs +254 -289
  66. data/ext/polars/src/series/import.rs +17 -0
  67. data/ext/polars/src/series/map.rs +178 -160
  68. data/ext/polars/src/series/mod.rs +28 -12
  69. data/ext/polars/src/series/scatter.rs +12 -9
  70. data/ext/polars/src/sql.rs +16 -9
  71. data/ext/polars/src/testing/frame.rs +31 -0
  72. data/ext/polars/src/testing/mod.rs +5 -0
  73. data/ext/polars/src/testing/series.rs +31 -0
  74. data/ext/polars/src/timeout.rs +105 -0
  75. data/ext/polars/src/utils.rs +159 -1
  76. data/lib/polars/array_expr.rb +81 -12
  77. data/lib/polars/array_name_space.rb +74 -7
  78. data/lib/polars/batched_csv_reader.rb +21 -21
  79. data/lib/polars/binary_name_space.rb +1 -1
  80. data/lib/polars/cat_expr.rb +7 -7
  81. data/lib/polars/config.rb +1 -1
  82. data/lib/polars/convert.rb +189 -34
  83. data/lib/polars/data_frame.rb +1066 -831
  84. data/lib/polars/data_frame_plot.rb +173 -0
  85. data/lib/polars/data_type_group.rb +1 -0
  86. data/lib/polars/data_types.rb +31 -12
  87. data/lib/polars/date_time_expr.rb +51 -69
  88. data/lib/polars/date_time_name_space.rb +80 -112
  89. data/lib/polars/dynamic_group_by.rb +7 -7
  90. data/lib/polars/exceptions.rb +50 -10
  91. data/lib/polars/expr.rb +470 -517
  92. data/lib/polars/functions/aggregation/horizontal.rb +0 -1
  93. data/lib/polars/functions/aggregation/vertical.rb +2 -3
  94. data/lib/polars/functions/as_datatype.rb +290 -8
  95. data/lib/polars/functions/eager.rb +204 -10
  96. data/lib/polars/functions/escape_regex.rb +21 -0
  97. data/lib/polars/functions/lazy.rb +409 -169
  98. data/lib/polars/functions/lit.rb +17 -1
  99. data/lib/polars/functions/range/int_range.rb +74 -2
  100. data/lib/polars/functions/range/linear_space.rb +77 -0
  101. data/lib/polars/functions/range/time_range.rb +1 -1
  102. data/lib/polars/functions/repeat.rb +3 -12
  103. data/lib/polars/functions/whenthen.rb +2 -2
  104. data/lib/polars/group_by.rb +72 -20
  105. data/lib/polars/iceberg_dataset.rb +1 -6
  106. data/lib/polars/in_process_query.rb +37 -0
  107. data/lib/polars/io/cloud.rb +18 -0
  108. data/lib/polars/io/csv.rb +265 -126
  109. data/lib/polars/io/database.rb +0 -1
  110. data/lib/polars/io/delta.rb +15 -7
  111. data/lib/polars/io/ipc.rb +24 -17
  112. data/lib/polars/io/ndjson.rb +161 -24
  113. data/lib/polars/io/parquet.rb +101 -38
  114. data/lib/polars/lazy_frame.rb +849 -558
  115. data/lib/polars/lazy_group_by.rb +327 -2
  116. data/lib/polars/list_expr.rb +94 -16
  117. data/lib/polars/list_name_space.rb +88 -24
  118. data/lib/polars/meta_expr.rb +42 -1
  119. data/lib/polars/name_expr.rb +41 -4
  120. data/lib/polars/query_opt_flags.rb +198 -2
  121. data/lib/polars/rolling_group_by.rb +3 -3
  122. data/lib/polars/schema.rb +21 -3
  123. data/lib/polars/selector.rb +37 -2
  124. data/lib/polars/selectors.rb +45 -9
  125. data/lib/polars/series.rb +1156 -728
  126. data/lib/polars/series_plot.rb +72 -0
  127. data/lib/polars/slice.rb +1 -1
  128. data/lib/polars/sql_context.rb +11 -4
  129. data/lib/polars/string_expr.rb +59 -68
  130. data/lib/polars/string_name_space.rb +51 -87
  131. data/lib/polars/struct_expr.rb +36 -18
  132. data/lib/polars/testing.rb +24 -273
  133. data/lib/polars/utils/constants.rb +2 -0
  134. data/lib/polars/utils/construction/data_frame.rb +410 -0
  135. data/lib/polars/utils/construction/series.rb +364 -0
  136. data/lib/polars/utils/construction/utils.rb +9 -0
  137. data/lib/polars/utils/deprecation.rb +11 -0
  138. data/lib/polars/utils/serde.rb +8 -3
  139. data/lib/polars/utils/unstable.rb +19 -0
  140. data/lib/polars/utils/various.rb +59 -0
  141. data/lib/polars/utils.rb +46 -47
  142. data/lib/polars/version.rb +1 -1
  143. data/lib/polars.rb +47 -1
  144. metadata +25 -6
  145. data/ext/polars/src/allocator.rs +0 -13
  146. data/lib/polars/plot.rb +0 -109
@@ -1,19 +1,21 @@
1
- use magnus::{IntoValue, RArray, RHash, Ruby, TryConvert, Value, r_hash::ForEach, typed_data::Obj};
1
+ use magnus::{
2
+ IntoValue, RArray, RHash, Ruby, TryConvert, Value, r_hash::ForEach,
3
+ try_convert::TryConvertOwned,
4
+ };
2
5
  use polars::io::RowIndex;
3
6
  use polars::lazy::frame::LazyFrame;
4
7
  use polars::prelude::*;
5
8
  use polars_plan::dsl::ScanSources;
6
- use std::cell::RefCell;
7
- use std::io::BufWriter;
9
+ use polars_plan::plans::{HintIR, Sorted};
8
10
  use std::num::NonZeroUsize;
9
11
 
10
- use super::SinkTarget;
12
+ use super::{RbLazyFrame, RbOptFlags, SinkTarget};
11
13
  use crate::conversion::*;
12
- use crate::expr::rb_exprs_to_exprs;
14
+ use crate::expr::ToExprs;
13
15
  use crate::expr::selector::RbSelector;
14
- use crate::file::get_file_like;
15
16
  use crate::io::RbScanOptions;
16
- use crate::{RbDataFrame, RbExpr, RbLazyFrame, RbLazyGroupBy, RbPolarsErr, RbResult, RbValueError};
17
+ use crate::utils::EnterPolarsExt;
18
+ use crate::{RbDataFrame, RbExpr, RbLazyGroupBy, RbPolarsErr, RbResult, RbValueError};
17
19
 
18
20
  fn rbobject_to_first_path_and_scan_sources(obj: Value) -> RbResult<(Option<PlPath>, ScanSources)> {
19
21
  use crate::file::{RubyScanSourceInput, get_ruby_scan_source_input};
@@ -22,35 +24,58 @@ fn rbobject_to_first_path_and_scan_sources(obj: Value) -> RbResult<(Option<PlPat
22
24
  Some(path.clone()),
23
25
  ScanSources::Paths(FromIterator::from_iter([path])),
24
26
  ),
25
- RubyScanSourceInput::File(file) => (None, ScanSources::Files([file].into())),
27
+ RubyScanSourceInput::File(file) => (None, ScanSources::Files([file.into()].into())),
26
28
  RubyScanSourceInput::Buffer(buff) => (None, ScanSources::Buffers([buff].into())),
27
29
  })
28
30
  }
29
31
 
30
32
  impl RbLazyFrame {
31
- #[allow(clippy::too_many_arguments)]
32
- pub fn new_from_ndjson(
33
- source: Option<Value>,
34
- sources: Wrap<ScanSources>,
35
- infer_schema_length: Option<usize>,
36
- batch_size: Option<NonZeroUsize>,
37
- n_rows: Option<usize>,
38
- low_memory: bool,
39
- rechunk: bool,
40
- row_index: Option<(String, IdxSize)>,
41
- ) -> RbResult<Self> {
33
+ pub fn new_from_ndjson(arguments: &[Value]) -> RbResult<Self> {
34
+ let source = Option::<Value>::try_convert(arguments[0])?;
35
+ let sources = Wrap::<ScanSources>::try_convert(arguments[1])?;
36
+ let infer_schema_length = Option::<usize>::try_convert(arguments[2])?;
37
+ let schema = Option::<Wrap<Schema>>::try_convert(arguments[3])?;
38
+ let schema_overrides = Option::<Wrap<Schema>>::try_convert(arguments[4])?;
39
+ let batch_size = Option::<NonZeroUsize>::try_convert(arguments[5])?;
40
+ let n_rows = Option::<usize>::try_convert(arguments[6])?;
41
+ let low_memory = bool::try_convert(arguments[7])?;
42
+ let rechunk = bool::try_convert(arguments[8])?;
43
+ let row_index = Option::<(String, IdxSize)>::try_convert(arguments[9])?;
44
+ let ignore_errors = bool::try_convert(arguments[10])?;
45
+ let include_file_paths = Option::<String>::try_convert(arguments[11])?;
46
+ let cloud_options = Option::<Vec<(String, String)>>::try_convert(arguments[12])?;
47
+ let credential_provider = Option::<Value>::try_convert(arguments[13])?;
48
+ let retries = usize::try_convert(arguments[14])?;
49
+ let file_cache_ttl = Option::<u64>::try_convert(arguments[15])?;
50
+
42
51
  let row_index = row_index.map(|(name, offset)| RowIndex {
43
52
  name: name.into(),
44
53
  offset,
45
54
  });
46
55
 
47
56
  let sources = sources.0;
48
- let (_first_path, sources) = match source {
57
+ let (first_path, sources) = match source {
49
58
  None => (sources.first_path().map(|p| p.into_owned()), sources),
50
59
  Some(source) => rbobject_to_first_path_and_scan_sources(source)?,
51
60
  };
52
61
 
53
- let r = LazyJsonLineReader::new_with_sources(sources);
62
+ let mut r = LazyJsonLineReader::new_with_sources(sources);
63
+
64
+ if let Some(first_path) = first_path {
65
+ let first_path_url = first_path.to_str();
66
+
67
+ let mut cloud_options =
68
+ parse_cloud_options(first_path_url, cloud_options.unwrap_or_default())?;
69
+ cloud_options = cloud_options
70
+ .with_max_retries(retries)
71
+ .with_credential_provider(credential_provider.map(|_| todo!()));
72
+
73
+ if let Some(file_cache_ttl) = file_cache_ttl {
74
+ cloud_options.file_cache_ttl = file_cache_ttl;
75
+ }
76
+
77
+ r = r.with_cloud_options(Some(cloud_options));
78
+ };
54
79
 
55
80
  let lf = r
56
81
  .with_infer_schema_length(infer_schema_length.and_then(NonZeroUsize::new))
@@ -58,11 +83,11 @@ impl RbLazyFrame {
58
83
  .with_n_rows(n_rows)
59
84
  .low_memory(low_memory)
60
85
  .with_rechunk(rechunk)
61
- // .with_schema(schema.map(|schema| Arc::new(schema.0)))
62
- // .with_schema_overwrite(schema_overrides.map(|x| Arc::new(x.0)))
86
+ .with_schema(schema.map(|schema| Arc::new(schema.0)))
87
+ .with_schema_overwrite(schema_overrides.map(|x| Arc::new(x.0)))
63
88
  .with_row_index(row_index)
64
- // .with_ignore_errors(ignore_errors)
65
- // .with_include_file_paths(include_file_paths.map(|x| x.into()))
89
+ .with_ignore_errors(ignore_errors)
90
+ .with_include_file_paths(include_file_paths.map(|x| x.into()))
66
91
  .finish()
67
92
  .map_err(RbPolarsErr::from)?;
68
93
 
@@ -73,27 +98,38 @@ impl RbLazyFrame {
73
98
  // start arguments
74
99
  // this pattern is needed for more than 16
75
100
  let source = Option::<Value>::try_convert(arguments[0])?;
76
- let sources = Wrap::<ScanSources>::try_convert(arguments[21])?;
77
- let separator = String::try_convert(arguments[1])?;
78
- let has_header = bool::try_convert(arguments[2])?;
79
- let ignore_errors = bool::try_convert(arguments[3])?;
80
- let skip_rows = usize::try_convert(arguments[4])?;
81
- let n_rows = Option::<usize>::try_convert(arguments[5])?;
82
- let cache = bool::try_convert(arguments[6])?;
83
- let overwrite_dtype = Option::<Vec<(String, Wrap<DataType>)>>::try_convert(arguments[7])?;
84
- let low_memory = bool::try_convert(arguments[8])?;
85
- let comment_prefix = Option::<String>::try_convert(arguments[9])?;
86
- let quote_char = Option::<String>::try_convert(arguments[10])?;
87
- let null_values = Option::<Wrap<NullValues>>::try_convert(arguments[11])?;
88
- let infer_schema_length = Option::<usize>::try_convert(arguments[12])?;
89
- let with_schema_modify = Option::<Value>::try_convert(arguments[13])?;
90
- let rechunk = bool::try_convert(arguments[14])?;
91
- let skip_rows_after_header = usize::try_convert(arguments[15])?;
92
- let encoding = Wrap::<CsvEncoding>::try_convert(arguments[16])?;
93
- let row_index = Option::<(String, IdxSize)>::try_convert(arguments[17])?;
94
- let try_parse_dates = bool::try_convert(arguments[18])?;
95
- let eol_char = String::try_convert(arguments[19])?;
96
- let truncate_ragged_lines = bool::try_convert(arguments[20])?;
101
+ let sources = Wrap::<ScanSources>::try_convert(arguments[1])?;
102
+ let separator = String::try_convert(arguments[2])?;
103
+ let has_header = bool::try_convert(arguments[3])?;
104
+ let ignore_errors = bool::try_convert(arguments[4])?;
105
+ let skip_rows = usize::try_convert(arguments[5])?;
106
+ let skip_lines = usize::try_convert(arguments[6])?;
107
+ let n_rows = Option::<usize>::try_convert(arguments[7])?;
108
+ let cache = bool::try_convert(arguments[8])?;
109
+ let overwrite_dtype = Option::<Vec<(String, Wrap<DataType>)>>::try_convert(arguments[9])?;
110
+ let low_memory = bool::try_convert(arguments[10])?;
111
+ let comment_prefix = Option::<String>::try_convert(arguments[11])?;
112
+ let quote_char = Option::<String>::try_convert(arguments[12])?;
113
+ let null_values = Option::<Wrap<NullValues>>::try_convert(arguments[13])?;
114
+ let missing_utf8_is_empty_string = bool::try_convert(arguments[14])?;
115
+ let infer_schema_length = Option::<usize>::try_convert(arguments[15])?;
116
+ let with_schema_modify = Option::<Value>::try_convert(arguments[16])?;
117
+ let rechunk = bool::try_convert(arguments[17])?;
118
+ let skip_rows_after_header = usize::try_convert(arguments[18])?;
119
+ let encoding = Wrap::<CsvEncoding>::try_convert(arguments[19])?;
120
+ let row_index = Option::<(String, IdxSize)>::try_convert(arguments[20])?;
121
+ let try_parse_dates = bool::try_convert(arguments[21])?;
122
+ let eol_char = String::try_convert(arguments[22])?;
123
+ let raise_if_empty = bool::try_convert(arguments[23])?;
124
+ let truncate_ragged_lines = bool::try_convert(arguments[24])?;
125
+ let decimal_comma = bool::try_convert(arguments[25])?;
126
+ let glob = bool::try_convert(arguments[26])?;
127
+ let schema = Option::<Wrap<Schema>>::try_convert(arguments[27])?;
128
+ let cloud_options = Option::<Vec<(String, String)>>::try_convert(arguments[28])?;
129
+ let _credential_provider = Option::<Value>::try_convert(arguments[29])?;
130
+ let retries = usize::try_convert(arguments[30])?;
131
+ let file_cache_ttl = Option::<u64>::try_convert(arguments[31])?;
132
+ let include_file_paths = Option::<String>::try_convert(arguments[32])?;
97
133
  // end arguments
98
134
 
99
135
  let null_values = null_values.map(|w| w.0);
@@ -113,12 +149,24 @@ impl RbLazyFrame {
113
149
  });
114
150
 
115
151
  let sources = sources.0;
116
- let (_first_path, sources) = match source {
152
+ let (first_path, sources) = match source {
117
153
  None => (sources.first_path().map(|p| p.into_owned()), sources),
118
154
  Some(source) => rbobject_to_first_path_and_scan_sources(source)?,
119
155
  };
120
156
 
121
- let r = LazyCsvReader::new_with_sources(sources);
157
+ let mut r = LazyCsvReader::new_with_sources(sources);
158
+
159
+ if let Some(first_path) = first_path {
160
+ let first_path_url = first_path.to_str();
161
+
162
+ let mut cloud_options =
163
+ parse_cloud_options(first_path_url, cloud_options.unwrap_or_default())?;
164
+ if let Some(file_cache_ttl) = file_cache_ttl {
165
+ cloud_options.file_cache_ttl = file_cache_ttl;
166
+ }
167
+ cloud_options = cloud_options.with_max_retries(retries);
168
+ r = r.with_cloud_options(Some(cloud_options));
169
+ }
122
170
 
123
171
  let r = r
124
172
  .with_infer_schema_length(infer_schema_length)
@@ -126,10 +174,11 @@ impl RbLazyFrame {
126
174
  .with_has_header(has_header)
127
175
  .with_ignore_errors(ignore_errors)
128
176
  .with_skip_rows(skip_rows)
177
+ .with_skip_lines(skip_lines)
129
178
  .with_n_rows(n_rows)
130
179
  .with_cache(cache)
131
180
  .with_dtype_overwrite(overwrite_dtype.map(Arc::new))
132
- // TODO add with_schema
181
+ .with_schema(schema.map(|schema| Arc::new(schema.0)))
133
182
  .with_low_memory(low_memory)
134
183
  .with_comment_prefix(comment_prefix.map(|x| x.into()))
135
184
  .with_quote_char(quote_char)
@@ -140,8 +189,12 @@ impl RbLazyFrame {
140
189
  .with_row_index(row_index)
141
190
  .with_try_parse_dates(try_parse_dates)
142
191
  .with_null_values(null_values)
143
- // TODO add with_missing_is_null
144
- .with_truncate_ragged_lines(truncate_ragged_lines);
192
+ .with_missing_is_null(!missing_utf8_is_empty_string)
193
+ .with_truncate_ragged_lines(truncate_ragged_lines)
194
+ .with_decimal_comma(decimal_comma)
195
+ .with_glob(glob)
196
+ .with_raise_if_empty(raise_if_empty)
197
+ .with_include_file_paths(include_file_paths.map(|x| x.into()));
145
198
 
146
199
  if let Some(_lambda) = with_schema_modify {
147
200
  todo!();
@@ -208,57 +261,28 @@ impl RbLazyFrame {
208
261
  Ok(lf.into())
209
262
  }
210
263
 
211
- pub fn write_json(&self, rb_f: Value) -> RbResult<()> {
212
- let file = BufWriter::new(get_file_like(rb_f, true)?);
213
- serde_json::to_writer(file, &self.ldf.borrow().logical_plan)
214
- .map_err(|err| RbValueError::new_err(format!("{err:?}")))?;
215
- Ok(())
264
+ pub fn describe_plan(rb: &Ruby, self_: &Self) -> RbResult<String> {
265
+ rb.enter_polars(|| self_.ldf.read().describe_plan())
216
266
  }
217
267
 
218
- pub fn describe_plan(&self) -> RbResult<String> {
219
- self.ldf
220
- .borrow()
221
- .describe_plan()
222
- .map_err(RbPolarsErr::from)
223
- .map_err(Into::into)
268
+ pub fn describe_optimized_plan(rb: &Ruby, self_: &Self) -> RbResult<String> {
269
+ rb.enter_polars(|| self_.ldf.read().describe_optimized_plan())
224
270
  }
225
271
 
226
- pub fn describe_optimized_plan(&self) -> RbResult<String> {
227
- let result = self
228
- .ldf
229
- .borrow()
230
- .describe_optimized_plan()
231
- .map_err(RbPolarsErr::from)?;
232
- Ok(result)
272
+ pub fn describe_plan_tree(rb: &Ruby, self_: &Self) -> RbResult<String> {
273
+ rb.enter_polars(|| self_.ldf.read().describe_plan_tree())
233
274
  }
234
275
 
235
- #[allow(clippy::too_many_arguments)]
236
- pub fn optimization_toggle(
237
- &self,
238
- type_coercion: bool,
239
- predicate_pushdown: bool,
240
- projection_pushdown: bool,
241
- simplify_expr: bool,
242
- slice_pushdown: bool,
243
- comm_subplan_elim: bool,
244
- comm_subexpr_elim: bool,
245
- allow_streaming: bool,
246
- _eager: bool,
247
- ) -> RbLazyFrame {
248
- let ldf = self.ldf.borrow().clone();
249
- let mut ldf = ldf
250
- .with_type_coercion(type_coercion)
251
- .with_predicate_pushdown(predicate_pushdown)
252
- .with_simplify_expr(simplify_expr)
253
- .with_slice_pushdown(slice_pushdown)
254
- .with_new_streaming(allow_streaming)
255
- ._with_eager(_eager)
256
- .with_projection_pushdown(projection_pushdown);
257
-
258
- ldf = ldf.with_comm_subplan_elim(comm_subplan_elim);
259
- ldf = ldf.with_comm_subexpr_elim(comm_subexpr_elim);
260
-
261
- ldf.into()
276
+ pub fn describe_optimized_plan_tree(rb: &Ruby, self_: &Self) -> RbResult<String> {
277
+ rb.enter_polars(|| self_.ldf.read().describe_optimized_plan_tree())
278
+ }
279
+
280
+ pub fn to_dot(rb: &Ruby, self_: &Self, optimized: bool) -> RbResult<String> {
281
+ rb.enter_polars(|| self_.ldf.read().to_dot(optimized))
282
+ }
283
+
284
+ pub fn to_dot_streaming_phys(rb: &Ruby, self_: &Self, optimized: bool) -> RbResult<String> {
285
+ rb.enter_polars(|| self_.ldf.read().to_dot_streaming_phys(optimized))
262
286
  }
263
287
 
264
288
  pub fn sort(
@@ -269,7 +293,7 @@ impl RbLazyFrame {
269
293
  maintain_order: bool,
270
294
  multithreaded: bool,
271
295
  ) -> Self {
272
- let ldf = self.ldf.borrow().clone();
296
+ let ldf = self.ldf.read().clone();
273
297
  ldf.sort(
274
298
  [&by_column],
275
299
  SortMultipleOptions {
@@ -291,8 +315,8 @@ impl RbLazyFrame {
291
315
  maintain_order: bool,
292
316
  multithreaded: bool,
293
317
  ) -> RbResult<Self> {
294
- let ldf = self.ldf.borrow().clone();
295
- let exprs = rb_exprs_to_exprs(by)?;
318
+ let ldf = self.ldf.read().clone();
319
+ let exprs = by.to_exprs()?;
296
320
  Ok(ldf
297
321
  .sort_by_exprs(
298
322
  exprs,
@@ -308,8 +332,8 @@ impl RbLazyFrame {
308
332
  }
309
333
 
310
334
  pub fn top_k(&self, k: IdxSize, by: RArray, reverse: Vec<bool>) -> RbResult<Self> {
311
- let ldf = self.ldf.borrow().clone();
312
- let exprs = rb_exprs_to_exprs(by)?;
335
+ let ldf = self.ldf.read().clone();
336
+ let exprs = by.to_exprs()?;
313
337
  Ok(ldf
314
338
  .top_k(
315
339
  k,
@@ -320,8 +344,8 @@ impl RbLazyFrame {
320
344
  }
321
345
 
322
346
  pub fn bottom_k(&self, k: IdxSize, by: RArray, reverse: Vec<bool>) -> RbResult<Self> {
323
- let ldf = self.ldf.borrow().clone();
324
- let exprs = rb_exprs_to_exprs(by)?;
347
+ let ldf = self.ldf.read().clone();
348
+ let exprs = by.to_exprs()?;
325
349
  Ok(ldf
326
350
  .bottom_k(
327
351
  k,
@@ -332,19 +356,34 @@ impl RbLazyFrame {
332
356
  }
333
357
 
334
358
  pub fn cache(&self) -> Self {
335
- let ldf = self.ldf.borrow().clone();
359
+ let ldf = self.ldf.read().clone();
336
360
  ldf.cache().into()
337
361
  }
338
362
 
339
- pub fn collect(&self) -> RbResult<RbDataFrame> {
340
- let ldf = self.ldf.borrow().clone();
341
- let df = ldf.collect().map_err(RbPolarsErr::from)?;
342
- Ok(df.into())
363
+ pub fn with_optimizations(&self, optflags: &RbOptFlags) -> Self {
364
+ let ldf = self.ldf.read().clone();
365
+ ldf.with_optimizations(optflags.clone().inner.into_inner())
366
+ .into()
367
+ }
368
+
369
+ pub fn profile(rb: &Ruby, self_: &Self) -> RbResult<(RbDataFrame, RbDataFrame)> {
370
+ let (df, time_df) = rb.enter_polars(|| {
371
+ let ldf = self_.ldf.read().clone();
372
+ ldf.profile()
373
+ })?;
374
+ Ok((df.into(), time_df.into()))
375
+ }
376
+
377
+ pub fn collect(rb: &Ruby, self_: &Self, engine: Wrap<Engine>) -> RbResult<RbDataFrame> {
378
+ rb.enter_polars_df(|| {
379
+ let ldf = self_.ldf.read().clone();
380
+ ldf.collect_with_engine(engine.0)
381
+ })
343
382
  }
344
383
 
345
- #[allow(clippy::too_many_arguments)]
346
384
  pub fn sink_parquet(
347
- &self,
385
+ rb: &Ruby,
386
+ self_: &Self,
348
387
  target: SinkTarget,
349
388
  compression: String,
350
389
  compression_level: Option<i32>,
@@ -352,8 +391,11 @@ impl RbLazyFrame {
352
391
  row_group_size: Option<usize>,
353
392
  data_page_size: Option<usize>,
354
393
  cloud_options: Option<Vec<(String, String)>>,
394
+ credential_provider: Option<Value>,
355
395
  retries: usize,
356
396
  sink_options: Wrap<SinkOptions>,
397
+ metadata: Wrap<Option<KeyValueMetadata>>,
398
+ field_overwrites: Vec<Wrap<ParquetFieldOverwrites>>,
357
399
  ) -> RbResult<RbLazyFrame> {
358
400
  let compression = parse_parquet_compression(&compression, compression_level)?;
359
401
 
@@ -362,8 +404,8 @@ impl RbLazyFrame {
362
404
  statistics: statistics.0,
363
405
  row_group_size,
364
406
  data_page_size,
365
- key_value_metadata: None,
366
- field_overwrites: Vec::new(),
407
+ key_value_metadata: metadata.0,
408
+ field_overwrites: field_overwrites.into_iter().map(|f| f.0).collect(),
367
409
  };
368
410
 
369
411
  let cloud_options = match target.base_path() {
@@ -371,31 +413,39 @@ impl RbLazyFrame {
371
413
  Some(base_path) => {
372
414
  let cloud_options =
373
415
  parse_cloud_options(base_path.to_str(), cloud_options.unwrap_or_default())?;
374
- Some(cloud_options.with_max_retries(retries))
416
+ Some(
417
+ cloud_options
418
+ .with_max_retries(retries)
419
+ .with_credential_provider(credential_provider.map(|_| todo!())),
420
+ )
375
421
  }
376
422
  };
377
423
 
378
- let ldf = self.ldf.borrow().clone();
379
- match target {
380
- SinkTarget::File(target) => {
381
- ldf.sink_parquet(target, options, cloud_options, sink_options.0)
424
+ rb.enter_polars(|| {
425
+ let ldf = self_.ldf.read().clone();
426
+ match target {
427
+ SinkTarget::File(target) => {
428
+ ldf.sink_parquet(target, options, cloud_options, sink_options.0)
429
+ }
382
430
  }
383
- }
384
- .map_err(RbPolarsErr::from)
431
+ })
385
432
  .map(Into::into)
386
- .map_err(Into::into)
387
433
  }
388
434
 
389
435
  pub fn sink_ipc(
390
- &self,
436
+ rb: &Ruby,
437
+ self_: &Self,
391
438
  target: SinkTarget,
392
439
  compression: Wrap<Option<IpcCompression>>,
440
+ compat_level: RbCompatLevel,
393
441
  cloud_options: Option<Vec<(String, String)>>,
442
+ credential_provider: Option<Value>,
394
443
  retries: usize,
395
444
  sink_options: Wrap<SinkOptions>,
396
445
  ) -> RbResult<RbLazyFrame> {
397
446
  let options = IpcWriterOptions {
398
447
  compression: compression.0,
448
+ compat_level: compat_level.0,
399
449
  ..Default::default()
400
450
  };
401
451
 
@@ -404,22 +454,26 @@ impl RbLazyFrame {
404
454
  Some(base_path) => {
405
455
  let cloud_options =
406
456
  parse_cloud_options(base_path.to_str(), cloud_options.unwrap_or_default())?;
407
- Some(cloud_options.with_max_retries(retries))
457
+ Some(
458
+ cloud_options
459
+ .with_max_retries(retries)
460
+ .with_credential_provider(credential_provider.map(|_| todo!())),
461
+ )
408
462
  }
409
463
  };
410
464
 
411
- let ldf = self.ldf.borrow().clone();
412
- match target {
413
- SinkTarget::File(target) => {
414
- ldf.sink_ipc(target, options, cloud_options, sink_options.0)
465
+ rb.enter_polars(|| {
466
+ let ldf = self_.ldf.read().clone();
467
+ match target {
468
+ SinkTarget::File(target) => {
469
+ ldf.sink_ipc(target, options, cloud_options, sink_options.0)
470
+ }
415
471
  }
416
- }
417
- .map_err(RbPolarsErr::from)
472
+ })
418
473
  .map(Into::into)
419
- .map_err(Into::into)
420
474
  }
421
475
 
422
- pub fn sink_csv(&self, arguments: &[Value]) -> RbResult<RbLazyFrame> {
476
+ pub fn sink_csv(rb: &Ruby, self_: &Self, arguments: &[Value]) -> RbResult<RbLazyFrame> {
423
477
  let target = SinkTarget::try_convert(arguments[0])?;
424
478
  let include_bom = bool::try_convert(arguments[1])?;
425
479
  let include_header = bool::try_convert(arguments[2])?;
@@ -436,8 +490,9 @@ impl RbLazyFrame {
436
490
  let null_value = Option::<String>::try_convert(arguments[13])?;
437
491
  let quote_style = Option::<Wrap<QuoteStyle>>::try_convert(arguments[14])?;
438
492
  let cloud_options = Option::<Vec<(String, String)>>::try_convert(arguments[15])?;
439
- let retries = usize::try_convert(arguments[16])?;
440
- let sink_options = Wrap::<SinkOptions>::try_convert(arguments[17])?;
493
+ let credential_provider = Option::<Value>::try_convert(arguments[16])?;
494
+ let retries = usize::try_convert(arguments[17])?;
495
+ let sink_options = Wrap::<SinkOptions>::try_convert(arguments[18])?;
441
496
 
442
497
  let quote_style = quote_style.map_or(QuoteStyle::default(), |wrap| wrap.0);
443
498
  let null_value = null_value.unwrap_or(SerializeOptions::default().null);
@@ -468,25 +523,31 @@ impl RbLazyFrame {
468
523
  Some(base_path) => {
469
524
  let cloud_options =
470
525
  parse_cloud_options(base_path.to_str(), cloud_options.unwrap_or_default())?;
471
- Some(cloud_options.with_max_retries(retries))
526
+ Some(
527
+ cloud_options
528
+ .with_max_retries(retries)
529
+ .with_credential_provider(credential_provider.map(|_| todo!())),
530
+ )
472
531
  }
473
532
  };
474
533
 
475
- let ldf = self.ldf.borrow().clone();
476
- match target {
477
- SinkTarget::File(target) => {
478
- ldf.sink_csv(target, options, cloud_options, sink_options.0)
534
+ rb.enter_polars(|| {
535
+ let ldf = self_.ldf.read().clone();
536
+ match target {
537
+ SinkTarget::File(target) => {
538
+ ldf.sink_csv(target, options, cloud_options, sink_options.0)
539
+ }
479
540
  }
480
- }
481
- .map_err(RbPolarsErr::from)
541
+ })
482
542
  .map(Into::into)
483
- .map_err(Into::into)
484
543
  }
485
544
 
486
545
  pub fn sink_json(
487
- &self,
546
+ rb: &Ruby,
547
+ self_: &Self,
488
548
  target: SinkTarget,
489
549
  cloud_options: Option<Vec<(String, String)>>,
550
+ credential_provider: Option<Value>,
490
551
  retries: usize,
491
552
  sink_options: Wrap<SinkOptions>,
492
553
  ) -> RbResult<RbLazyFrame> {
@@ -497,52 +558,56 @@ impl RbLazyFrame {
497
558
  Some(base_path) => {
498
559
  let cloud_options =
499
560
  parse_cloud_options(base_path.to_str(), cloud_options.unwrap_or_default())?;
500
- Some(cloud_options.with_max_retries(retries))
561
+ Some(
562
+ cloud_options
563
+ .with_max_retries(retries)
564
+ .with_credential_provider(credential_provider.map(|_| todo!())),
565
+ )
501
566
  }
502
567
  };
503
568
 
504
- let ldf = self.ldf.borrow().clone();
505
- match target {
506
- SinkTarget::File(path) => ldf.sink_json(path, options, cloud_options, sink_options.0),
507
- }
508
- .map_err(RbPolarsErr::from)
569
+ rb.enter_polars(|| {
570
+ let ldf = self_.ldf.read().clone();
571
+ match target {
572
+ SinkTarget::File(path) => {
573
+ ldf.sink_json(path, options, cloud_options, sink_options.0)
574
+ }
575
+ }
576
+ })
509
577
  .map(Into::into)
510
- .map_err(Into::into)
511
578
  }
512
579
 
513
580
  pub fn filter(&self, predicate: &RbExpr) -> Self {
514
- let ldf = self.ldf.borrow().clone();
581
+ let ldf = self.ldf.read().clone();
515
582
  ldf.filter(predicate.inner.clone()).into()
516
583
  }
517
584
 
518
585
  pub fn remove(&self, predicate: &RbExpr) -> Self {
519
- let ldf = self.ldf.borrow().clone();
586
+ let ldf = self.ldf.read().clone();
520
587
  ldf.remove(predicate.inner.clone()).into()
521
588
  }
522
589
 
523
590
  pub fn select(&self, exprs: RArray) -> RbResult<Self> {
524
- let ldf = self.ldf.borrow().clone();
525
- let exprs = rb_exprs_to_exprs(exprs)?;
591
+ let ldf = self.ldf.read().clone();
592
+ let exprs = exprs.to_exprs()?;
526
593
  Ok(ldf.select(exprs).into())
527
594
  }
528
595
 
529
596
  pub fn select_seq(&self, exprs: RArray) -> RbResult<Self> {
530
- let ldf = self.ldf.borrow().clone();
531
- let exprs = rb_exprs_to_exprs(exprs)?;
597
+ let ldf = self.ldf.read().clone();
598
+ let exprs = exprs.to_exprs()?;
532
599
  Ok(ldf.select_seq(exprs).into())
533
600
  }
534
601
 
535
602
  pub fn group_by(&self, by: RArray, maintain_order: bool) -> RbResult<RbLazyGroupBy> {
536
- let ldf = self.ldf.borrow().clone();
537
- let by = rb_exprs_to_exprs(by)?;
603
+ let ldf = self.ldf.read().clone();
604
+ let by = by.to_exprs()?;
538
605
  let lazy_gb = if maintain_order {
539
606
  ldf.group_by_stable(by)
540
607
  } else {
541
608
  ldf.group_by(by)
542
609
  };
543
- Ok(RbLazyGroupBy {
544
- lgb: RefCell::new(Some(lazy_gb)),
545
- })
610
+ Ok(RbLazyGroupBy { lgb: Some(lazy_gb) })
546
611
  }
547
612
 
548
613
  pub fn rolling(
@@ -554,8 +619,8 @@ impl RbLazyFrame {
554
619
  by: RArray,
555
620
  ) -> RbResult<RbLazyGroupBy> {
556
621
  let closed_window = closed.0;
557
- let ldf = self.ldf.borrow().clone();
558
- let by = rb_exprs_to_exprs(by)?;
622
+ let ldf = self.ldf.read().clone();
623
+ let by = by.to_exprs()?;
559
624
  let lazy_gb = ldf.rolling(
560
625
  index_column.inner.clone(),
561
626
  by,
@@ -567,12 +632,9 @@ impl RbLazyFrame {
567
632
  },
568
633
  );
569
634
 
570
- Ok(RbLazyGroupBy {
571
- lgb: RefCell::new(Some(lazy_gb)),
572
- })
635
+ Ok(RbLazyGroupBy { lgb: Some(lazy_gb) })
573
636
  }
574
637
 
575
- #[allow(clippy::too_many_arguments)]
576
638
  pub fn group_by_dynamic(
577
639
  &self,
578
640
  index_column: &RbExpr,
@@ -586,8 +648,8 @@ impl RbLazyFrame {
586
648
  start_by: Wrap<StartBy>,
587
649
  ) -> RbResult<RbLazyGroupBy> {
588
650
  let closed_window = closed.0;
589
- let by = rb_exprs_to_exprs(by)?;
590
- let ldf = self.ldf.borrow().clone();
651
+ let by = by.to_exprs()?;
652
+ let ldf = self.ldf.read().clone();
591
653
  let lazy_gb = ldf.group_by_dynamic(
592
654
  index_column.inner.clone(),
593
655
  by,
@@ -603,21 +665,9 @@ impl RbLazyFrame {
603
665
  },
604
666
  );
605
667
 
606
- Ok(RbLazyGroupBy {
607
- lgb: RefCell::new(Some(lazy_gb)),
608
- })
609
- }
610
-
611
- pub fn with_context(&self, contexts: RArray) -> RbResult<Self> {
612
- let contexts = contexts.typecheck::<Obj<RbLazyFrame>>()?;
613
- let contexts = contexts
614
- .into_iter()
615
- .map(|ldf| ldf.ldf.borrow().clone())
616
- .collect::<Vec<_>>();
617
- Ok(self.ldf.borrow().clone().with_context(contexts).into())
668
+ Ok(RbLazyGroupBy { lgb: Some(lazy_gb) })
618
669
  }
619
670
 
620
- #[allow(clippy::too_many_arguments)]
621
671
  pub fn join_asof(
622
672
  &self,
623
673
  other: &RbLazyFrame,
@@ -640,8 +690,8 @@ impl RbLazyFrame {
640
690
  } else {
641
691
  JoinCoalesce::KeepColumns
642
692
  };
643
- let ldf = self.ldf.borrow().clone();
644
- let other = other.ldf.borrow().clone();
693
+ let ldf = self.ldf.read().clone();
694
+ let other = other.ldf.read().clone();
645
695
  let left_on = left_on.inner.clone();
646
696
  let right_on = right_on.inner.clone();
647
697
  Ok(ldf
@@ -670,7 +720,6 @@ impl RbLazyFrame {
670
720
  .into())
671
721
  }
672
722
 
673
- #[allow(clippy::too_many_arguments)]
674
723
  pub fn join(
675
724
  &self,
676
725
  other: &RbLazyFrame,
@@ -690,10 +739,10 @@ impl RbLazyFrame {
690
739
  Some(true) => JoinCoalesce::CoalesceColumns,
691
740
  Some(false) => JoinCoalesce::KeepColumns,
692
741
  };
693
- let ldf = self.ldf.borrow().clone();
694
- let other = other.ldf.borrow().clone();
695
- let left_on = rb_exprs_to_exprs(left_on)?;
696
- let right_on = rb_exprs_to_exprs(right_on)?;
742
+ let ldf = self.ldf.read().clone();
743
+ let other = other.ldf.read().clone();
744
+ let left_on = left_on.to_exprs()?;
745
+ let right_on = right_on.to_exprs()?;
697
746
 
698
747
  Ok(ldf
699
748
  .join_builder()
@@ -713,10 +762,10 @@ impl RbLazyFrame {
713
762
  }
714
763
 
715
764
  pub fn join_where(&self, other: &Self, predicates: RArray, suffix: String) -> RbResult<Self> {
716
- let ldf = self.ldf.borrow().clone();
717
- let other = other.ldf.borrow().clone();
765
+ let ldf = self.ldf.read().clone();
766
+ let other = other.ldf.read().clone();
718
767
 
719
- let predicates = rb_exprs_to_exprs(predicates)?;
768
+ let predicates = predicates.to_exprs()?;
720
769
 
721
770
  Ok(ldf
722
771
  .join_builder()
@@ -727,32 +776,32 @@ impl RbLazyFrame {
727
776
  }
728
777
 
729
778
  pub fn with_column(&self, expr: &RbExpr) -> Self {
730
- let ldf = self.ldf.borrow().clone();
779
+ let ldf = self.ldf.read().clone();
731
780
  ldf.with_column(expr.inner.clone()).into()
732
781
  }
733
782
 
734
783
  pub fn with_columns(&self, exprs: RArray) -> RbResult<Self> {
735
- let ldf = self.ldf.borrow().clone();
736
- Ok(ldf.with_columns(rb_exprs_to_exprs(exprs)?).into())
784
+ let ldf = self.ldf.read().clone();
785
+ Ok(ldf.with_columns(exprs.to_exprs()?).into())
737
786
  }
738
787
 
739
788
  pub fn with_columns_seq(&self, exprs: RArray) -> RbResult<Self> {
740
- let ldf = self.ldf.borrow().clone();
741
- Ok(ldf.with_columns_seq(rb_exprs_to_exprs(exprs)?).into())
789
+ let ldf = self.ldf.read().clone();
790
+ Ok(ldf.with_columns_seq(exprs.to_exprs()?).into())
742
791
  }
743
792
 
744
793
  pub fn rename(&self, existing: Vec<String>, new: Vec<String>, strict: bool) -> Self {
745
- let ldf = self.ldf.borrow().clone();
794
+ let ldf = self.ldf.read().clone();
746
795
  ldf.rename(existing, new, strict).into()
747
796
  }
748
797
 
749
798
  pub fn reverse(&self) -> Self {
750
- let ldf = self.ldf.borrow().clone();
799
+ let ldf = self.ldf.read().clone();
751
800
  ldf.reverse().into()
752
801
  }
753
802
 
754
803
  pub fn shift(&self, n: &RbExpr, fill_value: Option<&RbExpr>) -> Self {
755
- let lf = self.ldf.borrow().clone();
804
+ let lf = self.ldf.read().clone();
756
805
  let out = match fill_value {
757
806
  Some(v) => lf.shift_and_fill(n.inner.clone(), v.inner.clone()),
758
807
  None => lf.shift(n.inner.clone()),
@@ -761,68 +810,64 @@ impl RbLazyFrame {
761
810
  }
762
811
 
763
812
  pub fn fill_nan(&self, fill_value: &RbExpr) -> Self {
764
- let ldf = self.ldf.borrow().clone();
813
+ let ldf = self.ldf.read().clone();
765
814
  ldf.fill_nan(fill_value.inner.clone()).into()
766
815
  }
767
816
 
768
817
  pub fn min(&self) -> Self {
769
- let ldf = self.ldf.borrow().clone();
818
+ let ldf = self.ldf.read().clone();
770
819
  let out = ldf.min();
771
820
  out.into()
772
821
  }
773
822
 
774
823
  pub fn max(&self) -> Self {
775
- let ldf = self.ldf.borrow().clone();
824
+ let ldf = self.ldf.read().clone();
776
825
  let out = ldf.max();
777
826
  out.into()
778
827
  }
779
828
 
780
829
  pub fn sum(&self) -> Self {
781
- let ldf = self.ldf.borrow().clone();
830
+ let ldf = self.ldf.read().clone();
782
831
  let out = ldf.sum();
783
832
  out.into()
784
833
  }
785
834
 
786
835
  pub fn mean(&self) -> Self {
787
- let ldf = self.ldf.borrow().clone();
836
+ let ldf = self.ldf.read().clone();
788
837
  let out = ldf.mean();
789
838
  out.into()
790
839
  }
791
840
 
792
841
  pub fn std(&self, ddof: u8) -> Self {
793
- let ldf = self.ldf.borrow().clone();
842
+ let ldf = self.ldf.read().clone();
794
843
  let out = ldf.std(ddof);
795
844
  out.into()
796
845
  }
797
846
 
798
847
  pub fn var(&self, ddof: u8) -> Self {
799
- let ldf = self.ldf.borrow().clone();
848
+ let ldf = self.ldf.read().clone();
800
849
  let out = ldf.var(ddof);
801
850
  out.into()
802
851
  }
803
852
 
804
853
  pub fn median(&self) -> Self {
805
- let ldf = self.ldf.borrow().clone();
854
+ let ldf = self.ldf.read().clone();
806
855
  let out = ldf.median();
807
856
  out.into()
808
857
  }
809
858
 
810
859
  pub fn quantile(&self, quantile: &RbExpr, interpolation: Wrap<QuantileMethod>) -> Self {
811
- let ldf = self.ldf.borrow().clone();
860
+ let ldf = self.ldf.read().clone();
812
861
  let out = ldf.quantile(quantile.inner.clone(), interpolation.0);
813
862
  out.into()
814
863
  }
815
864
 
816
865
  pub fn explode(&self, subset: &RbSelector) -> Self {
817
- self.ldf
818
- .borrow()
819
- .clone()
820
- .explode(subset.inner.clone())
821
- .into()
866
+ self.ldf.read().clone().explode(subset.inner.clone()).into()
822
867
  }
823
868
 
824
869
  pub fn null_count(&self) -> Self {
825
- let ldf = self.ldf.borrow().clone();
870
+ let ldf = self.ldf.read().clone();
826
871
  ldf.null_count().into()
827
872
  }
828
873
 
@@ -832,7 +877,7 @@ impl RbLazyFrame {
832
877
  subset: Option<&RbSelector>,
833
878
  keep: Wrap<UniqueKeepStrategy>,
834
879
  ) -> RbResult<Self> {
835
- let ldf = self.ldf.borrow().clone();
880
+ let ldf = self.ldf.read().clone();
836
881
  let subset = subset.map(|e| e.inner.clone());
837
882
  Ok(match maintain_order {
838
883
  true => ldf.unique_stable_generic(subset, keep.0),
@@ -843,7 +888,7 @@ impl RbLazyFrame {
843
888
 
844
889
  pub fn drop_nans(&self, subset: Option<&RbSelector>) -> Self {
845
890
  self.ldf
846
- .borrow()
891
+ .read()
847
892
  .clone()
848
893
  .drop_nans(subset.map(|e| e.inner.clone()))
849
894
  .into()
@@ -851,19 +896,19 @@ impl RbLazyFrame {
851
896
 
852
897
  pub fn drop_nulls(&self, subset: Option<&RbSelector>) -> Self {
853
898
  self.ldf
854
- .borrow()
899
+ .read()
855
900
  .clone()
856
901
  .drop_nulls(subset.map(|e| e.inner.clone()))
857
902
  .into()
858
903
  }
859
904
 
860
905
  pub fn slice(&self, offset: i64, len: Option<IdxSize>) -> Self {
861
- let ldf = self.ldf.borrow().clone();
906
+ let ldf = self.ldf.read().clone();
862
907
  ldf.slice(offset, len.unwrap_or(IdxSize::MAX)).into()
863
908
  }
864
909
 
865
910
  pub fn tail(&self, n: IdxSize) -> Self {
866
- let ldf = self.ldf.borrow().clone();
911
+ let ldf = self.ldf.read().clone();
867
912
  ldf.tail(n).into()
868
913
  }
869
914
 
@@ -881,17 +926,17 @@ impl RbLazyFrame {
881
926
  variable_name: variable_name.map(|s| s.into()),
882
927
  };
883
928
 
884
- let ldf = self.ldf.borrow().clone();
929
+ let ldf = self.ldf.read().clone();
885
930
  Ok(ldf.unpivot(args).into())
886
931
  }
887
932
 
888
933
  pub fn with_row_index(&self, name: String, offset: Option<IdxSize>) -> Self {
889
- let ldf = self.ldf.borrow().clone();
934
+ let ldf = self.ldf.read().clone();
890
935
  ldf.with_row_index(&name, offset).into()
891
936
  }
892
937
 
893
938
  pub fn drop(&self, columns: &RbSelector) -> Self {
894
- self.ldf.borrow().clone().drop(columns.inner.clone()).into()
939
+ self.ldf.read().clone().drop(columns.inner.clone()).into()
895
940
  }
896
941
 
897
942
  pub fn cast(&self, rb_dtypes: RHash, strict: bool) -> RbResult<Self> {
@@ -902,30 +947,26 @@ impl RbLazyFrame {
902
947
  })?;
903
948
  let mut cast_map = PlHashMap::with_capacity(dtypes.len());
904
949
  cast_map.extend(dtypes.iter().map(|(k, v)| (k.as_ref(), v.clone())));
905
- Ok(self.ldf.borrow().clone().cast(cast_map, strict).into())
950
+ Ok(self.ldf.read().clone().cast(cast_map, strict).into())
906
951
  }
907
952
 
908
953
  pub fn cast_all(&self, dtype: Wrap<DataType>, strict: bool) -> Self {
909
- self.ldf.borrow().clone().cast_all(dtype.0, strict).into()
954
+ self.ldf.read().clone().cast_all(dtype.0, strict).into()
910
955
  }
911
956
 
912
957
  pub fn clone(&self) -> Self {
913
- self.ldf.borrow().clone().into()
958
+ self.ldf.read().clone().into()
914
959
  }
915
960
 
916
- pub fn collect_schema(ruby: &Ruby, rb_self: &Self) -> RbResult<RHash> {
917
- let schema = rb_self
918
- .ldf
919
- .borrow_mut()
920
- .collect_schema()
921
- .map_err(RbPolarsErr::from)?;
961
+ pub fn collect_schema(rb: &Ruby, self_: &Self) -> RbResult<RHash> {
962
+ let schema = rb.enter_polars(|| self_.ldf.write().collect_schema())?;
922
963
 
923
- let schema_dict = ruby.hash_new();
964
+ let schema_dict = rb.hash_new();
924
965
  schema.iter_fields().for_each(|fld| {
925
966
  schema_dict
926
967
  .aset::<String, Value>(
927
968
  fld.name().to_string(),
928
- Wrap(fld.dtype().clone()).into_value_with(ruby),
969
+ Wrap(fld.dtype().clone()).into_value_with(rb),
929
970
  )
930
971
  .unwrap();
931
972
  });
@@ -934,7 +975,7 @@ impl RbLazyFrame {
934
975
 
935
976
  pub fn unnest(&self, columns: &RbSelector, separator: Option<String>) -> Self {
936
977
  self.ldf
937
- .borrow()
978
+ .read()
938
979
  .clone()
939
980
  .unnest(
940
981
  columns.inner.clone(),
@@ -944,17 +985,80 @@ impl RbLazyFrame {
944
985
  }
945
986
 
946
987
  pub fn count(&self) -> Self {
947
- let ldf = self.ldf.borrow().clone();
988
+ let ldf = self.ldf.read().clone();
948
989
  ldf.count().into()
949
990
  }
950
991
 
951
992
  pub fn merge_sorted(&self, other: &Self, key: String) -> RbResult<Self> {
952
993
  let out = self
953
994
  .ldf
954
- .borrow()
995
+ .read()
955
996
  .clone()
956
- .merge_sorted(other.ldf.borrow().clone(), &key)
997
+ .merge_sorted(other.ldf.read().clone(), &key)
998
+ .map_err(RbPolarsErr::from)?;
999
+ Ok(out.into())
1000
+ }
1001
+
1002
+ pub fn hint_sorted(
1003
+ &self,
1004
+ columns: Vec<String>,
1005
+ descending: Vec<bool>,
1006
+ nulls_last: Vec<bool>,
1007
+ ) -> RbResult<Self> {
1008
+ if columns.len() != descending.len() && descending.len() != 1 {
1009
+ return Err(RbValueError::new_err(
1010
+ "`set_sorted` expects the same amount of `columns` as `descending` values.",
1011
+ ));
1012
+ }
1013
+ if columns.len() != nulls_last.len() && nulls_last.len() != 1 {
1014
+ return Err(RbValueError::new_err(
1015
+ "`set_sorted` expects the same amount of `columns` as `nulls_last` values.",
1016
+ ));
1017
+ }
1018
+
1019
+ let mut sorted = columns
1020
+ .iter()
1021
+ .map(|c| Sorted {
1022
+ column: PlSmallStr::from_str(c.as_str()),
1023
+ descending: false,
1024
+ nulls_last: false,
1025
+ })
1026
+ .collect::<Vec<_>>();
1027
+
1028
+ if !columns.is_empty() {
1029
+ if descending.len() != 1 {
1030
+ sorted
1031
+ .iter_mut()
1032
+ .zip(descending)
1033
+ .for_each(|(s, d)| s.descending = d);
1034
+ } else if descending[0] {
1035
+ sorted.iter_mut().for_each(|s| s.descending = true);
1036
+ }
1037
+
1038
+ if nulls_last.len() != 1 {
1039
+ sorted
1040
+ .iter_mut()
1041
+ .zip(nulls_last)
1042
+ .for_each(|(s, d)| s.nulls_last = d);
1043
+ } else if nulls_last[0] {
1044
+ sorted.iter_mut().for_each(|s| s.nulls_last = true);
1045
+ }
1046
+ }
1047
+
1048
+ let out = self
1049
+ .ldf
1050
+ .read()
1051
+ .clone()
1052
+ .hint(HintIR::Sorted(sorted.into()))
957
1053
  .map_err(RbPolarsErr::from)?;
958
1054
  Ok(out.into())
959
1055
  }
960
1056
  }
1057
+
1058
+ impl TryConvert for Wrap<polars_io::parquet::write::ParquetFieldOverwrites> {
1059
+ fn try_convert(_ob: Value) -> RbResult<Self> {
1060
+ todo!();
1061
+ }
1062
+ }
1063
+
1064
+ unsafe impl TryConvertOwned for Wrap<polars_io::parquet::write::ParquetFieldOverwrites> {}