polars-df 0.23.0 → 0.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +127 -1
  3. data/Cargo.lock +72 -58
  4. data/README.md +31 -27
  5. data/ext/polars/Cargo.toml +15 -6
  6. data/ext/polars/src/batched_csv.rs +35 -39
  7. data/ext/polars/src/c_api/allocator.rs +7 -0
  8. data/ext/polars/src/c_api/mod.rs +1 -0
  9. data/ext/polars/src/catalog/unity.rs +123 -101
  10. data/ext/polars/src/conversion/any_value.rs +13 -17
  11. data/ext/polars/src/conversion/chunked_array.rs +5 -5
  12. data/ext/polars/src/conversion/datetime.rs +3 -2
  13. data/ext/polars/src/conversion/mod.rs +50 -45
  14. data/ext/polars/src/dataframe/export.rs +13 -13
  15. data/ext/polars/src/dataframe/general.rs +223 -223
  16. data/ext/polars/src/dataframe/io.rs +27 -141
  17. data/ext/polars/src/dataframe/mod.rs +13 -5
  18. data/ext/polars/src/dataframe/serde.rs +1 -1
  19. data/ext/polars/src/error.rs +44 -7
  20. data/ext/polars/src/exceptions.rs +45 -12
  21. data/ext/polars/src/expr/array.rs +12 -0
  22. data/ext/polars/src/expr/datatype.rs +2 -2
  23. data/ext/polars/src/expr/datetime.rs +4 -5
  24. data/ext/polars/src/expr/general.rs +49 -13
  25. data/ext/polars/src/expr/list.rs +4 -0
  26. data/ext/polars/src/expr/meta.rs +8 -3
  27. data/ext/polars/src/expr/mod.rs +22 -6
  28. data/ext/polars/src/expr/name.rs +19 -8
  29. data/ext/polars/src/expr/rolling.rs +50 -1
  30. data/ext/polars/src/expr/string.rs +0 -1
  31. data/ext/polars/src/expr/struct.rs +7 -2
  32. data/ext/polars/src/file.rs +136 -103
  33. data/ext/polars/src/functions/aggregation.rs +9 -8
  34. data/ext/polars/src/functions/io.rs +81 -10
  35. data/ext/polars/src/functions/lazy.rs +95 -21
  36. data/ext/polars/src/functions/mod.rs +2 -0
  37. data/ext/polars/src/functions/range.rs +19 -3
  38. data/ext/polars/src/functions/strings.rs +6 -0
  39. data/ext/polars/src/functions/utils.rs +6 -0
  40. data/ext/polars/src/interop/arrow/mod.rs +50 -1
  41. data/ext/polars/src/interop/arrow/{to_ruby.rs → to_rb.rs} +30 -0
  42. data/ext/polars/src/interop/arrow/to_rust.rs +43 -0
  43. data/ext/polars/src/interop/numo/to_numo_df.rs +1 -1
  44. data/ext/polars/src/interop/numo/to_numo_series.rs +1 -1
  45. data/ext/polars/src/lazyframe/exitable.rs +39 -0
  46. data/ext/polars/src/lazyframe/general.rs +340 -236
  47. data/ext/polars/src/lazyframe/mod.rs +46 -10
  48. data/ext/polars/src/lazyframe/optflags.rs +5 -4
  49. data/ext/polars/src/lazyframe/serde.rs +11 -3
  50. data/ext/polars/src/lazyframe/sink.rs +10 -5
  51. data/ext/polars/src/lazygroupby.rs +6 -7
  52. data/ext/polars/src/lib.rs +141 -76
  53. data/ext/polars/src/map/dataframe.rs +12 -12
  54. data/ext/polars/src/map/lazy.rs +7 -5
  55. data/ext/polars/src/map/mod.rs +15 -8
  56. data/ext/polars/src/map/series.rs +3 -3
  57. data/ext/polars/src/on_startup.rs +16 -8
  58. data/ext/polars/src/prelude.rs +1 -0
  59. data/ext/polars/src/rb_modules.rs +19 -49
  60. data/ext/polars/src/series/aggregation.rs +79 -140
  61. data/ext/polars/src/series/arithmetic.rs +16 -22
  62. data/ext/polars/src/series/comparison.rs +101 -222
  63. data/ext/polars/src/series/construction.rs +17 -18
  64. data/ext/polars/src/series/export.rs +1 -1
  65. data/ext/polars/src/series/general.rs +254 -289
  66. data/ext/polars/src/series/import.rs +17 -0
  67. data/ext/polars/src/series/map.rs +178 -160
  68. data/ext/polars/src/series/mod.rs +28 -12
  69. data/ext/polars/src/series/scatter.rs +12 -9
  70. data/ext/polars/src/sql.rs +16 -9
  71. data/ext/polars/src/testing/frame.rs +31 -0
  72. data/ext/polars/src/testing/mod.rs +5 -0
  73. data/ext/polars/src/testing/series.rs +31 -0
  74. data/ext/polars/src/timeout.rs +105 -0
  75. data/ext/polars/src/utils.rs +159 -1
  76. data/lib/polars/array_expr.rb +81 -12
  77. data/lib/polars/array_name_space.rb +74 -7
  78. data/lib/polars/batched_csv_reader.rb +21 -21
  79. data/lib/polars/binary_name_space.rb +1 -1
  80. data/lib/polars/cat_expr.rb +7 -7
  81. data/lib/polars/config.rb +1 -1
  82. data/lib/polars/convert.rb +189 -34
  83. data/lib/polars/data_frame.rb +1066 -831
  84. data/lib/polars/data_frame_plot.rb +173 -0
  85. data/lib/polars/data_type_group.rb +1 -0
  86. data/lib/polars/data_types.rb +31 -12
  87. data/lib/polars/date_time_expr.rb +51 -69
  88. data/lib/polars/date_time_name_space.rb +80 -112
  89. data/lib/polars/dynamic_group_by.rb +7 -7
  90. data/lib/polars/exceptions.rb +50 -10
  91. data/lib/polars/expr.rb +470 -517
  92. data/lib/polars/functions/aggregation/horizontal.rb +0 -1
  93. data/lib/polars/functions/aggregation/vertical.rb +2 -3
  94. data/lib/polars/functions/as_datatype.rb +290 -8
  95. data/lib/polars/functions/eager.rb +204 -10
  96. data/lib/polars/functions/escape_regex.rb +21 -0
  97. data/lib/polars/functions/lazy.rb +409 -169
  98. data/lib/polars/functions/lit.rb +17 -1
  99. data/lib/polars/functions/range/int_range.rb +74 -2
  100. data/lib/polars/functions/range/linear_space.rb +77 -0
  101. data/lib/polars/functions/range/time_range.rb +1 -1
  102. data/lib/polars/functions/repeat.rb +3 -12
  103. data/lib/polars/functions/whenthen.rb +2 -2
  104. data/lib/polars/group_by.rb +72 -20
  105. data/lib/polars/iceberg_dataset.rb +1 -6
  106. data/lib/polars/in_process_query.rb +37 -0
  107. data/lib/polars/io/cloud.rb +18 -0
  108. data/lib/polars/io/csv.rb +265 -126
  109. data/lib/polars/io/database.rb +0 -1
  110. data/lib/polars/io/delta.rb +15 -7
  111. data/lib/polars/io/ipc.rb +24 -17
  112. data/lib/polars/io/ndjson.rb +161 -24
  113. data/lib/polars/io/parquet.rb +101 -38
  114. data/lib/polars/lazy_frame.rb +849 -558
  115. data/lib/polars/lazy_group_by.rb +327 -2
  116. data/lib/polars/list_expr.rb +94 -16
  117. data/lib/polars/list_name_space.rb +88 -24
  118. data/lib/polars/meta_expr.rb +42 -1
  119. data/lib/polars/name_expr.rb +41 -4
  120. data/lib/polars/query_opt_flags.rb +198 -2
  121. data/lib/polars/rolling_group_by.rb +3 -3
  122. data/lib/polars/schema.rb +21 -3
  123. data/lib/polars/selector.rb +37 -2
  124. data/lib/polars/selectors.rb +45 -9
  125. data/lib/polars/series.rb +1156 -728
  126. data/lib/polars/series_plot.rb +72 -0
  127. data/lib/polars/slice.rb +1 -1
  128. data/lib/polars/sql_context.rb +11 -4
  129. data/lib/polars/string_expr.rb +59 -68
  130. data/lib/polars/string_name_space.rb +51 -87
  131. data/lib/polars/struct_expr.rb +36 -18
  132. data/lib/polars/testing.rb +24 -273
  133. data/lib/polars/utils/constants.rb +2 -0
  134. data/lib/polars/utils/construction/data_frame.rb +410 -0
  135. data/lib/polars/utils/construction/series.rb +364 -0
  136. data/lib/polars/utils/construction/utils.rb +9 -0
  137. data/lib/polars/utils/deprecation.rb +11 -0
  138. data/lib/polars/utils/serde.rb +8 -3
  139. data/lib/polars/utils/unstable.rb +19 -0
  140. data/lib/polars/utils/various.rb +59 -0
  141. data/lib/polars/utils.rb +46 -47
  142. data/lib/polars/version.rb +1 -1
  143. data/lib/polars.rb +47 -1
  144. metadata +25 -6
  145. data/ext/polars/src/allocator.rs +0 -13
  146. data/lib/polars/plot.rb +0 -109
@@ -23,28 +23,29 @@ impl RbDataFrame {
23
23
  let ignore_errors = bool::try_convert(arguments[4])?;
24
24
  let n_rows = Option::<usize>::try_convert(arguments[5])?;
25
25
  let skip_rows = usize::try_convert(arguments[6])?;
26
- let projection = Option::<Vec<usize>>::try_convert(arguments[7])?;
27
- let separator = String::try_convert(arguments[8])?;
28
- let rechunk = bool::try_convert(arguments[9])?;
29
- let columns = Option::<Vec<String>>::try_convert(arguments[10])?;
30
- let encoding = Wrap::<CsvEncoding>::try_convert(arguments[11])?;
31
- let n_threads = Option::<usize>::try_convert(arguments[12])?;
32
- let path = Option::<String>::try_convert(arguments[13])?;
33
- let overwrite_dtype = Option::<Vec<(String, Wrap<DataType>)>>::try_convert(arguments[14])?;
34
- let overwrite_dtype_slice = Option::<Vec<Wrap<DataType>>>::try_convert(arguments[15])?;
35
- let low_memory = bool::try_convert(arguments[16])?;
36
- let comment_prefix = Option::<String>::try_convert(arguments[17])?;
37
- let quote_char = Option::<String>::try_convert(arguments[18])?;
38
- let null_values = Option::<Wrap<NullValues>>::try_convert(arguments[19])?;
39
- let missing_utf8_is_empty_string = bool::try_convert(arguments[20])?;
40
- let try_parse_dates = bool::try_convert(arguments[21])?;
41
- let skip_rows_after_header = usize::try_convert(arguments[22])?;
42
- let row_index = Option::<(String, IdxSize)>::try_convert(arguments[23])?;
43
- let eol_char = String::try_convert(arguments[24])?;
44
- let raise_if_empty = bool::try_convert(arguments[25])?;
45
- let truncate_ragged_lines = bool::try_convert(arguments[26])?;
46
- let decimal_comma = bool::try_convert(arguments[27])?;
47
- let schema = Option::<Wrap<Schema>>::try_convert(arguments[28])?;
26
+ let skip_lines = usize::try_convert(arguments[7])?;
27
+ let projection = Option::<Vec<usize>>::try_convert(arguments[8])?;
28
+ let separator = String::try_convert(arguments[9])?;
29
+ let rechunk = bool::try_convert(arguments[10])?;
30
+ let columns = Option::<Vec<String>>::try_convert(arguments[11])?;
31
+ let encoding = Wrap::<CsvEncoding>::try_convert(arguments[12])?;
32
+ let n_threads = Option::<usize>::try_convert(arguments[13])?;
33
+ let path = Option::<String>::try_convert(arguments[14])?;
34
+ let overwrite_dtype = Option::<Vec<(String, Wrap<DataType>)>>::try_convert(arguments[15])?;
35
+ let overwrite_dtype_slice = Option::<Vec<Wrap<DataType>>>::try_convert(arguments[16])?;
36
+ let low_memory = bool::try_convert(arguments[17])?;
37
+ let comment_prefix = Option::<String>::try_convert(arguments[18])?;
38
+ let quote_char = Option::<String>::try_convert(arguments[19])?;
39
+ let null_values = Option::<Wrap<NullValues>>::try_convert(arguments[20])?;
40
+ let missing_utf8_is_empty_string = bool::try_convert(arguments[21])?;
41
+ let try_parse_dates = bool::try_convert(arguments[22])?;
42
+ let skip_rows_after_header = usize::try_convert(arguments[23])?;
43
+ let row_index = Option::<(String, IdxSize)>::try_convert(arguments[24])?;
44
+ let eol_char = String::try_convert(arguments[25])?;
45
+ let raise_if_empty = bool::try_convert(arguments[26])?;
46
+ let truncate_ragged_lines = bool::try_convert(arguments[27])?;
47
+ let decimal_comma = bool::try_convert(arguments[28])?;
48
+ let schema = Option::<Wrap<Schema>>::try_convert(arguments[29])?;
48
49
  // end arguments
49
50
 
50
51
  let null_values = null_values.map(|w| w.0);
@@ -88,6 +89,7 @@ impl RbDataFrame {
88
89
  .with_has_header(has_header)
89
90
  .with_n_rows(n_rows)
90
91
  .with_skip_rows(skip_rows)
92
+ .with_skip_lines(skip_lines)
91
93
  .with_ignore_errors(ignore_errors)
92
94
  .with_projection(projection.map(Arc::new))
93
95
  .with_rechunk(rechunk)
@@ -145,33 +147,6 @@ impl RbDataFrame {
145
147
  Ok(out.into())
146
148
  }
147
149
 
148
- pub fn read_ndjson(
149
- rb_f: Value,
150
- ignore_errors: bool,
151
- schema: Option<Wrap<Schema>>,
152
- schema_overrides: Option<Wrap<Schema>>,
153
- ) -> RbResult<Self> {
154
- let rb_f = read_if_bytesio(rb_f);
155
- let mmap_bytes_r = get_mmap_bytes_reader(&rb_f)?;
156
-
157
- let mut builder = JsonReader::new(mmap_bytes_r)
158
- .with_json_format(JsonFormat::JsonLines)
159
- .with_ignore_errors(ignore_errors);
160
-
161
- if let Some(schema) = schema {
162
- builder = builder.with_schema(Arc::new(schema.0));
163
- }
164
-
165
- if let Some(schema) = schema_overrides.as_ref() {
166
- builder = builder.with_schema_overwrite(&schema.0);
167
- }
168
-
169
- let out = builder
170
- .finish()
171
- .map_err(|e| RbPolarsErr::Other(format!("{e}")))?;
172
- Ok(out.into())
173
- }
174
-
175
150
  pub fn read_ipc(
176
151
  rb_f: Value,
177
152
  columns: Option<Vec<String>>,
@@ -242,101 +217,12 @@ impl RbDataFrame {
242
217
  Ok(RbDataFrame::new(df))
243
218
  }
244
219
 
245
- #[allow(clippy::too_many_arguments)]
246
- pub fn write_csv(
247
- &self,
248
- rb_f: Value,
249
- include_header: bool,
250
- separator: u8,
251
- quote_char: u8,
252
- batch_size: NonZeroUsize,
253
- datetime_format: Option<String>,
254
- date_format: Option<String>,
255
- time_format: Option<String>,
256
- float_precision: Option<usize>,
257
- null_value: Option<String>,
258
- ) -> RbResult<()> {
259
- let null = null_value.unwrap_or_default();
260
- let mut buf = get_file_like(rb_f, true)?;
261
- CsvWriter::new(&mut buf)
262
- .include_header(include_header)
263
- .with_separator(separator)
264
- .with_quote_char(quote_char)
265
- .with_batch_size(batch_size)
266
- .with_datetime_format(datetime_format)
267
- .with_date_format(date_format)
268
- .with_time_format(time_format)
269
- .with_float_precision(float_precision)
270
- .with_null_value(null)
271
- .finish(&mut self.df.borrow_mut())
272
- .map_err(RbPolarsErr::from)?;
273
- Ok(())
274
- }
275
-
276
- pub fn write_parquet(
277
- &self,
278
- rb_f: Value,
279
- compression: String,
280
- compression_level: Option<i32>,
281
- statistics: Wrap<StatisticsOptions>,
282
- row_group_size: Option<usize>,
283
- data_page_size: Option<usize>,
284
- ) -> RbResult<()> {
285
- let compression = parse_parquet_compression(&compression, compression_level)?;
286
-
287
- let buf = get_file_like(rb_f, true)?;
288
- ParquetWriter::new(buf)
289
- .with_compression(compression)
290
- .with_statistics(statistics.0)
291
- .with_row_group_size(row_group_size)
292
- .with_data_page_size(data_page_size)
293
- .finish(&mut self.df.borrow_mut())
294
- .map_err(RbPolarsErr::from)?;
295
- Ok(())
296
- }
297
-
298
220
  pub fn write_json(&self, rb_f: Value) -> RbResult<()> {
299
221
  let file = BufWriter::new(get_file_like(rb_f, true)?);
300
222
 
301
223
  JsonWriter::new(file)
302
224
  .with_json_format(JsonFormat::Json)
303
- .finish(&mut self.df.borrow_mut())
304
- .map_err(RbPolarsErr::from)?;
305
- Ok(())
306
- }
307
-
308
- pub fn write_ndjson(&self, rb_f: Value) -> RbResult<()> {
309
- let file = BufWriter::new(get_file_like(rb_f, true)?);
310
-
311
- JsonWriter::new(file)
312
- .with_json_format(JsonFormat::JsonLines)
313
- .finish(&mut self.df.borrow_mut())
314
- .map_err(RbPolarsErr::from)?;
315
-
316
- Ok(())
317
- }
318
-
319
- pub fn write_ipc(
320
- &self,
321
- rb_f: Value,
322
- compression: Wrap<Option<IpcCompression>>,
323
- compat_level: RbCompatLevel,
324
- cloud_options: Option<Vec<(String, String)>>,
325
- retries: usize,
326
- ) -> RbResult<()> {
327
- let cloud_options = if let Ok(path) = String::try_convert(rb_f) {
328
- let cloud_options = parse_cloud_options(&path, cloud_options.unwrap_or_default())?;
329
- Some(cloud_options.with_max_retries(retries))
330
- } else {
331
- None
332
- };
333
-
334
- let f = crate::file::try_get_writeable(rb_f, cloud_options.as_ref())?;
335
-
336
- IpcWriter::new(f)
337
- .with_compression(compression.0)
338
- .with_compat_level(compat_level.0)
339
- .finish(&mut self.df.borrow_mut())
225
+ .finish(&mut self.df.write())
340
226
  .map_err(RbPolarsErr::from)?;
341
227
  Ok(())
342
228
  }
@@ -351,7 +237,7 @@ impl RbDataFrame {
351
237
  IpcStreamWriter::new(&mut buf)
352
238
  .with_compression(compression.0)
353
239
  .with_compat_level(compat_level.0)
354
- .finish(&mut self.df.borrow_mut())
240
+ .finish(&mut self.df.write())
355
241
  .map_err(RbPolarsErr::from)?;
356
242
  Ok(())
357
243
  }
@@ -367,7 +253,7 @@ impl RbDataFrame {
367
253
  AvroWriter::new(&mut buf)
368
254
  .with_compression(compression.0)
369
255
  .with_name(name)
370
- .finish(&mut self.df.borrow_mut())
256
+ .finish(&mut self.df.write())
371
257
  .map_err(RbPolarsErr::from)?;
372
258
  Ok(())
373
259
  }
@@ -5,27 +5,35 @@ mod io;
5
5
  mod serde;
6
6
 
7
7
  use magnus::{DataTypeFunctions, TypedData, gc};
8
+ use parking_lot::RwLock;
8
9
  use polars::prelude::*;
9
- use std::cell::RefCell;
10
10
 
11
11
  use crate::series::mark_series;
12
12
 
13
13
  #[derive(TypedData)]
14
14
  #[magnus(class = "Polars::RbDataFrame", mark)]
15
15
  pub struct RbDataFrame {
16
- pub df: RefCell<DataFrame>,
16
+ pub df: RwLock<DataFrame>,
17
+ }
18
+
19
+ impl Clone for RbDataFrame {
20
+ fn clone(&self) -> Self {
21
+ RbDataFrame {
22
+ df: RwLock::new(self.df.read().clone()),
23
+ }
24
+ }
17
25
  }
18
26
 
19
27
  impl From<DataFrame> for RbDataFrame {
20
28
  fn from(df: DataFrame) -> Self {
21
- RbDataFrame::new(df)
29
+ Self::new(df)
22
30
  }
23
31
  }
24
32
 
25
33
  impl RbDataFrame {
26
34
  pub fn new(df: DataFrame) -> Self {
27
35
  RbDataFrame {
28
- df: RefCell::new(df),
36
+ df: RwLock::new(df),
29
37
  }
30
38
  }
31
39
  }
@@ -36,7 +44,7 @@ impl DataTypeFunctions for RbDataFrame {
36
44
  // currently, this should only happen for write_* methods,
37
45
  // which should refuse to write Object datatype, and therefore be safe,
38
46
  // since GC will not have a chance to run
39
- if let Ok(df) = self.df.try_borrow() {
47
+ if let Some(df) = self.df.try_read() {
40
48
  for column in df.get_columns() {
41
49
  if let DataType::Object(_) = column.dtype() {
42
50
  match column {
@@ -14,7 +14,7 @@ impl RbDataFrame {
14
14
 
15
15
  Ok(self
16
16
  .df
17
- .borrow_mut()
17
+ .write()
18
18
  .serialize_into_writer(&mut writer)
19
19
  .map_err(RbPolarsErr::from)?)
20
20
  }
@@ -3,11 +3,17 @@ use std::fmt::{Debug, Formatter};
3
3
  use magnus::Error;
4
4
  use polars::prelude::PolarsError;
5
5
 
6
- use crate::exceptions::{ComputeError, InvalidOperationError};
7
- use crate::rb_modules;
6
+ use crate::RbErr;
7
+ use crate::exceptions::{
8
+ AssertionError, ColumnNotFoundError, ComputeError, DuplicateError, InvalidOperationError,
9
+ NoDataError, OutOfBoundsError, RbIOError, RbRuntimeError, SQLInterfaceError, SQLSyntaxError,
10
+ SchemaError, SchemaFieldNotFoundError, ShapeError, StringCacheMismatchError,
11
+ StructFieldNotFoundError,
12
+ };
8
13
 
9
14
  pub enum RbPolarsErr {
10
15
  Polars(PolarsError),
16
+ Ruby(RbErr),
11
17
  Other(String),
12
18
  }
13
19
 
@@ -17,9 +23,9 @@ impl From<PolarsError> for RbPolarsErr {
17
23
  }
18
24
  }
19
25
 
20
- impl From<std::io::Error> for RbPolarsErr {
21
- fn from(value: std::io::Error) -> Self {
22
- RbPolarsErr::Other(format!("{value:?}"))
26
+ impl From<RbErr> for RbPolarsErr {
27
+ fn from(err: RbErr) -> Self {
28
+ RbPolarsErr::Ruby(err)
23
29
  }
24
30
  }
25
31
 
@@ -27,13 +33,43 @@ impl From<RbPolarsErr> for Error {
27
33
  fn from(err: RbPolarsErr) -> Self {
28
34
  match err {
29
35
  RbPolarsErr::Polars(err) => match err {
36
+ PolarsError::AssertionError(err) => AssertionError::new_err(err.to_string()),
37
+ PolarsError::ColumnNotFound(name) => ColumnNotFoundError::new_err(name.to_string()),
30
38
  PolarsError::ComputeError(err) => ComputeError::new_err(err.to_string()),
39
+ PolarsError::Duplicate(err) => DuplicateError::new_err(err.to_string()),
31
40
  PolarsError::InvalidOperation(err) => {
32
41
  InvalidOperationError::new_err(err.to_string())
33
42
  }
34
- _ => Error::new(rb_modules::error(), err.to_string()),
43
+ PolarsError::IO { error, msg } => {
44
+ let msg = if let Some(msg) = msg {
45
+ msg.to_string()
46
+ } else {
47
+ error.to_string()
48
+ };
49
+ RbIOError::new_err(msg)
50
+ }
51
+ PolarsError::NoData(err) => NoDataError::new_err(err.to_string()),
52
+ PolarsError::OutOfBounds(err) => OutOfBoundsError::new_err(err.to_string()),
53
+ PolarsError::SQLInterface(name) => SQLInterfaceError::new_err(name.to_string()),
54
+ PolarsError::SQLSyntax(name) => SQLSyntaxError::new_err(name.to_string()),
55
+ PolarsError::SchemaFieldNotFound(name) => {
56
+ SchemaFieldNotFoundError::new_err(name.to_string())
57
+ }
58
+ PolarsError::SchemaMismatch(err) => SchemaError::new_err(err.to_string()),
59
+ PolarsError::ShapeMismatch(err) => ShapeError::new_err(err.to_string()),
60
+ PolarsError::StringCacheMismatch(err) => {
61
+ StringCacheMismatchError::new_err(err.to_string())
62
+ }
63
+ PolarsError::StructFieldNotFound(name) => {
64
+ StructFieldNotFoundError::new_err(name.to_string())
65
+ }
66
+ PolarsError::Context { .. } => {
67
+ let tmp = RbPolarsErr::Polars(err.context_trace());
68
+ RbErr::from(tmp)
69
+ }
35
70
  },
36
- RbPolarsErr::Other(err) => Error::new(rb_modules::error(), err.to_string()),
71
+ RbPolarsErr::Ruby(err) => err,
72
+ err => RbRuntimeError::new_err(format!("{:?}", &err)),
37
73
  }
38
74
  }
39
75
  }
@@ -43,6 +79,7 @@ impl Debug for RbPolarsErr {
43
79
  use RbPolarsErr::*;
44
80
  match self {
45
81
  Polars(err) => write!(f, "{err:?}"),
82
+ Ruby(err) => write!(f, "{err:?}"),
46
83
  Other(err) => write!(f, "BindingsError: {err:?}"),
47
84
  }
48
85
  }
@@ -1,9 +1,9 @@
1
1
  use crate::rb_modules;
2
- use magnus::{Error, Ruby};
2
+ use magnus::{Error, Module, Ruby};
3
3
  use std::borrow::Cow;
4
4
 
5
5
  macro_rules! create_exception {
6
- ($type:ident, $cls:expr) => {
6
+ ($type:ident) => {
7
7
  pub struct $type {}
8
8
 
9
9
  impl $type {
@@ -11,18 +11,51 @@ macro_rules! create_exception {
11
11
  where
12
12
  T: Into<Cow<'static, str>>,
13
13
  {
14
- Error::new($cls, message)
14
+ let ruby = Ruby::get().unwrap();
15
+ let cls = rb_modules::polars(&ruby)
16
+ .const_get(stringify!($type))
17
+ .unwrap();
18
+ Error::new(cls, message)
15
19
  }
16
20
  }
17
21
  };
18
22
  }
19
23
 
20
- create_exception!(RbTypeError, Ruby::get().unwrap().exception_type_error());
21
- create_exception!(RbValueError, Ruby::get().unwrap().exception_arg_error());
22
- create_exception!(
23
- RbOverflowError,
24
- Ruby::get().unwrap().exception_range_error()
25
- );
26
- create_exception!(RbIndexError, Ruby::get().unwrap().exception_index_error());
27
- create_exception!(ComputeError, rb_modules::compute_error());
28
- create_exception!(InvalidOperationError, rb_modules::invalid_operation_error());
24
+ create_exception!(AssertionError);
25
+ create_exception!(ColumnNotFoundError);
26
+ create_exception!(ComputeError);
27
+ create_exception!(DuplicateError);
28
+ create_exception!(InvalidOperationError);
29
+ create_exception!(NoDataError);
30
+ create_exception!(OutOfBoundsError);
31
+ create_exception!(SQLInterfaceError);
32
+ create_exception!(SQLSyntaxError);
33
+ create_exception!(SchemaError);
34
+ create_exception!(SchemaFieldNotFoundError);
35
+ create_exception!(ShapeError);
36
+ create_exception!(StringCacheMismatchError);
37
+ create_exception!(StructFieldNotFoundError);
38
+
39
+ macro_rules! create_ruby_exception {
40
+ ($type:ident, $cls:ident) => {
41
+ pub struct $type {}
42
+
43
+ impl $type {
44
+ pub fn new_err<T>(message: T) -> Error
45
+ where
46
+ T: Into<Cow<'static, str>>,
47
+ {
48
+ let cls = Ruby::get().unwrap().$cls();
49
+ Error::new(cls, message)
50
+ }
51
+ }
52
+ };
53
+ }
54
+
55
+ create_ruby_exception!(RbKeyboardInterrupt, exception_interrupt);
56
+ create_ruby_exception!(RbIndexError, exception_index_error);
57
+ create_ruby_exception!(RbIOError, exception_io_error);
58
+ create_ruby_exception!(RbOverflowError, exception_range_error);
59
+ create_ruby_exception!(RbRuntimeError, exception_runtime_error);
60
+ create_ruby_exception!(RbTypeError, exception_type_error);
61
+ create_ruby_exception!(RbValueError, exception_arg_error);
@@ -159,4 +159,16 @@ impl RbExpr {
159
159
  pub fn arr_explode(&self) -> Self {
160
160
  self.inner.clone().arr().explode().into()
161
161
  }
162
+
163
+ pub fn arr_eval(&self, expr: &RbExpr, as_list: bool) -> Self {
164
+ self.inner
165
+ .clone()
166
+ .arr()
167
+ .eval(expr.inner.clone(), as_list)
168
+ .into()
169
+ }
170
+
171
+ pub fn arr_agg(&self, expr: &RbExpr) -> Self {
172
+ self.inner.clone().arr().agg(expr.inner.clone()).into()
173
+ }
162
174
  }
@@ -26,8 +26,8 @@ impl RbDataTypeExpr {
26
26
  DataTypeExpr::OfExpr(Box::new(expr.inner.clone())).into()
27
27
  }
28
28
 
29
- pub fn collect_dtype(ruby: &Ruby, rb_self: &Self, schema: Wrap<Schema>) -> RbResult<Value> {
30
- let dtype = rb_self
29
+ pub fn collect_dtype(ruby: &Ruby, self_: &Self, schema: Wrap<Schema>) -> RbResult<Value> {
30
+ let dtype = self_
31
31
  .clone()
32
32
  .inner
33
33
  .into_datatype(&schema.0)
@@ -26,10 +26,6 @@ impl RbExpr {
26
26
  self.inner.clone().dt().offset_by(by.inner.clone()).into()
27
27
  }
28
28
 
29
- pub fn dt_with_time_unit(&self, tu: Wrap<TimeUnit>) -> Self {
30
- self.inner.clone().dt().with_time_unit(tu.0).into()
31
- }
32
-
33
29
  pub fn dt_convert_time_zone(&self, time_zone: String) -> RbResult<Self> {
34
30
  Ok(self
35
31
  .inner
@@ -90,7 +86,6 @@ impl RbExpr {
90
86
  self.inner.clone().dt().round(every.inner.clone()).into()
91
87
  }
92
88
 
93
- #[allow(clippy::too_many_arguments)]
94
89
  pub fn dt_replace(
95
90
  &self,
96
91
  year: &Self,
@@ -162,6 +157,10 @@ impl RbExpr {
162
157
  self.clone().inner.dt().month().into()
163
158
  }
164
159
 
160
+ pub fn dt_days_in_month(&self) -> Self {
161
+ self.inner.clone().dt().days_in_month().into()
162
+ }
163
+
165
164
  pub fn dt_week(&self) -> Self {
166
165
  self.clone().inner.dt().week().into()
167
166
  }
@@ -4,12 +4,14 @@ use magnus::{RArray, Value};
4
4
  use polars::lazy::dsl;
5
5
  use polars::prelude::*;
6
6
  use polars::series::ops::NullBehavior;
7
+ use polars_core::chunked_array::cast::CastOptions;
7
8
  use polars_core::series::IsSorted;
8
9
 
10
+ use super::datatype::RbDataTypeExpr;
9
11
  use super::selector::RbSelector;
10
12
  use crate::conversion::{Wrap, parse_fill_null_strategy};
13
+ use crate::expr::ToExprs;
11
14
  use crate::map::lazy::map_single;
12
- use crate::rb_exprs_to_exprs;
13
15
  use crate::{RbExpr, RbPolarsErr, RbResult};
14
16
 
15
17
  impl RbExpr {
@@ -161,6 +163,10 @@ impl RbExpr {
161
163
  self.inner.clone().last().into()
162
164
  }
163
165
 
166
+ pub fn item(&self, allow_empty: bool) -> Self {
167
+ self.inner.clone().item(allow_empty).into()
168
+ }
169
+
164
170
  pub fn implode(&self) -> Self {
165
171
  self.inner.clone().implode().into()
166
172
  }
@@ -254,14 +260,20 @@ impl RbExpr {
254
260
  self.inner.clone().null_count().into()
255
261
  }
256
262
 
257
- pub fn cast(&self, data_type: Wrap<DataType>, strict: bool) -> RbResult<Self> {
258
- let dt = data_type.0;
259
- let expr = if strict {
260
- self.inner.clone().strict_cast(dt)
263
+ pub fn cast(&self, dtype: &RbDataTypeExpr, strict: bool, wrap_numerical: bool) -> Self {
264
+ let options = if wrap_numerical {
265
+ CastOptions::Overflowing
266
+ } else if strict {
267
+ CastOptions::Strict
261
268
  } else {
262
- self.inner.clone().cast(dt)
269
+ CastOptions::NonStrict
263
270
  };
264
- Ok(expr.into())
271
+
272
+ let expr = self
273
+ .inner
274
+ .clone()
275
+ .cast_with_options(dtype.inner.clone(), options);
276
+ expr.into()
265
277
  }
266
278
 
267
279
  pub fn sort_with(&self, descending: bool, nulls_last: bool) -> Self {
@@ -286,7 +298,7 @@ impl RbExpr {
286
298
  }
287
299
 
288
300
  pub fn top_k_by(&self, by: RArray, k: &Self, reverse: Vec<bool>) -> RbResult<Self> {
289
- let by = rb_exprs_to_exprs(by)?;
301
+ let by = by.to_exprs()?;
290
302
  Ok(self
291
303
  .inner
292
304
  .clone()
@@ -299,7 +311,7 @@ impl RbExpr {
299
311
  }
300
312
 
301
313
  pub fn bottom_k_by(&self, by: RArray, k: &Self, reverse: Vec<bool>) -> RbResult<Self> {
302
- let by = rb_exprs_to_exprs(by)?;
314
+ let by = by.to_exprs()?;
303
315
  Ok(self
304
316
  .inner
305
317
  .clone()
@@ -355,7 +367,7 @@ impl RbExpr {
355
367
  multithreaded: bool,
356
368
  maintain_order: bool,
357
369
  ) -> RbResult<Self> {
358
- let by = rb_exprs_to_exprs(by)?;
370
+ let by = by.to_exprs()?;
359
371
  Ok(self
360
372
  .inner
361
373
  .clone()
@@ -585,9 +597,33 @@ impl RbExpr {
585
597
  self.inner.clone().is_duplicated().into()
586
598
  }
587
599
 
588
- pub fn over(&self, partition_by: RArray) -> RbResult<Self> {
589
- let partition_by = rb_exprs_to_exprs(partition_by)?;
590
- Ok(self.inner.clone().over(partition_by).into())
600
+ pub fn over(
601
+ &self,
602
+ partition_by: Option<RArray>,
603
+ order_by: Option<RArray>,
604
+ order_by_descending: bool,
605
+ order_by_nulls_last: bool,
606
+ mapping_strategy: Wrap<WindowMapping>,
607
+ ) -> RbResult<Self> {
608
+ let partition_by = partition_by.map(|v| v.to_exprs()).transpose()?;
609
+
610
+ let order_by = order_by.map(|v| v.to_exprs()).transpose()?.map(|order_by| {
611
+ (
612
+ order_by,
613
+ SortOptions {
614
+ descending: order_by_descending,
615
+ nulls_last: order_by_nulls_last,
616
+ ..Default::default()
617
+ },
618
+ )
619
+ });
620
+
621
+ Ok(self
622
+ .inner
623
+ .clone()
624
+ .over_with_options(partition_by, order_by, mapping_strategy.0)
625
+ .map_err(RbPolarsErr::from)?
626
+ .into())
591
627
  }
592
628
 
593
629
  pub fn rolling(
@@ -47,6 +47,10 @@ impl RbExpr {
47
47
  self.inner.clone().list().eval(expr.inner.clone()).into()
48
48
  }
49
49
 
50
+ pub fn list_agg(&self, expr: &RbExpr) -> Self {
51
+ self.inner.clone().list().agg(expr.inner.clone()).into()
52
+ }
53
+
50
54
  pub fn list_filter(&self, predicate: &RbExpr) -> Self {
51
55
  self.inner
52
56
  .clone()
@@ -1,6 +1,7 @@
1
1
  use magnus::{RArray, Ruby};
2
2
  use polars::prelude::Schema;
3
3
 
4
+ use crate::expr::ToRbExprs;
4
5
  use crate::{RbExpr, RbPolarsErr, RbResult, Wrap};
5
6
 
6
7
  impl RbExpr {
@@ -8,15 +9,15 @@ impl RbExpr {
8
9
  self.inner == other.inner
9
10
  }
10
11
 
11
- pub fn meta_pop(ruby: &Ruby, rb_self: &Self, schema: Option<Wrap<Schema>>) -> RbResult<RArray> {
12
+ pub fn meta_pop(ruby: &Ruby, self_: &Self, schema: Option<Wrap<Schema>>) -> RbResult<RArray> {
12
13
  let schema = schema.as_ref().map(|s| &s.0);
13
- let exprs = rb_self
14
+ let exprs = self_
14
15
  .inner
15
16
  .clone()
16
17
  .meta()
17
18
  .pop(schema)
18
19
  .map_err(RbPolarsErr::from)?;
19
- Ok(ruby.ary_from_iter(exprs.iter().map(|e| RbExpr::from(e.clone()))))
20
+ Ok(exprs.to_rbexprs(ruby))
20
21
  }
21
22
 
22
23
  pub fn meta_root_names(&self) -> Vec<String> {
@@ -83,4 +84,8 @@ impl RbExpr {
83
84
  pub fn meta_tree_format(&self, schema: Option<Wrap<Schema>>) -> RbResult<String> {
84
85
  self.compute_tree_format(false, schema)
85
86
  }
87
+
88
+ pub fn meta_show_graph(&self, schema: Option<Wrap<Schema>>) -> RbResult<String> {
89
+ self.compute_tree_format(true, schema)
90
+ }
86
91
  }