polars-df 0.20.0 → 0.21.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +27 -0
  3. data/Cargo.lock +192 -186
  4. data/LICENSE.txt +1 -1
  5. data/ext/polars/Cargo.toml +19 -9
  6. data/ext/polars/src/batched_csv.rs +2 -2
  7. data/ext/polars/src/catalog/mod.rs +1 -0
  8. data/ext/polars/src/catalog/unity.rs +450 -0
  9. data/ext/polars/src/conversion/any_value.rs +9 -19
  10. data/ext/polars/src/conversion/categorical.rs +30 -0
  11. data/ext/polars/src/conversion/chunked_array.rs +8 -8
  12. data/ext/polars/src/conversion/mod.rs +275 -109
  13. data/ext/polars/src/dataframe/construction.rs +2 -2
  14. data/ext/polars/src/dataframe/export.rs +2 -2
  15. data/ext/polars/src/dataframe/general.rs +4 -2
  16. data/ext/polars/src/dataframe/io.rs +2 -2
  17. data/ext/polars/src/exceptions.rs +2 -1
  18. data/ext/polars/src/expr/array.rs +73 -4
  19. data/ext/polars/src/expr/binary.rs +26 -1
  20. data/ext/polars/src/expr/bitwise.rs +39 -0
  21. data/ext/polars/src/expr/categorical.rs +20 -0
  22. data/ext/polars/src/expr/datatype.rs +37 -0
  23. data/ext/polars/src/expr/datetime.rs +58 -0
  24. data/ext/polars/src/expr/general.rs +106 -22
  25. data/ext/polars/src/expr/list.rs +45 -2
  26. data/ext/polars/src/expr/meta.rs +5 -28
  27. data/ext/polars/src/expr/mod.rs +4 -1
  28. data/ext/polars/src/expr/name.rs +10 -2
  29. data/ext/polars/src/expr/rolling.rs +21 -1
  30. data/ext/polars/src/expr/selector.rs +219 -0
  31. data/ext/polars/src/expr/string.rs +73 -6
  32. data/ext/polars/src/expr/struct.rs +9 -1
  33. data/ext/polars/src/file.rs +11 -5
  34. data/ext/polars/src/functions/io.rs +21 -11
  35. data/ext/polars/src/functions/lazy.rs +26 -54
  36. data/ext/polars/src/functions/meta.rs +2 -2
  37. data/ext/polars/src/functions/misc.rs +1 -1
  38. data/ext/polars/src/functions/string_cache.rs +4 -5
  39. data/ext/polars/src/interop/numo/numo_rs.rs +1 -1
  40. data/ext/polars/src/interop/numo/to_numo_series.rs +1 -1
  41. data/ext/polars/src/io/mod.rs +102 -0
  42. data/ext/polars/src/lazyframe/general.rs +124 -111
  43. data/ext/polars/src/lazyframe/serde.rs +1 -1
  44. data/ext/polars/src/lazyframe/sink.rs +6 -6
  45. data/ext/polars/src/lib.rs +216 -29
  46. data/ext/polars/src/map/dataframe.rs +9 -9
  47. data/ext/polars/src/map/lazy.rs +1 -1
  48. data/ext/polars/src/map/mod.rs +31 -19
  49. data/ext/polars/src/map/series.rs +9 -9
  50. data/ext/polars/src/on_startup.rs +5 -2
  51. data/ext/polars/src/rb_modules.rs +1 -1
  52. data/ext/polars/src/series/aggregation.rs +44 -0
  53. data/ext/polars/src/series/construction.rs +11 -7
  54. data/ext/polars/src/series/export.rs +6 -4
  55. data/ext/polars/src/series/general.rs +75 -210
  56. data/ext/polars/src/series/import.rs +2 -2
  57. data/ext/polars/src/series/map.rs +227 -0
  58. data/ext/polars/src/series/mod.rs +2 -1
  59. data/ext/polars/src/series/scatter.rs +1 -1
  60. data/ext/polars/src/utils.rs +10 -2
  61. data/lib/polars/array_expr.rb +382 -3
  62. data/lib/polars/array_name_space.rb +281 -0
  63. data/lib/polars/binary_expr.rb +67 -0
  64. data/lib/polars/binary_name_space.rb +43 -0
  65. data/lib/polars/cat_expr.rb +224 -0
  66. data/lib/polars/cat_name_space.rb +130 -32
  67. data/lib/polars/catalog/unity/catalog_info.rb +20 -0
  68. data/lib/polars/catalog/unity/column_info.rb +31 -0
  69. data/lib/polars/catalog/unity/namespace_info.rb +21 -0
  70. data/lib/polars/catalog/unity/table_info.rb +50 -0
  71. data/lib/polars/catalog.rb +448 -0
  72. data/lib/polars/config.rb +2 -2
  73. data/lib/polars/convert.rb +12 -2
  74. data/lib/polars/data_frame.rb +834 -48
  75. data/lib/polars/data_type_expr.rb +52 -0
  76. data/lib/polars/data_types.rb +61 -5
  77. data/lib/polars/date_time_expr.rb +251 -0
  78. data/lib/polars/date_time_name_space.rb +299 -0
  79. data/lib/polars/exceptions.rb +7 -2
  80. data/lib/polars/expr.rb +1247 -211
  81. data/lib/polars/functions/col.rb +6 -5
  82. data/lib/polars/functions/datatype.rb +21 -0
  83. data/lib/polars/functions/lazy.rb +127 -15
  84. data/lib/polars/functions/repeat.rb +4 -0
  85. data/lib/polars/io/csv.rb +19 -1
  86. data/lib/polars/io/json.rb +16 -0
  87. data/lib/polars/io/ndjson.rb +13 -0
  88. data/lib/polars/io/parquet.rb +70 -66
  89. data/lib/polars/io/scan_options.rb +47 -0
  90. data/lib/polars/lazy_frame.rb +1099 -95
  91. data/lib/polars/list_expr.rb +400 -11
  92. data/lib/polars/list_name_space.rb +321 -5
  93. data/lib/polars/meta_expr.rb +71 -22
  94. data/lib/polars/name_expr.rb +36 -0
  95. data/lib/polars/scan_cast_options.rb +64 -0
  96. data/lib/polars/schema.rb +84 -3
  97. data/lib/polars/selector.rb +210 -0
  98. data/lib/polars/selectors.rb +932 -203
  99. data/lib/polars/series.rb +1083 -63
  100. data/lib/polars/string_expr.rb +435 -9
  101. data/lib/polars/string_name_space.rb +729 -45
  102. data/lib/polars/struct_expr.rb +103 -0
  103. data/lib/polars/struct_name_space.rb +19 -1
  104. data/lib/polars/utils/parse.rb +40 -0
  105. data/lib/polars/utils/various.rb +18 -1
  106. data/lib/polars/utils.rb +9 -1
  107. data/lib/polars/version.rb +1 -1
  108. data/lib/polars.rb +10 -0
  109. metadata +20 -2
@@ -1,13 +1,13 @@
1
1
  use magnus::encoding::{self, EncodingCapable};
2
2
  use magnus::{
3
- class, prelude::*, typed_data::Obj, value::Opaque, Float, Integer, RArray, RString, Ruby, Value,
3
+ Float, Integer, RArray, RString, Ruby, Value, class, prelude::*, typed_data::Obj, value::Opaque,
4
4
  };
5
5
  use polars::lazy::dsl;
6
6
  use polars::prelude::*;
7
7
 
8
- use crate::conversion::{get_lf, get_rbseq, Wrap};
8
+ use crate::conversion::{Wrap, get_lf, get_rbseq};
9
+ use crate::expr::datatype::RbDataTypeExpr;
9
10
  use crate::map::lazy::binary_lambda;
10
- use crate::prelude::vec_extract_wrapped;
11
11
  use crate::rb_exprs_to_exprs;
12
12
  use crate::{RbDataFrame, RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbValueError};
13
13
 
@@ -85,6 +85,10 @@ pub fn as_struct(exprs: RArray) -> RbResult<RbExpr> {
85
85
  Ok(dsl::as_struct(exprs).into())
86
86
  }
87
87
 
88
+ pub fn field(names: Vec<String>) -> RbExpr {
89
+ dsl::Expr::Field(names.into_iter().map(|x| x.into()).collect()).into()
90
+ }
91
+
88
92
  pub fn coalesce(exprs: RArray) -> RbResult<RbExpr> {
89
93
  let exprs = rb_exprs_to_exprs(exprs)?;
90
94
  Ok(dsl::coalesce(&exprs).into())
@@ -103,10 +107,6 @@ pub fn collect_all(lfs: RArray) -> RbResult<RArray> {
103
107
  })))
104
108
  }
105
109
 
106
- pub fn cols(names: Vec<String>) -> RbExpr {
107
- dsl::cols(names).into()
108
- }
109
-
110
110
  pub fn concat_lf(
111
111
  lfs: Value,
112
112
  rechunk: bool,
@@ -166,20 +166,24 @@ pub fn cum_fold(
166
166
  acc: &RbExpr,
167
167
  lambda: Value,
168
168
  exprs: RArray,
169
+ returns_scalar: bool,
170
+ return_dtype: Option<&RbDataTypeExpr>,
169
171
  include_init: bool,
170
172
  ) -> RbResult<RbExpr> {
171
173
  let exprs = rb_exprs_to_exprs(exprs)?;
172
174
  let lambda = Opaque::from(lambda);
173
-
174
- let func = move |a: Column, b: Column| {
175
- binary_lambda(
176
- Ruby::get().unwrap().get_inner(lambda),
177
- a.take_materialized_series(),
178
- b.take_materialized_series(),
179
- )
180
- .map(|v| v.map(Column::from))
181
- };
182
- Ok(dsl::cum_fold_exprs(acc.inner.clone(), func, exprs, include_init).into())
175
+ let func = PlanCallback::new(move |(a, b): (Series, Series)| {
176
+ binary_lambda(Ruby::get().unwrap().get_inner(lambda), a, b).map(|v| v.unwrap())
177
+ });
178
+ Ok(dsl::cum_fold_exprs(
179
+ acc.inner.clone(),
180
+ func,
181
+ exprs,
182
+ returns_scalar,
183
+ return_dtype.map(|v| v.inner.clone()),
184
+ include_init,
185
+ )
186
+ .into())
183
187
  }
184
188
 
185
189
  pub fn concat_lf_diagonal(
@@ -220,24 +224,6 @@ pub fn concat_lf_horizontal(lfs: RArray, parallel: bool) -> RbResult<RbLazyFrame
220
224
  Ok(lf.into())
221
225
  }
222
226
 
223
- pub fn dtype_cols(dtypes: RArray) -> RbResult<RbExpr> {
224
- let dtypes = dtypes
225
- .into_iter()
226
- .map(Wrap::<DataType>::try_convert)
227
- .collect::<RbResult<Vec<Wrap<DataType>>>>()?;
228
- let dtypes = vec_extract_wrapped(dtypes);
229
- Ok(dsl::dtype_cols(dtypes).into())
230
- }
231
-
232
- pub fn index_cols(indices: Vec<i64>) -> RbExpr {
233
- if indices.len() == 1 {
234
- dsl::nth(indices[0])
235
- } else {
236
- dsl::index_cols(indices)
237
- }
238
- .into()
239
- }
240
-
241
227
  #[allow(clippy::too_many_arguments)]
242
228
  pub fn duration(
243
229
  weeks: Option<&RbExpr>,
@@ -274,42 +260,28 @@ pub fn duration(
274
260
  dsl::duration(args).into()
275
261
  }
276
262
 
277
- pub fn first() -> RbExpr {
278
- dsl::first().into()
279
- }
280
-
281
263
  pub fn fold(
282
264
  acc: &RbExpr,
283
265
  lambda: Value,
284
266
  exprs: RArray,
285
267
  returns_scalar: bool,
286
- return_dtype: Option<Wrap<DataType>>,
268
+ return_dtype: Option<&RbDataTypeExpr>,
287
269
  ) -> RbResult<RbExpr> {
288
270
  let exprs = rb_exprs_to_exprs(exprs)?;
289
271
  let lambda = Opaque::from(lambda);
290
-
291
- let func = move |a: Column, b: Column| {
292
- binary_lambda(
293
- Ruby::get().unwrap().get_inner(lambda),
294
- a.take_materialized_series(),
295
- b.take_materialized_series(),
296
- )
297
- .map(|v| v.map(Column::from))
298
- };
272
+ let func = PlanCallback::new(move |(a, b): (Series, Series)| {
273
+ binary_lambda(Ruby::get().unwrap().get_inner(lambda), a, b).map(|v| v.unwrap())
274
+ });
299
275
  Ok(dsl::fold_exprs(
300
276
  acc.inner.clone(),
301
277
  func,
302
278
  exprs,
303
279
  returns_scalar,
304
- return_dtype.map(|w| w.0),
280
+ return_dtype.map(|w| w.inner.clone()),
305
281
  )
306
282
  .into())
307
283
  }
308
284
 
309
- pub fn last() -> RbExpr {
310
- dsl::last().into()
311
- }
312
-
313
285
  pub fn lit(value: Value, allow_object: bool, is_scalar: bool) -> RbResult<RbExpr> {
314
286
  if value.is_kind_of(class::true_class()) || value.is_kind_of(class::false_class()) {
315
287
  Ok(dsl::lit(bool::try_convert(value)?).into())
@@ -1,8 +1,8 @@
1
1
  use magnus::{IntoValue, Value};
2
2
  use polars_core;
3
+ use polars_core::POOL;
3
4
  use polars_core::fmt::FloatFmt;
4
5
  use polars_core::prelude::IDX_DTYPE;
5
- use polars_core::POOL;
6
6
 
7
7
  use crate::conversion::Wrap;
8
8
  use crate::{RbResult, RbValueError};
@@ -22,7 +22,7 @@ pub fn set_float_fmt(fmt: String) -> RbResult<()> {
22
22
  e => {
23
23
  return Err(RbValueError::new_err(format!(
24
24
  "fmt must be one of {{'full', 'mixed'}}, got {e}",
25
- )))
25
+ )));
26
26
  }
27
27
  };
28
28
  polars_core::fmt::set_float_fmt(fmt);
@@ -1,6 +1,6 @@
1
+ use crate::RbResult;
1
2
  use crate::conversion::Wrap;
2
3
  use crate::prelude::DataType;
3
- use crate::RbResult;
4
4
 
5
5
  pub fn dtype_str_repr(dtype: Wrap<DataType>) -> RbResult<String> {
6
6
  let dtype = dtype.0;
@@ -1,17 +1,17 @@
1
1
  use crate::RbResult;
2
2
  use magnus::{RArray, Ruby, Value};
3
- use polars_core::StringCacheHolder;
4
3
 
5
4
  pub fn enable_string_cache() {
6
- polars_core::enable_string_cache()
5
+ // The string cache no longer exists.
7
6
  }
8
7
 
9
8
  pub fn disable_string_cache() {
10
- polars_core::disable_string_cache()
9
+ // The string cache no longer exists.
11
10
  }
12
11
 
13
12
  pub fn using_string_cache() -> bool {
14
- polars_core::using_string_cache()
13
+ // The string cache no longer exists.
14
+ true
15
15
  }
16
16
 
17
17
  #[magnus::wrap(class = "Polars::RbStringCacheHolder")]
@@ -19,7 +19,6 @@ pub struct RbStringCacheHolder {}
19
19
 
20
20
  impl RbStringCacheHolder {
21
21
  pub fn hold() -> RbResult<Value> {
22
- let _hold = StringCacheHolder::hold();
23
22
  Ruby::get().unwrap().yield_splat(RArray::new())
24
23
  }
25
24
  }
@@ -1,4 +1,4 @@
1
- use magnus::{class, prelude::*, IntoValue, Module, RArray, RClass, RModule, Value};
1
+ use magnus::{IntoValue, Module, RArray, RClass, RModule, Value, class, prelude::*};
2
2
 
3
3
  use crate::RbResult;
4
4
 
@@ -3,10 +3,10 @@ use num_traits::{Float, NumCast};
3
3
  use polars_core::prelude::*;
4
4
 
5
5
  use super::numo_rs::{Element, RbArray1};
6
+ use crate::RbResult;
6
7
  use crate::error::RbPolarsErr;
7
8
  use crate::raise_err;
8
9
  use crate::series::RbSeries;
9
- use crate::RbResult;
10
10
 
11
11
  impl RbSeries {
12
12
  /// Convert this Series to a Numo array.
@@ -0,0 +1,102 @@
1
+ use std::sync::Arc;
2
+
3
+ use magnus::{TryConvert, Value, value::ReprValue};
4
+ use polars::prelude::deletion::DeletionFilesList;
5
+ use polars::prelude::{
6
+ CastColumnsPolicy, ColumnMapping, ExtraColumnsPolicy, MissingColumnsPolicy, PlSmallStr, Schema,
7
+ UnifiedScanArgs,
8
+ };
9
+ use polars_io::{HiveOptions, RowIndex};
10
+ use polars_utils::IdxSize;
11
+ use polars_utils::plpath::PlPathRef;
12
+ use polars_utils::slice_enum::Slice;
13
+
14
+ use crate::RbResult;
15
+ use crate::prelude::Wrap;
16
+
17
+ /// Interface to `class ScanOptions` on the Ruby side
18
+ pub struct RbScanOptions(Value);
19
+
20
+ impl TryConvert for RbScanOptions {
21
+ fn try_convert(ob: Value) -> RbResult<Self> {
22
+ Ok(Self(ob))
23
+ }
24
+ }
25
+
26
+ impl RbScanOptions {
27
+ pub fn extract_unified_scan_args(
28
+ &self,
29
+ // For cloud_options init
30
+ first_path: Option<PlPathRef>,
31
+ ) -> RbResult<UnifiedScanArgs> {
32
+ let row_index: Option<(Wrap<PlSmallStr>, IdxSize)> = self.0.funcall("row_index", ())?;
33
+ let pre_slice: Option<(i64, usize)> = self.0.funcall("pre_slice", ())?;
34
+ let cast_options: Wrap<CastColumnsPolicy> = self.0.funcall("cast_options", ())?;
35
+ let extra_columns: Wrap<ExtraColumnsPolicy> = self.0.funcall("extra_columns", ())?;
36
+ let missing_columns: Wrap<MissingColumnsPolicy> = self.0.funcall("missing_columns", ())?;
37
+ let include_file_paths: Option<Wrap<PlSmallStr>> =
38
+ self.0.funcall("include_file_paths", ())?;
39
+ let glob: bool = self.0.funcall("glob", ())?;
40
+ let hive_partitioning: Option<bool> = self.0.funcall("hive_partitioning", ())?;
41
+ let hive_schema: Option<Wrap<Schema>> = self.0.funcall("hive_schema", ())?;
42
+ let try_parse_hive_dates: bool = self.0.funcall("try_parse_hive_dates", ())?;
43
+ let rechunk: bool = self.0.funcall("rechunk", ())?;
44
+ let cache: bool = self.0.funcall("cache", ())?;
45
+ let storage_options: Option<Vec<(String, String)>> =
46
+ self.0.funcall("storage_options", ())?;
47
+ let retries: usize = self.0.funcall("retries", ())?;
48
+ let deletion_files: Option<Wrap<DeletionFilesList>> =
49
+ self.0.funcall("deletion_files", ())?;
50
+ let column_mapping: Option<Wrap<ColumnMapping>> = self.0.funcall("column_mapping", ())?;
51
+
52
+ let cloud_options = storage_options;
53
+
54
+ let cloud_options = if let Some(first_path) = first_path {
55
+ use crate::prelude::parse_cloud_options;
56
+
57
+ let first_path_url = first_path.to_str();
58
+ let cloud_options =
59
+ parse_cloud_options(first_path_url, cloud_options.unwrap_or_default())?;
60
+
61
+ Some(cloud_options.with_max_retries(retries))
62
+ } else {
63
+ None
64
+ };
65
+
66
+ let hive_schema = hive_schema.map(|s| Arc::new(s.0));
67
+
68
+ let row_index = row_index.map(|(name, offset)| RowIndex {
69
+ name: name.0,
70
+ offset,
71
+ });
72
+
73
+ let hive_options = HiveOptions {
74
+ enabled: hive_partitioning,
75
+ hive_start_idx: 0,
76
+ schema: hive_schema,
77
+ try_parse_dates: try_parse_hive_dates,
78
+ };
79
+
80
+ let unified_scan_args = UnifiedScanArgs {
81
+ // Schema is currently still stored inside the options per scan type, but we do eventually
82
+ // want to put it here instead.
83
+ schema: None,
84
+ cloud_options,
85
+ hive_options,
86
+ rechunk,
87
+ cache,
88
+ glob,
89
+ projection: None,
90
+ row_index,
91
+ pre_slice: pre_slice.map(Slice::from),
92
+ cast_columns_policy: cast_options.0,
93
+ missing_columns_policy: missing_columns.0,
94
+ extra_columns_policy: extra_columns.0,
95
+ include_file_paths: include_file_paths.map(|x| x.0),
96
+ deletion_files: DeletionFilesList::filter_empty(deletion_files.map(|x| x.0)),
97
+ column_mapping: column_mapping.map(|x| x.0),
98
+ };
99
+
100
+ Ok(unified_scan_args)
101
+ }
102
+ }