polars-df 0.25.0 → 0.26.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +33 -0
  3. data/Cargo.lock +270 -97
  4. data/LICENSE.txt +1 -1
  5. data/README.md +1 -3
  6. data/ext/polars/Cargo.toml +19 -18
  7. data/ext/polars/src/catalog/unity.rs +15 -20
  8. data/ext/polars/src/conversion/any_value.rs +53 -29
  9. data/ext/polars/src/conversion/chunked_array.rs +58 -56
  10. data/ext/polars/src/conversion/datetime.rs +58 -7
  11. data/ext/polars/src/conversion/mod.rs +200 -150
  12. data/ext/polars/src/dataframe/export.rs +15 -12
  13. data/ext/polars/src/dataframe/general.rs +25 -7
  14. data/ext/polars/src/dataframe/map.rs +6 -4
  15. data/ext/polars/src/error.rs +1 -1
  16. data/ext/polars/src/expr/array.rs +0 -24
  17. data/ext/polars/src/expr/datatype.rs +13 -3
  18. data/ext/polars/src/expr/datetime.rs +4 -4
  19. data/ext/polars/src/expr/general.rs +35 -15
  20. data/ext/polars/src/expr/list.rs +0 -26
  21. data/ext/polars/src/expr/rolling.rs +24 -0
  22. data/ext/polars/src/functions/business.rs +2 -2
  23. data/ext/polars/src/functions/io.rs +4 -3
  24. data/ext/polars/src/functions/lazy.rs +65 -46
  25. data/ext/polars/src/functions/meta.rs +6 -5
  26. data/ext/polars/src/functions/mod.rs +0 -1
  27. data/ext/polars/src/functions/range.rs +13 -0
  28. data/ext/polars/src/functions/utils.rs +4 -2
  29. data/ext/polars/src/interop/arrow/mod.rs +4 -2
  30. data/ext/polars/src/interop/arrow/to_rb.rs +1 -1
  31. data/ext/polars/src/interop/numo/to_numo_series.rs +26 -25
  32. data/ext/polars/src/io/scan_options.rs +6 -3
  33. data/ext/polars/src/io/sink_options.rs +2 -0
  34. data/ext/polars/src/lazyframe/general.rs +243 -17
  35. data/ext/polars/src/lazyframe/optflags.rs +2 -1
  36. data/ext/polars/src/lib.rs +39 -35
  37. data/ext/polars/src/map/lazy.rs +5 -2
  38. data/ext/polars/src/map/series.rs +19 -18
  39. data/ext/polars/src/on_startup.rs +25 -6
  40. data/ext/polars/src/ruby/numo.rs +3 -4
  41. data/ext/polars/src/ruby/plan_callback.rs +1 -4
  42. data/ext/polars/src/ruby/rb_modules.rs +2 -4
  43. data/ext/polars/src/ruby/ruby_udf.rs +7 -9
  44. data/ext/polars/src/ruby/utils.rs +12 -1
  45. data/ext/polars/src/series/aggregation.rs +13 -1
  46. data/ext/polars/src/series/construction.rs +31 -50
  47. data/ext/polars/src/series/export.rs +33 -38
  48. data/ext/polars/src/series/general.rs +6 -6
  49. data/ext/polars/src/series/map.rs +3 -2
  50. data/ext/polars/src/series/scatter.rs +4 -4
  51. data/ext/polars/src/utils.rs +31 -7
  52. data/lib/polars/array_expr.rb +23 -7
  53. data/lib/polars/array_name_space.rb +16 -2
  54. data/lib/polars/binary_name_space.rb +32 -0
  55. data/lib/polars/collect_batches.rb +4 -0
  56. data/lib/polars/data_frame.rb +144 -11
  57. data/lib/polars/data_type_group.rb +5 -0
  58. data/lib/polars/date_time_expr.rb +91 -3
  59. data/lib/polars/date_time_name_space.rb +7 -1
  60. data/lib/polars/expr.rb +247 -44
  61. data/lib/polars/functions/business.rb +2 -2
  62. data/lib/polars/functions/datatype.rb +30 -0
  63. data/lib/polars/functions/eager.rb +80 -7
  64. data/lib/polars/functions/lazy.rb +97 -2
  65. data/lib/polars/functions/range/linear_space.rb +118 -0
  66. data/lib/polars/io/csv.rb +27 -5
  67. data/lib/polars/io/database.rb +2 -3
  68. data/lib/polars/io/ipc.rb +2 -2
  69. data/lib/polars/io/lines.rb +172 -0
  70. data/lib/polars/io/parquet.rb +1 -1
  71. data/lib/polars/io/sink_options.rb +5 -2
  72. data/lib/polars/lazy_frame.rb +517 -14
  73. data/lib/polars/list_expr.rb +21 -7
  74. data/lib/polars/list_name_space.rb +16 -2
  75. data/lib/polars/query_opt_flags.rb +23 -5
  76. data/lib/polars/selectors.rb +2 -2
  77. data/lib/polars/series.rb +176 -19
  78. data/lib/polars/sql_context.rb +2 -2
  79. data/lib/polars/string_cache.rb +19 -72
  80. data/lib/polars/string_expr.rb +1 -7
  81. data/lib/polars/string_name_space.rb +1 -7
  82. data/lib/polars/utils/construction/series.rb +24 -39
  83. data/lib/polars/utils/convert.rb +16 -6
  84. data/lib/polars/utils/parse.rb +7 -0
  85. data/lib/polars/utils/reduce_balanced.rb +43 -0
  86. data/lib/polars/utils/various.rb +5 -0
  87. data/lib/polars/version.rb +1 -1
  88. data/lib/polars.rb +2 -1
  89. metadata +4 -17
  90. data/ext/polars/src/functions/string_cache.rs +0 -24
@@ -1,7 +1,8 @@
1
1
  use std::hash::BuildHasher;
2
2
 
3
+ use arrow::bitmap::MutableBitmap;
3
4
  use either::Either;
4
- use magnus::{IntoValue, RArray, Ruby, Value, prelude::*, value::Opaque};
5
+ use magnus::{RArray, Ruby, Value, prelude::*, value::Opaque};
5
6
  use polars::prelude::*;
6
7
 
7
8
  use crate::conversion::*;
@@ -9,6 +10,7 @@ use crate::prelude::strings_to_pl_smallstr;
9
10
  use crate::rb_modules::pl_utils;
10
11
  use crate::ruby::exceptions::RbIndexError;
11
12
  use crate::ruby::gvl::GvlExt;
13
+ use crate::ruby::utils::TryIntoValue;
12
14
  use crate::series::ToRbSeries;
13
15
  use crate::series::to_series;
14
16
  use crate::utils::EnterPolarsExt;
@@ -143,13 +145,13 @@ impl RbDataFrame {
143
145
  Ok(())
144
146
  }
145
147
 
146
- pub fn dtypes(ruby: &Ruby, self_: &Self) -> RArray {
148
+ pub fn dtypes(ruby: &Ruby, self_: &Self) -> RbResult<RArray> {
147
149
  let df = self_.df.read();
148
150
  let iter = df
149
151
  .columns()
150
152
  .iter()
151
- .map(|s| Wrap(s.dtype().clone()).into_value_with(ruby));
152
- ruby.ary_from_iter(iter)
153
+ .map(|s| Wrap(s.dtype().clone()).try_into_value_with(ruby));
154
+ ruby.ary_try_from_iter(iter)
153
155
  }
154
156
 
155
157
  pub fn n_chunks(&self) -> usize {
@@ -503,10 +505,26 @@ impl RbDataFrame {
503
505
  })
504
506
  }
505
507
 
506
- pub fn to_struct(rb: &Ruby, self_: &Self, name: String) -> RbResult<RbSeries> {
508
+ pub fn to_struct(
509
+ rb: &Ruby,
510
+ self_: &Self,
511
+ name: String,
512
+ invalid_indices: Vec<usize>,
513
+ ) -> RbResult<RbSeries> {
507
514
  rb.enter_polars_series(|| {
508
- let ca = self_.df.read().clone().into_struct(name.into());
509
- Ok(ca)
515
+ let mut ca = self_.df.read().clone().into_struct(name.into());
516
+
517
+ if !invalid_indices.is_empty() {
518
+ let mut validity = MutableBitmap::with_capacity(ca.len());
519
+ validity.extend_constant(ca.len(), true);
520
+ for i in invalid_indices {
521
+ validity.set(i, false);
522
+ }
523
+ ca.rechunk_mut();
524
+ Ok(ca.with_outer_validity(Some(validity.freeze())))
525
+ } else {
526
+ Ok(ca)
527
+ }
510
528
  })
511
529
  }
512
530
 
@@ -5,7 +5,7 @@ use polars_core::utils::CustomIterTools;
5
5
  use super::*;
6
6
  use crate::error::RbPolarsErr;
7
7
  use crate::prelude::*;
8
- use crate::ruby::utils::to_pl_err;
8
+ use crate::ruby::utils::{TryIntoValue, to_pl_err};
9
9
  use crate::series::construction::series_from_objects;
10
10
  use crate::{RbResult, RbSeries, raise_err};
11
11
 
@@ -28,9 +28,11 @@ impl RbDataFrame {
28
28
  drop(df); // Release lock before calling lambda.
29
29
 
30
30
  let lambda_result_iter = (0..height).map(move |_| {
31
- let iter = iters.iter_mut().map(|it| Wrap(it.next().unwrap()));
32
- let tpl = rb.ary_from_iter(iter);
33
- lambda.funcall::<_, _, Value>("call", (tpl,))
31
+ let iter = iters
32
+ .iter_mut()
33
+ .map(|it| Wrap(it.next().unwrap()).try_into_value_with(rb));
34
+ rb.ary_try_from_iter(iter)
35
+ .and_then(|tpl| lambda.funcall::<_, _, Value>("call", (tpl,)))
34
36
  });
35
37
 
36
38
  // Simple case: return type set.
@@ -63,7 +63,7 @@ impl From<RbPolarsErr> for Error {
63
63
  PolarsError::StructFieldNotFound(name) => {
64
64
  StructFieldNotFoundError::new_err(name.to_string())
65
65
  }
66
- PolarsError::Context { .. } => {
66
+ PolarsError::Context { .. } | PolarsError::ExprContext { .. } => {
67
67
  let tmp = RbPolarsErr::Polars(err.context_trace());
68
68
  RbErr::from(tmp)
69
69
  }
@@ -38,30 +38,10 @@ impl RbExpr {
38
38
  self.inner.clone().arr().median().into()
39
39
  }
40
40
 
41
- pub fn arr_unique(&self, maintain_order: bool) -> Self {
42
- if maintain_order {
43
- self.inner.clone().arr().unique_stable().into()
44
- } else {
45
- self.inner.clone().arr().unique().into()
46
- }
47
- }
48
-
49
- pub fn arr_n_unique(&self) -> Self {
50
- self.inner.clone().arr().n_unique().into()
51
- }
52
-
53
41
  pub fn arr_to_list(&self) -> Self {
54
42
  self.inner.clone().arr().to_list().into()
55
43
  }
56
44
 
57
- pub fn arr_all(&self) -> Self {
58
- self.inner.clone().arr().all().into()
59
- }
60
-
61
- pub fn arr_any(&self) -> Self {
62
- self.inner.clone().arr().any().into()
63
- }
64
-
65
45
  pub fn arr_sort(&self, descending: bool, nulls_last: bool) -> Self {
66
46
  self.inner
67
47
  .clone()
@@ -74,10 +54,6 @@ impl RbExpr {
74
54
  .into()
75
55
  }
76
56
 
77
- pub fn arr_reverse(&self) -> Self {
78
- self.inner.clone().arr().reverse().into()
79
- }
80
-
81
57
  pub fn arr_arg_min(&self) -> Self {
82
58
  self.inner.clone().arr().arg_min().into()
83
59
  }
@@ -1,7 +1,8 @@
1
- use magnus::{IntoValue, Ruby, Value};
2
- use polars::prelude::{DataType, DataTypeExpr, Schema};
1
+ use magnus::{RArray, Ruby, TryConvert, Value};
2
+ use polars::prelude::{DataType, DataTypeExpr, PlSmallStr, Schema};
3
3
 
4
4
  use crate::prelude::Wrap;
5
+ use crate::ruby::utils::TryIntoValue;
5
6
  use crate::{RbExpr, RbPolarsErr, RbResult};
6
7
 
7
8
  #[magnus::wrap(class = "Polars::RbDataTypeExpr")]
@@ -36,6 +37,15 @@ impl RbDataTypeExpr {
36
37
  .inner
37
38
  .into_datatype(&schema.0)
38
39
  .map_err(RbPolarsErr::from)?;
39
- Ok(Wrap(dtype).into_value_with(ruby))
40
+ Wrap(dtype).try_into_value_with(ruby)
41
+ }
42
+
43
+ pub fn struct_with_fields(rb_fields: RArray) -> RbResult<Self> {
44
+ let mut fields = Vec::new();
45
+ for v in rb_fields.into_iter() {
46
+ let (name, dt_expr) = <(String, &RbDataTypeExpr)>::try_convert(v)?;
47
+ fields.push((PlSmallStr::from_string(name), dt_expr.inner.clone()));
48
+ }
49
+ Ok(DataTypeExpr::StructWithFields(fields).into())
40
50
  }
41
51
  }
@@ -8,13 +8,13 @@ impl RbExpr {
8
8
  &self,
9
9
  n: &RbExpr,
10
10
  week_mask: [bool; 7],
11
- holidays: Vec<i32>,
11
+ holidays: &RbExpr,
12
12
  roll: Wrap<Roll>,
13
13
  ) -> Self {
14
14
  self.inner
15
15
  .clone()
16
16
  .dt()
17
- .add_business_days(n.inner.clone(), week_mask, holidays, roll.0)
17
+ .add_business_days(n.inner.clone(), week_mask, holidays.inner.clone(), roll.0)
18
18
  .into()
19
19
  }
20
20
 
@@ -133,11 +133,11 @@ impl RbExpr {
133
133
  self.clone().inner.dt().year().into()
134
134
  }
135
135
 
136
- pub fn dt_is_business_day(&self, week_mask: [bool; 7], holidays: Vec<i32>) -> Self {
136
+ pub fn dt_is_business_day(&self, week_mask: [bool; 7], holidays: &RbExpr) -> Self {
137
137
  self.inner
138
138
  .clone()
139
139
  .dt()
140
- .is_business_day(week_mask, holidays)
140
+ .is_business_day(week_mask, holidays.inner.clone())
141
141
  .into()
142
142
  }
143
143
 
@@ -5,13 +5,13 @@ use polars::lazy::dsl;
5
5
  use polars::prelude::*;
6
6
  use polars::series::ops::NullBehavior;
7
7
  use polars_core::chunked_array::cast::CastOptions;
8
- use polars_core::series::IsSorted;
8
+ use polars_plan::plans::AExprSorted;
9
9
 
10
10
  use super::datatype::RbDataTypeExpr;
11
11
  use super::selector::RbSelector;
12
12
  use crate::conversion::{Wrap, parse_fill_null_strategy};
13
13
  use crate::expr::ToExprs;
14
- use crate::{RbExpr, RbPolarsErr, RbResult};
14
+ use crate::{RbDataType, RbExpr, RbPolarsErr, RbResult};
15
15
 
16
16
  impl RbExpr {
17
17
  pub fn add(&self, rhs: &Self) -> RbResult<Self> {
@@ -118,6 +118,14 @@ impl RbExpr {
118
118
  self.inner.clone().max().into()
119
119
  }
120
120
 
121
+ pub fn min_by(&self, by: &Self) -> Self {
122
+ self.inner.clone().min_by(by.inner.clone()).into()
123
+ }
124
+
125
+ pub fn max_by(&self, by: &Self) -> Self {
126
+ self.inner.clone().max_by(by.inner.clone()).into()
127
+ }
128
+
121
129
  pub fn nan_max(&self) -> Self {
122
130
  self.inner.clone().nan_max().into()
123
131
  }
@@ -174,8 +182,8 @@ impl RbExpr {
174
182
  self.inner.clone().item(allow_empty).into()
175
183
  }
176
184
 
177
- pub fn implode(&self) -> Self {
178
- self.inner.clone().implode().into()
185
+ pub fn implode(&self, maintain_order: bool) -> Self {
186
+ self.inner.clone().implode(maintain_order).into()
179
187
  }
180
188
 
181
189
  pub fn quantile(&self, quantile: &Self, interpolation: Wrap<QuantileMethod>) -> Self {
@@ -358,8 +366,11 @@ impl RbExpr {
358
366
  .into()
359
367
  }
360
368
 
361
- pub fn gather(&self, idx: &Self) -> Self {
362
- self.inner.clone().gather(idx.inner.clone()).into()
369
+ pub fn gather(&self, idx: &Self, null_on_oob: bool) -> Self {
370
+ self.inner
371
+ .clone()
372
+ .gather(idx.inner.clone(), null_on_oob)
373
+ .into()
363
374
  }
364
375
 
365
376
  pub fn get(&self, idx: &Self, null_on_oob: bool) -> Self {
@@ -522,6 +533,10 @@ impl RbExpr {
522
533
  self.clone().inner.round_sig_figs(digits).into()
523
534
  }
524
535
 
536
+ pub fn truncate(&self, decimals: u32) -> Self {
537
+ self.inner.clone().truncate(decimals).into()
538
+ }
539
+
525
540
  pub fn floor(&self) -> Self {
526
541
  self.inner.clone().floor().into()
527
542
  }
@@ -719,8 +734,11 @@ impl RbExpr {
719
734
  self.inner.clone().dot(other.inner.clone()).into()
720
735
  }
721
736
 
722
- pub fn reinterpret(&self, signed: bool) -> Self {
723
- self.inner.clone().reinterpret(signed).into()
737
+ pub fn reinterpret(&self, signed: Option<bool>, dtype: Option<RbDataType>) -> Self {
738
+ self.inner
739
+ .clone()
740
+ .reinterpret(signed, dtype.map(|dt| dt.0))
741
+ .into()
724
742
  }
725
743
 
726
744
  pub fn mode(&self, maintain_order: bool) -> Self {
@@ -896,6 +914,10 @@ impl RbExpr {
896
914
  self.inner.clone().all(drop_nulls).into()
897
915
  }
898
916
 
917
+ pub fn is_empty(&self, ignore_nulls: bool) -> Self {
918
+ self.inner.clone().is_empty(ignore_nulls).into()
919
+ }
920
+
899
921
  pub fn log(&self, base: &RbExpr) -> Self {
900
922
  self.inner.clone().log(base.inner.clone()).into()
901
923
  }
@@ -916,13 +938,11 @@ impl RbExpr {
916
938
  self.inner.clone().hash(seed, seed_1, seed_2, seed_3).into()
917
939
  }
918
940
 
919
- pub fn set_sorted_flag(&self, descending: bool) -> Self {
920
- let is_sorted = if descending {
921
- IsSorted::Descending
922
- } else {
923
- IsSorted::Ascending
924
- };
925
- self.inner.clone().set_sorted_flag(is_sorted).into()
941
+ pub fn set_sorted_flag(&self, descending: bool, nulls_last: bool) -> Self {
942
+ let sortedness = AExprSorted::default()
943
+ .with_desc(Some(descending))
944
+ .with_nulls_last(Some(nulls_last));
945
+ self.inner.clone().set_sorted_flag(sortedness).into()
926
946
  }
927
947
 
928
948
  pub fn replace(&self, old: &Self, new: &Self) -> Self {
@@ -7,14 +7,6 @@ use crate::conversion::Wrap;
7
7
  use crate::{RbExpr, RbResult};
8
8
 
9
9
  impl RbExpr {
10
- pub fn list_all(&self) -> Self {
11
- self.inner.clone().list().all().into()
12
- }
13
-
14
- pub fn list_any(&self) -> Self {
15
- self.inner.clone().list().any().into()
16
- }
17
-
18
10
  pub fn list_arg_max(&self) -> Self {
19
11
  self.inner.clone().list().arg_max().into()
20
12
  }
@@ -103,10 +95,6 @@ impl RbExpr {
103
95
  self.inner.clone().list().min().into()
104
96
  }
105
97
 
106
- pub fn list_reverse(&self) -> Self {
107
- self.inner.clone().list().reverse().into()
108
- }
109
-
110
98
  pub fn list_shift(&self, periods: &RbExpr) -> Self {
111
99
  self.inner
112
100
  .clone()
@@ -213,20 +201,6 @@ impl RbExpr {
213
201
  .into())
214
202
  }
215
203
 
216
- pub fn list_n_unique(&self) -> Self {
217
- self.inner.clone().list().n_unique().into()
218
- }
219
-
220
- pub fn list_unique(&self, maintain_order: bool) -> Self {
221
- let e = self.inner.clone();
222
-
223
- if maintain_order {
224
- e.list().unique_stable().into()
225
- } else {
226
- e.list().unique().into()
227
- }
228
- }
229
-
230
204
  pub fn list_set_operation(&self, other: &RbExpr, operation: Wrap<SetOperation>) -> Self {
231
205
  let e = self.inner.clone().list();
232
206
  match operation.0 {
@@ -1,6 +1,9 @@
1
+ use magnus::Value;
1
2
  use polars::prelude::*;
2
3
 
3
4
  use crate::conversion::Wrap;
5
+ use crate::ruby::plan_callback::PlanCallbackExt;
6
+ use crate::ruby::ruby_function::RubyObject;
4
7
  use crate::{RbExpr, RbPolarsErr, RbResult};
5
8
 
6
9
  impl RbExpr {
@@ -406,4 +409,25 @@ impl RbExpr {
406
409
 
407
410
  self.inner.clone().rolling_kurtosis(options).into()
408
411
  }
412
+
413
+ pub fn rolling_map(
414
+ &self,
415
+ lambda: Value,
416
+ window_size: usize,
417
+ weights: Option<Vec<f64>>,
418
+ min_periods: Option<usize>,
419
+ center: bool,
420
+ ) -> Self {
421
+ let min_periods = min_periods.unwrap_or(window_size);
422
+ let options = RollingOptionsFixedWindow {
423
+ window_size,
424
+ weights,
425
+ min_periods,
426
+ center,
427
+ ..Default::default()
428
+ };
429
+ let function = PlanCallback::new_ruby(RubyObject::from(lambda));
430
+
431
+ self.inner.clone().rolling_map(function, options).into()
432
+ }
409
433
  }
@@ -6,9 +6,9 @@ pub fn business_day_count(
6
6
  start: &RbExpr,
7
7
  end: &RbExpr,
8
8
  week_mask: [bool; 7],
9
- holidays: Vec<i32>,
9
+ holidays: &RbExpr,
10
10
  ) -> RbExpr {
11
11
  let start = start.inner.clone();
12
12
  let end = end.inner.clone();
13
- dsl::business_day_count(start, end, week_mask, holidays).into()
13
+ dsl::business_day_count(start, end, week_mask, holidays.inner.clone()).into()
14
14
  }
@@ -8,6 +8,7 @@ use crate::conversion::Wrap;
8
8
  use crate::file::{EitherRustRubyFile, get_either_file};
9
9
  use crate::io::cloud_options::OptRbCloudOptions;
10
10
  use crate::ruby::gvl::GvlExt;
11
+ use crate::ruby::utils::TryIntoValue;
11
12
  use crate::{RbPolarsErr, RbResult};
12
13
 
13
14
  pub fn read_ipc_schema(rb: &Ruby, rb_f: Value) -> RbResult<RHash> {
@@ -32,7 +33,6 @@ pub fn read_parquet_metadata(
32
33
  ) -> RbResult<RHash> {
33
34
  use std::io::Cursor;
34
35
 
35
- use polars_io::pl_async::get_runtime;
36
36
  use polars_parquet::read::read_metadata;
37
37
  use polars_parquet::read::schema::read_custom_key_value_metadata;
38
38
 
@@ -53,7 +53,7 @@ pub fn read_parquet_metadata(
53
53
  use polars_error::PolarsResult;
54
54
 
55
55
  rb.detach(|| {
56
- get_runtime().block_on(async {
56
+ polars_core::runtime::ASYNC.block_on(async {
57
57
  let mut reader =
58
58
  ParquetObjectStore::from_uri(p, cloud_options.as_ref(), None).await?;
59
59
  let result = reader.get_metadata().await?;
@@ -97,9 +97,10 @@ pub fn read_parquet_schema(rb_f: Value) -> RbResult<RHash> {
97
97
  }
98
98
 
99
99
  fn fields_to_rbdict(schema: &ArrowSchema, dict: &RHash) -> RbResult<()> {
100
+ let ruby = &Ruby::get_with(*dict);
100
101
  for field in schema.iter_values() {
101
102
  let dt = Wrap(polars::prelude::DataType::from_arrow_field(field));
102
- dict.aset(field.name.as_str(), dt)?;
103
+ dict.aset(field.name.as_str(), dt.try_into_value_with(ruby)?)?;
103
104
  }
104
105
  Ok(())
105
106
  }
@@ -1,17 +1,23 @@
1
1
  use magnus::encoding::EncodingCapable;
2
- use magnus::{Float, Integer, RArray, RString, Ruby, Value, prelude::*, typed_data::Obj};
2
+ use magnus::{
3
+ Float, Integer, RArray, RString, Ruby, Value, prelude::*, typed_data::Obj, value::Qfalse,
4
+ value::Qtrue,
5
+ };
3
6
  use polars::lazy::dsl;
4
7
  use polars::prelude::*;
8
+ use polars_plan::plans::DynLiteralValue;
5
9
 
10
+ use crate::conversion::any_value::rb_object_to_any_value;
6
11
  use crate::conversion::{Wrap, get_lf, get_rbseq};
7
12
  use crate::expr::ToExprs;
8
13
  use crate::expr::datatype::RbDataTypeExpr;
9
14
  use crate::lazyframe::RbOptFlags;
15
+ use crate::ruby::exceptions::{RbTypeError, RbValueError};
10
16
  use crate::ruby::plan_callback::PlanCallbackExt;
11
17
  use crate::ruby::ruby_function::RubyObject;
12
18
  use crate::ruby::thread::start_background_ruby_thread;
13
19
  use crate::utils::EnterPolarsExt;
14
- use crate::{RbDataFrame, RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbValueError, map};
20
+ use crate::{RbDataFrame, RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, map};
15
21
 
16
22
  macro_rules! set_unwrapped_or_0 {
17
23
  ($($var:ident),+ $(,)?) => {
@@ -130,6 +136,13 @@ pub fn collect_all(
130
136
  Ok(ruby.ary_from_iter(dfs.into_iter().map(Into::<RbDataFrame>::into)))
131
137
  }
132
138
 
139
+ pub fn explain_all(rb: &Ruby, lfs: RArray, optflags: &RbOptFlags) -> RbResult<String> {
140
+ let plans = lfs_to_plans(lfs)?;
141
+ let explained =
142
+ rb.enter_polars(|| LazyFrame::explain_all(plans, optflags.clone().inner.into_inner()))?;
143
+ Ok(explained)
144
+ }
145
+
133
146
  pub fn collect_all_lazy(lfs: RArray, optflags: &RbOptFlags) -> RbResult<RbLazyFrame> {
134
147
  let plans = lfs_to_plans(lfs)?;
135
148
 
@@ -371,34 +384,28 @@ pub fn fold(
371
384
  .into())
372
385
  }
373
386
 
374
- pub fn lit(value: Value, allow_object: bool, is_scalar: bool) -> RbResult<RbExpr> {
387
+ pub fn lit(rb: &Ruby, value: Value, allow_object: bool, is_scalar: bool) -> RbResult<RbExpr> {
375
388
  let ruby = Ruby::get_with(value);
376
- if value.is_kind_of(ruby.class_true_class()) || value.is_kind_of(ruby.class_false_class()) {
377
- Ok(dsl::lit(bool::try_convert(value)?).into())
378
- } else if let Some(v) = Integer::from_value(value) {
379
- match v.to_i64() {
380
- Ok(val) => {
381
- if val > 0 && val < i32::MAX as i64 || val < 0 && val > i32::MIN as i64 {
382
- Ok(dsl::lit(val as i32).into())
383
- } else {
384
- Ok(dsl::lit(val).into())
385
- }
386
- }
387
- _ => {
388
- let val = v.to_u64()?;
389
- Ok(dsl::lit(val).into())
390
- }
391
- }
392
- } else if let Some(v) = Float::from_value(value) {
393
- Ok(dsl::lit(v.to_f64()).into())
394
- } else if let Some(v) = RString::from_value(value) {
395
- if v.enc_get() == ruby.utf8_encindex() {
396
- Ok(dsl::lit(v.to_string()?).into())
389
+ if Qtrue::from_value(value).is_some() {
390
+ Ok(dsl::lit(true).into())
391
+ } else if Qfalse::from_value(value).is_some() {
392
+ Ok(dsl::lit(false).into())
393
+ } else if let Some(int) = Integer::from_value(value) {
394
+ let v = i128::try_convert(int.as_value())
395
+ .map_err(|e| polars_err!(InvalidOperation: "integer too large for Polars: {e}"))
396
+ .map_err(RbPolarsErr::from)?;
397
+ Ok(Expr::Literal(LiteralValue::Dyn(DynLiteralValue::Int(v))).into())
398
+ } else if let Some(float) = Float::from_value(value) {
399
+ let val = f64::try_convert(float.as_value())?;
400
+ Ok(Expr::Literal(LiteralValue::Dyn(DynLiteralValue::Float(val))).into())
401
+ } else if let Some(rbstr) = RString::from_value(value) {
402
+ if rbstr.enc_get() == ruby.utf8_encindex() {
403
+ Ok(dsl::lit(rbstr.to_string()?).into())
397
404
  } else {
398
- Ok(dsl::lit(unsafe { v.as_slice() }).into())
405
+ Ok(dsl::lit(unsafe { rbstr.as_slice() }).into())
399
406
  }
400
- } else if let Ok(series) = Obj::<RbSeries>::try_convert(value) {
401
- let s = series.series.read();
407
+ } else if let Ok(series) = <&RbSeries>::try_convert(value) {
408
+ let s = series.clone().series.into_inner();
402
409
  if is_scalar {
403
410
  let av = s
404
411
  .get(0)
@@ -406,17 +413,39 @@ pub fn lit(value: Value, allow_object: bool, is_scalar: bool) -> RbResult<RbExpr
406
413
  let av = av.into_static();
407
414
  Ok(dsl::lit(Scalar::new(s.dtype().clone(), av)).into())
408
415
  } else {
409
- Ok(dsl::lit(s.clone()).into())
416
+ Ok(dsl::lit(s).into())
410
417
  }
411
418
  } else if value.is_nil() {
412
419
  Ok(dsl::lit(Null {}).into())
413
- } else if allow_object {
414
- todo!()
415
420
  } else {
416
- Err(RbValueError::new_err(format!(
417
- "could not convert value {:?} as a Literal",
418
- value.to_string()
419
- )))
421
+ let raise = || {
422
+ RbTypeError::new_err(format!(
423
+ "cannot create expression literal for value of type {}.\
424
+ \n\nHint: Pass `allow_object: true` to accept any value and create a literal of type Object.",
425
+ unsafe { value.classname() },
426
+ ))
427
+ };
428
+
429
+ let av = rb_object_to_any_value(value, true, allow_object).map_err(|_| raise())?;
430
+ match av {
431
+ AnyValue::ObjectOwned(_) => {
432
+ // Check again for object allowance as for cached addresses this is not checked.
433
+ if allow_object {
434
+ let s = RbSeries::new_object(
435
+ rb,
436
+ "".to_string(),
437
+ rb.ary_new_from_values(&[value]),
438
+ false,
439
+ )?
440
+ .series
441
+ .into_inner();
442
+ Ok(dsl::lit(s).into())
443
+ } else {
444
+ Err(raise())
445
+ }
446
+ }
447
+ _ => Ok(Expr::Literal(LiteralValue::from(av)).into()),
448
+ }
420
449
  }
421
450
  }
422
451
 
@@ -454,7 +483,7 @@ pub fn reduce(
454
483
  .into())
455
484
  }
456
485
 
457
- pub fn repeat(value: &RbExpr, n: &RbExpr, dtype: Option<Wrap<DataType>>) -> RbResult<RbExpr> {
486
+ pub fn repeat(value: &RbExpr, n: &RbExpr, dtype: Option<Wrap<DataType>>) -> RbExpr {
458
487
  let mut value = value.inner.clone();
459
488
  let n = n.inner.clone();
460
489
 
@@ -462,17 +491,7 @@ pub fn repeat(value: &RbExpr, n: &RbExpr, dtype: Option<Wrap<DataType>>) -> RbRe
462
491
  value = value.cast(dtype.0);
463
492
  }
464
493
 
465
- if let Expr::Literal(lv) = &value {
466
- let av = lv.to_any_value().unwrap();
467
- // Integer inputs that fit in Int32 are parsed as such
468
- if let DataType::Int64 = av.dtype() {
469
- let int_value = av.try_extract::<i64>().unwrap();
470
- if int_value >= i32::MIN as i64 && int_value <= i32::MAX as i64 {
471
- value = value.cast(DataType::Int32);
472
- }
473
- }
474
- }
475
- Ok(dsl::repeat(value, n).into())
494
+ dsl::repeat(value, n).into()
476
495
  }
477
496
 
478
497
  pub fn spearman_rank_corr(a: &RbExpr, b: &RbExpr, propagate_nans: bool) -> RbExpr {
@@ -1,18 +1,19 @@
1
- use magnus::{IntoValue, Ruby, Value};
1
+ use magnus::{Ruby, Value};
2
2
  use polars_core;
3
- use polars_core::POOL;
4
3
  use polars_core::fmt::FloatFmt;
5
4
  use polars_core::prelude::IDX_DTYPE;
5
+ use polars_core::runtime::RAYON;
6
6
 
7
7
  use crate::conversion::Wrap;
8
+ use crate::ruby::utils::TryIntoValue;
8
9
  use crate::{RbResult, RbValueError};
9
10
 
10
- pub fn get_index_type(ruby: &Ruby) -> Value {
11
- Wrap(IDX_DTYPE).into_value_with(ruby)
11
+ pub fn get_index_type(ruby: &Ruby) -> RbResult<Value> {
12
+ Wrap(IDX_DTYPE).try_into_value_with(ruby)
12
13
  }
13
14
 
14
15
  pub fn thread_pool_size() -> usize {
15
- POOL.current_num_threads()
16
+ RAYON.current_num_threads()
16
17
  }
17
18
 
18
19
  pub fn set_float_fmt(fmt: String) -> RbResult<()> {
@@ -7,7 +7,6 @@ pub mod meta;
7
7
  pub mod misc;
8
8
  pub mod random;
9
9
  pub mod range;
10
- pub mod string_cache;
11
10
  pub mod strings;
12
11
  pub mod utils;
13
12
  pub mod whenthen;
@@ -138,6 +138,19 @@ pub fn time_ranges(
138
138
  Ok(dsl::time_ranges(start, end, every, closed).into())
139
139
  }
140
140
 
141
+ pub fn linear_space(
142
+ start: &RbExpr,
143
+ end: &RbExpr,
144
+ num_samples: &RbExpr,
145
+ closed: Wrap<ClosedInterval>,
146
+ ) -> RbResult<RbExpr> {
147
+ let start = start.inner.clone();
148
+ let end = end.inner.clone();
149
+ let num_samples = num_samples.inner.clone();
150
+ let closed = closed.0;
151
+ Ok(dsl::linear_space(start, end, num_samples, closed).into())
152
+ }
153
+
141
154
  pub fn linear_spaces(
142
155
  start: &RbExpr,
143
156
  end: &RbExpr,
@@ -1,6 +1,8 @@
1
1
  use crate::RbResult;
2
- use polars_core::config::get_engine_affinity;
3
2
 
4
3
  pub fn rb_get_engine_affinity() -> RbResult<String> {
5
- Ok(get_engine_affinity())
4
+ Ok(polars_config::config()
5
+ .engine_affinity()
6
+ .as_static_str()
7
+ .to_string())
6
8
  }