polars-df 0.25.1 → 0.26.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +27 -1
  3. data/Cargo.lock +268 -95
  4. data/LICENSE.txt +1 -1
  5. data/README.md +1 -3
  6. data/ext/polars/Cargo.toml +18 -18
  7. data/ext/polars/src/catalog/unity.rs +15 -20
  8. data/ext/polars/src/conversion/any_value.rs +25 -24
  9. data/ext/polars/src/conversion/chunked_array.rs +58 -56
  10. data/ext/polars/src/conversion/datetime.rs +58 -7
  11. data/ext/polars/src/conversion/mod.rs +155 -141
  12. data/ext/polars/src/dataframe/export.rs +15 -12
  13. data/ext/polars/src/dataframe/general.rs +5 -4
  14. data/ext/polars/src/dataframe/map.rs +6 -4
  15. data/ext/polars/src/error.rs +1 -1
  16. data/ext/polars/src/expr/array.rs +0 -24
  17. data/ext/polars/src/expr/datatype.rs +3 -2
  18. data/ext/polars/src/expr/datetime.rs +4 -4
  19. data/ext/polars/src/expr/general.rs +27 -15
  20. data/ext/polars/src/expr/list.rs +0 -26
  21. data/ext/polars/src/functions/business.rs +2 -2
  22. data/ext/polars/src/functions/io.rs +4 -3
  23. data/ext/polars/src/functions/lazy.rs +58 -46
  24. data/ext/polars/src/functions/meta.rs +6 -5
  25. data/ext/polars/src/functions/mod.rs +0 -1
  26. data/ext/polars/src/functions/utils.rs +4 -2
  27. data/ext/polars/src/interop/arrow/mod.rs +4 -2
  28. data/ext/polars/src/interop/numo/to_numo_series.rs +26 -25
  29. data/ext/polars/src/io/scan_options.rs +6 -3
  30. data/ext/polars/src/io/sink_options.rs +2 -0
  31. data/ext/polars/src/lazyframe/general.rs +28 -13
  32. data/ext/polars/src/lazyframe/optflags.rs +2 -1
  33. data/ext/polars/src/lib.rs +14 -33
  34. data/ext/polars/src/map/lazy.rs +5 -2
  35. data/ext/polars/src/map/series.rs +19 -18
  36. data/ext/polars/src/on_startup.rs +16 -7
  37. data/ext/polars/src/ruby/numo.rs +3 -4
  38. data/ext/polars/src/ruby/rb_modules.rs +2 -4
  39. data/ext/polars/src/ruby/ruby_udf.rs +7 -9
  40. data/ext/polars/src/ruby/utils.rs +12 -1
  41. data/ext/polars/src/series/aggregation.rs +13 -1
  42. data/ext/polars/src/series/export.rs +33 -38
  43. data/ext/polars/src/series/general.rs +4 -3
  44. data/ext/polars/src/series/map.rs +3 -2
  45. data/ext/polars/src/series/scatter.rs +4 -4
  46. data/ext/polars/src/utils.rs +31 -7
  47. data/lib/polars/array_expr.rb +23 -7
  48. data/lib/polars/array_name_space.rb +16 -2
  49. data/lib/polars/binary_name_space.rb +32 -0
  50. data/lib/polars/data_frame.rb +73 -10
  51. data/lib/polars/date_time_expr.rb +91 -3
  52. data/lib/polars/date_time_name_space.rb +7 -1
  53. data/lib/polars/expr.rb +122 -44
  54. data/lib/polars/functions/business.rb +2 -2
  55. data/lib/polars/functions/eager.rb +80 -7
  56. data/lib/polars/functions/lazy.rb +5 -2
  57. data/lib/polars/io/csv.rb +27 -5
  58. data/lib/polars/io/ipc.rb +1 -1
  59. data/lib/polars/io/lines.rb +4 -4
  60. data/lib/polars/io/sink_options.rb +4 -2
  61. data/lib/polars/lazy_frame.rb +97 -14
  62. data/lib/polars/list_expr.rb +21 -7
  63. data/lib/polars/list_name_space.rb +16 -2
  64. data/lib/polars/query_opt_flags.rb +22 -5
  65. data/lib/polars/selectors.rb +1 -1
  66. data/lib/polars/series.rb +88 -19
  67. data/lib/polars/sql_context.rb +2 -2
  68. data/lib/polars/string_cache.rb +19 -72
  69. data/lib/polars/string_expr.rb +1 -7
  70. data/lib/polars/string_name_space.rb +1 -7
  71. data/lib/polars/utils/construction/series.rb +8 -3
  72. data/lib/polars/utils/convert.rb +16 -6
  73. data/lib/polars/utils/parse.rb +7 -0
  74. data/lib/polars/utils/reduce_balanced.rb +43 -0
  75. data/lib/polars/utils/various.rb +5 -0
  76. data/lib/polars/version.rb +1 -1
  77. data/lib/polars.rb +1 -1
  78. metadata +3 -17
  79. data/ext/polars/src/functions/string_cache.rs +0 -24
@@ -2,49 +2,49 @@ use magnus::{IntoValue, Ruby, Value, value::ReprValue};
2
2
  use polars_core::prelude::*;
3
3
 
4
4
  use crate::prelude::*;
5
+ use crate::ruby::utils::TryIntoValue;
5
6
  use crate::{RbPolarsErr, RbResult, RbSeries};
6
7
 
7
8
  impl RbSeries {
8
9
  /// Convert this Series to a Ruby array.
9
10
  /// This operation copies data.
10
- pub fn to_a(&self) -> RbResult<Value> {
11
- let series = &self.series.read();
11
+ pub fn to_a(ruby: &Ruby, self_: &Self) -> RbResult<Value> {
12
+ let series = &self_.series.read();
12
13
 
13
- fn to_a_recursive(series: &Series) -> RbResult<Value> {
14
- let ruby = Ruby::get().unwrap();
14
+ fn to_a_recursive(ruby: &Ruby, series: &Series) -> RbResult<Value> {
15
15
  let rblist = match series.dtype() {
16
16
  DataType::Boolean => ruby
17
- .ary_from_iter(series.bool().map_err(RbPolarsErr::from)?)
17
+ .ary_from_iter(series.bool().map_err(RbPolarsErr::from)?.iter())
18
18
  .as_value(),
19
19
  DataType::UInt8 => ruby
20
- .ary_from_iter(series.u8().map_err(RbPolarsErr::from)?)
20
+ .ary_from_iter(series.u8().map_err(RbPolarsErr::from)?.iter())
21
21
  .as_value(),
22
22
  DataType::UInt16 => ruby
23
- .ary_from_iter(series.u16().map_err(RbPolarsErr::from)?)
23
+ .ary_from_iter(series.u16().map_err(RbPolarsErr::from)?.iter())
24
24
  .as_value(),
25
25
  DataType::UInt32 => ruby
26
- .ary_from_iter(series.u32().map_err(RbPolarsErr::from)?)
26
+ .ary_from_iter(series.u32().map_err(RbPolarsErr::from)?.iter())
27
27
  .as_value(),
28
28
  DataType::UInt64 => ruby
29
- .ary_from_iter(series.u64().map_err(RbPolarsErr::from)?)
29
+ .ary_from_iter(series.u64().map_err(RbPolarsErr::from)?.iter())
30
30
  .as_value(),
31
31
  DataType::UInt128 => ruby
32
- .ary_from_iter(series.u128().map_err(RbPolarsErr::from)?)
32
+ .ary_from_iter(series.u128().map_err(RbPolarsErr::from)?.iter())
33
33
  .as_value(),
34
34
  DataType::Int8 => ruby
35
- .ary_from_iter(series.i8().map_err(RbPolarsErr::from)?)
35
+ .ary_from_iter(series.i8().map_err(RbPolarsErr::from)?.iter())
36
36
  .as_value(),
37
37
  DataType::Int16 => ruby
38
- .ary_from_iter(series.i16().map_err(RbPolarsErr::from)?)
38
+ .ary_from_iter(series.i16().map_err(RbPolarsErr::from)?.iter())
39
39
  .as_value(),
40
40
  DataType::Int32 => ruby
41
- .ary_from_iter(series.i32().map_err(RbPolarsErr::from)?)
41
+ .ary_from_iter(series.i32().map_err(RbPolarsErr::from)?.iter())
42
42
  .as_value(),
43
43
  DataType::Int64 => ruby
44
- .ary_from_iter(series.i64().map_err(RbPolarsErr::from)?)
44
+ .ary_from_iter(series.i64().map_err(RbPolarsErr::from)?.iter())
45
45
  .as_value(),
46
46
  DataType::Int128 => ruby
47
- .ary_from_iter(series.i128().map_err(RbPolarsErr::from)?)
47
+ .ary_from_iter(series.i128().map_err(RbPolarsErr::from)?.iter())
48
48
  .as_value(),
49
49
  DataType::Float16 => ruby
50
50
  .ary_from_iter(
@@ -53,14 +53,15 @@ impl RbSeries {
53
53
  .cast(&DataType::Float32)
54
54
  .map_err(RbPolarsErr::from)?
55
55
  .f32()
56
- .map_err(RbPolarsErr::from)?,
56
+ .map_err(RbPolarsErr::from)?
57
+ .iter(),
57
58
  )
58
59
  .as_value(),
59
60
  DataType::Float32 => ruby
60
- .ary_from_iter(series.f32().map_err(RbPolarsErr::from)?)
61
+ .ary_from_iter(series.f32().map_err(RbPolarsErr::from)?.iter())
61
62
  .as_value(),
62
63
  DataType::Float64 => ruby
63
- .ary_from_iter(series.f64().map_err(RbPolarsErr::from)?)
64
+ .ary_from_iter(series.f64().map_err(RbPolarsErr::from)?.iter())
64
65
  .as_value(),
65
66
  DataType::Categorical(_, _) | DataType::Enum(_, _) => {
66
67
  with_match_categorical_physical_type!(series.dtype().cat_physical().unwrap(), |$C| {
@@ -71,10 +72,7 @@ impl RbSeries {
71
72
  let v = ruby.ary_new_capa(series.len());
72
73
  for i in 0..series.len() {
73
74
  let obj: Option<&ObjectValue> = series.get_object(i).map(|any| any.into());
74
- match obj {
75
- Some(val) => v.push(val.to_value()).unwrap(),
76
- None => v.push(ruby.qnil()).unwrap(),
77
- };
75
+ v.push(obj.cloned().into_value_with(ruby))?;
78
76
  }
79
77
  v.as_value()
80
78
  }
@@ -87,7 +85,7 @@ impl RbSeries {
87
85
  v.push(ruby.qnil()).unwrap();
88
86
  }
89
87
  Some(s) => {
90
- let rblst = to_a_recursive(s.as_ref())?;
88
+ let rblst = to_a_recursive(ruby, s.as_ref())?;
91
89
  v.push(rblst)?;
92
90
  }
93
91
  }
@@ -103,7 +101,7 @@ impl RbSeries {
103
101
  v.push(ruby.qnil()).unwrap();
104
102
  }
105
103
  Some(s) => {
106
- let rblst = to_a_recursive(s.as_ref())?;
104
+ let rblst = to_a_recursive(ruby, s.as_ref())?;
107
105
  v.push(rblst)?;
108
106
  }
109
107
  }
@@ -112,35 +110,35 @@ impl RbSeries {
112
110
  }
113
111
  DataType::Date => {
114
112
  let ca = series.date().map_err(RbPolarsErr::from)?;
115
- return Ok(Wrap(ca).into_value_with(&ruby));
113
+ return Wrap(ca).try_into_value_with(ruby);
116
114
  }
117
115
  DataType::Time => {
118
116
  let ca = series.time().map_err(RbPolarsErr::from)?;
119
- return Ok(Wrap(ca).into_value_with(&ruby));
117
+ return Wrap(ca).try_into_value_with(ruby);
120
118
  }
121
119
  DataType::Datetime(_, _) => {
122
120
  let ca = series.datetime().map_err(RbPolarsErr::from)?;
123
- return Ok(Wrap(ca).into_value_with(&ruby));
121
+ return Wrap(ca).try_into_value_with(ruby);
124
122
  }
125
123
  DataType::Decimal(_, _) => {
126
124
  let ca = series.decimal().map_err(RbPolarsErr::from)?;
127
- return Ok(Wrap(ca).into_value_with(&ruby));
125
+ return Wrap(ca).try_into_value_with(ruby);
128
126
  }
129
127
  DataType::String => {
130
128
  let ca = series.str().map_err(RbPolarsErr::from)?;
131
- return Ok(Wrap(ca).into_value_with(&ruby));
129
+ return Wrap(ca).try_into_value_with(ruby);
132
130
  }
133
131
  DataType::Struct(_) => {
134
132
  let ca = series.struct_().map_err(RbPolarsErr::from)?;
135
- return Ok(Wrap(ca).into_value_with(&ruby));
133
+ return Wrap(ca).try_into_value_with(ruby);
136
134
  }
137
135
  DataType::Duration(_) => {
138
136
  let ca = series.duration().map_err(RbPolarsErr::from)?;
139
- return Ok(Wrap(ca).into_value_with(&ruby));
137
+ return Wrap(ca).try_into_value_with(ruby);
140
138
  }
141
139
  DataType::Binary => {
142
140
  let ca = series.binary().map_err(RbPolarsErr::from)?;
143
- return Ok(Wrap(ca).into_value_with(&ruby));
141
+ return Wrap(ca).try_into_value_with(ruby);
144
142
  }
145
143
  DataType::Null => {
146
144
  let null: Option<u8> = None;
@@ -163,10 +161,7 @@ impl RbSeries {
163
161
  }
164
162
  impl ExactSizeIterator for NullIter {}
165
163
 
166
- Ruby::get()
167
- .unwrap()
168
- .ary_from_iter(NullIter { iter, n })
169
- .as_value()
164
+ ruby.ary_from_iter(NullIter { iter, n }).as_value()
170
165
  }
171
166
  DataType::Unknown(_) => {
172
167
  panic!("to_a not implemented for unknown")
@@ -175,12 +170,12 @@ impl RbSeries {
175
170
  unreachable!()
176
171
  }
177
172
  DataType::Extension(_, _) => {
178
- return to_a_recursive(series.ext().unwrap().storage());
173
+ return to_a_recursive(ruby, series.ext().unwrap().storage());
179
174
  }
180
175
  };
181
176
  Ok(rblist.as_value())
182
177
  }
183
178
 
184
- to_a_recursive(series)
179
+ to_a_recursive(ruby, series)
185
180
  }
186
181
  }
@@ -10,6 +10,7 @@ use crate::ruby::exceptions::{RbIndexError, RbRuntimeError, RbValueError};
10
10
  use crate::ruby::gvl::GvlExt;
11
11
  use crate::ruby::plan_callback::PlanCallbackExt;
12
12
  use crate::ruby::ruby_function::RubyObject;
13
+ use crate::ruby::utils::TryIntoValue;
13
14
  use crate::utils::EnterPolarsExt;
14
15
  use crate::{RbDataFrame, RbErr, RbPolarsErr, RbResult, RbSeries};
15
16
 
@@ -112,7 +113,7 @@ impl RbSeries {
112
113
  let rbseries = RbSeries::new(s);
113
114
  rb_modules::pl_utils(ruby).funcall("wrap_s", (rbseries,))
114
115
  }
115
- _ => Ok(Wrap(av).into_value_with(ruby)),
116
+ _ => Wrap(av).try_into_value_with(ruby),
116
117
  }
117
118
  }
118
119
 
@@ -156,8 +157,8 @@ impl RbSeries {
156
157
  self.series.write().rename(name.into());
157
158
  }
158
159
 
159
- pub fn dtype(rb: &Ruby, self_: &Self) -> Value {
160
- Wrap(self_.series.read().dtype().clone()).into_value_with(rb)
160
+ pub fn dtype(rb: &Ruby, self_: &Self) -> RbResult<Value> {
161
+ Wrap(self_.series.read().dtype().clone()).try_into_value_with(rb)
161
162
  }
162
163
 
163
164
  pub fn set_sorted_flag(&self, descending: bool) -> Self {
@@ -4,6 +4,7 @@ use super::RbSeries;
4
4
  use crate::map::series::ApplyLambdaGeneric;
5
5
  use crate::prelude::*;
6
6
  use crate::ruby::gvl::GvlExt;
7
+ use crate::ruby::utils::TryIntoValue;
7
8
  use crate::series::construction::series_from_objects;
8
9
  use crate::{RbPolarsErr, RbResult};
9
10
  use crate::{apply_all_polars_dtypes, raise_err};
@@ -78,7 +79,7 @@ fn call_and_collect_objects<T, I>(
78
79
  skip_nulls: bool,
79
80
  ) -> RbResult<Series>
80
81
  where
81
- T: IntoValue,
82
+ T: TryIntoValue,
82
83
  I: Iterator<Item = Option<T>>,
83
84
  {
84
85
  let mut objects = Vec::with_capacity(len);
@@ -91,7 +92,7 @@ where
91
92
  continue;
92
93
  }
93
94
  None => rb.qnil().into_value_with(rb),
94
- Some(val) => val.into_value_with(rb),
95
+ Some(val) => val.try_into_value_with(rb)?,
95
96
  };
96
97
  let out: Value = lambda.funcall("call", (arg,))?;
97
98
  objects.push(ObjectValue {
@@ -139,23 +139,23 @@ fn scatter_impl(
139
139
  with_match_physical_numeric_polars_type!(dt, |$T| {
140
140
  let ca: &mut ChunkedArray<$T> = mutable_s.as_mut();
141
141
  let values: &ChunkedArray<$T> = values.as_ref().as_ref();
142
- ca.scatter(idx, values)
142
+ ca.scatter(idx, values.iter())
143
143
  })
144
144
  }
145
145
  DataType::Boolean => {
146
146
  let ca: &mut ChunkedArray<BooleanType> = mutable_s.as_mut();
147
147
  let values = values.bool()?;
148
- ca.scatter(idx, values)
148
+ ca.scatter(idx, values.iter())
149
149
  }
150
150
  DataType::Binary => {
151
151
  let ca: &mut ChunkedArray<BinaryType> = mutable_s.as_mut();
152
152
  let values = values.binary()?;
153
- ca.scatter(idx, values)
153
+ ca.scatter(idx, values.iter())
154
154
  }
155
155
  DataType::String => {
156
156
  let ca: &mut ChunkedArray<StringType> = mutable_s.as_mut();
157
157
  let values = values.str()?;
158
- ca.scatter(idx, values)
158
+ ca.scatter(idx, values.iter())
159
159
  }
160
160
  _ => Err(PolarsError::ComputeError(
161
161
  format!("not yet implemented for dtype: {logical_dtype}").into(),
@@ -16,7 +16,6 @@ macro_rules! apply_all_polars_dtypes {
16
16
  ($self:expr, $method:ident, $($args:expr),*) => {
17
17
  match $self.dtype() {
18
18
  DataType::Boolean => $self.bool().unwrap().$method($($args),*),
19
- DataType::String => $self.str().unwrap().$method($($args),*),
20
19
  DataType::UInt8 => $self.u8().unwrap().$method($($args),*),
21
20
  DataType::UInt16 => $self.u16().unwrap().$method($($args),*),
22
21
  DataType::UInt32 => $self.u32().unwrap().$method($($args),*),
@@ -27,15 +26,40 @@ macro_rules! apply_all_polars_dtypes {
27
26
  DataType::Int32 => $self.i32().unwrap().$method($($args),*),
28
27
  DataType::Int64 => $self.i64().unwrap().$method($($args),*),
29
28
  DataType::Int128 => $self.i128().unwrap().$method($($args),*),
30
- DataType::Float16 => todo!(),
29
+ DataType::Float16 => $self.cast(&DataType::Float32).unwrap().f32().unwrap().$method($($args),*),
31
30
  DataType::Float32 => $self.f32().unwrap().$method($($args),*),
32
31
  DataType::Float64 => $self.f64().unwrap().$method($($args),*),
33
- DataType::Date => $self.date().unwrap().physical().$method($($args),*),
34
- DataType::Datetime(_, _) => $self.datetime().unwrap().physical().$method($($args),*),
35
- // TODO implement
36
- // DataType::List(_) => $self.list().unwrap().$method($($args),*),
32
+ DataType::String => $self.str().unwrap().$method($($args),*),
33
+ DataType::Binary => $self.binary().unwrap().$method($($args),*),
34
+ DataType::Decimal(_, _) => $self.decimal().unwrap().$method($($args),*),
35
+
36
+ DataType::Date => $self.date().unwrap().$method($($args),*),
37
+ DataType::Datetime(_, _) => $self.datetime().unwrap().$method($($args),*),
38
+ DataType::Duration(_) => $self.duration().unwrap().$method($($args),*),
39
+ DataType::Time => $self.time().unwrap().$method($($args),*),
40
+
41
+ DataType::List(_) => $self.list().unwrap().$method($($args),*),
37
42
  DataType::Struct(_) => $self.struct_().unwrap().$method($($args),*),
38
- dt => panic!("dtype {:?} not supported", dt)
43
+ DataType::Array(_, _) => $self.array().unwrap().$method($($args),*),
44
+
45
+ dt @ (DataType::Categorical(_, _) | DataType::Enum(_, _)) => match dt.cat_physical().unwrap() {
46
+ CategoricalPhysical::U8 => $self.cat8().unwrap().$method($($args),*),
47
+ CategoricalPhysical::U16 => $self.cat16().unwrap().$method($($args),*),
48
+ CategoricalPhysical::U32 => $self.cat32().unwrap().$method($($args),*),
49
+ },
50
+
51
+ DataType::Object(_) => {
52
+ $self
53
+ .as_any()
54
+ .downcast_ref::<ObjectChunked<ObjectValue>>()
55
+ .unwrap()
56
+ .$method($($args),*)
57
+ },
58
+ DataType::Extension(_, _) => $self.ext().unwrap().$method($($args),*),
59
+
60
+ DataType::Null => $self.null().unwrap().$method($($args),*),
61
+
62
+ dt @ (DataType::BinaryOffset | DataType::Unknown(_)) => panic!("dtype {:?} not supported", dt)
39
63
  }
40
64
  }
41
65
  }
@@ -377,7 +377,9 @@ module Polars
377
377
  # # │ [1, 2] │
378
378
  # # └───────────┘
379
379
  def unique(maintain_order: false)
380
- Utils.wrap_expr(_rbexpr.arr_unique(maintain_order))
380
+ eval(
381
+ F.element.unique(maintain_order: maintain_order), as_list: true
382
+ )
381
383
  end
382
384
 
383
385
  # Count the number of unique values in every sub-arrays.
@@ -403,7 +405,7 @@ module Polars
403
405
  # # │ [2, 3, 4] ┆ 3 │
404
406
  # # └───────────────┴──────────┘
405
407
  def n_unique
406
- Utils.wrap_expr(_rbexpr.arr_n_unique)
408
+ agg(F.element.n_unique)
407
409
  end
408
410
 
409
411
  # Convert an Array column into a List column with the same inner data type.
@@ -432,6 +434,13 @@ module Polars
432
434
 
433
435
  # Evaluate whether any boolean value is true for every subarray.
434
436
  #
437
+ # @param ignore_nulls [Boolean]
438
+ # * If set to `true` (default), null values are ignored. If there
439
+ # are no non-null values, the output is `false`.
440
+ # * If set to `false`, [Kleene logic](https://en.wikipedia.org/wiki/Three-valued_logic) is used to deal with nulls:
441
+ # if the column contains any null values and no `true` values,
442
+ # the output is null.
443
+ #
435
444
  # @return [Expr]
436
445
  #
437
446
  # @example
@@ -461,12 +470,19 @@ module Polars
461
470
  # # │ [null, null] ┆ false │
462
471
  # # │ null ┆ null │
463
472
  # # └────────────────┴───────┘
464
- def any
465
- Utils.wrap_expr(_rbexpr.arr_any)
473
+ def any(ignore_nulls: true)
474
+ agg(F.element.any(ignore_nulls: ignore_nulls))
466
475
  end
467
476
 
468
477
  # Evaluate whether all boolean values are true for every subarray.
469
478
  #
479
+ # @param ignore_nulls [Boolean]
480
+ # * If set to `true` (default), null values are ignored. If there
481
+ # are no non-null values, the output is `true`.
482
+ # * If set to `false`, [Kleene logic](https://en.wikipedia.org/wiki/Three-valued_logic) is used to deal with nulls:
483
+ # if the column contains any null values and no `false` values,
484
+ # the output is null.
485
+ #
470
486
  # @return [Expr]
471
487
  #
472
488
  # @example
@@ -496,8 +512,8 @@ module Polars
496
512
  # # │ [null, null] ┆ true │
497
513
  # # │ null ┆ null │
498
514
  # # └────────────────┴───────┘
499
- def all
500
- Utils.wrap_expr(_rbexpr.arr_all)
515
+ def all(ignore_nulls: true)
516
+ agg(F.element.all(ignore_nulls: ignore_nulls))
501
517
  end
502
518
 
503
519
  # Sort the arrays in this column.
@@ -567,7 +583,7 @@ module Polars
567
583
  # # │ [9, 1, 2] ┆ [2, 1, 9] │
568
584
  # # └───────────────┴───────────────┘
569
585
  def reverse
570
- Utils.wrap_expr(_rbexpr.arr_reverse)
586
+ eval(F.element.reverse)
571
587
  end
572
588
 
573
589
  # Retrieve the index of the minimal value in every sub-array.
@@ -212,6 +212,13 @@ module Polars
212
212
 
213
213
  # Evaluate whether any boolean value is true for every subarray.
214
214
  #
215
+ # @param ignore_nulls [Boolean]
216
+ # * If set to `true` (default), null values are ignored. If there
217
+ # are no non-null values, the output is `false`.
218
+ # * If set to `false`, [Kleene logic](https://en.wikipedia.org/wiki/Three-valued_logic) is used to deal with nulls:
219
+ # if the column contains any null values and no `true` values,
220
+ # the output is null.
221
+ #
215
222
  # @return [Series]
216
223
  #
217
224
  # @example
@@ -230,7 +237,7 @@ module Polars
230
237
  # # false
231
238
  # # null
232
239
  # # ]
233
- def any
240
+ def any(ignore_nulls: true)
234
241
  super
235
242
  end
236
243
 
@@ -378,6 +385,13 @@ module Polars
378
385
 
379
386
  # Evaluate whether all boolean values are true for every subarray.
380
387
  #
388
+ # @param ignore_nulls [Boolean]
389
+ # * If set to `true` (default), null values are ignored. If there
390
+ # are no non-null values, the output is `true`.
391
+ # * If set to `false`, [Kleene logic](https://en.wikipedia.org/wiki/Three-valued_logic) is used to deal with nulls:
392
+ # if the column contains any null values and no `false` values,
393
+ # the output is null.
394
+ #
381
395
  # @return [Series]
382
396
  #
383
397
  # @example
@@ -396,7 +410,7 @@ module Polars
396
410
  # # true
397
411
  # # null
398
412
  # # ]
399
- def all
413
+ def all(ignore_nulls: true)
400
414
  super
401
415
  end
402
416
 
@@ -226,6 +226,38 @@ module Polars
226
226
  super
227
227
  end
228
228
 
229
+ # Get the byte value at the given index.
230
+ #
231
+ # For example, index `0` would return the first byte of every binary value
232
+ # and index `-1` would return the last byte of every binary value.
233
+ # The behavior if an index is out of bounds is determined by the argument
234
+ # `null_on_oob`.
235
+ #
236
+ # @param index [Object]
237
+ # Index to return per binary value
238
+ # @param null_on_oob [Boolean]
239
+ # Behavior if an index is out of bounds:
240
+ #
241
+ # * true -> set as null
242
+ # * false -> raise an error
243
+ #
244
+ # @return [Series]
245
+ #
246
+ # @example
247
+ # s = Polars::Series.new("a", ["\x01\x02\x03".b, "".b, "\x04\x05".b])
248
+ # s.bin.get(0, null_on_oob: true)
249
+ # # =>
250
+ # # shape: (3,)
251
+ # # Series: 'a' [u8]
252
+ # # [
253
+ # # 1
254
+ # # null
255
+ # # 4
256
+ # # ]
257
+ def get(index, null_on_oob: false)
258
+ super
259
+ end
260
+
229
261
  # Take the first `n` bytes of the binary values.
230
262
  #
231
263
  # @param n [Object]
@@ -3808,6 +3808,57 @@ module Polars
3808
3808
  .collect(optimizations: QueryOptFlags._eager)
3809
3809
  end
3810
3810
 
3811
+ # Selects rows from this DataFrame at the given indices.
3812
+ #
3813
+ # @note
3814
+ # This functionality is experimental. It may be
3815
+ # changed at any point without it being considered a breaking change.
3816
+ #
3817
+ # @param indices [Object]
3818
+ # The indices of the rows to select.
3819
+ # @param null_on_oob [Boolean]
3820
+ # If true when an index is out-of-bounds a null row will be generated
3821
+ # instead of raising an error.
3822
+ #
3823
+ # @return [DataFrame]
3824
+ #
3825
+ # @example
3826
+ # df = Polars::DataFrame.new({"x" => [2, 1, 0], "s" => ["foo", "bar", "baz"]})
3827
+ # df.gather([2, 0, 0])
3828
+ # # =>
3829
+ # # shape: (3, 2)
3830
+ # # ┌─────┬─────┐
3831
+ # # │ x ┆ s │
3832
+ # # │ --- ┆ --- │
3833
+ # # │ i64 ┆ str │
3834
+ # # ╞═════╪═════╡
3835
+ # # │ 0 ┆ baz │
3836
+ # # │ 2 ┆ foo │
3837
+ # # │ 2 ┆ foo │
3838
+ # # └─────┴─────┘
3839
+ #
3840
+ # @example
3841
+ # df.gather([0, 10, 1], null_on_oob: true)
3842
+ # # =>
3843
+ # # shape: (3, 2)
3844
+ # # ┌──────┬──────┐
3845
+ # # │ x ┆ s │
3846
+ # # │ --- ┆ --- │
3847
+ # # │ i64 ┆ str │
3848
+ # # ╞══════╪══════╡
3849
+ # # │ 2 ┆ foo │
3850
+ # # │ null ┆ null │
3851
+ # # │ 1 ┆ bar │
3852
+ # # └──────┴──────┘
3853
+ def gather(
3854
+ indices,
3855
+ null_on_oob: false
3856
+ )
3857
+ lazy
3858
+ .gather(indices, null_on_oob: null_on_oob)
3859
+ .collect(optimizations: QueryOptFlags._eager)
3860
+ end
3861
+
3811
3862
  # Apply a custom/user-defined function (UDF) over the rows of the DataFrame.
3812
3863
  #
3813
3864
  # The UDF will receive each row as a tuple of values: `udf(row)`.
@@ -4527,6 +4578,8 @@ module Polars
4527
4578
  # @param separator [String]
4528
4579
  # Used as separator/delimiter in generated column names in case of multiple
4529
4580
  # `values` columns.
4581
+ # @param column_naming ['auto', 'combine']
4582
+ # How resulting column names will be constructed.
4530
4583
  #
4531
4584
  # @return [DataFrame]
4532
4585
  #
@@ -4557,7 +4610,8 @@ module Polars
4557
4610
  aggregate_function: nil,
4558
4611
  maintain_order: true,
4559
4612
  sort_columns: false,
4560
- separator: "_"
4613
+ separator: "_",
4614
+ column_naming: "auto"
4561
4615
  )
4562
4616
  if on_columns.nil?
4563
4617
  cols = select(on).unique(maintain_order: true)
@@ -4577,7 +4631,8 @@ module Polars
4577
4631
  values: values,
4578
4632
  aggregate_function: aggregate_function,
4579
4633
  maintain_order: maintain_order,
4580
- separator: separator
4634
+ separator: separator,
4635
+ column_naming: column_naming
4581
4636
  )
4582
4637
  .collect(optimizations: QueryOptFlags._eager)
4583
4638
  end
@@ -4593,7 +4648,8 @@ module Polars
4593
4648
  #
4594
4649
  # @param on [Object]
4595
4650
  # Column(s) or selector(s) to use as values variables; if `on`
4596
- # is empty all columns that are not in `index` will be used.
4651
+ # is empty no columns will be used. If set to `nil` (default)
4652
+ # all columns that are not in `index` will be used.
4597
4653
  # @param index [Object]
4598
4654
  # Column(s) or selector(s) to use as identifier variables.
4599
4655
  # @param variable_name [Object]
@@ -4627,7 +4683,7 @@ module Polars
4627
4683
  # # │ z ┆ c ┆ 6 │
4628
4684
  # # └─────┴──────────┴───────┘
4629
4685
  def unpivot(on = nil, index: nil, variable_name: nil, value_name: nil)
4630
- on = on.nil? ? [] : Utils._expand_selectors(self, on)
4686
+ on = on.nil? ? nil : Utils._expand_selectors(self, on)
4631
4687
  index = index.nil? ? [] : Utils._expand_selectors(self, index)
4632
4688
 
4633
4689
  _from_rbdf(_df.unpivot(on, index, value_name, variable_name))
@@ -6488,7 +6544,7 @@ module Polars
6488
6544
  # # │ foo ┆ 1 ┆ a ┆ true ┆ [1, 2] ┆ baz │
6489
6545
  # # │ bar ┆ 2 ┆ b ┆ null ┆ [3] ┆ womp │
6490
6546
  # # └────────┴─────┴─────┴──────┴───────────┴───────┘
6491
- def unnest(columns, *more_columns, separator: nil)
6547
+ def unnest(columns = nil, *more_columns, separator: nil)
6492
6548
  lazy.unnest(columns, *more_columns, separator: separator).collect(optimizations: QueryOptFlags._eager)
6493
6549
  end
6494
6550
 
@@ -6504,6 +6560,10 @@ module Polars
6504
6560
  # Other DataFrame that must be merged
6505
6561
  # @param key [String]
6506
6562
  # Key that is sorted.
6563
+ # @param maintain_order [Boolean]
6564
+ # If `true`, the output is guaranteed to have left-biased ordering
6565
+ # for equal keys: rows from the left frame appear before rows from
6566
+ # the right frame when their keys are equal.
6507
6567
  #
6508
6568
  # @return [DataFrame]
6509
6569
  #
@@ -6530,8 +6590,8 @@ module Polars
6530
6590
  # # │ steve ┆ 42 │
6531
6591
  # # │ elise ┆ 44 │
6532
6592
  # # └────────┴─────┘
6533
- def merge_sorted(other, key)
6534
- lazy.merge_sorted(other.lazy, key).collect(optimizations: QueryOptFlags._eager)
6593
+ def merge_sorted(other, key, maintain_order: false)
6594
+ lazy.merge_sorted(other.lazy, key, maintain_order: maintain_order).collect(optimizations: QueryOptFlags._eager)
6535
6595
  end
6536
6596
 
6537
6597
  # Flag a column as sorted.
@@ -6545,14 +6605,17 @@ module Polars
6545
6605
  # Column that is sorted.
6546
6606
  # @param descending [Boolean]
6547
6607
  # Whether the column is sorted in descending order.
6608
+ # @param nulls_last [Boolean]
6609
+ # Whether the nulls are at the end.
6548
6610
  #
6549
6611
  # @return [DataFrame]
6550
6612
  def set_sorted(
6551
6613
  column,
6552
- descending: false
6614
+ descending: false,
6615
+ nulls_last: false
6553
6616
  )
6554
6617
  lazy
6555
- .set_sorted(column, descending: descending)
6618
+ .set_sorted(column, descending: descending, nulls_last: nulls_last)
6556
6619
  .collect(optimizations: QueryOptFlags._eager)
6557
6620
  end
6558
6621
 
@@ -7111,7 +7174,7 @@ module Polars
7111
7174
  end
7112
7175
 
7113
7176
  def _select_rows_by_slice(df, key)
7114
- return Slice.new(df).apply(key)
7177
+ Slice.new(df).apply(key)
7115
7178
  end
7116
7179
 
7117
7180
  def _select_rows_by_index(df, key)