polars-df 0.25.0 → 0.26.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +33 -0
  3. data/Cargo.lock +270 -97
  4. data/LICENSE.txt +1 -1
  5. data/README.md +1 -3
  6. data/ext/polars/Cargo.toml +19 -18
  7. data/ext/polars/src/catalog/unity.rs +15 -20
  8. data/ext/polars/src/conversion/any_value.rs +53 -29
  9. data/ext/polars/src/conversion/chunked_array.rs +58 -56
  10. data/ext/polars/src/conversion/datetime.rs +58 -7
  11. data/ext/polars/src/conversion/mod.rs +200 -150
  12. data/ext/polars/src/dataframe/export.rs +15 -12
  13. data/ext/polars/src/dataframe/general.rs +25 -7
  14. data/ext/polars/src/dataframe/map.rs +6 -4
  15. data/ext/polars/src/error.rs +1 -1
  16. data/ext/polars/src/expr/array.rs +0 -24
  17. data/ext/polars/src/expr/datatype.rs +13 -3
  18. data/ext/polars/src/expr/datetime.rs +4 -4
  19. data/ext/polars/src/expr/general.rs +35 -15
  20. data/ext/polars/src/expr/list.rs +0 -26
  21. data/ext/polars/src/expr/rolling.rs +24 -0
  22. data/ext/polars/src/functions/business.rs +2 -2
  23. data/ext/polars/src/functions/io.rs +4 -3
  24. data/ext/polars/src/functions/lazy.rs +65 -46
  25. data/ext/polars/src/functions/meta.rs +6 -5
  26. data/ext/polars/src/functions/mod.rs +0 -1
  27. data/ext/polars/src/functions/range.rs +13 -0
  28. data/ext/polars/src/functions/utils.rs +4 -2
  29. data/ext/polars/src/interop/arrow/mod.rs +4 -2
  30. data/ext/polars/src/interop/arrow/to_rb.rs +1 -1
  31. data/ext/polars/src/interop/numo/to_numo_series.rs +26 -25
  32. data/ext/polars/src/io/scan_options.rs +6 -3
  33. data/ext/polars/src/io/sink_options.rs +2 -0
  34. data/ext/polars/src/lazyframe/general.rs +243 -17
  35. data/ext/polars/src/lazyframe/optflags.rs +2 -1
  36. data/ext/polars/src/lib.rs +39 -35
  37. data/ext/polars/src/map/lazy.rs +5 -2
  38. data/ext/polars/src/map/series.rs +19 -18
  39. data/ext/polars/src/on_startup.rs +25 -6
  40. data/ext/polars/src/ruby/numo.rs +3 -4
  41. data/ext/polars/src/ruby/plan_callback.rs +1 -4
  42. data/ext/polars/src/ruby/rb_modules.rs +2 -4
  43. data/ext/polars/src/ruby/ruby_udf.rs +7 -9
  44. data/ext/polars/src/ruby/utils.rs +12 -1
  45. data/ext/polars/src/series/aggregation.rs +13 -1
  46. data/ext/polars/src/series/construction.rs +31 -50
  47. data/ext/polars/src/series/export.rs +33 -38
  48. data/ext/polars/src/series/general.rs +6 -6
  49. data/ext/polars/src/series/map.rs +3 -2
  50. data/ext/polars/src/series/scatter.rs +4 -4
  51. data/ext/polars/src/utils.rs +31 -7
  52. data/lib/polars/array_expr.rb +23 -7
  53. data/lib/polars/array_name_space.rb +16 -2
  54. data/lib/polars/binary_name_space.rb +32 -0
  55. data/lib/polars/collect_batches.rb +4 -0
  56. data/lib/polars/data_frame.rb +144 -11
  57. data/lib/polars/data_type_group.rb +5 -0
  58. data/lib/polars/date_time_expr.rb +91 -3
  59. data/lib/polars/date_time_name_space.rb +7 -1
  60. data/lib/polars/expr.rb +247 -44
  61. data/lib/polars/functions/business.rb +2 -2
  62. data/lib/polars/functions/datatype.rb +30 -0
  63. data/lib/polars/functions/eager.rb +80 -7
  64. data/lib/polars/functions/lazy.rb +97 -2
  65. data/lib/polars/functions/range/linear_space.rb +118 -0
  66. data/lib/polars/io/csv.rb +27 -5
  67. data/lib/polars/io/database.rb +2 -3
  68. data/lib/polars/io/ipc.rb +2 -2
  69. data/lib/polars/io/lines.rb +172 -0
  70. data/lib/polars/io/parquet.rb +1 -1
  71. data/lib/polars/io/sink_options.rb +5 -2
  72. data/lib/polars/lazy_frame.rb +517 -14
  73. data/lib/polars/list_expr.rb +21 -7
  74. data/lib/polars/list_name_space.rb +16 -2
  75. data/lib/polars/query_opt_flags.rb +23 -5
  76. data/lib/polars/selectors.rb +2 -2
  77. data/lib/polars/series.rb +176 -19
  78. data/lib/polars/sql_context.rb +2 -2
  79. data/lib/polars/string_cache.rb +19 -72
  80. data/lib/polars/string_expr.rb +1 -7
  81. data/lib/polars/string_name_space.rb +1 -7
  82. data/lib/polars/utils/construction/series.rb +24 -39
  83. data/lib/polars/utils/convert.rb +16 -6
  84. data/lib/polars/utils/parse.rb +7 -0
  85. data/lib/polars/utils/reduce_balanced.rb +43 -0
  86. data/lib/polars/utils/various.rb +5 -0
  87. data/lib/polars/version.rb +1 -1
  88. data/lib/polars.rb +2 -1
  89. metadata +4 -17
  90. data/ext/polars/src/functions/string_cache.rs +0 -24
@@ -4,9 +4,10 @@ use num_traits::AsPrimitive;
4
4
  use polars::prelude::*;
5
5
 
6
6
  use crate::any_value::rb_object_to_any_value;
7
- use crate::conversion::{Wrap, slice_extract_wrapped, vec_extract_wrapped};
7
+ use crate::conversion::Wrap;
8
8
  use crate::prelude::ObjectValue;
9
- use crate::{RbPolarsErr, RbResult, RbSeries, RbTypeError, RbValueError};
9
+ use crate::ruby::gvl::GvlExt;
10
+ use crate::{RbResult, RbSeries, RbTypeError, RbValueError};
10
11
 
11
12
  pub fn series_from_objects(rb: &Ruby, name: PlSmallStr, objects: Vec<ObjectValue>) -> Series {
12
13
  let mut validity = BitmapBuilder::with_capacity(objects.len());
@@ -107,21 +108,26 @@ impl RbSeries {
107
108
  }
108
109
  }
109
110
 
110
- fn vec_wrap_any_value<'s>(arr: RArray) -> RbResult<Vec<Wrap<AnyValue<'s>>>> {
111
- let mut val = Vec::with_capacity(arr.len());
112
- for v in arr.into_iter() {
113
- val.push(Wrap::<AnyValue<'s>>::try_convert(v)?);
114
- }
115
- Ok(val)
111
+ fn convert_to_avs(
112
+ values: RArray,
113
+ strict: bool,
114
+ allow_object: bool,
115
+ ) -> RbResult<Vec<AnyValue<'static>>> {
116
+ values
117
+ .into_iter()
118
+ .map(|v| rb_object_to_any_value(v, strict, allow_object))
119
+ .collect()
116
120
  }
117
121
 
118
122
  impl RbSeries {
119
123
  pub fn new_from_any_values(name: String, values: RArray, strict: bool) -> RbResult<Self> {
120
- let any_values_result = vec_wrap_any_value(values);
121
- // from anyvalues is fallible
124
+ let any_values_result = values
125
+ .into_iter()
126
+ .map(|v| rb_object_to_any_value(v, strict, true))
127
+ .collect::<RbResult<Vec<AnyValue>>>();
128
+
122
129
  let result = any_values_result.and_then(|avs| {
123
- let avs = slice_extract_wrapped(&avs);
124
- let s = Series::from_any_values(name.clone().into(), avs, strict).map_err(|e| {
130
+ let s = Series::from_any_values(name.clone().into(), avs.as_slice(), strict).map_err(|e| {
125
131
  RbTypeError::new_err(format!(
126
132
  "{e}\n\nHint: Try setting `strict: false` to allow passing data with mixed types."
127
133
  ))
@@ -131,7 +137,7 @@ impl RbSeries {
131
137
 
132
138
  // Fall back to Object type for non-strict construction.
133
139
  if !strict && result.is_err() {
134
- return Self::new_object(name, values, strict);
140
+ return Ruby::attach(|rb| Self::new_object(rb, name, values, strict));
135
141
  }
136
142
 
137
143
  result
@@ -143,21 +149,13 @@ impl RbSeries {
143
149
  dtype: Wrap<DataType>,
144
150
  strict: bool,
145
151
  ) -> RbResult<Self> {
146
- let any_values = values
147
- .into_iter()
148
- .map(|v| rb_object_to_any_value(v, strict))
149
- .collect::<RbResult<Vec<AnyValue>>>()?;
150
- let s = Series::from_any_values_and_dtype(
151
- name.into(),
152
- any_values.as_slice(),
153
- &dtype.0,
154
- strict,
155
- )
156
- .map_err(|e| {
157
- RbTypeError::new_err(format!(
158
- "{e}\n\nHint: Try setting `strict: false` to allow passing data with mixed types."
159
- ))
160
- })?;
152
+ let avs = convert_to_avs(values, strict, false)?;
153
+ let s = Series::from_any_values_and_dtype(name.into(), avs.as_slice(), &dtype.0, strict)
154
+ .map_err(|e| {
155
+ RbTypeError::new_err(format!(
156
+ "{e}\n\nHint: Try setting `strict: false` to allow passing data with mixed types."
157
+ ))
158
+ })?;
161
159
  Ok(s.into())
162
160
  }
163
161
 
@@ -204,7 +202,7 @@ impl RbSeries {
204
202
  Ok(Series::new_null(name.into(), len).into())
205
203
  }
206
204
 
207
- pub fn new_object(name: String, val: RArray, _strict: bool) -> RbResult<Self> {
205
+ pub fn new_object(_rb: &Ruby, name: String, val: RArray, _strict: bool) -> RbResult<Self> {
208
206
  let val = val
209
207
  .into_iter()
210
208
  .map(ObjectValue::from)
@@ -233,29 +231,12 @@ impl RbSeries {
233
231
  }
234
232
 
235
233
  pub fn new_array(
236
- width: usize,
237
- inner: Option<Wrap<DataType>>,
238
234
  name: String,
239
- val: RArray,
240
- _strict: bool,
235
+ values: RArray,
236
+ strict: bool,
237
+ dtype: Wrap<DataType>,
241
238
  ) -> RbResult<Self> {
242
- let val = vec_wrap_any_value(val)?;
243
- let val = vec_extract_wrapped(val);
244
- let out = Series::new(name.into(), &val);
245
- match out.dtype() {
246
- DataType::List(list_inner) => {
247
- let out = out
248
- .cast(&DataType::Array(
249
- Box::new(inner.map(|dt| dt.0).unwrap_or(*list_inner.clone())),
250
- width,
251
- ))
252
- .map_err(RbPolarsErr::from)?;
253
- Ok(out.into())
254
- }
255
- _ => Err(RbValueError::new_err(
256
- "could not create Array from input".to_string(),
257
- )),
258
- }
239
+ Self::new_from_any_values_and_dtype(name, values, dtype, strict)
259
240
  }
260
241
 
261
242
  pub fn new_decimal(name: String, values: RArray, strict: bool) -> RbResult<Self> {
@@ -2,49 +2,49 @@ use magnus::{IntoValue, Ruby, Value, value::ReprValue};
2
2
  use polars_core::prelude::*;
3
3
 
4
4
  use crate::prelude::*;
5
+ use crate::ruby::utils::TryIntoValue;
5
6
  use crate::{RbPolarsErr, RbResult, RbSeries};
6
7
 
7
8
  impl RbSeries {
8
9
  /// Convert this Series to a Ruby array.
9
10
  /// This operation copies data.
10
- pub fn to_a(&self) -> RbResult<Value> {
11
- let series = &self.series.read();
11
+ pub fn to_a(ruby: &Ruby, self_: &Self) -> RbResult<Value> {
12
+ let series = &self_.series.read();
12
13
 
13
- fn to_a_recursive(series: &Series) -> RbResult<Value> {
14
- let ruby = Ruby::get().unwrap();
14
+ fn to_a_recursive(ruby: &Ruby, series: &Series) -> RbResult<Value> {
15
15
  let rblist = match series.dtype() {
16
16
  DataType::Boolean => ruby
17
- .ary_from_iter(series.bool().map_err(RbPolarsErr::from)?)
17
+ .ary_from_iter(series.bool().map_err(RbPolarsErr::from)?.iter())
18
18
  .as_value(),
19
19
  DataType::UInt8 => ruby
20
- .ary_from_iter(series.u8().map_err(RbPolarsErr::from)?)
20
+ .ary_from_iter(series.u8().map_err(RbPolarsErr::from)?.iter())
21
21
  .as_value(),
22
22
  DataType::UInt16 => ruby
23
- .ary_from_iter(series.u16().map_err(RbPolarsErr::from)?)
23
+ .ary_from_iter(series.u16().map_err(RbPolarsErr::from)?.iter())
24
24
  .as_value(),
25
25
  DataType::UInt32 => ruby
26
- .ary_from_iter(series.u32().map_err(RbPolarsErr::from)?)
26
+ .ary_from_iter(series.u32().map_err(RbPolarsErr::from)?.iter())
27
27
  .as_value(),
28
28
  DataType::UInt64 => ruby
29
- .ary_from_iter(series.u64().map_err(RbPolarsErr::from)?)
29
+ .ary_from_iter(series.u64().map_err(RbPolarsErr::from)?.iter())
30
30
  .as_value(),
31
31
  DataType::UInt128 => ruby
32
- .ary_from_iter(series.u128().map_err(RbPolarsErr::from)?)
32
+ .ary_from_iter(series.u128().map_err(RbPolarsErr::from)?.iter())
33
33
  .as_value(),
34
34
  DataType::Int8 => ruby
35
- .ary_from_iter(series.i8().map_err(RbPolarsErr::from)?)
35
+ .ary_from_iter(series.i8().map_err(RbPolarsErr::from)?.iter())
36
36
  .as_value(),
37
37
  DataType::Int16 => ruby
38
- .ary_from_iter(series.i16().map_err(RbPolarsErr::from)?)
38
+ .ary_from_iter(series.i16().map_err(RbPolarsErr::from)?.iter())
39
39
  .as_value(),
40
40
  DataType::Int32 => ruby
41
- .ary_from_iter(series.i32().map_err(RbPolarsErr::from)?)
41
+ .ary_from_iter(series.i32().map_err(RbPolarsErr::from)?.iter())
42
42
  .as_value(),
43
43
  DataType::Int64 => ruby
44
- .ary_from_iter(series.i64().map_err(RbPolarsErr::from)?)
44
+ .ary_from_iter(series.i64().map_err(RbPolarsErr::from)?.iter())
45
45
  .as_value(),
46
46
  DataType::Int128 => ruby
47
- .ary_from_iter(series.i128().map_err(RbPolarsErr::from)?)
47
+ .ary_from_iter(series.i128().map_err(RbPolarsErr::from)?.iter())
48
48
  .as_value(),
49
49
  DataType::Float16 => ruby
50
50
  .ary_from_iter(
@@ -53,14 +53,15 @@ impl RbSeries {
53
53
  .cast(&DataType::Float32)
54
54
  .map_err(RbPolarsErr::from)?
55
55
  .f32()
56
- .map_err(RbPolarsErr::from)?,
56
+ .map_err(RbPolarsErr::from)?
57
+ .iter(),
57
58
  )
58
59
  .as_value(),
59
60
  DataType::Float32 => ruby
60
- .ary_from_iter(series.f32().map_err(RbPolarsErr::from)?)
61
+ .ary_from_iter(series.f32().map_err(RbPolarsErr::from)?.iter())
61
62
  .as_value(),
62
63
  DataType::Float64 => ruby
63
- .ary_from_iter(series.f64().map_err(RbPolarsErr::from)?)
64
+ .ary_from_iter(series.f64().map_err(RbPolarsErr::from)?.iter())
64
65
  .as_value(),
65
66
  DataType::Categorical(_, _) | DataType::Enum(_, _) => {
66
67
  with_match_categorical_physical_type!(series.dtype().cat_physical().unwrap(), |$C| {
@@ -71,10 +72,7 @@ impl RbSeries {
71
72
  let v = ruby.ary_new_capa(series.len());
72
73
  for i in 0..series.len() {
73
74
  let obj: Option<&ObjectValue> = series.get_object(i).map(|any| any.into());
74
- match obj {
75
- Some(val) => v.push(val.to_value()).unwrap(),
76
- None => v.push(ruby.qnil()).unwrap(),
77
- };
75
+ v.push(obj.cloned().into_value_with(ruby))?;
78
76
  }
79
77
  v.as_value()
80
78
  }
@@ -87,7 +85,7 @@ impl RbSeries {
87
85
  v.push(ruby.qnil()).unwrap();
88
86
  }
89
87
  Some(s) => {
90
- let rblst = to_a_recursive(s.as_ref())?;
88
+ let rblst = to_a_recursive(ruby, s.as_ref())?;
91
89
  v.push(rblst)?;
92
90
  }
93
91
  }
@@ -103,7 +101,7 @@ impl RbSeries {
103
101
  v.push(ruby.qnil()).unwrap();
104
102
  }
105
103
  Some(s) => {
106
- let rblst = to_a_recursive(s.as_ref())?;
104
+ let rblst = to_a_recursive(ruby, s.as_ref())?;
107
105
  v.push(rblst)?;
108
106
  }
109
107
  }
@@ -112,35 +110,35 @@ impl RbSeries {
112
110
  }
113
111
  DataType::Date => {
114
112
  let ca = series.date().map_err(RbPolarsErr::from)?;
115
- return Ok(Wrap(ca).into_value_with(&ruby));
113
+ return Wrap(ca).try_into_value_with(ruby);
116
114
  }
117
115
  DataType::Time => {
118
116
  let ca = series.time().map_err(RbPolarsErr::from)?;
119
- return Ok(Wrap(ca).into_value_with(&ruby));
117
+ return Wrap(ca).try_into_value_with(ruby);
120
118
  }
121
119
  DataType::Datetime(_, _) => {
122
120
  let ca = series.datetime().map_err(RbPolarsErr::from)?;
123
- return Ok(Wrap(ca).into_value_with(&ruby));
121
+ return Wrap(ca).try_into_value_with(ruby);
124
122
  }
125
123
  DataType::Decimal(_, _) => {
126
124
  let ca = series.decimal().map_err(RbPolarsErr::from)?;
127
- return Ok(Wrap(ca).into_value_with(&ruby));
125
+ return Wrap(ca).try_into_value_with(ruby);
128
126
  }
129
127
  DataType::String => {
130
128
  let ca = series.str().map_err(RbPolarsErr::from)?;
131
- return Ok(Wrap(ca).into_value_with(&ruby));
129
+ return Wrap(ca).try_into_value_with(ruby);
132
130
  }
133
131
  DataType::Struct(_) => {
134
132
  let ca = series.struct_().map_err(RbPolarsErr::from)?;
135
- return Ok(Wrap(ca).into_value_with(&ruby));
133
+ return Wrap(ca).try_into_value_with(ruby);
136
134
  }
137
135
  DataType::Duration(_) => {
138
136
  let ca = series.duration().map_err(RbPolarsErr::from)?;
139
- return Ok(Wrap(ca).into_value_with(&ruby));
137
+ return Wrap(ca).try_into_value_with(ruby);
140
138
  }
141
139
  DataType::Binary => {
142
140
  let ca = series.binary().map_err(RbPolarsErr::from)?;
143
- return Ok(Wrap(ca).into_value_with(&ruby));
141
+ return Wrap(ca).try_into_value_with(ruby);
144
142
  }
145
143
  DataType::Null => {
146
144
  let null: Option<u8> = None;
@@ -163,10 +161,7 @@ impl RbSeries {
163
161
  }
164
162
  impl ExactSizeIterator for NullIter {}
165
163
 
166
- Ruby::get()
167
- .unwrap()
168
- .ary_from_iter(NullIter { iter, n })
169
- .as_value()
164
+ ruby.ary_from_iter(NullIter { iter, n }).as_value()
170
165
  }
171
166
  DataType::Unknown(_) => {
172
167
  panic!("to_a not implemented for unknown")
@@ -175,12 +170,12 @@ impl RbSeries {
175
170
  unreachable!()
176
171
  }
177
172
  DataType::Extension(_, _) => {
178
- return to_a_recursive(series.ext().unwrap().storage());
173
+ return to_a_recursive(ruby, series.ext().unwrap().storage());
179
174
  }
180
175
  };
181
176
  Ok(rblist.as_value())
182
177
  }
183
178
 
184
- to_a_recursive(series)
179
+ to_a_recursive(ruby, series)
185
180
  }
186
181
  }
@@ -10,7 +10,7 @@ use crate::ruby::exceptions::{RbIndexError, RbRuntimeError, RbValueError};
10
10
  use crate::ruby::gvl::GvlExt;
11
11
  use crate::ruby::plan_callback::PlanCallbackExt;
12
12
  use crate::ruby::ruby_function::RubyObject;
13
- use crate::ruby::thread::start_background_ruby_thread;
13
+ use crate::ruby::utils::TryIntoValue;
14
14
  use crate::utils::EnterPolarsExt;
15
15
  use crate::{RbDataFrame, RbErr, RbPolarsErr, RbResult, RbSeries};
16
16
 
@@ -113,7 +113,7 @@ impl RbSeries {
113
113
  let rbseries = RbSeries::new(s);
114
114
  rb_modules::pl_utils(ruby).funcall("wrap_s", (rbseries,))
115
115
  }
116
- _ => Ok(Wrap(av).into_value_with(ruby)),
116
+ _ => Wrap(av).try_into_value_with(ruby),
117
117
  }
118
118
  }
119
119
 
@@ -157,8 +157,8 @@ impl RbSeries {
157
157
  self.series.write().rename(name.into());
158
158
  }
159
159
 
160
- pub fn dtype(rb: &Ruby, self_: &Self) -> Value {
161
- Wrap(self_.series.read().dtype().clone()).into_value_with(rb)
160
+ pub fn dtype(rb: &Ruby, self_: &Self) -> RbResult<Value> {
161
+ Wrap(self_.series.read().dtype().clone()).try_into_value_with(rb)
162
162
  }
163
163
 
164
164
  pub fn set_sorted_flag(&self, descending: bool) -> Self {
@@ -445,10 +445,10 @@ impl RbSeries {
445
445
  width_strat: Wrap<ListToStructWidthStrategy>,
446
446
  name_gen: Option<Value>,
447
447
  ) -> RbResult<Self> {
448
- start_background_ruby_thread(rb);
449
448
  let name_gen = name_gen.map(RubyObject::from);
449
+ // ensure new_ruby is called with GVL
450
+ let get_index_name = name_gen.map(PlanCallback::<usize, String>::new_ruby);
450
451
  rb.enter_polars(|| {
451
- let get_index_name = name_gen.map(PlanCallback::<usize, String>::new_ruby);
452
452
  let get_index_name = get_index_name.map(|f| {
453
453
  NameGenerator(Arc::new(move |i| f.call(i).map(PlSmallStr::from)) as Arc<_>)
454
454
  });
@@ -4,6 +4,7 @@ use super::RbSeries;
4
4
  use crate::map::series::ApplyLambdaGeneric;
5
5
  use crate::prelude::*;
6
6
  use crate::ruby::gvl::GvlExt;
7
+ use crate::ruby::utils::TryIntoValue;
7
8
  use crate::series::construction::series_from_objects;
8
9
  use crate::{RbPolarsErr, RbResult};
9
10
  use crate::{apply_all_polars_dtypes, raise_err};
@@ -78,7 +79,7 @@ fn call_and_collect_objects<T, I>(
78
79
  skip_nulls: bool,
79
80
  ) -> RbResult<Series>
80
81
  where
81
- T: IntoValue,
82
+ T: TryIntoValue,
82
83
  I: Iterator<Item = Option<T>>,
83
84
  {
84
85
  let mut objects = Vec::with_capacity(len);
@@ -91,7 +92,7 @@ where
91
92
  continue;
92
93
  }
93
94
  None => rb.qnil().into_value_with(rb),
94
- Some(val) => val.into_value_with(rb),
95
+ Some(val) => val.try_into_value_with(rb)?,
95
96
  };
96
97
  let out: Value = lambda.funcall("call", (arg,))?;
97
98
  objects.push(ObjectValue {
@@ -139,23 +139,23 @@ fn scatter_impl(
139
139
  with_match_physical_numeric_polars_type!(dt, |$T| {
140
140
  let ca: &mut ChunkedArray<$T> = mutable_s.as_mut();
141
141
  let values: &ChunkedArray<$T> = values.as_ref().as_ref();
142
- ca.scatter(idx, values)
142
+ ca.scatter(idx, values.iter())
143
143
  })
144
144
  }
145
145
  DataType::Boolean => {
146
146
  let ca: &mut ChunkedArray<BooleanType> = mutable_s.as_mut();
147
147
  let values = values.bool()?;
148
- ca.scatter(idx, values)
148
+ ca.scatter(idx, values.iter())
149
149
  }
150
150
  DataType::Binary => {
151
151
  let ca: &mut ChunkedArray<BinaryType> = mutable_s.as_mut();
152
152
  let values = values.binary()?;
153
- ca.scatter(idx, values)
153
+ ca.scatter(idx, values.iter())
154
154
  }
155
155
  DataType::String => {
156
156
  let ca: &mut ChunkedArray<StringType> = mutable_s.as_mut();
157
157
  let values = values.str()?;
158
- ca.scatter(idx, values)
158
+ ca.scatter(idx, values.iter())
159
159
  }
160
160
  _ => Err(PolarsError::ComputeError(
161
161
  format!("not yet implemented for dtype: {logical_dtype}").into(),
@@ -16,7 +16,6 @@ macro_rules! apply_all_polars_dtypes {
16
16
  ($self:expr, $method:ident, $($args:expr),*) => {
17
17
  match $self.dtype() {
18
18
  DataType::Boolean => $self.bool().unwrap().$method($($args),*),
19
- DataType::String => $self.str().unwrap().$method($($args),*),
20
19
  DataType::UInt8 => $self.u8().unwrap().$method($($args),*),
21
20
  DataType::UInt16 => $self.u16().unwrap().$method($($args),*),
22
21
  DataType::UInt32 => $self.u32().unwrap().$method($($args),*),
@@ -27,15 +26,40 @@ macro_rules! apply_all_polars_dtypes {
27
26
  DataType::Int32 => $self.i32().unwrap().$method($($args),*),
28
27
  DataType::Int64 => $self.i64().unwrap().$method($($args),*),
29
28
  DataType::Int128 => $self.i128().unwrap().$method($($args),*),
30
- DataType::Float16 => todo!(),
29
+ DataType::Float16 => $self.cast(&DataType::Float32).unwrap().f32().unwrap().$method($($args),*),
31
30
  DataType::Float32 => $self.f32().unwrap().$method($($args),*),
32
31
  DataType::Float64 => $self.f64().unwrap().$method($($args),*),
33
- DataType::Date => $self.date().unwrap().physical().$method($($args),*),
34
- DataType::Datetime(_, _) => $self.datetime().unwrap().physical().$method($($args),*),
35
- // TODO implement
36
- // DataType::List(_) => $self.list().unwrap().$method($($args),*),
32
+ DataType::String => $self.str().unwrap().$method($($args),*),
33
+ DataType::Binary => $self.binary().unwrap().$method($($args),*),
34
+ DataType::Decimal(_, _) => $self.decimal().unwrap().$method($($args),*),
35
+
36
+ DataType::Date => $self.date().unwrap().$method($($args),*),
37
+ DataType::Datetime(_, _) => $self.datetime().unwrap().$method($($args),*),
38
+ DataType::Duration(_) => $self.duration().unwrap().$method($($args),*),
39
+ DataType::Time => $self.time().unwrap().$method($($args),*),
40
+
41
+ DataType::List(_) => $self.list().unwrap().$method($($args),*),
37
42
  DataType::Struct(_) => $self.struct_().unwrap().$method($($args),*),
38
- dt => panic!("dtype {:?} not supported", dt)
43
+ DataType::Array(_, _) => $self.array().unwrap().$method($($args),*),
44
+
45
+ dt @ (DataType::Categorical(_, _) | DataType::Enum(_, _)) => match dt.cat_physical().unwrap() {
46
+ CategoricalPhysical::U8 => $self.cat8().unwrap().$method($($args),*),
47
+ CategoricalPhysical::U16 => $self.cat16().unwrap().$method($($args),*),
48
+ CategoricalPhysical::U32 => $self.cat32().unwrap().$method($($args),*),
49
+ },
50
+
51
+ DataType::Object(_) => {
52
+ $self
53
+ .as_any()
54
+ .downcast_ref::<ObjectChunked<ObjectValue>>()
55
+ .unwrap()
56
+ .$method($($args),*)
57
+ },
58
+ DataType::Extension(_, _) => $self.ext().unwrap().$method($($args),*),
59
+
60
+ DataType::Null => $self.null().unwrap().$method($($args),*),
61
+
62
+ dt @ (DataType::BinaryOffset | DataType::Unknown(_)) => panic!("dtype {:?} not supported", dt)
39
63
  }
40
64
  }
41
65
  }
@@ -377,7 +377,9 @@ module Polars
377
377
  # # │ [1, 2] │
378
378
  # # └───────────┘
379
379
  def unique(maintain_order: false)
380
- Utils.wrap_expr(_rbexpr.arr_unique(maintain_order))
380
+ eval(
381
+ F.element.unique(maintain_order: maintain_order), as_list: true
382
+ )
381
383
  end
382
384
 
383
385
  # Count the number of unique values in every sub-arrays.
@@ -403,7 +405,7 @@ module Polars
403
405
  # # │ [2, 3, 4] ┆ 3 │
404
406
  # # └───────────────┴──────────┘
405
407
  def n_unique
406
- Utils.wrap_expr(_rbexpr.arr_n_unique)
408
+ agg(F.element.n_unique)
407
409
  end
408
410
 
409
411
  # Convert an Array column into a List column with the same inner data type.
@@ -432,6 +434,13 @@ module Polars
432
434
 
433
435
  # Evaluate whether any boolean value is true for every subarray.
434
436
  #
437
+ # @param ignore_nulls [Boolean]
438
+ # * If set to `true` (default), null values are ignored. If there
439
+ # are no non-null values, the output is `false`.
440
+ # * If set to `false`, [Kleene logic](https://en.wikipedia.org/wiki/Three-valued_logic) is used to deal with nulls:
441
+ # if the column contains any null values and no `true` values,
442
+ # the output is null.
443
+ #
435
444
  # @return [Expr]
436
445
  #
437
446
  # @example
@@ -461,12 +470,19 @@ module Polars
461
470
  # # │ [null, null] ┆ false │
462
471
  # # │ null ┆ null │
463
472
  # # └────────────────┴───────┘
464
- def any
465
- Utils.wrap_expr(_rbexpr.arr_any)
473
+ def any(ignore_nulls: true)
474
+ agg(F.element.any(ignore_nulls: ignore_nulls))
466
475
  end
467
476
 
468
477
  # Evaluate whether all boolean values are true for every subarray.
469
478
  #
479
+ # @param ignore_nulls [Boolean]
480
+ # * If set to `true` (default), null values are ignored. If there
481
+ # are no non-null values, the output is `true`.
482
+ # * If set to `false`, [Kleene logic](https://en.wikipedia.org/wiki/Three-valued_logic) is used to deal with nulls:
483
+ # if the column contains any null values and no `false` values,
484
+ # the output is null.
485
+ #
470
486
  # @return [Expr]
471
487
  #
472
488
  # @example
@@ -496,8 +512,8 @@ module Polars
496
512
  # # │ [null, null] ┆ true │
497
513
  # # │ null ┆ null │
498
514
  # # └────────────────┴───────┘
499
- def all
500
- Utils.wrap_expr(_rbexpr.arr_all)
515
+ def all(ignore_nulls: true)
516
+ agg(F.element.all(ignore_nulls: ignore_nulls))
501
517
  end
502
518
 
503
519
  # Sort the arrays in this column.
@@ -567,7 +583,7 @@ module Polars
567
583
  # # │ [9, 1, 2] ┆ [2, 1, 9] │
568
584
  # # └───────────────┴───────────────┘
569
585
  def reverse
570
- Utils.wrap_expr(_rbexpr.arr_reverse)
586
+ eval(F.element.reverse)
571
587
  end
572
588
 
573
589
  # Retrieve the index of the minimal value in every sub-array.
@@ -212,6 +212,13 @@ module Polars
212
212
 
213
213
  # Evaluate whether any boolean value is true for every subarray.
214
214
  #
215
+ # @param ignore_nulls [Boolean]
216
+ # * If set to `true` (default), null values are ignored. If there
217
+ # are no non-null values, the output is `false`.
218
+ # * If set to `false`, [Kleene logic](https://en.wikipedia.org/wiki/Three-valued_logic) is used to deal with nulls:
219
+ # if the column contains any null values and no `true` values,
220
+ # the output is null.
221
+ #
215
222
  # @return [Series]
216
223
  #
217
224
  # @example
@@ -230,7 +237,7 @@ module Polars
230
237
  # # false
231
238
  # # null
232
239
  # # ]
233
- def any
240
+ def any(ignore_nulls: true)
234
241
  super
235
242
  end
236
243
 
@@ -378,6 +385,13 @@ module Polars
378
385
 
379
386
  # Evaluate whether all boolean values are true for every subarray.
380
387
  #
388
+ # @param ignore_nulls [Boolean]
389
+ # * If set to `true` (default), null values are ignored. If there
390
+ # are no non-null values, the output is `true`.
391
+ # * If set to `false`, [Kleene logic](https://en.wikipedia.org/wiki/Three-valued_logic) is used to deal with nulls:
392
+ # if the column contains any null values and no `false` values,
393
+ # the output is null.
394
+ #
381
395
  # @return [Series]
382
396
  #
383
397
  # @example
@@ -396,7 +410,7 @@ module Polars
396
410
  # # true
397
411
  # # null
398
412
  # # ]
399
- def all
413
+ def all(ignore_nulls: true)
400
414
  super
401
415
  end
402
416
 
@@ -226,6 +226,38 @@ module Polars
226
226
  super
227
227
  end
228
228
 
229
+ # Get the byte value at the given index.
230
+ #
231
+ # For example, index `0` would return the first byte of every binary value
232
+ # and index `-1` would return the last byte of every binary value.
233
+ # The behavior if an index is out of bounds is determined by the argument
234
+ # `null_on_oob`.
235
+ #
236
+ # @param index [Object]
237
+ # Index to return per binary value
238
+ # @param null_on_oob [Boolean]
239
+ # Behavior if an index is out of bounds:
240
+ #
241
+ # * true -> set as null
242
+ # * false -> raise an error
243
+ #
244
+ # @return [Series]
245
+ #
246
+ # @example
247
+ # s = Polars::Series.new("a", ["\x01\x02\x03".b, "".b, "\x04\x05".b])
248
+ # s.bin.get(0, null_on_oob: true)
249
+ # # =>
250
+ # # shape: (3,)
251
+ # # Series: 'a' [u8]
252
+ # # [
253
+ # # 1
254
+ # # null
255
+ # # 4
256
+ # # ]
257
+ def get(index, null_on_oob: false)
258
+ super
259
+ end
260
+
229
261
  # Take the first `n` bytes of the binary values.
230
262
  #
231
263
  # @param n [Object]
@@ -14,5 +14,9 @@ module Polars
14
14
  yield DataFrame._from_rbdf(rbdf)
15
15
  end
16
16
  end
17
+
18
+ def arrow_c_stream
19
+ @inner.arrow_c_stream
20
+ end
17
21
  end
18
22
  end