polars-df 0.8.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +42 -1
  3. data/Cargo.lock +159 -66
  4. data/Cargo.toml +0 -3
  5. data/LICENSE.txt +1 -1
  6. data/README.md +3 -2
  7. data/ext/polars/Cargo.toml +18 -8
  8. data/ext/polars/src/batched_csv.rs +7 -5
  9. data/ext/polars/src/conversion/anyvalue.rs +186 -0
  10. data/ext/polars/src/conversion/chunked_array.rs +140 -0
  11. data/ext/polars/src/{conversion.rs → conversion/mod.rs} +273 -342
  12. data/ext/polars/src/dataframe.rs +108 -66
  13. data/ext/polars/src/expr/array.rs +78 -0
  14. data/ext/polars/src/expr/datetime.rs +29 -58
  15. data/ext/polars/src/expr/general.rs +83 -36
  16. data/ext/polars/src/expr/list.rs +58 -6
  17. data/ext/polars/src/expr/meta.rs +48 -0
  18. data/ext/polars/src/expr/rolling.rs +1 -0
  19. data/ext/polars/src/expr/string.rs +62 -11
  20. data/ext/polars/src/expr/struct.rs +8 -4
  21. data/ext/polars/src/file.rs +158 -11
  22. data/ext/polars/src/functions/aggregation.rs +6 -0
  23. data/ext/polars/src/functions/lazy.rs +120 -50
  24. data/ext/polars/src/functions/meta.rs +45 -1
  25. data/ext/polars/src/functions/string_cache.rs +14 -0
  26. data/ext/polars/src/functions/whenthen.rs +47 -17
  27. data/ext/polars/src/{lazyframe.rs → lazyframe/mod.rs} +195 -40
  28. data/ext/polars/src/lib.rs +246 -179
  29. data/ext/polars/src/map/dataframe.rs +17 -9
  30. data/ext/polars/src/series/aggregation.rs +20 -0
  31. data/ext/polars/src/series/mod.rs +35 -4
  32. data/lib/polars/array_expr.rb +453 -0
  33. data/lib/polars/array_name_space.rb +346 -0
  34. data/lib/polars/batched_csv_reader.rb +4 -2
  35. data/lib/polars/cat_expr.rb +24 -0
  36. data/lib/polars/cat_name_space.rb +75 -0
  37. data/lib/polars/config.rb +2 -2
  38. data/lib/polars/data_frame.rb +306 -96
  39. data/lib/polars/data_types.rb +191 -28
  40. data/lib/polars/date_time_expr.rb +41 -18
  41. data/lib/polars/date_time_name_space.rb +9 -3
  42. data/lib/polars/exceptions.rb +12 -1
  43. data/lib/polars/expr.rb +898 -215
  44. data/lib/polars/functions/aggregation/horizontal.rb +246 -0
  45. data/lib/polars/functions/aggregation/vertical.rb +282 -0
  46. data/lib/polars/functions/as_datatype.rb +248 -0
  47. data/lib/polars/functions/col.rb +47 -0
  48. data/lib/polars/functions/eager.rb +182 -0
  49. data/lib/polars/functions/lazy.rb +1280 -0
  50. data/lib/polars/functions/len.rb +49 -0
  51. data/lib/polars/functions/lit.rb +35 -0
  52. data/lib/polars/functions/random.rb +16 -0
  53. data/lib/polars/functions/range/date_range.rb +103 -0
  54. data/lib/polars/functions/range/int_range.rb +51 -0
  55. data/lib/polars/functions/repeat.rb +144 -0
  56. data/lib/polars/functions/whenthen.rb +96 -0
  57. data/lib/polars/functions.rb +29 -416
  58. data/lib/polars/group_by.rb +2 -2
  59. data/lib/polars/io.rb +36 -31
  60. data/lib/polars/lazy_frame.rb +405 -88
  61. data/lib/polars/list_expr.rb +158 -8
  62. data/lib/polars/list_name_space.rb +102 -0
  63. data/lib/polars/meta_expr.rb +175 -7
  64. data/lib/polars/series.rb +282 -41
  65. data/lib/polars/string_cache.rb +75 -0
  66. data/lib/polars/string_expr.rb +413 -96
  67. data/lib/polars/string_name_space.rb +4 -4
  68. data/lib/polars/testing.rb +507 -0
  69. data/lib/polars/utils.rb +106 -8
  70. data/lib/polars/version.rb +1 -1
  71. data/lib/polars/whenthen.rb +83 -0
  72. data/lib/polars.rb +16 -4
  73. metadata +37 -8
  74. data/lib/polars/lazy_functions.rb +0 -1181
  75. data/lib/polars/when.rb +0 -16
  76. data/lib/polars/when_then.rb +0 -19
@@ -1,24 +1,28 @@
1
+ pub(crate) mod anyvalue;
2
+ mod chunked_array;
3
+
1
4
  use std::fmt::{Debug, Display, Formatter};
2
5
  use std::hash::{Hash, Hasher};
6
+ use std::num::NonZeroUsize;
3
7
 
4
- use magnus::encoding::{EncodingCapable, Index};
5
8
  use magnus::{
6
- class, exception, prelude::*, r_hash::ForEach, value::Opaque, Float, Integer, IntoValue,
7
- Module, RArray, RHash, RString, Ruby, Symbol, TryConvert, Value,
9
+ class, exception, prelude::*, r_hash::ForEach, value::Opaque, IntoValue, Module, RArray, RHash,
10
+ Ruby, Symbol, TryConvert, Value,
8
11
  };
9
12
  use polars::chunked_array::object::PolarsObjectSafe;
10
13
  use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
11
14
  use polars::datatypes::AnyValue;
12
- use polars::frame::row::{any_values_to_dtype, Row};
15
+ use polars::frame::row::Row;
13
16
  use polars::frame::NullStrategy;
14
17
  use polars::io::avro::AvroCompression;
15
18
  use polars::prelude::*;
16
19
  use polars::series::ops::NullBehavior;
17
- use polars_utils::total_ord::TotalEq;
20
+ use polars_core::utils::arrow::array::Array;
21
+ use polars_utils::total_ord::{TotalEq, TotalHash};
18
22
  use smartstring::alias::String as SmartString;
19
23
 
20
24
  use crate::object::OBJECT_NAME;
21
- use crate::rb_modules::utils;
25
+ use crate::rb_modules::series;
22
26
  use crate::{RbDataFrame, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbTypeError, RbValueError};
23
27
 
24
28
  pub(crate) fn slice_to_wrapped<T>(slice: &[T]) -> &[Wrap<T>] {
@@ -78,36 +82,11 @@ pub(crate) fn get_series(obj: Value) -> RbResult<Series> {
78
82
  Ok(rbs.series.borrow().clone())
79
83
  }
80
84
 
81
- impl TryConvert for Wrap<StringChunked> {
82
- fn try_convert(obj: Value) -> RbResult<Self> {
83
- let (seq, len) = get_rbseq(obj)?;
84
- let mut builder = StringChunkedBuilder::new("", len, len * 25);
85
-
86
- for res in seq.each() {
87
- let item = res?;
88
- match String::try_convert(item) {
89
- Ok(val) => builder.append_value(&val),
90
- Err(_) => builder.append_null(),
91
- }
92
- }
93
- Ok(Wrap(builder.finish()))
94
- }
95
- }
96
-
97
- impl TryConvert for Wrap<BinaryChunked> {
98
- fn try_convert(obj: Value) -> RbResult<Self> {
99
- let (seq, len) = get_rbseq(obj)?;
100
- let mut builder = BinaryChunkedBuilder::new("", len, len * 25);
101
-
102
- for res in seq.each() {
103
- let item = res?;
104
- match RString::try_convert(item) {
105
- Ok(val) => builder.append_value(unsafe { val.as_slice() }),
106
- Err(_) => builder.append_null(),
107
- }
108
- }
109
- Ok(Wrap(builder.finish()))
110
- }
85
+ pub(crate) fn to_series(s: RbSeries) -> Value {
86
+ let series = series();
87
+ series
88
+ .funcall::<_, _, Value>("_from_rbseries", (s,))
89
+ .unwrap()
111
90
  }
112
91
 
113
92
  impl TryConvert for Wrap<NullValues> {
@@ -134,102 +113,84 @@ fn struct_dict<'a>(vals: impl Iterator<Item = AnyValue<'a>>, flds: &[Field]) ->
134
113
  dict.into_value()
135
114
  }
136
115
 
137
- impl IntoValue for Wrap<AnyValue<'_>> {
138
- fn into_value_with(self, ruby: &Ruby) -> Value {
116
+ impl IntoValue for Wrap<DataType> {
117
+ fn into_value_with(self, _: &Ruby) -> Value {
118
+ let pl = crate::rb_modules::polars();
119
+
139
120
  match self.0 {
140
- AnyValue::UInt8(v) => ruby.into_value(v),
141
- AnyValue::UInt16(v) => ruby.into_value(v),
142
- AnyValue::UInt32(v) => ruby.into_value(v),
143
- AnyValue::UInt64(v) => ruby.into_value(v),
144
- AnyValue::Int8(v) => ruby.into_value(v),
145
- AnyValue::Int16(v) => ruby.into_value(v),
146
- AnyValue::Int32(v) => ruby.into_value(v),
147
- AnyValue::Int64(v) => ruby.into_value(v),
148
- AnyValue::Float32(v) => ruby.into_value(v),
149
- AnyValue::Float64(v) => ruby.into_value(v),
150
- AnyValue::Null => ruby.qnil().as_value(),
151
- AnyValue::Boolean(v) => ruby.into_value(v),
152
- AnyValue::String(v) => ruby.into_value(v),
153
- AnyValue::StringOwned(v) => ruby.into_value(v.as_str()),
154
- AnyValue::Categorical(idx, rev, arr) => {
155
- let s = if arr.is_null() {
156
- rev.get(idx)
157
- } else {
158
- unsafe { arr.deref_unchecked().value(idx as usize) }
159
- };
160
- s.into_value()
121
+ DataType::Int8 => {
122
+ let class = pl.const_get::<_, Value>("Int8").unwrap();
123
+ class.funcall("new", ()).unwrap()
161
124
  }
162
- AnyValue::Date(v) => utils().funcall("_to_ruby_date", (v,)).unwrap(),
163
- AnyValue::Datetime(v, time_unit, time_zone) => {
164
- let time_unit = time_unit.to_ascii();
165
- utils()
166
- .funcall("_to_ruby_datetime", (v, time_unit, time_zone.clone()))
167
- .unwrap()
125
+ DataType::Int16 => {
126
+ let class = pl.const_get::<_, Value>("Int16").unwrap();
127
+ class.funcall("new", ()).unwrap()
168
128
  }
169
- AnyValue::Duration(v, time_unit) => {
170
- let time_unit = time_unit.to_ascii();
171
- utils()
172
- .funcall("_to_ruby_duration", (v, time_unit))
173
- .unwrap()
129
+ DataType::Int32 => {
130
+ let class = pl.const_get::<_, Value>("Int32").unwrap();
131
+ class.funcall("new", ()).unwrap()
174
132
  }
175
- AnyValue::Time(v) => utils().funcall("_to_ruby_time", (v,)).unwrap(),
176
- AnyValue::Array(v, _) | AnyValue::List(v) => RbSeries::new(v).to_a().into_value(),
177
- ref av @ AnyValue::Struct(_, _, flds) => struct_dict(av._iter_struct_av(), flds),
178
- AnyValue::StructOwned(payload) => struct_dict(payload.0.into_iter(), &payload.1),
179
- AnyValue::Object(v) => {
180
- let object = v.as_any().downcast_ref::<ObjectValue>().unwrap();
181
- object.to_object()
133
+ DataType::Int64 => {
134
+ let class = pl.const_get::<_, Value>("Int64").unwrap();
135
+ class.funcall("new", ()).unwrap()
182
136
  }
183
- AnyValue::ObjectOwned(v) => {
184
- let object = v.0.as_any().downcast_ref::<ObjectValue>().unwrap();
185
- object.to_object()
137
+ DataType::UInt8 => {
138
+ let class = pl.const_get::<_, Value>("UInt8").unwrap();
139
+ class.funcall("new", ()).unwrap()
140
+ }
141
+ DataType::UInt16 => {
142
+ let class = pl.const_get::<_, Value>("UInt16").unwrap();
143
+ class.funcall("new", ()).unwrap()
144
+ }
145
+ DataType::UInt32 => {
146
+ let class = pl.const_get::<_, Value>("UInt32").unwrap();
147
+ class.funcall("new", ()).unwrap()
148
+ }
149
+ DataType::UInt64 => {
150
+ let class = pl.const_get::<_, Value>("UInt64").unwrap();
151
+ class.funcall("new", ()).unwrap()
152
+ }
153
+ DataType::Float32 => {
154
+ let class = pl.const_get::<_, Value>("Float32").unwrap();
155
+ class.funcall("new", ()).unwrap()
156
+ }
157
+ DataType::Float64 => {
158
+ let class = pl.const_get::<_, Value>("Float64").unwrap();
159
+ class.funcall("new", ()).unwrap()
186
160
  }
187
- AnyValue::Binary(v) => RString::from_slice(v).into_value(),
188
- AnyValue::BinaryOwned(v) => RString::from_slice(&v).into_value(),
189
- AnyValue::Decimal(v, scale) => utils()
190
- .funcall("_to_ruby_decimal", (v.to_string(), -(scale as i32)))
191
- .unwrap(),
192
- }
193
- }
194
- }
195
-
196
- impl IntoValue for Wrap<DataType> {
197
- fn into_value_with(self, _: &Ruby) -> Value {
198
- let pl = crate::rb_modules::polars();
199
-
200
- match self.0 {
201
- DataType::Int8 => pl.const_get::<_, Value>("Int8").unwrap(),
202
- DataType::Int16 => pl.const_get::<_, Value>("Int16").unwrap(),
203
- DataType::Int32 => pl.const_get::<_, Value>("Int32").unwrap(),
204
- DataType::Int64 => pl.const_get::<_, Value>("Int64").unwrap(),
205
- DataType::UInt8 => pl.const_get::<_, Value>("UInt8").unwrap(),
206
- DataType::UInt16 => pl.const_get::<_, Value>("UInt16").unwrap(),
207
- DataType::UInt32 => pl.const_get::<_, Value>("UInt32").unwrap(),
208
- DataType::UInt64 => pl.const_get::<_, Value>("UInt64").unwrap(),
209
- DataType::Float32 => pl.const_get::<_, Value>("Float32").unwrap(),
210
- DataType::Float64 => pl.const_get::<_, Value>("Float64").unwrap(),
211
161
  DataType::Decimal(precision, scale) => {
212
- let decimal_class = pl.const_get::<_, Value>("Decimal").unwrap();
213
- decimal_class
162
+ let class = pl.const_get::<_, Value>("Decimal").unwrap();
163
+ class
214
164
  .funcall::<_, _, Value>("new", (precision, scale))
215
165
  .unwrap()
216
166
  }
217
- DataType::Boolean => pl.const_get::<_, Value>("Boolean").unwrap(),
218
- DataType::String => pl.const_get::<_, Value>("String").unwrap(),
219
- DataType::Binary => pl.const_get::<_, Value>("Binary").unwrap(),
167
+ DataType::Boolean => {
168
+ let class = pl.const_get::<_, Value>("Boolean").unwrap();
169
+ class.funcall("new", ()).unwrap()
170
+ }
171
+ DataType::String => {
172
+ let class = pl.const_get::<_, Value>("String").unwrap();
173
+ class.funcall("new", ()).unwrap()
174
+ }
175
+ DataType::Binary => {
176
+ let class = pl.const_get::<_, Value>("Binary").unwrap();
177
+ class.funcall("new", ()).unwrap()
178
+ }
220
179
  DataType::Array(inner, size) => {
180
+ let class = pl.const_get::<_, Value>("Array").unwrap();
221
181
  let inner = Wrap(*inner);
222
- let list_class = pl.const_get::<_, Value>("Array").unwrap();
223
- list_class
224
- .funcall::<_, _, Value>("new", (size, inner))
225
- .unwrap()
182
+ let args = (inner, size);
183
+ class.funcall::<_, _, Value>("new", args).unwrap()
226
184
  }
227
185
  DataType::List(inner) => {
186
+ let class = pl.const_get::<_, Value>("List").unwrap();
228
187
  let inner = Wrap(*inner);
229
- let list_class = pl.const_get::<_, Value>("List").unwrap();
230
- list_class.funcall::<_, _, Value>("new", (inner,)).unwrap()
188
+ class.funcall::<_, _, Value>("new", (inner,)).unwrap()
189
+ }
190
+ DataType::Date => {
191
+ let class = pl.const_get::<_, Value>("Date").unwrap();
192
+ class.funcall("new", ()).unwrap()
231
193
  }
232
- DataType::Date => pl.const_get::<_, Value>("Date").unwrap(),
233
194
  DataType::Datetime(tu, tz) => {
234
195
  let datetime_class = pl.const_get::<_, Value>("Datetime").unwrap();
235
196
  datetime_class
@@ -242,9 +203,26 @@ impl IntoValue for Wrap<DataType> {
242
203
  .funcall::<_, _, Value>("new", (tu.to_ascii(),))
243
204
  .unwrap()
244
205
  }
245
- DataType::Object(_, _) => pl.const_get::<_, Value>("Object").unwrap(),
246
- DataType::Categorical(_, _) => pl.const_get::<_, Value>("Categorical").unwrap(),
247
- DataType::Time => pl.const_get::<_, Value>("Time").unwrap(),
206
+ DataType::Object(_, _) => {
207
+ let class = pl.const_get::<_, Value>("Object").unwrap();
208
+ class.funcall("new", ()).unwrap()
209
+ }
210
+ DataType::Categorical(_, ordering) => {
211
+ let class = pl.const_get::<_, Value>("Categorical").unwrap();
212
+ class.funcall("new", (Wrap(ordering),)).unwrap()
213
+ }
214
+ DataType::Enum(rev_map, _) => {
215
+ // we should always have an initialized rev_map coming from rust
216
+ let categories = rev_map.as_ref().unwrap().get_categories();
217
+ let class = pl.const_get::<_, Value>("Enum").unwrap();
218
+ let s = Series::from_arrow("category", categories.to_boxed()).unwrap();
219
+ let series = to_series(s.into());
220
+ class.funcall::<_, _, Value>("new", (series,)).unwrap()
221
+ }
222
+ DataType::Time => {
223
+ let class = pl.const_get::<_, Value>("Time").unwrap();
224
+ class.funcall("new", ()).unwrap()
225
+ }
248
226
  DataType::Struct(fields) => {
249
227
  let field_class = pl.const_get::<_, Value>("Field").unwrap();
250
228
  let iter = fields.iter().map(|fld| {
@@ -260,12 +238,31 @@ impl IntoValue for Wrap<DataType> {
260
238
  .funcall::<_, _, Value>("new", (fields,))
261
239
  .unwrap()
262
240
  }
263
- DataType::Null => pl.const_get::<_, Value>("Null").unwrap(),
264
- DataType::Unknown => pl.const_get::<_, Value>("Unknown").unwrap(),
241
+ DataType::Null => {
242
+ let class = pl.const_get::<_, Value>("Null").unwrap();
243
+ class.funcall("new", ()).unwrap()
244
+ }
245
+ DataType::Unknown => {
246
+ let class = pl.const_get::<_, Value>("Unknown").unwrap();
247
+ class.funcall("new", ()).unwrap()
248
+ }
249
+ DataType::BinaryOffset => {
250
+ unimplemented!()
251
+ }
265
252
  }
266
253
  }
267
254
  }
268
255
 
256
+ impl IntoValue for Wrap<CategoricalOrdering> {
257
+ fn into_value_with(self, _: &Ruby) -> Value {
258
+ let ordering = match self.0 {
259
+ CategoricalOrdering::Physical => "physical",
260
+ CategoricalOrdering::Lexical => "lexical",
261
+ };
262
+ ordering.into_value()
263
+ }
264
+ }
265
+
269
266
  impl IntoValue for Wrap<TimeUnit> {
270
267
  fn into_value_with(self, _: &Ruby) -> Value {
271
268
  let tu = match self.0 {
@@ -277,114 +274,6 @@ impl IntoValue for Wrap<TimeUnit> {
277
274
  }
278
275
  }
279
276
 
280
- impl IntoValue for Wrap<&StringChunked> {
281
- fn into_value_with(self, _: &Ruby) -> Value {
282
- let iter = self.0.into_iter();
283
- RArray::from_iter(iter).into_value()
284
- }
285
- }
286
-
287
- impl IntoValue for Wrap<&BinaryChunked> {
288
- fn into_value_with(self, _: &Ruby) -> Value {
289
- let iter = self
290
- .0
291
- .into_iter()
292
- .map(|opt_bytes| opt_bytes.map(RString::from_slice));
293
- RArray::from_iter(iter).into_value()
294
- }
295
- }
296
-
297
- impl IntoValue for Wrap<&StructChunked> {
298
- fn into_value_with(self, _: &Ruby) -> Value {
299
- let s = self.0.clone().into_series();
300
- // todo! iterate its chunks and flatten.
301
- // make series::iter() accept a chunk index.
302
- let s = s.rechunk();
303
- let iter = s.iter().map(|av| {
304
- if let AnyValue::Struct(_, _, flds) = av {
305
- struct_dict(av._iter_struct_av(), flds)
306
- } else {
307
- unreachable!()
308
- }
309
- });
310
-
311
- RArray::from_iter(iter).into_value()
312
- }
313
- }
314
-
315
- impl IntoValue for Wrap<&DurationChunked> {
316
- fn into_value_with(self, _: &Ruby) -> Value {
317
- let utils = utils();
318
- let time_unit = Wrap(self.0.time_unit()).into_value();
319
- let iter = self.0.into_iter().map(|opt_v| {
320
- opt_v.map(|v| {
321
- utils
322
- .funcall::<_, _, Value>("_to_ruby_duration", (v, time_unit))
323
- .unwrap()
324
- })
325
- });
326
- RArray::from_iter(iter).into_value()
327
- }
328
- }
329
-
330
- impl IntoValue for Wrap<&DatetimeChunked> {
331
- fn into_value_with(self, _: &Ruby) -> Value {
332
- let utils = utils();
333
- let time_unit = Wrap(self.0.time_unit()).into_value();
334
- let time_zone = self.0.time_zone().clone().into_value();
335
- let iter = self.0.into_iter().map(|opt_v| {
336
- opt_v.map(|v| {
337
- utils
338
- .funcall::<_, _, Value>("_to_ruby_datetime", (v, time_unit, time_zone))
339
- .unwrap()
340
- })
341
- });
342
- RArray::from_iter(iter).into_value()
343
- }
344
- }
345
-
346
- impl IntoValue for Wrap<&TimeChunked> {
347
- fn into_value_with(self, _: &Ruby) -> Value {
348
- let utils = utils();
349
- let iter = self.0.into_iter().map(|opt_v| {
350
- opt_v.map(|v| utils.funcall::<_, _, Value>("_to_ruby_time", (v,)).unwrap())
351
- });
352
- RArray::from_iter(iter).into_value()
353
- }
354
- }
355
-
356
- impl IntoValue for Wrap<&DateChunked> {
357
- fn into_value_with(self, _: &Ruby) -> Value {
358
- let utils = utils();
359
- let iter = self.0.into_iter().map(|opt_v| {
360
- opt_v.map(|v| utils.funcall::<_, _, Value>("_to_ruby_date", (v,)).unwrap())
361
- });
362
- RArray::from_iter(iter).into_value()
363
- }
364
- }
365
-
366
- impl IntoValue for Wrap<&DecimalChunked> {
367
- fn into_value_with(self, _: &Ruby) -> Value {
368
- let utils = utils();
369
- let rb_scale = (-(self.0.scale() as i32)).into_value();
370
- let iter = self.0.into_iter().map(|opt_v| {
371
- opt_v.map(|v| {
372
- utils
373
- .funcall::<_, _, Value>("_to_ruby_decimal", (v.to_string(), rb_scale))
374
- .unwrap()
375
- })
376
- });
377
- RArray::from_iter(iter).into_value()
378
- }
379
- }
380
-
381
- fn abs_decimal_from_digits(digits: String, exp: i32) -> Option<(i128, usize)> {
382
- match digits.parse::<i128>() {
383
- Ok(v) => Some((v, ((digits.len() as i32) - exp) as usize)),
384
- Err(_) => None,
385
- }
386
- }
387
-
388
277
  impl TryConvert for Wrap<Field> {
389
278
  fn try_convert(ob: Value) -> RbResult<Self> {
390
279
  let name: String = ob.funcall("name", ())?;
@@ -410,6 +299,7 @@ impl TryConvert for Wrap<DataType> {
410
299
  "Polars::Binary" => DataType::Binary,
411
300
  "Polars::Boolean" => DataType::Boolean,
412
301
  "Polars::Categorical" => DataType::Categorical(None, Default::default()),
302
+ "Polars::Enum" => DataType::Enum(None, Default::default()),
413
303
  "Polars::Date" => DataType::Date,
414
304
  "Polars::Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
415
305
  "Polars::Time" => DataType::Time,
@@ -431,6 +321,36 @@ impl TryConvert for Wrap<DataType> {
431
321
  } else if String::try_convert(ob).is_err() {
432
322
  let name = unsafe { ob.class().name() }.into_owned();
433
323
  match name.as_str() {
324
+ "Polars::Int8" => DataType::Int8,
325
+ "Polars::Int16" => DataType::Int16,
326
+ "Polars::Int32" => DataType::Int32,
327
+ "Polars::Int64" => DataType::Int64,
328
+ "Polars::UInt8" => DataType::UInt8,
329
+ "Polars::UInt16" => DataType::UInt16,
330
+ "Polars::UInt32" => DataType::UInt32,
331
+ "Polars::UInt64" => DataType::UInt64,
332
+ "Polars::String" => DataType::String,
333
+ "Polars::Binary" => DataType::Binary,
334
+ "Polars::Boolean" => DataType::Boolean,
335
+ "Polars::Categorical" => {
336
+ let ordering = ob
337
+ .funcall::<_, _, Wrap<CategoricalOrdering>>("ordering", ())?
338
+ .0;
339
+ DataType::Categorical(None, ordering)
340
+ }
341
+ "Polars::Enum" => {
342
+ let categories = ob.funcall("categories", ()).unwrap();
343
+ let s = get_series(categories)?;
344
+ let ca = s.str().map_err(RbPolarsErr::from)?;
345
+ let categories = ca.downcast_iter().next().unwrap().clone();
346
+ create_enum_data_type(categories)
347
+ }
348
+ "Polars::Date" => DataType::Date,
349
+ "Polars::Time" => DataType::Time,
350
+ "Polars::Float32" => DataType::Float32,
351
+ "Polars::Float64" => DataType::Float64,
352
+ "Polars::Null" => DataType::Null,
353
+ "Polars::Unknown" => DataType::Unknown,
434
354
  "Polars::Duration" => {
435
355
  let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
436
356
  let time_unit = Wrap::<TimeUnit>::try_convert(time_unit)?.0;
@@ -503,102 +423,6 @@ impl TryConvert for Wrap<DataType> {
503
423
  }
504
424
  }
505
425
 
506
- impl<'s> TryConvert for Wrap<AnyValue<'s>> {
507
- fn try_convert(ob: Value) -> RbResult<Self> {
508
- if ob.is_kind_of(class::true_class()) || ob.is_kind_of(class::false_class()) {
509
- Ok(AnyValue::Boolean(bool::try_convert(ob)?).into())
510
- } else if let Some(v) = Integer::from_value(ob) {
511
- Ok(AnyValue::Int64(v.to_i64()?).into())
512
- } else if let Some(v) = Float::from_value(ob) {
513
- Ok(AnyValue::Float64(v.to_f64()).into())
514
- } else if let Some(v) = RString::from_value(ob) {
515
- if v.enc_get() == Index::utf8() {
516
- Ok(AnyValue::StringOwned(v.to_string()?.into()).into())
517
- } else {
518
- Ok(AnyValue::BinaryOwned(unsafe { v.as_slice() }.to_vec()).into())
519
- }
520
- // call is_a? for ActiveSupport::TimeWithZone
521
- } else if ob.funcall::<_, _, bool>("is_a?", (class::time(),))? {
522
- let sec = ob.funcall::<_, _, i64>("to_i", ())?;
523
- let nsec = ob.funcall::<_, _, i64>("nsec", ())?;
524
- let v = sec * 1_000_000_000 + nsec;
525
- // TODO support time zone when possible
526
- // https://github.com/pola-rs/polars/issues/9103
527
- Ok(AnyValue::Datetime(v, TimeUnit::Nanoseconds, &None).into())
528
- } else if ob.is_nil() {
529
- Ok(AnyValue::Null.into())
530
- } else if let Some(dict) = RHash::from_value(ob) {
531
- let len = dict.len();
532
- let mut keys = Vec::with_capacity(len);
533
- let mut vals = Vec::with_capacity(len);
534
- dict.foreach(|k: Value, v: Value| {
535
- let key = String::try_convert(k)?;
536
- let val = Wrap::<AnyValue>::try_convert(v)?.0;
537
- let dtype = DataType::from(&val);
538
- keys.push(Field::new(&key, dtype));
539
- vals.push(val);
540
- Ok(ForEach::Continue)
541
- })?;
542
- Ok(Wrap(AnyValue::StructOwned(Box::new((vals, keys)))))
543
- } else if let Some(v) = RArray::from_value(ob) {
544
- if v.is_empty() {
545
- Ok(Wrap(AnyValue::List(Series::new_empty("", &DataType::Null))))
546
- } else {
547
- let list = v;
548
-
549
- let mut avs = Vec::with_capacity(25);
550
- let mut iter = list.each();
551
-
552
- for item in (&mut iter).take(25) {
553
- avs.push(Wrap::<AnyValue>::try_convert(item?)?.0)
554
- }
555
-
556
- let (dtype, _n_types) = any_values_to_dtype(&avs).map_err(RbPolarsErr::from)?;
557
-
558
- // push the rest
559
- avs.reserve(list.len());
560
- for item in iter {
561
- avs.push(Wrap::<AnyValue>::try_convert(item?)?.0)
562
- }
563
-
564
- let s = Series::from_any_values_and_dtype("", &avs, &dtype, true)
565
- .map_err(RbPolarsErr::from)?;
566
- Ok(Wrap(AnyValue::List(s)))
567
- }
568
- } else if ob.is_kind_of(crate::rb_modules::datetime()) {
569
- let sec: i64 = ob.funcall("to_i", ())?;
570
- let nsec: i64 = ob.funcall("nsec", ())?;
571
- Ok(Wrap(AnyValue::Datetime(
572
- sec * 1_000_000_000 + nsec,
573
- TimeUnit::Nanoseconds,
574
- &None,
575
- )))
576
- } else if ob.is_kind_of(crate::rb_modules::date()) {
577
- // convert to DateTime for UTC
578
- let v = ob
579
- .funcall::<_, _, Value>("to_datetime", ())?
580
- .funcall::<_, _, Value>("to_time", ())?
581
- .funcall::<_, _, i64>("to_i", ())?;
582
- Ok(Wrap(AnyValue::Date((v / 86400) as i32)))
583
- } else if ob.is_kind_of(crate::rb_modules::bigdecimal()) {
584
- let (sign, digits, _, exp): (i8, String, i32, i32) = ob.funcall("split", ()).unwrap();
585
- let (mut v, scale) = abs_decimal_from_digits(digits, exp).ok_or_else(|| {
586
- RbPolarsErr::other("BigDecimal is too large to fit in Decimal128".into())
587
- })?;
588
- if sign < 0 {
589
- // TODO better error
590
- v = v.checked_neg().unwrap();
591
- }
592
- Ok(Wrap(AnyValue::Decimal(v, scale)))
593
- } else {
594
- Err(RbPolarsErr::other(format!(
595
- "object type not supported {:?}",
596
- ob
597
- )))
598
- }
599
- }
600
- }
601
-
602
426
  impl<'s> TryConvert for Wrap<Row<'s>> {
603
427
  fn try_convert(ob: Value) -> RbResult<Self> {
604
428
  let mut vals: Vec<Wrap<AnyValue<'s>>> = Vec::new();
@@ -662,6 +486,15 @@ impl TotalEq for ObjectValue {
662
486
  }
663
487
  }
664
488
 
489
+ impl TotalHash for ObjectValue {
490
+ fn tot_hash<H>(&self, state: &mut H)
491
+ where
492
+ H: Hasher,
493
+ {
494
+ self.hash(state);
495
+ }
496
+ }
497
+
665
498
  impl Display for ObjectValue {
666
499
  fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
667
500
  write!(f, "{}", self.to_object())
@@ -715,24 +548,33 @@ impl Default for ObjectValue {
715
548
 
716
549
  pub(crate) fn dicts_to_rows(
717
550
  records: &Value,
718
- infer_schema_len: usize,
551
+ infer_schema_len: Option<usize>,
552
+ schema_columns: PlIndexSet<String>,
719
553
  ) -> RbResult<(Vec<Row>, Vec<String>)> {
554
+ let infer_schema_len = infer_schema_len.map(|n| std::cmp::max(1, n));
720
555
  let (dicts, len) = get_rbseq(*records)?;
721
556
 
722
- let mut key_names = PlIndexSet::new();
723
- for d in dicts.each().take(infer_schema_len) {
724
- let d = d?;
725
- let d = RHash::try_convert(d)?;
726
-
727
- d.foreach(|name: Value, _value: Value| {
728
- if let Some(v) = Symbol::from_value(name) {
729
- key_names.insert(v.name()?.into());
730
- } else {
731
- key_names.insert(String::try_convert(name)?);
732
- };
733
- Ok(ForEach::Continue)
734
- })?;
735
- }
557
+ let key_names = {
558
+ if !schema_columns.is_empty() {
559
+ schema_columns
560
+ } else {
561
+ let mut inferred_keys = PlIndexSet::new();
562
+ for d in dicts.each().take(infer_schema_len.unwrap_or(usize::MAX)) {
563
+ let d = d?;
564
+ let d = RHash::try_convert(d)?;
565
+
566
+ d.foreach(|name: Value, _value: Value| {
567
+ if let Some(v) = Symbol::from_value(name) {
568
+ inferred_keys.insert(v.name()?.into());
569
+ } else {
570
+ inferred_keys.insert(String::try_convert(name)?);
571
+ };
572
+ Ok(ForEach::Continue)
573
+ })?;
574
+ }
575
+ inferred_keys
576
+ }
577
+ };
736
578
 
737
579
  let mut rows = Vec::with_capacity(len);
738
580
 
@@ -895,8 +737,7 @@ impl TryConvert for Wrap<JoinType> {
895
737
  "outer_coalesce" => JoinType::Outer { coalesce: true },
896
738
  "semi" => JoinType::Semi,
897
739
  "anti" => JoinType::Anti,
898
- // #[cfg(feature = "cross_join")]
899
- // "cross" => JoinType::Cross,
740
+ "cross" => JoinType::Cross,
900
741
  v => {
901
742
  return Err(RbValueError::new_err(format!(
902
743
  "how must be one of {{'inner', 'left', 'outer', 'semi', 'anti', 'cross'}}, got {}",
@@ -940,6 +781,21 @@ impl TryConvert for Wrap<ListToStructWidthStrategy> {
940
781
  }
941
782
  }
942
783
 
784
+ impl TryConvert for Wrap<NonExistent> {
785
+ fn try_convert(ob: Value) -> RbResult<Self> {
786
+ let parsed = match String::try_convert(ob)?.as_str() {
787
+ "null" => NonExistent::Null,
788
+ "raise" => NonExistent::Raise,
789
+ v => {
790
+ return Err(RbValueError::new_err(format!(
791
+ "`non_existent` must be one of {{'null', 'raise'}}, got {v}",
792
+ )))
793
+ }
794
+ };
795
+ Ok(Wrap(parsed))
796
+ }
797
+ }
798
+
943
799
  impl TryConvert for Wrap<NullBehavior> {
944
800
  fn try_convert(ob: Value) -> RbResult<Self> {
945
801
  let parsed = match String::try_convert(ob)?.as_str() {
@@ -1062,6 +918,22 @@ impl TryConvert for Wrap<UniqueKeepStrategy> {
1062
918
  }
1063
919
  }
1064
920
 
921
+ impl TryConvert for Wrap<IpcCompression> {
922
+ fn try_convert(ob: Value) -> RbResult<Self> {
923
+ let parsed = match String::try_convert(ob)?.as_str() {
924
+ "lz4" => IpcCompression::LZ4,
925
+ "zstd" => IpcCompression::ZSTD,
926
+ v => {
927
+ return Err(RbValueError::new_err(format!(
928
+ "compression must be one of {{'lz4', 'zstd'}}, got {}",
929
+ v
930
+ )))
931
+ }
932
+ };
933
+ Ok(Wrap(parsed))
934
+ }
935
+ }
936
+
1065
937
  impl TryConvert for Wrap<SearchSortedSide> {
1066
938
  fn try_convert(ob: Value) -> RbResult<Self> {
1067
939
  let parsed = match String::try_convert(ob)?.as_str() {
@@ -1078,6 +950,56 @@ impl TryConvert for Wrap<SearchSortedSide> {
1078
950
  }
1079
951
  }
1080
952
 
953
+ impl TryConvert for Wrap<WindowMapping> {
954
+ fn try_convert(ob: Value) -> RbResult<Self> {
955
+ let parsed = match String::try_convert(ob)?.as_str() {
956
+ "group_to_rows" => WindowMapping::GroupsToRows,
957
+ "join" => WindowMapping::Join,
958
+ "explode" => WindowMapping::Explode,
959
+ v => {
960
+ return Err(RbValueError::new_err(format!(
961
+ "`mapping_strategy` must be one of {{'group_to_rows', 'join', 'explode'}}, got {v}",
962
+ )))
963
+ }
964
+ };
965
+ Ok(Wrap(parsed))
966
+ }
967
+ }
968
+
969
+ impl TryConvert for Wrap<JoinValidation> {
970
+ fn try_convert(ob: Value) -> RbResult<Self> {
971
+ let parsed = match String::try_convert(ob)?.as_str() {
972
+ "1:1" => JoinValidation::OneToOne,
973
+ "1:m" => JoinValidation::OneToMany,
974
+ "m:m" => JoinValidation::ManyToMany,
975
+ "m:1" => JoinValidation::ManyToOne,
976
+ v => {
977
+ return Err(RbValueError::new_err(format!(
978
+ "`validate` must be one of {{'m:m', 'm:1', '1:m', '1:1'}}, got {v}",
979
+ )))
980
+ }
981
+ };
982
+ Ok(Wrap(parsed))
983
+ }
984
+ }
985
+
986
+ impl TryConvert for Wrap<QuoteStyle> {
987
+ fn try_convert(ob: Value) -> RbResult<Self> {
988
+ let parsed = match String::try_convert(ob)?.as_str() {
989
+ "always" => QuoteStyle::Always,
990
+ "necessary" => QuoteStyle::Necessary,
991
+ "non_numeric" => QuoteStyle::NonNumeric,
992
+ "never" => QuoteStyle::Never,
993
+ v => {
994
+ return Err(RbValueError::new_err(format!(
995
+ "`quote_style` must be one of {{'always', 'necessary', 'non_numeric', 'never'}}, got {v}",
996
+ )))
997
+ },
998
+ };
999
+ Ok(Wrap(parsed))
1000
+ }
1001
+ }
1002
+
1081
1003
  pub fn parse_fill_null_strategy(
1082
1004
  strategy: &str,
1083
1005
  limit: FillNullLimit,
@@ -1150,3 +1072,12 @@ where
1150
1072
  {
1151
1073
  container.into_iter().map(|s| s.as_ref().into()).collect()
1152
1074
  }
1075
+
1076
+ impl TryConvert for Wrap<NonZeroUsize> {
1077
+ fn try_convert(ob: Value) -> RbResult<Self> {
1078
+ let v = usize::try_convert(ob)?;
1079
+ NonZeroUsize::new(v)
1080
+ .map(Wrap)
1081
+ .ok_or(RbValueError::new_err("must be non-zero".into()))
1082
+ }
1083
+ }