polars-df 0.8.0 → 0.10.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (76) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +42 -1
  3. data/Cargo.lock +159 -66
  4. data/Cargo.toml +0 -3
  5. data/LICENSE.txt +1 -1
  6. data/README.md +3 -2
  7. data/ext/polars/Cargo.toml +18 -8
  8. data/ext/polars/src/batched_csv.rs +7 -5
  9. data/ext/polars/src/conversion/anyvalue.rs +186 -0
  10. data/ext/polars/src/conversion/chunked_array.rs +140 -0
  11. data/ext/polars/src/{conversion.rs → conversion/mod.rs} +273 -342
  12. data/ext/polars/src/dataframe.rs +108 -66
  13. data/ext/polars/src/expr/array.rs +78 -0
  14. data/ext/polars/src/expr/datetime.rs +29 -58
  15. data/ext/polars/src/expr/general.rs +83 -36
  16. data/ext/polars/src/expr/list.rs +58 -6
  17. data/ext/polars/src/expr/meta.rs +48 -0
  18. data/ext/polars/src/expr/rolling.rs +1 -0
  19. data/ext/polars/src/expr/string.rs +62 -11
  20. data/ext/polars/src/expr/struct.rs +8 -4
  21. data/ext/polars/src/file.rs +158 -11
  22. data/ext/polars/src/functions/aggregation.rs +6 -0
  23. data/ext/polars/src/functions/lazy.rs +120 -50
  24. data/ext/polars/src/functions/meta.rs +45 -1
  25. data/ext/polars/src/functions/string_cache.rs +14 -0
  26. data/ext/polars/src/functions/whenthen.rs +47 -17
  27. data/ext/polars/src/{lazyframe.rs → lazyframe/mod.rs} +195 -40
  28. data/ext/polars/src/lib.rs +246 -179
  29. data/ext/polars/src/map/dataframe.rs +17 -9
  30. data/ext/polars/src/series/aggregation.rs +20 -0
  31. data/ext/polars/src/series/mod.rs +35 -4
  32. data/lib/polars/array_expr.rb +453 -0
  33. data/lib/polars/array_name_space.rb +346 -0
  34. data/lib/polars/batched_csv_reader.rb +4 -2
  35. data/lib/polars/cat_expr.rb +24 -0
  36. data/lib/polars/cat_name_space.rb +75 -0
  37. data/lib/polars/config.rb +2 -2
  38. data/lib/polars/data_frame.rb +306 -96
  39. data/lib/polars/data_types.rb +191 -28
  40. data/lib/polars/date_time_expr.rb +41 -18
  41. data/lib/polars/date_time_name_space.rb +9 -3
  42. data/lib/polars/exceptions.rb +12 -1
  43. data/lib/polars/expr.rb +898 -215
  44. data/lib/polars/functions/aggregation/horizontal.rb +246 -0
  45. data/lib/polars/functions/aggregation/vertical.rb +282 -0
  46. data/lib/polars/functions/as_datatype.rb +248 -0
  47. data/lib/polars/functions/col.rb +47 -0
  48. data/lib/polars/functions/eager.rb +182 -0
  49. data/lib/polars/functions/lazy.rb +1280 -0
  50. data/lib/polars/functions/len.rb +49 -0
  51. data/lib/polars/functions/lit.rb +35 -0
  52. data/lib/polars/functions/random.rb +16 -0
  53. data/lib/polars/functions/range/date_range.rb +103 -0
  54. data/lib/polars/functions/range/int_range.rb +51 -0
  55. data/lib/polars/functions/repeat.rb +144 -0
  56. data/lib/polars/functions/whenthen.rb +96 -0
  57. data/lib/polars/functions.rb +29 -416
  58. data/lib/polars/group_by.rb +2 -2
  59. data/lib/polars/io.rb +36 -31
  60. data/lib/polars/lazy_frame.rb +405 -88
  61. data/lib/polars/list_expr.rb +158 -8
  62. data/lib/polars/list_name_space.rb +102 -0
  63. data/lib/polars/meta_expr.rb +175 -7
  64. data/lib/polars/series.rb +282 -41
  65. data/lib/polars/string_cache.rb +75 -0
  66. data/lib/polars/string_expr.rb +413 -96
  67. data/lib/polars/string_name_space.rb +4 -4
  68. data/lib/polars/testing.rb +507 -0
  69. data/lib/polars/utils.rb +106 -8
  70. data/lib/polars/version.rb +1 -1
  71. data/lib/polars/whenthen.rb +83 -0
  72. data/lib/polars.rb +16 -4
  73. metadata +37 -8
  74. data/lib/polars/lazy_functions.rb +0 -1181
  75. data/lib/polars/when.rb +0 -16
  76. data/lib/polars/when_then.rb +0 -19
@@ -1,24 +1,28 @@
1
+ pub(crate) mod anyvalue;
2
+ mod chunked_array;
3
+
1
4
  use std::fmt::{Debug, Display, Formatter};
2
5
  use std::hash::{Hash, Hasher};
6
+ use std::num::NonZeroUsize;
3
7
 
4
- use magnus::encoding::{EncodingCapable, Index};
5
8
  use magnus::{
6
- class, exception, prelude::*, r_hash::ForEach, value::Opaque, Float, Integer, IntoValue,
7
- Module, RArray, RHash, RString, Ruby, Symbol, TryConvert, Value,
9
+ class, exception, prelude::*, r_hash::ForEach, value::Opaque, IntoValue, Module, RArray, RHash,
10
+ Ruby, Symbol, TryConvert, Value,
8
11
  };
9
12
  use polars::chunked_array::object::PolarsObjectSafe;
10
13
  use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
11
14
  use polars::datatypes::AnyValue;
12
- use polars::frame::row::{any_values_to_dtype, Row};
15
+ use polars::frame::row::Row;
13
16
  use polars::frame::NullStrategy;
14
17
  use polars::io::avro::AvroCompression;
15
18
  use polars::prelude::*;
16
19
  use polars::series::ops::NullBehavior;
17
- use polars_utils::total_ord::TotalEq;
20
+ use polars_core::utils::arrow::array::Array;
21
+ use polars_utils::total_ord::{TotalEq, TotalHash};
18
22
  use smartstring::alias::String as SmartString;
19
23
 
20
24
  use crate::object::OBJECT_NAME;
21
- use crate::rb_modules::utils;
25
+ use crate::rb_modules::series;
22
26
  use crate::{RbDataFrame, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbTypeError, RbValueError};
23
27
 
24
28
  pub(crate) fn slice_to_wrapped<T>(slice: &[T]) -> &[Wrap<T>] {
@@ -78,36 +82,11 @@ pub(crate) fn get_series(obj: Value) -> RbResult<Series> {
78
82
  Ok(rbs.series.borrow().clone())
79
83
  }
80
84
 
81
- impl TryConvert for Wrap<StringChunked> {
82
- fn try_convert(obj: Value) -> RbResult<Self> {
83
- let (seq, len) = get_rbseq(obj)?;
84
- let mut builder = StringChunkedBuilder::new("", len, len * 25);
85
-
86
- for res in seq.each() {
87
- let item = res?;
88
- match String::try_convert(item) {
89
- Ok(val) => builder.append_value(&val),
90
- Err(_) => builder.append_null(),
91
- }
92
- }
93
- Ok(Wrap(builder.finish()))
94
- }
95
- }
96
-
97
- impl TryConvert for Wrap<BinaryChunked> {
98
- fn try_convert(obj: Value) -> RbResult<Self> {
99
- let (seq, len) = get_rbseq(obj)?;
100
- let mut builder = BinaryChunkedBuilder::new("", len, len * 25);
101
-
102
- for res in seq.each() {
103
- let item = res?;
104
- match RString::try_convert(item) {
105
- Ok(val) => builder.append_value(unsafe { val.as_slice() }),
106
- Err(_) => builder.append_null(),
107
- }
108
- }
109
- Ok(Wrap(builder.finish()))
110
- }
85
+ pub(crate) fn to_series(s: RbSeries) -> Value {
86
+ let series = series();
87
+ series
88
+ .funcall::<_, _, Value>("_from_rbseries", (s,))
89
+ .unwrap()
111
90
  }
112
91
 
113
92
  impl TryConvert for Wrap<NullValues> {
@@ -134,102 +113,84 @@ fn struct_dict<'a>(vals: impl Iterator<Item = AnyValue<'a>>, flds: &[Field]) ->
134
113
  dict.into_value()
135
114
  }
136
115
 
137
- impl IntoValue for Wrap<AnyValue<'_>> {
138
- fn into_value_with(self, ruby: &Ruby) -> Value {
116
+ impl IntoValue for Wrap<DataType> {
117
+ fn into_value_with(self, _: &Ruby) -> Value {
118
+ let pl = crate::rb_modules::polars();
119
+
139
120
  match self.0 {
140
- AnyValue::UInt8(v) => ruby.into_value(v),
141
- AnyValue::UInt16(v) => ruby.into_value(v),
142
- AnyValue::UInt32(v) => ruby.into_value(v),
143
- AnyValue::UInt64(v) => ruby.into_value(v),
144
- AnyValue::Int8(v) => ruby.into_value(v),
145
- AnyValue::Int16(v) => ruby.into_value(v),
146
- AnyValue::Int32(v) => ruby.into_value(v),
147
- AnyValue::Int64(v) => ruby.into_value(v),
148
- AnyValue::Float32(v) => ruby.into_value(v),
149
- AnyValue::Float64(v) => ruby.into_value(v),
150
- AnyValue::Null => ruby.qnil().as_value(),
151
- AnyValue::Boolean(v) => ruby.into_value(v),
152
- AnyValue::String(v) => ruby.into_value(v),
153
- AnyValue::StringOwned(v) => ruby.into_value(v.as_str()),
154
- AnyValue::Categorical(idx, rev, arr) => {
155
- let s = if arr.is_null() {
156
- rev.get(idx)
157
- } else {
158
- unsafe { arr.deref_unchecked().value(idx as usize) }
159
- };
160
- s.into_value()
121
+ DataType::Int8 => {
122
+ let class = pl.const_get::<_, Value>("Int8").unwrap();
123
+ class.funcall("new", ()).unwrap()
161
124
  }
162
- AnyValue::Date(v) => utils().funcall("_to_ruby_date", (v,)).unwrap(),
163
- AnyValue::Datetime(v, time_unit, time_zone) => {
164
- let time_unit = time_unit.to_ascii();
165
- utils()
166
- .funcall("_to_ruby_datetime", (v, time_unit, time_zone.clone()))
167
- .unwrap()
125
+ DataType::Int16 => {
126
+ let class = pl.const_get::<_, Value>("Int16").unwrap();
127
+ class.funcall("new", ()).unwrap()
168
128
  }
169
- AnyValue::Duration(v, time_unit) => {
170
- let time_unit = time_unit.to_ascii();
171
- utils()
172
- .funcall("_to_ruby_duration", (v, time_unit))
173
- .unwrap()
129
+ DataType::Int32 => {
130
+ let class = pl.const_get::<_, Value>("Int32").unwrap();
131
+ class.funcall("new", ()).unwrap()
174
132
  }
175
- AnyValue::Time(v) => utils().funcall("_to_ruby_time", (v,)).unwrap(),
176
- AnyValue::Array(v, _) | AnyValue::List(v) => RbSeries::new(v).to_a().into_value(),
177
- ref av @ AnyValue::Struct(_, _, flds) => struct_dict(av._iter_struct_av(), flds),
178
- AnyValue::StructOwned(payload) => struct_dict(payload.0.into_iter(), &payload.1),
179
- AnyValue::Object(v) => {
180
- let object = v.as_any().downcast_ref::<ObjectValue>().unwrap();
181
- object.to_object()
133
+ DataType::Int64 => {
134
+ let class = pl.const_get::<_, Value>("Int64").unwrap();
135
+ class.funcall("new", ()).unwrap()
182
136
  }
183
- AnyValue::ObjectOwned(v) => {
184
- let object = v.0.as_any().downcast_ref::<ObjectValue>().unwrap();
185
- object.to_object()
137
+ DataType::UInt8 => {
138
+ let class = pl.const_get::<_, Value>("UInt8").unwrap();
139
+ class.funcall("new", ()).unwrap()
140
+ }
141
+ DataType::UInt16 => {
142
+ let class = pl.const_get::<_, Value>("UInt16").unwrap();
143
+ class.funcall("new", ()).unwrap()
144
+ }
145
+ DataType::UInt32 => {
146
+ let class = pl.const_get::<_, Value>("UInt32").unwrap();
147
+ class.funcall("new", ()).unwrap()
148
+ }
149
+ DataType::UInt64 => {
150
+ let class = pl.const_get::<_, Value>("UInt64").unwrap();
151
+ class.funcall("new", ()).unwrap()
152
+ }
153
+ DataType::Float32 => {
154
+ let class = pl.const_get::<_, Value>("Float32").unwrap();
155
+ class.funcall("new", ()).unwrap()
156
+ }
157
+ DataType::Float64 => {
158
+ let class = pl.const_get::<_, Value>("Float64").unwrap();
159
+ class.funcall("new", ()).unwrap()
186
160
  }
187
- AnyValue::Binary(v) => RString::from_slice(v).into_value(),
188
- AnyValue::BinaryOwned(v) => RString::from_slice(&v).into_value(),
189
- AnyValue::Decimal(v, scale) => utils()
190
- .funcall("_to_ruby_decimal", (v.to_string(), -(scale as i32)))
191
- .unwrap(),
192
- }
193
- }
194
- }
195
-
196
- impl IntoValue for Wrap<DataType> {
197
- fn into_value_with(self, _: &Ruby) -> Value {
198
- let pl = crate::rb_modules::polars();
199
-
200
- match self.0 {
201
- DataType::Int8 => pl.const_get::<_, Value>("Int8").unwrap(),
202
- DataType::Int16 => pl.const_get::<_, Value>("Int16").unwrap(),
203
- DataType::Int32 => pl.const_get::<_, Value>("Int32").unwrap(),
204
- DataType::Int64 => pl.const_get::<_, Value>("Int64").unwrap(),
205
- DataType::UInt8 => pl.const_get::<_, Value>("UInt8").unwrap(),
206
- DataType::UInt16 => pl.const_get::<_, Value>("UInt16").unwrap(),
207
- DataType::UInt32 => pl.const_get::<_, Value>("UInt32").unwrap(),
208
- DataType::UInt64 => pl.const_get::<_, Value>("UInt64").unwrap(),
209
- DataType::Float32 => pl.const_get::<_, Value>("Float32").unwrap(),
210
- DataType::Float64 => pl.const_get::<_, Value>("Float64").unwrap(),
211
161
  DataType::Decimal(precision, scale) => {
212
- let decimal_class = pl.const_get::<_, Value>("Decimal").unwrap();
213
- decimal_class
162
+ let class = pl.const_get::<_, Value>("Decimal").unwrap();
163
+ class
214
164
  .funcall::<_, _, Value>("new", (precision, scale))
215
165
  .unwrap()
216
166
  }
217
- DataType::Boolean => pl.const_get::<_, Value>("Boolean").unwrap(),
218
- DataType::String => pl.const_get::<_, Value>("String").unwrap(),
219
- DataType::Binary => pl.const_get::<_, Value>("Binary").unwrap(),
167
+ DataType::Boolean => {
168
+ let class = pl.const_get::<_, Value>("Boolean").unwrap();
169
+ class.funcall("new", ()).unwrap()
170
+ }
171
+ DataType::String => {
172
+ let class = pl.const_get::<_, Value>("String").unwrap();
173
+ class.funcall("new", ()).unwrap()
174
+ }
175
+ DataType::Binary => {
176
+ let class = pl.const_get::<_, Value>("Binary").unwrap();
177
+ class.funcall("new", ()).unwrap()
178
+ }
220
179
  DataType::Array(inner, size) => {
180
+ let class = pl.const_get::<_, Value>("Array").unwrap();
221
181
  let inner = Wrap(*inner);
222
- let list_class = pl.const_get::<_, Value>("Array").unwrap();
223
- list_class
224
- .funcall::<_, _, Value>("new", (size, inner))
225
- .unwrap()
182
+ let args = (inner, size);
183
+ class.funcall::<_, _, Value>("new", args).unwrap()
226
184
  }
227
185
  DataType::List(inner) => {
186
+ let class = pl.const_get::<_, Value>("List").unwrap();
228
187
  let inner = Wrap(*inner);
229
- let list_class = pl.const_get::<_, Value>("List").unwrap();
230
- list_class.funcall::<_, _, Value>("new", (inner,)).unwrap()
188
+ class.funcall::<_, _, Value>("new", (inner,)).unwrap()
189
+ }
190
+ DataType::Date => {
191
+ let class = pl.const_get::<_, Value>("Date").unwrap();
192
+ class.funcall("new", ()).unwrap()
231
193
  }
232
- DataType::Date => pl.const_get::<_, Value>("Date").unwrap(),
233
194
  DataType::Datetime(tu, tz) => {
234
195
  let datetime_class = pl.const_get::<_, Value>("Datetime").unwrap();
235
196
  datetime_class
@@ -242,9 +203,26 @@ impl IntoValue for Wrap<DataType> {
242
203
  .funcall::<_, _, Value>("new", (tu.to_ascii(),))
243
204
  .unwrap()
244
205
  }
245
- DataType::Object(_, _) => pl.const_get::<_, Value>("Object").unwrap(),
246
- DataType::Categorical(_, _) => pl.const_get::<_, Value>("Categorical").unwrap(),
247
- DataType::Time => pl.const_get::<_, Value>("Time").unwrap(),
206
+ DataType::Object(_, _) => {
207
+ let class = pl.const_get::<_, Value>("Object").unwrap();
208
+ class.funcall("new", ()).unwrap()
209
+ }
210
+ DataType::Categorical(_, ordering) => {
211
+ let class = pl.const_get::<_, Value>("Categorical").unwrap();
212
+ class.funcall("new", (Wrap(ordering),)).unwrap()
213
+ }
214
+ DataType::Enum(rev_map, _) => {
215
+ // we should always have an initialized rev_map coming from rust
216
+ let categories = rev_map.as_ref().unwrap().get_categories();
217
+ let class = pl.const_get::<_, Value>("Enum").unwrap();
218
+ let s = Series::from_arrow("category", categories.to_boxed()).unwrap();
219
+ let series = to_series(s.into());
220
+ class.funcall::<_, _, Value>("new", (series,)).unwrap()
221
+ }
222
+ DataType::Time => {
223
+ let class = pl.const_get::<_, Value>("Time").unwrap();
224
+ class.funcall("new", ()).unwrap()
225
+ }
248
226
  DataType::Struct(fields) => {
249
227
  let field_class = pl.const_get::<_, Value>("Field").unwrap();
250
228
  let iter = fields.iter().map(|fld| {
@@ -260,12 +238,31 @@ impl IntoValue for Wrap<DataType> {
260
238
  .funcall::<_, _, Value>("new", (fields,))
261
239
  .unwrap()
262
240
  }
263
- DataType::Null => pl.const_get::<_, Value>("Null").unwrap(),
264
- DataType::Unknown => pl.const_get::<_, Value>("Unknown").unwrap(),
241
+ DataType::Null => {
242
+ let class = pl.const_get::<_, Value>("Null").unwrap();
243
+ class.funcall("new", ()).unwrap()
244
+ }
245
+ DataType::Unknown => {
246
+ let class = pl.const_get::<_, Value>("Unknown").unwrap();
247
+ class.funcall("new", ()).unwrap()
248
+ }
249
+ DataType::BinaryOffset => {
250
+ unimplemented!()
251
+ }
265
252
  }
266
253
  }
267
254
  }
268
255
 
256
+ impl IntoValue for Wrap<CategoricalOrdering> {
257
+ fn into_value_with(self, _: &Ruby) -> Value {
258
+ let ordering = match self.0 {
259
+ CategoricalOrdering::Physical => "physical",
260
+ CategoricalOrdering::Lexical => "lexical",
261
+ };
262
+ ordering.into_value()
263
+ }
264
+ }
265
+
269
266
  impl IntoValue for Wrap<TimeUnit> {
270
267
  fn into_value_with(self, _: &Ruby) -> Value {
271
268
  let tu = match self.0 {
@@ -277,114 +274,6 @@ impl IntoValue for Wrap<TimeUnit> {
277
274
  }
278
275
  }
279
276
 
280
- impl IntoValue for Wrap<&StringChunked> {
281
- fn into_value_with(self, _: &Ruby) -> Value {
282
- let iter = self.0.into_iter();
283
- RArray::from_iter(iter).into_value()
284
- }
285
- }
286
-
287
- impl IntoValue for Wrap<&BinaryChunked> {
288
- fn into_value_with(self, _: &Ruby) -> Value {
289
- let iter = self
290
- .0
291
- .into_iter()
292
- .map(|opt_bytes| opt_bytes.map(RString::from_slice));
293
- RArray::from_iter(iter).into_value()
294
- }
295
- }
296
-
297
- impl IntoValue for Wrap<&StructChunked> {
298
- fn into_value_with(self, _: &Ruby) -> Value {
299
- let s = self.0.clone().into_series();
300
- // todo! iterate its chunks and flatten.
301
- // make series::iter() accept a chunk index.
302
- let s = s.rechunk();
303
- let iter = s.iter().map(|av| {
304
- if let AnyValue::Struct(_, _, flds) = av {
305
- struct_dict(av._iter_struct_av(), flds)
306
- } else {
307
- unreachable!()
308
- }
309
- });
310
-
311
- RArray::from_iter(iter).into_value()
312
- }
313
- }
314
-
315
- impl IntoValue for Wrap<&DurationChunked> {
316
- fn into_value_with(self, _: &Ruby) -> Value {
317
- let utils = utils();
318
- let time_unit = Wrap(self.0.time_unit()).into_value();
319
- let iter = self.0.into_iter().map(|opt_v| {
320
- opt_v.map(|v| {
321
- utils
322
- .funcall::<_, _, Value>("_to_ruby_duration", (v, time_unit))
323
- .unwrap()
324
- })
325
- });
326
- RArray::from_iter(iter).into_value()
327
- }
328
- }
329
-
330
- impl IntoValue for Wrap<&DatetimeChunked> {
331
- fn into_value_with(self, _: &Ruby) -> Value {
332
- let utils = utils();
333
- let time_unit = Wrap(self.0.time_unit()).into_value();
334
- let time_zone = self.0.time_zone().clone().into_value();
335
- let iter = self.0.into_iter().map(|opt_v| {
336
- opt_v.map(|v| {
337
- utils
338
- .funcall::<_, _, Value>("_to_ruby_datetime", (v, time_unit, time_zone))
339
- .unwrap()
340
- })
341
- });
342
- RArray::from_iter(iter).into_value()
343
- }
344
- }
345
-
346
- impl IntoValue for Wrap<&TimeChunked> {
347
- fn into_value_with(self, _: &Ruby) -> Value {
348
- let utils = utils();
349
- let iter = self.0.into_iter().map(|opt_v| {
350
- opt_v.map(|v| utils.funcall::<_, _, Value>("_to_ruby_time", (v,)).unwrap())
351
- });
352
- RArray::from_iter(iter).into_value()
353
- }
354
- }
355
-
356
- impl IntoValue for Wrap<&DateChunked> {
357
- fn into_value_with(self, _: &Ruby) -> Value {
358
- let utils = utils();
359
- let iter = self.0.into_iter().map(|opt_v| {
360
- opt_v.map(|v| utils.funcall::<_, _, Value>("_to_ruby_date", (v,)).unwrap())
361
- });
362
- RArray::from_iter(iter).into_value()
363
- }
364
- }
365
-
366
- impl IntoValue for Wrap<&DecimalChunked> {
367
- fn into_value_with(self, _: &Ruby) -> Value {
368
- let utils = utils();
369
- let rb_scale = (-(self.0.scale() as i32)).into_value();
370
- let iter = self.0.into_iter().map(|opt_v| {
371
- opt_v.map(|v| {
372
- utils
373
- .funcall::<_, _, Value>("_to_ruby_decimal", (v.to_string(), rb_scale))
374
- .unwrap()
375
- })
376
- });
377
- RArray::from_iter(iter).into_value()
378
- }
379
- }
380
-
381
/// Turns the digit string and exponent of a decimal number into an
/// `(unscaled value, scale)` pair.
///
/// The caller passes the `digits` and `exp` parts obtained from Ruby's
/// `BigDecimal#split`, so the represented magnitude is `0.<digits> * 10^exp`,
/// i.e. `digits * 10^(exp - digits.len())`.
///
/// * When `exp <= digits.len()` the digits are returned unchanged with a
///   scale of `digits.len() - exp`.
/// * When `exp > digits.len()` the scale would be negative. It cannot be
///   stored in a `usize`, so the value is multiplied by the missing power of
///   ten and the scale is `0`. (Casting the negative difference to `usize`
///   would wrap around to an enormous scale.)
///
/// Returns `None` when `digits` does not parse as an `i128` or when scaling
/// the value up overflows `i128`.
fn abs_decimal_from_digits(digits: String, exp: i32) -> Option<(i128, usize)> {
    let value = digits.parse::<i128>().ok()?;

    // Power of ten still to be applied to `value`; widened to i64 so the
    // subtraction cannot overflow for any `i32` exponent.
    let shift = i64::from(exp) - digits.len() as i64;

    if shift > 0 {
        // `shift` is positive and bounded by `i32::MAX`, so the cast is lossless.
        let factor = 10_i128.checked_pow(shift as u32)?;
        let scaled = value.checked_mul(factor)?;
        Some((scaled, 0))
    } else {
        // `-shift` is non-negative here: it is the number of fractional digits.
        Some((value, (-shift) as usize))
    }
}
387
-
388
277
  impl TryConvert for Wrap<Field> {
389
278
  fn try_convert(ob: Value) -> RbResult<Self> {
390
279
  let name: String = ob.funcall("name", ())?;
@@ -410,6 +299,7 @@ impl TryConvert for Wrap<DataType> {
410
299
  "Polars::Binary" => DataType::Binary,
411
300
  "Polars::Boolean" => DataType::Boolean,
412
301
  "Polars::Categorical" => DataType::Categorical(None, Default::default()),
302
+ "Polars::Enum" => DataType::Enum(None, Default::default()),
413
303
  "Polars::Date" => DataType::Date,
414
304
  "Polars::Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
415
305
  "Polars::Time" => DataType::Time,
@@ -431,6 +321,36 @@ impl TryConvert for Wrap<DataType> {
431
321
  } else if String::try_convert(ob).is_err() {
432
322
  let name = unsafe { ob.class().name() }.into_owned();
433
323
  match name.as_str() {
324
+ "Polars::Int8" => DataType::Int8,
325
+ "Polars::Int16" => DataType::Int16,
326
+ "Polars::Int32" => DataType::Int32,
327
+ "Polars::Int64" => DataType::Int64,
328
+ "Polars::UInt8" => DataType::UInt8,
329
+ "Polars::UInt16" => DataType::UInt16,
330
+ "Polars::UInt32" => DataType::UInt32,
331
+ "Polars::UInt64" => DataType::UInt64,
332
+ "Polars::String" => DataType::String,
333
+ "Polars::Binary" => DataType::Binary,
334
+ "Polars::Boolean" => DataType::Boolean,
335
+ "Polars::Categorical" => {
336
+ let ordering = ob
337
+ .funcall::<_, _, Wrap<CategoricalOrdering>>("ordering", ())?
338
+ .0;
339
+ DataType::Categorical(None, ordering)
340
+ }
341
+ "Polars::Enum" => {
342
+ let categories = ob.funcall("categories", ()).unwrap();
343
+ let s = get_series(categories)?;
344
+ let ca = s.str().map_err(RbPolarsErr::from)?;
345
+ let categories = ca.downcast_iter().next().unwrap().clone();
346
+ create_enum_data_type(categories)
347
+ }
348
+ "Polars::Date" => DataType::Date,
349
+ "Polars::Time" => DataType::Time,
350
+ "Polars::Float32" => DataType::Float32,
351
+ "Polars::Float64" => DataType::Float64,
352
+ "Polars::Null" => DataType::Null,
353
+ "Polars::Unknown" => DataType::Unknown,
434
354
  "Polars::Duration" => {
435
355
  let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
436
356
  let time_unit = Wrap::<TimeUnit>::try_convert(time_unit)?.0;
@@ -503,102 +423,6 @@ impl TryConvert for Wrap<DataType> {
503
423
  }
504
424
  }
505
425
 
506
- impl<'s> TryConvert for Wrap<AnyValue<'s>> {
507
- fn try_convert(ob: Value) -> RbResult<Self> {
508
- if ob.is_kind_of(class::true_class()) || ob.is_kind_of(class::false_class()) {
509
- Ok(AnyValue::Boolean(bool::try_convert(ob)?).into())
510
- } else if let Some(v) = Integer::from_value(ob) {
511
- Ok(AnyValue::Int64(v.to_i64()?).into())
512
- } else if let Some(v) = Float::from_value(ob) {
513
- Ok(AnyValue::Float64(v.to_f64()).into())
514
- } else if let Some(v) = RString::from_value(ob) {
515
- if v.enc_get() == Index::utf8() {
516
- Ok(AnyValue::StringOwned(v.to_string()?.into()).into())
517
- } else {
518
- Ok(AnyValue::BinaryOwned(unsafe { v.as_slice() }.to_vec()).into())
519
- }
520
- // call is_a? for ActiveSupport::TimeWithZone
521
- } else if ob.funcall::<_, _, bool>("is_a?", (class::time(),))? {
522
- let sec = ob.funcall::<_, _, i64>("to_i", ())?;
523
- let nsec = ob.funcall::<_, _, i64>("nsec", ())?;
524
- let v = sec * 1_000_000_000 + nsec;
525
- // TODO support time zone when possible
526
- // https://github.com/pola-rs/polars/issues/9103
527
- Ok(AnyValue::Datetime(v, TimeUnit::Nanoseconds, &None).into())
528
- } else if ob.is_nil() {
529
- Ok(AnyValue::Null.into())
530
- } else if let Some(dict) = RHash::from_value(ob) {
531
- let len = dict.len();
532
- let mut keys = Vec::with_capacity(len);
533
- let mut vals = Vec::with_capacity(len);
534
- dict.foreach(|k: Value, v: Value| {
535
- let key = String::try_convert(k)?;
536
- let val = Wrap::<AnyValue>::try_convert(v)?.0;
537
- let dtype = DataType::from(&val);
538
- keys.push(Field::new(&key, dtype));
539
- vals.push(val);
540
- Ok(ForEach::Continue)
541
- })?;
542
- Ok(Wrap(AnyValue::StructOwned(Box::new((vals, keys)))))
543
- } else if let Some(v) = RArray::from_value(ob) {
544
- if v.is_empty() {
545
- Ok(Wrap(AnyValue::List(Series::new_empty("", &DataType::Null))))
546
- } else {
547
- let list = v;
548
-
549
- let mut avs = Vec::with_capacity(25);
550
- let mut iter = list.each();
551
-
552
- for item in (&mut iter).take(25) {
553
- avs.push(Wrap::<AnyValue>::try_convert(item?)?.0)
554
- }
555
-
556
- let (dtype, _n_types) = any_values_to_dtype(&avs).map_err(RbPolarsErr::from)?;
557
-
558
- // push the rest
559
- avs.reserve(list.len());
560
- for item in iter {
561
- avs.push(Wrap::<AnyValue>::try_convert(item?)?.0)
562
- }
563
-
564
- let s = Series::from_any_values_and_dtype("", &avs, &dtype, true)
565
- .map_err(RbPolarsErr::from)?;
566
- Ok(Wrap(AnyValue::List(s)))
567
- }
568
- } else if ob.is_kind_of(crate::rb_modules::datetime()) {
569
- let sec: i64 = ob.funcall("to_i", ())?;
570
- let nsec: i64 = ob.funcall("nsec", ())?;
571
- Ok(Wrap(AnyValue::Datetime(
572
- sec * 1_000_000_000 + nsec,
573
- TimeUnit::Nanoseconds,
574
- &None,
575
- )))
576
- } else if ob.is_kind_of(crate::rb_modules::date()) {
577
- // convert to DateTime for UTC
578
- let v = ob
579
- .funcall::<_, _, Value>("to_datetime", ())?
580
- .funcall::<_, _, Value>("to_time", ())?
581
- .funcall::<_, _, i64>("to_i", ())?;
582
- Ok(Wrap(AnyValue::Date((v / 86400) as i32)))
583
- } else if ob.is_kind_of(crate::rb_modules::bigdecimal()) {
584
- let (sign, digits, _, exp): (i8, String, i32, i32) = ob.funcall("split", ()).unwrap();
585
- let (mut v, scale) = abs_decimal_from_digits(digits, exp).ok_or_else(|| {
586
- RbPolarsErr::other("BigDecimal is too large to fit in Decimal128".into())
587
- })?;
588
- if sign < 0 {
589
- // TODO better error
590
- v = v.checked_neg().unwrap();
591
- }
592
- Ok(Wrap(AnyValue::Decimal(v, scale)))
593
- } else {
594
- Err(RbPolarsErr::other(format!(
595
- "object type not supported {:?}",
596
- ob
597
- )))
598
- }
599
- }
600
- }
601
-
602
426
  impl<'s> TryConvert for Wrap<Row<'s>> {
603
427
  fn try_convert(ob: Value) -> RbResult<Self> {
604
428
  let mut vals: Vec<Wrap<AnyValue<'s>>> = Vec::new();
@@ -662,6 +486,15 @@ impl TotalEq for ObjectValue {
662
486
  }
663
487
  }
664
488
 
489
+ impl TotalHash for ObjectValue {
490
+ fn tot_hash<H>(&self, state: &mut H)
491
+ where
492
+ H: Hasher,
493
+ {
494
+ self.hash(state);
495
+ }
496
+ }
497
+
665
498
  impl Display for ObjectValue {
666
499
  fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
667
500
  write!(f, "{}", self.to_object())
@@ -715,24 +548,33 @@ impl Default for ObjectValue {
715
548
 
716
549
  pub(crate) fn dicts_to_rows(
717
550
  records: &Value,
718
- infer_schema_len: usize,
551
+ infer_schema_len: Option<usize>,
552
+ schema_columns: PlIndexSet<String>,
719
553
  ) -> RbResult<(Vec<Row>, Vec<String>)> {
554
+ let infer_schema_len = infer_schema_len.map(|n| std::cmp::max(1, n));
720
555
  let (dicts, len) = get_rbseq(*records)?;
721
556
 
722
- let mut key_names = PlIndexSet::new();
723
- for d in dicts.each().take(infer_schema_len) {
724
- let d = d?;
725
- let d = RHash::try_convert(d)?;
726
-
727
- d.foreach(|name: Value, _value: Value| {
728
- if let Some(v) = Symbol::from_value(name) {
729
- key_names.insert(v.name()?.into());
730
- } else {
731
- key_names.insert(String::try_convert(name)?);
732
- };
733
- Ok(ForEach::Continue)
734
- })?;
735
- }
557
+ let key_names = {
558
+ if !schema_columns.is_empty() {
559
+ schema_columns
560
+ } else {
561
+ let mut inferred_keys = PlIndexSet::new();
562
+ for d in dicts.each().take(infer_schema_len.unwrap_or(usize::MAX)) {
563
+ let d = d?;
564
+ let d = RHash::try_convert(d)?;
565
+
566
+ d.foreach(|name: Value, _value: Value| {
567
+ if let Some(v) = Symbol::from_value(name) {
568
+ inferred_keys.insert(v.name()?.into());
569
+ } else {
570
+ inferred_keys.insert(String::try_convert(name)?);
571
+ };
572
+ Ok(ForEach::Continue)
573
+ })?;
574
+ }
575
+ inferred_keys
576
+ }
577
+ };
736
578
 
737
579
  let mut rows = Vec::with_capacity(len);
738
580
 
@@ -895,8 +737,7 @@ impl TryConvert for Wrap<JoinType> {
895
737
  "outer_coalesce" => JoinType::Outer { coalesce: true },
896
738
  "semi" => JoinType::Semi,
897
739
  "anti" => JoinType::Anti,
898
- // #[cfg(feature = "cross_join")]
899
- // "cross" => JoinType::Cross,
740
+ "cross" => JoinType::Cross,
900
741
  v => {
901
742
  return Err(RbValueError::new_err(format!(
902
743
  "how must be one of {{'inner', 'left', 'outer', 'semi', 'anti', 'cross'}}, got {}",
@@ -940,6 +781,21 @@ impl TryConvert for Wrap<ListToStructWidthStrategy> {
940
781
  }
941
782
  }
942
783
 
784
+ impl TryConvert for Wrap<NonExistent> {
785
+ fn try_convert(ob: Value) -> RbResult<Self> {
786
+ let parsed = match String::try_convert(ob)?.as_str() {
787
+ "null" => NonExistent::Null,
788
+ "raise" => NonExistent::Raise,
789
+ v => {
790
+ return Err(RbValueError::new_err(format!(
791
+ "`non_existent` must be one of {{'null', 'raise'}}, got {v}",
792
+ )))
793
+ }
794
+ };
795
+ Ok(Wrap(parsed))
796
+ }
797
+ }
798
+
943
799
  impl TryConvert for Wrap<NullBehavior> {
944
800
  fn try_convert(ob: Value) -> RbResult<Self> {
945
801
  let parsed = match String::try_convert(ob)?.as_str() {
@@ -1062,6 +918,22 @@ impl TryConvert for Wrap<UniqueKeepStrategy> {
1062
918
  }
1063
919
  }
1064
920
 
921
+ impl TryConvert for Wrap<IpcCompression> {
922
+ fn try_convert(ob: Value) -> RbResult<Self> {
923
+ let parsed = match String::try_convert(ob)?.as_str() {
924
+ "lz4" => IpcCompression::LZ4,
925
+ "zstd" => IpcCompression::ZSTD,
926
+ v => {
927
+ return Err(RbValueError::new_err(format!(
928
+ "compression must be one of {{'lz4', 'zstd'}}, got {}",
929
+ v
930
+ )))
931
+ }
932
+ };
933
+ Ok(Wrap(parsed))
934
+ }
935
+ }
936
+
1065
937
  impl TryConvert for Wrap<SearchSortedSide> {
1066
938
  fn try_convert(ob: Value) -> RbResult<Self> {
1067
939
  let parsed = match String::try_convert(ob)?.as_str() {
@@ -1078,6 +950,56 @@ impl TryConvert for Wrap<SearchSortedSide> {
1078
950
  }
1079
951
  }
1080
952
 
953
+ impl TryConvert for Wrap<WindowMapping> {
954
+ fn try_convert(ob: Value) -> RbResult<Self> {
955
+ let parsed = match String::try_convert(ob)?.as_str() {
956
+ "group_to_rows" => WindowMapping::GroupsToRows,
957
+ "join" => WindowMapping::Join,
958
+ "explode" => WindowMapping::Explode,
959
+ v => {
960
+ return Err(RbValueError::new_err(format!(
961
+ "`mapping_strategy` must be one of {{'group_to_rows', 'join', 'explode'}}, got {v}",
962
+ )))
963
+ }
964
+ };
965
+ Ok(Wrap(parsed))
966
+ }
967
+ }
968
+
969
+ impl TryConvert for Wrap<JoinValidation> {
970
+ fn try_convert(ob: Value) -> RbResult<Self> {
971
+ let parsed = match String::try_convert(ob)?.as_str() {
972
+ "1:1" => JoinValidation::OneToOne,
973
+ "1:m" => JoinValidation::OneToMany,
974
+ "m:m" => JoinValidation::ManyToMany,
975
+ "m:1" => JoinValidation::ManyToOne,
976
+ v => {
977
+ return Err(RbValueError::new_err(format!(
978
+ "`validate` must be one of {{'m:m', 'm:1', '1:m', '1:1'}}, got {v}",
979
+ )))
980
+ }
981
+ };
982
+ Ok(Wrap(parsed))
983
+ }
984
+ }
985
+
986
+ impl TryConvert for Wrap<QuoteStyle> {
987
+ fn try_convert(ob: Value) -> RbResult<Self> {
988
+ let parsed = match String::try_convert(ob)?.as_str() {
989
+ "always" => QuoteStyle::Always,
990
+ "necessary" => QuoteStyle::Necessary,
991
+ "non_numeric" => QuoteStyle::NonNumeric,
992
+ "never" => QuoteStyle::Never,
993
+ v => {
994
+ return Err(RbValueError::new_err(format!(
995
+ "`quote_style` must be one of {{'always', 'necessary', 'non_numeric', 'never'}}, got {v}",
996
+ )))
997
+ },
998
+ };
999
+ Ok(Wrap(parsed))
1000
+ }
1001
+ }
1002
+
1081
1003
  pub fn parse_fill_null_strategy(
1082
1004
  strategy: &str,
1083
1005
  limit: FillNullLimit,
@@ -1150,3 +1072,12 @@ where
1150
1072
  {
1151
1073
  container.into_iter().map(|s| s.as_ref().into()).collect()
1152
1074
  }
1075
+
1076
+ impl TryConvert for Wrap<NonZeroUsize> {
1077
+ fn try_convert(ob: Value) -> RbResult<Self> {
1078
+ let v = usize::try_convert(ob)?;
1079
+ NonZeroUsize::new(v)
1080
+ .map(Wrap)
1081
+ .ok_or(RbValueError::new_err("must be non-zero".into()))
1082
+ }
1083
+ }