polars-df 0.8.0 → 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (68) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +30 -1
  3. data/Cargo.lock +107 -59
  4. data/Cargo.toml +0 -3
  5. data/LICENSE.txt +1 -1
  6. data/README.md +2 -2
  7. data/ext/polars/Cargo.toml +15 -7
  8. data/ext/polars/src/batched_csv.rs +4 -4
  9. data/ext/polars/src/conversion/anyvalue.rs +185 -0
  10. data/ext/polars/src/conversion/chunked_array.rs +140 -0
  11. data/ext/polars/src/{conversion.rs → conversion/mod.rs} +260 -340
  12. data/ext/polars/src/dataframe.rs +69 -53
  13. data/ext/polars/src/expr/array.rs +74 -0
  14. data/ext/polars/src/expr/datetime.rs +22 -56
  15. data/ext/polars/src/expr/general.rs +61 -33
  16. data/ext/polars/src/expr/list.rs +52 -4
  17. data/ext/polars/src/expr/meta.rs +48 -0
  18. data/ext/polars/src/expr/rolling.rs +1 -0
  19. data/ext/polars/src/expr/string.rs +59 -8
  20. data/ext/polars/src/expr/struct.rs +8 -4
  21. data/ext/polars/src/functions/aggregation.rs +6 -0
  22. data/ext/polars/src/functions/lazy.rs +103 -48
  23. data/ext/polars/src/functions/meta.rs +45 -1
  24. data/ext/polars/src/functions/string_cache.rs +14 -0
  25. data/ext/polars/src/{lazyframe.rs → lazyframe/mod.rs} +138 -22
  26. data/ext/polars/src/lib.rs +226 -168
  27. data/ext/polars/src/series/aggregation.rs +20 -0
  28. data/ext/polars/src/series/mod.rs +25 -4
  29. data/lib/polars/array_expr.rb +449 -0
  30. data/lib/polars/array_name_space.rb +346 -0
  31. data/lib/polars/cat_expr.rb +24 -0
  32. data/lib/polars/cat_name_space.rb +75 -0
  33. data/lib/polars/config.rb +2 -2
  34. data/lib/polars/data_frame.rb +179 -43
  35. data/lib/polars/data_types.rb +191 -28
  36. data/lib/polars/date_time_expr.rb +31 -14
  37. data/lib/polars/exceptions.rb +12 -1
  38. data/lib/polars/expr.rb +866 -186
  39. data/lib/polars/functions/aggregation/horizontal.rb +246 -0
  40. data/lib/polars/functions/aggregation/vertical.rb +282 -0
  41. data/lib/polars/functions/as_datatype.rb +248 -0
  42. data/lib/polars/functions/col.rb +47 -0
  43. data/lib/polars/functions/eager.rb +182 -0
  44. data/lib/polars/functions/lazy.rb +1280 -0
  45. data/lib/polars/functions/len.rb +49 -0
  46. data/lib/polars/functions/lit.rb +35 -0
  47. data/lib/polars/functions/random.rb +16 -0
  48. data/lib/polars/functions/range/date_range.rb +103 -0
  49. data/lib/polars/functions/range/int_range.rb +51 -0
  50. data/lib/polars/functions/repeat.rb +144 -0
  51. data/lib/polars/functions/whenthen.rb +27 -0
  52. data/lib/polars/functions.rb +29 -416
  53. data/lib/polars/group_by.rb +2 -2
  54. data/lib/polars/io.rb +18 -25
  55. data/lib/polars/lazy_frame.rb +367 -53
  56. data/lib/polars/list_expr.rb +152 -6
  57. data/lib/polars/list_name_space.rb +102 -0
  58. data/lib/polars/meta_expr.rb +175 -7
  59. data/lib/polars/series.rb +273 -34
  60. data/lib/polars/string_cache.rb +75 -0
  61. data/lib/polars/string_expr.rb +412 -96
  62. data/lib/polars/string_name_space.rb +4 -4
  63. data/lib/polars/testing.rb +507 -0
  64. data/lib/polars/utils.rb +52 -8
  65. data/lib/polars/version.rb +1 -1
  66. data/lib/polars.rb +15 -2
  67. metadata +35 -5
  68. data/lib/polars/lazy_functions.rb +0 -1181
@@ -1,24 +1,28 @@
1
+ pub(crate) mod anyvalue;
2
+ mod chunked_array;
3
+
1
4
  use std::fmt::{Debug, Display, Formatter};
2
5
  use std::hash::{Hash, Hasher};
6
+ use std::num::NonZeroUsize;
3
7
 
4
- use magnus::encoding::{EncodingCapable, Index};
5
8
  use magnus::{
6
- class, exception, prelude::*, r_hash::ForEach, value::Opaque, Float, Integer, IntoValue,
7
- Module, RArray, RHash, RString, Ruby, Symbol, TryConvert, Value,
9
+ class, exception, prelude::*, r_hash::ForEach, value::Opaque, IntoValue, Module, RArray, RHash,
10
+ Ruby, Symbol, TryConvert, Value,
8
11
  };
9
12
  use polars::chunked_array::object::PolarsObjectSafe;
10
13
  use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
11
14
  use polars::datatypes::AnyValue;
12
- use polars::frame::row::{any_values_to_dtype, Row};
15
+ use polars::frame::row::Row;
13
16
  use polars::frame::NullStrategy;
14
17
  use polars::io::avro::AvroCompression;
15
18
  use polars::prelude::*;
16
19
  use polars::series::ops::NullBehavior;
17
- use polars_utils::total_ord::TotalEq;
20
+ use polars_core::utils::arrow::array::Array;
21
+ use polars_utils::total_ord::{TotalEq, TotalHash};
18
22
  use smartstring::alias::String as SmartString;
19
23
 
20
24
  use crate::object::OBJECT_NAME;
21
- use crate::rb_modules::utils;
25
+ use crate::rb_modules::series;
22
26
  use crate::{RbDataFrame, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbTypeError, RbValueError};
23
27
 
24
28
  pub(crate) fn slice_to_wrapped<T>(slice: &[T]) -> &[Wrap<T>] {
@@ -78,36 +82,11 @@ pub(crate) fn get_series(obj: Value) -> RbResult<Series> {
78
82
  Ok(rbs.series.borrow().clone())
79
83
  }
80
84
 
81
- impl TryConvert for Wrap<StringChunked> {
82
- fn try_convert(obj: Value) -> RbResult<Self> {
83
- let (seq, len) = get_rbseq(obj)?;
84
- let mut builder = StringChunkedBuilder::new("", len, len * 25);
85
-
86
- for res in seq.each() {
87
- let item = res?;
88
- match String::try_convert(item) {
89
- Ok(val) => builder.append_value(&val),
90
- Err(_) => builder.append_null(),
91
- }
92
- }
93
- Ok(Wrap(builder.finish()))
94
- }
95
- }
96
-
97
- impl TryConvert for Wrap<BinaryChunked> {
98
- fn try_convert(obj: Value) -> RbResult<Self> {
99
- let (seq, len) = get_rbseq(obj)?;
100
- let mut builder = BinaryChunkedBuilder::new("", len, len * 25);
101
-
102
- for res in seq.each() {
103
- let item = res?;
104
- match RString::try_convert(item) {
105
- Ok(val) => builder.append_value(unsafe { val.as_slice() }),
106
- Err(_) => builder.append_null(),
107
- }
108
- }
109
- Ok(Wrap(builder.finish()))
110
- }
85
+ pub(crate) fn to_series(s: RbSeries) -> Value {
86
+ let series = series();
87
+ series
88
+ .funcall::<_, _, Value>("_from_rbseries", (s,))
89
+ .unwrap()
111
90
  }
112
91
 
113
92
  impl TryConvert for Wrap<NullValues> {
@@ -134,102 +113,84 @@ fn struct_dict<'a>(vals: impl Iterator<Item = AnyValue<'a>>, flds: &[Field]) ->
134
113
  dict.into_value()
135
114
  }
136
115
 
137
- impl IntoValue for Wrap<AnyValue<'_>> {
138
- fn into_value_with(self, ruby: &Ruby) -> Value {
116
+ impl IntoValue for Wrap<DataType> {
117
+ fn into_value_with(self, _: &Ruby) -> Value {
118
+ let pl = crate::rb_modules::polars();
119
+
139
120
  match self.0 {
140
- AnyValue::UInt8(v) => ruby.into_value(v),
141
- AnyValue::UInt16(v) => ruby.into_value(v),
142
- AnyValue::UInt32(v) => ruby.into_value(v),
143
- AnyValue::UInt64(v) => ruby.into_value(v),
144
- AnyValue::Int8(v) => ruby.into_value(v),
145
- AnyValue::Int16(v) => ruby.into_value(v),
146
- AnyValue::Int32(v) => ruby.into_value(v),
147
- AnyValue::Int64(v) => ruby.into_value(v),
148
- AnyValue::Float32(v) => ruby.into_value(v),
149
- AnyValue::Float64(v) => ruby.into_value(v),
150
- AnyValue::Null => ruby.qnil().as_value(),
151
- AnyValue::Boolean(v) => ruby.into_value(v),
152
- AnyValue::String(v) => ruby.into_value(v),
153
- AnyValue::StringOwned(v) => ruby.into_value(v.as_str()),
154
- AnyValue::Categorical(idx, rev, arr) => {
155
- let s = if arr.is_null() {
156
- rev.get(idx)
157
- } else {
158
- unsafe { arr.deref_unchecked().value(idx as usize) }
159
- };
160
- s.into_value()
121
+ DataType::Int8 => {
122
+ let class = pl.const_get::<_, Value>("Int8").unwrap();
123
+ class.funcall("new", ()).unwrap()
161
124
  }
162
- AnyValue::Date(v) => utils().funcall("_to_ruby_date", (v,)).unwrap(),
163
- AnyValue::Datetime(v, time_unit, time_zone) => {
164
- let time_unit = time_unit.to_ascii();
165
- utils()
166
- .funcall("_to_ruby_datetime", (v, time_unit, time_zone.clone()))
167
- .unwrap()
125
+ DataType::Int16 => {
126
+ let class = pl.const_get::<_, Value>("Int16").unwrap();
127
+ class.funcall("new", ()).unwrap()
168
128
  }
169
- AnyValue::Duration(v, time_unit) => {
170
- let time_unit = time_unit.to_ascii();
171
- utils()
172
- .funcall("_to_ruby_duration", (v, time_unit))
173
- .unwrap()
129
+ DataType::Int32 => {
130
+ let class = pl.const_get::<_, Value>("Int32").unwrap();
131
+ class.funcall("new", ()).unwrap()
174
132
  }
175
- AnyValue::Time(v) => utils().funcall("_to_ruby_time", (v,)).unwrap(),
176
- AnyValue::Array(v, _) | AnyValue::List(v) => RbSeries::new(v).to_a().into_value(),
177
- ref av @ AnyValue::Struct(_, _, flds) => struct_dict(av._iter_struct_av(), flds),
178
- AnyValue::StructOwned(payload) => struct_dict(payload.0.into_iter(), &payload.1),
179
- AnyValue::Object(v) => {
180
- let object = v.as_any().downcast_ref::<ObjectValue>().unwrap();
181
- object.to_object()
133
+ DataType::Int64 => {
134
+ let class = pl.const_get::<_, Value>("Int64").unwrap();
135
+ class.funcall("new", ()).unwrap()
182
136
  }
183
- AnyValue::ObjectOwned(v) => {
184
- let object = v.0.as_any().downcast_ref::<ObjectValue>().unwrap();
185
- object.to_object()
137
+ DataType::UInt8 => {
138
+ let class = pl.const_get::<_, Value>("UInt8").unwrap();
139
+ class.funcall("new", ()).unwrap()
140
+ }
141
+ DataType::UInt16 => {
142
+ let class = pl.const_get::<_, Value>("UInt16").unwrap();
143
+ class.funcall("new", ()).unwrap()
144
+ }
145
+ DataType::UInt32 => {
146
+ let class = pl.const_get::<_, Value>("UInt32").unwrap();
147
+ class.funcall("new", ()).unwrap()
148
+ }
149
+ DataType::UInt64 => {
150
+ let class = pl.const_get::<_, Value>("UInt64").unwrap();
151
+ class.funcall("new", ()).unwrap()
152
+ }
153
+ DataType::Float32 => {
154
+ let class = pl.const_get::<_, Value>("Float32").unwrap();
155
+ class.funcall("new", ()).unwrap()
156
+ }
157
+ DataType::Float64 => {
158
+ let class = pl.const_get::<_, Value>("Float64").unwrap();
159
+ class.funcall("new", ()).unwrap()
186
160
  }
187
- AnyValue::Binary(v) => RString::from_slice(v).into_value(),
188
- AnyValue::BinaryOwned(v) => RString::from_slice(&v).into_value(),
189
- AnyValue::Decimal(v, scale) => utils()
190
- .funcall("_to_ruby_decimal", (v.to_string(), -(scale as i32)))
191
- .unwrap(),
192
- }
193
- }
194
- }
195
-
196
- impl IntoValue for Wrap<DataType> {
197
- fn into_value_with(self, _: &Ruby) -> Value {
198
- let pl = crate::rb_modules::polars();
199
-
200
- match self.0 {
201
- DataType::Int8 => pl.const_get::<_, Value>("Int8").unwrap(),
202
- DataType::Int16 => pl.const_get::<_, Value>("Int16").unwrap(),
203
- DataType::Int32 => pl.const_get::<_, Value>("Int32").unwrap(),
204
- DataType::Int64 => pl.const_get::<_, Value>("Int64").unwrap(),
205
- DataType::UInt8 => pl.const_get::<_, Value>("UInt8").unwrap(),
206
- DataType::UInt16 => pl.const_get::<_, Value>("UInt16").unwrap(),
207
- DataType::UInt32 => pl.const_get::<_, Value>("UInt32").unwrap(),
208
- DataType::UInt64 => pl.const_get::<_, Value>("UInt64").unwrap(),
209
- DataType::Float32 => pl.const_get::<_, Value>("Float32").unwrap(),
210
- DataType::Float64 => pl.const_get::<_, Value>("Float64").unwrap(),
211
161
  DataType::Decimal(precision, scale) => {
212
- let decimal_class = pl.const_get::<_, Value>("Decimal").unwrap();
213
- decimal_class
162
+ let class = pl.const_get::<_, Value>("Decimal").unwrap();
163
+ class
214
164
  .funcall::<_, _, Value>("new", (precision, scale))
215
165
  .unwrap()
216
166
  }
217
- DataType::Boolean => pl.const_get::<_, Value>("Boolean").unwrap(),
218
- DataType::String => pl.const_get::<_, Value>("String").unwrap(),
219
- DataType::Binary => pl.const_get::<_, Value>("Binary").unwrap(),
167
+ DataType::Boolean => {
168
+ let class = pl.const_get::<_, Value>("Boolean").unwrap();
169
+ class.funcall("new", ()).unwrap()
170
+ }
171
+ DataType::String => {
172
+ let class = pl.const_get::<_, Value>("String").unwrap();
173
+ class.funcall("new", ()).unwrap()
174
+ }
175
+ DataType::Binary => {
176
+ let class = pl.const_get::<_, Value>("Binary").unwrap();
177
+ class.funcall("new", ()).unwrap()
178
+ }
220
179
  DataType::Array(inner, size) => {
180
+ let class = pl.const_get::<_, Value>("Array").unwrap();
221
181
  let inner = Wrap(*inner);
222
- let list_class = pl.const_get::<_, Value>("Array").unwrap();
223
- list_class
224
- .funcall::<_, _, Value>("new", (size, inner))
225
- .unwrap()
182
+ let args = (inner, size);
183
+ class.funcall::<_, _, Value>("new", args).unwrap()
226
184
  }
227
185
  DataType::List(inner) => {
186
+ let class = pl.const_get::<_, Value>("List").unwrap();
228
187
  let inner = Wrap(*inner);
229
- let list_class = pl.const_get::<_, Value>("List").unwrap();
230
- list_class.funcall::<_, _, Value>("new", (inner,)).unwrap()
188
+ class.funcall::<_, _, Value>("new", (inner,)).unwrap()
189
+ }
190
+ DataType::Date => {
191
+ let class = pl.const_get::<_, Value>("Date").unwrap();
192
+ class.funcall("new", ()).unwrap()
231
193
  }
232
- DataType::Date => pl.const_get::<_, Value>("Date").unwrap(),
233
194
  DataType::Datetime(tu, tz) => {
234
195
  let datetime_class = pl.const_get::<_, Value>("Datetime").unwrap();
235
196
  datetime_class
@@ -242,9 +203,29 @@ impl IntoValue for Wrap<DataType> {
242
203
  .funcall::<_, _, Value>("new", (tu.to_ascii(),))
243
204
  .unwrap()
244
205
  }
245
- DataType::Object(_, _) => pl.const_get::<_, Value>("Object").unwrap(),
246
- DataType::Categorical(_, _) => pl.const_get::<_, Value>("Categorical").unwrap(),
247
- DataType::Time => pl.const_get::<_, Value>("Time").unwrap(),
206
+ DataType::Object(_, _) => {
207
+ let class = pl.const_get::<_, Value>("Object").unwrap();
208
+ class.funcall("new", ()).unwrap()
209
+ }
210
+ DataType::Categorical(_, ordering) => {
211
+ let class = pl.const_get::<_, Value>("Categorical").unwrap();
212
+ class.funcall("new", (Wrap(ordering),)).unwrap()
213
+ }
214
+ DataType::Enum(rev_map, _) => {
215
+ // we should always have an initialized rev_map coming from rust
216
+ let categories = rev_map.as_ref().unwrap().get_categories();
217
+ let class = pl.const_get::<_, Value>("Enum").unwrap();
218
+ let s = Series::from_arrow("category", categories.to_boxed()).unwrap();
219
+ let series = to_series(s.into());
220
+ class
221
+ .funcall::<_, _, Value>("new", (series,))
222
+ .unwrap()
223
+ .into()
224
+ }
225
+ DataType::Time => {
226
+ let class = pl.const_get::<_, Value>("Time").unwrap();
227
+ class.funcall("new", ()).unwrap()
228
+ }
248
229
  DataType::Struct(fields) => {
249
230
  let field_class = pl.const_get::<_, Value>("Field").unwrap();
250
231
  let iter = fields.iter().map(|fld| {
@@ -260,12 +241,31 @@ impl IntoValue for Wrap<DataType> {
260
241
  .funcall::<_, _, Value>("new", (fields,))
261
242
  .unwrap()
262
243
  }
263
- DataType::Null => pl.const_get::<_, Value>("Null").unwrap(),
264
- DataType::Unknown => pl.const_get::<_, Value>("Unknown").unwrap(),
244
+ DataType::Null => {
245
+ let class = pl.const_get::<_, Value>("Null").unwrap();
246
+ class.funcall("new", ()).unwrap()
247
+ }
248
+ DataType::Unknown => {
249
+ let class = pl.const_get::<_, Value>("Unknown").unwrap();
250
+ class.funcall("new", ()).unwrap()
251
+ }
252
+ DataType::BinaryOffset => {
253
+ unimplemented!()
254
+ }
265
255
  }
266
256
  }
267
257
  }
268
258
 
259
+ impl IntoValue for Wrap<CategoricalOrdering> {
260
+ fn into_value_with(self, _: &Ruby) -> Value {
261
+ let ordering = match self.0 {
262
+ CategoricalOrdering::Physical => "physical",
263
+ CategoricalOrdering::Lexical => "lexical",
264
+ };
265
+ ordering.into_value()
266
+ }
267
+ }
268
+
269
269
  impl IntoValue for Wrap<TimeUnit> {
270
270
  fn into_value_with(self, _: &Ruby) -> Value {
271
271
  let tu = match self.0 {
@@ -277,114 +277,6 @@ impl IntoValue for Wrap<TimeUnit> {
277
277
  }
278
278
  }
279
279
 
280
- impl IntoValue for Wrap<&StringChunked> {
281
- fn into_value_with(self, _: &Ruby) -> Value {
282
- let iter = self.0.into_iter();
283
- RArray::from_iter(iter).into_value()
284
- }
285
- }
286
-
287
- impl IntoValue for Wrap<&BinaryChunked> {
288
- fn into_value_with(self, _: &Ruby) -> Value {
289
- let iter = self
290
- .0
291
- .into_iter()
292
- .map(|opt_bytes| opt_bytes.map(RString::from_slice));
293
- RArray::from_iter(iter).into_value()
294
- }
295
- }
296
-
297
- impl IntoValue for Wrap<&StructChunked> {
298
- fn into_value_with(self, _: &Ruby) -> Value {
299
- let s = self.0.clone().into_series();
300
- // todo! iterate its chunks and flatten.
301
- // make series::iter() accept a chunk index.
302
- let s = s.rechunk();
303
- let iter = s.iter().map(|av| {
304
- if let AnyValue::Struct(_, _, flds) = av {
305
- struct_dict(av._iter_struct_av(), flds)
306
- } else {
307
- unreachable!()
308
- }
309
- });
310
-
311
- RArray::from_iter(iter).into_value()
312
- }
313
- }
314
-
315
- impl IntoValue for Wrap<&DurationChunked> {
316
- fn into_value_with(self, _: &Ruby) -> Value {
317
- let utils = utils();
318
- let time_unit = Wrap(self.0.time_unit()).into_value();
319
- let iter = self.0.into_iter().map(|opt_v| {
320
- opt_v.map(|v| {
321
- utils
322
- .funcall::<_, _, Value>("_to_ruby_duration", (v, time_unit))
323
- .unwrap()
324
- })
325
- });
326
- RArray::from_iter(iter).into_value()
327
- }
328
- }
329
-
330
- impl IntoValue for Wrap<&DatetimeChunked> {
331
- fn into_value_with(self, _: &Ruby) -> Value {
332
- let utils = utils();
333
- let time_unit = Wrap(self.0.time_unit()).into_value();
334
- let time_zone = self.0.time_zone().clone().into_value();
335
- let iter = self.0.into_iter().map(|opt_v| {
336
- opt_v.map(|v| {
337
- utils
338
- .funcall::<_, _, Value>("_to_ruby_datetime", (v, time_unit, time_zone))
339
- .unwrap()
340
- })
341
- });
342
- RArray::from_iter(iter).into_value()
343
- }
344
- }
345
-
346
- impl IntoValue for Wrap<&TimeChunked> {
347
- fn into_value_with(self, _: &Ruby) -> Value {
348
- let utils = utils();
349
- let iter = self.0.into_iter().map(|opt_v| {
350
- opt_v.map(|v| utils.funcall::<_, _, Value>("_to_ruby_time", (v,)).unwrap())
351
- });
352
- RArray::from_iter(iter).into_value()
353
- }
354
- }
355
-
356
- impl IntoValue for Wrap<&DateChunked> {
357
- fn into_value_with(self, _: &Ruby) -> Value {
358
- let utils = utils();
359
- let iter = self.0.into_iter().map(|opt_v| {
360
- opt_v.map(|v| utils.funcall::<_, _, Value>("_to_ruby_date", (v,)).unwrap())
361
- });
362
- RArray::from_iter(iter).into_value()
363
- }
364
- }
365
-
366
- impl IntoValue for Wrap<&DecimalChunked> {
367
- fn into_value_with(self, _: &Ruby) -> Value {
368
- let utils = utils();
369
- let rb_scale = (-(self.0.scale() as i32)).into_value();
370
- let iter = self.0.into_iter().map(|opt_v| {
371
- opt_v.map(|v| {
372
- utils
373
- .funcall::<_, _, Value>("_to_ruby_decimal", (v.to_string(), rb_scale))
374
- .unwrap()
375
- })
376
- });
377
- RArray::from_iter(iter).into_value()
378
- }
379
- }
380
-
381
- fn abs_decimal_from_digits(digits: String, exp: i32) -> Option<(i128, usize)> {
382
- match digits.parse::<i128>() {
383
- Ok(v) => Some((v, ((digits.len() as i32) - exp) as usize)),
384
- Err(_) => None,
385
- }
386
- }
387
-
388
280
  impl TryConvert for Wrap<Field> {
389
281
  fn try_convert(ob: Value) -> RbResult<Self> {
390
282
  let name: String = ob.funcall("name", ())?;
@@ -410,6 +302,7 @@ impl TryConvert for Wrap<DataType> {
410
302
  "Polars::Binary" => DataType::Binary,
411
303
  "Polars::Boolean" => DataType::Boolean,
412
304
  "Polars::Categorical" => DataType::Categorical(None, Default::default()),
305
+ "Polars::Enum" => DataType::Enum(None, Default::default()),
413
306
  "Polars::Date" => DataType::Date,
414
307
  "Polars::Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
415
308
  "Polars::Time" => DataType::Time,
@@ -431,6 +324,36 @@ impl TryConvert for Wrap<DataType> {
431
324
  } else if String::try_convert(ob).is_err() {
432
325
  let name = unsafe { ob.class().name() }.into_owned();
433
326
  match name.as_str() {
327
+ "Polars::Int8" => DataType::Int8,
328
+ "Polars::Int16" => DataType::Int16,
329
+ "Polars::Int32" => DataType::Int32,
330
+ "Polars::Int64" => DataType::Int64,
331
+ "Polars::UInt8" => DataType::UInt8,
332
+ "Polars::UInt16" => DataType::UInt16,
333
+ "Polars::UInt32" => DataType::UInt32,
334
+ "Polars::UInt64" => DataType::UInt64,
335
+ "Polars::String" => DataType::String,
336
+ "Polars::Binary" => DataType::Binary,
337
+ "Polars::Boolean" => DataType::Boolean,
338
+ "Polars::Categorical" => {
339
+ let ordering = ob
340
+ .funcall::<_, _, Wrap<CategoricalOrdering>>("ordering", ())?
341
+ .0;
342
+ DataType::Categorical(None, ordering)
343
+ }
344
+ "Polars::Enum" => {
345
+ let categories = ob.funcall("categories", ()).unwrap();
346
+ let s = get_series(categories)?;
347
+ let ca = s.str().map_err(RbPolarsErr::from)?;
348
+ let categories = ca.downcast_iter().next().unwrap().clone();
349
+ create_enum_data_type(categories)
350
+ }
351
+ "Polars::Date" => DataType::Date,
352
+ "Polars::Time" => DataType::Time,
353
+ "Polars::Float32" => DataType::Float32,
354
+ "Polars::Float64" => DataType::Float64,
355
+ "Polars::Null" => DataType::Null,
356
+ "Polars::Unknown" => DataType::Unknown,
434
357
  "Polars::Duration" => {
435
358
  let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
436
359
  let time_unit = Wrap::<TimeUnit>::try_convert(time_unit)?.0;
@@ -503,102 +426,6 @@ impl TryConvert for Wrap<DataType> {
503
426
  }
504
427
  }
505
428
 
506
- impl<'s> TryConvert for Wrap<AnyValue<'s>> {
507
- fn try_convert(ob: Value) -> RbResult<Self> {
508
- if ob.is_kind_of(class::true_class()) || ob.is_kind_of(class::false_class()) {
509
- Ok(AnyValue::Boolean(bool::try_convert(ob)?).into())
510
- } else if let Some(v) = Integer::from_value(ob) {
511
- Ok(AnyValue::Int64(v.to_i64()?).into())
512
- } else if let Some(v) = Float::from_value(ob) {
513
- Ok(AnyValue::Float64(v.to_f64()).into())
514
- } else if let Some(v) = RString::from_value(ob) {
515
- if v.enc_get() == Index::utf8() {
516
- Ok(AnyValue::StringOwned(v.to_string()?.into()).into())
517
- } else {
518
- Ok(AnyValue::BinaryOwned(unsafe { v.as_slice() }.to_vec()).into())
519
- }
520
- // call is_a? for ActiveSupport::TimeWithZone
521
- } else if ob.funcall::<_, _, bool>("is_a?", (class::time(),))? {
522
- let sec = ob.funcall::<_, _, i64>("to_i", ())?;
523
- let nsec = ob.funcall::<_, _, i64>("nsec", ())?;
524
- let v = sec * 1_000_000_000 + nsec;
525
- // TODO support time zone when possible
526
- // https://github.com/pola-rs/polars/issues/9103
527
- Ok(AnyValue::Datetime(v, TimeUnit::Nanoseconds, &None).into())
528
- } else if ob.is_nil() {
529
- Ok(AnyValue::Null.into())
530
- } else if let Some(dict) = RHash::from_value(ob) {
531
- let len = dict.len();
532
- let mut keys = Vec::with_capacity(len);
533
- let mut vals = Vec::with_capacity(len);
534
- dict.foreach(|k: Value, v: Value| {
535
- let key = String::try_convert(k)?;
536
- let val = Wrap::<AnyValue>::try_convert(v)?.0;
537
- let dtype = DataType::from(&val);
538
- keys.push(Field::new(&key, dtype));
539
- vals.push(val);
540
- Ok(ForEach::Continue)
541
- })?;
542
- Ok(Wrap(AnyValue::StructOwned(Box::new((vals, keys)))))
543
- } else if let Some(v) = RArray::from_value(ob) {
544
- if v.is_empty() {
545
- Ok(Wrap(AnyValue::List(Series::new_empty("", &DataType::Null))))
546
- } else {
547
- let list = v;
548
-
549
- let mut avs = Vec::with_capacity(25);
550
- let mut iter = list.each();
551
-
552
- for item in (&mut iter).take(25) {
553
- avs.push(Wrap::<AnyValue>::try_convert(item?)?.0)
554
- }
555
-
556
- let (dtype, _n_types) = any_values_to_dtype(&avs).map_err(RbPolarsErr::from)?;
557
-
558
- // push the rest
559
- avs.reserve(list.len());
560
- for item in iter {
561
- avs.push(Wrap::<AnyValue>::try_convert(item?)?.0)
562
- }
563
-
564
- let s = Series::from_any_values_and_dtype("", &avs, &dtype, true)
565
- .map_err(RbPolarsErr::from)?;
566
- Ok(Wrap(AnyValue::List(s)))
567
- }
568
- } else if ob.is_kind_of(crate::rb_modules::datetime()) {
569
- let sec: i64 = ob.funcall("to_i", ())?;
570
- let nsec: i64 = ob.funcall("nsec", ())?;
571
- Ok(Wrap(AnyValue::Datetime(
572
- sec * 1_000_000_000 + nsec,
573
- TimeUnit::Nanoseconds,
574
- &None,
575
- )))
576
- } else if ob.is_kind_of(crate::rb_modules::date()) {
577
- // convert to DateTime for UTC
578
- let v = ob
579
- .funcall::<_, _, Value>("to_datetime", ())?
580
- .funcall::<_, _, Value>("to_time", ())?
581
- .funcall::<_, _, i64>("to_i", ())?;
582
- Ok(Wrap(AnyValue::Date((v / 86400) as i32)))
583
- } else if ob.is_kind_of(crate::rb_modules::bigdecimal()) {
584
- let (sign, digits, _, exp): (i8, String, i32, i32) = ob.funcall("split", ()).unwrap();
585
- let (mut v, scale) = abs_decimal_from_digits(digits, exp).ok_or_else(|| {
586
- RbPolarsErr::other("BigDecimal is too large to fit in Decimal128".into())
587
- })?;
588
- if sign < 0 {
589
- // TODO better error
590
- v = v.checked_neg().unwrap();
591
- }
592
- Ok(Wrap(AnyValue::Decimal(v, scale)))
593
- } else {
594
- Err(RbPolarsErr::other(format!(
595
- "object type not supported {:?}",
596
- ob
597
- )))
598
- }
599
- }
600
- }
601
-
602
429
  impl<'s> TryConvert for Wrap<Row<'s>> {
603
430
  fn try_convert(ob: Value) -> RbResult<Self> {
604
431
  let mut vals: Vec<Wrap<AnyValue<'s>>> = Vec::new();
@@ -662,6 +489,15 @@ impl TotalEq for ObjectValue {
662
489
  }
663
490
  }
664
491
 
492
+ impl TotalHash for ObjectValue {
493
+ fn tot_hash<H>(&self, state: &mut H)
494
+ where
495
+ H: Hasher,
496
+ {
497
+ self.hash(state);
498
+ }
499
+ }
500
+
665
501
  impl Display for ObjectValue {
666
502
  fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
667
503
  write!(f, "{}", self.to_object())
@@ -715,24 +551,33 @@ impl Default for ObjectValue {
715
551
 
716
552
  pub(crate) fn dicts_to_rows(
717
553
  records: &Value,
718
- infer_schema_len: usize,
554
+ infer_schema_len: Option<usize>,
555
+ schema_columns: PlIndexSet<String>,
719
556
  ) -> RbResult<(Vec<Row>, Vec<String>)> {
557
+ let infer_schema_len = infer_schema_len.map(|n| std::cmp::max(1, n));
720
558
  let (dicts, len) = get_rbseq(*records)?;
721
559
 
722
- let mut key_names = PlIndexSet::new();
723
- for d in dicts.each().take(infer_schema_len) {
724
- let d = d?;
725
- let d = RHash::try_convert(d)?;
726
-
727
- d.foreach(|name: Value, _value: Value| {
728
- if let Some(v) = Symbol::from_value(name) {
729
- key_names.insert(v.name()?.into());
730
- } else {
731
- key_names.insert(String::try_convert(name)?);
732
- };
733
- Ok(ForEach::Continue)
734
- })?;
735
- }
560
+ let key_names = {
561
+ if !schema_columns.is_empty() {
562
+ schema_columns
563
+ } else {
564
+ let mut inferred_keys = PlIndexSet::new();
565
+ for d in dicts.each().take(infer_schema_len.unwrap_or(usize::MAX)) {
566
+ let d = d?;
567
+ let d = RHash::try_convert(d)?;
568
+
569
+ d.foreach(|name: Value, _value: Value| {
570
+ if let Some(v) = Symbol::from_value(name) {
571
+ inferred_keys.insert(v.name()?.into());
572
+ } else {
573
+ inferred_keys.insert(String::try_convert(name)?);
574
+ };
575
+ Ok(ForEach::Continue)
576
+ })?;
577
+ }
578
+ inferred_keys
579
+ }
580
+ };
736
581
 
737
582
  let mut rows = Vec::with_capacity(len);
738
583
 
@@ -1062,6 +907,22 @@ impl TryConvert for Wrap<UniqueKeepStrategy> {
1062
907
  }
1063
908
  }
1064
909
 
910
+ impl TryConvert for Wrap<IpcCompression> {
911
+ fn try_convert(ob: Value) -> RbResult<Self> {
912
+ let parsed = match String::try_convert(ob)?.as_str() {
913
+ "lz4" => IpcCompression::LZ4,
914
+ "zstd" => IpcCompression::ZSTD,
915
+ v => {
916
+ return Err(RbValueError::new_err(format!(
917
+ "compression must be one of {{'lz4', 'zstd'}}, got {}",
918
+ v
919
+ )))
920
+ }
921
+ };
922
+ Ok(Wrap(parsed))
923
+ }
924
+ }
925
+
1065
926
  impl TryConvert for Wrap<SearchSortedSide> {
1066
927
  fn try_convert(ob: Value) -> RbResult<Self> {
1067
928
  let parsed = match String::try_convert(ob)?.as_str() {
@@ -1078,6 +939,56 @@ impl TryConvert for Wrap<SearchSortedSide> {
1078
939
  }
1079
940
  }
1080
941
 
942
+ impl TryConvert for Wrap<WindowMapping> {
943
+ fn try_convert(ob: Value) -> RbResult<Self> {
944
+ let parsed = match String::try_convert(ob)?.as_str() {
945
+ "group_to_rows" => WindowMapping::GroupsToRows,
946
+ "join" => WindowMapping::Join,
947
+ "explode" => WindowMapping::Explode,
948
+ v => {
949
+ return Err(RbValueError::new_err(format!(
950
+ "`mapping_strategy` must be one of {{'group_to_rows', 'join', 'explode'}}, got {v}",
951
+ )))
952
+ }
953
+ };
954
+ Ok(Wrap(parsed))
955
+ }
956
+ }
957
+
958
+ impl TryConvert for Wrap<JoinValidation> {
959
+ fn try_convert(ob: Value) -> RbResult<Self> {
960
+ let parsed = match String::try_convert(ob)?.as_str() {
961
+ "1:1" => JoinValidation::OneToOne,
962
+ "1:m" => JoinValidation::OneToMany,
963
+ "m:m" => JoinValidation::ManyToMany,
964
+ "m:1" => JoinValidation::ManyToOne,
965
+ v => {
966
+ return Err(RbValueError::new_err(format!(
967
+ "`validate` must be one of {{'m:m', 'm:1', '1:m', '1:1'}}, got {v}",
968
+ )))
969
+ }
970
+ };
971
+ Ok(Wrap(parsed))
972
+ }
973
+ }
974
+
975
+ impl TryConvert for Wrap<QuoteStyle> {
976
+ fn try_convert(ob: Value) -> RbResult<Self> {
977
+ let parsed = match String::try_convert(ob)?.as_str() {
978
+ "always" => QuoteStyle::Always,
979
+ "necessary" => QuoteStyle::Necessary,
980
+ "non_numeric" => QuoteStyle::NonNumeric,
981
+ "never" => QuoteStyle::Never,
982
+ v => {
983
+ return Err(RbValueError::new_err(format!(
984
+ "`quote_style` must be one of {{'always', 'necessary', 'non_numeric', 'never'}}, got {v}",
985
+ )))
986
+ },
987
+ };
988
+ Ok(Wrap(parsed))
989
+ }
990
+ }
991
+
1081
992
  pub fn parse_fill_null_strategy(
1082
993
  strategy: &str,
1083
994
  limit: FillNullLimit,
@@ -1150,3 +1061,12 @@ where
1150
1061
  {
1151
1062
  container.into_iter().map(|s| s.as_ref().into()).collect()
1152
1063
  }
1064
+
1065
+ impl TryConvert for Wrap<NonZeroUsize> {
1066
+ fn try_convert(ob: Value) -> RbResult<Self> {
1067
+ let v = usize::try_convert(ob)?;
1068
+ NonZeroUsize::new(v)
1069
+ .map(|v| Wrap(v))
1070
+ .ok_or(RbValueError::new_err("must be non-zero".into()))
1071
+ }
1072
+ }