polars-df 0.5.0 → 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (72) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +26 -0
  3. data/Cargo.lock +595 -709
  4. data/Cargo.toml +1 -0
  5. data/README.md +11 -9
  6. data/ext/polars/Cargo.toml +18 -10
  7. data/ext/polars/src/batched_csv.rs +26 -26
  8. data/ext/polars/src/conversion.rs +272 -136
  9. data/ext/polars/src/dataframe.rs +135 -94
  10. data/ext/polars/src/error.rs +8 -5
  11. data/ext/polars/src/expr/array.rs +15 -0
  12. data/ext/polars/src/expr/binary.rs +18 -6
  13. data/ext/polars/src/expr/datetime.rs +10 -12
  14. data/ext/polars/src/expr/general.rs +78 -264
  15. data/ext/polars/src/expr/list.rs +41 -28
  16. data/ext/polars/src/{expr.rs → expr/mod.rs} +5 -2
  17. data/ext/polars/src/expr/name.rs +44 -0
  18. data/ext/polars/src/expr/rolling.rs +196 -0
  19. data/ext/polars/src/expr/string.rs +94 -66
  20. data/ext/polars/src/file.rs +3 -3
  21. data/ext/polars/src/functions/aggregation.rs +35 -0
  22. data/ext/polars/src/functions/eager.rs +7 -31
  23. data/ext/polars/src/functions/io.rs +10 -10
  24. data/ext/polars/src/functions/lazy.rs +119 -54
  25. data/ext/polars/src/functions/meta.rs +30 -0
  26. data/ext/polars/src/functions/misc.rs +8 -0
  27. data/ext/polars/src/functions/mod.rs +5 -0
  28. data/ext/polars/src/functions/random.rs +6 -0
  29. data/ext/polars/src/functions/range.rs +46 -0
  30. data/ext/polars/src/functions/string_cache.rs +11 -0
  31. data/ext/polars/src/functions/whenthen.rs +7 -7
  32. data/ext/polars/src/lazyframe.rs +61 -44
  33. data/ext/polars/src/lib.rs +173 -84
  34. data/ext/polars/src/{apply → map}/dataframe.rs +28 -33
  35. data/ext/polars/src/{apply → map}/mod.rs +10 -6
  36. data/ext/polars/src/{apply → map}/series.rs +12 -16
  37. data/ext/polars/src/object.rs +2 -2
  38. data/ext/polars/src/rb_modules.rs +25 -6
  39. data/ext/polars/src/series/construction.rs +32 -6
  40. data/ext/polars/src/series/export.rs +2 -2
  41. data/ext/polars/src/series/set_at_idx.rs +33 -17
  42. data/ext/polars/src/series.rs +62 -42
  43. data/ext/polars/src/sql.rs +46 -0
  44. data/lib/polars/array_expr.rb +84 -0
  45. data/lib/polars/array_name_space.rb +77 -0
  46. data/lib/polars/batched_csv_reader.rb +1 -1
  47. data/lib/polars/config.rb +530 -0
  48. data/lib/polars/data_frame.rb +206 -131
  49. data/lib/polars/data_types.rb +163 -29
  50. data/lib/polars/date_time_expr.rb +13 -18
  51. data/lib/polars/date_time_name_space.rb +22 -28
  52. data/lib/polars/dynamic_group_by.rb +2 -2
  53. data/lib/polars/expr.rb +241 -151
  54. data/lib/polars/functions.rb +29 -38
  55. data/lib/polars/group_by.rb +38 -76
  56. data/lib/polars/io.rb +37 -2
  57. data/lib/polars/lazy_frame.rb +174 -95
  58. data/lib/polars/lazy_functions.rb +87 -63
  59. data/lib/polars/lazy_group_by.rb +7 -8
  60. data/lib/polars/list_expr.rb +40 -36
  61. data/lib/polars/list_name_space.rb +15 -15
  62. data/lib/polars/name_expr.rb +198 -0
  63. data/lib/polars/rolling_group_by.rb +6 -4
  64. data/lib/polars/series.rb +95 -28
  65. data/lib/polars/sql_context.rb +194 -0
  66. data/lib/polars/string_expr.rb +249 -69
  67. data/lib/polars/string_name_space.rb +155 -25
  68. data/lib/polars/utils.rb +119 -57
  69. data/lib/polars/version.rb +1 -1
  70. data/lib/polars.rb +6 -0
  71. metadata +21 -7
  72. /data/ext/polars/src/{apply → map}/lazy.rs +0 -0
@@ -1,11 +1,11 @@
1
- use std::fmt::{Display, Formatter};
1
+ use std::fmt::{Debug, Display, Formatter};
2
2
  use std::hash::{Hash, Hasher};
3
3
 
4
+ use magnus::encoding::{EncodingCapable, Index};
4
5
  use magnus::{
5
- class, exception, r_hash::ForEach, ruby_handle::RubyHandle, Integer, IntoValue, Module, RArray,
6
- RFloat, RHash, RString, Symbol, TryConvert, Value, QNIL,
6
+ class, exception, prelude::*, r_hash::ForEach, value::Opaque, Float, Integer, IntoValue,
7
+ Module, RArray, RHash, RString, Ruby, Symbol, TryConvert, Value,
7
8
  };
8
- use magnus::encoding::{EncodingCapable, Index};
9
9
  use polars::chunked_array::object::PolarsObjectSafe;
10
10
  use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
11
11
  use polars::datatypes::AnyValue;
@@ -14,10 +14,12 @@ use polars::frame::NullStrategy;
14
14
  use polars::io::avro::AvroCompression;
15
15
  use polars::prelude::*;
16
16
  use polars::series::ops::NullBehavior;
17
+ use polars_core::utils::arrow::util::total_ord::TotalEq;
17
18
  use smartstring::alias::String as SmartString;
18
19
 
20
+ use crate::object::OBJECT_NAME;
19
21
  use crate::rb_modules::utils;
20
- use crate::{RbDataFrame, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbValueError};
22
+ use crate::{RbDataFrame, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbTypeError, RbValueError};
21
23
 
22
24
  pub(crate) fn slice_to_wrapped<T>(slice: &[T]) -> &[Wrap<T>] {
23
25
  // Safety:
@@ -56,7 +58,7 @@ impl<T> From<T> for Wrap<T> {
56
58
  }
57
59
 
58
60
  pub(crate) fn get_rbseq(obj: Value) -> RbResult<(RArray, usize)> {
59
- let seq: RArray = obj.try_convert()?;
61
+ let seq = RArray::try_convert(obj)?;
60
62
  let len = seq.len();
61
63
  Ok((seq, len))
62
64
  }
@@ -83,7 +85,7 @@ impl TryConvert for Wrap<Utf8Chunked> {
83
85
 
84
86
  for res in seq.each() {
85
87
  let item = res?;
86
- match item.try_convert::<String>() {
88
+ match String::try_convert(item) {
87
89
  Ok(val) => builder.append_value(&val),
88
90
  Err(_) => builder.append_null(),
89
91
  }
@@ -99,7 +101,7 @@ impl TryConvert for Wrap<BinaryChunked> {
99
101
 
100
102
  for res in seq.each() {
101
103
  let item = res?;
102
- match item.try_convert::<RString>() {
104
+ match RString::try_convert(item) {
103
105
  Ok(val) => builder.append_value(unsafe { val.as_slice() }),
104
106
  Err(_) => builder.append_null(),
105
107
  }
@@ -110,11 +112,11 @@ impl TryConvert for Wrap<BinaryChunked> {
110
112
 
111
113
  impl TryConvert for Wrap<NullValues> {
112
114
  fn try_convert(ob: Value) -> RbResult<Self> {
113
- if let Ok(s) = ob.try_convert::<String>() {
115
+ if let Ok(s) = String::try_convert(ob) {
114
116
  Ok(Wrap(NullValues::AllColumnsSingle(s)))
115
- } else if let Ok(s) = ob.try_convert::<Vec<String>>() {
117
+ } else if let Ok(s) = Vec::<String>::try_convert(ob) {
116
118
  Ok(Wrap(NullValues::AllColumns(s)))
117
- } else if let Ok(s) = ob.try_convert::<Vec<(String, String)>>() {
119
+ } else if let Ok(s) = Vec::<(String, String)>::try_convert(ob) {
118
120
  Ok(Wrap(NullValues::Named(s)))
119
121
  } else {
120
122
  Err(RbPolarsErr::other(
@@ -133,86 +135,66 @@ fn struct_dict<'a>(vals: impl Iterator<Item = AnyValue<'a>>, flds: &[Field]) ->
133
135
  }
134
136
 
135
137
  impl IntoValue for Wrap<AnyValue<'_>> {
136
- fn into_value_with(self, _: &RubyHandle) -> Value {
138
+ fn into_value_with(self, ruby: &Ruby) -> Value {
137
139
  match self.0 {
138
- AnyValue::UInt8(v) => Value::from(v),
139
- AnyValue::UInt16(v) => Value::from(v),
140
- AnyValue::UInt32(v) => Value::from(v),
141
- AnyValue::UInt64(v) => Value::from(v),
142
- AnyValue::Int8(v) => Value::from(v),
143
- AnyValue::Int16(v) => Value::from(v),
144
- AnyValue::Int32(v) => Value::from(v),
145
- AnyValue::Int64(v) => Value::from(v),
146
- AnyValue::Float32(v) => Value::from(v),
147
- AnyValue::Float64(v) => Value::from(v),
148
- AnyValue::Null => *QNIL,
149
- AnyValue::Boolean(v) => Value::from(v),
150
- AnyValue::Utf8(v) => Value::from(v),
151
- AnyValue::Utf8Owned(v) => Value::from(v.as_str()),
152
- AnyValue::Categorical(_idx, _rev, _arr) => todo!(),
153
- AnyValue::Date(v) => class::time()
154
- .funcall::<_, _, Value>("at", (v * 86400,))
155
- .unwrap()
156
- .funcall::<_, _, Value>("utc", ())
157
- .unwrap()
158
- .funcall::<_, _, Value>("to_date", ())
159
- .unwrap(),
160
- AnyValue::Datetime(v, tu, tz) => {
161
- let t = match tu {
162
- TimeUnit::Nanoseconds => {
163
- let sec = v / 1000000000;
164
- let subsec = v % 1000000000;
165
- class::time()
166
- .funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("nsec")))
167
- .unwrap()
168
- }
169
- TimeUnit::Microseconds => {
170
- let sec = v / 1000000;
171
- let subsec = v % 1000000;
172
- class::time()
173
- .funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("usec")))
174
- .unwrap()
175
- }
176
- TimeUnit::Milliseconds => {
177
- let sec = v / 1000;
178
- let subsec = v % 1000;
179
- class::time()
180
- .funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("millisecond")))
181
- .unwrap()
182
- }
183
- };
184
-
185
- if tz.is_some() {
186
- todo!();
140
+ AnyValue::UInt8(v) => ruby.into_value(v),
141
+ AnyValue::UInt16(v) => ruby.into_value(v),
142
+ AnyValue::UInt32(v) => ruby.into_value(v),
143
+ AnyValue::UInt64(v) => ruby.into_value(v),
144
+ AnyValue::Int8(v) => ruby.into_value(v),
145
+ AnyValue::Int16(v) => ruby.into_value(v),
146
+ AnyValue::Int32(v) => ruby.into_value(v),
147
+ AnyValue::Int64(v) => ruby.into_value(v),
148
+ AnyValue::Float32(v) => ruby.into_value(v),
149
+ AnyValue::Float64(v) => ruby.into_value(v),
150
+ AnyValue::Null => ruby.qnil().as_value(),
151
+ AnyValue::Boolean(v) => ruby.into_value(v),
152
+ AnyValue::Utf8(v) => ruby.into_value(v),
153
+ AnyValue::Utf8Owned(v) => ruby.into_value(v.as_str()),
154
+ AnyValue::Categorical(idx, rev, arr) => {
155
+ let s = if arr.is_null() {
156
+ rev.get(idx)
187
157
  } else {
188
- t.funcall::<_, _, Value>("utc", ()).unwrap()
189
- }
158
+ unsafe { arr.deref_unchecked().value(idx as usize) }
159
+ };
160
+ s.into_value()
161
+ }
162
+ AnyValue::Date(v) => utils().funcall("_to_ruby_date", (v,)).unwrap(),
163
+ AnyValue::Datetime(v, time_unit, time_zone) => {
164
+ let time_unit = time_unit.to_ascii();
165
+ utils()
166
+ .funcall("_to_ruby_datetime", (v, time_unit, time_zone.clone()))
167
+ .unwrap()
190
168
  }
191
- AnyValue::Duration(v, tu) => {
192
- let tu = tu.to_ascii();
193
- utils().funcall("_to_ruby_duration", (v, tu)).unwrap()
169
+ AnyValue::Duration(v, time_unit) => {
170
+ let time_unit = time_unit.to_ascii();
171
+ utils()
172
+ .funcall("_to_ruby_duration", (v, time_unit))
173
+ .unwrap()
194
174
  }
195
- AnyValue::Time(_v) => todo!(),
196
- AnyValue::List(v) => RbSeries::new(v).to_a().into_value(),
175
+ AnyValue::Time(v) => utils().funcall("_to_ruby_time", (v,)).unwrap(),
176
+ AnyValue::Array(v, _) | AnyValue::List(v) => RbSeries::new(v).to_a().into_value(),
197
177
  ref av @ AnyValue::Struct(_, _, flds) => struct_dict(av._iter_struct_av(), flds),
198
178
  AnyValue::StructOwned(payload) => struct_dict(payload.0.into_iter(), &payload.1),
199
179
  AnyValue::Object(v) => {
200
180
  let object = v.as_any().downcast_ref::<ObjectValue>().unwrap();
201
- object.inner
181
+ object.to_object()
202
182
  }
203
183
  AnyValue::ObjectOwned(v) => {
204
184
  let object = v.0.as_any().downcast_ref::<ObjectValue>().unwrap();
205
- object.inner
185
+ object.to_object()
206
186
  }
207
187
  AnyValue::Binary(v) => RString::from_slice(v).into_value(),
208
188
  AnyValue::BinaryOwned(v) => RString::from_slice(&v).into_value(),
209
- AnyValue::Decimal(_v, _scale) => todo!(),
189
+ AnyValue::Decimal(v, scale) => utils()
190
+ .funcall("_to_ruby_decimal", (v.to_string(), -(scale as i32)))
191
+ .unwrap(),
210
192
  }
211
193
  }
212
194
  }
213
195
 
214
196
  impl IntoValue for Wrap<DataType> {
215
- fn into_value_with(self, _: &RubyHandle) -> Value {
197
+ fn into_value_with(self, _: &Ruby) -> Value {
216
198
  let pl = crate::rb_modules::polars();
217
199
 
218
200
  match self.0 {
@@ -226,10 +208,22 @@ impl IntoValue for Wrap<DataType> {
226
208
  DataType::UInt64 => pl.const_get::<_, Value>("UInt64").unwrap(),
227
209
  DataType::Float32 => pl.const_get::<_, Value>("Float32").unwrap(),
228
210
  DataType::Float64 => pl.const_get::<_, Value>("Float64").unwrap(),
229
- DataType::Decimal(_precision, _scale) => todo!(),
211
+ DataType::Decimal(precision, scale) => {
212
+ let decimal_class = pl.const_get::<_, Value>("Decimal").unwrap();
213
+ decimal_class
214
+ .funcall::<_, _, Value>("new", (precision, scale))
215
+ .unwrap()
216
+ }
230
217
  DataType::Boolean => pl.const_get::<_, Value>("Boolean").unwrap(),
231
218
  DataType::Utf8 => pl.const_get::<_, Value>("Utf8").unwrap(),
232
219
  DataType::Binary => pl.const_get::<_, Value>("Binary").unwrap(),
220
+ DataType::Array(inner, size) => {
221
+ let inner = Wrap(*inner);
222
+ let list_class = pl.const_get::<_, Value>("Array").unwrap();
223
+ list_class
224
+ .funcall::<_, _, Value>("new", (size, inner))
225
+ .unwrap()
226
+ }
233
227
  DataType::List(inner) => {
234
228
  let inner = Wrap(*inner);
235
229
  let list_class = pl.const_get::<_, Value>("List").unwrap();
@@ -273,7 +267,7 @@ impl IntoValue for Wrap<DataType> {
273
267
  }
274
268
 
275
269
  impl IntoValue for Wrap<TimeUnit> {
276
- fn into_value_with(self, _: &RubyHandle) -> Value {
270
+ fn into_value_with(self, _: &Ruby) -> Value {
277
271
  let tu = match self.0 {
278
272
  TimeUnit::Nanoseconds => "ns",
279
273
  TimeUnit::Microseconds => "us",
@@ -284,14 +278,14 @@ impl IntoValue for Wrap<TimeUnit> {
284
278
  }
285
279
 
286
280
  impl IntoValue for Wrap<&Utf8Chunked> {
287
- fn into_value_with(self, _: &RubyHandle) -> Value {
281
+ fn into_value_with(self, _: &Ruby) -> Value {
288
282
  let iter = self.0.into_iter();
289
283
  RArray::from_iter(iter).into_value()
290
284
  }
291
285
  }
292
286
 
293
287
  impl IntoValue for Wrap<&BinaryChunked> {
294
- fn into_value_with(self, _: &RubyHandle) -> Value {
288
+ fn into_value_with(self, _: &Ruby) -> Value {
295
289
  let iter = self
296
290
  .0
297
291
  .into_iter()
@@ -301,7 +295,7 @@ impl IntoValue for Wrap<&BinaryChunked> {
301
295
  }
302
296
 
303
297
  impl IntoValue for Wrap<&StructChunked> {
304
- fn into_value_with(self, _: &RubyHandle) -> Value {
298
+ fn into_value_with(self, _: &Ruby) -> Value {
305
299
  let s = self.0.clone().into_series();
306
300
  // todo! iterate its chunks and flatten.
307
301
  // make series::iter() accept a chunk index.
@@ -319,13 +313,64 @@ impl IntoValue for Wrap<&StructChunked> {
319
313
  }
320
314
 
321
315
  impl IntoValue for Wrap<&DurationChunked> {
322
- fn into_value_with(self, _: &RubyHandle) -> Value {
316
+ fn into_value_with(self, _: &Ruby) -> Value {
317
+ let utils = utils();
318
+ let time_unit = Wrap(self.0.time_unit()).into_value();
319
+ let iter = self.0.into_iter().map(|opt_v| {
320
+ opt_v.map(|v| {
321
+ utils
322
+ .funcall::<_, _, Value>("_to_ruby_duration", (v, time_unit))
323
+ .unwrap()
324
+ })
325
+ });
326
+ RArray::from_iter(iter).into_value()
327
+ }
328
+ }
329
+
330
+ impl IntoValue for Wrap<&DatetimeChunked> {
331
+ fn into_value_with(self, _: &Ruby) -> Value {
332
+ let utils = utils();
333
+ let time_unit = Wrap(self.0.time_unit()).into_value();
334
+ let time_zone = self.0.time_zone().clone().into_value();
335
+ let iter = self.0.into_iter().map(|opt_v| {
336
+ opt_v.map(|v| {
337
+ utils
338
+ .funcall::<_, _, Value>("_to_ruby_datetime", (v, time_unit, time_zone))
339
+ .unwrap()
340
+ })
341
+ });
342
+ RArray::from_iter(iter).into_value()
343
+ }
344
+ }
345
+
346
+ impl IntoValue for Wrap<&TimeChunked> {
347
+ fn into_value_with(self, _: &Ruby) -> Value {
348
+ let utils = utils();
349
+ let iter = self.0.into_iter().map(|opt_v| {
350
+ opt_v.map(|v| utils.funcall::<_, _, Value>("_to_ruby_time", (v,)).unwrap())
351
+ });
352
+ RArray::from_iter(iter).into_value()
353
+ }
354
+ }
355
+
356
+ impl IntoValue for Wrap<&DateChunked> {
357
+ fn into_value_with(self, _: &Ruby) -> Value {
358
+ let utils = utils();
359
+ let iter = self.0.into_iter().map(|opt_v| {
360
+ opt_v.map(|v| utils.funcall::<_, _, Value>("_to_ruby_date", (v,)).unwrap())
361
+ });
362
+ RArray::from_iter(iter).into_value()
363
+ }
364
+ }
365
+
366
+ impl IntoValue for Wrap<&DecimalChunked> {
367
+ fn into_value_with(self, _: &Ruby) -> Value {
323
368
  let utils = utils();
324
- let tu = Wrap(self.0.time_unit()).into_value();
369
+ let rb_scale = (-(self.0.scale() as i32)).into_value();
325
370
  let iter = self.0.into_iter().map(|opt_v| {
326
371
  opt_v.map(|v| {
327
372
  utils
328
- .funcall::<_, _, Value>("_to_ruby_duration", (v, tu))
373
+ .funcall::<_, _, Value>("_to_ruby_decimal", (v.to_string(), rb_scale))
329
374
  .unwrap()
330
375
  })
331
376
  });
@@ -333,6 +378,13 @@ impl IntoValue for Wrap<&DurationChunked> {
333
378
  }
334
379
  }
335
380
 
381
+ fn abs_decimal_from_digits(digits: String, exp: i32) -> Option<(i128, usize)> {
382
+ match digits.parse::<i128>() {
383
+ Ok(v) => Some((v, ((digits.len() as i32) - exp) as usize)),
384
+ Err(_) => None,
385
+ }
386
+ }
387
+
336
388
  impl TryConvert for Wrap<Field> {
337
389
  fn try_convert(ob: Value) -> RbResult<Self> {
338
390
  let name: String = ob.funcall("name", ())?;
@@ -362,10 +414,11 @@ impl TryConvert for Wrap<DataType> {
362
414
  "Polars::Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
363
415
  "Polars::Time" => DataType::Time,
364
416
  "Polars::Duration" => DataType::Duration(TimeUnit::Microseconds),
417
+ "Polars::Decimal" => DataType::Decimal(None, None),
365
418
  "Polars::Float32" => DataType::Float32,
366
419
  "Polars::Float64" => DataType::Float64,
367
- // "Polars::Object" => DataType::Object(OBJECT_NAME),
368
- "Polars::List" => DataType::List(Box::new(DataType::Boolean)),
420
+ "Polars::Object" => DataType::Object(OBJECT_NAME),
421
+ "Polars::List" => DataType::List(Box::new(DataType::Null)),
369
422
  "Polars::Null" => DataType::Null,
370
423
  "Polars::Unknown" => DataType::Unknown,
371
424
  dt => {
@@ -375,21 +428,47 @@ impl TryConvert for Wrap<DataType> {
375
428
  }
376
429
  }
377
430
  // TODO improve
378
- } else if ob.try_convert::<String>().is_err() {
431
+ } else if String::try_convert(ob).is_err() {
379
432
  let name = unsafe { ob.class().name() }.into_owned();
380
433
  match name.as_str() {
434
+ "Polars::Duration" => {
435
+ let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
436
+ let time_unit = Wrap::<TimeUnit>::try_convert(time_unit)?.0;
437
+ DataType::Duration(time_unit)
438
+ }
439
+ "Polars::Datetime" => {
440
+ let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
441
+ let time_unit = Wrap::<TimeUnit>::try_convert(time_unit)?.0;
442
+ let time_zone = ob.funcall("time_zone", ())?;
443
+ DataType::Datetime(time_unit, time_zone)
444
+ }
445
+ "Polars::Decimal" => {
446
+ let precision = ob.funcall("precision", ())?;
447
+ let scale = ob.funcall("scale", ())?;
448
+ DataType::Decimal(precision, Some(scale))
449
+ }
450
+ "Polars::List" => {
451
+ let inner: Value = ob.funcall("inner", ()).unwrap();
452
+ let inner = Wrap::<DataType>::try_convert(inner)?;
453
+ DataType::List(Box::new(inner.0))
454
+ }
381
455
  "Polars::Struct" => {
382
456
  let arr: RArray = ob.funcall("fields", ())?;
383
457
  let mut fields = Vec::with_capacity(arr.len());
384
458
  for v in arr.each() {
385
- fields.push(v?.try_convert::<Wrap<Field>>()?.0);
459
+ fields.push(Wrap::<Field>::try_convert(v?)?.0);
386
460
  }
387
461
  DataType::Struct(fields)
388
462
  }
389
- _ => todo!(),
463
+ dt => {
464
+ return Err(RbTypeError::new_err(format!(
465
+ "A {dt} object is not a correct polars DataType. \
466
+ Hint: use the class without instantiating it.",
467
+ )))
468
+ }
390
469
  }
391
470
  } else {
392
- match ob.try_convert::<String>()?.as_str() {
471
+ match String::try_convert(ob)?.as_str() {
393
472
  "u8" => DataType::UInt8,
394
473
  "u16" => DataType::UInt16,
395
474
  "u32" => DataType::UInt32,
@@ -408,7 +487,7 @@ impl TryConvert for Wrap<DataType> {
408
487
  "time" => DataType::Time,
409
488
  "dur" => DataType::Duration(TimeUnit::Microseconds),
410
489
  "f64" => DataType::Float64,
411
- // "obj" => DataType::Object(OBJECT_NAME),
490
+ "obj" => DataType::Object(OBJECT_NAME),
412
491
  "list" => DataType::List(Box::new(DataType::Boolean)),
413
492
  "null" => DataType::Null,
414
493
  "unk" => DataType::Unknown,
@@ -427,10 +506,10 @@ impl TryConvert for Wrap<DataType> {
427
506
  impl<'s> TryConvert for Wrap<AnyValue<'s>> {
428
507
  fn try_convert(ob: Value) -> RbResult<Self> {
429
508
  if ob.is_kind_of(class::true_class()) || ob.is_kind_of(class::false_class()) {
430
- Ok(AnyValue::Boolean(ob.try_convert::<bool>()?).into())
509
+ Ok(AnyValue::Boolean(bool::try_convert(ob)?).into())
431
510
  } else if let Some(v) = Integer::from_value(ob) {
432
511
  Ok(AnyValue::Int64(v.to_i64()?).into())
433
- } else if let Some(v) = RFloat::from_value(ob) {
512
+ } else if let Some(v) = Float::from_value(ob) {
434
513
  Ok(AnyValue::Float64(v.to_f64()).into())
435
514
  } else if let Some(v) = RString::from_value(ob) {
436
515
  if v.enc_get() == Index::utf8() {
@@ -443,7 +522,8 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
443
522
  let sec = ob.funcall::<_, _, i64>("to_i", ())?;
444
523
  let nsec = ob.funcall::<_, _, i64>("nsec", ())?;
445
524
  let v = sec * 1_000_000_000 + nsec;
446
- // TODO support time zone
525
+ // TODO support time zone when possible
526
+ // https://github.com/pola-rs/polars/issues/9103
447
527
  Ok(AnyValue::Datetime(v, TimeUnit::Nanoseconds, &None).into())
448
528
  } else if ob.is_nil() {
449
529
  Ok(AnyValue::Null.into())
@@ -452,8 +532,8 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
452
532
  let mut keys = Vec::with_capacity(len);
453
533
  let mut vals = Vec::with_capacity(len);
454
534
  dict.foreach(|k: Value, v: Value| {
455
- let key = k.try_convert::<String>()?;
456
- let val = v.try_convert::<Wrap<AnyValue>>()?.0;
535
+ let key = String::try_convert(k)?;
536
+ let val = Wrap::<AnyValue>::try_convert(v)?.0;
457
537
  let dtype = DataType::from(&val);
458
538
  keys.push(Field::new(&key, dtype));
459
539
  vals.push(val);
@@ -464,13 +544,23 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
464
544
  if v.is_empty() {
465
545
  Ok(Wrap(AnyValue::List(Series::new_empty("", &DataType::Null))))
466
546
  } else {
467
- let avs = v.try_convert::<Wrap<Row>>()?.0 .0;
468
- // use first `n` values to infer datatype
469
- // this value is not too large as this will be done with every
470
- // anyvalue that has to be converted, which can be many
471
- let n = 25;
472
- let dtype = any_values_to_dtype(&avs[..std::cmp::min(avs.len(), n)])
473
- .map_err(RbPolarsErr::from)?;
547
+ let list = v;
548
+
549
+ let mut avs = Vec::with_capacity(25);
550
+ let mut iter = list.each();
551
+
552
+ for item in (&mut iter).take(25) {
553
+ avs.push(Wrap::<AnyValue>::try_convert(item?)?.0)
554
+ }
555
+
556
+ let (dtype, _n_types) = any_values_to_dtype(&avs).map_err(RbPolarsErr::from)?;
557
+
558
+ // push the rest
559
+ avs.reserve(list.len());
560
+ for item in iter {
561
+ avs.push(Wrap::<AnyValue>::try_convert(item?)?.0)
562
+ }
563
+
474
564
  let s = Series::from_any_values_and_dtype("", &avs, &dtype, true)
475
565
  .map_err(RbPolarsErr::from)?;
476
566
  Ok(Wrap(AnyValue::List(s)))
@@ -478,7 +568,11 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
478
568
  } else if ob.is_kind_of(crate::rb_modules::datetime()) {
479
569
  let sec: i64 = ob.funcall("to_i", ())?;
480
570
  let nsec: i64 = ob.funcall("nsec", ())?;
481
- Ok(Wrap(AnyValue::Datetime(sec * 1_000_000_000 + nsec, TimeUnit::Nanoseconds, &None)))
571
+ Ok(Wrap(AnyValue::Datetime(
572
+ sec * 1_000_000_000 + nsec,
573
+ TimeUnit::Nanoseconds,
574
+ &None,
575
+ )))
482
576
  } else if ob.is_kind_of(crate::rb_modules::date()) {
483
577
  // convert to DateTime for UTC
484
578
  let v = ob
@@ -486,6 +580,16 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
486
580
  .funcall::<_, _, Value>("to_time", ())?
487
581
  .funcall::<_, _, i64>("to_i", ())?;
488
582
  Ok(Wrap(AnyValue::Date((v / 86400) as i32)))
583
+ } else if ob.is_kind_of(crate::rb_modules::bigdecimal()) {
584
+ let (sign, digits, _, exp): (i8, String, i32, i32) = ob.funcall("split", ()).unwrap();
585
+ let (mut v, scale) = abs_decimal_from_digits(digits, exp).ok_or_else(|| {
586
+ RbPolarsErr::other("BigDecimal is too large to fit in Decimal128".into())
587
+ })?;
588
+ if sign < 0 {
589
+ // TODO better error
590
+ v = v.checked_neg().unwrap();
591
+ }
592
+ Ok(Wrap(AnyValue::Decimal(v, scale)))
489
593
  } else {
490
594
  Err(RbPolarsErr::other(format!(
491
595
  "object type not supported {:?}",
@@ -498,8 +602,8 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
498
602
  impl<'s> TryConvert for Wrap<Row<'s>> {
499
603
  fn try_convert(ob: Value) -> RbResult<Self> {
500
604
  let mut vals: Vec<Wrap<AnyValue<'s>>> = Vec::new();
501
- for item in ob.try_convert::<RArray>()?.each() {
502
- vals.push(item?.try_convert::<Wrap<AnyValue<'s>>>()?);
605
+ for item in RArray::try_convert(ob)?.each() {
606
+ vals.push(Wrap::<AnyValue<'s>>::try_convert(item?)?);
503
607
  }
504
608
  let vals: Vec<AnyValue> = unsafe { std::mem::transmute(vals) };
505
609
  Ok(Wrap(Row(vals)))
@@ -508,7 +612,7 @@ impl<'s> TryConvert for Wrap<Row<'s>> {
508
612
 
509
613
  impl TryConvert for Wrap<Schema> {
510
614
  fn try_convert(ob: Value) -> RbResult<Self> {
511
- let dict = ob.try_convert::<RHash>()?;
615
+ let dict = RHash::try_convert(ob)?;
512
616
 
513
617
  let mut schema = Vec::new();
514
618
  dict.foreach(|key: String, val: Wrap<DataType>| {
@@ -521,15 +625,23 @@ impl TryConvert for Wrap<Schema> {
521
625
  }
522
626
  }
523
627
 
524
- #[derive(Clone, Debug)]
628
+ #[derive(Clone)]
525
629
  pub struct ObjectValue {
526
- pub inner: Value,
630
+ pub inner: Opaque<Value>,
631
+ }
632
+
633
+ impl Debug for ObjectValue {
634
+ fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
635
+ f.debug_struct("ObjectValue")
636
+ .field("inner", &self.to_object())
637
+ .finish()
638
+ }
527
639
  }
528
640
 
529
641
  impl Hash for ObjectValue {
530
642
  fn hash<H: Hasher>(&self, state: &mut H) {
531
643
  let h = self
532
- .inner
644
+ .to_object()
533
645
  .funcall::<_, _, isize>("hash", ())
534
646
  .expect("should be hashable");
535
647
  state.write_isize(h)
@@ -540,13 +652,19 @@ impl Eq for ObjectValue {}
540
652
 
541
653
  impl PartialEq for ObjectValue {
542
654
  fn eq(&self, other: &Self) -> bool {
543
- self.inner.eql(&other.inner).unwrap_or(false)
655
+ self.to_object().eql(other.to_object()).unwrap_or(false)
656
+ }
657
+ }
658
+
659
+ impl TotalEq for ObjectValue {
660
+ fn tot_eq(&self, other: &Self) -> bool {
661
+ self == other
544
662
  }
545
663
  }
546
664
 
547
665
  impl Display for ObjectValue {
548
666
  fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
549
- write!(f, "{}", self.inner)
667
+ write!(f, "{}", self.to_object())
550
668
  }
551
669
  }
552
670
 
@@ -558,13 +676,13 @@ impl PolarsObject for ObjectValue {
558
676
 
559
677
  impl From<Value> for ObjectValue {
560
678
  fn from(v: Value) -> Self {
561
- Self { inner: v }
679
+ Self { inner: v.into() }
562
680
  }
563
681
  }
564
682
 
565
683
  impl TryConvert for ObjectValue {
566
684
  fn try_convert(ob: Value) -> RbResult<Self> {
567
- Ok(ObjectValue { inner: ob })
685
+ Ok(ObjectValue { inner: ob.into() })
568
686
  }
569
687
  }
570
688
 
@@ -577,19 +695,21 @@ impl From<&dyn PolarsObjectSafe> for &ObjectValue {
577
695
  // TODO remove
578
696
  impl ObjectValue {
579
697
  pub fn to_object(&self) -> Value {
580
- self.inner
698
+ Ruby::get().unwrap().get_inner(self.inner)
581
699
  }
582
700
  }
583
701
 
584
702
  impl IntoValue for ObjectValue {
585
- fn into_value_with(self, _: &RubyHandle) -> Value {
586
- self.inner
703
+ fn into_value_with(self, _: &Ruby) -> Value {
704
+ self.to_object()
587
705
  }
588
706
  }
589
707
 
590
708
  impl Default for ObjectValue {
591
709
  fn default() -> Self {
592
- ObjectValue { inner: *QNIL }
710
+ ObjectValue {
711
+ inner: Ruby::get().unwrap().qnil().as_value().into(),
712
+ }
593
713
  }
594
714
  }
595
715
 
@@ -602,13 +722,13 @@ pub(crate) fn dicts_to_rows(
602
722
  let mut key_names = PlIndexSet::new();
603
723
  for d in dicts.each().take(infer_schema_len) {
604
724
  let d = d?;
605
- let d = d.try_convert::<RHash>()?;
725
+ let d = RHash::try_convert(d)?;
606
726
 
607
727
  d.foreach(|name: Value, _value: Value| {
608
728
  if let Some(v) = Symbol::from_value(name) {
609
729
  key_names.insert(v.name()?.into());
610
730
  } else {
611
- key_names.insert(name.try_convert::<String>()?);
731
+ key_names.insert(String::try_convert(name)?);
612
732
  };
613
733
  Ok(ForEach::Continue)
614
734
  })?;
@@ -618,7 +738,7 @@ pub(crate) fn dicts_to_rows(
618
738
 
619
739
  for d in dicts.each() {
620
740
  let d = d?;
621
- let d = d.try_convert::<RHash>()?;
741
+ let d = RHash::try_convert(d)?;
622
742
 
623
743
  let mut row = Vec::with_capacity(key_names.len());
624
744
 
@@ -626,7 +746,7 @@ pub(crate) fn dicts_to_rows(
626
746
  // TODO improve performance
627
747
  let val = match d.get(k.clone()).or_else(|| d.get(Symbol::new(k))) {
628
748
  None => AnyValue::Null,
629
- Some(val) => val.try_convert::<Wrap<AnyValue>>()?.0,
749
+ Some(val) => Wrap::<AnyValue>::try_convert(val)?.0,
630
750
  };
631
751
  row.push(val)
632
752
  }
@@ -637,7 +757,7 @@ pub(crate) fn dicts_to_rows(
637
757
 
638
758
  impl TryConvert for Wrap<AsofStrategy> {
639
759
  fn try_convert(ob: Value) -> RbResult<Self> {
640
- let parsed = match ob.try_convert::<String>()?.as_str() {
760
+ let parsed = match String::try_convert(ob)?.as_str() {
641
761
  "backward" => AsofStrategy::Backward,
642
762
  "forward" => AsofStrategy::Forward,
643
763
  v => {
@@ -653,7 +773,7 @@ impl TryConvert for Wrap<AsofStrategy> {
653
773
 
654
774
  impl TryConvert for Wrap<InterpolationMethod> {
655
775
  fn try_convert(ob: Value) -> RbResult<Self> {
656
- let parsed = match ob.try_convert::<String>()?.as_str() {
776
+ let parsed = match String::try_convert(ob)?.as_str() {
657
777
  "linear" => InterpolationMethod::Linear,
658
778
  "nearest" => InterpolationMethod::Nearest,
659
779
  v => {
@@ -668,7 +788,7 @@ impl TryConvert for Wrap<InterpolationMethod> {
668
788
 
669
789
  impl TryConvert for Wrap<Option<AvroCompression>> {
670
790
  fn try_convert(ob: Value) -> RbResult<Self> {
671
- let parsed = match ob.try_convert::<String>()?.as_str() {
791
+ let parsed = match String::try_convert(ob)?.as_str() {
672
792
  "uncompressed" => None,
673
793
  "snappy" => Some(AvroCompression::Snappy),
674
794
  "deflate" => Some(AvroCompression::Deflate),
@@ -685,7 +805,7 @@ impl TryConvert for Wrap<Option<AvroCompression>> {
685
805
 
686
806
  impl TryConvert for Wrap<CategoricalOrdering> {
687
807
  fn try_convert(ob: Value) -> RbResult<Self> {
688
- let parsed = match ob.try_convert::<String>()?.as_str() {
808
+ let parsed = match String::try_convert(ob)?.as_str() {
689
809
  "physical" => CategoricalOrdering::Physical,
690
810
  "lexical" => CategoricalOrdering::Lexical,
691
811
  v => {
@@ -701,7 +821,7 @@ impl TryConvert for Wrap<CategoricalOrdering> {
701
821
 
702
822
  impl TryConvert for Wrap<StartBy> {
703
823
  fn try_convert(ob: Value) -> RbResult<Self> {
704
- let parsed = match ob.try_convert::<String>()?.as_str() {
824
+ let parsed = match String::try_convert(ob)?.as_str() {
705
825
  "window" => StartBy::WindowBound,
706
826
  "datapoint" => StartBy::DataPoint,
707
827
  "monday" => StartBy::Monday,
@@ -717,7 +837,7 @@ impl TryConvert for Wrap<StartBy> {
717
837
 
718
838
  impl TryConvert for Wrap<ClosedWindow> {
719
839
  fn try_convert(ob: Value) -> RbResult<Self> {
720
- let parsed = match ob.try_convert::<String>()?.as_str() {
840
+ let parsed = match String::try_convert(ob)?.as_str() {
721
841
  "left" => ClosedWindow::Left,
722
842
  "right" => ClosedWindow::Right,
723
843
  "both" => ClosedWindow::Both,
@@ -735,7 +855,7 @@ impl TryConvert for Wrap<ClosedWindow> {
735
855
 
736
856
  impl TryConvert for Wrap<CsvEncoding> {
737
857
  fn try_convert(ob: Value) -> RbResult<Self> {
738
- let parsed = match ob.try_convert::<String>()?.as_str() {
858
+ let parsed = match String::try_convert(ob)?.as_str() {
739
859
  "utf8" => CsvEncoding::Utf8,
740
860
  "utf8-lossy" => CsvEncoding::LossyUtf8,
741
861
  v => {
@@ -751,7 +871,7 @@ impl TryConvert for Wrap<CsvEncoding> {
751
871
 
752
872
  impl TryConvert for Wrap<Option<IpcCompression>> {
753
873
  fn try_convert(ob: Value) -> RbResult<Self> {
754
- let parsed = match ob.try_convert::<String>()?.as_str() {
874
+ let parsed = match String::try_convert(ob)?.as_str() {
755
875
  "uncompressed" => None,
756
876
  "lz4" => Some(IpcCompression::LZ4),
757
877
  "zstd" => Some(IpcCompression::ZSTD),
@@ -768,7 +888,7 @@ impl TryConvert for Wrap<Option<IpcCompression>> {
768
888
 
769
889
  impl TryConvert for Wrap<JoinType> {
770
890
  fn try_convert(ob: Value) -> RbResult<Self> {
771
- let parsed = match ob.try_convert::<String>()?.as_str() {
891
+ let parsed = match String::try_convert(ob)?.as_str() {
772
892
  "inner" => JoinType::Inner,
773
893
  "left" => JoinType::Left,
774
894
  "outer" => JoinType::Outer,
@@ -787,9 +907,25 @@ impl TryConvert for Wrap<JoinType> {
787
907
  }
788
908
  }
789
909
 
910
+ impl TryConvert for Wrap<Label> {
911
+ fn try_convert(ob: Value) -> RbResult<Self> {
912
+ let parsed = match String::try_convert(ob)?.as_str() {
913
+ "left" => Label::Left,
914
+ "right" => Label::Right,
915
+ "datapoint" => Label::DataPoint,
916
+ v => {
917
+ return Err(RbValueError::new_err(format!(
918
+ "`label` must be one of {{'left', 'right', 'datapoint'}}, got {v}",
919
+ )))
920
+ }
921
+ };
922
+ Ok(Wrap(parsed))
923
+ }
924
+ }
925
+
790
926
  impl TryConvert for Wrap<ListToStructWidthStrategy> {
791
927
  fn try_convert(ob: Value) -> RbResult<Self> {
792
- let parsed = match ob.try_convert::<String>()?.as_str() {
928
+ let parsed = match String::try_convert(ob)?.as_str() {
793
929
  "first_non_null" => ListToStructWidthStrategy::FirstNonNull,
794
930
  "max_width" => ListToStructWidthStrategy::MaxWidth,
795
931
  v => {
@@ -805,7 +941,7 @@ impl TryConvert for Wrap<ListToStructWidthStrategy> {
805
941
 
806
942
  impl TryConvert for Wrap<NullBehavior> {
807
943
  fn try_convert(ob: Value) -> RbResult<Self> {
808
- let parsed = match ob.try_convert::<String>()?.as_str() {
944
+ let parsed = match String::try_convert(ob)?.as_str() {
809
945
  "drop" => NullBehavior::Drop,
810
946
  "ignore" => NullBehavior::Ignore,
811
947
  v => {
@@ -821,7 +957,7 @@ impl TryConvert for Wrap<NullBehavior> {
821
957
 
822
958
  impl TryConvert for Wrap<NullStrategy> {
823
959
  fn try_convert(ob: Value) -> RbResult<Self> {
824
- let parsed = match ob.try_convert::<String>()?.as_str() {
960
+ let parsed = match String::try_convert(ob)?.as_str() {
825
961
  "ignore" => NullStrategy::Ignore,
826
962
  "propagate" => NullStrategy::Propagate,
827
963
  v => {
@@ -837,7 +973,7 @@ impl TryConvert for Wrap<NullStrategy> {
837
973
 
838
974
  impl TryConvert for Wrap<ParallelStrategy> {
839
975
  fn try_convert(ob: Value) -> RbResult<Self> {
840
- let parsed = match ob.try_convert::<String>()?.as_str() {
976
+ let parsed = match String::try_convert(ob)?.as_str() {
841
977
  "auto" => ParallelStrategy::Auto,
842
978
  "columns" => ParallelStrategy::Columns,
843
979
  "row_groups" => ParallelStrategy::RowGroups,
@@ -855,7 +991,7 @@ impl TryConvert for Wrap<ParallelStrategy> {
855
991
 
856
992
  impl TryConvert for Wrap<QuantileInterpolOptions> {
857
993
  fn try_convert(ob: Value) -> RbResult<Self> {
858
- let parsed = match ob.try_convert::<String>()?.as_str() {
994
+ let parsed = match String::try_convert(ob)?.as_str() {
859
995
  "lower" => QuantileInterpolOptions::Lower,
860
996
  "higher" => QuantileInterpolOptions::Higher,
861
997
  "nearest" => QuantileInterpolOptions::Nearest,
@@ -874,7 +1010,7 @@ impl TryConvert for Wrap<QuantileInterpolOptions> {
874
1010
 
875
1011
  impl TryConvert for Wrap<RankMethod> {
876
1012
  fn try_convert(ob: Value) -> RbResult<Self> {
877
- let parsed = match ob.try_convert::<String>()?.as_str() {
1013
+ let parsed = match String::try_convert(ob)?.as_str() {
878
1014
  "min" => RankMethod::Min,
879
1015
  "max" => RankMethod::Max,
880
1016
  "average" => RankMethod::Average,
@@ -894,7 +1030,7 @@ impl TryConvert for Wrap<RankMethod> {
894
1030
 
895
1031
  impl TryConvert for Wrap<TimeUnit> {
896
1032
  fn try_convert(ob: Value) -> RbResult<Self> {
897
- let parsed = match ob.try_convert::<String>()?.as_str() {
1033
+ let parsed = match String::try_convert(ob)?.as_str() {
898
1034
  "ns" => TimeUnit::Nanoseconds,
899
1035
  "us" => TimeUnit::Microseconds,
900
1036
  "ms" => TimeUnit::Milliseconds,
@@ -911,7 +1047,7 @@ impl TryConvert for Wrap<TimeUnit> {
911
1047
 
912
1048
  impl TryConvert for Wrap<UniqueKeepStrategy> {
913
1049
  fn try_convert(ob: Value) -> RbResult<Self> {
914
- let parsed = match ob.try_convert::<String>()?.as_str() {
1050
+ let parsed = match String::try_convert(ob)?.as_str() {
915
1051
  "first" => UniqueKeepStrategy::First,
916
1052
  "last" => UniqueKeepStrategy::Last,
917
1053
  v => {
@@ -927,7 +1063,7 @@ impl TryConvert for Wrap<UniqueKeepStrategy> {
927
1063
 
928
1064
  impl TryConvert for Wrap<SearchSortedSide> {
929
1065
  fn try_convert(ob: Value) -> RbResult<Self> {
930
- let parsed = match ob.try_convert::<String>()?.as_str() {
1066
+ let parsed = match String::try_convert(ob)?.as_str() {
931
1067
  "any" => SearchSortedSide::Any,
932
1068
  "left" => SearchSortedSide::Left,
933
1069
  "right" => SearchSortedSide::Right,