polars-df 0.5.0 → 0.7.0

This diff shows the differences between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions exactly as they appear in their respective public registries.
Files changed (72)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +26 -0
  3. data/Cargo.lock +595 -709
  4. data/Cargo.toml +1 -0
  5. data/README.md +11 -9
  6. data/ext/polars/Cargo.toml +18 -10
  7. data/ext/polars/src/batched_csv.rs +26 -26
  8. data/ext/polars/src/conversion.rs +272 -136
  9. data/ext/polars/src/dataframe.rs +135 -94
  10. data/ext/polars/src/error.rs +8 -5
  11. data/ext/polars/src/expr/array.rs +15 -0
  12. data/ext/polars/src/expr/binary.rs +18 -6
  13. data/ext/polars/src/expr/datetime.rs +10 -12
  14. data/ext/polars/src/expr/general.rs +78 -264
  15. data/ext/polars/src/expr/list.rs +41 -28
  16. data/ext/polars/src/{expr.rs → expr/mod.rs} +5 -2
  17. data/ext/polars/src/expr/name.rs +44 -0
  18. data/ext/polars/src/expr/rolling.rs +196 -0
  19. data/ext/polars/src/expr/string.rs +94 -66
  20. data/ext/polars/src/file.rs +3 -3
  21. data/ext/polars/src/functions/aggregation.rs +35 -0
  22. data/ext/polars/src/functions/eager.rs +7 -31
  23. data/ext/polars/src/functions/io.rs +10 -10
  24. data/ext/polars/src/functions/lazy.rs +119 -54
  25. data/ext/polars/src/functions/meta.rs +30 -0
  26. data/ext/polars/src/functions/misc.rs +8 -0
  27. data/ext/polars/src/functions/mod.rs +5 -0
  28. data/ext/polars/src/functions/random.rs +6 -0
  29. data/ext/polars/src/functions/range.rs +46 -0
  30. data/ext/polars/src/functions/string_cache.rs +11 -0
  31. data/ext/polars/src/functions/whenthen.rs +7 -7
  32. data/ext/polars/src/lazyframe.rs +61 -44
  33. data/ext/polars/src/lib.rs +173 -84
  34. data/ext/polars/src/{apply → map}/dataframe.rs +28 -33
  35. data/ext/polars/src/{apply → map}/mod.rs +10 -6
  36. data/ext/polars/src/{apply → map}/series.rs +12 -16
  37. data/ext/polars/src/object.rs +2 -2
  38. data/ext/polars/src/rb_modules.rs +25 -6
  39. data/ext/polars/src/series/construction.rs +32 -6
  40. data/ext/polars/src/series/export.rs +2 -2
  41. data/ext/polars/src/series/set_at_idx.rs +33 -17
  42. data/ext/polars/src/series.rs +62 -42
  43. data/ext/polars/src/sql.rs +46 -0
  44. data/lib/polars/array_expr.rb +84 -0
  45. data/lib/polars/array_name_space.rb +77 -0
  46. data/lib/polars/batched_csv_reader.rb +1 -1
  47. data/lib/polars/config.rb +530 -0
  48. data/lib/polars/data_frame.rb +206 -131
  49. data/lib/polars/data_types.rb +163 -29
  50. data/lib/polars/date_time_expr.rb +13 -18
  51. data/lib/polars/date_time_name_space.rb +22 -28
  52. data/lib/polars/dynamic_group_by.rb +2 -2
  53. data/lib/polars/expr.rb +241 -151
  54. data/lib/polars/functions.rb +29 -38
  55. data/lib/polars/group_by.rb +38 -76
  56. data/lib/polars/io.rb +37 -2
  57. data/lib/polars/lazy_frame.rb +174 -95
  58. data/lib/polars/lazy_functions.rb +87 -63
  59. data/lib/polars/lazy_group_by.rb +7 -8
  60. data/lib/polars/list_expr.rb +40 -36
  61. data/lib/polars/list_name_space.rb +15 -15
  62. data/lib/polars/name_expr.rb +198 -0
  63. data/lib/polars/rolling_group_by.rb +6 -4
  64. data/lib/polars/series.rb +95 -28
  65. data/lib/polars/sql_context.rb +194 -0
  66. data/lib/polars/string_expr.rb +249 -69
  67. data/lib/polars/string_name_space.rb +155 -25
  68. data/lib/polars/utils.rb +119 -57
  69. data/lib/polars/version.rb +1 -1
  70. data/lib/polars.rb +6 -0
  71. metadata +21 -7
  72. /data/ext/polars/src/{apply → map}/lazy.rs +0 -0
@@ -1,11 +1,11 @@
1
- use std::fmt::{Display, Formatter};
1
+ use std::fmt::{Debug, Display, Formatter};
2
2
  use std::hash::{Hash, Hasher};
3
3
 
4
+ use magnus::encoding::{EncodingCapable, Index};
4
5
  use magnus::{
5
- class, exception, r_hash::ForEach, ruby_handle::RubyHandle, Integer, IntoValue, Module, RArray,
6
- RFloat, RHash, RString, Symbol, TryConvert, Value, QNIL,
6
+ class, exception, prelude::*, r_hash::ForEach, value::Opaque, Float, Integer, IntoValue,
7
+ Module, RArray, RHash, RString, Ruby, Symbol, TryConvert, Value,
7
8
  };
8
- use magnus::encoding::{EncodingCapable, Index};
9
9
  use polars::chunked_array::object::PolarsObjectSafe;
10
10
  use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
11
11
  use polars::datatypes::AnyValue;
@@ -14,10 +14,12 @@ use polars::frame::NullStrategy;
14
14
  use polars::io::avro::AvroCompression;
15
15
  use polars::prelude::*;
16
16
  use polars::series::ops::NullBehavior;
17
+ use polars_core::utils::arrow::util::total_ord::TotalEq;
17
18
  use smartstring::alias::String as SmartString;
18
19
 
20
+ use crate::object::OBJECT_NAME;
19
21
  use crate::rb_modules::utils;
20
- use crate::{RbDataFrame, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbValueError};
22
+ use crate::{RbDataFrame, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbTypeError, RbValueError};
21
23
 
22
24
  pub(crate) fn slice_to_wrapped<T>(slice: &[T]) -> &[Wrap<T>] {
23
25
  // Safety:
@@ -56,7 +58,7 @@ impl<T> From<T> for Wrap<T> {
56
58
  }
57
59
 
58
60
  pub(crate) fn get_rbseq(obj: Value) -> RbResult<(RArray, usize)> {
59
- let seq: RArray = obj.try_convert()?;
61
+ let seq = RArray::try_convert(obj)?;
60
62
  let len = seq.len();
61
63
  Ok((seq, len))
62
64
  }
@@ -83,7 +85,7 @@ impl TryConvert for Wrap<Utf8Chunked> {
83
85
 
84
86
  for res in seq.each() {
85
87
  let item = res?;
86
- match item.try_convert::<String>() {
88
+ match String::try_convert(item) {
87
89
  Ok(val) => builder.append_value(&val),
88
90
  Err(_) => builder.append_null(),
89
91
  }
@@ -99,7 +101,7 @@ impl TryConvert for Wrap<BinaryChunked> {
99
101
 
100
102
  for res in seq.each() {
101
103
  let item = res?;
102
- match item.try_convert::<RString>() {
104
+ match RString::try_convert(item) {
103
105
  Ok(val) => builder.append_value(unsafe { val.as_slice() }),
104
106
  Err(_) => builder.append_null(),
105
107
  }
@@ -110,11 +112,11 @@ impl TryConvert for Wrap<BinaryChunked> {
110
112
 
111
113
  impl TryConvert for Wrap<NullValues> {
112
114
  fn try_convert(ob: Value) -> RbResult<Self> {
113
- if let Ok(s) = ob.try_convert::<String>() {
115
+ if let Ok(s) = String::try_convert(ob) {
114
116
  Ok(Wrap(NullValues::AllColumnsSingle(s)))
115
- } else if let Ok(s) = ob.try_convert::<Vec<String>>() {
117
+ } else if let Ok(s) = Vec::<String>::try_convert(ob) {
116
118
  Ok(Wrap(NullValues::AllColumns(s)))
117
- } else if let Ok(s) = ob.try_convert::<Vec<(String, String)>>() {
119
+ } else if let Ok(s) = Vec::<(String, String)>::try_convert(ob) {
118
120
  Ok(Wrap(NullValues::Named(s)))
119
121
  } else {
120
122
  Err(RbPolarsErr::other(
@@ -133,86 +135,66 @@ fn struct_dict<'a>(vals: impl Iterator<Item = AnyValue<'a>>, flds: &[Field]) ->
133
135
  }
134
136
 
135
137
  impl IntoValue for Wrap<AnyValue<'_>> {
136
- fn into_value_with(self, _: &RubyHandle) -> Value {
138
+ fn into_value_with(self, ruby: &Ruby) -> Value {
137
139
  match self.0 {
138
- AnyValue::UInt8(v) => Value::from(v),
139
- AnyValue::UInt16(v) => Value::from(v),
140
- AnyValue::UInt32(v) => Value::from(v),
141
- AnyValue::UInt64(v) => Value::from(v),
142
- AnyValue::Int8(v) => Value::from(v),
143
- AnyValue::Int16(v) => Value::from(v),
144
- AnyValue::Int32(v) => Value::from(v),
145
- AnyValue::Int64(v) => Value::from(v),
146
- AnyValue::Float32(v) => Value::from(v),
147
- AnyValue::Float64(v) => Value::from(v),
148
- AnyValue::Null => *QNIL,
149
- AnyValue::Boolean(v) => Value::from(v),
150
- AnyValue::Utf8(v) => Value::from(v),
151
- AnyValue::Utf8Owned(v) => Value::from(v.as_str()),
152
- AnyValue::Categorical(_idx, _rev, _arr) => todo!(),
153
- AnyValue::Date(v) => class::time()
154
- .funcall::<_, _, Value>("at", (v * 86400,))
155
- .unwrap()
156
- .funcall::<_, _, Value>("utc", ())
157
- .unwrap()
158
- .funcall::<_, _, Value>("to_date", ())
159
- .unwrap(),
160
- AnyValue::Datetime(v, tu, tz) => {
161
- let t = match tu {
162
- TimeUnit::Nanoseconds => {
163
- let sec = v / 1000000000;
164
- let subsec = v % 1000000000;
165
- class::time()
166
- .funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("nsec")))
167
- .unwrap()
168
- }
169
- TimeUnit::Microseconds => {
170
- let sec = v / 1000000;
171
- let subsec = v % 1000000;
172
- class::time()
173
- .funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("usec")))
174
- .unwrap()
175
- }
176
- TimeUnit::Milliseconds => {
177
- let sec = v / 1000;
178
- let subsec = v % 1000;
179
- class::time()
180
- .funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("millisecond")))
181
- .unwrap()
182
- }
183
- };
184
-
185
- if tz.is_some() {
186
- todo!();
140
+ AnyValue::UInt8(v) => ruby.into_value(v),
141
+ AnyValue::UInt16(v) => ruby.into_value(v),
142
+ AnyValue::UInt32(v) => ruby.into_value(v),
143
+ AnyValue::UInt64(v) => ruby.into_value(v),
144
+ AnyValue::Int8(v) => ruby.into_value(v),
145
+ AnyValue::Int16(v) => ruby.into_value(v),
146
+ AnyValue::Int32(v) => ruby.into_value(v),
147
+ AnyValue::Int64(v) => ruby.into_value(v),
148
+ AnyValue::Float32(v) => ruby.into_value(v),
149
+ AnyValue::Float64(v) => ruby.into_value(v),
150
+ AnyValue::Null => ruby.qnil().as_value(),
151
+ AnyValue::Boolean(v) => ruby.into_value(v),
152
+ AnyValue::Utf8(v) => ruby.into_value(v),
153
+ AnyValue::Utf8Owned(v) => ruby.into_value(v.as_str()),
154
+ AnyValue::Categorical(idx, rev, arr) => {
155
+ let s = if arr.is_null() {
156
+ rev.get(idx)
187
157
  } else {
188
- t.funcall::<_, _, Value>("utc", ()).unwrap()
189
- }
158
+ unsafe { arr.deref_unchecked().value(idx as usize) }
159
+ };
160
+ s.into_value()
161
+ }
162
+ AnyValue::Date(v) => utils().funcall("_to_ruby_date", (v,)).unwrap(),
163
+ AnyValue::Datetime(v, time_unit, time_zone) => {
164
+ let time_unit = time_unit.to_ascii();
165
+ utils()
166
+ .funcall("_to_ruby_datetime", (v, time_unit, time_zone.clone()))
167
+ .unwrap()
190
168
  }
191
- AnyValue::Duration(v, tu) => {
192
- let tu = tu.to_ascii();
193
- utils().funcall("_to_ruby_duration", (v, tu)).unwrap()
169
+ AnyValue::Duration(v, time_unit) => {
170
+ let time_unit = time_unit.to_ascii();
171
+ utils()
172
+ .funcall("_to_ruby_duration", (v, time_unit))
173
+ .unwrap()
194
174
  }
195
- AnyValue::Time(_v) => todo!(),
196
- AnyValue::List(v) => RbSeries::new(v).to_a().into_value(),
175
+ AnyValue::Time(v) => utils().funcall("_to_ruby_time", (v,)).unwrap(),
176
+ AnyValue::Array(v, _) | AnyValue::List(v) => RbSeries::new(v).to_a().into_value(),
197
177
  ref av @ AnyValue::Struct(_, _, flds) => struct_dict(av._iter_struct_av(), flds),
198
178
  AnyValue::StructOwned(payload) => struct_dict(payload.0.into_iter(), &payload.1),
199
179
  AnyValue::Object(v) => {
200
180
  let object = v.as_any().downcast_ref::<ObjectValue>().unwrap();
201
- object.inner
181
+ object.to_object()
202
182
  }
203
183
  AnyValue::ObjectOwned(v) => {
204
184
  let object = v.0.as_any().downcast_ref::<ObjectValue>().unwrap();
205
- object.inner
185
+ object.to_object()
206
186
  }
207
187
  AnyValue::Binary(v) => RString::from_slice(v).into_value(),
208
188
  AnyValue::BinaryOwned(v) => RString::from_slice(&v).into_value(),
209
- AnyValue::Decimal(_v, _scale) => todo!(),
189
+ AnyValue::Decimal(v, scale) => utils()
190
+ .funcall("_to_ruby_decimal", (v.to_string(), -(scale as i32)))
191
+ .unwrap(),
210
192
  }
211
193
  }
212
194
  }
213
195
 
214
196
  impl IntoValue for Wrap<DataType> {
215
- fn into_value_with(self, _: &RubyHandle) -> Value {
197
+ fn into_value_with(self, _: &Ruby) -> Value {
216
198
  let pl = crate::rb_modules::polars();
217
199
 
218
200
  match self.0 {
@@ -226,10 +208,22 @@ impl IntoValue for Wrap<DataType> {
226
208
  DataType::UInt64 => pl.const_get::<_, Value>("UInt64").unwrap(),
227
209
  DataType::Float32 => pl.const_get::<_, Value>("Float32").unwrap(),
228
210
  DataType::Float64 => pl.const_get::<_, Value>("Float64").unwrap(),
229
- DataType::Decimal(_precision, _scale) => todo!(),
211
+ DataType::Decimal(precision, scale) => {
212
+ let decimal_class = pl.const_get::<_, Value>("Decimal").unwrap();
213
+ decimal_class
214
+ .funcall::<_, _, Value>("new", (precision, scale))
215
+ .unwrap()
216
+ }
230
217
  DataType::Boolean => pl.const_get::<_, Value>("Boolean").unwrap(),
231
218
  DataType::Utf8 => pl.const_get::<_, Value>("Utf8").unwrap(),
232
219
  DataType::Binary => pl.const_get::<_, Value>("Binary").unwrap(),
220
+ DataType::Array(inner, size) => {
221
+ let inner = Wrap(*inner);
222
+ let list_class = pl.const_get::<_, Value>("Array").unwrap();
223
+ list_class
224
+ .funcall::<_, _, Value>("new", (size, inner))
225
+ .unwrap()
226
+ }
233
227
  DataType::List(inner) => {
234
228
  let inner = Wrap(*inner);
235
229
  let list_class = pl.const_get::<_, Value>("List").unwrap();
@@ -273,7 +267,7 @@ impl IntoValue for Wrap<DataType> {
273
267
  }
274
268
 
275
269
  impl IntoValue for Wrap<TimeUnit> {
276
- fn into_value_with(self, _: &RubyHandle) -> Value {
270
+ fn into_value_with(self, _: &Ruby) -> Value {
277
271
  let tu = match self.0 {
278
272
  TimeUnit::Nanoseconds => "ns",
279
273
  TimeUnit::Microseconds => "us",
@@ -284,14 +278,14 @@ impl IntoValue for Wrap<TimeUnit> {
284
278
  }
285
279
 
286
280
  impl IntoValue for Wrap<&Utf8Chunked> {
287
- fn into_value_with(self, _: &RubyHandle) -> Value {
281
+ fn into_value_with(self, _: &Ruby) -> Value {
288
282
  let iter = self.0.into_iter();
289
283
  RArray::from_iter(iter).into_value()
290
284
  }
291
285
  }
292
286
 
293
287
  impl IntoValue for Wrap<&BinaryChunked> {
294
- fn into_value_with(self, _: &RubyHandle) -> Value {
288
+ fn into_value_with(self, _: &Ruby) -> Value {
295
289
  let iter = self
296
290
  .0
297
291
  .into_iter()
@@ -301,7 +295,7 @@ impl IntoValue for Wrap<&BinaryChunked> {
301
295
  }
302
296
 
303
297
  impl IntoValue for Wrap<&StructChunked> {
304
- fn into_value_with(self, _: &RubyHandle) -> Value {
298
+ fn into_value_with(self, _: &Ruby) -> Value {
305
299
  let s = self.0.clone().into_series();
306
300
  // todo! iterate its chunks and flatten.
307
301
  // make series::iter() accept a chunk index.
@@ -319,13 +313,64 @@ impl IntoValue for Wrap<&StructChunked> {
319
313
  }
320
314
 
321
315
  impl IntoValue for Wrap<&DurationChunked> {
322
- fn into_value_with(self, _: &RubyHandle) -> Value {
316
+ fn into_value_with(self, _: &Ruby) -> Value {
317
+ let utils = utils();
318
+ let time_unit = Wrap(self.0.time_unit()).into_value();
319
+ let iter = self.0.into_iter().map(|opt_v| {
320
+ opt_v.map(|v| {
321
+ utils
322
+ .funcall::<_, _, Value>("_to_ruby_duration", (v, time_unit))
323
+ .unwrap()
324
+ })
325
+ });
326
+ RArray::from_iter(iter).into_value()
327
+ }
328
+ }
329
+
330
+ impl IntoValue for Wrap<&DatetimeChunked> {
331
+ fn into_value_with(self, _: &Ruby) -> Value {
332
+ let utils = utils();
333
+ let time_unit = Wrap(self.0.time_unit()).into_value();
334
+ let time_zone = self.0.time_zone().clone().into_value();
335
+ let iter = self.0.into_iter().map(|opt_v| {
336
+ opt_v.map(|v| {
337
+ utils
338
+ .funcall::<_, _, Value>("_to_ruby_datetime", (v, time_unit, time_zone))
339
+ .unwrap()
340
+ })
341
+ });
342
+ RArray::from_iter(iter).into_value()
343
+ }
344
+ }
345
+
346
+ impl IntoValue for Wrap<&TimeChunked> {
347
+ fn into_value_with(self, _: &Ruby) -> Value {
348
+ let utils = utils();
349
+ let iter = self.0.into_iter().map(|opt_v| {
350
+ opt_v.map(|v| utils.funcall::<_, _, Value>("_to_ruby_time", (v,)).unwrap())
351
+ });
352
+ RArray::from_iter(iter).into_value()
353
+ }
354
+ }
355
+
356
+ impl IntoValue for Wrap<&DateChunked> {
357
+ fn into_value_with(self, _: &Ruby) -> Value {
358
+ let utils = utils();
359
+ let iter = self.0.into_iter().map(|opt_v| {
360
+ opt_v.map(|v| utils.funcall::<_, _, Value>("_to_ruby_date", (v,)).unwrap())
361
+ });
362
+ RArray::from_iter(iter).into_value()
363
+ }
364
+ }
365
+
366
+ impl IntoValue for Wrap<&DecimalChunked> {
367
+ fn into_value_with(self, _: &Ruby) -> Value {
323
368
  let utils = utils();
324
- let tu = Wrap(self.0.time_unit()).into_value();
369
+ let rb_scale = (-(self.0.scale() as i32)).into_value();
325
370
  let iter = self.0.into_iter().map(|opt_v| {
326
371
  opt_v.map(|v| {
327
372
  utils
328
- .funcall::<_, _, Value>("_to_ruby_duration", (v, tu))
373
+ .funcall::<_, _, Value>("_to_ruby_decimal", (v.to_string(), rb_scale))
329
374
  .unwrap()
330
375
  })
331
376
  });
@@ -333,6 +378,13 @@ impl IntoValue for Wrap<&DurationChunked> {
333
378
  }
334
379
  }
335
380
 
381
+ fn abs_decimal_from_digits(digits: String, exp: i32) -> Option<(i128, usize)> {
382
+ match digits.parse::<i128>() {
383
+ Ok(v) => Some((v, ((digits.len() as i32) - exp) as usize)),
384
+ Err(_) => None,
385
+ }
386
+ }
387
+
336
388
  impl TryConvert for Wrap<Field> {
337
389
  fn try_convert(ob: Value) -> RbResult<Self> {
338
390
  let name: String = ob.funcall("name", ())?;
@@ -362,10 +414,11 @@ impl TryConvert for Wrap<DataType> {
362
414
  "Polars::Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
363
415
  "Polars::Time" => DataType::Time,
364
416
  "Polars::Duration" => DataType::Duration(TimeUnit::Microseconds),
417
+ "Polars::Decimal" => DataType::Decimal(None, None),
365
418
  "Polars::Float32" => DataType::Float32,
366
419
  "Polars::Float64" => DataType::Float64,
367
- // "Polars::Object" => DataType::Object(OBJECT_NAME),
368
- "Polars::List" => DataType::List(Box::new(DataType::Boolean)),
420
+ "Polars::Object" => DataType::Object(OBJECT_NAME),
421
+ "Polars::List" => DataType::List(Box::new(DataType::Null)),
369
422
  "Polars::Null" => DataType::Null,
370
423
  "Polars::Unknown" => DataType::Unknown,
371
424
  dt => {
@@ -375,21 +428,47 @@ impl TryConvert for Wrap<DataType> {
375
428
  }
376
429
  }
377
430
  // TODO improve
378
- } else if ob.try_convert::<String>().is_err() {
431
+ } else if String::try_convert(ob).is_err() {
379
432
  let name = unsafe { ob.class().name() }.into_owned();
380
433
  match name.as_str() {
434
+ "Polars::Duration" => {
435
+ let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
436
+ let time_unit = Wrap::<TimeUnit>::try_convert(time_unit)?.0;
437
+ DataType::Duration(time_unit)
438
+ }
439
+ "Polars::Datetime" => {
440
+ let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
441
+ let time_unit = Wrap::<TimeUnit>::try_convert(time_unit)?.0;
442
+ let time_zone = ob.funcall("time_zone", ())?;
443
+ DataType::Datetime(time_unit, time_zone)
444
+ }
445
+ "Polars::Decimal" => {
446
+ let precision = ob.funcall("precision", ())?;
447
+ let scale = ob.funcall("scale", ())?;
448
+ DataType::Decimal(precision, Some(scale))
449
+ }
450
+ "Polars::List" => {
451
+ let inner: Value = ob.funcall("inner", ()).unwrap();
452
+ let inner = Wrap::<DataType>::try_convert(inner)?;
453
+ DataType::List(Box::new(inner.0))
454
+ }
381
455
  "Polars::Struct" => {
382
456
  let arr: RArray = ob.funcall("fields", ())?;
383
457
  let mut fields = Vec::with_capacity(arr.len());
384
458
  for v in arr.each() {
385
- fields.push(v?.try_convert::<Wrap<Field>>()?.0);
459
+ fields.push(Wrap::<Field>::try_convert(v?)?.0);
386
460
  }
387
461
  DataType::Struct(fields)
388
462
  }
389
- _ => todo!(),
463
+ dt => {
464
+ return Err(RbTypeError::new_err(format!(
465
+ "A {dt} object is not a correct polars DataType. \
466
+ Hint: use the class without instantiating it.",
467
+ )))
468
+ }
390
469
  }
391
470
  } else {
392
- match ob.try_convert::<String>()?.as_str() {
471
+ match String::try_convert(ob)?.as_str() {
393
472
  "u8" => DataType::UInt8,
394
473
  "u16" => DataType::UInt16,
395
474
  "u32" => DataType::UInt32,
@@ -408,7 +487,7 @@ impl TryConvert for Wrap<DataType> {
408
487
  "time" => DataType::Time,
409
488
  "dur" => DataType::Duration(TimeUnit::Microseconds),
410
489
  "f64" => DataType::Float64,
411
- // "obj" => DataType::Object(OBJECT_NAME),
490
+ "obj" => DataType::Object(OBJECT_NAME),
412
491
  "list" => DataType::List(Box::new(DataType::Boolean)),
413
492
  "null" => DataType::Null,
414
493
  "unk" => DataType::Unknown,
@@ -427,10 +506,10 @@ impl TryConvert for Wrap<DataType> {
427
506
  impl<'s> TryConvert for Wrap<AnyValue<'s>> {
428
507
  fn try_convert(ob: Value) -> RbResult<Self> {
429
508
  if ob.is_kind_of(class::true_class()) || ob.is_kind_of(class::false_class()) {
430
- Ok(AnyValue::Boolean(ob.try_convert::<bool>()?).into())
509
+ Ok(AnyValue::Boolean(bool::try_convert(ob)?).into())
431
510
  } else if let Some(v) = Integer::from_value(ob) {
432
511
  Ok(AnyValue::Int64(v.to_i64()?).into())
433
- } else if let Some(v) = RFloat::from_value(ob) {
512
+ } else if let Some(v) = Float::from_value(ob) {
434
513
  Ok(AnyValue::Float64(v.to_f64()).into())
435
514
  } else if let Some(v) = RString::from_value(ob) {
436
515
  if v.enc_get() == Index::utf8() {
@@ -443,7 +522,8 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
443
522
  let sec = ob.funcall::<_, _, i64>("to_i", ())?;
444
523
  let nsec = ob.funcall::<_, _, i64>("nsec", ())?;
445
524
  let v = sec * 1_000_000_000 + nsec;
446
- // TODO support time zone
525
+ // TODO support time zone when possible
526
+ // https://github.com/pola-rs/polars/issues/9103
447
527
  Ok(AnyValue::Datetime(v, TimeUnit::Nanoseconds, &None).into())
448
528
  } else if ob.is_nil() {
449
529
  Ok(AnyValue::Null.into())
@@ -452,8 +532,8 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
452
532
  let mut keys = Vec::with_capacity(len);
453
533
  let mut vals = Vec::with_capacity(len);
454
534
  dict.foreach(|k: Value, v: Value| {
455
- let key = k.try_convert::<String>()?;
456
- let val = v.try_convert::<Wrap<AnyValue>>()?.0;
535
+ let key = String::try_convert(k)?;
536
+ let val = Wrap::<AnyValue>::try_convert(v)?.0;
457
537
  let dtype = DataType::from(&val);
458
538
  keys.push(Field::new(&key, dtype));
459
539
  vals.push(val);
@@ -464,13 +544,23 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
464
544
  if v.is_empty() {
465
545
  Ok(Wrap(AnyValue::List(Series::new_empty("", &DataType::Null))))
466
546
  } else {
467
- let avs = v.try_convert::<Wrap<Row>>()?.0 .0;
468
- // use first `n` values to infer datatype
469
- // this value is not too large as this will be done with every
470
- // anyvalue that has to be converted, which can be many
471
- let n = 25;
472
- let dtype = any_values_to_dtype(&avs[..std::cmp::min(avs.len(), n)])
473
- .map_err(RbPolarsErr::from)?;
547
+ let list = v;
548
+
549
+ let mut avs = Vec::with_capacity(25);
550
+ let mut iter = list.each();
551
+
552
+ for item in (&mut iter).take(25) {
553
+ avs.push(Wrap::<AnyValue>::try_convert(item?)?.0)
554
+ }
555
+
556
+ let (dtype, _n_types) = any_values_to_dtype(&avs).map_err(RbPolarsErr::from)?;
557
+
558
+ // push the rest
559
+ avs.reserve(list.len());
560
+ for item in iter {
561
+ avs.push(Wrap::<AnyValue>::try_convert(item?)?.0)
562
+ }
563
+
474
564
  let s = Series::from_any_values_and_dtype("", &avs, &dtype, true)
475
565
  .map_err(RbPolarsErr::from)?;
476
566
  Ok(Wrap(AnyValue::List(s)))
@@ -478,7 +568,11 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
478
568
  } else if ob.is_kind_of(crate::rb_modules::datetime()) {
479
569
  let sec: i64 = ob.funcall("to_i", ())?;
480
570
  let nsec: i64 = ob.funcall("nsec", ())?;
481
- Ok(Wrap(AnyValue::Datetime(sec * 1_000_000_000 + nsec, TimeUnit::Nanoseconds, &None)))
571
+ Ok(Wrap(AnyValue::Datetime(
572
+ sec * 1_000_000_000 + nsec,
573
+ TimeUnit::Nanoseconds,
574
+ &None,
575
+ )))
482
576
  } else if ob.is_kind_of(crate::rb_modules::date()) {
483
577
  // convert to DateTime for UTC
484
578
  let v = ob
@@ -486,6 +580,16 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
486
580
  .funcall::<_, _, Value>("to_time", ())?
487
581
  .funcall::<_, _, i64>("to_i", ())?;
488
582
  Ok(Wrap(AnyValue::Date((v / 86400) as i32)))
583
+ } else if ob.is_kind_of(crate::rb_modules::bigdecimal()) {
584
+ let (sign, digits, _, exp): (i8, String, i32, i32) = ob.funcall("split", ()).unwrap();
585
+ let (mut v, scale) = abs_decimal_from_digits(digits, exp).ok_or_else(|| {
586
+ RbPolarsErr::other("BigDecimal is too large to fit in Decimal128".into())
587
+ })?;
588
+ if sign < 0 {
589
+ // TODO better error
590
+ v = v.checked_neg().unwrap();
591
+ }
592
+ Ok(Wrap(AnyValue::Decimal(v, scale)))
489
593
  } else {
490
594
  Err(RbPolarsErr::other(format!(
491
595
  "object type not supported {:?}",
@@ -498,8 +602,8 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
498
602
  impl<'s> TryConvert for Wrap<Row<'s>> {
499
603
  fn try_convert(ob: Value) -> RbResult<Self> {
500
604
  let mut vals: Vec<Wrap<AnyValue<'s>>> = Vec::new();
501
- for item in ob.try_convert::<RArray>()?.each() {
502
- vals.push(item?.try_convert::<Wrap<AnyValue<'s>>>()?);
605
+ for item in RArray::try_convert(ob)?.each() {
606
+ vals.push(Wrap::<AnyValue<'s>>::try_convert(item?)?);
503
607
  }
504
608
  let vals: Vec<AnyValue> = unsafe { std::mem::transmute(vals) };
505
609
  Ok(Wrap(Row(vals)))
@@ -508,7 +612,7 @@ impl<'s> TryConvert for Wrap<Row<'s>> {
508
612
 
509
613
  impl TryConvert for Wrap<Schema> {
510
614
  fn try_convert(ob: Value) -> RbResult<Self> {
511
- let dict = ob.try_convert::<RHash>()?;
615
+ let dict = RHash::try_convert(ob)?;
512
616
 
513
617
  let mut schema = Vec::new();
514
618
  dict.foreach(|key: String, val: Wrap<DataType>| {
@@ -521,15 +625,23 @@ impl TryConvert for Wrap<Schema> {
521
625
  }
522
626
  }
523
627
 
524
- #[derive(Clone, Debug)]
628
+ #[derive(Clone)]
525
629
  pub struct ObjectValue {
526
- pub inner: Value,
630
+ pub inner: Opaque<Value>,
631
+ }
632
+
633
+ impl Debug for ObjectValue {
634
+ fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
635
+ f.debug_struct("ObjectValue")
636
+ .field("inner", &self.to_object())
637
+ .finish()
638
+ }
527
639
  }
528
640
 
529
641
  impl Hash for ObjectValue {
530
642
  fn hash<H: Hasher>(&self, state: &mut H) {
531
643
  let h = self
532
- .inner
644
+ .to_object()
533
645
  .funcall::<_, _, isize>("hash", ())
534
646
  .expect("should be hashable");
535
647
  state.write_isize(h)
@@ -540,13 +652,19 @@ impl Eq for ObjectValue {}
540
652
 
541
653
  impl PartialEq for ObjectValue {
542
654
  fn eq(&self, other: &Self) -> bool {
543
- self.inner.eql(&other.inner).unwrap_or(false)
655
+ self.to_object().eql(other.to_object()).unwrap_or(false)
656
+ }
657
+ }
658
+
659
+ impl TotalEq for ObjectValue {
660
+ fn tot_eq(&self, other: &Self) -> bool {
661
+ self == other
544
662
  }
545
663
  }
546
664
 
547
665
  impl Display for ObjectValue {
548
666
  fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
549
- write!(f, "{}", self.inner)
667
+ write!(f, "{}", self.to_object())
550
668
  }
551
669
  }
552
670
 
@@ -558,13 +676,13 @@ impl PolarsObject for ObjectValue {
558
676
 
559
677
  impl From<Value> for ObjectValue {
560
678
  fn from(v: Value) -> Self {
561
- Self { inner: v }
679
+ Self { inner: v.into() }
562
680
  }
563
681
  }
564
682
 
565
683
  impl TryConvert for ObjectValue {
566
684
  fn try_convert(ob: Value) -> RbResult<Self> {
567
- Ok(ObjectValue { inner: ob })
685
+ Ok(ObjectValue { inner: ob.into() })
568
686
  }
569
687
  }
570
688
 
@@ -577,19 +695,21 @@ impl From<&dyn PolarsObjectSafe> for &ObjectValue {
577
695
  // TODO remove
578
696
  impl ObjectValue {
579
697
  pub fn to_object(&self) -> Value {
580
- self.inner
698
+ Ruby::get().unwrap().get_inner(self.inner)
581
699
  }
582
700
  }
583
701
 
584
702
  impl IntoValue for ObjectValue {
585
- fn into_value_with(self, _: &RubyHandle) -> Value {
586
- self.inner
703
+ fn into_value_with(self, _: &Ruby) -> Value {
704
+ self.to_object()
587
705
  }
588
706
  }
589
707
 
590
708
  impl Default for ObjectValue {
591
709
  fn default() -> Self {
592
- ObjectValue { inner: *QNIL }
710
+ ObjectValue {
711
+ inner: Ruby::get().unwrap().qnil().as_value().into(),
712
+ }
593
713
  }
594
714
  }
595
715
 
@@ -602,13 +722,13 @@ pub(crate) fn dicts_to_rows(
602
722
  let mut key_names = PlIndexSet::new();
603
723
  for d in dicts.each().take(infer_schema_len) {
604
724
  let d = d?;
605
- let d = d.try_convert::<RHash>()?;
725
+ let d = RHash::try_convert(d)?;
606
726
 
607
727
  d.foreach(|name: Value, _value: Value| {
608
728
  if let Some(v) = Symbol::from_value(name) {
609
729
  key_names.insert(v.name()?.into());
610
730
  } else {
611
- key_names.insert(name.try_convert::<String>()?);
731
+ key_names.insert(String::try_convert(name)?);
612
732
  };
613
733
  Ok(ForEach::Continue)
614
734
  })?;
@@ -618,7 +738,7 @@ pub(crate) fn dicts_to_rows(
618
738
 
619
739
  for d in dicts.each() {
620
740
  let d = d?;
621
- let d = d.try_convert::<RHash>()?;
741
+ let d = RHash::try_convert(d)?;
622
742
 
623
743
  let mut row = Vec::with_capacity(key_names.len());
624
744
 
@@ -626,7 +746,7 @@ pub(crate) fn dicts_to_rows(
626
746
  // TODO improve performance
627
747
  let val = match d.get(k.clone()).or_else(|| d.get(Symbol::new(k))) {
628
748
  None => AnyValue::Null,
629
- Some(val) => val.try_convert::<Wrap<AnyValue>>()?.0,
749
+ Some(val) => Wrap::<AnyValue>::try_convert(val)?.0,
630
750
  };
631
751
  row.push(val)
632
752
  }
@@ -637,7 +757,7 @@ pub(crate) fn dicts_to_rows(
637
757
 
638
758
  impl TryConvert for Wrap<AsofStrategy> {
639
759
  fn try_convert(ob: Value) -> RbResult<Self> {
640
- let parsed = match ob.try_convert::<String>()?.as_str() {
760
+ let parsed = match String::try_convert(ob)?.as_str() {
641
761
  "backward" => AsofStrategy::Backward,
642
762
  "forward" => AsofStrategy::Forward,
643
763
  v => {
@@ -653,7 +773,7 @@ impl TryConvert for Wrap<AsofStrategy> {
653
773
 
654
774
  impl TryConvert for Wrap<InterpolationMethod> {
655
775
  fn try_convert(ob: Value) -> RbResult<Self> {
656
- let parsed = match ob.try_convert::<String>()?.as_str() {
776
+ let parsed = match String::try_convert(ob)?.as_str() {
657
777
  "linear" => InterpolationMethod::Linear,
658
778
  "nearest" => InterpolationMethod::Nearest,
659
779
  v => {
@@ -668,7 +788,7 @@ impl TryConvert for Wrap<InterpolationMethod> {
668
788
 
669
789
  impl TryConvert for Wrap<Option<AvroCompression>> {
670
790
  fn try_convert(ob: Value) -> RbResult<Self> {
671
- let parsed = match ob.try_convert::<String>()?.as_str() {
791
+ let parsed = match String::try_convert(ob)?.as_str() {
672
792
  "uncompressed" => None,
673
793
  "snappy" => Some(AvroCompression::Snappy),
674
794
  "deflate" => Some(AvroCompression::Deflate),
@@ -685,7 +805,7 @@ impl TryConvert for Wrap<Option<AvroCompression>> {
685
805
 
686
806
  impl TryConvert for Wrap<CategoricalOrdering> {
687
807
  fn try_convert(ob: Value) -> RbResult<Self> {
688
- let parsed = match ob.try_convert::<String>()?.as_str() {
808
+ let parsed = match String::try_convert(ob)?.as_str() {
689
809
  "physical" => CategoricalOrdering::Physical,
690
810
  "lexical" => CategoricalOrdering::Lexical,
691
811
  v => {
@@ -701,7 +821,7 @@ impl TryConvert for Wrap<CategoricalOrdering> {
701
821
 
702
822
  impl TryConvert for Wrap<StartBy> {
703
823
  fn try_convert(ob: Value) -> RbResult<Self> {
704
- let parsed = match ob.try_convert::<String>()?.as_str() {
824
+ let parsed = match String::try_convert(ob)?.as_str() {
705
825
  "window" => StartBy::WindowBound,
706
826
  "datapoint" => StartBy::DataPoint,
707
827
  "monday" => StartBy::Monday,
@@ -717,7 +837,7 @@ impl TryConvert for Wrap<StartBy> {
717
837
 
718
838
  impl TryConvert for Wrap<ClosedWindow> {
719
839
  fn try_convert(ob: Value) -> RbResult<Self> {
720
- let parsed = match ob.try_convert::<String>()?.as_str() {
840
+ let parsed = match String::try_convert(ob)?.as_str() {
721
841
  "left" => ClosedWindow::Left,
722
842
  "right" => ClosedWindow::Right,
723
843
  "both" => ClosedWindow::Both,
@@ -735,7 +855,7 @@ impl TryConvert for Wrap<ClosedWindow> {
735
855
 
736
856
  impl TryConvert for Wrap<CsvEncoding> {
737
857
  fn try_convert(ob: Value) -> RbResult<Self> {
738
- let parsed = match ob.try_convert::<String>()?.as_str() {
858
+ let parsed = match String::try_convert(ob)?.as_str() {
739
859
  "utf8" => CsvEncoding::Utf8,
740
860
  "utf8-lossy" => CsvEncoding::LossyUtf8,
741
861
  v => {
@@ -751,7 +871,7 @@ impl TryConvert for Wrap<CsvEncoding> {
751
871
 
752
872
  impl TryConvert for Wrap<Option<IpcCompression>> {
753
873
  fn try_convert(ob: Value) -> RbResult<Self> {
754
- let parsed = match ob.try_convert::<String>()?.as_str() {
874
+ let parsed = match String::try_convert(ob)?.as_str() {
755
875
  "uncompressed" => None,
756
876
  "lz4" => Some(IpcCompression::LZ4),
757
877
  "zstd" => Some(IpcCompression::ZSTD),
@@ -768,7 +888,7 @@ impl TryConvert for Wrap<Option<IpcCompression>> {
768
888
 
769
889
  impl TryConvert for Wrap<JoinType> {
770
890
  fn try_convert(ob: Value) -> RbResult<Self> {
771
- let parsed = match ob.try_convert::<String>()?.as_str() {
891
+ let parsed = match String::try_convert(ob)?.as_str() {
772
892
  "inner" => JoinType::Inner,
773
893
  "left" => JoinType::Left,
774
894
  "outer" => JoinType::Outer,
@@ -787,9 +907,25 @@ impl TryConvert for Wrap<JoinType> {
787
907
  }
788
908
  }
789
909
 
910
+ impl TryConvert for Wrap<Label> { // Parses a `label` option string into a `Label`, returned inside `Wrap`.
911
+ fn try_convert(ob: Value) -> RbResult<Self> { // Errors if `ob` is not convertible to `String` or is not an accepted name.
912
+ let parsed = match String::try_convert(ob)?.as_str() { // `?` propagates the String conversion failure; matching is on the exact string.
913
+ "left" => Label::Left,
914
+ "right" => Label::Right,
915
+ "datapoint" => Label::DataPoint,
916
+ v => { // Any other string is rejected with an `RbValueError` that lists the accepted values.
917
+ return Err(RbValueError::new_err(format!(
918
+ "`label` must be one of {{'left', 'right', 'datapoint'}}, got {v}",
919
+ )))
920
+ }
921
+ };
922
+ Ok(Wrap(parsed)) // Wrap the matched variant for the caller.
923
+ }
924
+ }
925
+
790
926
  impl TryConvert for Wrap<ListToStructWidthStrategy> {
791
927
  fn try_convert(ob: Value) -> RbResult<Self> {
792
- let parsed = match ob.try_convert::<String>()?.as_str() {
928
+ let parsed = match String::try_convert(ob)?.as_str() {
793
929
  "first_non_null" => ListToStructWidthStrategy::FirstNonNull,
794
930
  "max_width" => ListToStructWidthStrategy::MaxWidth,
795
931
  v => {
@@ -805,7 +941,7 @@ impl TryConvert for Wrap<ListToStructWidthStrategy> {
805
941
 
806
942
  impl TryConvert for Wrap<NullBehavior> {
807
943
  fn try_convert(ob: Value) -> RbResult<Self> {
808
- let parsed = match ob.try_convert::<String>()?.as_str() {
944
+ let parsed = match String::try_convert(ob)?.as_str() {
809
945
  "drop" => NullBehavior::Drop,
810
946
  "ignore" => NullBehavior::Ignore,
811
947
  v => {
@@ -821,7 +957,7 @@ impl TryConvert for Wrap<NullBehavior> {
821
957
 
822
958
  impl TryConvert for Wrap<NullStrategy> {
823
959
  fn try_convert(ob: Value) -> RbResult<Self> {
824
- let parsed = match ob.try_convert::<String>()?.as_str() {
960
+ let parsed = match String::try_convert(ob)?.as_str() {
825
961
  "ignore" => NullStrategy::Ignore,
826
962
  "propagate" => NullStrategy::Propagate,
827
963
  v => {
@@ -837,7 +973,7 @@ impl TryConvert for Wrap<NullStrategy> {
837
973
 
838
974
  impl TryConvert for Wrap<ParallelStrategy> {
839
975
  fn try_convert(ob: Value) -> RbResult<Self> {
840
- let parsed = match ob.try_convert::<String>()?.as_str() {
976
+ let parsed = match String::try_convert(ob)?.as_str() {
841
977
  "auto" => ParallelStrategy::Auto,
842
978
  "columns" => ParallelStrategy::Columns,
843
979
  "row_groups" => ParallelStrategy::RowGroups,
@@ -855,7 +991,7 @@ impl TryConvert for Wrap<ParallelStrategy> {
855
991
 
856
992
  impl TryConvert for Wrap<QuantileInterpolOptions> {
857
993
  fn try_convert(ob: Value) -> RbResult<Self> {
858
- let parsed = match ob.try_convert::<String>()?.as_str() {
994
+ let parsed = match String::try_convert(ob)?.as_str() {
859
995
  "lower" => QuantileInterpolOptions::Lower,
860
996
  "higher" => QuantileInterpolOptions::Higher,
861
997
  "nearest" => QuantileInterpolOptions::Nearest,
@@ -874,7 +1010,7 @@ impl TryConvert for Wrap<QuantileInterpolOptions> {
874
1010
 
875
1011
  impl TryConvert for Wrap<RankMethod> {
876
1012
  fn try_convert(ob: Value) -> RbResult<Self> {
877
- let parsed = match ob.try_convert::<String>()?.as_str() {
1013
+ let parsed = match String::try_convert(ob)?.as_str() {
878
1014
  "min" => RankMethod::Min,
879
1015
  "max" => RankMethod::Max,
880
1016
  "average" => RankMethod::Average,
@@ -894,7 +1030,7 @@ impl TryConvert for Wrap<RankMethod> {
894
1030
 
895
1031
  impl TryConvert for Wrap<TimeUnit> {
896
1032
  fn try_convert(ob: Value) -> RbResult<Self> {
897
- let parsed = match ob.try_convert::<String>()?.as_str() {
1033
+ let parsed = match String::try_convert(ob)?.as_str() {
898
1034
  "ns" => TimeUnit::Nanoseconds,
899
1035
  "us" => TimeUnit::Microseconds,
900
1036
  "ms" => TimeUnit::Milliseconds,
@@ -911,7 +1047,7 @@ impl TryConvert for Wrap<TimeUnit> {
911
1047
 
912
1048
  impl TryConvert for Wrap<UniqueKeepStrategy> {
913
1049
  fn try_convert(ob: Value) -> RbResult<Self> {
914
- let parsed = match ob.try_convert::<String>()?.as_str() {
1050
+ let parsed = match String::try_convert(ob)?.as_str() {
915
1051
  "first" => UniqueKeepStrategy::First,
916
1052
  "last" => UniqueKeepStrategy::Last,
917
1053
  v => {
@@ -927,7 +1063,7 @@ impl TryConvert for Wrap<UniqueKeepStrategy> {
927
1063
 
928
1064
  impl TryConvert for Wrap<SearchSortedSide> {
929
1065
  fn try_convert(ob: Value) -> RbResult<Self> {
930
- let parsed = match ob.try_convert::<String>()?.as_str() {
1066
+ let parsed = match String::try_convert(ob)?.as_str() {
931
1067
  "any" => SearchSortedSide::Any,
932
1068
  "left" => SearchSortedSide::Left,
933
1069
  "right" => SearchSortedSide::Right,