polars-df 0.4.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +26 -0
  3. data/Cargo.lock +447 -410
  4. data/Cargo.toml +0 -1
  5. data/README.md +6 -5
  6. data/ext/polars/Cargo.toml +10 -5
  7. data/ext/polars/src/apply/dataframe.rs +2 -2
  8. data/ext/polars/src/{lazy/apply.rs → apply/lazy.rs} +1 -2
  9. data/ext/polars/src/apply/mod.rs +8 -3
  10. data/ext/polars/src/batched_csv.rs +7 -5
  11. data/ext/polars/src/conversion.rs +269 -59
  12. data/ext/polars/src/dataframe.rs +38 -40
  13. data/ext/polars/src/error.rs +6 -2
  14. data/ext/polars/src/expr/array.rs +15 -0
  15. data/ext/polars/src/expr/binary.rs +69 -0
  16. data/ext/polars/src/expr/categorical.rs +10 -0
  17. data/ext/polars/src/expr/datetime.rs +223 -0
  18. data/ext/polars/src/expr/general.rs +963 -0
  19. data/ext/polars/src/expr/list.rs +151 -0
  20. data/ext/polars/src/{lazy → expr}/meta.rs +16 -6
  21. data/ext/polars/src/expr/string.rs +314 -0
  22. data/ext/polars/src/expr/struct.rs +15 -0
  23. data/ext/polars/src/expr.rs +34 -0
  24. data/ext/polars/src/functions/eager.rs +93 -0
  25. data/ext/polars/src/functions/io.rs +34 -0
  26. data/ext/polars/src/functions/lazy.rs +249 -0
  27. data/ext/polars/src/functions/meta.rs +8 -0
  28. data/ext/polars/src/functions/mod.rs +5 -0
  29. data/ext/polars/src/functions/whenthen.rs +43 -0
  30. data/ext/polars/src/{lazy/dataframe.rs → lazyframe.rs} +26 -35
  31. data/ext/polars/src/lazygroupby.rs +29 -0
  32. data/ext/polars/src/lib.rs +223 -316
  33. data/ext/polars/src/object.rs +1 -1
  34. data/ext/polars/src/rb_modules.rs +12 -0
  35. data/ext/polars/src/series/aggregation.rs +83 -0
  36. data/ext/polars/src/series/arithmetic.rs +88 -0
  37. data/ext/polars/src/series/comparison.rs +251 -0
  38. data/ext/polars/src/series/construction.rs +190 -0
  39. data/ext/polars/src/series.rs +151 -551
  40. data/lib/polars/array_expr.rb +84 -0
  41. data/lib/polars/array_name_space.rb +77 -0
  42. data/lib/polars/batched_csv_reader.rb +1 -1
  43. data/lib/polars/convert.rb +2 -2
  44. data/lib/polars/data_frame.rb +289 -96
  45. data/lib/polars/data_types.rb +169 -33
  46. data/lib/polars/date_time_expr.rb +142 -2
  47. data/lib/polars/date_time_name_space.rb +17 -3
  48. data/lib/polars/expr.rb +145 -78
  49. data/lib/polars/functions.rb +0 -1
  50. data/lib/polars/group_by.rb +1 -22
  51. data/lib/polars/lazy_frame.rb +84 -31
  52. data/lib/polars/lazy_functions.rb +71 -32
  53. data/lib/polars/list_expr.rb +94 -45
  54. data/lib/polars/list_name_space.rb +13 -13
  55. data/lib/polars/rolling_group_by.rb +4 -2
  56. data/lib/polars/series.rb +249 -87
  57. data/lib/polars/string_expr.rb +277 -45
  58. data/lib/polars/string_name_space.rb +137 -22
  59. data/lib/polars/struct_name_space.rb +32 -0
  60. data/lib/polars/utils.rb +138 -54
  61. data/lib/polars/version.rb +1 -1
  62. data/lib/polars.rb +5 -2
  63. metadata +29 -11
  64. data/ext/polars/src/lazy/dsl.rs +0 -1775
  65. data/ext/polars/src/lazy/mod.rs +0 -5
  66. data/ext/polars/src/lazy/utils.rs +0 -13
  67. data/ext/polars/src/list_construction.rs +0 -100
  68. data/ext/polars/src/{numo.rs → series/export.rs} +0 -0
  69. data/ext/polars/src/{set.rs → series/set_at_idx.rs} +0 -0
data/Cargo.toml CHANGED
@@ -3,7 +3,6 @@ members = ["ext/polars"]
3
3
 
4
4
  [patch.crates-io]
5
5
  jsonpath_lib = { git = "https://github.com/ritchie46/jsonpath", rev = "24eaf0b4416edff38a4d1b6b17bc4b9f3f047b4b" }
6
- halfbrown = { git = "https://github.com/Licenser/halfbrown", rev = "952023c5dd6461b009bb5ba66b9aa979bd75949f" }
7
6
 
8
7
  [profile.release]
9
8
  strip = true
data/README.md CHANGED
@@ -25,7 +25,7 @@ Polars.read_csv("iris.csv")
25
25
  .collect
26
26
  ```
27
27
 
28
- You can follow [Polars tutorials](https://pola-rs.github.io/polars-book/user-guide/introduction.html) and convert the code to Ruby in many cases. Feel free to open an issue if you run into problems.
28
+ You can follow [Polars tutorials](https://pola-rs.github.io/polars-book/user-guide/) and convert the code to Ruby in many cases. Feel free to open an issue if you run into problems.
29
29
 
30
30
  ## Reference
31
31
 
@@ -58,9 +58,9 @@ Polars.scan_parquet("file.parquet")
58
58
  From Active Record
59
59
 
60
60
  ```ruby
61
- Polars.read_sql(User.all)
61
+ Polars.read_database(User.all)
62
62
  # or
63
- Polars.read_sql("SELECT * FROM users")
63
+ Polars.read_database("SELECT * FROM users")
64
64
  ```
65
65
 
66
66
  From JSON
@@ -348,7 +348,7 @@ df.to_numo
348
348
  You can specify column types when creating a data frame
349
349
 
350
350
  ```ruby
351
- Polars::DataFrame.new(data, columns: {"a" => Polars::Int32, "b" => Polars::Float32})
351
+ Polars::DataFrame.new(data, schema: {"a" => Polars::Int32, "b" => Polars::Float32})
352
352
  ```
353
353
 
354
354
  Supported types are:
@@ -357,8 +357,9 @@ Supported types are:
357
357
  - float - `Float64`, `Float32`
358
358
  - integer - `Int64`, `Int32`, `Int16`, `Int8`
359
359
  - unsigned integer - `UInt64`, `UInt32`, `UInt16`, `UInt8`
360
- - string - `Utf8`, `Categorical`
360
+ - string - `Utf8`, `Binary`, `Categorical`
361
361
  - temporal - `Date`, `Datetime`, `Time`, `Duration`
362
+ - other - `Object`, `List`, `Struct`, `Array` [unreleased]
362
363
 
363
364
  Get column types
364
365
 
data/ext/polars/Cargo.toml CHANGED
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "polars"
3
- version = "0.4.0"
3
+ version = "0.6.0"
4
4
  license = "MIT"
5
5
  authors = ["Andrew Kane <andrew@ankane.org>"]
6
6
  edition = "2021"
@@ -11,23 +11,24 @@ crate-type = ["cdylib"]
11
11
 
12
12
  [dependencies]
13
13
  ahash = "0.8"
14
+ chrono = "=0.4.24"
14
15
  magnus = "0.5"
15
- polars-core = "0.28.0"
16
+ polars-core = "0.31.1"
16
17
  serde_json = "1"
17
18
  smartstring = "1"
18
19
 
19
20
  [dependencies.polars]
20
- version = "0.28.0"
21
+ version = "0.31.1"
21
22
  features = [
22
23
  "abs",
23
- "arange",
24
+ "approx_unique",
24
25
  "arg_where",
25
26
  "asof_join",
26
27
  "avro",
27
28
  "binary_encoding",
28
29
  "concat_str",
29
30
  "cse",
30
- "csv-file",
31
+ "csv",
31
32
  "cum_agg",
32
33
  "cumulative_eval",
33
34
  "dataframe_arithmetic",
@@ -49,7 +50,9 @@ features = [
49
50
  "json",
50
51
  "lazy",
51
52
  "lazy_regex",
53
+ "list_count",
52
54
  "list_eval",
55
+ "list_take",
53
56
  "list_to_struct",
54
57
  "log",
55
58
  "meta",
@@ -65,6 +68,7 @@ features = [
65
68
  "propagate_nans",
66
69
  "random",
67
70
  "rank",
71
+ "range",
68
72
  "reinterpret",
69
73
  "repeat_by",
70
74
  "rolling_window",
@@ -75,6 +79,7 @@ features = [
75
79
  "serde-lazy",
76
80
  "sign",
77
81
  "string_encoding",
82
+ "string_from_radix",
78
83
  "string_justify",
79
84
  "strings",
80
85
  "timezones",
data/ext/polars/src/apply/dataframe.rs CHANGED
@@ -202,8 +202,8 @@ pub fn apply_lambda_with_utf8_out_type(
202
202
  }
203
203
 
204
204
  /// Apply a lambda with list output type
205
- pub fn apply_lambda_with_list_out_type<'a>(
206
- df: &'a DataFrame,
205
+ pub fn apply_lambda_with_list_out_type(
206
+ df: &DataFrame,
207
207
  lambda: Value,
208
208
  init_null_count: usize,
209
209
  first_value: Option<&Series>,
data/ext/polars/src/{lazy/apply.rs → apply/lazy.rs} CHANGED
@@ -1,8 +1,7 @@
1
1
  use magnus::Value;
2
2
  use polars::prelude::*;
3
3
 
4
- use crate::lazy::dsl::RbExpr;
5
- use crate::Wrap;
4
+ use crate::{RbExpr, Wrap};
6
5
 
7
6
  pub fn binary_lambda(_lambda: Value, _a: Series, _b: Series) -> PolarsResult<Option<Series>> {
8
7
  todo!();
data/ext/polars/src/apply/mod.rs CHANGED
@@ -1,4 +1,5 @@
1
1
  pub mod dataframe;
2
+ pub mod lazy;
2
3
  pub mod series;
3
4
 
4
5
  use magnus::{RHash, Value};
@@ -236,15 +237,19 @@ fn iterator_to_list(
236
237
  for _ in 0..init_null_count {
237
238
  builder.append_null()
238
239
  }
239
- builder.append_opt_series(first_value);
240
+ builder
241
+ .append_opt_series(first_value)
242
+ .map_err(RbPolarsErr::from)?;
240
243
  for opt_val in it {
241
244
  match opt_val {
242
245
  None => builder.append_null(),
243
246
  Some(s) => {
244
247
  if s.len() == 0 && s.dtype() != dt {
245
- builder.append_series(&Series::full_null("", 0, dt))
248
+ builder
249
+ .append_series(&Series::full_null("", 0, dt))
250
+ .unwrap()
246
251
  } else {
247
- builder.append_series(&s)
252
+ builder.append_series(&s).map_err(RbPolarsErr::from)?
248
253
  }
249
254
  }
250
255
  }
data/ext/polars/src/batched_csv.rs CHANGED
@@ -68,11 +68,13 @@ impl RbBatchedCsv {
68
68
  };
69
69
 
70
70
  let overwrite_dtype = overwrite_dtype.map(|overwrite_dtype| {
71
- let fields = overwrite_dtype.iter().map(|(name, dtype)| {
72
- let dtype = dtype.0.clone();
73
- Field::new(name, dtype)
74
- });
75
- Schema::from(fields)
71
+ overwrite_dtype
72
+ .iter()
73
+ .map(|(name, dtype)| {
74
+ let dtype = dtype.0.clone();
75
+ Field::new(name, dtype)
76
+ })
77
+ .collect::<Schema>()
76
78
  });
77
79
 
78
80
  let overwrite_dtype_slice = overwrite_dtype_slice.map(|overwrite_dtype| {
data/ext/polars/src/conversion.rs CHANGED
@@ -1,9 +1,10 @@
1
1
  use std::fmt::{Display, Formatter};
2
2
  use std::hash::{Hash, Hasher};
3
3
 
4
+ use magnus::encoding::{EncodingCapable, Index};
4
5
  use magnus::{
5
- class, exception, r_hash::ForEach, ruby_handle::RubyHandle, Integer, IntoValue, Module, RArray,
6
- RFloat, RHash, RString, Symbol, TryConvert, Value, QNIL,
6
+ class, exception, r_hash::ForEach, ruby_handle::RubyHandle, Float, Integer, IntoValue, Module,
7
+ RArray, RHash, RString, Symbol, TryConvert, Value, QNIL,
7
8
  };
8
9
  use polars::chunked_array::object::PolarsObjectSafe;
9
10
  use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
@@ -15,7 +16,9 @@ use polars::prelude::*;
15
16
  use polars::series::ops::NullBehavior;
16
17
  use smartstring::alias::String as SmartString;
17
18
 
18
- use crate::{RbDataFrame, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbValueError};
19
+ use crate::object::OBJECT_NAME;
20
+ use crate::rb_modules::utils;
21
+ use crate::{RbDataFrame, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbTypeError, RbValueError};
19
22
 
20
23
  pub(crate) fn slice_to_wrapped<T>(slice: &[T]) -> &[Wrap<T>] {
21
24
  // Safety:
@@ -23,6 +26,12 @@ pub(crate) fn slice_to_wrapped<T>(slice: &[T]) -> &[Wrap<T>] {
23
26
  unsafe { std::mem::transmute(slice) }
24
27
  }
25
28
 
29
+ pub(crate) fn slice_extract_wrapped<T>(slice: &[Wrap<T>]) -> &[T] {
30
+ // Safety:
31
+ // Wrap is transparent.
32
+ unsafe { std::mem::transmute(slice) }
33
+ }
34
+
26
35
  pub(crate) fn vec_extract_wrapped<T>(buf: Vec<Wrap<T>>) -> Vec<T> {
27
36
  // Safety:
28
37
  // Wrap is transparent.
@@ -141,48 +150,29 @@ impl IntoValue for Wrap<AnyValue<'_>> {
141
150
  AnyValue::Boolean(v) => Value::from(v),
142
151
  AnyValue::Utf8(v) => Value::from(v),
143
152
  AnyValue::Utf8Owned(v) => Value::from(v.as_str()),
144
- AnyValue::Categorical(_idx, _rev, _arr) => todo!(),
145
- AnyValue::Date(v) => class::time()
146
- .funcall::<_, _, Value>("at", (v * 86400,))
147
- .unwrap()
148
- .funcall::<_, _, Value>("utc", ())
149
- .unwrap()
150
- .funcall::<_, _, Value>("to_date", ())
151
- .unwrap(),
152
- AnyValue::Datetime(v, tu, tz) => {
153
- let t = match tu {
154
- TimeUnit::Nanoseconds => {
155
- let sec = v / 1000000000;
156
- let subsec = v % 1000000000;
157
- class::time()
158
- .funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("nsec")))
159
- .unwrap()
160
- }
161
- TimeUnit::Microseconds => {
162
- let sec = v / 1000000;
163
- let subsec = v % 1000000;
164
- class::time()
165
- .funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("usec")))
166
- .unwrap()
167
- }
168
- TimeUnit::Milliseconds => {
169
- let sec = v / 1000;
170
- let subsec = v % 1000;
171
- class::time()
172
- .funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("millisecond")))
173
- .unwrap()
174
- }
175
- };
176
-
177
- if tz.is_some() {
178
- todo!();
153
+ AnyValue::Categorical(idx, rev, arr) => {
154
+ let s = if arr.is_null() {
155
+ rev.get(idx)
179
156
  } else {
180
- t.funcall::<_, _, Value>("utc", ()).unwrap()
181
- }
157
+ unsafe { arr.deref_unchecked().value(idx as usize) }
158
+ };
159
+ s.into_value()
160
+ }
161
+ AnyValue::Date(v) => utils().funcall("_to_ruby_date", (v,)).unwrap(),
162
+ AnyValue::Datetime(v, time_unit, time_zone) => {
163
+ let time_unit = time_unit.to_ascii();
164
+ utils()
165
+ .funcall("_to_ruby_datetime", (v, time_unit, time_zone.clone()))
166
+ .unwrap()
167
+ }
168
+ AnyValue::Duration(v, time_unit) => {
169
+ let time_unit = time_unit.to_ascii();
170
+ utils()
171
+ .funcall("_to_ruby_duration", (v, time_unit))
172
+ .unwrap()
182
173
  }
183
- AnyValue::Duration(_v, _tu) => todo!(),
184
- AnyValue::Time(_v) => todo!(),
185
- AnyValue::List(v) => RbSeries::new(v).to_a().into_value(),
174
+ AnyValue::Time(v) => utils().funcall("_to_ruby_time", (v,)).unwrap(),
175
+ AnyValue::Array(v, _) | AnyValue::List(v) => RbSeries::new(v).to_a().into_value(),
186
176
  ref av @ AnyValue::Struct(_, _, flds) => struct_dict(av._iter_struct_av(), flds),
187
177
  AnyValue::StructOwned(payload) => struct_dict(payload.0.into_iter(), &payload.1),
188
178
  AnyValue::Object(v) => {
@@ -195,7 +185,9 @@ impl IntoValue for Wrap<AnyValue<'_>> {
195
185
  }
196
186
  AnyValue::Binary(v) => RString::from_slice(v).into_value(),
197
187
  AnyValue::BinaryOwned(v) => RString::from_slice(&v).into_value(),
198
- AnyValue::Decimal(_v, _scale) => todo!(),
188
+ AnyValue::Decimal(v, scale) => utils()
189
+ .funcall("_to_ruby_decimal", (v.to_string(), -(scale as i32)))
190
+ .unwrap(),
199
191
  }
200
192
  }
201
193
  }
@@ -215,10 +207,22 @@ impl IntoValue for Wrap<DataType> {
215
207
  DataType::UInt64 => pl.const_get::<_, Value>("UInt64").unwrap(),
216
208
  DataType::Float32 => pl.const_get::<_, Value>("Float32").unwrap(),
217
209
  DataType::Float64 => pl.const_get::<_, Value>("Float64").unwrap(),
218
- DataType::Decimal(_precision, _scale) => todo!(),
210
+ DataType::Decimal(precision, scale) => {
211
+ let decimal_class = pl.const_get::<_, Value>("Decimal").unwrap();
212
+ decimal_class
213
+ .funcall::<_, _, Value>("new", (precision, scale))
214
+ .unwrap()
215
+ }
219
216
  DataType::Boolean => pl.const_get::<_, Value>("Boolean").unwrap(),
220
217
  DataType::Utf8 => pl.const_get::<_, Value>("Utf8").unwrap(),
221
218
  DataType::Binary => pl.const_get::<_, Value>("Binary").unwrap(),
219
+ DataType::Array(inner, size) => {
220
+ let inner = Wrap(*inner);
221
+ let list_class = pl.const_get::<_, Value>("Array").unwrap();
222
+ list_class
223
+ .funcall::<_, _, Value>("new", (size, inner))
224
+ .unwrap()
225
+ }
222
226
  DataType::List(inner) => {
223
227
  let inner = Wrap(*inner);
224
228
  let list_class = pl.const_get::<_, Value>("List").unwrap();
@@ -261,6 +265,133 @@ impl IntoValue for Wrap<DataType> {
261
265
  }
262
266
  }
263
267
 
268
+ impl IntoValue for Wrap<TimeUnit> {
269
+ fn into_value_with(self, _: &RubyHandle) -> Value {
270
+ let tu = match self.0 {
271
+ TimeUnit::Nanoseconds => "ns",
272
+ TimeUnit::Microseconds => "us",
273
+ TimeUnit::Milliseconds => "ms",
274
+ };
275
+ tu.into_value()
276
+ }
277
+ }
278
+
279
+ impl IntoValue for Wrap<&Utf8Chunked> {
280
+ fn into_value_with(self, _: &RubyHandle) -> Value {
281
+ let iter = self.0.into_iter();
282
+ RArray::from_iter(iter).into_value()
283
+ }
284
+ }
285
+
286
+ impl IntoValue for Wrap<&BinaryChunked> {
287
+ fn into_value_with(self, _: &RubyHandle) -> Value {
288
+ let iter = self
289
+ .0
290
+ .into_iter()
291
+ .map(|opt_bytes| opt_bytes.map(RString::from_slice));
292
+ RArray::from_iter(iter).into_value()
293
+ }
294
+ }
295
+
296
+ impl IntoValue for Wrap<&StructChunked> {
297
+ fn into_value_with(self, _: &RubyHandle) -> Value {
298
+ let s = self.0.clone().into_series();
299
+ // todo! iterate its chunks and flatten.
300
+ // make series::iter() accept a chunk index.
301
+ let s = s.rechunk();
302
+ let iter = s.iter().map(|av| {
303
+ if let AnyValue::Struct(_, _, flds) = av {
304
+ struct_dict(av._iter_struct_av(), flds)
305
+ } else {
306
+ unreachable!()
307
+ }
308
+ });
309
+
310
+ RArray::from_iter(iter).into_value()
311
+ }
312
+ }
313
+
314
+ impl IntoValue for Wrap<&DurationChunked> {
315
+ fn into_value_with(self, _: &RubyHandle) -> Value {
316
+ let utils = utils();
317
+ let time_unit = Wrap(self.0.time_unit()).into_value();
318
+ let iter = self.0.into_iter().map(|opt_v| {
319
+ opt_v.map(|v| {
320
+ utils
321
+ .funcall::<_, _, Value>("_to_ruby_duration", (v, time_unit))
322
+ .unwrap()
323
+ })
324
+ });
325
+ RArray::from_iter(iter).into_value()
326
+ }
327
+ }
328
+
329
+ impl IntoValue for Wrap<&DatetimeChunked> {
330
+ fn into_value_with(self, _: &RubyHandle) -> Value {
331
+ let utils = utils();
332
+ let time_unit = Wrap(self.0.time_unit()).into_value();
333
+ let time_zone = self.0.time_zone().clone().into_value();
334
+ let iter = self.0.into_iter().map(|opt_v| {
335
+ opt_v.map(|v| {
336
+ utils
337
+ .funcall::<_, _, Value>("_to_ruby_datetime", (v, time_unit, time_zone))
338
+ .unwrap()
339
+ })
340
+ });
341
+ RArray::from_iter(iter).into_value()
342
+ }
343
+ }
344
+
345
+ impl IntoValue for Wrap<&TimeChunked> {
346
+ fn into_value_with(self, _: &RubyHandle) -> Value {
347
+ let utils = utils();
348
+ let iter = self.0.into_iter().map(|opt_v| {
349
+ opt_v.map(|v| utils.funcall::<_, _, Value>("_to_ruby_time", (v,)).unwrap())
350
+ });
351
+ RArray::from_iter(iter).into_value()
352
+ }
353
+ }
354
+
355
+ impl IntoValue for Wrap<&DateChunked> {
356
+ fn into_value_with(self, _: &RubyHandle) -> Value {
357
+ let utils = utils();
358
+ let iter = self.0.into_iter().map(|opt_v| {
359
+ opt_v.map(|v| utils.funcall::<_, _, Value>("_to_ruby_date", (v,)).unwrap())
360
+ });
361
+ RArray::from_iter(iter).into_value()
362
+ }
363
+ }
364
+
365
+ impl IntoValue for Wrap<&DecimalChunked> {
366
+ fn into_value_with(self, _: &RubyHandle) -> Value {
367
+ let utils = utils();
368
+ let rb_scale = (-(self.0.scale() as i32)).into_value();
369
+ let iter = self.0.into_iter().map(|opt_v| {
370
+ opt_v.map(|v| {
371
+ utils
372
+ .funcall::<_, _, Value>("_to_ruby_decimal", (v.to_string(), rb_scale))
373
+ .unwrap()
374
+ })
375
+ });
376
+ RArray::from_iter(iter).into_value()
377
+ }
378
+ }
379
+
380
+ fn abs_decimal_from_digits(digits: String, exp: i32) -> Option<(i128, usize)> {
381
+ match digits.parse::<i128>() {
382
+ Ok(v) => Some((v, ((digits.len() as i32) - exp) as usize)),
383
+ Err(_) => None,
384
+ }
385
+ }
386
+
387
+ impl TryConvert for Wrap<Field> {
388
+ fn try_convert(ob: Value) -> RbResult<Self> {
389
+ let name: String = ob.funcall("name", ())?;
390
+ let dtype: Wrap<DataType> = ob.funcall("dtype", ())?;
391
+ Ok(Wrap(Field::new(&name, dtype.0)))
392
+ }
393
+ }
394
+
264
395
  impl TryConvert for Wrap<DataType> {
265
396
  fn try_convert(ob: Value) -> RbResult<Self> {
266
397
  let dtype = if ob.is_kind_of(class::class()) {
@@ -282,10 +413,11 @@ impl TryConvert for Wrap<DataType> {
282
413
  "Polars::Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
283
414
  "Polars::Time" => DataType::Time,
284
415
  "Polars::Duration" => DataType::Duration(TimeUnit::Microseconds),
416
+ "Polars::Decimal" => DataType::Decimal(None, None),
285
417
  "Polars::Float32" => DataType::Float32,
286
418
  "Polars::Float64" => DataType::Float64,
287
- // "Polars::Object" => DataType::Object(OBJECT_NAME),
288
- "Polars::List" => DataType::List(Box::new(DataType::Boolean)),
419
+ "Polars::Object" => DataType::Object(OBJECT_NAME),
420
+ "Polars::List" => DataType::List(Box::new(DataType::Null)),
289
421
  "Polars::Null" => DataType::Null,
290
422
  "Polars::Unknown" => DataType::Unknown,
291
423
  dt => {
@@ -294,6 +426,47 @@ impl TryConvert for Wrap<DataType> {
294
426
  )))
295
427
  }
296
428
  }
429
+ // TODO improve
430
+ } else if ob.try_convert::<String>().is_err() {
431
+ let name = unsafe { ob.class().name() }.into_owned();
432
+ match name.as_str() {
433
+ "Polars::Duration" => {
434
+ let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
435
+ let time_unit = time_unit.try_convert::<Wrap<TimeUnit>>()?.0;
436
+ DataType::Duration(time_unit)
437
+ }
438
+ "Polars::Datetime" => {
439
+ let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
440
+ let time_unit = time_unit.try_convert::<Wrap<TimeUnit>>()?.0;
441
+ let time_zone: Value = ob.funcall("time_zone", ()).unwrap();
442
+ let time_zone = time_zone.try_convert()?;
443
+ DataType::Datetime(time_unit, time_zone)
444
+ }
445
+ "Polars::Decimal" => {
446
+ let precision = ob.funcall::<_, _, Value>("precision", ())?.try_convert()?;
447
+ let scale = ob.funcall::<_, _, Value>("scale", ())?.try_convert()?;
448
+ DataType::Decimal(precision, Some(scale))
449
+ }
450
+ "Polars::List" => {
451
+ let inner: Value = ob.funcall("inner", ()).unwrap();
452
+ let inner = inner.try_convert::<Wrap<DataType>>()?;
453
+ DataType::List(Box::new(inner.0))
454
+ }
455
+ "Polars::Struct" => {
456
+ let arr: RArray = ob.funcall("fields", ())?;
457
+ let mut fields = Vec::with_capacity(arr.len());
458
+ for v in arr.each() {
459
+ fields.push(v?.try_convert::<Wrap<Field>>()?.0);
460
+ }
461
+ DataType::Struct(fields)
462
+ }
463
+ dt => {
464
+ return Err(RbTypeError::new_err(format!(
465
+ "A {dt} object is not a correct polars DataType. \
466
+ Hint: use the class without instantiating it.",
467
+ )))
468
+ }
469
+ }
297
470
  } else {
298
471
  match ob.try_convert::<String>()?.as_str() {
299
472
  "u8" => DataType::UInt8,
@@ -336,16 +509,21 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
336
509
  Ok(AnyValue::Boolean(ob.try_convert::<bool>()?).into())
337
510
  } else if let Some(v) = Integer::from_value(ob) {
338
511
  Ok(AnyValue::Int64(v.to_i64()?).into())
339
- } else if let Some(v) = RFloat::from_value(ob) {
512
+ } else if let Some(v) = Float::from_value(ob) {
340
513
  Ok(AnyValue::Float64(v.to_f64()).into())
341
514
  } else if let Some(v) = RString::from_value(ob) {
342
- Ok(AnyValue::Utf8Owned(v.to_string()?.into()).into())
515
+ if v.enc_get() == Index::utf8() {
516
+ Ok(AnyValue::Utf8Owned(v.to_string()?.into()).into())
517
+ } else {
518
+ Ok(AnyValue::BinaryOwned(unsafe { v.as_slice() }.to_vec()).into())
519
+ }
343
520
  // call is_a? for ActiveSupport::TimeWithZone
344
521
  } else if ob.funcall::<_, _, bool>("is_a?", (class::time(),))? {
345
522
  let sec = ob.funcall::<_, _, i64>("to_i", ())?;
346
523
  let nsec = ob.funcall::<_, _, i64>("nsec", ())?;
347
524
  let v = sec * 1_000_000_000 + nsec;
348
- // TODO support time zone
525
+ // TODO support time zone when possible
526
+ // https://github.com/pola-rs/polars/issues/9103
349
527
  Ok(AnyValue::Datetime(v, TimeUnit::Nanoseconds, &None).into())
350
528
  } else if ob.is_nil() {
351
529
  Ok(AnyValue::Null.into())
@@ -366,17 +544,35 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
366
544
  if v.is_empty() {
367
545
  Ok(Wrap(AnyValue::List(Series::new_empty("", &DataType::Null))))
368
546
  } else {
369
- let avs = v.try_convert::<Wrap<Row>>()?.0 .0;
370
- // use first `n` values to infer datatype
371
- // this value is not too large as this will be done with every
372
- // anyvalue that has to be converted, which can be many
373
- let n = 25;
374
- let dtype = any_values_to_dtype(&avs[..std::cmp::min(avs.len(), n)])
375
- .map_err(RbPolarsErr::from)?;
547
+ let list = v;
548
+
549
+ let mut avs = Vec::with_capacity(25);
550
+ let mut iter = list.each();
551
+
552
+ for item in (&mut iter).take(25) {
553
+ avs.push(item?.try_convert::<Wrap<AnyValue>>()?.0)
554
+ }
555
+
556
+ let (dtype, _n_types) = any_values_to_dtype(&avs).map_err(RbPolarsErr::from)?;
557
+
558
+ // push the rest
559
+ avs.reserve(list.len());
560
+ for item in iter {
561
+ avs.push(item?.try_convert::<Wrap<AnyValue>>()?.0)
562
+ }
563
+
376
564
  let s = Series::from_any_values_and_dtype("", &avs, &dtype, true)
377
565
  .map_err(RbPolarsErr::from)?;
378
566
  Ok(Wrap(AnyValue::List(s)))
379
567
  }
568
+ } else if ob.is_kind_of(crate::rb_modules::datetime()) {
569
+ let sec: i64 = ob.funcall("to_i", ())?;
570
+ let nsec: i64 = ob.funcall("nsec", ())?;
571
+ Ok(Wrap(AnyValue::Datetime(
572
+ sec * 1_000_000_000 + nsec,
573
+ TimeUnit::Nanoseconds,
574
+ &None,
575
+ )))
380
576
  } else if ob.is_kind_of(crate::rb_modules::date()) {
381
577
  // convert to DateTime for UTC
382
578
  let v = ob
@@ -384,6 +580,20 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
384
580
  .funcall::<_, _, Value>("to_time", ())?
385
581
  .funcall::<_, _, i64>("to_i", ())?;
386
582
  Ok(Wrap(AnyValue::Date((v / 86400) as i32)))
583
+ } else if ob.is_kind_of(crate::rb_modules::bigdecimal()) {
584
+ let (sign, digits, _, exp): (i8, String, i32, i32) = ob
585
+ .funcall::<_, _, Value>("split", ())
586
+ .unwrap()
587
+ .try_convert()
588
+ .unwrap();
589
+ let (mut v, scale) = abs_decimal_from_digits(digits, exp).ok_or_else(|| {
590
+ RbPolarsErr::other("BigDecimal is too large to fit in Decimal128".into())
591
+ })?;
592
+ if sign < 0 {
593
+ // TODO better error
594
+ v = v.checked_neg().unwrap();
595
+ }
596
+ Ok(Wrap(AnyValue::Decimal(v, scale)))
387
597
  } else {
388
598
  Err(RbPolarsErr::other(format!(
389
599
  "object type not supported {:?}",
@@ -410,12 +620,12 @@ impl TryConvert for Wrap<Schema> {
410
620
 
411
621
  let mut schema = Vec::new();
412
622
  dict.foreach(|key: String, val: Wrap<DataType>| {
413
- schema.push(Field::new(&key, val.0));
623
+ schema.push(Ok(Field::new(&key, val.0)));
414
624
  Ok(ForEach::Continue)
415
625
  })
416
626
  .unwrap();
417
627
 
418
- Ok(Wrap(schema.into_iter().into()))
628
+ Ok(Wrap(schema.into_iter().collect::<RbResult<Schema>>()?))
419
629
  }
420
630
  }
421
631