polars-df 0.4.0 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (69) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +26 -0
  3. data/Cargo.lock +447 -410
  4. data/Cargo.toml +0 -1
  5. data/README.md +6 -5
  6. data/ext/polars/Cargo.toml +10 -5
  7. data/ext/polars/src/apply/dataframe.rs +2 -2
  8. data/ext/polars/src/{lazy/apply.rs → apply/lazy.rs} +1 -2
  9. data/ext/polars/src/apply/mod.rs +8 -3
  10. data/ext/polars/src/batched_csv.rs +7 -5
  11. data/ext/polars/src/conversion.rs +269 -59
  12. data/ext/polars/src/dataframe.rs +38 -40
  13. data/ext/polars/src/error.rs +6 -2
  14. data/ext/polars/src/expr/array.rs +15 -0
  15. data/ext/polars/src/expr/binary.rs +69 -0
  16. data/ext/polars/src/expr/categorical.rs +10 -0
  17. data/ext/polars/src/expr/datetime.rs +223 -0
  18. data/ext/polars/src/expr/general.rs +963 -0
  19. data/ext/polars/src/expr/list.rs +151 -0
  20. data/ext/polars/src/{lazy → expr}/meta.rs +16 -6
  21. data/ext/polars/src/expr/string.rs +314 -0
  22. data/ext/polars/src/expr/struct.rs +15 -0
  23. data/ext/polars/src/expr.rs +34 -0
  24. data/ext/polars/src/functions/eager.rs +93 -0
  25. data/ext/polars/src/functions/io.rs +34 -0
  26. data/ext/polars/src/functions/lazy.rs +249 -0
  27. data/ext/polars/src/functions/meta.rs +8 -0
  28. data/ext/polars/src/functions/mod.rs +5 -0
  29. data/ext/polars/src/functions/whenthen.rs +43 -0
  30. data/ext/polars/src/{lazy/dataframe.rs → lazyframe.rs} +26 -35
  31. data/ext/polars/src/lazygroupby.rs +29 -0
  32. data/ext/polars/src/lib.rs +223 -316
  33. data/ext/polars/src/object.rs +1 -1
  34. data/ext/polars/src/rb_modules.rs +12 -0
  35. data/ext/polars/src/series/aggregation.rs +83 -0
  36. data/ext/polars/src/series/arithmetic.rs +88 -0
  37. data/ext/polars/src/series/comparison.rs +251 -0
  38. data/ext/polars/src/series/construction.rs +190 -0
  39. data/ext/polars/src/series.rs +151 -551
  40. data/lib/polars/array_expr.rb +84 -0
  41. data/lib/polars/array_name_space.rb +77 -0
  42. data/lib/polars/batched_csv_reader.rb +1 -1
  43. data/lib/polars/convert.rb +2 -2
  44. data/lib/polars/data_frame.rb +289 -96
  45. data/lib/polars/data_types.rb +169 -33
  46. data/lib/polars/date_time_expr.rb +142 -2
  47. data/lib/polars/date_time_name_space.rb +17 -3
  48. data/lib/polars/expr.rb +145 -78
  49. data/lib/polars/functions.rb +0 -1
  50. data/lib/polars/group_by.rb +1 -22
  51. data/lib/polars/lazy_frame.rb +84 -31
  52. data/lib/polars/lazy_functions.rb +71 -32
  53. data/lib/polars/list_expr.rb +94 -45
  54. data/lib/polars/list_name_space.rb +13 -13
  55. data/lib/polars/rolling_group_by.rb +4 -2
  56. data/lib/polars/series.rb +249 -87
  57. data/lib/polars/string_expr.rb +277 -45
  58. data/lib/polars/string_name_space.rb +137 -22
  59. data/lib/polars/struct_name_space.rb +32 -0
  60. data/lib/polars/utils.rb +138 -54
  61. data/lib/polars/version.rb +1 -1
  62. data/lib/polars.rb +5 -2
  63. metadata +29 -11
  64. data/ext/polars/src/lazy/dsl.rs +0 -1775
  65. data/ext/polars/src/lazy/mod.rs +0 -5
  66. data/ext/polars/src/lazy/utils.rs +0 -13
  67. data/ext/polars/src/list_construction.rs +0 -100
  68. /data/ext/polars/src/{numo.rs → series/export.rs} +0 -0
  69. /data/ext/polars/src/{set.rs → series/set_at_idx.rs} +0 -0
data/Cargo.toml CHANGED
@@ -3,7 +3,6 @@ members = ["ext/polars"]
3
3
 
4
4
  [patch.crates-io]
5
5
  jsonpath_lib = { git = "https://github.com/ritchie46/jsonpath", rev = "24eaf0b4416edff38a4d1b6b17bc4b9f3f047b4b" }
6
- halfbrown = { git = "https://github.com/Licenser/halfbrown", rev = "952023c5dd6461b009bb5ba66b9aa979bd75949f" }
7
6
 
8
7
  [profile.release]
9
8
  strip = true
data/README.md CHANGED
@@ -25,7 +25,7 @@ Polars.read_csv("iris.csv")
25
25
  .collect
26
26
  ```
27
27
 
28
- You can follow [Polars tutorials](https://pola-rs.github.io/polars-book/user-guide/introduction.html) and convert the code to Ruby in many cases. Feel free to open an issue if you run into problems.
28
+ You can follow [Polars tutorials](https://pola-rs.github.io/polars-book/user-guide/) and convert the code to Ruby in many cases. Feel free to open an issue if you run into problems.
29
29
 
30
30
  ## Reference
31
31
 
@@ -58,9 +58,9 @@ Polars.scan_parquet("file.parquet")
58
58
  From Active Record
59
59
 
60
60
  ```ruby
61
- Polars.read_sql(User.all)
61
+ Polars.read_database(User.all)
62
62
  # or
63
- Polars.read_sql("SELECT * FROM users")
63
+ Polars.read_database("SELECT * FROM users")
64
64
  ```
65
65
 
66
66
  From JSON
@@ -348,7 +348,7 @@ df.to_numo
348
348
  You can specify column types when creating a data frame
349
349
 
350
350
  ```ruby
351
- Polars::DataFrame.new(data, columns: {"a" => Polars::Int32, "b" => Polars::Float32})
351
+ Polars::DataFrame.new(data, schema: {"a" => Polars::Int32, "b" => Polars::Float32})
352
352
  ```
353
353
 
354
354
  Supported types are:
@@ -357,8 +357,9 @@ Supported types are:
357
357
  - float - `Float64`, `Float32`
358
358
  - integer - `Int64`, `Int32`, `Int16`, `Int8`
359
359
  - unsigned integer - `UInt64`, `UInt32`, `UInt16`, `UInt8`
360
- - string - `Utf8`, `Categorical`
360
+ - string - `Utf8`, `Binary`, `Categorical`
361
361
  - temporal - `Date`, `Datetime`, `Time`, `Duration`
362
+ - other - `Object`, `List`, `Struct`, `Array` [unreleased]
362
363
 
363
364
  Get column types
364
365
 
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "polars"
3
- version = "0.4.0"
3
+ version = "0.6.0"
4
4
  license = "MIT"
5
5
  authors = ["Andrew Kane <andrew@ankane.org>"]
6
6
  edition = "2021"
@@ -11,23 +11,24 @@ crate-type = ["cdylib"]
11
11
 
12
12
  [dependencies]
13
13
  ahash = "0.8"
14
+ chrono = "=0.4.24"
14
15
  magnus = "0.5"
15
- polars-core = "0.28.0"
16
+ polars-core = "0.31.1"
16
17
  serde_json = "1"
17
18
  smartstring = "1"
18
19
 
19
20
  [dependencies.polars]
20
- version = "0.28.0"
21
+ version = "0.31.1"
21
22
  features = [
22
23
  "abs",
23
- "arange",
24
+ "approx_unique",
24
25
  "arg_where",
25
26
  "asof_join",
26
27
  "avro",
27
28
  "binary_encoding",
28
29
  "concat_str",
29
30
  "cse",
30
- "csv-file",
31
+ "csv",
31
32
  "cum_agg",
32
33
  "cumulative_eval",
33
34
  "dataframe_arithmetic",
@@ -49,7 +50,9 @@ features = [
49
50
  "json",
50
51
  "lazy",
51
52
  "lazy_regex",
53
+ "list_count",
52
54
  "list_eval",
55
+ "list_take",
53
56
  "list_to_struct",
54
57
  "log",
55
58
  "meta",
@@ -65,6 +68,7 @@ features = [
65
68
  "propagate_nans",
66
69
  "random",
67
70
  "rank",
71
+ "range",
68
72
  "reinterpret",
69
73
  "repeat_by",
70
74
  "rolling_window",
@@ -75,6 +79,7 @@ features = [
75
79
  "serde-lazy",
76
80
  "sign",
77
81
  "string_encoding",
82
+ "string_from_radix",
78
83
  "string_justify",
79
84
  "strings",
80
85
  "timezones",
@@ -202,8 +202,8 @@ pub fn apply_lambda_with_utf8_out_type(
202
202
  }
203
203
 
204
204
  /// Apply a lambda with list output type
205
- pub fn apply_lambda_with_list_out_type<'a>(
206
- df: &'a DataFrame,
205
+ pub fn apply_lambda_with_list_out_type(
206
+ df: &DataFrame,
207
207
  lambda: Value,
208
208
  init_null_count: usize,
209
209
  first_value: Option<&Series>,
@@ -1,8 +1,7 @@
1
1
  use magnus::Value;
2
2
  use polars::prelude::*;
3
3
 
4
- use crate::lazy::dsl::RbExpr;
5
- use crate::Wrap;
4
+ use crate::{RbExpr, Wrap};
6
5
 
7
6
  pub fn binary_lambda(_lambda: Value, _a: Series, _b: Series) -> PolarsResult<Option<Series>> {
8
7
  todo!();
@@ -1,4 +1,5 @@
1
1
  pub mod dataframe;
2
+ pub mod lazy;
2
3
  pub mod series;
3
4
 
4
5
  use magnus::{RHash, Value};
@@ -236,15 +237,19 @@ fn iterator_to_list(
236
237
  for _ in 0..init_null_count {
237
238
  builder.append_null()
238
239
  }
239
- builder.append_opt_series(first_value);
240
+ builder
241
+ .append_opt_series(first_value)
242
+ .map_err(RbPolarsErr::from)?;
240
243
  for opt_val in it {
241
244
  match opt_val {
242
245
  None => builder.append_null(),
243
246
  Some(s) => {
244
247
  if s.len() == 0 && s.dtype() != dt {
245
- builder.append_series(&Series::full_null("", 0, dt))
248
+ builder
249
+ .append_series(&Series::full_null("", 0, dt))
250
+ .unwrap()
246
251
  } else {
247
- builder.append_series(&s)
252
+ builder.append_series(&s).map_err(RbPolarsErr::from)?
248
253
  }
249
254
  }
250
255
  }
@@ -68,11 +68,13 @@ impl RbBatchedCsv {
68
68
  };
69
69
 
70
70
  let overwrite_dtype = overwrite_dtype.map(|overwrite_dtype| {
71
- let fields = overwrite_dtype.iter().map(|(name, dtype)| {
72
- let dtype = dtype.0.clone();
73
- Field::new(name, dtype)
74
- });
75
- Schema::from(fields)
71
+ overwrite_dtype
72
+ .iter()
73
+ .map(|(name, dtype)| {
74
+ let dtype = dtype.0.clone();
75
+ Field::new(name, dtype)
76
+ })
77
+ .collect::<Schema>()
76
78
  });
77
79
 
78
80
  let overwrite_dtype_slice = overwrite_dtype_slice.map(|overwrite_dtype| {
@@ -1,9 +1,10 @@
1
1
  use std::fmt::{Display, Formatter};
2
2
  use std::hash::{Hash, Hasher};
3
3
 
4
+ use magnus::encoding::{EncodingCapable, Index};
4
5
  use magnus::{
5
- class, exception, r_hash::ForEach, ruby_handle::RubyHandle, Integer, IntoValue, Module, RArray,
6
- RFloat, RHash, RString, Symbol, TryConvert, Value, QNIL,
6
+ class, exception, r_hash::ForEach, ruby_handle::RubyHandle, Float, Integer, IntoValue, Module,
7
+ RArray, RHash, RString, Symbol, TryConvert, Value, QNIL,
7
8
  };
8
9
  use polars::chunked_array::object::PolarsObjectSafe;
9
10
  use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
@@ -15,7 +16,9 @@ use polars::prelude::*;
15
16
  use polars::series::ops::NullBehavior;
16
17
  use smartstring::alias::String as SmartString;
17
18
 
18
- use crate::{RbDataFrame, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbValueError};
19
+ use crate::object::OBJECT_NAME;
20
+ use crate::rb_modules::utils;
21
+ use crate::{RbDataFrame, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbTypeError, RbValueError};
19
22
 
20
23
  pub(crate) fn slice_to_wrapped<T>(slice: &[T]) -> &[Wrap<T>] {
21
24
  // Safety:
@@ -23,6 +26,12 @@ pub(crate) fn slice_to_wrapped<T>(slice: &[T]) -> &[Wrap<T>] {
23
26
  unsafe { std::mem::transmute(slice) }
24
27
  }
25
28
 
29
+ pub(crate) fn slice_extract_wrapped<T>(slice: &[Wrap<T>]) -> &[T] {
30
+ // Safety:
31
+ // Wrap is transparent.
32
+ unsafe { std::mem::transmute(slice) }
33
+ }
34
+
26
35
  pub(crate) fn vec_extract_wrapped<T>(buf: Vec<Wrap<T>>) -> Vec<T> {
27
36
  // Safety:
28
37
  // Wrap is transparent.
@@ -141,48 +150,29 @@ impl IntoValue for Wrap<AnyValue<'_>> {
141
150
  AnyValue::Boolean(v) => Value::from(v),
142
151
  AnyValue::Utf8(v) => Value::from(v),
143
152
  AnyValue::Utf8Owned(v) => Value::from(v.as_str()),
144
- AnyValue::Categorical(_idx, _rev, _arr) => todo!(),
145
- AnyValue::Date(v) => class::time()
146
- .funcall::<_, _, Value>("at", (v * 86400,))
147
- .unwrap()
148
- .funcall::<_, _, Value>("utc", ())
149
- .unwrap()
150
- .funcall::<_, _, Value>("to_date", ())
151
- .unwrap(),
152
- AnyValue::Datetime(v, tu, tz) => {
153
- let t = match tu {
154
- TimeUnit::Nanoseconds => {
155
- let sec = v / 1000000000;
156
- let subsec = v % 1000000000;
157
- class::time()
158
- .funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("nsec")))
159
- .unwrap()
160
- }
161
- TimeUnit::Microseconds => {
162
- let sec = v / 1000000;
163
- let subsec = v % 1000000;
164
- class::time()
165
- .funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("usec")))
166
- .unwrap()
167
- }
168
- TimeUnit::Milliseconds => {
169
- let sec = v / 1000;
170
- let subsec = v % 1000;
171
- class::time()
172
- .funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("millisecond")))
173
- .unwrap()
174
- }
175
- };
176
-
177
- if tz.is_some() {
178
- todo!();
153
+ AnyValue::Categorical(idx, rev, arr) => {
154
+ let s = if arr.is_null() {
155
+ rev.get(idx)
179
156
  } else {
180
- t.funcall::<_, _, Value>("utc", ()).unwrap()
181
- }
157
+ unsafe { arr.deref_unchecked().value(idx as usize) }
158
+ };
159
+ s.into_value()
160
+ }
161
+ AnyValue::Date(v) => utils().funcall("_to_ruby_date", (v,)).unwrap(),
162
+ AnyValue::Datetime(v, time_unit, time_zone) => {
163
+ let time_unit = time_unit.to_ascii();
164
+ utils()
165
+ .funcall("_to_ruby_datetime", (v, time_unit, time_zone.clone()))
166
+ .unwrap()
167
+ }
168
+ AnyValue::Duration(v, time_unit) => {
169
+ let time_unit = time_unit.to_ascii();
170
+ utils()
171
+ .funcall("_to_ruby_duration", (v, time_unit))
172
+ .unwrap()
182
173
  }
183
- AnyValue::Duration(_v, _tu) => todo!(),
184
- AnyValue::Time(_v) => todo!(),
185
- AnyValue::List(v) => RbSeries::new(v).to_a().into_value(),
174
+ AnyValue::Time(v) => utils().funcall("_to_ruby_time", (v,)).unwrap(),
175
+ AnyValue::Array(v, _) | AnyValue::List(v) => RbSeries::new(v).to_a().into_value(),
186
176
  ref av @ AnyValue::Struct(_, _, flds) => struct_dict(av._iter_struct_av(), flds),
187
177
  AnyValue::StructOwned(payload) => struct_dict(payload.0.into_iter(), &payload.1),
188
178
  AnyValue::Object(v) => {
@@ -195,7 +185,9 @@ impl IntoValue for Wrap<AnyValue<'_>> {
195
185
  }
196
186
  AnyValue::Binary(v) => RString::from_slice(v).into_value(),
197
187
  AnyValue::BinaryOwned(v) => RString::from_slice(&v).into_value(),
198
- AnyValue::Decimal(_v, _scale) => todo!(),
188
+ AnyValue::Decimal(v, scale) => utils()
189
+ .funcall("_to_ruby_decimal", (v.to_string(), -(scale as i32)))
190
+ .unwrap(),
199
191
  }
200
192
  }
201
193
  }
@@ -215,10 +207,22 @@ impl IntoValue for Wrap<DataType> {
215
207
  DataType::UInt64 => pl.const_get::<_, Value>("UInt64").unwrap(),
216
208
  DataType::Float32 => pl.const_get::<_, Value>("Float32").unwrap(),
217
209
  DataType::Float64 => pl.const_get::<_, Value>("Float64").unwrap(),
218
- DataType::Decimal(_precision, _scale) => todo!(),
210
+ DataType::Decimal(precision, scale) => {
211
+ let decimal_class = pl.const_get::<_, Value>("Decimal").unwrap();
212
+ decimal_class
213
+ .funcall::<_, _, Value>("new", (precision, scale))
214
+ .unwrap()
215
+ }
219
216
  DataType::Boolean => pl.const_get::<_, Value>("Boolean").unwrap(),
220
217
  DataType::Utf8 => pl.const_get::<_, Value>("Utf8").unwrap(),
221
218
  DataType::Binary => pl.const_get::<_, Value>("Binary").unwrap(),
219
+ DataType::Array(inner, size) => {
220
+ let inner = Wrap(*inner);
221
+ let list_class = pl.const_get::<_, Value>("Array").unwrap();
222
+ list_class
223
+ .funcall::<_, _, Value>("new", (size, inner))
224
+ .unwrap()
225
+ }
222
226
  DataType::List(inner) => {
223
227
  let inner = Wrap(*inner);
224
228
  let list_class = pl.const_get::<_, Value>("List").unwrap();
@@ -261,6 +265,133 @@ impl IntoValue for Wrap<DataType> {
261
265
  }
262
266
  }
263
267
 
268
+ impl IntoValue for Wrap<TimeUnit> {
269
+ fn into_value_with(self, _: &RubyHandle) -> Value {
270
+ let tu = match self.0 {
271
+ TimeUnit::Nanoseconds => "ns",
272
+ TimeUnit::Microseconds => "us",
273
+ TimeUnit::Milliseconds => "ms",
274
+ };
275
+ tu.into_value()
276
+ }
277
+ }
278
+
279
+ impl IntoValue for Wrap<&Utf8Chunked> {
280
+ fn into_value_with(self, _: &RubyHandle) -> Value {
281
+ let iter = self.0.into_iter();
282
+ RArray::from_iter(iter).into_value()
283
+ }
284
+ }
285
+
286
+ impl IntoValue for Wrap<&BinaryChunked> {
287
+ fn into_value_with(self, _: &RubyHandle) -> Value {
288
+ let iter = self
289
+ .0
290
+ .into_iter()
291
+ .map(|opt_bytes| opt_bytes.map(RString::from_slice));
292
+ RArray::from_iter(iter).into_value()
293
+ }
294
+ }
295
+
296
+ impl IntoValue for Wrap<&StructChunked> {
297
+ fn into_value_with(self, _: &RubyHandle) -> Value {
298
+ let s = self.0.clone().into_series();
299
+ // todo! iterate its chunks and flatten.
300
+ // make series::iter() accept a chunk index.
301
+ let s = s.rechunk();
302
+ let iter = s.iter().map(|av| {
303
+ if let AnyValue::Struct(_, _, flds) = av {
304
+ struct_dict(av._iter_struct_av(), flds)
305
+ } else {
306
+ unreachable!()
307
+ }
308
+ });
309
+
310
+ RArray::from_iter(iter).into_value()
311
+ }
312
+ }
313
+
314
+ impl IntoValue for Wrap<&DurationChunked> {
315
+ fn into_value_with(self, _: &RubyHandle) -> Value {
316
+ let utils = utils();
317
+ let time_unit = Wrap(self.0.time_unit()).into_value();
318
+ let iter = self.0.into_iter().map(|opt_v| {
319
+ opt_v.map(|v| {
320
+ utils
321
+ .funcall::<_, _, Value>("_to_ruby_duration", (v, time_unit))
322
+ .unwrap()
323
+ })
324
+ });
325
+ RArray::from_iter(iter).into_value()
326
+ }
327
+ }
328
+
329
+ impl IntoValue for Wrap<&DatetimeChunked> {
330
+ fn into_value_with(self, _: &RubyHandle) -> Value {
331
+ let utils = utils();
332
+ let time_unit = Wrap(self.0.time_unit()).into_value();
333
+ let time_zone = self.0.time_zone().clone().into_value();
334
+ let iter = self.0.into_iter().map(|opt_v| {
335
+ opt_v.map(|v| {
336
+ utils
337
+ .funcall::<_, _, Value>("_to_ruby_datetime", (v, time_unit, time_zone))
338
+ .unwrap()
339
+ })
340
+ });
341
+ RArray::from_iter(iter).into_value()
342
+ }
343
+ }
344
+
345
+ impl IntoValue for Wrap<&TimeChunked> {
346
+ fn into_value_with(self, _: &RubyHandle) -> Value {
347
+ let utils = utils();
348
+ let iter = self.0.into_iter().map(|opt_v| {
349
+ opt_v.map(|v| utils.funcall::<_, _, Value>("_to_ruby_time", (v,)).unwrap())
350
+ });
351
+ RArray::from_iter(iter).into_value()
352
+ }
353
+ }
354
+
355
+ impl IntoValue for Wrap<&DateChunked> {
356
+ fn into_value_with(self, _: &RubyHandle) -> Value {
357
+ let utils = utils();
358
+ let iter = self.0.into_iter().map(|opt_v| {
359
+ opt_v.map(|v| utils.funcall::<_, _, Value>("_to_ruby_date", (v,)).unwrap())
360
+ });
361
+ RArray::from_iter(iter).into_value()
362
+ }
363
+ }
364
+
365
+ impl IntoValue for Wrap<&DecimalChunked> {
366
+ fn into_value_with(self, _: &RubyHandle) -> Value {
367
+ let utils = utils();
368
+ let rb_scale = (-(self.0.scale() as i32)).into_value();
369
+ let iter = self.0.into_iter().map(|opt_v| {
370
+ opt_v.map(|v| {
371
+ utils
372
+ .funcall::<_, _, Value>("_to_ruby_decimal", (v.to_string(), rb_scale))
373
+ .unwrap()
374
+ })
375
+ });
376
+ RArray::from_iter(iter).into_value()
377
+ }
378
+ }
379
+
380
+ fn abs_decimal_from_digits(digits: String, exp: i32) -> Option<(i128, usize)> {
381
+ match digits.parse::<i128>() {
382
+ Ok(v) => Some((v, ((digits.len() as i32) - exp) as usize)),
383
+ Err(_) => None,
384
+ }
385
+ }
386
+
387
+ impl TryConvert for Wrap<Field> {
388
+ fn try_convert(ob: Value) -> RbResult<Self> {
389
+ let name: String = ob.funcall("name", ())?;
390
+ let dtype: Wrap<DataType> = ob.funcall("dtype", ())?;
391
+ Ok(Wrap(Field::new(&name, dtype.0)))
392
+ }
393
+ }
394
+
264
395
  impl TryConvert for Wrap<DataType> {
265
396
  fn try_convert(ob: Value) -> RbResult<Self> {
266
397
  let dtype = if ob.is_kind_of(class::class()) {
@@ -282,10 +413,11 @@ impl TryConvert for Wrap<DataType> {
282
413
  "Polars::Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
283
414
  "Polars::Time" => DataType::Time,
284
415
  "Polars::Duration" => DataType::Duration(TimeUnit::Microseconds),
416
+ "Polars::Decimal" => DataType::Decimal(None, None),
285
417
  "Polars::Float32" => DataType::Float32,
286
418
  "Polars::Float64" => DataType::Float64,
287
- // "Polars::Object" => DataType::Object(OBJECT_NAME),
288
- "Polars::List" => DataType::List(Box::new(DataType::Boolean)),
419
+ "Polars::Object" => DataType::Object(OBJECT_NAME),
420
+ "Polars::List" => DataType::List(Box::new(DataType::Null)),
289
421
  "Polars::Null" => DataType::Null,
290
422
  "Polars::Unknown" => DataType::Unknown,
291
423
  dt => {
@@ -294,6 +426,47 @@ impl TryConvert for Wrap<DataType> {
294
426
  )))
295
427
  }
296
428
  }
429
+ // TODO improve
430
+ } else if ob.try_convert::<String>().is_err() {
431
+ let name = unsafe { ob.class().name() }.into_owned();
432
+ match name.as_str() {
433
+ "Polars::Duration" => {
434
+ let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
435
+ let time_unit = time_unit.try_convert::<Wrap<TimeUnit>>()?.0;
436
+ DataType::Duration(time_unit)
437
+ }
438
+ "Polars::Datetime" => {
439
+ let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
440
+ let time_unit = time_unit.try_convert::<Wrap<TimeUnit>>()?.0;
441
+ let time_zone: Value = ob.funcall("time_zone", ()).unwrap();
442
+ let time_zone = time_zone.try_convert()?;
443
+ DataType::Datetime(time_unit, time_zone)
444
+ }
445
+ "Polars::Decimal" => {
446
+ let precision = ob.funcall::<_, _, Value>("precision", ())?.try_convert()?;
447
+ let scale = ob.funcall::<_, _, Value>("scale", ())?.try_convert()?;
448
+ DataType::Decimal(precision, Some(scale))
449
+ }
450
+ "Polars::List" => {
451
+ let inner: Value = ob.funcall("inner", ()).unwrap();
452
+ let inner = inner.try_convert::<Wrap<DataType>>()?;
453
+ DataType::List(Box::new(inner.0))
454
+ }
455
+ "Polars::Struct" => {
456
+ let arr: RArray = ob.funcall("fields", ())?;
457
+ let mut fields = Vec::with_capacity(arr.len());
458
+ for v in arr.each() {
459
+ fields.push(v?.try_convert::<Wrap<Field>>()?.0);
460
+ }
461
+ DataType::Struct(fields)
462
+ }
463
+ dt => {
464
+ return Err(RbTypeError::new_err(format!(
465
+ "A {dt} object is not a correct polars DataType. \
466
+ Hint: use the class without instantiating it.",
467
+ )))
468
+ }
469
+ }
297
470
  } else {
298
471
  match ob.try_convert::<String>()?.as_str() {
299
472
  "u8" => DataType::UInt8,
@@ -336,16 +509,21 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
336
509
  Ok(AnyValue::Boolean(ob.try_convert::<bool>()?).into())
337
510
  } else if let Some(v) = Integer::from_value(ob) {
338
511
  Ok(AnyValue::Int64(v.to_i64()?).into())
339
- } else if let Some(v) = RFloat::from_value(ob) {
512
+ } else if let Some(v) = Float::from_value(ob) {
340
513
  Ok(AnyValue::Float64(v.to_f64()).into())
341
514
  } else if let Some(v) = RString::from_value(ob) {
342
- Ok(AnyValue::Utf8Owned(v.to_string()?.into()).into())
515
+ if v.enc_get() == Index::utf8() {
516
+ Ok(AnyValue::Utf8Owned(v.to_string()?.into()).into())
517
+ } else {
518
+ Ok(AnyValue::BinaryOwned(unsafe { v.as_slice() }.to_vec()).into())
519
+ }
343
520
  // call is_a? for ActiveSupport::TimeWithZone
344
521
  } else if ob.funcall::<_, _, bool>("is_a?", (class::time(),))? {
345
522
  let sec = ob.funcall::<_, _, i64>("to_i", ())?;
346
523
  let nsec = ob.funcall::<_, _, i64>("nsec", ())?;
347
524
  let v = sec * 1_000_000_000 + nsec;
348
- // TODO support time zone
525
+ // TODO support time zone when possible
526
+ // https://github.com/pola-rs/polars/issues/9103
349
527
  Ok(AnyValue::Datetime(v, TimeUnit::Nanoseconds, &None).into())
350
528
  } else if ob.is_nil() {
351
529
  Ok(AnyValue::Null.into())
@@ -366,17 +544,35 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
366
544
  if v.is_empty() {
367
545
  Ok(Wrap(AnyValue::List(Series::new_empty("", &DataType::Null))))
368
546
  } else {
369
- let avs = v.try_convert::<Wrap<Row>>()?.0 .0;
370
- // use first `n` values to infer datatype
371
- // this value is not too large as this will be done with every
372
- // anyvalue that has to be converted, which can be many
373
- let n = 25;
374
- let dtype = any_values_to_dtype(&avs[..std::cmp::min(avs.len(), n)])
375
- .map_err(RbPolarsErr::from)?;
547
+ let list = v;
548
+
549
+ let mut avs = Vec::with_capacity(25);
550
+ let mut iter = list.each();
551
+
552
+ for item in (&mut iter).take(25) {
553
+ avs.push(item?.try_convert::<Wrap<AnyValue>>()?.0)
554
+ }
555
+
556
+ let (dtype, _n_types) = any_values_to_dtype(&avs).map_err(RbPolarsErr::from)?;
557
+
558
+ // push the rest
559
+ avs.reserve(list.len());
560
+ for item in iter {
561
+ avs.push(item?.try_convert::<Wrap<AnyValue>>()?.0)
562
+ }
563
+
376
564
  let s = Series::from_any_values_and_dtype("", &avs, &dtype, true)
377
565
  .map_err(RbPolarsErr::from)?;
378
566
  Ok(Wrap(AnyValue::List(s)))
379
567
  }
568
+ } else if ob.is_kind_of(crate::rb_modules::datetime()) {
569
+ let sec: i64 = ob.funcall("to_i", ())?;
570
+ let nsec: i64 = ob.funcall("nsec", ())?;
571
+ Ok(Wrap(AnyValue::Datetime(
572
+ sec * 1_000_000_000 + nsec,
573
+ TimeUnit::Nanoseconds,
574
+ &None,
575
+ )))
380
576
  } else if ob.is_kind_of(crate::rb_modules::date()) {
381
577
  // convert to DateTime for UTC
382
578
  let v = ob
@@ -384,6 +580,20 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
384
580
  .funcall::<_, _, Value>("to_time", ())?
385
581
  .funcall::<_, _, i64>("to_i", ())?;
386
582
  Ok(Wrap(AnyValue::Date((v / 86400) as i32)))
583
+ } else if ob.is_kind_of(crate::rb_modules::bigdecimal()) {
584
+ let (sign, digits, _, exp): (i8, String, i32, i32) = ob
585
+ .funcall::<_, _, Value>("split", ())
586
+ .unwrap()
587
+ .try_convert()
588
+ .unwrap();
589
+ let (mut v, scale) = abs_decimal_from_digits(digits, exp).ok_or_else(|| {
590
+ RbPolarsErr::other("BigDecimal is too large to fit in Decimal128".into())
591
+ })?;
592
+ if sign < 0 {
593
+ // TODO better error
594
+ v = v.checked_neg().unwrap();
595
+ }
596
+ Ok(Wrap(AnyValue::Decimal(v, scale)))
387
597
  } else {
388
598
  Err(RbPolarsErr::other(format!(
389
599
  "object type not supported {:?}",
@@ -410,12 +620,12 @@ impl TryConvert for Wrap<Schema> {
410
620
 
411
621
  let mut schema = Vec::new();
412
622
  dict.foreach(|key: String, val: Wrap<DataType>| {
413
- schema.push(Field::new(&key, val.0));
623
+ schema.push(Ok(Field::new(&key, val.0)));
414
624
  Ok(ForEach::Continue)
415
625
  })
416
626
  .unwrap();
417
627
 
418
- Ok(Wrap(schema.into_iter().into()))
628
+ Ok(Wrap(schema.into_iter().collect::<RbResult<Schema>>()?))
419
629
  }
420
630
  }
421
631