polars-df 0.3.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +24 -1
  3. data/Cargo.lock +486 -380
  4. data/Cargo.toml +0 -2
  5. data/README.md +31 -2
  6. data/ext/polars/Cargo.toml +10 -4
  7. data/ext/polars/src/apply/dataframe.rs +2 -2
  8. data/ext/polars/src/{lazy/apply.rs → apply/lazy.rs} +1 -2
  9. data/ext/polars/src/apply/mod.rs +1 -0
  10. data/ext/polars/src/batched_csv.rs +36 -19
  11. data/ext/polars/src/conversion.rs +159 -16
  12. data/ext/polars/src/dataframe.rs +51 -52
  13. data/ext/polars/src/error.rs +0 -4
  14. data/ext/polars/src/expr/binary.rs +69 -0
  15. data/ext/polars/src/expr/categorical.rs +10 -0
  16. data/ext/polars/src/expr/datetime.rs +223 -0
  17. data/ext/polars/src/{lazy/dsl.rs → expr/general.rs} +22 -799
  18. data/ext/polars/src/expr/list.rs +146 -0
  19. data/ext/polars/src/{lazy → expr}/meta.rs +16 -6
  20. data/ext/polars/src/expr/string.rs +313 -0
  21. data/ext/polars/src/expr/struct.rs +15 -0
  22. data/ext/polars/src/expr.rs +33 -0
  23. data/ext/polars/src/functions/eager.rs +93 -0
  24. data/ext/polars/src/functions/io.rs +34 -0
  25. data/ext/polars/src/functions/lazy.rs +209 -0
  26. data/ext/polars/src/functions/meta.rs +8 -0
  27. data/ext/polars/src/functions/mod.rs +5 -0
  28. data/ext/polars/src/functions/whenthen.rs +43 -0
  29. data/ext/polars/src/{lazy/dataframe.rs → lazyframe.rs} +58 -45
  30. data/ext/polars/src/lazygroupby.rs +29 -0
  31. data/ext/polars/src/lib.rs +216 -300
  32. data/ext/polars/src/rb_modules.rs +8 -0
  33. data/ext/polars/src/series/aggregation.rs +83 -0
  34. data/ext/polars/src/series/arithmetic.rs +88 -0
  35. data/ext/polars/src/series/comparison.rs +251 -0
  36. data/ext/polars/src/series/construction.rs +164 -0
  37. data/ext/polars/src/series.rs +103 -531
  38. data/lib/polars/batched_csv_reader.rb +1 -1
  39. data/lib/polars/binary_expr.rb +77 -0
  40. data/lib/polars/binary_name_space.rb +66 -0
  41. data/lib/polars/convert.rb +2 -2
  42. data/lib/polars/data_frame.rb +263 -87
  43. data/lib/polars/data_types.rb +6 -4
  44. data/lib/polars/date_time_expr.rb +148 -8
  45. data/lib/polars/expr.rb +78 -11
  46. data/lib/polars/io.rb +73 -62
  47. data/lib/polars/lazy_frame.rb +107 -10
  48. data/lib/polars/lazy_functions.rb +7 -3
  49. data/lib/polars/list_expr.rb +70 -21
  50. data/lib/polars/list_name_space.rb +2 -2
  51. data/lib/polars/series.rb +190 -74
  52. data/lib/polars/string_expr.rb +150 -44
  53. data/lib/polars/string_name_space.rb +4 -4
  54. data/lib/polars/struct_name_space.rb +32 -0
  55. data/lib/polars/utils.rb +51 -9
  56. data/lib/polars/version.rb +1 -1
  57. data/lib/polars.rb +4 -2
  58. metadata +29 -12
  59. data/ext/polars/src/lazy/mod.rs +0 -5
  60. data/ext/polars/src/lazy/utils.rs +0 -13
  61. data/ext/polars/src/list_construction.rs +0 -100
  62. data/ext/polars/src/{numo.rs → series/export.rs} +0 -0
  63. data/ext/polars/src/{set.rs → series/set_at_idx.rs} +0 -0
data/Cargo.toml CHANGED
@@ -3,8 +3,6 @@ members = ["ext/polars"]
3
3
 
4
4
  [patch.crates-io]
5
5
  jsonpath_lib = { git = "https://github.com/ritchie46/jsonpath", rev = "24eaf0b4416edff38a4d1b6b17bc4b9f3f047b4b" }
6
- halfbrown = { git = "https://github.com/Licenser/halfbrown", rev = "952023c5dd6461b009bb5ba66b9aa979bd75949f" }
7
- arrow2 = { git = "https://github.com/ankane/arrow2", rev = "ef0270922a217070ba9942567c0ff3263ae8c531" }
8
6
 
9
7
  [profile.release]
10
8
  strip = true
data/README.md CHANGED
@@ -50,14 +50,43 @@ From Parquet
50
50
 
51
51
  ```ruby
52
52
  Polars.read_parquet("file.parquet")
53
+
54
+ # or lazily with
55
+ Polars.scan_parquet("file.parquet")
53
56
  ```
54
57
 
55
58
  From Active Record
56
59
 
57
60
  ```ruby
58
- Polars.read_sql(User.all)
61
+ Polars.read_database(User.all)
62
+ # or
63
+ Polars.read_database("SELECT * FROM users")
64
+ ```
65
+
66
+ From JSON
67
+
68
+ ```ruby
69
+ Polars.read_json("file.json")
59
70
  # or
60
- Polars.read_sql("SELECT * FROM users")
71
+ Polars.read_ndjson("file.ndjson")
72
+
73
+ # or lazily with
74
+ Polars.scan_ndjson("file.ndjson")
75
+ ```
76
+
77
+ From Feather / Arrow IPC
78
+
79
+ ```ruby
80
+ Polars.read_ipc("file.arrow")
81
+
82
+ # or lazily with
83
+ Polars.scan_ipc("file.arrow")
84
+ ```
85
+
86
+ From Avro
87
+
88
+ ```ruby
89
+ Polars.read_avro("file.avro")
61
90
  ```
62
91
 
63
92
  From a hash
data/ext/polars/Cargo.toml CHANGED
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "polars"
3
- version = "0.3.1"
3
+ version = "0.5.0"
4
4
  license = "MIT"
5
5
  authors = ["Andrew Kane <andrew@ankane.org>"]
6
6
  edition = "2021"
@@ -12,13 +12,15 @@ crate-type = ["cdylib"]
12
12
  [dependencies]
13
13
  ahash = "0.8"
14
14
  magnus = "0.5"
15
- polars-core = "0.27.0"
15
+ polars-core = "0.29.0"
16
16
  serde_json = "1"
17
+ smartstring = "1"
17
18
 
18
19
  [dependencies.polars]
19
- version = "0.27.0"
20
+ version = "0.29.0"
20
21
  features = [
21
22
  "abs",
23
+ "approx_unique",
22
24
  "arange",
23
25
  "arg_where",
24
26
  "asof_join",
@@ -26,7 +28,7 @@ features = [
26
28
  "binary_encoding",
27
29
  "concat_str",
28
30
  "cse",
29
- "csv-file",
31
+ "csv",
30
32
  "cum_agg",
31
33
  "cumulative_eval",
32
34
  "dataframe_arithmetic",
@@ -44,10 +46,13 @@ features = [
44
46
  "ipc",
45
47
  "is_first",
46
48
  "is_in",
49
+ "is_unique",
47
50
  "json",
48
51
  "lazy",
49
52
  "lazy_regex",
53
+ "list_count",
50
54
  "list_eval",
55
+ "list_take",
51
56
  "list_to_struct",
52
57
  "log",
53
58
  "meta",
@@ -73,6 +78,7 @@ features = [
73
78
  "serde-lazy",
74
79
  "sign",
75
80
  "string_encoding",
81
+ "string_from_radix",
76
82
  "string_justify",
77
83
  "strings",
78
84
  "timezones",
data/ext/polars/src/apply/dataframe.rs CHANGED
@@ -202,8 +202,8 @@ pub fn apply_lambda_with_utf8_out_type(
202
202
  }
203
203
 
204
204
  /// Apply a lambda with list output type
205
- pub fn apply_lambda_with_list_out_type<'a>(
206
- df: &'a DataFrame,
205
+ pub fn apply_lambda_with_list_out_type(
206
+ df: &DataFrame,
207
207
  lambda: Value,
208
208
  init_null_count: usize,
209
209
  first_value: Option<&Series>,
data/ext/polars/src/{lazy/apply.rs → apply/lazy.rs} CHANGED
@@ -1,8 +1,7 @@
1
1
  use magnus::Value;
2
2
  use polars::prelude::*;
3
3
 
4
- use crate::lazy::dsl::RbExpr;
5
- use crate::Wrap;
4
+ use crate::{RbExpr, Wrap};
6
5
 
7
6
  pub fn binary_lambda(_lambda: Value, _a: Series, _b: Series) -> PolarsResult<Option<Series>> {
8
7
  todo!();
data/ext/polars/src/apply/mod.rs CHANGED
@@ -1,4 +1,5 @@
1
1
  pub mod dataframe;
2
+ pub mod lazy;
2
3
  pub mod series;
3
4
 
4
5
  use magnus::{RHash, Value};
data/ext/polars/src/batched_csv.rs CHANGED
@@ -7,11 +7,17 @@ use std::cell::RefCell;
7
7
  use std::path::PathBuf;
8
8
 
9
9
  use crate::conversion::*;
10
+ use crate::prelude::read_impl::OwnedBatchedCsvReaderMmap;
10
11
  use crate::{RbDataFrame, RbPolarsErr, RbResult};
11
12
 
13
+ pub enum BatchedReader {
14
+ MMap(OwnedBatchedCsvReaderMmap),
15
+ Read(OwnedBatchedCsvReader),
16
+ }
17
+
12
18
  #[magnus::wrap(class = "Polars::RbBatchedCsv")]
13
19
  pub struct RbBatchedCsv {
14
- pub reader: RefCell<OwnedBatchedCsvReader>,
20
+ pub reader: RefCell<BatchedReader>,
15
21
  }
16
22
 
17
23
  impl RbBatchedCsv {
@@ -38,7 +44,7 @@ impl RbBatchedCsv {
38
44
  let comment_char: Option<String> = arguments[16].try_convert()?;
39
45
  let quote_char: Option<String> = arguments[17].try_convert()?;
40
46
  let null_values: Option<Wrap<NullValues>> = arguments[18].try_convert()?;
41
- let parse_dates: bool = arguments[19].try_convert()?;
47
+ let try_parse_dates: bool = arguments[19].try_convert()?;
42
48
  let skip_rows_after_header: usize = arguments[20].try_convert()?;
43
49
  let row_count: Option<(String, IdxSize)> = arguments[21].try_convert()?;
44
50
  let sample_size: usize = arguments[22].try_convert()?;
@@ -62,11 +68,13 @@ impl RbBatchedCsv {
62
68
  };
63
69
 
64
70
  let overwrite_dtype = overwrite_dtype.map(|overwrite_dtype| {
65
- let fields = overwrite_dtype.iter().map(|(name, dtype)| {
66
- let dtype = dtype.0.clone();
67
- Field::new(name, dtype)
68
- });
69
- Schema::from(fields)
71
+ overwrite_dtype
72
+ .iter()
73
+ .map(|(name, dtype)| {
74
+ let dtype = dtype.0.clone();
75
+ Field::new(name, dtype)
76
+ })
77
+ .collect::<Schema>()
70
78
  });
71
79
 
72
80
  let overwrite_dtype_slice = overwrite_dtype_slice.map(|overwrite_dtype| {
@@ -95,14 +103,24 @@ impl RbBatchedCsv {
95
103
  .low_memory(low_memory)
96
104
  .with_comment_char(comment_char)
97
105
  .with_null_values(null_values)
98
- .with_parse_dates(parse_dates)
106
+ .with_try_parse_dates(try_parse_dates)
99
107
  .with_quote_char(quote_char)
100
108
  .with_end_of_line_char(eol_char)
101
109
  .with_skip_rows_after_header(skip_rows_after_header)
102
110
  .with_row_count(row_count)
103
- .sample_size(sample_size)
104
- .batched(overwrite_dtype.map(Arc::new))
105
- .map_err(RbPolarsErr::from)?;
111
+ .sample_size(sample_size);
112
+
113
+ let reader = if low_memory {
114
+ let reader = reader
115
+ .batched_read(overwrite_dtype.map(Arc::new))
116
+ .map_err(RbPolarsErr::from)?;
117
+ BatchedReader::Read(reader)
118
+ } else {
119
+ let reader = reader
120
+ .batched_mmap(overwrite_dtype.map(Arc::new))
121
+ .map_err(RbPolarsErr::from)?;
122
+ BatchedReader::MMap(reader)
123
+ };
106
124
 
107
125
  Ok(RbBatchedCsv {
108
126
  reader: RefCell::new(reader),
@@ -110,13 +128,12 @@ impl RbBatchedCsv {
110
128
  }
111
129
 
112
130
  pub fn next_batches(&self, n: usize) -> RbResult<Option<RArray>> {
113
- let batches = self
114
- .reader
115
- .borrow_mut()
116
- .next_batches(n)
117
- .map_err(RbPolarsErr::from)?;
118
- Ok(batches.map(|batches| {
119
- RArray::from_iter(batches.into_iter().map(|out| RbDataFrame::from(out.1)))
120
- }))
131
+ let batches = match &mut *self.reader.borrow_mut() {
132
+ BatchedReader::MMap(reader) => reader.next_batches(n),
133
+ BatchedReader::Read(reader) => reader.next_batches(n),
134
+ }
135
+ .map_err(RbPolarsErr::from)?;
136
+
137
+ Ok(batches.map(|batches| RArray::from_iter(batches.into_iter().map(RbDataFrame::from))))
121
138
  }
122
139
  }
data/ext/polars/src/conversion.rs CHANGED
@@ -1,7 +1,11 @@
1
+ use std::fmt::{Display, Formatter};
2
+ use std::hash::{Hash, Hasher};
3
+
1
4
  use magnus::{
2
5
  class, exception, r_hash::ForEach, ruby_handle::RubyHandle, Integer, IntoValue, Module, RArray,
3
6
  RFloat, RHash, RString, Symbol, TryConvert, Value, QNIL,
4
7
  };
8
+ use magnus::encoding::{EncodingCapable, Index};
5
9
  use polars::chunked_array::object::PolarsObjectSafe;
6
10
  use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
7
11
  use polars::datatypes::AnyValue;
@@ -10,9 +14,9 @@ use polars::frame::NullStrategy;
10
14
  use polars::io::avro::AvroCompression;
11
15
  use polars::prelude::*;
12
16
  use polars::series::ops::NullBehavior;
13
- use std::fmt::{Display, Formatter};
14
- use std::hash::{Hash, Hasher};
17
+ use smartstring::alias::String as SmartString;
15
18
 
19
+ use crate::rb_modules::utils;
16
20
  use crate::{RbDataFrame, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbValueError};
17
21
 
18
22
  pub(crate) fn slice_to_wrapped<T>(slice: &[T]) -> &[Wrap<T>] {
@@ -21,6 +25,12 @@ pub(crate) fn slice_to_wrapped<T>(slice: &[T]) -> &[Wrap<T>] {
21
25
  unsafe { std::mem::transmute(slice) }
22
26
  }
23
27
 
28
+ pub(crate) fn slice_extract_wrapped<T>(slice: &[Wrap<T>]) -> &[T] {
29
+ // Safety:
30
+ // Wrap is transparent.
31
+ unsafe { std::mem::transmute(slice) }
32
+ }
33
+
24
34
  pub(crate) fn vec_extract_wrapped<T>(buf: Vec<Wrap<T>>) -> Vec<T> {
25
35
  // Safety:
26
36
  // Wrap is transparent.
@@ -82,6 +92,22 @@ impl TryConvert for Wrap<Utf8Chunked> {
82
92
  }
83
93
  }
84
94
 
95
+ impl TryConvert for Wrap<BinaryChunked> {
96
+ fn try_convert(obj: Value) -> RbResult<Self> {
97
+ let (seq, len) = get_rbseq(obj)?;
98
+ let mut builder = BinaryChunkedBuilder::new("", len, len * 25);
99
+
100
+ for res in seq.each() {
101
+ let item = res?;
102
+ match item.try_convert::<RString>() {
103
+ Ok(val) => builder.append_value(unsafe { val.as_slice() }),
104
+ Err(_) => builder.append_null(),
105
+ }
106
+ }
107
+ Ok(Wrap(builder.finish()))
108
+ }
109
+ }
110
+
85
111
  impl TryConvert for Wrap<NullValues> {
86
112
  fn try_convert(ob: Value) -> RbResult<Self> {
87
113
  if let Ok(s) = ob.try_convert::<String>() {
@@ -98,6 +124,14 @@ impl TryConvert for Wrap<NullValues> {
98
124
  }
99
125
  }
100
126
 
127
+ fn struct_dict<'a>(vals: impl Iterator<Item = AnyValue<'a>>, flds: &[Field]) -> Value {
128
+ let dict = RHash::new();
129
+ for (fld, val) in flds.iter().zip(vals) {
130
+ dict.aset(fld.name().as_str(), Wrap(val)).unwrap()
131
+ }
132
+ dict.into_value()
133
+ }
134
+
101
135
  impl IntoValue for Wrap<AnyValue<'_>> {
102
136
  fn into_value_with(self, _: &RubyHandle) -> Value {
103
137
  match self.0 {
@@ -114,7 +148,7 @@ impl IntoValue for Wrap<AnyValue<'_>> {
114
148
  AnyValue::Null => *QNIL,
115
149
  AnyValue::Boolean(v) => Value::from(v),
116
150
  AnyValue::Utf8(v) => Value::from(v),
117
- AnyValue::Utf8Owned(_v) => todo!(),
151
+ AnyValue::Utf8Owned(v) => Value::from(v.as_str()),
118
152
  AnyValue::Categorical(_idx, _rev, _arr) => todo!(),
119
153
  AnyValue::Date(v) => class::time()
120
154
  .funcall::<_, _, Value>("at", (v * 86400,))
@@ -154,15 +188,25 @@ impl IntoValue for Wrap<AnyValue<'_>> {
154
188
  t.funcall::<_, _, Value>("utc", ()).unwrap()
155
189
  }
156
190
  }
157
- AnyValue::Duration(_v, _tu) => todo!(),
191
+ AnyValue::Duration(v, tu) => {
192
+ let tu = tu.to_ascii();
193
+ utils().funcall("_to_ruby_duration", (v, tu)).unwrap()
194
+ }
158
195
  AnyValue::Time(_v) => todo!(),
159
196
  AnyValue::List(v) => RbSeries::new(v).to_a().into_value(),
160
- ref _av @ AnyValue::Struct(_, _, _flds) => todo!(),
161
- AnyValue::StructOwned(_payload) => todo!(),
162
- AnyValue::Object(_v) => todo!(),
163
- AnyValue::ObjectOwned(_v) => todo!(),
164
- AnyValue::Binary(_v) => todo!(),
165
- AnyValue::BinaryOwned(_v) => todo!(),
197
+ ref av @ AnyValue::Struct(_, _, flds) => struct_dict(av._iter_struct_av(), flds),
198
+ AnyValue::StructOwned(payload) => struct_dict(payload.0.into_iter(), &payload.1),
199
+ AnyValue::Object(v) => {
200
+ let object = v.as_any().downcast_ref::<ObjectValue>().unwrap();
201
+ object.inner
202
+ }
203
+ AnyValue::ObjectOwned(v) => {
204
+ let object = v.0.as_any().downcast_ref::<ObjectValue>().unwrap();
205
+ object.inner
206
+ }
207
+ AnyValue::Binary(v) => RString::from_slice(v).into_value(),
208
+ AnyValue::BinaryOwned(v) => RString::from_slice(&v).into_value(),
209
+ AnyValue::Decimal(_v, _scale) => todo!(),
166
210
  }
167
211
  }
168
212
  }
@@ -182,7 +226,7 @@ impl IntoValue for Wrap<DataType> {
182
226
  DataType::UInt64 => pl.const_get::<_, Value>("UInt64").unwrap(),
183
227
  DataType::Float32 => pl.const_get::<_, Value>("Float32").unwrap(),
184
228
  DataType::Float64 => pl.const_get::<_, Value>("Float64").unwrap(),
185
- DataType::Decimal128(_) => todo!(),
229
+ DataType::Decimal(_precision, _scale) => todo!(),
186
230
  DataType::Boolean => pl.const_get::<_, Value>("Boolean").unwrap(),
187
231
  DataType::Utf8 => pl.const_get::<_, Value>("Utf8").unwrap(),
188
232
  DataType::Binary => pl.const_get::<_, Value>("Binary").unwrap(),
@@ -210,7 +254,7 @@ impl IntoValue for Wrap<DataType> {
210
254
  DataType::Struct(fields) => {
211
255
  let field_class = pl.const_get::<_, Value>("Field").unwrap();
212
256
  let iter = fields.iter().map(|fld| {
213
- let name = fld.name().clone();
257
+ let name = fld.name().as_str();
214
258
  let dtype = Wrap(fld.data_type().clone());
215
259
  field_class
216
260
  .funcall::<_, _, Value>("new", (name, dtype))
@@ -228,6 +272,75 @@ impl IntoValue for Wrap<DataType> {
228
272
  }
229
273
  }
230
274
 
275
+ impl IntoValue for Wrap<TimeUnit> {
276
+ fn into_value_with(self, _: &RubyHandle) -> Value {
277
+ let tu = match self.0 {
278
+ TimeUnit::Nanoseconds => "ns",
279
+ TimeUnit::Microseconds => "us",
280
+ TimeUnit::Milliseconds => "ms",
281
+ };
282
+ tu.into_value()
283
+ }
284
+ }
285
+
286
+ impl IntoValue for Wrap<&Utf8Chunked> {
287
+ fn into_value_with(self, _: &RubyHandle) -> Value {
288
+ let iter = self.0.into_iter();
289
+ RArray::from_iter(iter).into_value()
290
+ }
291
+ }
292
+
293
+ impl IntoValue for Wrap<&BinaryChunked> {
294
+ fn into_value_with(self, _: &RubyHandle) -> Value {
295
+ let iter = self
296
+ .0
297
+ .into_iter()
298
+ .map(|opt_bytes| opt_bytes.map(RString::from_slice));
299
+ RArray::from_iter(iter).into_value()
300
+ }
301
+ }
302
+
303
+ impl IntoValue for Wrap<&StructChunked> {
304
+ fn into_value_with(self, _: &RubyHandle) -> Value {
305
+ let s = self.0.clone().into_series();
306
+ // todo! iterate its chunks and flatten.
307
+ // make series::iter() accept a chunk index.
308
+ let s = s.rechunk();
309
+ let iter = s.iter().map(|av| {
310
+ if let AnyValue::Struct(_, _, flds) = av {
311
+ struct_dict(av._iter_struct_av(), flds)
312
+ } else {
313
+ unreachable!()
314
+ }
315
+ });
316
+
317
+ RArray::from_iter(iter).into_value()
318
+ }
319
+ }
320
+
321
+ impl IntoValue for Wrap<&DurationChunked> {
322
+ fn into_value_with(self, _: &RubyHandle) -> Value {
323
+ let utils = utils();
324
+ let tu = Wrap(self.0.time_unit()).into_value();
325
+ let iter = self.0.into_iter().map(|opt_v| {
326
+ opt_v.map(|v| {
327
+ utils
328
+ .funcall::<_, _, Value>("_to_ruby_duration", (v, tu))
329
+ .unwrap()
330
+ })
331
+ });
332
+ RArray::from_iter(iter).into_value()
333
+ }
334
+ }
335
+
336
+ impl TryConvert for Wrap<Field> {
337
+ fn try_convert(ob: Value) -> RbResult<Self> {
338
+ let name: String = ob.funcall("name", ())?;
339
+ let dtype: Wrap<DataType> = ob.funcall("dtype", ())?;
340
+ Ok(Wrap(Field::new(&name, dtype.0)))
341
+ }
342
+ }
343
+
231
344
  impl TryConvert for Wrap<DataType> {
232
345
  fn try_convert(ob: Value) -> RbResult<Self> {
233
346
  let dtype = if ob.is_kind_of(class::class()) {
@@ -261,6 +374,20 @@ impl TryConvert for Wrap<DataType> {
261
374
  )))
262
375
  }
263
376
  }
377
+ // TODO improve
378
+ } else if ob.try_convert::<String>().is_err() {
379
+ let name = unsafe { ob.class().name() }.into_owned();
380
+ match name.as_str() {
381
+ "Polars::Struct" => {
382
+ let arr: RArray = ob.funcall("fields", ())?;
383
+ let mut fields = Vec::with_capacity(arr.len());
384
+ for v in arr.each() {
385
+ fields.push(v?.try_convert::<Wrap<Field>>()?.0);
386
+ }
387
+ DataType::Struct(fields)
388
+ }
389
+ _ => todo!(),
390
+ }
264
391
  } else {
265
392
  match ob.try_convert::<String>()?.as_str() {
266
393
  "u8" => DataType::UInt8,
@@ -306,7 +433,11 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
306
433
  } else if let Some(v) = RFloat::from_value(ob) {
307
434
  Ok(AnyValue::Float64(v.to_f64()).into())
308
435
  } else if let Some(v) = RString::from_value(ob) {
309
- Ok(AnyValue::Utf8Owned(v.to_string()?.into()).into())
436
+ if v.enc_get() == Index::utf8() {
437
+ Ok(AnyValue::Utf8Owned(v.to_string()?.into()).into())
438
+ } else {
439
+ Ok(AnyValue::BinaryOwned(unsafe { v.as_slice() }.to_vec()).into())
440
+ }
310
441
  // call is_a? for ActiveSupport::TimeWithZone
311
442
  } else if ob.funcall::<_, _, bool>("is_a?", (class::time(),))? {
312
443
  let sec = ob.funcall::<_, _, i64>("to_i", ())?;
@@ -340,10 +471,14 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
340
471
  let n = 25;
341
472
  let dtype = any_values_to_dtype(&avs[..std::cmp::min(avs.len(), n)])
342
473
  .map_err(RbPolarsErr::from)?;
343
- let s = Series::from_any_values_and_dtype("", &avs, &dtype)
474
+ let s = Series::from_any_values_and_dtype("", &avs, &dtype, true)
344
475
  .map_err(RbPolarsErr::from)?;
345
476
  Ok(Wrap(AnyValue::List(s)))
346
477
  }
478
+ } else if ob.is_kind_of(crate::rb_modules::datetime()) {
479
+ let sec: i64 = ob.funcall("to_i", ())?;
480
+ let nsec: i64 = ob.funcall("nsec", ())?;
481
+ Ok(Wrap(AnyValue::Datetime(sec * 1_000_000_000 + nsec, TimeUnit::Nanoseconds, &None)))
347
482
  } else if ob.is_kind_of(crate::rb_modules::date()) {
348
483
  // convert to DateTime for UTC
349
484
  let v = ob
@@ -377,12 +512,12 @@ impl TryConvert for Wrap<Schema> {
377
512
 
378
513
  let mut schema = Vec::new();
379
514
  dict.foreach(|key: String, val: Wrap<DataType>| {
380
- schema.push(Field::new(&key, val.0));
515
+ schema.push(Ok(Field::new(&key, val.0)));
381
516
  Ok(ForEach::Continue)
382
517
  })
383
518
  .unwrap();
384
519
 
385
- Ok(Wrap(schema.into_iter().into()))
520
+ Ok(Wrap(schema.into_iter().collect::<RbResult<Schema>>()?))
386
521
  }
387
522
  }
388
523
 
@@ -870,3 +1005,11 @@ pub fn parse_parquet_compression(
870
1005
  };
871
1006
  Ok(parsed)
872
1007
  }
1008
+
1009
+ pub(crate) fn strings_to_smartstrings<I, S>(container: I) -> Vec<SmartString>
1010
+ where
1011
+ I: IntoIterator<Item = S>,
1012
+ S: AsRef<str>,
1013
+ {
1014
+ container.into_iter().map(|s| s.as_ref().into()).collect()
1015
+ }