polars-df 0.3.1 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (63) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +24 -1
  3. data/Cargo.lock +486 -380
  4. data/Cargo.toml +0 -2
  5. data/README.md +31 -2
  6. data/ext/polars/Cargo.toml +10 -4
  7. data/ext/polars/src/apply/dataframe.rs +2 -2
  8. data/ext/polars/src/{lazy/apply.rs → apply/lazy.rs} +1 -2
  9. data/ext/polars/src/apply/mod.rs +1 -0
  10. data/ext/polars/src/batched_csv.rs +36 -19
  11. data/ext/polars/src/conversion.rs +159 -16
  12. data/ext/polars/src/dataframe.rs +51 -52
  13. data/ext/polars/src/error.rs +0 -4
  14. data/ext/polars/src/expr/binary.rs +69 -0
  15. data/ext/polars/src/expr/categorical.rs +10 -0
  16. data/ext/polars/src/expr/datetime.rs +223 -0
  17. data/ext/polars/src/{lazy/dsl.rs → expr/general.rs} +22 -799
  18. data/ext/polars/src/expr/list.rs +146 -0
  19. data/ext/polars/src/{lazy → expr}/meta.rs +16 -6
  20. data/ext/polars/src/expr/string.rs +313 -0
  21. data/ext/polars/src/expr/struct.rs +15 -0
  22. data/ext/polars/src/expr.rs +33 -0
  23. data/ext/polars/src/functions/eager.rs +93 -0
  24. data/ext/polars/src/functions/io.rs +34 -0
  25. data/ext/polars/src/functions/lazy.rs +209 -0
  26. data/ext/polars/src/functions/meta.rs +8 -0
  27. data/ext/polars/src/functions/mod.rs +5 -0
  28. data/ext/polars/src/functions/whenthen.rs +43 -0
  29. data/ext/polars/src/{lazy/dataframe.rs → lazyframe.rs} +58 -45
  30. data/ext/polars/src/lazygroupby.rs +29 -0
  31. data/ext/polars/src/lib.rs +216 -300
  32. data/ext/polars/src/rb_modules.rs +8 -0
  33. data/ext/polars/src/series/aggregation.rs +83 -0
  34. data/ext/polars/src/series/arithmetic.rs +88 -0
  35. data/ext/polars/src/series/comparison.rs +251 -0
  36. data/ext/polars/src/series/construction.rs +164 -0
  37. data/ext/polars/src/series.rs +103 -531
  38. data/lib/polars/batched_csv_reader.rb +1 -1
  39. data/lib/polars/binary_expr.rb +77 -0
  40. data/lib/polars/binary_name_space.rb +66 -0
  41. data/lib/polars/convert.rb +2 -2
  42. data/lib/polars/data_frame.rb +263 -87
  43. data/lib/polars/data_types.rb +6 -4
  44. data/lib/polars/date_time_expr.rb +148 -8
  45. data/lib/polars/expr.rb +78 -11
  46. data/lib/polars/io.rb +73 -62
  47. data/lib/polars/lazy_frame.rb +107 -10
  48. data/lib/polars/lazy_functions.rb +7 -3
  49. data/lib/polars/list_expr.rb +70 -21
  50. data/lib/polars/list_name_space.rb +2 -2
  51. data/lib/polars/series.rb +190 -74
  52. data/lib/polars/string_expr.rb +150 -44
  53. data/lib/polars/string_name_space.rb +4 -4
  54. data/lib/polars/struct_name_space.rb +32 -0
  55. data/lib/polars/utils.rb +51 -9
  56. data/lib/polars/version.rb +1 -1
  57. data/lib/polars.rb +4 -2
  58. metadata +29 -12
  59. data/ext/polars/src/lazy/mod.rs +0 -5
  60. data/ext/polars/src/lazy/utils.rs +0 -13
  61. data/ext/polars/src/list_construction.rs +0 -100
  62. /data/ext/polars/src/{numo.rs → series/export.rs} +0 -0
  63. /data/ext/polars/src/{set.rs → series/set_at_idx.rs} +0 -0
data/Cargo.toml CHANGED
@@ -3,8 +3,6 @@ members = ["ext/polars"]
3
3
 
4
4
  [patch.crates-io]
5
5
  jsonpath_lib = { git = "https://github.com/ritchie46/jsonpath", rev = "24eaf0b4416edff38a4d1b6b17bc4b9f3f047b4b" }
6
- halfbrown = { git = "https://github.com/Licenser/halfbrown", rev = "952023c5dd6461b009bb5ba66b9aa979bd75949f" }
7
- arrow2 = { git = "https://github.com/ankane/arrow2", rev = "ef0270922a217070ba9942567c0ff3263ae8c531" }
8
6
 
9
7
  [profile.release]
10
8
  strip = true
data/README.md CHANGED
@@ -50,14 +50,43 @@ From Parquet
50
50
 
51
51
  ```ruby
52
52
  Polars.read_parquet("file.parquet")
53
+
54
+ # or lazily with
55
+ Polars.scan_parquet("file.parquet")
53
56
  ```
54
57
 
55
58
  From Active Record
56
59
 
57
60
  ```ruby
58
- Polars.read_sql(User.all)
61
+ Polars.read_database(User.all)
62
+ # or
63
+ Polars.read_database("SELECT * FROM users")
64
+ ```
65
+
66
+ From JSON
67
+
68
+ ```ruby
69
+ Polars.read_json("file.json")
59
70
  # or
60
- Polars.read_sql("SELECT * FROM users")
71
+ Polars.read_ndjson("file.ndjson")
72
+
73
+ # or lazily with
74
+ Polars.scan_ndjson("file.ndjson")
75
+ ```
76
+
77
+ From Feather / Arrow IPC
78
+
79
+ ```ruby
80
+ Polars.read_ipc("file.arrow")
81
+
82
+ # or lazily with
83
+ Polars.scan_ipc("file.arrow")
84
+ ```
85
+
86
+ From Avro
87
+
88
+ ```ruby
89
+ Polars.read_avro("file.avro")
61
90
  ```
62
91
 
63
92
  From a hash
data/ext/polars/Cargo.toml CHANGED
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "polars"
3
- version = "0.3.1"
3
+ version = "0.5.0"
4
4
  license = "MIT"
5
5
  authors = ["Andrew Kane <andrew@ankane.org>"]
6
6
  edition = "2021"
@@ -12,13 +12,15 @@ crate-type = ["cdylib"]
12
12
  [dependencies]
13
13
  ahash = "0.8"
14
14
  magnus = "0.5"
15
- polars-core = "0.27.0"
15
+ polars-core = "0.29.0"
16
16
  serde_json = "1"
17
+ smartstring = "1"
17
18
 
18
19
  [dependencies.polars]
19
- version = "0.27.0"
20
+ version = "0.29.0"
20
21
  features = [
21
22
  "abs",
23
+ "approx_unique",
22
24
  "arange",
23
25
  "arg_where",
24
26
  "asof_join",
@@ -26,7 +28,7 @@ features = [
26
28
  "binary_encoding",
27
29
  "concat_str",
28
30
  "cse",
29
- "csv-file",
31
+ "csv",
30
32
  "cum_agg",
31
33
  "cumulative_eval",
32
34
  "dataframe_arithmetic",
@@ -44,10 +46,13 @@ features = [
44
46
  "ipc",
45
47
  "is_first",
46
48
  "is_in",
49
+ "is_unique",
47
50
  "json",
48
51
  "lazy",
49
52
  "lazy_regex",
53
+ "list_count",
50
54
  "list_eval",
55
+ "list_take",
51
56
  "list_to_struct",
52
57
  "log",
53
58
  "meta",
@@ -73,6 +78,7 @@ features = [
73
78
  "serde-lazy",
74
79
  "sign",
75
80
  "string_encoding",
81
+ "string_from_radix",
76
82
  "string_justify",
77
83
  "strings",
78
84
  "timezones",
data/ext/polars/src/apply/dataframe.rs CHANGED
@@ -202,8 +202,8 @@ pub fn apply_lambda_with_utf8_out_type(
202
202
  }
203
203
 
204
204
  /// Apply a lambda with list output type
205
- pub fn apply_lambda_with_list_out_type<'a>(
206
- df: &'a DataFrame,
205
+ pub fn apply_lambda_with_list_out_type(
206
+ df: &DataFrame,
207
207
  lambda: Value,
208
208
  init_null_count: usize,
209
209
  first_value: Option<&Series>,
data/ext/polars/src/{lazy/apply.rs → apply/lazy.rs} CHANGED
@@ -1,8 +1,7 @@
1
1
  use magnus::Value;
2
2
  use polars::prelude::*;
3
3
 
4
- use crate::lazy::dsl::RbExpr;
5
- use crate::Wrap;
4
+ use crate::{RbExpr, Wrap};
6
5
 
7
6
  pub fn binary_lambda(_lambda: Value, _a: Series, _b: Series) -> PolarsResult<Option<Series>> {
8
7
  todo!();
data/ext/polars/src/apply/mod.rs CHANGED
@@ -1,4 +1,5 @@
1
1
  pub mod dataframe;
2
+ pub mod lazy;
2
3
  pub mod series;
3
4
 
4
5
  use magnus::{RHash, Value};
data/ext/polars/src/batched_csv.rs CHANGED
@@ -7,11 +7,17 @@ use std::cell::RefCell;
7
7
  use std::path::PathBuf;
8
8
 
9
9
  use crate::conversion::*;
10
+ use crate::prelude::read_impl::OwnedBatchedCsvReaderMmap;
10
11
  use crate::{RbDataFrame, RbPolarsErr, RbResult};
11
12
 
13
+ pub enum BatchedReader {
14
+ MMap(OwnedBatchedCsvReaderMmap),
15
+ Read(OwnedBatchedCsvReader),
16
+ }
17
+
12
18
  #[magnus::wrap(class = "Polars::RbBatchedCsv")]
13
19
  pub struct RbBatchedCsv {
14
- pub reader: RefCell<OwnedBatchedCsvReader>,
20
+ pub reader: RefCell<BatchedReader>,
15
21
  }
16
22
 
17
23
  impl RbBatchedCsv {
@@ -38,7 +44,7 @@ impl RbBatchedCsv {
38
44
  let comment_char: Option<String> = arguments[16].try_convert()?;
39
45
  let quote_char: Option<String> = arguments[17].try_convert()?;
40
46
  let null_values: Option<Wrap<NullValues>> = arguments[18].try_convert()?;
41
- let parse_dates: bool = arguments[19].try_convert()?;
47
+ let try_parse_dates: bool = arguments[19].try_convert()?;
42
48
  let skip_rows_after_header: usize = arguments[20].try_convert()?;
43
49
  let row_count: Option<(String, IdxSize)> = arguments[21].try_convert()?;
44
50
  let sample_size: usize = arguments[22].try_convert()?;
@@ -62,11 +68,13 @@ impl RbBatchedCsv {
62
68
  };
63
69
 
64
70
  let overwrite_dtype = overwrite_dtype.map(|overwrite_dtype| {
65
- let fields = overwrite_dtype.iter().map(|(name, dtype)| {
66
- let dtype = dtype.0.clone();
67
- Field::new(name, dtype)
68
- });
69
- Schema::from(fields)
71
+ overwrite_dtype
72
+ .iter()
73
+ .map(|(name, dtype)| {
74
+ let dtype = dtype.0.clone();
75
+ Field::new(name, dtype)
76
+ })
77
+ .collect::<Schema>()
70
78
  });
71
79
 
72
80
  let overwrite_dtype_slice = overwrite_dtype_slice.map(|overwrite_dtype| {
@@ -95,14 +103,24 @@ impl RbBatchedCsv {
95
103
  .low_memory(low_memory)
96
104
  .with_comment_char(comment_char)
97
105
  .with_null_values(null_values)
98
- .with_parse_dates(parse_dates)
106
+ .with_try_parse_dates(try_parse_dates)
99
107
  .with_quote_char(quote_char)
100
108
  .with_end_of_line_char(eol_char)
101
109
  .with_skip_rows_after_header(skip_rows_after_header)
102
110
  .with_row_count(row_count)
103
- .sample_size(sample_size)
104
- .batched(overwrite_dtype.map(Arc::new))
105
- .map_err(RbPolarsErr::from)?;
111
+ .sample_size(sample_size);
112
+
113
+ let reader = if low_memory {
114
+ let reader = reader
115
+ .batched_read(overwrite_dtype.map(Arc::new))
116
+ .map_err(RbPolarsErr::from)?;
117
+ BatchedReader::Read(reader)
118
+ } else {
119
+ let reader = reader
120
+ .batched_mmap(overwrite_dtype.map(Arc::new))
121
+ .map_err(RbPolarsErr::from)?;
122
+ BatchedReader::MMap(reader)
123
+ };
106
124
 
107
125
  Ok(RbBatchedCsv {
108
126
  reader: RefCell::new(reader),
@@ -110,13 +128,12 @@ impl RbBatchedCsv {
110
128
  }
111
129
 
112
130
  pub fn next_batches(&self, n: usize) -> RbResult<Option<RArray>> {
113
- let batches = self
114
- .reader
115
- .borrow_mut()
116
- .next_batches(n)
117
- .map_err(RbPolarsErr::from)?;
118
- Ok(batches.map(|batches| {
119
- RArray::from_iter(batches.into_iter().map(|out| RbDataFrame::from(out.1)))
120
- }))
131
+ let batches = match &mut *self.reader.borrow_mut() {
132
+ BatchedReader::MMap(reader) => reader.next_batches(n),
133
+ BatchedReader::Read(reader) => reader.next_batches(n),
134
+ }
135
+ .map_err(RbPolarsErr::from)?;
136
+
137
+ Ok(batches.map(|batches| RArray::from_iter(batches.into_iter().map(RbDataFrame::from))))
121
138
  }
122
139
  }
data/ext/polars/src/conversion.rs CHANGED
@@ -1,7 +1,11 @@
1
+ use std::fmt::{Display, Formatter};
2
+ use std::hash::{Hash, Hasher};
3
+
1
4
  use magnus::{
2
5
  class, exception, r_hash::ForEach, ruby_handle::RubyHandle, Integer, IntoValue, Module, RArray,
3
6
  RFloat, RHash, RString, Symbol, TryConvert, Value, QNIL,
4
7
  };
8
+ use magnus::encoding::{EncodingCapable, Index};
5
9
  use polars::chunked_array::object::PolarsObjectSafe;
6
10
  use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
7
11
  use polars::datatypes::AnyValue;
@@ -10,9 +14,9 @@ use polars::frame::NullStrategy;
10
14
  use polars::io::avro::AvroCompression;
11
15
  use polars::prelude::*;
12
16
  use polars::series::ops::NullBehavior;
13
- use std::fmt::{Display, Formatter};
14
- use std::hash::{Hash, Hasher};
17
+ use smartstring::alias::String as SmartString;
15
18
 
19
+ use crate::rb_modules::utils;
16
20
  use crate::{RbDataFrame, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbValueError};
17
21
 
18
22
  pub(crate) fn slice_to_wrapped<T>(slice: &[T]) -> &[Wrap<T>] {
@@ -21,6 +25,12 @@ pub(crate) fn slice_to_wrapped<T>(slice: &[T]) -> &[Wrap<T>] {
21
25
  unsafe { std::mem::transmute(slice) }
22
26
  }
23
27
 
28
+ pub(crate) fn slice_extract_wrapped<T>(slice: &[Wrap<T>]) -> &[T] {
29
+ // Safety:
30
+ // Wrap is transparent.
31
+ unsafe { std::mem::transmute(slice) }
32
+ }
33
+
24
34
  pub(crate) fn vec_extract_wrapped<T>(buf: Vec<Wrap<T>>) -> Vec<T> {
25
35
  // Safety:
26
36
  // Wrap is transparent.
@@ -82,6 +92,22 @@ impl TryConvert for Wrap<Utf8Chunked> {
82
92
  }
83
93
  }
84
94
 
95
+ impl TryConvert for Wrap<BinaryChunked> {
96
+ fn try_convert(obj: Value) -> RbResult<Self> {
97
+ let (seq, len) = get_rbseq(obj)?;
98
+ let mut builder = BinaryChunkedBuilder::new("", len, len * 25);
99
+
100
+ for res in seq.each() {
101
+ let item = res?;
102
+ match item.try_convert::<RString>() {
103
+ Ok(val) => builder.append_value(unsafe { val.as_slice() }),
104
+ Err(_) => builder.append_null(),
105
+ }
106
+ }
107
+ Ok(Wrap(builder.finish()))
108
+ }
109
+ }
110
+
85
111
  impl TryConvert for Wrap<NullValues> {
86
112
  fn try_convert(ob: Value) -> RbResult<Self> {
87
113
  if let Ok(s) = ob.try_convert::<String>() {
@@ -98,6 +124,14 @@ impl TryConvert for Wrap<NullValues> {
98
124
  }
99
125
  }
100
126
 
127
+ fn struct_dict<'a>(vals: impl Iterator<Item = AnyValue<'a>>, flds: &[Field]) -> Value {
128
+ let dict = RHash::new();
129
+ for (fld, val) in flds.iter().zip(vals) {
130
+ dict.aset(fld.name().as_str(), Wrap(val)).unwrap()
131
+ }
132
+ dict.into_value()
133
+ }
134
+
101
135
  impl IntoValue for Wrap<AnyValue<'_>> {
102
136
  fn into_value_with(self, _: &RubyHandle) -> Value {
103
137
  match self.0 {
@@ -114,7 +148,7 @@ impl IntoValue for Wrap<AnyValue<'_>> {
114
148
  AnyValue::Null => *QNIL,
115
149
  AnyValue::Boolean(v) => Value::from(v),
116
150
  AnyValue::Utf8(v) => Value::from(v),
117
- AnyValue::Utf8Owned(_v) => todo!(),
151
+ AnyValue::Utf8Owned(v) => Value::from(v.as_str()),
118
152
  AnyValue::Categorical(_idx, _rev, _arr) => todo!(),
119
153
  AnyValue::Date(v) => class::time()
120
154
  .funcall::<_, _, Value>("at", (v * 86400,))
@@ -154,15 +188,25 @@ impl IntoValue for Wrap<AnyValue<'_>> {
154
188
  t.funcall::<_, _, Value>("utc", ()).unwrap()
155
189
  }
156
190
  }
157
- AnyValue::Duration(_v, _tu) => todo!(),
191
+ AnyValue::Duration(v, tu) => {
192
+ let tu = tu.to_ascii();
193
+ utils().funcall("_to_ruby_duration", (v, tu)).unwrap()
194
+ }
158
195
  AnyValue::Time(_v) => todo!(),
159
196
  AnyValue::List(v) => RbSeries::new(v).to_a().into_value(),
160
- ref _av @ AnyValue::Struct(_, _, _flds) => todo!(),
161
- AnyValue::StructOwned(_payload) => todo!(),
162
- AnyValue::Object(_v) => todo!(),
163
- AnyValue::ObjectOwned(_v) => todo!(),
164
- AnyValue::Binary(_v) => todo!(),
165
- AnyValue::BinaryOwned(_v) => todo!(),
197
+ ref av @ AnyValue::Struct(_, _, flds) => struct_dict(av._iter_struct_av(), flds),
198
+ AnyValue::StructOwned(payload) => struct_dict(payload.0.into_iter(), &payload.1),
199
+ AnyValue::Object(v) => {
200
+ let object = v.as_any().downcast_ref::<ObjectValue>().unwrap();
201
+ object.inner
202
+ }
203
+ AnyValue::ObjectOwned(v) => {
204
+ let object = v.0.as_any().downcast_ref::<ObjectValue>().unwrap();
205
+ object.inner
206
+ }
207
+ AnyValue::Binary(v) => RString::from_slice(v).into_value(),
208
+ AnyValue::BinaryOwned(v) => RString::from_slice(&v).into_value(),
209
+ AnyValue::Decimal(_v, _scale) => todo!(),
166
210
  }
167
211
  }
168
212
  }
@@ -182,7 +226,7 @@ impl IntoValue for Wrap<DataType> {
182
226
  DataType::UInt64 => pl.const_get::<_, Value>("UInt64").unwrap(),
183
227
  DataType::Float32 => pl.const_get::<_, Value>("Float32").unwrap(),
184
228
  DataType::Float64 => pl.const_get::<_, Value>("Float64").unwrap(),
185
- DataType::Decimal128(_) => todo!(),
229
+ DataType::Decimal(_precision, _scale) => todo!(),
186
230
  DataType::Boolean => pl.const_get::<_, Value>("Boolean").unwrap(),
187
231
  DataType::Utf8 => pl.const_get::<_, Value>("Utf8").unwrap(),
188
232
  DataType::Binary => pl.const_get::<_, Value>("Binary").unwrap(),
@@ -210,7 +254,7 @@ impl IntoValue for Wrap<DataType> {
210
254
  DataType::Struct(fields) => {
211
255
  let field_class = pl.const_get::<_, Value>("Field").unwrap();
212
256
  let iter = fields.iter().map(|fld| {
213
- let name = fld.name().clone();
257
+ let name = fld.name().as_str();
214
258
  let dtype = Wrap(fld.data_type().clone());
215
259
  field_class
216
260
  .funcall::<_, _, Value>("new", (name, dtype))
@@ -228,6 +272,75 @@ impl IntoValue for Wrap<DataType> {
228
272
  }
229
273
  }
230
274
 
275
+ impl IntoValue for Wrap<TimeUnit> {
276
+ fn into_value_with(self, _: &RubyHandle) -> Value {
277
+ let tu = match self.0 {
278
+ TimeUnit::Nanoseconds => "ns",
279
+ TimeUnit::Microseconds => "us",
280
+ TimeUnit::Milliseconds => "ms",
281
+ };
282
+ tu.into_value()
283
+ }
284
+ }
285
+
286
+ impl IntoValue for Wrap<&Utf8Chunked> {
287
+ fn into_value_with(self, _: &RubyHandle) -> Value {
288
+ let iter = self.0.into_iter();
289
+ RArray::from_iter(iter).into_value()
290
+ }
291
+ }
292
+
293
+ impl IntoValue for Wrap<&BinaryChunked> {
294
+ fn into_value_with(self, _: &RubyHandle) -> Value {
295
+ let iter = self
296
+ .0
297
+ .into_iter()
298
+ .map(|opt_bytes| opt_bytes.map(RString::from_slice));
299
+ RArray::from_iter(iter).into_value()
300
+ }
301
+ }
302
+
303
+ impl IntoValue for Wrap<&StructChunked> {
304
+ fn into_value_with(self, _: &RubyHandle) -> Value {
305
+ let s = self.0.clone().into_series();
306
+ // todo! iterate its chunks and flatten.
307
+ // make series::iter() accept a chunk index.
308
+ let s = s.rechunk();
309
+ let iter = s.iter().map(|av| {
310
+ if let AnyValue::Struct(_, _, flds) = av {
311
+ struct_dict(av._iter_struct_av(), flds)
312
+ } else {
313
+ unreachable!()
314
+ }
315
+ });
316
+
317
+ RArray::from_iter(iter).into_value()
318
+ }
319
+ }
320
+
321
+ impl IntoValue for Wrap<&DurationChunked> {
322
+ fn into_value_with(self, _: &RubyHandle) -> Value {
323
+ let utils = utils();
324
+ let tu = Wrap(self.0.time_unit()).into_value();
325
+ let iter = self.0.into_iter().map(|opt_v| {
326
+ opt_v.map(|v| {
327
+ utils
328
+ .funcall::<_, _, Value>("_to_ruby_duration", (v, tu))
329
+ .unwrap()
330
+ })
331
+ });
332
+ RArray::from_iter(iter).into_value()
333
+ }
334
+ }
335
+
336
+ impl TryConvert for Wrap<Field> {
337
+ fn try_convert(ob: Value) -> RbResult<Self> {
338
+ let name: String = ob.funcall("name", ())?;
339
+ let dtype: Wrap<DataType> = ob.funcall("dtype", ())?;
340
+ Ok(Wrap(Field::new(&name, dtype.0)))
341
+ }
342
+ }
343
+
231
344
  impl TryConvert for Wrap<DataType> {
232
345
  fn try_convert(ob: Value) -> RbResult<Self> {
233
346
  let dtype = if ob.is_kind_of(class::class()) {
@@ -261,6 +374,20 @@ impl TryConvert for Wrap<DataType> {
261
374
  )))
262
375
  }
263
376
  }
377
+ // TODO improve
378
+ } else if ob.try_convert::<String>().is_err() {
379
+ let name = unsafe { ob.class().name() }.into_owned();
380
+ match name.as_str() {
381
+ "Polars::Struct" => {
382
+ let arr: RArray = ob.funcall("fields", ())?;
383
+ let mut fields = Vec::with_capacity(arr.len());
384
+ for v in arr.each() {
385
+ fields.push(v?.try_convert::<Wrap<Field>>()?.0);
386
+ }
387
+ DataType::Struct(fields)
388
+ }
389
+ _ => todo!(),
390
+ }
264
391
  } else {
265
392
  match ob.try_convert::<String>()?.as_str() {
266
393
  "u8" => DataType::UInt8,
@@ -306,7 +433,11 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
306
433
  } else if let Some(v) = RFloat::from_value(ob) {
307
434
  Ok(AnyValue::Float64(v.to_f64()).into())
308
435
  } else if let Some(v) = RString::from_value(ob) {
309
- Ok(AnyValue::Utf8Owned(v.to_string()?.into()).into())
436
+ if v.enc_get() == Index::utf8() {
437
+ Ok(AnyValue::Utf8Owned(v.to_string()?.into()).into())
438
+ } else {
439
+ Ok(AnyValue::BinaryOwned(unsafe { v.as_slice() }.to_vec()).into())
440
+ }
310
441
  // call is_a? for ActiveSupport::TimeWithZone
311
442
  } else if ob.funcall::<_, _, bool>("is_a?", (class::time(),))? {
312
443
  let sec = ob.funcall::<_, _, i64>("to_i", ())?;
@@ -340,10 +471,14 @@ impl<'s> TryConvert for Wrap<AnyValue<'s>> {
340
471
  let n = 25;
341
472
  let dtype = any_values_to_dtype(&avs[..std::cmp::min(avs.len(), n)])
342
473
  .map_err(RbPolarsErr::from)?;
343
- let s = Series::from_any_values_and_dtype("", &avs, &dtype)
474
+ let s = Series::from_any_values_and_dtype("", &avs, &dtype, true)
344
475
  .map_err(RbPolarsErr::from)?;
345
476
  Ok(Wrap(AnyValue::List(s)))
346
477
  }
478
+ } else if ob.is_kind_of(crate::rb_modules::datetime()) {
479
+ let sec: i64 = ob.funcall("to_i", ())?;
480
+ let nsec: i64 = ob.funcall("nsec", ())?;
481
+ Ok(Wrap(AnyValue::Datetime(sec * 1_000_000_000 + nsec, TimeUnit::Nanoseconds, &None)))
347
482
  } else if ob.is_kind_of(crate::rb_modules::date()) {
348
483
  // convert to DateTime for UTC
349
484
  let v = ob
@@ -377,12 +512,12 @@ impl TryConvert for Wrap<Schema> {
377
512
 
378
513
  let mut schema = Vec::new();
379
514
  dict.foreach(|key: String, val: Wrap<DataType>| {
380
- schema.push(Field::new(&key, val.0));
515
+ schema.push(Ok(Field::new(&key, val.0)));
381
516
  Ok(ForEach::Continue)
382
517
  })
383
518
  .unwrap();
384
519
 
385
- Ok(Wrap(schema.into_iter().into()))
520
+ Ok(Wrap(schema.into_iter().collect::<RbResult<Schema>>()?))
386
521
  }
387
522
  }
388
523
 
@@ -870,3 +1005,11 @@ pub fn parse_parquet_compression(
870
1005
  };
871
1006
  Ok(parsed)
872
1007
  }
1008
+
1009
+ pub(crate) fn strings_to_smartstrings<I, S>(container: I) -> Vec<SmartString>
1010
+ where
1011
+ I: IntoIterator<Item = S>,
1012
+ S: AsRef<str>,
1013
+ {
1014
+ container.into_iter().map(|s| s.as_ref().into()).collect()
1015
+ }