polars-df 0.14.0 → 0.15.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (81) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +25 -0
  3. data/Cargo.lock +1296 -283
  4. data/LICENSE.txt +1 -0
  5. data/README.md +1 -2
  6. data/ext/polars/Cargo.toml +15 -5
  7. data/ext/polars/src/batched_csv.rs +7 -10
  8. data/ext/polars/src/conversion/any_value.rs +31 -21
  9. data/ext/polars/src/conversion/mod.rs +125 -28
  10. data/ext/polars/src/dataframe/construction.rs +0 -3
  11. data/ext/polars/src/dataframe/export.rs +9 -2
  12. data/ext/polars/src/dataframe/general.rs +16 -11
  13. data/ext/polars/src/dataframe/io.rs +73 -169
  14. data/ext/polars/src/dataframe/mod.rs +1 -0
  15. data/ext/polars/src/dataframe/serde.rs +15 -0
  16. data/ext/polars/src/error.rs +31 -48
  17. data/ext/polars/src/exceptions.rs +24 -0
  18. data/ext/polars/src/expr/binary.rs +4 -42
  19. data/ext/polars/src/expr/datetime.rs +5 -4
  20. data/ext/polars/src/expr/general.rs +13 -22
  21. data/ext/polars/src/expr/list.rs +18 -11
  22. data/ext/polars/src/expr/rolling.rs +6 -7
  23. data/ext/polars/src/expr/string.rs +9 -36
  24. data/ext/polars/src/file.rs +59 -22
  25. data/ext/polars/src/functions/business.rs +15 -0
  26. data/ext/polars/src/functions/lazy.rs +17 -8
  27. data/ext/polars/src/functions/mod.rs +1 -0
  28. data/ext/polars/src/interop/arrow/mod.rs +1 -0
  29. data/ext/polars/src/interop/arrow/to_ruby.rs +83 -0
  30. data/ext/polars/src/interop/mod.rs +1 -0
  31. data/ext/polars/src/lazyframe/general.rs +877 -0
  32. data/ext/polars/src/lazyframe/mod.rs +3 -827
  33. data/ext/polars/src/lazyframe/serde.rs +31 -0
  34. data/ext/polars/src/lib.rs +45 -14
  35. data/ext/polars/src/map/dataframe.rs +10 -6
  36. data/ext/polars/src/map/lazy.rs +65 -4
  37. data/ext/polars/src/map/mod.rs +9 -8
  38. data/ext/polars/src/on_startup.rs +1 -1
  39. data/ext/polars/src/series/aggregation.rs +1 -5
  40. data/ext/polars/src/series/arithmetic.rs +10 -10
  41. data/ext/polars/src/series/construction.rs +2 -2
  42. data/ext/polars/src/series/export.rs +1 -1
  43. data/ext/polars/src/series/general.rs +643 -0
  44. data/ext/polars/src/series/import.rs +55 -0
  45. data/ext/polars/src/series/mod.rs +11 -638
  46. data/ext/polars/src/series/scatter.rs +2 -2
  47. data/ext/polars/src/utils.rs +0 -20
  48. data/lib/polars/batched_csv_reader.rb +0 -2
  49. data/lib/polars/binary_expr.rb +133 -9
  50. data/lib/polars/binary_name_space.rb +101 -6
  51. data/lib/polars/config.rb +4 -0
  52. data/lib/polars/data_frame.rb +275 -52
  53. data/lib/polars/data_type_group.rb +28 -0
  54. data/lib/polars/data_types.rb +2 -0
  55. data/lib/polars/date_time_expr.rb +244 -0
  56. data/lib/polars/date_time_name_space.rb +87 -0
  57. data/lib/polars/expr.rb +103 -2
  58. data/lib/polars/functions/as_datatype.rb +51 -2
  59. data/lib/polars/functions/col.rb +1 -1
  60. data/lib/polars/functions/eager.rb +1 -3
  61. data/lib/polars/functions/lazy.rb +88 -10
  62. data/lib/polars/functions/range/time_range.rb +21 -21
  63. data/lib/polars/io/csv.rb +14 -16
  64. data/lib/polars/io/database.rb +2 -2
  65. data/lib/polars/io/ipc.rb +14 -4
  66. data/lib/polars/io/ndjson.rb +10 -0
  67. data/lib/polars/io/parquet.rb +168 -111
  68. data/lib/polars/lazy_frame.rb +649 -15
  69. data/lib/polars/list_name_space.rb +169 -0
  70. data/lib/polars/selectors.rb +1144 -0
  71. data/lib/polars/series.rb +465 -35
  72. data/lib/polars/string_cache.rb +27 -1
  73. data/lib/polars/string_expr.rb +0 -1
  74. data/lib/polars/string_name_space.rb +73 -3
  75. data/lib/polars/struct_name_space.rb +31 -7
  76. data/lib/polars/utils/various.rb +5 -1
  77. data/lib/polars/utils.rb +45 -10
  78. data/lib/polars/version.rb +1 -1
  79. data/lib/polars.rb +2 -1
  80. metadata +14 -4
  81. data/lib/polars/functions.rb +0 -57
data/LICENSE.txt CHANGED
@@ -1,5 +1,6 @@
1
1
  Copyright (c) 2020 Ritchie Vink
2
2
  Copyright (c) 2022-2024 Andrew Kane
3
+ Some portions Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
4
 
4
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
5
6
  of this software and associated documentation files (the "Software"), to deal
data/README.md CHANGED
@@ -17,8 +17,7 @@ gem "polars-df"
17
17
  This library follows the [Polars Python API](https://pola-rs.github.io/polars/py-polars/html/reference/index.html).
18
18
 
19
19
  ```ruby
20
- Polars.read_csv("iris.csv")
21
- .lazy
20
+ Polars.scan_csv("iris.csv")
22
21
  .filter(Polars.col("sepal_length") > 5)
23
22
  .group_by("species")
24
23
  .agg(Polars.all.sum)
data/ext/polars/Cargo.toml CHANGED
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "polars"
3
- version = "0.14.0"
3
+ version = "0.15.0"
4
4
  license = "MIT"
5
5
  authors = ["Andrew Kane <andrew@ankane.org>"]
6
6
  edition = "2021"
@@ -12,16 +12,20 @@ crate-type = ["cdylib"]
12
12
 
13
13
  [dependencies]
14
14
  ahash = "0.8"
15
+ arrow = { package = "polars-arrow", version = "=0.44.2" }
16
+ bytes = "1"
15
17
  chrono = "0.4"
16
18
  either = "1.8"
17
19
  magnus = "0.7"
18
- polars-core = "=0.43.1"
19
- polars-parquet = "=0.43.1"
20
- polars-utils = "=0.43.1"
20
+ polars-core = "=0.44.2"
21
+ polars-plan = "=0.44.2"
22
+ polars-parquet = "=0.44.2"
23
+ polars-utils = "=0.44.2"
24
+ regex = "1"
21
25
  serde_json = "1"
22
26
 
23
27
  [dependencies.polars]
24
- version = "=0.43.1"
28
+ version = "=0.44.2"
25
29
  features = [
26
30
  "abs",
27
31
  "approx_unique",
@@ -30,7 +34,11 @@ features = [
30
34
  "array_count",
31
35
  "asof_join",
32
36
  "avro",
37
+ "aws",
38
+ "azure",
33
39
  "binary_encoding",
40
+ "business",
41
+ "cloud",
34
42
  "concat_str",
35
43
  "cov",
36
44
  "cross_join",
@@ -51,6 +59,8 @@ features = [
51
59
  "extract_jsonpath",
52
60
  "find_many",
53
61
  "fmt",
62
+ "gcp",
63
+ "http",
54
64
  "interpolate",
55
65
  "ipc",
56
66
  "ipc_streaming",
data/ext/polars/src/batched_csv.rs CHANGED
@@ -34,8 +34,7 @@ impl RbBatchedCsv {
34
34
  let n_threads = Option::<usize>::try_convert(arguments[11])?;
35
35
  let path = PathBuf::try_convert(arguments[12])?;
36
36
  let overwrite_dtype = Option::<Vec<(String, Wrap<DataType>)>>::try_convert(arguments[13])?;
37
- // TODO fix
38
- let overwrite_dtype_slice = Option::<Vec<Wrap<DataType>>>::None; // Option::<Vec<Wrap<DataType>>>::try_convert(arguments[14])?;
37
+ let overwrite_dtype_slice = Option::<Vec<Wrap<DataType>>>::try_convert(arguments[14])?;
39
38
  let low_memory = bool::try_convert(arguments[15])?;
40
39
  let comment_prefix = Option::<String>::try_convert(arguments[16])?;
41
40
  let quote_char = Option::<String>::try_convert(arguments[17])?;
@@ -44,11 +43,10 @@ impl RbBatchedCsv {
44
43
  let try_parse_dates = bool::try_convert(arguments[20])?;
45
44
  let skip_rows_after_header = usize::try_convert(arguments[21])?;
46
45
  let row_index = Option::<(String, IdxSize)>::try_convert(arguments[22])?;
47
- let sample_size = usize::try_convert(arguments[23])?;
48
- let eol_char = String::try_convert(arguments[24])?;
49
- let raise_if_empty = bool::try_convert(arguments[25])?;
50
- let truncate_ragged_lines = bool::try_convert(arguments[26])?;
51
- let decimal_comma = bool::try_convert(arguments[27])?;
46
+ let eol_char = String::try_convert(arguments[23])?;
47
+ let raise_if_empty = bool::try_convert(arguments[24])?;
48
+ let truncate_ragged_lines = bool::try_convert(arguments[25])?;
49
+ let decimal_comma = bool::try_convert(arguments[26])?;
52
50
  // end arguments
53
51
 
54
52
  let null_values = null_values.map(|w| w.0);
@@ -84,7 +82,7 @@ impl RbBatchedCsv {
84
82
  .collect::<Vec<_>>()
85
83
  });
86
84
 
87
- let file = std::fs::File::open(path).map_err(RbPolarsErr::io)?;
85
+ let file = std::fs::File::open(path).map_err(RbPolarsErr::from)?;
88
86
  let reader = Box::new(file) as Box<dyn MmapBytesReader>;
89
87
  let reader = CsvReadOptions::default()
90
88
  .with_infer_schema_length(infer_schema_length)
@@ -101,7 +99,6 @@ impl RbBatchedCsv {
101
99
  .with_low_memory(low_memory)
102
100
  .with_skip_rows_after_header(skip_rows_after_header)
103
101
  .with_row_index(row_index)
104
- .with_sample_size(sample_size)
105
102
  .with_raise_if_empty(raise_if_empty)
106
103
  .with_parse_options(
107
104
  CsvParseOptions::default()
@@ -132,7 +129,7 @@ impl RbBatchedCsv {
132
129
  let batches = reader
133
130
  .borrow()
134
131
  .lock()
135
- .map_err(|e| RbPolarsErr::other(e.to_string()))?
132
+ .map_err(|e| RbPolarsErr::Other(e.to_string()))?
136
133
  .next_batches(n)
137
134
  .map_err(RbPolarsErr::from)?;
138
135
 
data/ext/polars/src/conversion/any_value.rs CHANGED
@@ -7,9 +7,9 @@ use polars_core::utils::any_values_to_supertype_and_n_dtypes;
7
7
 
8
8
  use super::{struct_dict, ObjectValue, Wrap};
9
9
 
10
- use crate::error::RbOverflowError;
10
+ use crate::exceptions::RbOverflowError;
11
11
  use crate::rb_modules::utils;
12
- use crate::{RbPolarsErr, RbResult, RbSeries};
12
+ use crate::{RbErr, RbPolarsErr, RbResult, RbSeries};
13
13
 
14
14
  impl IntoValue for Wrap<AnyValue<'_>> {
15
15
  fn into_value_with(self, ruby: &Ruby) -> Value {
@@ -47,15 +47,20 @@ pub(crate) fn any_value_into_rb_object(av: AnyValue, ruby: &Ruby) -> Value {
47
47
  };
48
48
  s.into_value()
49
49
  }
50
+ AnyValue::CategoricalOwned(idx, rev, arr) | AnyValue::EnumOwned(idx, rev, arr) => {
51
+ let s = if arr.is_null() {
52
+ rev.get(idx)
53
+ } else {
54
+ unsafe { arr.deref_unchecked().value(idx as usize) }
55
+ };
56
+ s.into_value()
57
+ }
50
58
  AnyValue::Date(v) => utils().funcall("_to_ruby_date", (v,)).unwrap(),
51
59
  AnyValue::Datetime(v, time_unit, time_zone) => {
52
- let time_unit = time_unit.to_ascii();
53
- utils()
54
- .funcall(
55
- "_to_ruby_datetime",
56
- (v, time_unit, time_zone.as_ref().map(|v| v.to_string())),
57
- )
58
- .unwrap()
60
+ datetime_to_rb_object(v, time_unit, time_zone)
61
+ }
62
+ AnyValue::DatetimeOwned(v, time_unit, time_zone) => {
63
+ datetime_to_rb_object(v, time_unit, time_zone.as_ref().map(AsRef::as_ref))
59
64
  }
60
65
  AnyValue::Duration(v, time_unit) => {
61
66
  let time_unit = time_unit.to_ascii();
@@ -69,11 +74,11 @@ pub(crate) fn any_value_into_rb_object(av: AnyValue, ruby: &Ruby) -> Value {
69
74
  AnyValue::StructOwned(payload) => struct_dict(payload.0.into_iter(), &payload.1),
70
75
  AnyValue::Object(v) => {
71
76
  let object = v.as_any().downcast_ref::<ObjectValue>().unwrap();
72
- object.to_object()
77
+ object.to_value()
73
78
  }
74
79
  AnyValue::ObjectOwned(v) => {
75
80
  let object = v.0.as_any().downcast_ref::<ObjectValue>().unwrap();
76
- object.to_object()
81
+ object.to_value()
77
82
  }
78
83
  AnyValue::Binary(v) => RString::from_slice(v).into_value(),
79
84
  AnyValue::BinaryOwned(v) => RString::from_slice(&v).into_value(),
@@ -83,6 +88,13 @@ pub(crate) fn any_value_into_rb_object(av: AnyValue, ruby: &Ruby) -> Value {
83
88
  }
84
89
  }
85
90
 
91
+ fn datetime_to_rb_object(v: i64, tu: TimeUnit, tz: Option<&TimeZone>) -> Value {
92
+ let tu = tu.to_ascii();
93
+ utils()
94
+ .funcall("_to_ruby_datetime", (v, tu, tz.map(|v| v.to_string())))
95
+ .unwrap()
96
+ }
97
+
86
98
  pub(crate) fn rb_object_to_any_value<'s>(ob: Value, strict: bool) -> RbResult<AnyValue<'s>> {
87
99
  // Conversion functions.
88
100
  fn get_null(_ob: Value, _strict: bool) -> RbResult<AnyValue<'static>> {
@@ -164,9 +176,8 @@ pub(crate) fn rb_object_to_any_value<'s>(ob: Value, strict: bool) -> RbResult<An
164
176
  let len = dict.len();
165
177
  let mut keys = Vec::with_capacity(len);
166
178
  let mut vals = Vec::with_capacity(len);
167
- dict.foreach(|k: Value, v: Value| {
168
- let key = String::try_convert(k)?;
169
- let val = Wrap::<AnyValue>::try_convert(v)?.0;
179
+ dict.foreach(|key: String, val: Wrap<AnyValue>| {
180
+ let val = val.0;
170
181
  let dtype = DataType::from(&val);
171
182
  keys.push(Field::new(key.into(), dtype));
172
183
  vals.push(val);
@@ -190,7 +201,7 @@ pub(crate) fn rb_object_to_any_value<'s>(ob: Value, strict: bool) -> RbResult<An
190
201
  let v = sec * 1_000_000_000 + nsec;
191
202
  // TODO support time zone when possible
192
203
  // https://github.com/pola-rs/polars/issues/9103
193
- Ok(AnyValue::Datetime(v, TimeUnit::Nanoseconds, &None))
204
+ Ok(AnyValue::Datetime(v, TimeUnit::Nanoseconds, None))
194
205
  }
195
206
 
196
207
  fn get_datetime(ob: Value, _strict: bool) -> RbResult<AnyValue<'static>> {
@@ -199,7 +210,7 @@ pub(crate) fn rb_object_to_any_value<'s>(ob: Value, strict: bool) -> RbResult<An
199
210
  Ok(AnyValue::Datetime(
200
211
  sec * 1_000_000_000 + nsec,
201
212
  TimeUnit::Nanoseconds,
202
- &None,
213
+ None,
203
214
  ))
204
215
  }
205
216
 
@@ -224,7 +235,9 @@ pub(crate) fn rb_object_to_any_value<'s>(ob: Value, strict: bool) -> RbResult<An
224
235
 
225
236
  let (sign, digits, _, exp): (i8, String, i32, i32) = ob.funcall("split", ()).unwrap();
226
237
  let (mut v, scale) = abs_decimal_from_digits(digits, exp).ok_or_else(|| {
227
- RbPolarsErr::other("BigDecimal is too large to fit in Decimal128".into())
238
+ RbErr::from(RbPolarsErr::Other(
239
+ "BigDecimal is too large to fit in Decimal128".into(),
240
+ ))
228
241
  })?;
229
242
  if sign < 0 {
230
243
  // TODO better error
@@ -259,9 +272,6 @@ pub(crate) fn rb_object_to_any_value<'s>(ob: Value, strict: bool) -> RbResult<An
259
272
  } else if ob.is_kind_of(crate::rb_modules::bigdecimal()) {
260
273
  get_decimal(ob, strict)
261
274
  } else {
262
- Err(RbPolarsErr::other(format!(
263
- "object type not supported {:?}",
264
- ob
265
- )))
275
+ Err(RbPolarsErr::Other(format!("object type not supported {:?}", ob)).into())
266
276
  }
267
277
  }
data/ext/polars/src/conversion/mod.rs CHANGED
@@ -2,12 +2,14 @@ pub(crate) mod any_value;
2
2
  mod chunked_array;
3
3
 
4
4
  use std::fmt::{Debug, Display, Formatter};
5
+ use std::fs::File;
5
6
  use std::hash::{Hash, Hasher};
6
7
  use std::num::NonZeroUsize;
8
+ use std::path::PathBuf;
7
9
 
8
10
  use magnus::{
9
- class, exception, prelude::*, r_hash::ForEach, value::Opaque, IntoValue, Module, RArray, RHash,
10
- Ruby, Symbol, TryConvert, Value,
11
+ class, exception, prelude::*, r_hash::ForEach, try_convert::TryConvertOwned, value::Opaque,
12
+ IntoValue, Module, RArray, RHash, Ruby, Symbol, TryConvert, Value,
11
13
  };
12
14
  use polars::chunked_array::object::PolarsObjectSafe;
13
15
  use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
@@ -15,12 +17,15 @@ use polars::datatypes::AnyValue;
15
17
  use polars::frame::row::Row;
16
18
  use polars::frame::NullStrategy;
17
19
  use polars::io::avro::AvroCompression;
20
+ use polars::io::cloud::CloudOptions;
18
21
  use polars::prelude::*;
19
22
  use polars::series::ops::NullBehavior;
20
23
  use polars_core::utils::arrow::array::Array;
21
24
  use polars_core::utils::materialize_dyn_int;
25
+ use polars_plan::plans::ScanSources;
22
26
  use polars_utils::total_ord::{TotalEq, TotalHash};
23
27
 
28
+ use crate::file::{get_ruby_scan_source_input, RubyScanSourceInput};
24
29
  use crate::object::OBJECT_NAME;
25
30
  use crate::rb_modules::series;
26
31
  use crate::{RbDataFrame, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbTypeError, RbValueError};
@@ -104,9 +109,10 @@ impl TryConvert for Wrap<NullValues> {
104
109
  .collect(),
105
110
  )))
106
111
  } else {
107
- Err(RbPolarsErr::other(
108
- "could not extract value from null_values argument".into(),
109
- ))
112
+ Err(
113
+ RbPolarsErr::Other("could not extract value from null_values argument".into())
114
+ .into(),
115
+ )
110
116
  }
111
117
  }
112
118
  }
@@ -328,7 +334,6 @@ impl TryConvert for Wrap<DataType> {
328
334
  )))
329
335
  }
330
336
  }
331
- // TODO improve
332
337
  } else if String::try_convert(ob).is_err() {
333
338
  let name = unsafe { ob.class().name() }.into_owned();
334
339
  match name.as_str() {
@@ -434,6 +439,8 @@ impl TryConvert for Wrap<DataType> {
434
439
  }
435
440
  }
436
441
 
442
+ unsafe impl TryConvertOwned for Wrap<DataType> {}
443
+
437
444
  impl TryConvert for Wrap<StatisticsOptions> {
438
445
  fn try_convert(ob: Value) -> RbResult<Self> {
439
446
  let mut statistics = StatisticsOptions::empty();
@@ -452,8 +459,7 @@ impl TryConvert for Wrap<StatisticsOptions> {
452
459
  }
453
460
  }
454
461
  Ok(ForEach::Continue)
455
- })
456
- .unwrap();
462
+ })?;
457
463
 
458
464
  Ok(Wrap(statistics))
459
465
  }
@@ -478,13 +484,75 @@ impl TryConvert for Wrap<Schema> {
478
484
  dict.foreach(|key: String, val: Wrap<DataType>| {
479
485
  schema.push(Ok(Field::new((&*key).into(), val.0)));
480
486
  Ok(ForEach::Continue)
481
- })
482
- .unwrap();
487
+ })?;
483
488
 
484
489
  Ok(Wrap(schema.into_iter().collect::<RbResult<Schema>>()?))
485
490
  }
486
491
  }
487
492
 
493
+ impl TryConvert for Wrap<ScanSources> {
494
+ fn try_convert(ob: Value) -> RbResult<Self> {
495
+ let list = RArray::try_convert(ob)?;
496
+
497
+ if list.is_empty() {
498
+ return Ok(Wrap(ScanSources::default()));
499
+ }
500
+
501
+ enum MutableSources {
502
+ Paths(Vec<PathBuf>),
503
+ Files(Vec<File>),
504
+ Buffers(Vec<bytes::Bytes>),
505
+ }
506
+
507
+ let num_items = list.len();
508
+ let mut iter = list
509
+ .into_iter()
510
+ .map(|val| get_ruby_scan_source_input(val, false));
511
+
512
+ let Some(first) = iter.next() else {
513
+ return Ok(Wrap(ScanSources::default()));
514
+ };
515
+
516
+ let mut sources = match first? {
517
+ RubyScanSourceInput::Path(path) => {
518
+ let mut sources = Vec::with_capacity(num_items);
519
+ sources.push(path);
520
+ MutableSources::Paths(sources)
521
+ }
522
+ RubyScanSourceInput::File(file) => {
523
+ let mut sources = Vec::with_capacity(num_items);
524
+ sources.push(file);
525
+ MutableSources::Files(sources)
526
+ }
527
+ RubyScanSourceInput::Buffer(buffer) => {
528
+ let mut sources = Vec::with_capacity(num_items);
529
+ sources.push(buffer);
530
+ MutableSources::Buffers(sources)
531
+ }
532
+ };
533
+
534
+ for source in iter {
535
+ match (&mut sources, source?) {
536
+ (MutableSources::Paths(v), RubyScanSourceInput::Path(p)) => v.push(p),
537
+ (MutableSources::Files(v), RubyScanSourceInput::File(f)) => v.push(f),
538
+ (MutableSources::Buffers(v), RubyScanSourceInput::Buffer(f)) => v.push(f),
539
+ _ => {
540
+ return Err(RbTypeError::new_err(
541
+ "Cannot combine in-memory bytes, paths and files for scan sources"
542
+ .to_string(),
543
+ ))
544
+ }
545
+ }
546
+ }
547
+
548
+ Ok(Wrap(match sources {
549
+ MutableSources::Paths(i) => ScanSources::Paths(i.into()),
550
+ MutableSources::Files(i) => ScanSources::Files(i.into()),
551
+ MutableSources::Buffers(i) => ScanSources::Buffers(i.into()),
552
+ }))
553
+ }
554
+ }
555
+
488
556
  #[derive(Clone)]
489
557
  pub struct ObjectValue {
490
558
  pub inner: Opaque<Value>,
@@ -493,7 +561,7 @@ pub struct ObjectValue {
493
561
  impl Debug for ObjectValue {
494
562
  fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
495
563
  f.debug_struct("ObjectValue")
496
- .field("inner", &self.to_object())
564
+ .field("inner", &self.to_value())
497
565
  .finish()
498
566
  }
499
567
  }
@@ -501,7 +569,7 @@ impl Debug for ObjectValue {
501
569
  impl Hash for ObjectValue {
502
570
  fn hash<H: Hasher>(&self, state: &mut H) {
503
571
  let h = self
504
- .to_object()
572
+ .to_value()
505
573
  .funcall::<_, _, isize>("hash", ())
506
574
  .expect("should be hashable");
507
575
  state.write_isize(h)
@@ -512,7 +580,7 @@ impl Eq for ObjectValue {}
512
580
 
513
581
  impl PartialEq for ObjectValue {
514
582
  fn eq(&self, other: &Self) -> bool {
515
- self.to_object().eql(other.to_object()).unwrap_or(false)
583
+ self.to_value().eql(other.to_value()).unwrap_or(false)
516
584
  }
517
585
  }
518
586
 
@@ -533,7 +601,7 @@ impl TotalHash for ObjectValue {
533
601
 
534
602
  impl Display for ObjectValue {
535
603
  fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
536
- write!(f, "{}", self.to_object())
604
+ write!(f, "{}", self.to_value())
537
605
  }
538
606
  }
539
607
 
@@ -561,16 +629,15 @@ impl From<&dyn PolarsObjectSafe> for &ObjectValue {
561
629
  }
562
630
  }
563
631
 
564
- // TODO remove
565
632
  impl ObjectValue {
566
- pub fn to_object(&self) -> Value {
567
- Ruby::get().unwrap().get_inner(self.inner)
633
+ pub fn to_value(&self) -> Value {
634
+ self.clone().into_value()
568
635
  }
569
636
  }
570
637
 
571
638
  impl IntoValue for ObjectValue {
572
- fn into_value_with(self, _: &Ruby) -> Value {
573
- self.to_object()
639
+ fn into_value_with(self, ruby: &Ruby) -> Value {
640
+ ruby.get_inner(self.inner)
574
641
  }
575
642
  }
576
643
 
@@ -587,10 +654,10 @@ impl TryConvert for Wrap<AsofStrategy> {
587
654
  let parsed = match String::try_convert(ob)?.as_str() {
588
655
  "backward" => AsofStrategy::Backward,
589
656
  "forward" => AsofStrategy::Forward,
657
+ "nearest" => AsofStrategy::Nearest,
590
658
  v => {
591
659
  return Err(RbValueError::new_err(format!(
592
- "strategy must be one of {{'backward', 'forward'}}, got {}",
593
- v
660
+ "asof `strategy` must be one of {{'backward', 'forward', 'nearest'}}, got {v}",
594
661
  )))
595
662
  }
596
663
  };
@@ -830,14 +897,14 @@ impl TryConvert for Wrap<ParallelStrategy> {
830
897
  }
831
898
  }
832
899
 
833
- impl TryConvert for Wrap<QuantileInterpolOptions> {
900
+ impl TryConvert for Wrap<QuantileMethod> {
834
901
  fn try_convert(ob: Value) -> RbResult<Self> {
835
902
  let parsed = match String::try_convert(ob)?.as_str() {
836
- "lower" => QuantileInterpolOptions::Lower,
837
- "higher" => QuantileInterpolOptions::Higher,
838
- "nearest" => QuantileInterpolOptions::Nearest,
839
- "linear" => QuantileInterpolOptions::Linear,
840
- "midpoint" => QuantileInterpolOptions::Midpoint,
903
+ "lower" => QuantileMethod::Lower,
904
+ "higher" => QuantileMethod::Higher,
905
+ "nearest" => QuantileMethod::Nearest,
906
+ "linear" => QuantileMethod::Linear,
907
+ "midpoint" => QuantileMethod::Midpoint,
841
908
  v => {
842
909
  return Err(RbValueError::new_err(format!(
843
910
  "interpolation must be one of {{'lower', 'higher', 'nearest', 'linear', 'midpoint'}}, got {}",
@@ -1001,6 +1068,11 @@ impl TryConvert for Wrap<QuoteStyle> {
1001
1068
  }
1002
1069
  }
1003
1070
 
1071
+ pub(crate) fn parse_cloud_options(uri: &str, kv: Vec<(String, String)>) -> RbResult<CloudOptions> {
1072
+ let out = CloudOptions::from_untyped_config(uri, kv).map_err(RbPolarsErr::from)?;
1073
+ Ok(out)
1074
+ }
1075
+
1004
1076
  pub fn parse_fill_null_strategy(
1005
1077
  strategy: &str,
1006
1078
  limit: FillNullLimit,
@@ -1071,7 +1143,7 @@ impl TryConvert for Wrap<NonZeroUsize> {
1071
1143
  let v = usize::try_convert(ob)?;
1072
1144
  NonZeroUsize::new(v)
1073
1145
  .map(Wrap)
1074
- .ok_or(RbValueError::new_err("must be non-zero".into()))
1146
+ .ok_or(RbValueError::new_err("must be non-zero"))
1075
1147
  }
1076
1148
  }
1077
1149
 
@@ -1085,3 +1157,28 @@ where
1085
1157
  .map(|s| PlSmallStr::from_str(s.as_ref()))
1086
1158
  .collect()
1087
1159
  }
1160
+
1161
+ #[derive(Debug, Copy, Clone)]
1162
+ pub struct RbCompatLevel(pub CompatLevel);
1163
+
1164
+ impl TryConvert for RbCompatLevel {
1165
+ fn try_convert(ob: Value) -> RbResult<Self> {
1166
+ Ok(RbCompatLevel(if let Ok(level) = u16::try_convert(ob) {
1167
+ if let Ok(compat_level) = CompatLevel::with_level(level) {
1168
+ compat_level
1169
+ } else {
1170
+ return Err(RbValueError::new_err("invalid compat level".to_string()));
1171
+ }
1172
+ } else if let Ok(future) = bool::try_convert(ob) {
1173
+ if future {
1174
+ CompatLevel::newest()
1175
+ } else {
1176
+ CompatLevel::oldest()
1177
+ }
1178
+ } else {
1179
+ return Err(RbTypeError::new_err(
1180
+ "'compat_level' argument accepts int or bool".to_string(),
1181
+ ));
1182
+ }))
1183
+ }
1184
+ }
data/ext/polars/src/dataframe/construction.rs CHANGED
@@ -54,9 +54,6 @@ fn finish_from_rows(
54
54
  schema_overrides: Option<Schema>,
55
55
  infer_schema_length: Option<usize>,
56
56
  ) -> RbResult<RbDataFrame> {
57
- // Object builder must be registered
58
- crate::on_startup::register_object_builder();
59
-
60
57
  let mut schema = if let Some(mut schema) = schema {
61
58
  resolve_schema_overrides(&mut schema, schema_overrides);
62
59
  update_schema_from_rows(&mut schema, &rows, infer_schema_length)?;
data/ext/polars/src/dataframe/export.rs CHANGED
@@ -2,6 +2,8 @@ use magnus::{prelude::*, IntoValue, RArray, Value};
2
2
 
3
3
  use super::*;
4
4
  use crate::conversion::{ObjectValue, Wrap};
5
+ use crate::interop::arrow::to_ruby::dataframe_to_stream;
6
+ use crate::RbResult;
5
7
 
6
8
  impl RbDataFrame {
7
9
  pub fn row_tuple(&self, idx: i64) -> Value {
@@ -18,7 +20,7 @@ impl RbDataFrame {
18
20
  .map(|s| match s.dtype() {
19
21
  DataType::Object(_, _) => {
20
22
  let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
21
- obj.unwrap().to_object()
23
+ obj.unwrap().to_value()
22
24
  }
23
25
  _ => Wrap(s.get(idx).unwrap()).into_value(),
24
26
  }),
@@ -37,7 +39,7 @@ impl RbDataFrame {
37
39
  .map(|s| match s.dtype() {
38
40
  DataType::Object(_, _) => {
39
41
  let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
40
- obj.unwrap().to_object()
42
+ obj.unwrap().to_value()
41
43
  }
42
44
  _ => Wrap(s.get(idx).unwrap()).into_value(),
43
45
  }),
@@ -45,4 +47,9 @@ impl RbDataFrame {
45
47
  }))
46
48
  .as_value()
47
49
  }
50
+
51
+ pub fn __arrow_c_stream__(&self) -> RbResult<Value> {
52
+ self.df.borrow_mut().align_chunks();
53
+ dataframe_to_stream(&self.df.borrow())
54
+ }
48
55
  }
data/ext/polars/src/dataframe/general.rs CHANGED
@@ -10,14 +10,14 @@ use crate::map::dataframe::{
10
10
  apply_lambda_with_utf8_out_type,
11
11
  };
12
12
  use crate::prelude::strings_to_pl_smallstr;
13
- use crate::series::{to_rbseries_collection, to_series_collection};
13
+ use crate::series::{to_rbseries, to_series};
14
14
  use crate::{RbDataFrame, RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
15
15
 
16
16
  impl RbDataFrame {
17
17
  pub fn init(columns: RArray) -> RbResult<Self> {
18
18
  let mut cols = Vec::new();
19
19
  for i in columns.into_iter() {
20
- cols.push(<&RbSeries>::try_convert(i)?.series.borrow().clone());
20
+ cols.push(<&RbSeries>::try_convert(i)?.series.borrow().clone().into());
21
21
  }
22
22
  let df = DataFrame::new(cols).map_err(RbPolarsErr::from)?;
23
23
  Ok(RbDataFrame::new(df))
@@ -128,7 +128,7 @@ impl RbDataFrame {
128
128
 
129
129
  pub fn get_columns(&self) -> RArray {
130
130
  let cols = self.df.borrow().get_columns().to_vec();
131
- to_rbseries_collection(cols)
131
+ to_rbseries(cols)
132
132
  }
133
133
 
134
134
  pub fn columns(&self) -> Vec<String> {
@@ -174,7 +174,8 @@ impl RbDataFrame {
174
174
  }
175
175
 
176
176
  pub fn hstack(&self, columns: RArray) -> RbResult<Self> {
177
- let columns = to_series_collection(columns)?;
177
+ let columns = to_series(columns)?;
178
+ let columns = columns.into_iter().map(Into::into).collect::<Vec<_>>();
178
179
  let df = self
179
180
  .df
180
181
  .borrow()
@@ -184,7 +185,8 @@ impl RbDataFrame {
184
185
  }
185
186
 
186
187
  pub fn hstack_mut(&self, columns: RArray) -> RbResult<()> {
187
- let columns = to_series_collection(columns)?;
188
+ let columns = to_series(columns)?;
189
+ let columns = columns.into_iter().map(Into::into).collect::<Vec<_>>();
188
190
  self.df
189
191
  .borrow_mut()
190
192
  .hstack_mut(&columns)
@@ -223,6 +225,7 @@ impl RbDataFrame {
223
225
  .borrow_mut()
224
226
  .drop_in_place(&name)
225
227
  .map_err(RbPolarsErr::from)?;
228
+ let s = s.take_materialized_series();
226
229
  Ok(RbSeries::new(s))
227
230
  }
228
231
 
@@ -230,7 +233,7 @@ impl RbDataFrame {
230
233
  self.df
231
234
  .borrow()
232
235
  .select_at_idx(idx)
233
- .map(|s| RbSeries::new(s.clone()))
236
+ .map(|s| RbSeries::new(s.as_materialized_series().clone()))
234
237
  }
235
238
 
236
239
  pub fn get_column_index(&self, name: String) -> Option<usize> {
@@ -238,11 +241,13 @@ impl RbDataFrame {
238
241
  }
239
242
 
240
243
  pub fn get_column(&self, name: String) -> RbResult<RbSeries> {
241
- self.df
244
+ let series = self
245
+ .df
242
246
  .borrow()
243
247
  .column(&name)
244
- .map(|s| RbSeries::new(s.clone()))
245
- .map_err(RbPolarsErr::from)
248
+ .map(|s| RbSeries::new(s.as_materialized_series().clone()))
249
+ .map_err(RbPolarsErr::from)?;
250
+ Ok(series)
246
251
  }
247
252
 
248
253
  pub fn select(&self, selection: Vec<String>) -> RbResult<Self> {
@@ -411,7 +416,7 @@ impl RbDataFrame {
411
416
  .borrow()
412
417
  .max_horizontal()
413
418
  .map_err(RbPolarsErr::from)?;
414
- Ok(s.map(|s| s.into()))
419
+ Ok(s.map(|s| s.take_materialized_series().into()))
415
420
  }
416
421
 
417
422
  pub fn min_horizontal(&self) -> RbResult<Option<RbSeries>> {
@@ -420,7 +425,7 @@ impl RbDataFrame {
420
425
  .borrow()
421
426
  .min_horizontal()
422
427
  .map_err(RbPolarsErr::from)?;
423
- Ok(s.map(|s| s.into()))
428
+ Ok(s.map(|s| s.take_materialized_series().into()))
424
429
  }
425
430
 
426
431
  pub fn sum_horizontal(&self, ignore_nulls: bool) -> RbResult<Option<RbSeries>> {