polars-df 0.14.0 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +25 -0
  3. data/Cargo.lock +1296 -283
  4. data/LICENSE.txt +1 -0
  5. data/README.md +1 -2
  6. data/ext/polars/Cargo.toml +15 -5
  7. data/ext/polars/src/batched_csv.rs +7 -10
  8. data/ext/polars/src/conversion/any_value.rs +31 -21
  9. data/ext/polars/src/conversion/mod.rs +125 -28
  10. data/ext/polars/src/dataframe/construction.rs +0 -3
  11. data/ext/polars/src/dataframe/export.rs +9 -2
  12. data/ext/polars/src/dataframe/general.rs +16 -11
  13. data/ext/polars/src/dataframe/io.rs +73 -169
  14. data/ext/polars/src/dataframe/mod.rs +1 -0
  15. data/ext/polars/src/dataframe/serde.rs +15 -0
  16. data/ext/polars/src/error.rs +31 -48
  17. data/ext/polars/src/exceptions.rs +24 -0
  18. data/ext/polars/src/expr/binary.rs +4 -42
  19. data/ext/polars/src/expr/datetime.rs +5 -4
  20. data/ext/polars/src/expr/general.rs +13 -22
  21. data/ext/polars/src/expr/list.rs +18 -11
  22. data/ext/polars/src/expr/rolling.rs +6 -7
  23. data/ext/polars/src/expr/string.rs +9 -36
  24. data/ext/polars/src/file.rs +59 -22
  25. data/ext/polars/src/functions/business.rs +15 -0
  26. data/ext/polars/src/functions/lazy.rs +17 -8
  27. data/ext/polars/src/functions/mod.rs +1 -0
  28. data/ext/polars/src/interop/arrow/mod.rs +1 -0
  29. data/ext/polars/src/interop/arrow/to_ruby.rs +83 -0
  30. data/ext/polars/src/interop/mod.rs +1 -0
  31. data/ext/polars/src/lazyframe/general.rs +877 -0
  32. data/ext/polars/src/lazyframe/mod.rs +3 -827
  33. data/ext/polars/src/lazyframe/serde.rs +31 -0
  34. data/ext/polars/src/lib.rs +45 -14
  35. data/ext/polars/src/map/dataframe.rs +10 -6
  36. data/ext/polars/src/map/lazy.rs +65 -4
  37. data/ext/polars/src/map/mod.rs +9 -8
  38. data/ext/polars/src/on_startup.rs +1 -1
  39. data/ext/polars/src/series/aggregation.rs +1 -5
  40. data/ext/polars/src/series/arithmetic.rs +10 -10
  41. data/ext/polars/src/series/construction.rs +2 -2
  42. data/ext/polars/src/series/export.rs +1 -1
  43. data/ext/polars/src/series/general.rs +643 -0
  44. data/ext/polars/src/series/import.rs +55 -0
  45. data/ext/polars/src/series/mod.rs +11 -638
  46. data/ext/polars/src/series/scatter.rs +2 -2
  47. data/ext/polars/src/utils.rs +0 -20
  48. data/lib/polars/batched_csv_reader.rb +0 -2
  49. data/lib/polars/binary_expr.rb +133 -9
  50. data/lib/polars/binary_name_space.rb +101 -6
  51. data/lib/polars/config.rb +4 -0
  52. data/lib/polars/data_frame.rb +275 -52
  53. data/lib/polars/data_type_group.rb +28 -0
  54. data/lib/polars/data_types.rb +2 -0
  55. data/lib/polars/date_time_expr.rb +244 -0
  56. data/lib/polars/date_time_name_space.rb +87 -0
  57. data/lib/polars/expr.rb +103 -2
  58. data/lib/polars/functions/as_datatype.rb +51 -2
  59. data/lib/polars/functions/col.rb +1 -1
  60. data/lib/polars/functions/eager.rb +1 -3
  61. data/lib/polars/functions/lazy.rb +88 -10
  62. data/lib/polars/functions/range/time_range.rb +21 -21
  63. data/lib/polars/io/csv.rb +14 -16
  64. data/lib/polars/io/database.rb +2 -2
  65. data/lib/polars/io/ipc.rb +14 -4
  66. data/lib/polars/io/ndjson.rb +10 -0
  67. data/lib/polars/io/parquet.rb +168 -111
  68. data/lib/polars/lazy_frame.rb +649 -15
  69. data/lib/polars/list_name_space.rb +169 -0
  70. data/lib/polars/selectors.rb +1144 -0
  71. data/lib/polars/series.rb +465 -35
  72. data/lib/polars/string_cache.rb +27 -1
  73. data/lib/polars/string_expr.rb +0 -1
  74. data/lib/polars/string_name_space.rb +73 -3
  75. data/lib/polars/struct_name_space.rb +31 -7
  76. data/lib/polars/utils/various.rb +5 -1
  77. data/lib/polars/utils.rb +45 -10
  78. data/lib/polars/version.rb +1 -1
  79. data/lib/polars.rb +2 -1
  80. metadata +14 -4
  81. data/lib/polars/functions.rb +0 -57
data/LICENSE.txt CHANGED
@@ -1,5 +1,6 @@
1
1
  Copyright (c) 2020 Ritchie Vink
2
2
  Copyright (c) 2022-2024 Andrew Kane
3
+ Some portions Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
4
 
4
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
5
6
  of this software and associated documentation files (the "Software"), to deal
data/README.md CHANGED
@@ -17,8 +17,7 @@ gem "polars-df"
17
17
  This library follows the [Polars Python API](https://pola-rs.github.io/polars/py-polars/html/reference/index.html).
18
18
 
19
19
  ```ruby
20
- Polars.read_csv("iris.csv")
21
- .lazy
20
+ Polars.scan_csv("iris.csv")
22
21
  .filter(Polars.col("sepal_length") > 5)
23
22
  .group_by("species")
24
23
  .agg(Polars.all.sum)
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "polars"
3
- version = "0.14.0"
3
+ version = "0.15.0"
4
4
  license = "MIT"
5
5
  authors = ["Andrew Kane <andrew@ankane.org>"]
6
6
  edition = "2021"
@@ -12,16 +12,20 @@ crate-type = ["cdylib"]
12
12
 
13
13
  [dependencies]
14
14
  ahash = "0.8"
15
+ arrow = { package = "polars-arrow", version = "=0.44.2" }
16
+ bytes = "1"
15
17
  chrono = "0.4"
16
18
  either = "1.8"
17
19
  magnus = "0.7"
18
- polars-core = "=0.43.1"
19
- polars-parquet = "=0.43.1"
20
- polars-utils = "=0.43.1"
20
+ polars-core = "=0.44.2"
21
+ polars-plan = "=0.44.2"
22
+ polars-parquet = "=0.44.2"
23
+ polars-utils = "=0.44.2"
24
+ regex = "1"
21
25
  serde_json = "1"
22
26
 
23
27
  [dependencies.polars]
24
- version = "=0.43.1"
28
+ version = "=0.44.2"
25
29
  features = [
26
30
  "abs",
27
31
  "approx_unique",
@@ -30,7 +34,11 @@ features = [
30
34
  "array_count",
31
35
  "asof_join",
32
36
  "avro",
37
+ "aws",
38
+ "azure",
33
39
  "binary_encoding",
40
+ "business",
41
+ "cloud",
34
42
  "concat_str",
35
43
  "cov",
36
44
  "cross_join",
@@ -51,6 +59,8 @@ features = [
51
59
  "extract_jsonpath",
52
60
  "find_many",
53
61
  "fmt",
62
+ "gcp",
63
+ "http",
54
64
  "interpolate",
55
65
  "ipc",
56
66
  "ipc_streaming",
@@ -34,8 +34,7 @@ impl RbBatchedCsv {
34
34
  let n_threads = Option::<usize>::try_convert(arguments[11])?;
35
35
  let path = PathBuf::try_convert(arguments[12])?;
36
36
  let overwrite_dtype = Option::<Vec<(String, Wrap<DataType>)>>::try_convert(arguments[13])?;
37
- // TODO fix
38
- let overwrite_dtype_slice = Option::<Vec<Wrap<DataType>>>::None; // Option::<Vec<Wrap<DataType>>>::try_convert(arguments[14])?;
37
+ let overwrite_dtype_slice = Option::<Vec<Wrap<DataType>>>::try_convert(arguments[14])?;
39
38
  let low_memory = bool::try_convert(arguments[15])?;
40
39
  let comment_prefix = Option::<String>::try_convert(arguments[16])?;
41
40
  let quote_char = Option::<String>::try_convert(arguments[17])?;
@@ -44,11 +43,10 @@ impl RbBatchedCsv {
44
43
  let try_parse_dates = bool::try_convert(arguments[20])?;
45
44
  let skip_rows_after_header = usize::try_convert(arguments[21])?;
46
45
  let row_index = Option::<(String, IdxSize)>::try_convert(arguments[22])?;
47
- let sample_size = usize::try_convert(arguments[23])?;
48
- let eol_char = String::try_convert(arguments[24])?;
49
- let raise_if_empty = bool::try_convert(arguments[25])?;
50
- let truncate_ragged_lines = bool::try_convert(arguments[26])?;
51
- let decimal_comma = bool::try_convert(arguments[27])?;
46
+ let eol_char = String::try_convert(arguments[23])?;
47
+ let raise_if_empty = bool::try_convert(arguments[24])?;
48
+ let truncate_ragged_lines = bool::try_convert(arguments[25])?;
49
+ let decimal_comma = bool::try_convert(arguments[26])?;
52
50
  // end arguments
53
51
 
54
52
  let null_values = null_values.map(|w| w.0);
@@ -84,7 +82,7 @@ impl RbBatchedCsv {
84
82
  .collect::<Vec<_>>()
85
83
  });
86
84
 
87
- let file = std::fs::File::open(path).map_err(RbPolarsErr::io)?;
85
+ let file = std::fs::File::open(path).map_err(RbPolarsErr::from)?;
88
86
  let reader = Box::new(file) as Box<dyn MmapBytesReader>;
89
87
  let reader = CsvReadOptions::default()
90
88
  .with_infer_schema_length(infer_schema_length)
@@ -101,7 +99,6 @@ impl RbBatchedCsv {
101
99
  .with_low_memory(low_memory)
102
100
  .with_skip_rows_after_header(skip_rows_after_header)
103
101
  .with_row_index(row_index)
104
- .with_sample_size(sample_size)
105
102
  .with_raise_if_empty(raise_if_empty)
106
103
  .with_parse_options(
107
104
  CsvParseOptions::default()
@@ -132,7 +129,7 @@ impl RbBatchedCsv {
132
129
  let batches = reader
133
130
  .borrow()
134
131
  .lock()
135
- .map_err(|e| RbPolarsErr::other(e.to_string()))?
132
+ .map_err(|e| RbPolarsErr::Other(e.to_string()))?
136
133
  .next_batches(n)
137
134
  .map_err(RbPolarsErr::from)?;
138
135
 
@@ -7,9 +7,9 @@ use polars_core::utils::any_values_to_supertype_and_n_dtypes;
7
7
 
8
8
  use super::{struct_dict, ObjectValue, Wrap};
9
9
 
10
- use crate::error::RbOverflowError;
10
+ use crate::exceptions::RbOverflowError;
11
11
  use crate::rb_modules::utils;
12
- use crate::{RbPolarsErr, RbResult, RbSeries};
12
+ use crate::{RbErr, RbPolarsErr, RbResult, RbSeries};
13
13
 
14
14
  impl IntoValue for Wrap<AnyValue<'_>> {
15
15
  fn into_value_with(self, ruby: &Ruby) -> Value {
@@ -47,15 +47,20 @@ pub(crate) fn any_value_into_rb_object(av: AnyValue, ruby: &Ruby) -> Value {
47
47
  };
48
48
  s.into_value()
49
49
  }
50
+ AnyValue::CategoricalOwned(idx, rev, arr) | AnyValue::EnumOwned(idx, rev, arr) => {
51
+ let s = if arr.is_null() {
52
+ rev.get(idx)
53
+ } else {
54
+ unsafe { arr.deref_unchecked().value(idx as usize) }
55
+ };
56
+ s.into_value()
57
+ }
50
58
  AnyValue::Date(v) => utils().funcall("_to_ruby_date", (v,)).unwrap(),
51
59
  AnyValue::Datetime(v, time_unit, time_zone) => {
52
- let time_unit = time_unit.to_ascii();
53
- utils()
54
- .funcall(
55
- "_to_ruby_datetime",
56
- (v, time_unit, time_zone.as_ref().map(|v| v.to_string())),
57
- )
58
- .unwrap()
60
+ datetime_to_rb_object(v, time_unit, time_zone)
61
+ }
62
+ AnyValue::DatetimeOwned(v, time_unit, time_zone) => {
63
+ datetime_to_rb_object(v, time_unit, time_zone.as_ref().map(AsRef::as_ref))
59
64
  }
60
65
  AnyValue::Duration(v, time_unit) => {
61
66
  let time_unit = time_unit.to_ascii();
@@ -69,11 +74,11 @@ pub(crate) fn any_value_into_rb_object(av: AnyValue, ruby: &Ruby) -> Value {
69
74
  AnyValue::StructOwned(payload) => struct_dict(payload.0.into_iter(), &payload.1),
70
75
  AnyValue::Object(v) => {
71
76
  let object = v.as_any().downcast_ref::<ObjectValue>().unwrap();
72
- object.to_object()
77
+ object.to_value()
73
78
  }
74
79
  AnyValue::ObjectOwned(v) => {
75
80
  let object = v.0.as_any().downcast_ref::<ObjectValue>().unwrap();
76
- object.to_object()
81
+ object.to_value()
77
82
  }
78
83
  AnyValue::Binary(v) => RString::from_slice(v).into_value(),
79
84
  AnyValue::BinaryOwned(v) => RString::from_slice(&v).into_value(),
@@ -83,6 +88,13 @@ pub(crate) fn any_value_into_rb_object(av: AnyValue, ruby: &Ruby) -> Value {
83
88
  }
84
89
  }
85
90
 
91
+ fn datetime_to_rb_object(v: i64, tu: TimeUnit, tz: Option<&TimeZone>) -> Value {
92
+ let tu = tu.to_ascii();
93
+ utils()
94
+ .funcall("_to_ruby_datetime", (v, tu, tz.map(|v| v.to_string())))
95
+ .unwrap()
96
+ }
97
+
86
98
  pub(crate) fn rb_object_to_any_value<'s>(ob: Value, strict: bool) -> RbResult<AnyValue<'s>> {
87
99
  // Conversion functions.
88
100
  fn get_null(_ob: Value, _strict: bool) -> RbResult<AnyValue<'static>> {
@@ -164,9 +176,8 @@ pub(crate) fn rb_object_to_any_value<'s>(ob: Value, strict: bool) -> RbResult<An
164
176
  let len = dict.len();
165
177
  let mut keys = Vec::with_capacity(len);
166
178
  let mut vals = Vec::with_capacity(len);
167
- dict.foreach(|k: Value, v: Value| {
168
- let key = String::try_convert(k)?;
169
- let val = Wrap::<AnyValue>::try_convert(v)?.0;
179
+ dict.foreach(|key: String, val: Wrap<AnyValue>| {
180
+ let val = val.0;
170
181
  let dtype = DataType::from(&val);
171
182
  keys.push(Field::new(key.into(), dtype));
172
183
  vals.push(val);
@@ -190,7 +201,7 @@ pub(crate) fn rb_object_to_any_value<'s>(ob: Value, strict: bool) -> RbResult<An
190
201
  let v = sec * 1_000_000_000 + nsec;
191
202
  // TODO support time zone when possible
192
203
  // https://github.com/pola-rs/polars/issues/9103
193
- Ok(AnyValue::Datetime(v, TimeUnit::Nanoseconds, &None))
204
+ Ok(AnyValue::Datetime(v, TimeUnit::Nanoseconds, None))
194
205
  }
195
206
 
196
207
  fn get_datetime(ob: Value, _strict: bool) -> RbResult<AnyValue<'static>> {
@@ -199,7 +210,7 @@ pub(crate) fn rb_object_to_any_value<'s>(ob: Value, strict: bool) -> RbResult<An
199
210
  Ok(AnyValue::Datetime(
200
211
  sec * 1_000_000_000 + nsec,
201
212
  TimeUnit::Nanoseconds,
202
- &None,
213
+ None,
203
214
  ))
204
215
  }
205
216
 
@@ -224,7 +235,9 @@ pub(crate) fn rb_object_to_any_value<'s>(ob: Value, strict: bool) -> RbResult<An
224
235
 
225
236
  let (sign, digits, _, exp): (i8, String, i32, i32) = ob.funcall("split", ()).unwrap();
226
237
  let (mut v, scale) = abs_decimal_from_digits(digits, exp).ok_or_else(|| {
227
- RbPolarsErr::other("BigDecimal is too large to fit in Decimal128".into())
238
+ RbErr::from(RbPolarsErr::Other(
239
+ "BigDecimal is too large to fit in Decimal128".into(),
240
+ ))
228
241
  })?;
229
242
  if sign < 0 {
230
243
  // TODO better error
@@ -259,9 +272,6 @@ pub(crate) fn rb_object_to_any_value<'s>(ob: Value, strict: bool) -> RbResult<An
259
272
  } else if ob.is_kind_of(crate::rb_modules::bigdecimal()) {
260
273
  get_decimal(ob, strict)
261
274
  } else {
262
- Err(RbPolarsErr::other(format!(
263
- "object type not supported {:?}",
264
- ob
265
- )))
275
+ Err(RbPolarsErr::Other(format!("object type not supported {:?}", ob)).into())
266
276
  }
267
277
  }
@@ -2,12 +2,14 @@ pub(crate) mod any_value;
2
2
  mod chunked_array;
3
3
 
4
4
  use std::fmt::{Debug, Display, Formatter};
5
+ use std::fs::File;
5
6
  use std::hash::{Hash, Hasher};
6
7
  use std::num::NonZeroUsize;
8
+ use std::path::PathBuf;
7
9
 
8
10
  use magnus::{
9
- class, exception, prelude::*, r_hash::ForEach, value::Opaque, IntoValue, Module, RArray, RHash,
10
- Ruby, Symbol, TryConvert, Value,
11
+ class, exception, prelude::*, r_hash::ForEach, try_convert::TryConvertOwned, value::Opaque,
12
+ IntoValue, Module, RArray, RHash, Ruby, Symbol, TryConvert, Value,
11
13
  };
12
14
  use polars::chunked_array::object::PolarsObjectSafe;
13
15
  use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
@@ -15,12 +17,15 @@ use polars::datatypes::AnyValue;
15
17
  use polars::frame::row::Row;
16
18
  use polars::frame::NullStrategy;
17
19
  use polars::io::avro::AvroCompression;
20
+ use polars::io::cloud::CloudOptions;
18
21
  use polars::prelude::*;
19
22
  use polars::series::ops::NullBehavior;
20
23
  use polars_core::utils::arrow::array::Array;
21
24
  use polars_core::utils::materialize_dyn_int;
25
+ use polars_plan::plans::ScanSources;
22
26
  use polars_utils::total_ord::{TotalEq, TotalHash};
23
27
 
28
+ use crate::file::{get_ruby_scan_source_input, RubyScanSourceInput};
24
29
  use crate::object::OBJECT_NAME;
25
30
  use crate::rb_modules::series;
26
31
  use crate::{RbDataFrame, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbTypeError, RbValueError};
@@ -104,9 +109,10 @@ impl TryConvert for Wrap<NullValues> {
104
109
  .collect(),
105
110
  )))
106
111
  } else {
107
- Err(RbPolarsErr::other(
108
- "could not extract value from null_values argument".into(),
109
- ))
112
+ Err(
113
+ RbPolarsErr::Other("could not extract value from null_values argument".into())
114
+ .into(),
115
+ )
110
116
  }
111
117
  }
112
118
  }
@@ -328,7 +334,6 @@ impl TryConvert for Wrap<DataType> {
328
334
  )))
329
335
  }
330
336
  }
331
- // TODO improve
332
337
  } else if String::try_convert(ob).is_err() {
333
338
  let name = unsafe { ob.class().name() }.into_owned();
334
339
  match name.as_str() {
@@ -434,6 +439,8 @@ impl TryConvert for Wrap<DataType> {
434
439
  }
435
440
  }
436
441
 
442
+ unsafe impl TryConvertOwned for Wrap<DataType> {}
443
+
437
444
  impl TryConvert for Wrap<StatisticsOptions> {
438
445
  fn try_convert(ob: Value) -> RbResult<Self> {
439
446
  let mut statistics = StatisticsOptions::empty();
@@ -452,8 +459,7 @@ impl TryConvert for Wrap<StatisticsOptions> {
452
459
  }
453
460
  }
454
461
  Ok(ForEach::Continue)
455
- })
456
- .unwrap();
462
+ })?;
457
463
 
458
464
  Ok(Wrap(statistics))
459
465
  }
@@ -478,13 +484,75 @@ impl TryConvert for Wrap<Schema> {
478
484
  dict.foreach(|key: String, val: Wrap<DataType>| {
479
485
  schema.push(Ok(Field::new((&*key).into(), val.0)));
480
486
  Ok(ForEach::Continue)
481
- })
482
- .unwrap();
487
+ })?;
483
488
 
484
489
  Ok(Wrap(schema.into_iter().collect::<RbResult<Schema>>()?))
485
490
  }
486
491
  }
487
492
 
493
+ impl TryConvert for Wrap<ScanSources> {
494
+ fn try_convert(ob: Value) -> RbResult<Self> {
495
+ let list = RArray::try_convert(ob)?;
496
+
497
+ if list.is_empty() {
498
+ return Ok(Wrap(ScanSources::default()));
499
+ }
500
+
501
+ enum MutableSources {
502
+ Paths(Vec<PathBuf>),
503
+ Files(Vec<File>),
504
+ Buffers(Vec<bytes::Bytes>),
505
+ }
506
+
507
+ let num_items = list.len();
508
+ let mut iter = list
509
+ .into_iter()
510
+ .map(|val| get_ruby_scan_source_input(val, false));
511
+
512
+ let Some(first) = iter.next() else {
513
+ return Ok(Wrap(ScanSources::default()));
514
+ };
515
+
516
+ let mut sources = match first? {
517
+ RubyScanSourceInput::Path(path) => {
518
+ let mut sources = Vec::with_capacity(num_items);
519
+ sources.push(path);
520
+ MutableSources::Paths(sources)
521
+ }
522
+ RubyScanSourceInput::File(file) => {
523
+ let mut sources = Vec::with_capacity(num_items);
524
+ sources.push(file);
525
+ MutableSources::Files(sources)
526
+ }
527
+ RubyScanSourceInput::Buffer(buffer) => {
528
+ let mut sources = Vec::with_capacity(num_items);
529
+ sources.push(buffer);
530
+ MutableSources::Buffers(sources)
531
+ }
532
+ };
533
+
534
+ for source in iter {
535
+ match (&mut sources, source?) {
536
+ (MutableSources::Paths(v), RubyScanSourceInput::Path(p)) => v.push(p),
537
+ (MutableSources::Files(v), RubyScanSourceInput::File(f)) => v.push(f),
538
+ (MutableSources::Buffers(v), RubyScanSourceInput::Buffer(f)) => v.push(f),
539
+ _ => {
540
+ return Err(RbTypeError::new_err(
541
+ "Cannot combine in-memory bytes, paths and files for scan sources"
542
+ .to_string(),
543
+ ))
544
+ }
545
+ }
546
+ }
547
+
548
+ Ok(Wrap(match sources {
549
+ MutableSources::Paths(i) => ScanSources::Paths(i.into()),
550
+ MutableSources::Files(i) => ScanSources::Files(i.into()),
551
+ MutableSources::Buffers(i) => ScanSources::Buffers(i.into()),
552
+ }))
553
+ }
554
+ }
555
+
488
556
  #[derive(Clone)]
489
557
  pub struct ObjectValue {
490
558
  pub inner: Opaque<Value>,
@@ -493,7 +561,7 @@ pub struct ObjectValue {
493
561
  impl Debug for ObjectValue {
494
562
  fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
495
563
  f.debug_struct("ObjectValue")
496
- .field("inner", &self.to_object())
564
+ .field("inner", &self.to_value())
497
565
  .finish()
498
566
  }
499
567
  }
@@ -501,7 +569,7 @@ impl Debug for ObjectValue {
501
569
  impl Hash for ObjectValue {
502
570
  fn hash<H: Hasher>(&self, state: &mut H) {
503
571
  let h = self
504
- .to_object()
572
+ .to_value()
505
573
  .funcall::<_, _, isize>("hash", ())
506
574
  .expect("should be hashable");
507
575
  state.write_isize(h)
@@ -512,7 +580,7 @@ impl Eq for ObjectValue {}
512
580
 
513
581
  impl PartialEq for ObjectValue {
514
582
  fn eq(&self, other: &Self) -> bool {
515
- self.to_object().eql(other.to_object()).unwrap_or(false)
583
+ self.to_value().eql(other.to_value()).unwrap_or(false)
516
584
  }
517
585
  }
518
586
 
@@ -533,7 +601,7 @@ impl TotalHash for ObjectValue {
533
601
 
534
602
  impl Display for ObjectValue {
535
603
  fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
536
- write!(f, "{}", self.to_object())
604
+ write!(f, "{}", self.to_value())
537
605
  }
538
606
  }
539
607
 
@@ -561,16 +629,15 @@ impl From<&dyn PolarsObjectSafe> for &ObjectValue {
561
629
  }
562
630
  }
563
631
 
564
- // TODO remove
565
632
  impl ObjectValue {
566
- pub fn to_object(&self) -> Value {
567
- Ruby::get().unwrap().get_inner(self.inner)
633
+ pub fn to_value(&self) -> Value {
634
+ self.clone().into_value()
568
635
  }
569
636
  }
570
637
 
571
638
  impl IntoValue for ObjectValue {
572
- fn into_value_with(self, _: &Ruby) -> Value {
573
- self.to_object()
639
+ fn into_value_with(self, ruby: &Ruby) -> Value {
640
+ ruby.get_inner(self.inner)
574
641
  }
575
642
  }
576
643
 
@@ -587,10 +654,10 @@ impl TryConvert for Wrap<AsofStrategy> {
587
654
  let parsed = match String::try_convert(ob)?.as_str() {
588
655
  "backward" => AsofStrategy::Backward,
589
656
  "forward" => AsofStrategy::Forward,
657
+ "nearest" => AsofStrategy::Nearest,
590
658
  v => {
591
659
  return Err(RbValueError::new_err(format!(
592
- "strategy must be one of {{'backward', 'forward'}}, got {}",
593
- v
660
+ "asof `strategy` must be one of {{'backward', 'forward', 'nearest'}}, got {v}",
594
661
  )))
595
662
  }
596
663
  };
@@ -830,14 +897,14 @@ impl TryConvert for Wrap<ParallelStrategy> {
830
897
  }
831
898
  }
832
899
 
833
- impl TryConvert for Wrap<QuantileInterpolOptions> {
900
+ impl TryConvert for Wrap<QuantileMethod> {
834
901
  fn try_convert(ob: Value) -> RbResult<Self> {
835
902
  let parsed = match String::try_convert(ob)?.as_str() {
836
- "lower" => QuantileInterpolOptions::Lower,
837
- "higher" => QuantileInterpolOptions::Higher,
838
- "nearest" => QuantileInterpolOptions::Nearest,
839
- "linear" => QuantileInterpolOptions::Linear,
840
- "midpoint" => QuantileInterpolOptions::Midpoint,
903
+ "lower" => QuantileMethod::Lower,
904
+ "higher" => QuantileMethod::Higher,
905
+ "nearest" => QuantileMethod::Nearest,
906
+ "linear" => QuantileMethod::Linear,
907
+ "midpoint" => QuantileMethod::Midpoint,
841
908
  v => {
842
909
  return Err(RbValueError::new_err(format!(
843
910
  "interpolation must be one of {{'lower', 'higher', 'nearest', 'linear', 'midpoint'}}, got {}",
@@ -1001,6 +1068,11 @@ impl TryConvert for Wrap<QuoteStyle> {
1001
1068
  }
1002
1069
  }
1003
1070
 
1071
+ pub(crate) fn parse_cloud_options(uri: &str, kv: Vec<(String, String)>) -> RbResult<CloudOptions> {
1072
+ let out = CloudOptions::from_untyped_config(uri, kv).map_err(RbPolarsErr::from)?;
1073
+ Ok(out)
1074
+ }
1075
+
1004
1076
  pub fn parse_fill_null_strategy(
1005
1077
  strategy: &str,
1006
1078
  limit: FillNullLimit,
@@ -1071,7 +1143,7 @@ impl TryConvert for Wrap<NonZeroUsize> {
1071
1143
  let v = usize::try_convert(ob)?;
1072
1144
  NonZeroUsize::new(v)
1073
1145
  .map(Wrap)
1074
- .ok_or(RbValueError::new_err("must be non-zero".into()))
1146
+ .ok_or(RbValueError::new_err("must be non-zero"))
1075
1147
  }
1076
1148
  }
1077
1149
 
@@ -1085,3 +1157,28 @@ where
1085
1157
  .map(|s| PlSmallStr::from_str(s.as_ref()))
1086
1158
  .collect()
1087
1159
  }
1160
+
1161
+ #[derive(Debug, Copy, Clone)]
1162
+ pub struct RbCompatLevel(pub CompatLevel);
1163
+
1164
+ impl TryConvert for RbCompatLevel {
1165
+ fn try_convert(ob: Value) -> RbResult<Self> {
1166
+ Ok(RbCompatLevel(if let Ok(level) = u16::try_convert(ob) {
1167
+ if let Ok(compat_level) = CompatLevel::with_level(level) {
1168
+ compat_level
1169
+ } else {
1170
+ return Err(RbValueError::new_err("invalid compat level".to_string()));
1171
+ }
1172
+ } else if let Ok(future) = bool::try_convert(ob) {
1173
+ if future {
1174
+ CompatLevel::newest()
1175
+ } else {
1176
+ CompatLevel::oldest()
1177
+ }
1178
+ } else {
1179
+ return Err(RbTypeError::new_err(
1180
+ "'compat_level' argument accepts int or bool".to_string(),
1181
+ ));
1182
+ }))
1183
+ }
1184
+ }
@@ -54,9 +54,6 @@ fn finish_from_rows(
54
54
  schema_overrides: Option<Schema>,
55
55
  infer_schema_length: Option<usize>,
56
56
  ) -> RbResult<RbDataFrame> {
57
- // Object builder must be registered
58
- crate::on_startup::register_object_builder();
59
-
60
57
  let mut schema = if let Some(mut schema) = schema {
61
58
  resolve_schema_overrides(&mut schema, schema_overrides);
62
59
  update_schema_from_rows(&mut schema, &rows, infer_schema_length)?;
@@ -2,6 +2,8 @@ use magnus::{prelude::*, IntoValue, RArray, Value};
2
2
 
3
3
  use super::*;
4
4
  use crate::conversion::{ObjectValue, Wrap};
5
+ use crate::interop::arrow::to_ruby::dataframe_to_stream;
6
+ use crate::RbResult;
5
7
 
6
8
  impl RbDataFrame {
7
9
  pub fn row_tuple(&self, idx: i64) -> Value {
@@ -18,7 +20,7 @@ impl RbDataFrame {
18
20
  .map(|s| match s.dtype() {
19
21
  DataType::Object(_, _) => {
20
22
  let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
21
- obj.unwrap().to_object()
23
+ obj.unwrap().to_value()
22
24
  }
23
25
  _ => Wrap(s.get(idx).unwrap()).into_value(),
24
26
  }),
@@ -37,7 +39,7 @@ impl RbDataFrame {
37
39
  .map(|s| match s.dtype() {
38
40
  DataType::Object(_, _) => {
39
41
  let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
40
- obj.unwrap().to_object()
42
+ obj.unwrap().to_value()
41
43
  }
42
44
  _ => Wrap(s.get(idx).unwrap()).into_value(),
43
45
  }),
@@ -45,4 +47,9 @@ impl RbDataFrame {
45
47
  }))
46
48
  .as_value()
47
49
  }
50
+
51
+ pub fn __arrow_c_stream__(&self) -> RbResult<Value> {
52
+ self.df.borrow_mut().align_chunks();
53
+ dataframe_to_stream(&self.df.borrow())
54
+ }
48
55
  }
@@ -10,14 +10,14 @@ use crate::map::dataframe::{
10
10
  apply_lambda_with_utf8_out_type,
11
11
  };
12
12
  use crate::prelude::strings_to_pl_smallstr;
13
- use crate::series::{to_rbseries_collection, to_series_collection};
13
+ use crate::series::{to_rbseries, to_series};
14
14
  use crate::{RbDataFrame, RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
15
15
 
16
16
  impl RbDataFrame {
17
17
  pub fn init(columns: RArray) -> RbResult<Self> {
18
18
  let mut cols = Vec::new();
19
19
  for i in columns.into_iter() {
20
- cols.push(<&RbSeries>::try_convert(i)?.series.borrow().clone());
20
+ cols.push(<&RbSeries>::try_convert(i)?.series.borrow().clone().into());
21
21
  }
22
22
  let df = DataFrame::new(cols).map_err(RbPolarsErr::from)?;
23
23
  Ok(RbDataFrame::new(df))
@@ -128,7 +128,7 @@ impl RbDataFrame {
128
128
 
129
129
  pub fn get_columns(&self) -> RArray {
130
130
  let cols = self.df.borrow().get_columns().to_vec();
131
- to_rbseries_collection(cols)
131
+ to_rbseries(cols)
132
132
  }
133
133
 
134
134
  pub fn columns(&self) -> Vec<String> {
@@ -174,7 +174,8 @@ impl RbDataFrame {
174
174
  }
175
175
 
176
176
  pub fn hstack(&self, columns: RArray) -> RbResult<Self> {
177
- let columns = to_series_collection(columns)?;
177
+ let columns = to_series(columns)?;
178
+ let columns = columns.into_iter().map(Into::into).collect::<Vec<_>>();
178
179
  let df = self
179
180
  .df
180
181
  .borrow()
@@ -184,7 +185,8 @@ impl RbDataFrame {
184
185
  }
185
186
 
186
187
  pub fn hstack_mut(&self, columns: RArray) -> RbResult<()> {
187
- let columns = to_series_collection(columns)?;
188
+ let columns = to_series(columns)?;
189
+ let columns = columns.into_iter().map(Into::into).collect::<Vec<_>>();
188
190
  self.df
189
191
  .borrow_mut()
190
192
  .hstack_mut(&columns)
@@ -223,6 +225,7 @@ impl RbDataFrame {
223
225
  .borrow_mut()
224
226
  .drop_in_place(&name)
225
227
  .map_err(RbPolarsErr::from)?;
228
+ let s = s.take_materialized_series();
226
229
  Ok(RbSeries::new(s))
227
230
  }
228
231
 
@@ -230,7 +233,7 @@ impl RbDataFrame {
230
233
  self.df
231
234
  .borrow()
232
235
  .select_at_idx(idx)
233
- .map(|s| RbSeries::new(s.clone()))
236
+ .map(|s| RbSeries::new(s.as_materialized_series().clone()))
234
237
  }
235
238
 
236
239
  pub fn get_column_index(&self, name: String) -> Option<usize> {
@@ -238,11 +241,13 @@ impl RbDataFrame {
238
241
  }
239
242
 
240
243
  pub fn get_column(&self, name: String) -> RbResult<RbSeries> {
241
- self.df
244
+ let series = self
245
+ .df
242
246
  .borrow()
243
247
  .column(&name)
244
- .map(|s| RbSeries::new(s.clone()))
245
- .map_err(RbPolarsErr::from)
248
+ .map(|s| RbSeries::new(s.as_materialized_series().clone()))
249
+ .map_err(RbPolarsErr::from)?;
250
+ Ok(series)
246
251
  }
247
252
 
248
253
  pub fn select(&self, selection: Vec<String>) -> RbResult<Self> {
@@ -411,7 +416,7 @@ impl RbDataFrame {
411
416
  .borrow()
412
417
  .max_horizontal()
413
418
  .map_err(RbPolarsErr::from)?;
414
- Ok(s.map(|s| s.into()))
419
+ Ok(s.map(|s| s.take_materialized_series().into()))
415
420
  }
416
421
 
417
422
  pub fn min_horizontal(&self) -> RbResult<Option<RbSeries>> {
@@ -420,7 +425,7 @@ impl RbDataFrame {
420
425
  .borrow()
421
426
  .min_horizontal()
422
427
  .map_err(RbPolarsErr::from)?;
423
- Ok(s.map(|s| s.into()))
428
+ Ok(s.map(|s| s.take_materialized_series().into()))
424
429
  }
425
430
 
426
431
  pub fn sum_horizontal(&self, ignore_nulls: bool) -> RbResult<Option<RbSeries>> {