polars-df 0.13.0 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86):
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +30 -0
  3. data/Cargo.lock +1368 -319
  4. data/LICENSE.txt +1 -0
  5. data/README.md +1 -2
  6. data/ext/polars/Cargo.toml +15 -6
  7. data/ext/polars/src/batched_csv.rs +10 -13
  8. data/ext/polars/src/conversion/any_value.rs +37 -21
  9. data/ext/polars/src/conversion/chunked_array.rs +3 -3
  10. data/ext/polars/src/conversion/mod.rs +159 -46
  11. data/ext/polars/src/dataframe/construction.rs +4 -7
  12. data/ext/polars/src/dataframe/export.rs +9 -2
  13. data/ext/polars/src/dataframe/general.rs +22 -16
  14. data/ext/polars/src/dataframe/io.rs +78 -174
  15. data/ext/polars/src/dataframe/mod.rs +1 -0
  16. data/ext/polars/src/dataframe/serde.rs +15 -0
  17. data/ext/polars/src/error.rs +31 -48
  18. data/ext/polars/src/exceptions.rs +24 -0
  19. data/ext/polars/src/expr/binary.rs +4 -42
  20. data/ext/polars/src/expr/datetime.rs +16 -7
  21. data/ext/polars/src/expr/general.rs +14 -23
  22. data/ext/polars/src/expr/list.rs +18 -11
  23. data/ext/polars/src/expr/name.rs +3 -2
  24. data/ext/polars/src/expr/rolling.rs +6 -7
  25. data/ext/polars/src/expr/string.rs +17 -37
  26. data/ext/polars/src/file.rs +59 -22
  27. data/ext/polars/src/functions/business.rs +15 -0
  28. data/ext/polars/src/functions/io.rs +6 -6
  29. data/ext/polars/src/functions/lazy.rs +17 -8
  30. data/ext/polars/src/functions/mod.rs +1 -0
  31. data/ext/polars/src/functions/range.rs +4 -2
  32. data/ext/polars/src/interop/arrow/mod.rs +1 -0
  33. data/ext/polars/src/interop/arrow/to_ruby.rs +83 -0
  34. data/ext/polars/src/interop/mod.rs +1 -0
  35. data/ext/polars/src/lazyframe/general.rs +877 -0
  36. data/ext/polars/src/lazyframe/mod.rs +3 -825
  37. data/ext/polars/src/lazyframe/serde.rs +31 -0
  38. data/ext/polars/src/lib.rs +44 -13
  39. data/ext/polars/src/map/dataframe.rs +46 -14
  40. data/ext/polars/src/map/lazy.rs +65 -4
  41. data/ext/polars/src/map/mod.rs +17 -16
  42. data/ext/polars/src/map/series.rs +106 -64
  43. data/ext/polars/src/on_startup.rs +2 -2
  44. data/ext/polars/src/series/aggregation.rs +1 -5
  45. data/ext/polars/src/series/arithmetic.rs +10 -10
  46. data/ext/polars/src/series/construction.rs +52 -25
  47. data/ext/polars/src/series/export.rs +1 -1
  48. data/ext/polars/src/series/general.rs +643 -0
  49. data/ext/polars/src/series/import.rs +55 -0
  50. data/ext/polars/src/series/mod.rs +11 -638
  51. data/ext/polars/src/series/scatter.rs +2 -2
  52. data/ext/polars/src/utils.rs +0 -20
  53. data/lib/polars/batched_csv_reader.rb +0 -2
  54. data/lib/polars/binary_expr.rb +133 -9
  55. data/lib/polars/binary_name_space.rb +101 -6
  56. data/lib/polars/config.rb +4 -0
  57. data/lib/polars/data_frame.rb +285 -62
  58. data/lib/polars/data_type_group.rb +28 -0
  59. data/lib/polars/data_types.rb +2 -0
  60. data/lib/polars/date_time_expr.rb +244 -0
  61. data/lib/polars/date_time_name_space.rb +87 -0
  62. data/lib/polars/expr.rb +109 -8
  63. data/lib/polars/functions/as_datatype.rb +51 -2
  64. data/lib/polars/functions/col.rb +1 -1
  65. data/lib/polars/functions/eager.rb +1 -3
  66. data/lib/polars/functions/lazy.rb +88 -10
  67. data/lib/polars/functions/range/time_range.rb +21 -21
  68. data/lib/polars/io/csv.rb +14 -16
  69. data/lib/polars/io/database.rb +2 -2
  70. data/lib/polars/io/ipc.rb +14 -12
  71. data/lib/polars/io/ndjson.rb +10 -0
  72. data/lib/polars/io/parquet.rb +168 -111
  73. data/lib/polars/lazy_frame.rb +649 -15
  74. data/lib/polars/list_name_space.rb +169 -0
  75. data/lib/polars/selectors.rb +1144 -0
  76. data/lib/polars/series.rb +470 -40
  77. data/lib/polars/string_cache.rb +27 -1
  78. data/lib/polars/string_expr.rb +0 -1
  79. data/lib/polars/string_name_space.rb +73 -3
  80. data/lib/polars/struct_name_space.rb +31 -7
  81. data/lib/polars/utils/various.rb +5 -1
  82. data/lib/polars/utils.rb +45 -10
  83. data/lib/polars/version.rb +1 -1
  84. data/lib/polars.rb +2 -1
  85. metadata +14 -4
  86. data/lib/polars/functions.rb +0 -57
@@ -2,12 +2,14 @@ pub(crate) mod any_value;
2
2
  mod chunked_array;
3
3
 
4
4
  use std::fmt::{Debug, Display, Formatter};
5
+ use std::fs::File;
5
6
  use std::hash::{Hash, Hasher};
6
7
  use std::num::NonZeroUsize;
8
+ use std::path::PathBuf;
7
9
 
8
10
  use magnus::{
9
- class, exception, prelude::*, r_hash::ForEach, value::Opaque, IntoValue, Module, RArray, RHash,
10
- Ruby, Symbol, TryConvert, Value,
11
+ class, exception, prelude::*, r_hash::ForEach, try_convert::TryConvertOwned, value::Opaque,
12
+ IntoValue, Module, RArray, RHash, Ruby, Symbol, TryConvert, Value,
11
13
  };
12
14
  use polars::chunked_array::object::PolarsObjectSafe;
13
15
  use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
@@ -15,13 +17,15 @@ use polars::datatypes::AnyValue;
15
17
  use polars::frame::row::Row;
16
18
  use polars::frame::NullStrategy;
17
19
  use polars::io::avro::AvroCompression;
20
+ use polars::io::cloud::CloudOptions;
18
21
  use polars::prelude::*;
19
22
  use polars::series::ops::NullBehavior;
20
23
  use polars_core::utils::arrow::array::Array;
21
24
  use polars_core::utils::materialize_dyn_int;
25
+ use polars_plan::plans::ScanSources;
22
26
  use polars_utils::total_ord::{TotalEq, TotalHash};
23
- use smartstring::alias::String as SmartString;
24
27
 
28
+ use crate::file::{get_ruby_scan_source_input, RubyScanSourceInput};
25
29
  use crate::object::OBJECT_NAME;
26
30
  use crate::rb_modules::series;
27
31
  use crate::{RbDataFrame, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbTypeError, RbValueError};
@@ -84,18 +88,31 @@ pub(crate) fn to_series(s: RbSeries) -> Value {
84
88
  .unwrap()
85
89
  }
86
90
 
91
+ impl TryConvert for Wrap<PlSmallStr> {
92
+ fn try_convert(ob: Value) -> RbResult<Self> {
93
+ Ok(Wrap((&*String::try_convert(ob)?).into()))
94
+ }
95
+ }
96
+
87
97
  impl TryConvert for Wrap<NullValues> {
88
98
  fn try_convert(ob: Value) -> RbResult<Self> {
89
99
  if let Ok(s) = String::try_convert(ob) {
90
- Ok(Wrap(NullValues::AllColumnsSingle(s)))
100
+ Ok(Wrap(NullValues::AllColumnsSingle((&*s).into())))
91
101
  } else if let Ok(s) = Vec::<String>::try_convert(ob) {
92
- Ok(Wrap(NullValues::AllColumns(s)))
102
+ Ok(Wrap(NullValues::AllColumns(
103
+ s.into_iter().map(|x| (&*x).into()).collect(),
104
+ )))
93
105
  } else if let Ok(s) = Vec::<(String, String)>::try_convert(ob) {
94
- Ok(Wrap(NullValues::Named(s)))
106
+ Ok(Wrap(NullValues::Named(
107
+ s.into_iter()
108
+ .map(|(a, b)| ((&*a).into(), (&*b).into()))
109
+ .collect(),
110
+ )))
95
111
  } else {
96
- Err(RbPolarsErr::other(
97
- "could not extract value from null_values argument".into(),
98
- ))
112
+ Err(
113
+ RbPolarsErr::Other("could not extract value from null_values argument".into())
114
+ .into(),
115
+ )
99
116
  }
100
117
  }
101
118
  }
@@ -189,7 +206,7 @@ impl IntoValue for Wrap<DataType> {
189
206
  DataType::Datetime(tu, tz) => {
190
207
  let datetime_class = pl.const_get::<_, Value>("Datetime").unwrap();
191
208
  datetime_class
192
- .funcall::<_, _, Value>("new", (tu.to_ascii(), tz))
209
+ .funcall::<_, _, Value>("new", (tu.to_ascii(), tz.as_deref()))
193
210
  .unwrap()
194
211
  }
195
212
  DataType::Duration(tu) => {
@@ -210,7 +227,9 @@ impl IntoValue for Wrap<DataType> {
210
227
  // we should always have an initialized rev_map coming from rust
211
228
  let categories = rev_map.as_ref().unwrap().get_categories();
212
229
  let class = pl.const_get::<_, Value>("Enum").unwrap();
213
- let s = Series::from_arrow("category", categories.to_boxed()).unwrap();
230
+ let s =
231
+ Series::from_arrow(PlSmallStr::from_static("category"), categories.to_boxed())
232
+ .unwrap();
214
233
  let series = to_series(s.into());
215
234
  class.funcall::<_, _, Value>("new", (series,)).unwrap()
216
235
  }
@@ -222,7 +241,7 @@ impl IntoValue for Wrap<DataType> {
222
241
  let field_class = pl.const_get::<_, Value>("Field").unwrap();
223
242
  let iter = fields.iter().map(|fld| {
224
243
  let name = fld.name().as_str();
225
- let dtype = Wrap(fld.data_type().clone());
244
+ let dtype = Wrap(fld.dtype().clone());
226
245
  field_class
227
246
  .funcall::<_, _, Value>("new", (name, dtype))
228
247
  .unwrap()
@@ -276,7 +295,7 @@ impl TryConvert for Wrap<Field> {
276
295
  fn try_convert(ob: Value) -> RbResult<Self> {
277
296
  let name: String = ob.funcall("name", ())?;
278
297
  let dtype: Wrap<DataType> = ob.funcall("dtype", ())?;
279
- Ok(Wrap(Field::new(&name, dtype.0)))
298
+ Ok(Wrap(Field::new((&*name).into(), dtype.0)))
280
299
  }
281
300
  }
282
301
 
@@ -315,7 +334,6 @@ impl TryConvert for Wrap<DataType> {
315
334
  )))
316
335
  }
317
336
  }
318
- // TODO improve
319
337
  } else if String::try_convert(ob).is_err() {
320
338
  let name = unsafe { ob.class().name() }.into_owned();
321
339
  match name.as_str() {
@@ -341,7 +359,7 @@ impl TryConvert for Wrap<DataType> {
341
359
  let s = get_series(categories)?;
342
360
  let ca = s.str().map_err(RbPolarsErr::from)?;
343
361
  let categories = ca.downcast_iter().next().unwrap().clone();
344
- create_enum_data_type(categories)
362
+ create_enum_dtype(categories)
345
363
  }
346
364
  "Polars::Date" => DataType::Date,
347
365
  "Polars::Time" => DataType::Time,
@@ -357,8 +375,8 @@ impl TryConvert for Wrap<DataType> {
357
375
  "Polars::Datetime" => {
358
376
  let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
359
377
  let time_unit = Wrap::<TimeUnit>::try_convert(time_unit)?.0;
360
- let time_zone = ob.funcall("time_zone", ())?;
361
- DataType::Datetime(time_unit, time_zone)
378
+ let time_zone: Option<String> = ob.funcall("time_zone", ())?;
379
+ DataType::Datetime(time_unit, time_zone.as_deref().map(|x| x.into()))
362
380
  }
363
381
  "Polars::Decimal" => {
364
382
  let precision = ob.funcall("precision", ())?;
@@ -421,6 +439,8 @@ impl TryConvert for Wrap<DataType> {
421
439
  }
422
440
  }
423
441
 
442
+ unsafe impl TryConvertOwned for Wrap<DataType> {}
443
+
424
444
  impl TryConvert for Wrap<StatisticsOptions> {
425
445
  fn try_convert(ob: Value) -> RbResult<Self> {
426
446
  let mut statistics = StatisticsOptions::empty();
@@ -439,8 +459,7 @@ impl TryConvert for Wrap<StatisticsOptions> {
439
459
  }
440
460
  }
441
461
  Ok(ForEach::Continue)
442
- })
443
- .unwrap();
462
+ })?;
444
463
 
445
464
  Ok(Wrap(statistics))
446
465
  }
@@ -463,15 +482,77 @@ impl TryConvert for Wrap<Schema> {
463
482
 
464
483
  let mut schema = Vec::new();
465
484
  dict.foreach(|key: String, val: Wrap<DataType>| {
466
- schema.push(Ok(Field::new(&key, val.0)));
485
+ schema.push(Ok(Field::new((&*key).into(), val.0)));
467
486
  Ok(ForEach::Continue)
468
- })
469
- .unwrap();
487
+ })?;
470
488
 
471
489
  Ok(Wrap(schema.into_iter().collect::<RbResult<Schema>>()?))
472
490
  }
473
491
  }
474
492
 
493
+ impl TryConvert for Wrap<ScanSources> {
494
+ fn try_convert(ob: Value) -> RbResult<Self> {
495
+ let list = RArray::try_convert(ob)?;
496
+
497
+ if list.is_empty() {
498
+ return Ok(Wrap(ScanSources::default()));
499
+ }
500
+
501
+ enum MutableSources {
502
+ Paths(Vec<PathBuf>),
503
+ Files(Vec<File>),
504
+ Buffers(Vec<bytes::Bytes>),
505
+ }
506
+
507
+ let num_items = list.len();
508
+ let mut iter = list
509
+ .into_iter()
510
+ .map(|val| get_ruby_scan_source_input(val, false));
511
+
512
+ let Some(first) = iter.next() else {
513
+ return Ok(Wrap(ScanSources::default()));
514
+ };
515
+
516
+ let mut sources = match first? {
517
+ RubyScanSourceInput::Path(path) => {
518
+ let mut sources = Vec::with_capacity(num_items);
519
+ sources.push(path);
520
+ MutableSources::Paths(sources)
521
+ }
522
+ RubyScanSourceInput::File(file) => {
523
+ let mut sources = Vec::with_capacity(num_items);
524
+ sources.push(file);
525
+ MutableSources::Files(sources)
526
+ }
527
+ RubyScanSourceInput::Buffer(buffer) => {
528
+ let mut sources = Vec::with_capacity(num_items);
529
+ sources.push(buffer);
530
+ MutableSources::Buffers(sources)
531
+ }
532
+ };
533
+
534
+ for source in iter {
535
+ match (&mut sources, source?) {
536
+ (MutableSources::Paths(v), RubyScanSourceInput::Path(p)) => v.push(p),
537
+ (MutableSources::Files(v), RubyScanSourceInput::File(f)) => v.push(f),
538
+ (MutableSources::Buffers(v), RubyScanSourceInput::Buffer(f)) => v.push(f),
539
+ _ => {
540
+ return Err(RbTypeError::new_err(
541
+ "Cannot combine in-memory bytes, paths and files for scan sources"
542
+ .to_string(),
543
+ ))
544
+ }
545
+ }
546
+ }
547
+
548
+ Ok(Wrap(match sources {
549
+ MutableSources::Paths(i) => ScanSources::Paths(i.into()),
550
+ MutableSources::Files(i) => ScanSources::Files(i.into()),
551
+ MutableSources::Buffers(i) => ScanSources::Buffers(i.into()),
552
+ }))
553
+ }
554
+ }
555
+
475
556
  #[derive(Clone)]
476
557
  pub struct ObjectValue {
477
558
  pub inner: Opaque<Value>,
@@ -480,7 +561,7 @@ pub struct ObjectValue {
480
561
  impl Debug for ObjectValue {
481
562
  fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
482
563
  f.debug_struct("ObjectValue")
483
- .field("inner", &self.to_object())
564
+ .field("inner", &self.to_value())
484
565
  .finish()
485
566
  }
486
567
  }
@@ -488,7 +569,7 @@ impl Debug for ObjectValue {
488
569
  impl Hash for ObjectValue {
489
570
  fn hash<H: Hasher>(&self, state: &mut H) {
490
571
  let h = self
491
- .to_object()
572
+ .to_value()
492
573
  .funcall::<_, _, isize>("hash", ())
493
574
  .expect("should be hashable");
494
575
  state.write_isize(h)
@@ -499,7 +580,7 @@ impl Eq for ObjectValue {}
499
580
 
500
581
  impl PartialEq for ObjectValue {
501
582
  fn eq(&self, other: &Self) -> bool {
502
- self.to_object().eql(other.to_object()).unwrap_or(false)
583
+ self.to_value().eql(other.to_value()).unwrap_or(false)
503
584
  }
504
585
  }
505
586
 
@@ -520,7 +601,7 @@ impl TotalHash for ObjectValue {
520
601
 
521
602
  impl Display for ObjectValue {
522
603
  fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
523
- write!(f, "{}", self.to_object())
604
+ write!(f, "{}", self.to_value())
524
605
  }
525
606
  }
526
607
 
@@ -548,16 +629,15 @@ impl From<&dyn PolarsObjectSafe> for &ObjectValue {
548
629
  }
549
630
  }
550
631
 
551
- // TODO remove
552
632
  impl ObjectValue {
553
- pub fn to_object(&self) -> Value {
554
- Ruby::get().unwrap().get_inner(self.inner)
633
+ pub fn to_value(&self) -> Value {
634
+ self.clone().into_value()
555
635
  }
556
636
  }
557
637
 
558
638
  impl IntoValue for ObjectValue {
559
- fn into_value_with(self, _: &Ruby) -> Value {
560
- self.to_object()
639
+ fn into_value_with(self, ruby: &Ruby) -> Value {
640
+ ruby.get_inner(self.inner)
561
641
  }
562
642
  }
563
643
 
@@ -574,10 +654,10 @@ impl TryConvert for Wrap<AsofStrategy> {
574
654
  let parsed = match String::try_convert(ob)?.as_str() {
575
655
  "backward" => AsofStrategy::Backward,
576
656
  "forward" => AsofStrategy::Forward,
657
+ "nearest" => AsofStrategy::Nearest,
577
658
  v => {
578
659
  return Err(RbValueError::new_err(format!(
579
- "strategy must be one of {{'backward', 'forward'}}, got {}",
580
- v
660
+ "asof `strategy` must be one of {{'backward', 'forward', 'nearest'}}, got {v}",
581
661
  )))
582
662
  }
583
663
  };
@@ -817,14 +897,14 @@ impl TryConvert for Wrap<ParallelStrategy> {
817
897
  }
818
898
  }
819
899
 
820
- impl TryConvert for Wrap<QuantileInterpolOptions> {
900
+ impl TryConvert for Wrap<QuantileMethod> {
821
901
  fn try_convert(ob: Value) -> RbResult<Self> {
822
902
  let parsed = match String::try_convert(ob)?.as_str() {
823
- "lower" => QuantileInterpolOptions::Lower,
824
- "higher" => QuantileInterpolOptions::Higher,
825
- "nearest" => QuantileInterpolOptions::Nearest,
826
- "linear" => QuantileInterpolOptions::Linear,
827
- "midpoint" => QuantileInterpolOptions::Midpoint,
903
+ "lower" => QuantileMethod::Lower,
904
+ "higher" => QuantileMethod::Higher,
905
+ "nearest" => QuantileMethod::Nearest,
906
+ "linear" => QuantileMethod::Linear,
907
+ "midpoint" => QuantileMethod::Midpoint,
828
908
  v => {
829
909
  return Err(RbValueError::new_err(format!(
830
910
  "interpolation must be one of {{'lower', 'higher', 'nearest', 'linear', 'midpoint'}}, got {}",
@@ -988,6 +1068,11 @@ impl TryConvert for Wrap<QuoteStyle> {
988
1068
  }
989
1069
  }
990
1070
 
1071
+ pub(crate) fn parse_cloud_options(uri: &str, kv: Vec<(String, String)>) -> RbResult<CloudOptions> {
1072
+ let out = CloudOptions::from_untyped_config(uri, kv).map_err(RbPolarsErr::from)?;
1073
+ Ok(out)
1074
+ }
1075
+
991
1076
  pub fn parse_fill_null_strategy(
992
1077
  strategy: &str,
993
1078
  limit: FillNullLimit,
@@ -1053,19 +1138,47 @@ pub fn parse_parquet_compression(
1053
1138
  Ok(parsed)
1054
1139
  }
1055
1140
 
1056
- pub(crate) fn strings_to_smartstrings<I, S>(container: I) -> Vec<SmartString>
1141
+ impl TryConvert for Wrap<NonZeroUsize> {
1142
+ fn try_convert(ob: Value) -> RbResult<Self> {
1143
+ let v = usize::try_convert(ob)?;
1144
+ NonZeroUsize::new(v)
1145
+ .map(Wrap)
1146
+ .ok_or(RbValueError::new_err("must be non-zero"))
1147
+ }
1148
+ }
1149
+
1150
+ pub(crate) fn strings_to_pl_smallstr<I, S>(container: I) -> Vec<PlSmallStr>
1057
1151
  where
1058
1152
  I: IntoIterator<Item = S>,
1059
1153
  S: AsRef<str>,
1060
1154
  {
1061
- container.into_iter().map(|s| s.as_ref().into()).collect()
1155
+ container
1156
+ .into_iter()
1157
+ .map(|s| PlSmallStr::from_str(s.as_ref()))
1158
+ .collect()
1062
1159
  }
1063
1160
 
1064
- impl TryConvert for Wrap<NonZeroUsize> {
1161
+ #[derive(Debug, Copy, Clone)]
1162
+ pub struct RbCompatLevel(pub CompatLevel);
1163
+
1164
+ impl TryConvert for RbCompatLevel {
1065
1165
  fn try_convert(ob: Value) -> RbResult<Self> {
1066
- let v = usize::try_convert(ob)?;
1067
- NonZeroUsize::new(v)
1068
- .map(Wrap)
1069
- .ok_or(RbValueError::new_err("must be non-zero".into()))
1166
+ Ok(RbCompatLevel(if let Ok(level) = u16::try_convert(ob) {
1167
+ if let Ok(compat_level) = CompatLevel::with_level(level) {
1168
+ compat_level
1169
+ } else {
1170
+ return Err(RbValueError::new_err("invalid compat level".to_string()));
1171
+ }
1172
+ } else if let Ok(future) = bool::try_convert(ob) {
1173
+ if future {
1174
+ CompatLevel::newest()
1175
+ } else {
1176
+ CompatLevel::oldest()
1177
+ }
1178
+ } else {
1179
+ return Err(RbTypeError::new_err(
1180
+ "'compat_level' argument accepts int or bool".to_string(),
1181
+ ));
1182
+ }))
1070
1183
  }
1071
1184
  }
@@ -54,9 +54,6 @@ fn finish_from_rows(
54
54
  schema_overrides: Option<Schema>,
55
55
  infer_schema_length: Option<usize>,
56
56
  ) -> RbResult<RbDataFrame> {
57
- // Object builder must be registered
58
- crate::on_startup::register_object_builder();
59
-
60
57
  let mut schema = if let Some(mut schema) = schema {
61
58
  resolve_schema_overrides(&mut schema, schema_overrides);
62
59
  update_schema_from_rows(&mut schema, &rows, infer_schema_length)?;
@@ -79,7 +76,7 @@ fn update_schema_from_rows(
79
76
  rows: &[Row],
80
77
  infer_schema_length: Option<usize>,
81
78
  ) -> RbResult<()> {
82
- let schema_is_complete = schema.iter_dtypes().all(|dtype| dtype.is_known());
79
+ let schema_is_complete = schema.iter_values().all(|dtype| dtype.is_known());
83
80
  if schema_is_complete {
84
81
  return Ok(());
85
82
  }
@@ -89,7 +86,7 @@ fn update_schema_from_rows(
89
86
  rows_to_supertypes(rows, infer_schema_length).map_err(RbPolarsErr::from)?;
90
87
  let inferred_dtypes_slice = inferred_dtypes.as_slice();
91
88
 
92
- for (i, dtype) in schema.iter_dtypes_mut().enumerate() {
89
+ for (i, dtype) in schema.iter_values_mut().enumerate() {
93
90
  if !dtype.is_known() {
94
91
  *dtype = inferred_dtypes_slice.get(i).ok_or_else(|| {
95
92
  polars_err!(SchemaMismatch: "the number of columns in the schema does not match the data")
@@ -110,7 +107,7 @@ fn resolve_schema_overrides(schema: &mut Schema, schema_overrides: Option<Schema
110
107
  }
111
108
 
112
109
  fn erase_decimal_precision_scale(schema: &mut Schema) {
113
- for dtype in schema.iter_dtypes_mut() {
110
+ for dtype in schema.iter_values_mut() {
114
111
  if let DataType::Decimal(_, _) = dtype {
115
112
  *dtype = DataType::Decimal(None, None)
116
113
  }
@@ -123,7 +120,7 @@ where
123
120
  {
124
121
  let fields = column_names
125
122
  .into_iter()
126
- .map(|c| Field::new(c, DataType::Unknown(Default::default())));
123
+ .map(|c| Field::new(c.into(), DataType::Unknown(Default::default())));
127
124
  Schema::from_iter(fields)
128
125
  }
129
126
 
@@ -2,6 +2,8 @@ use magnus::{prelude::*, IntoValue, RArray, Value};
2
2
 
3
3
  use super::*;
4
4
  use crate::conversion::{ObjectValue, Wrap};
5
+ use crate::interop::arrow::to_ruby::dataframe_to_stream;
6
+ use crate::RbResult;
5
7
 
6
8
  impl RbDataFrame {
7
9
  pub fn row_tuple(&self, idx: i64) -> Value {
@@ -18,7 +20,7 @@ impl RbDataFrame {
18
20
  .map(|s| match s.dtype() {
19
21
  DataType::Object(_, _) => {
20
22
  let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
21
- obj.unwrap().to_object()
23
+ obj.unwrap().to_value()
22
24
  }
23
25
  _ => Wrap(s.get(idx).unwrap()).into_value(),
24
26
  }),
@@ -37,7 +39,7 @@ impl RbDataFrame {
37
39
  .map(|s| match s.dtype() {
38
40
  DataType::Object(_, _) => {
39
41
  let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
40
- obj.unwrap().to_object()
42
+ obj.unwrap().to_value()
41
43
  }
42
44
  _ => Wrap(s.get(idx).unwrap()).into_value(),
43
45
  }),
@@ -45,4 +47,9 @@ impl RbDataFrame {
45
47
  }))
46
48
  .as_value()
47
49
  }
50
+
51
+ pub fn __arrow_c_stream__(&self) -> RbResult<Value> {
52
+ self.df.borrow_mut().align_chunks();
53
+ dataframe_to_stream(&self.df.borrow())
54
+ }
48
55
  }
@@ -9,14 +9,15 @@ use crate::map::dataframe::{
9
9
  apply_lambda_unknown, apply_lambda_with_bool_out_type, apply_lambda_with_primitive_out_type,
10
10
  apply_lambda_with_utf8_out_type,
11
11
  };
12
- use crate::series::{to_rbseries_collection, to_series_collection};
12
+ use crate::prelude::strings_to_pl_smallstr;
13
+ use crate::series::{to_rbseries, to_series};
13
14
  use crate::{RbDataFrame, RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
14
15
 
15
16
  impl RbDataFrame {
16
17
  pub fn init(columns: RArray) -> RbResult<Self> {
17
18
  let mut cols = Vec::new();
18
19
  for i in columns.into_iter() {
19
- cols.push(<&RbSeries>::try_convert(i)?.series.borrow().clone());
20
+ cols.push(<&RbSeries>::try_convert(i)?.series.borrow().clone().into());
20
21
  }
21
22
  let df = DataFrame::new(cols).map_err(RbPolarsErr::from)?;
22
23
  Ok(RbDataFrame::new(df))
@@ -127,7 +128,7 @@ impl RbDataFrame {
127
128
 
128
129
  pub fn get_columns(&self) -> RArray {
129
130
  let cols = self.df.borrow().get_columns().to_vec();
130
- to_rbseries_collection(cols)
131
+ to_rbseries(cols)
131
132
  }
132
133
 
133
134
  pub fn columns(&self) -> Vec<String> {
@@ -173,7 +174,8 @@ impl RbDataFrame {
173
174
  }
174
175
 
175
176
  pub fn hstack(&self, columns: RArray) -> RbResult<Self> {
176
- let columns = to_series_collection(columns)?;
177
+ let columns = to_series(columns)?;
178
+ let columns = columns.into_iter().map(Into::into).collect::<Vec<_>>();
177
179
  let df = self
178
180
  .df
179
181
  .borrow()
@@ -183,7 +185,8 @@ impl RbDataFrame {
183
185
  }
184
186
 
185
187
  pub fn hstack_mut(&self, columns: RArray) -> RbResult<()> {
186
- let columns = to_series_collection(columns)?;
188
+ let columns = to_series(columns)?;
189
+ let columns = columns.into_iter().map(Into::into).collect::<Vec<_>>();
187
190
  self.df
188
191
  .borrow_mut()
189
192
  .hstack_mut(&columns)
@@ -222,6 +225,7 @@ impl RbDataFrame {
222
225
  .borrow_mut()
223
226
  .drop_in_place(&name)
224
227
  .map_err(RbPolarsErr::from)?;
228
+ let s = s.take_materialized_series();
225
229
  Ok(RbSeries::new(s))
226
230
  }
227
231
 
@@ -229,7 +233,7 @@ impl RbDataFrame {
229
233
  self.df
230
234
  .borrow()
231
235
  .select_at_idx(idx)
232
- .map(|s| RbSeries::new(s.clone()))
236
+ .map(|s| RbSeries::new(s.as_materialized_series().clone()))
233
237
  }
234
238
 
235
239
  pub fn get_column_index(&self, name: String) -> Option<usize> {
@@ -237,11 +241,13 @@ impl RbDataFrame {
237
241
  }
238
242
 
239
243
  pub fn get_column(&self, name: String) -> RbResult<RbSeries> {
240
- self.df
244
+ let series = self
245
+ .df
241
246
  .borrow()
242
247
  .column(&name)
243
- .map(|s| RbSeries::new(s.clone()))
244
- .map_err(RbPolarsErr::from)
248
+ .map(|s| RbSeries::new(s.as_materialized_series().clone()))
249
+ .map_err(RbPolarsErr::from)?;
250
+ Ok(series)
245
251
  }
246
252
 
247
253
  pub fn select(&self, selection: Vec<String>) -> RbResult<Self> {
@@ -254,7 +260,7 @@ impl RbDataFrame {
254
260
  }
255
261
 
256
262
  pub fn gather(&self, indices: Vec<IdxSize>) -> RbResult<Self> {
257
- let indices = IdxCa::from_vec("", indices);
263
+ let indices = IdxCa::from_vec("".into(), indices);
258
264
  let df = self.df.borrow().take(&indices).map_err(RbPolarsErr::from)?;
259
265
  Ok(RbDataFrame::new(df))
260
266
  }
@@ -332,7 +338,7 @@ impl RbDataFrame {
332
338
  let df = self
333
339
  .df
334
340
  .borrow()
335
- .with_row_index(&name, offset)
341
+ .with_row_index(name.into(), offset)
336
342
  .map_err(RbPolarsErr::from)?;
337
343
  Ok(df.into())
338
344
  }
@@ -349,8 +355,8 @@ impl RbDataFrame {
349
355
  variable_name: Option<String>,
350
356
  ) -> RbResult<Self> {
351
357
  let args = UnpivotArgsIR {
352
- on: strings_to_smartstrings(on),
353
- index: strings_to_smartstrings(index),
358
+ on: strings_to_pl_smallstr(on),
359
+ index: strings_to_pl_smallstr(index),
354
360
  value_name: value_name.map(|s| s.into()),
355
361
  variable_name: variable_name.map(|s| s.into()),
356
362
  };
@@ -410,7 +416,7 @@ impl RbDataFrame {
410
416
  .borrow()
411
417
  .max_horizontal()
412
418
  .map_err(RbPolarsErr::from)?;
413
- Ok(s.map(|s| s.into()))
419
+ Ok(s.map(|s| s.take_materialized_series().into()))
414
420
  }
415
421
 
416
422
  pub fn min_horizontal(&self) -> RbResult<Option<RbSeries>> {
@@ -419,7 +425,7 @@ impl RbDataFrame {
419
425
  .borrow()
420
426
  .min_horizontal()
421
427
  .map_err(RbPolarsErr::from)?;
422
- Ok(s.map(|s| s.into()))
428
+ Ok(s.map(|s| s.take_materialized_series().into()))
423
429
  }
424
430
 
425
431
  pub fn sum_horizontal(&self, ignore_nulls: bool) -> RbResult<Option<RbSeries>> {
@@ -581,7 +587,7 @@ impl RbDataFrame {
581
587
  }
582
588
 
583
589
  pub fn to_struct(&self, name: String) -> RbSeries {
584
- let s = self.df.borrow().clone().into_struct(&name);
590
+ let s = self.df.borrow().clone().into_struct(name.into());
585
591
  s.into_series().into()
586
592
  }
587
593