polars-df 0.13.0 → 0.15.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (86) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +30 -0
  3. data/Cargo.lock +1368 -319
  4. data/LICENSE.txt +1 -0
  5. data/README.md +1 -2
  6. data/ext/polars/Cargo.toml +15 -6
  7. data/ext/polars/src/batched_csv.rs +10 -13
  8. data/ext/polars/src/conversion/any_value.rs +37 -21
  9. data/ext/polars/src/conversion/chunked_array.rs +3 -3
  10. data/ext/polars/src/conversion/mod.rs +159 -46
  11. data/ext/polars/src/dataframe/construction.rs +4 -7
  12. data/ext/polars/src/dataframe/export.rs +9 -2
  13. data/ext/polars/src/dataframe/general.rs +22 -16
  14. data/ext/polars/src/dataframe/io.rs +78 -174
  15. data/ext/polars/src/dataframe/mod.rs +1 -0
  16. data/ext/polars/src/dataframe/serde.rs +15 -0
  17. data/ext/polars/src/error.rs +31 -48
  18. data/ext/polars/src/exceptions.rs +24 -0
  19. data/ext/polars/src/expr/binary.rs +4 -42
  20. data/ext/polars/src/expr/datetime.rs +16 -7
  21. data/ext/polars/src/expr/general.rs +14 -23
  22. data/ext/polars/src/expr/list.rs +18 -11
  23. data/ext/polars/src/expr/name.rs +3 -2
  24. data/ext/polars/src/expr/rolling.rs +6 -7
  25. data/ext/polars/src/expr/string.rs +17 -37
  26. data/ext/polars/src/file.rs +59 -22
  27. data/ext/polars/src/functions/business.rs +15 -0
  28. data/ext/polars/src/functions/io.rs +6 -6
  29. data/ext/polars/src/functions/lazy.rs +17 -8
  30. data/ext/polars/src/functions/mod.rs +1 -0
  31. data/ext/polars/src/functions/range.rs +4 -2
  32. data/ext/polars/src/interop/arrow/mod.rs +1 -0
  33. data/ext/polars/src/interop/arrow/to_ruby.rs +83 -0
  34. data/ext/polars/src/interop/mod.rs +1 -0
  35. data/ext/polars/src/lazyframe/general.rs +877 -0
  36. data/ext/polars/src/lazyframe/mod.rs +3 -825
  37. data/ext/polars/src/lazyframe/serde.rs +31 -0
  38. data/ext/polars/src/lib.rs +44 -13
  39. data/ext/polars/src/map/dataframe.rs +46 -14
  40. data/ext/polars/src/map/lazy.rs +65 -4
  41. data/ext/polars/src/map/mod.rs +17 -16
  42. data/ext/polars/src/map/series.rs +106 -64
  43. data/ext/polars/src/on_startup.rs +2 -2
  44. data/ext/polars/src/series/aggregation.rs +1 -5
  45. data/ext/polars/src/series/arithmetic.rs +10 -10
  46. data/ext/polars/src/series/construction.rs +52 -25
  47. data/ext/polars/src/series/export.rs +1 -1
  48. data/ext/polars/src/series/general.rs +643 -0
  49. data/ext/polars/src/series/import.rs +55 -0
  50. data/ext/polars/src/series/mod.rs +11 -638
  51. data/ext/polars/src/series/scatter.rs +2 -2
  52. data/ext/polars/src/utils.rs +0 -20
  53. data/lib/polars/batched_csv_reader.rb +0 -2
  54. data/lib/polars/binary_expr.rb +133 -9
  55. data/lib/polars/binary_name_space.rb +101 -6
  56. data/lib/polars/config.rb +4 -0
  57. data/lib/polars/data_frame.rb +285 -62
  58. data/lib/polars/data_type_group.rb +28 -0
  59. data/lib/polars/data_types.rb +2 -0
  60. data/lib/polars/date_time_expr.rb +244 -0
  61. data/lib/polars/date_time_name_space.rb +87 -0
  62. data/lib/polars/expr.rb +109 -8
  63. data/lib/polars/functions/as_datatype.rb +51 -2
  64. data/lib/polars/functions/col.rb +1 -1
  65. data/lib/polars/functions/eager.rb +1 -3
  66. data/lib/polars/functions/lazy.rb +88 -10
  67. data/lib/polars/functions/range/time_range.rb +21 -21
  68. data/lib/polars/io/csv.rb +14 -16
  69. data/lib/polars/io/database.rb +2 -2
  70. data/lib/polars/io/ipc.rb +14 -12
  71. data/lib/polars/io/ndjson.rb +10 -0
  72. data/lib/polars/io/parquet.rb +168 -111
  73. data/lib/polars/lazy_frame.rb +649 -15
  74. data/lib/polars/list_name_space.rb +169 -0
  75. data/lib/polars/selectors.rb +1144 -0
  76. data/lib/polars/series.rb +470 -40
  77. data/lib/polars/string_cache.rb +27 -1
  78. data/lib/polars/string_expr.rb +0 -1
  79. data/lib/polars/string_name_space.rb +73 -3
  80. data/lib/polars/struct_name_space.rb +31 -7
  81. data/lib/polars/utils/various.rb +5 -1
  82. data/lib/polars/utils.rb +45 -10
  83. data/lib/polars/version.rb +1 -1
  84. data/lib/polars.rb +2 -1
  85. metadata +14 -4
  86. data/lib/polars/functions.rb +0 -57
@@ -2,12 +2,14 @@ pub(crate) mod any_value;
2
2
  mod chunked_array;
3
3
 
4
4
  use std::fmt::{Debug, Display, Formatter};
5
+ use std::fs::File;
5
6
  use std::hash::{Hash, Hasher};
6
7
  use std::num::NonZeroUsize;
8
+ use std::path::PathBuf;
7
9
 
8
10
  use magnus::{
9
- class, exception, prelude::*, r_hash::ForEach, value::Opaque, IntoValue, Module, RArray, RHash,
10
- Ruby, Symbol, TryConvert, Value,
11
+ class, exception, prelude::*, r_hash::ForEach, try_convert::TryConvertOwned, value::Opaque,
12
+ IntoValue, Module, RArray, RHash, Ruby, Symbol, TryConvert, Value,
11
13
  };
12
14
  use polars::chunked_array::object::PolarsObjectSafe;
13
15
  use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
@@ -15,13 +17,15 @@ use polars::datatypes::AnyValue;
15
17
  use polars::frame::row::Row;
16
18
  use polars::frame::NullStrategy;
17
19
  use polars::io::avro::AvroCompression;
20
+ use polars::io::cloud::CloudOptions;
18
21
  use polars::prelude::*;
19
22
  use polars::series::ops::NullBehavior;
20
23
  use polars_core::utils::arrow::array::Array;
21
24
  use polars_core::utils::materialize_dyn_int;
25
+ use polars_plan::plans::ScanSources;
22
26
  use polars_utils::total_ord::{TotalEq, TotalHash};
23
- use smartstring::alias::String as SmartString;
24
27
 
28
+ use crate::file::{get_ruby_scan_source_input, RubyScanSourceInput};
25
29
  use crate::object::OBJECT_NAME;
26
30
  use crate::rb_modules::series;
27
31
  use crate::{RbDataFrame, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbTypeError, RbValueError};
@@ -84,18 +88,31 @@ pub(crate) fn to_series(s: RbSeries) -> Value {
84
88
  .unwrap()
85
89
  }
86
90
 
91
+ impl TryConvert for Wrap<PlSmallStr> {
92
+ fn try_convert(ob: Value) -> RbResult<Self> {
93
+ Ok(Wrap((&*String::try_convert(ob)?).into()))
94
+ }
95
+ }
96
+
87
97
  impl TryConvert for Wrap<NullValues> {
88
98
  fn try_convert(ob: Value) -> RbResult<Self> {
89
99
  if let Ok(s) = String::try_convert(ob) {
90
- Ok(Wrap(NullValues::AllColumnsSingle(s)))
100
+ Ok(Wrap(NullValues::AllColumnsSingle((&*s).into())))
91
101
  } else if let Ok(s) = Vec::<String>::try_convert(ob) {
92
- Ok(Wrap(NullValues::AllColumns(s)))
102
+ Ok(Wrap(NullValues::AllColumns(
103
+ s.into_iter().map(|x| (&*x).into()).collect(),
104
+ )))
93
105
  } else if let Ok(s) = Vec::<(String, String)>::try_convert(ob) {
94
- Ok(Wrap(NullValues::Named(s)))
106
+ Ok(Wrap(NullValues::Named(
107
+ s.into_iter()
108
+ .map(|(a, b)| ((&*a).into(), (&*b).into()))
109
+ .collect(),
110
+ )))
95
111
  } else {
96
- Err(RbPolarsErr::other(
97
- "could not extract value from null_values argument".into(),
98
- ))
112
+ Err(
113
+ RbPolarsErr::Other("could not extract value from null_values argument".into())
114
+ .into(),
115
+ )
99
116
  }
100
117
  }
101
118
  }
@@ -189,7 +206,7 @@ impl IntoValue for Wrap<DataType> {
189
206
  DataType::Datetime(tu, tz) => {
190
207
  let datetime_class = pl.const_get::<_, Value>("Datetime").unwrap();
191
208
  datetime_class
192
- .funcall::<_, _, Value>("new", (tu.to_ascii(), tz))
209
+ .funcall::<_, _, Value>("new", (tu.to_ascii(), tz.as_deref()))
193
210
  .unwrap()
194
211
  }
195
212
  DataType::Duration(tu) => {
@@ -210,7 +227,9 @@ impl IntoValue for Wrap<DataType> {
210
227
  // we should always have an initialized rev_map coming from rust
211
228
  let categories = rev_map.as_ref().unwrap().get_categories();
212
229
  let class = pl.const_get::<_, Value>("Enum").unwrap();
213
- let s = Series::from_arrow("category", categories.to_boxed()).unwrap();
230
+ let s =
231
+ Series::from_arrow(PlSmallStr::from_static("category"), categories.to_boxed())
232
+ .unwrap();
214
233
  let series = to_series(s.into());
215
234
  class.funcall::<_, _, Value>("new", (series,)).unwrap()
216
235
  }
@@ -222,7 +241,7 @@ impl IntoValue for Wrap<DataType> {
222
241
  let field_class = pl.const_get::<_, Value>("Field").unwrap();
223
242
  let iter = fields.iter().map(|fld| {
224
243
  let name = fld.name().as_str();
225
- let dtype = Wrap(fld.data_type().clone());
244
+ let dtype = Wrap(fld.dtype().clone());
226
245
  field_class
227
246
  .funcall::<_, _, Value>("new", (name, dtype))
228
247
  .unwrap()
@@ -276,7 +295,7 @@ impl TryConvert for Wrap<Field> {
276
295
  fn try_convert(ob: Value) -> RbResult<Self> {
277
296
  let name: String = ob.funcall("name", ())?;
278
297
  let dtype: Wrap<DataType> = ob.funcall("dtype", ())?;
279
- Ok(Wrap(Field::new(&name, dtype.0)))
298
+ Ok(Wrap(Field::new((&*name).into(), dtype.0)))
280
299
  }
281
300
  }
282
301
 
@@ -315,7 +334,6 @@ impl TryConvert for Wrap<DataType> {
315
334
  )))
316
335
  }
317
336
  }
318
- // TODO improve
319
337
  } else if String::try_convert(ob).is_err() {
320
338
  let name = unsafe { ob.class().name() }.into_owned();
321
339
  match name.as_str() {
@@ -341,7 +359,7 @@ impl TryConvert for Wrap<DataType> {
341
359
  let s = get_series(categories)?;
342
360
  let ca = s.str().map_err(RbPolarsErr::from)?;
343
361
  let categories = ca.downcast_iter().next().unwrap().clone();
344
- create_enum_data_type(categories)
362
+ create_enum_dtype(categories)
345
363
  }
346
364
  "Polars::Date" => DataType::Date,
347
365
  "Polars::Time" => DataType::Time,
@@ -357,8 +375,8 @@ impl TryConvert for Wrap<DataType> {
357
375
  "Polars::Datetime" => {
358
376
  let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
359
377
  let time_unit = Wrap::<TimeUnit>::try_convert(time_unit)?.0;
360
- let time_zone = ob.funcall("time_zone", ())?;
361
- DataType::Datetime(time_unit, time_zone)
378
+ let time_zone: Option<String> = ob.funcall("time_zone", ())?;
379
+ DataType::Datetime(time_unit, time_zone.as_deref().map(|x| x.into()))
362
380
  }
363
381
  "Polars::Decimal" => {
364
382
  let precision = ob.funcall("precision", ())?;
@@ -421,6 +439,8 @@ impl TryConvert for Wrap<DataType> {
421
439
  }
422
440
  }
423
441
 
442
+ unsafe impl TryConvertOwned for Wrap<DataType> {}
443
+
424
444
  impl TryConvert for Wrap<StatisticsOptions> {
425
445
  fn try_convert(ob: Value) -> RbResult<Self> {
426
446
  let mut statistics = StatisticsOptions::empty();
@@ -439,8 +459,7 @@ impl TryConvert for Wrap<StatisticsOptions> {
439
459
  }
440
460
  }
441
461
  Ok(ForEach::Continue)
442
- })
443
- .unwrap();
462
+ })?;
444
463
 
445
464
  Ok(Wrap(statistics))
446
465
  }
@@ -463,15 +482,77 @@ impl TryConvert for Wrap<Schema> {
463
482
 
464
483
  let mut schema = Vec::new();
465
484
  dict.foreach(|key: String, val: Wrap<DataType>| {
466
- schema.push(Ok(Field::new(&key, val.0)));
485
+ schema.push(Ok(Field::new((&*key).into(), val.0)));
467
486
  Ok(ForEach::Continue)
468
- })
469
- .unwrap();
487
+ })?;
470
488
 
471
489
  Ok(Wrap(schema.into_iter().collect::<RbResult<Schema>>()?))
472
490
  }
473
491
  }
474
492
 
493
+ impl TryConvert for Wrap<ScanSources> {
494
+ fn try_convert(ob: Value) -> RbResult<Self> {
495
+ let list = RArray::try_convert(ob)?;
496
+
497
+ if list.is_empty() {
498
+ return Ok(Wrap(ScanSources::default()));
499
+ }
500
+
501
+ enum MutableSources {
502
+ Paths(Vec<PathBuf>),
503
+ Files(Vec<File>),
504
+ Buffers(Vec<bytes::Bytes>),
505
+ }
506
+
507
+ let num_items = list.len();
508
+ let mut iter = list
509
+ .into_iter()
510
+ .map(|val| get_ruby_scan_source_input(val, false));
511
+
512
+ let Some(first) = iter.next() else {
513
+ return Ok(Wrap(ScanSources::default()));
514
+ };
515
+
516
+ let mut sources = match first? {
517
+ RubyScanSourceInput::Path(path) => {
518
+ let mut sources = Vec::with_capacity(num_items);
519
+ sources.push(path);
520
+ MutableSources::Paths(sources)
521
+ }
522
+ RubyScanSourceInput::File(file) => {
523
+ let mut sources = Vec::with_capacity(num_items);
524
+ sources.push(file);
525
+ MutableSources::Files(sources)
526
+ }
527
+ RubyScanSourceInput::Buffer(buffer) => {
528
+ let mut sources = Vec::with_capacity(num_items);
529
+ sources.push(buffer);
530
+ MutableSources::Buffers(sources)
531
+ }
532
+ };
533
+
534
+ for source in iter {
535
+ match (&mut sources, source?) {
536
+ (MutableSources::Paths(v), RubyScanSourceInput::Path(p)) => v.push(p),
537
+ (MutableSources::Files(v), RubyScanSourceInput::File(f)) => v.push(f),
538
+ (MutableSources::Buffers(v), RubyScanSourceInput::Buffer(f)) => v.push(f),
539
+ _ => {
540
+ return Err(RbTypeError::new_err(
541
+ "Cannot combine in-memory bytes, paths and files for scan sources"
542
+ .to_string(),
543
+ ))
544
+ }
545
+ }
546
+ }
547
+
548
+ Ok(Wrap(match sources {
549
+ MutableSources::Paths(i) => ScanSources::Paths(i.into()),
550
+ MutableSources::Files(i) => ScanSources::Files(i.into()),
551
+ MutableSources::Buffers(i) => ScanSources::Buffers(i.into()),
552
+ }))
553
+ }
554
+ }
555
+
475
556
  #[derive(Clone)]
476
557
  pub struct ObjectValue {
477
558
  pub inner: Opaque<Value>,
@@ -480,7 +561,7 @@ pub struct ObjectValue {
480
561
  impl Debug for ObjectValue {
481
562
  fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
482
563
  f.debug_struct("ObjectValue")
483
- .field("inner", &self.to_object())
564
+ .field("inner", &self.to_value())
484
565
  .finish()
485
566
  }
486
567
  }
@@ -488,7 +569,7 @@ impl Debug for ObjectValue {
488
569
  impl Hash for ObjectValue {
489
570
  fn hash<H: Hasher>(&self, state: &mut H) {
490
571
  let h = self
491
- .to_object()
572
+ .to_value()
492
573
  .funcall::<_, _, isize>("hash", ())
493
574
  .expect("should be hashable");
494
575
  state.write_isize(h)
@@ -499,7 +580,7 @@ impl Eq for ObjectValue {}
499
580
 
500
581
  impl PartialEq for ObjectValue {
501
582
  fn eq(&self, other: &Self) -> bool {
502
- self.to_object().eql(other.to_object()).unwrap_or(false)
583
+ self.to_value().eql(other.to_value()).unwrap_or(false)
503
584
  }
504
585
  }
505
586
 
@@ -520,7 +601,7 @@ impl TotalHash for ObjectValue {
520
601
 
521
602
  impl Display for ObjectValue {
522
603
  fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
523
- write!(f, "{}", self.to_object())
604
+ write!(f, "{}", self.to_value())
524
605
  }
525
606
  }
526
607
 
@@ -548,16 +629,15 @@ impl From<&dyn PolarsObjectSafe> for &ObjectValue {
548
629
  }
549
630
  }
550
631
 
551
- // TODO remove
552
632
  impl ObjectValue {
553
- pub fn to_object(&self) -> Value {
554
- Ruby::get().unwrap().get_inner(self.inner)
633
+ pub fn to_value(&self) -> Value {
634
+ self.clone().into_value()
555
635
  }
556
636
  }
557
637
 
558
638
  impl IntoValue for ObjectValue {
559
- fn into_value_with(self, _: &Ruby) -> Value {
560
- self.to_object()
639
+ fn into_value_with(self, ruby: &Ruby) -> Value {
640
+ ruby.get_inner(self.inner)
561
641
  }
562
642
  }
563
643
 
@@ -574,10 +654,10 @@ impl TryConvert for Wrap<AsofStrategy> {
574
654
  let parsed = match String::try_convert(ob)?.as_str() {
575
655
  "backward" => AsofStrategy::Backward,
576
656
  "forward" => AsofStrategy::Forward,
657
+ "nearest" => AsofStrategy::Nearest,
577
658
  v => {
578
659
  return Err(RbValueError::new_err(format!(
579
- "strategy must be one of {{'backward', 'forward'}}, got {}",
580
- v
660
+ "asof `strategy` must be one of {{'backward', 'forward', 'nearest'}}, got {v}",
581
661
  )))
582
662
  }
583
663
  };
@@ -817,14 +897,14 @@ impl TryConvert for Wrap<ParallelStrategy> {
817
897
  }
818
898
  }
819
899
 
820
- impl TryConvert for Wrap<QuantileInterpolOptions> {
900
+ impl TryConvert for Wrap<QuantileMethod> {
821
901
  fn try_convert(ob: Value) -> RbResult<Self> {
822
902
  let parsed = match String::try_convert(ob)?.as_str() {
823
- "lower" => QuantileInterpolOptions::Lower,
824
- "higher" => QuantileInterpolOptions::Higher,
825
- "nearest" => QuantileInterpolOptions::Nearest,
826
- "linear" => QuantileInterpolOptions::Linear,
827
- "midpoint" => QuantileInterpolOptions::Midpoint,
903
+ "lower" => QuantileMethod::Lower,
904
+ "higher" => QuantileMethod::Higher,
905
+ "nearest" => QuantileMethod::Nearest,
906
+ "linear" => QuantileMethod::Linear,
907
+ "midpoint" => QuantileMethod::Midpoint,
828
908
  v => {
829
909
  return Err(RbValueError::new_err(format!(
830
910
  "interpolation must be one of {{'lower', 'higher', 'nearest', 'linear', 'midpoint'}}, got {}",
@@ -988,6 +1068,11 @@ impl TryConvert for Wrap<QuoteStyle> {
988
1068
  }
989
1069
  }
990
1070
 
1071
+ pub(crate) fn parse_cloud_options(uri: &str, kv: Vec<(String, String)>) -> RbResult<CloudOptions> {
1072
+ let out = CloudOptions::from_untyped_config(uri, kv).map_err(RbPolarsErr::from)?;
1073
+ Ok(out)
1074
+ }
1075
+
991
1076
  pub fn parse_fill_null_strategy(
992
1077
  strategy: &str,
993
1078
  limit: FillNullLimit,
@@ -1053,19 +1138,47 @@ pub fn parse_parquet_compression(
1053
1138
  Ok(parsed)
1054
1139
  }
1055
1140
 
1056
- pub(crate) fn strings_to_smartstrings<I, S>(container: I) -> Vec<SmartString>
1141
+ impl TryConvert for Wrap<NonZeroUsize> {
1142
+ fn try_convert(ob: Value) -> RbResult<Self> {
1143
+ let v = usize::try_convert(ob)?;
1144
+ NonZeroUsize::new(v)
1145
+ .map(Wrap)
1146
+ .ok_or(RbValueError::new_err("must be non-zero"))
1147
+ }
1148
+ }
1149
+
1150
+ pub(crate) fn strings_to_pl_smallstr<I, S>(container: I) -> Vec<PlSmallStr>
1057
1151
  where
1058
1152
  I: IntoIterator<Item = S>,
1059
1153
  S: AsRef<str>,
1060
1154
  {
1061
- container.into_iter().map(|s| s.as_ref().into()).collect()
1155
+ container
1156
+ .into_iter()
1157
+ .map(|s| PlSmallStr::from_str(s.as_ref()))
1158
+ .collect()
1062
1159
  }
1063
1160
 
1064
- impl TryConvert for Wrap<NonZeroUsize> {
1161
+ #[derive(Debug, Copy, Clone)]
1162
+ pub struct RbCompatLevel(pub CompatLevel);
1163
+
1164
+ impl TryConvert for RbCompatLevel {
1065
1165
  fn try_convert(ob: Value) -> RbResult<Self> {
1066
- let v = usize::try_convert(ob)?;
1067
- NonZeroUsize::new(v)
1068
- .map(Wrap)
1069
- .ok_or(RbValueError::new_err("must be non-zero".into()))
1166
+ Ok(RbCompatLevel(if let Ok(level) = u16::try_convert(ob) {
1167
+ if let Ok(compat_level) = CompatLevel::with_level(level) {
1168
+ compat_level
1169
+ } else {
1170
+ return Err(RbValueError::new_err("invalid compat level".to_string()));
1171
+ }
1172
+ } else if let Ok(future) = bool::try_convert(ob) {
1173
+ if future {
1174
+ CompatLevel::newest()
1175
+ } else {
1176
+ CompatLevel::oldest()
1177
+ }
1178
+ } else {
1179
+ return Err(RbTypeError::new_err(
1180
+ "'compat_level' argument accepts int or bool".to_string(),
1181
+ ));
1182
+ }))
1070
1183
  }
1071
1184
  }
@@ -54,9 +54,6 @@ fn finish_from_rows(
54
54
  schema_overrides: Option<Schema>,
55
55
  infer_schema_length: Option<usize>,
56
56
  ) -> RbResult<RbDataFrame> {
57
- // Object builder must be registered
58
- crate::on_startup::register_object_builder();
59
-
60
57
  let mut schema = if let Some(mut schema) = schema {
61
58
  resolve_schema_overrides(&mut schema, schema_overrides);
62
59
  update_schema_from_rows(&mut schema, &rows, infer_schema_length)?;
@@ -79,7 +76,7 @@ fn update_schema_from_rows(
79
76
  rows: &[Row],
80
77
  infer_schema_length: Option<usize>,
81
78
  ) -> RbResult<()> {
82
- let schema_is_complete = schema.iter_dtypes().all(|dtype| dtype.is_known());
79
+ let schema_is_complete = schema.iter_values().all(|dtype| dtype.is_known());
83
80
  if schema_is_complete {
84
81
  return Ok(());
85
82
  }
@@ -89,7 +86,7 @@ fn update_schema_from_rows(
89
86
  rows_to_supertypes(rows, infer_schema_length).map_err(RbPolarsErr::from)?;
90
87
  let inferred_dtypes_slice = inferred_dtypes.as_slice();
91
88
 
92
- for (i, dtype) in schema.iter_dtypes_mut().enumerate() {
89
+ for (i, dtype) in schema.iter_values_mut().enumerate() {
93
90
  if !dtype.is_known() {
94
91
  *dtype = inferred_dtypes_slice.get(i).ok_or_else(|| {
95
92
  polars_err!(SchemaMismatch: "the number of columns in the schema does not match the data")
@@ -110,7 +107,7 @@ fn resolve_schema_overrides(schema: &mut Schema, schema_overrides: Option<Schema
110
107
  }
111
108
 
112
109
  fn erase_decimal_precision_scale(schema: &mut Schema) {
113
- for dtype in schema.iter_dtypes_mut() {
110
+ for dtype in schema.iter_values_mut() {
114
111
  if let DataType::Decimal(_, _) = dtype {
115
112
  *dtype = DataType::Decimal(None, None)
116
113
  }
@@ -123,7 +120,7 @@ where
123
120
  {
124
121
  let fields = column_names
125
122
  .into_iter()
126
- .map(|c| Field::new(c, DataType::Unknown(Default::default())));
123
+ .map(|c| Field::new(c.into(), DataType::Unknown(Default::default())));
127
124
  Schema::from_iter(fields)
128
125
  }
129
126
 
@@ -2,6 +2,8 @@ use magnus::{prelude::*, IntoValue, RArray, Value};
2
2
 
3
3
  use super::*;
4
4
  use crate::conversion::{ObjectValue, Wrap};
5
+ use crate::interop::arrow::to_ruby::dataframe_to_stream;
6
+ use crate::RbResult;
5
7
 
6
8
  impl RbDataFrame {
7
9
  pub fn row_tuple(&self, idx: i64) -> Value {
@@ -18,7 +20,7 @@ impl RbDataFrame {
18
20
  .map(|s| match s.dtype() {
19
21
  DataType::Object(_, _) => {
20
22
  let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
21
- obj.unwrap().to_object()
23
+ obj.unwrap().to_value()
22
24
  }
23
25
  _ => Wrap(s.get(idx).unwrap()).into_value(),
24
26
  }),
@@ -37,7 +39,7 @@ impl RbDataFrame {
37
39
  .map(|s| match s.dtype() {
38
40
  DataType::Object(_, _) => {
39
41
  let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
40
- obj.unwrap().to_object()
42
+ obj.unwrap().to_value()
41
43
  }
42
44
  _ => Wrap(s.get(idx).unwrap()).into_value(),
43
45
  }),
@@ -45,4 +47,9 @@ impl RbDataFrame {
45
47
  }))
46
48
  .as_value()
47
49
  }
50
+
51
+ pub fn __arrow_c_stream__(&self) -> RbResult<Value> {
52
+ self.df.borrow_mut().align_chunks();
53
+ dataframe_to_stream(&self.df.borrow())
54
+ }
48
55
  }
@@ -9,14 +9,15 @@ use crate::map::dataframe::{
9
9
  apply_lambda_unknown, apply_lambda_with_bool_out_type, apply_lambda_with_primitive_out_type,
10
10
  apply_lambda_with_utf8_out_type,
11
11
  };
12
- use crate::series::{to_rbseries_collection, to_series_collection};
12
+ use crate::prelude::strings_to_pl_smallstr;
13
+ use crate::series::{to_rbseries, to_series};
13
14
  use crate::{RbDataFrame, RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
14
15
 
15
16
  impl RbDataFrame {
16
17
  pub fn init(columns: RArray) -> RbResult<Self> {
17
18
  let mut cols = Vec::new();
18
19
  for i in columns.into_iter() {
19
- cols.push(<&RbSeries>::try_convert(i)?.series.borrow().clone());
20
+ cols.push(<&RbSeries>::try_convert(i)?.series.borrow().clone().into());
20
21
  }
21
22
  let df = DataFrame::new(cols).map_err(RbPolarsErr::from)?;
22
23
  Ok(RbDataFrame::new(df))
@@ -127,7 +128,7 @@ impl RbDataFrame {
127
128
 
128
129
  pub fn get_columns(&self) -> RArray {
129
130
  let cols = self.df.borrow().get_columns().to_vec();
130
- to_rbseries_collection(cols)
131
+ to_rbseries(cols)
131
132
  }
132
133
 
133
134
  pub fn columns(&self) -> Vec<String> {
@@ -173,7 +174,8 @@ impl RbDataFrame {
173
174
  }
174
175
 
175
176
  pub fn hstack(&self, columns: RArray) -> RbResult<Self> {
176
- let columns = to_series_collection(columns)?;
177
+ let columns = to_series(columns)?;
178
+ let columns = columns.into_iter().map(Into::into).collect::<Vec<_>>();
177
179
  let df = self
178
180
  .df
179
181
  .borrow()
@@ -183,7 +185,8 @@ impl RbDataFrame {
183
185
  }
184
186
 
185
187
  pub fn hstack_mut(&self, columns: RArray) -> RbResult<()> {
186
- let columns = to_series_collection(columns)?;
188
+ let columns = to_series(columns)?;
189
+ let columns = columns.into_iter().map(Into::into).collect::<Vec<_>>();
187
190
  self.df
188
191
  .borrow_mut()
189
192
  .hstack_mut(&columns)
@@ -222,6 +225,7 @@ impl RbDataFrame {
222
225
  .borrow_mut()
223
226
  .drop_in_place(&name)
224
227
  .map_err(RbPolarsErr::from)?;
228
+ let s = s.take_materialized_series();
225
229
  Ok(RbSeries::new(s))
226
230
  }
227
231
 
@@ -229,7 +233,7 @@ impl RbDataFrame {
229
233
  self.df
230
234
  .borrow()
231
235
  .select_at_idx(idx)
232
- .map(|s| RbSeries::new(s.clone()))
236
+ .map(|s| RbSeries::new(s.as_materialized_series().clone()))
233
237
  }
234
238
 
235
239
  pub fn get_column_index(&self, name: String) -> Option<usize> {
@@ -237,11 +241,13 @@ impl RbDataFrame {
237
241
  }
238
242
 
239
243
  pub fn get_column(&self, name: String) -> RbResult<RbSeries> {
240
- self.df
244
+ let series = self
245
+ .df
241
246
  .borrow()
242
247
  .column(&name)
243
- .map(|s| RbSeries::new(s.clone()))
244
- .map_err(RbPolarsErr::from)
248
+ .map(|s| RbSeries::new(s.as_materialized_series().clone()))
249
+ .map_err(RbPolarsErr::from)?;
250
+ Ok(series)
245
251
  }
246
252
 
247
253
  pub fn select(&self, selection: Vec<String>) -> RbResult<Self> {
@@ -254,7 +260,7 @@ impl RbDataFrame {
254
260
  }
255
261
 
256
262
  pub fn gather(&self, indices: Vec<IdxSize>) -> RbResult<Self> {
257
- let indices = IdxCa::from_vec("", indices);
263
+ let indices = IdxCa::from_vec("".into(), indices);
258
264
  let df = self.df.borrow().take(&indices).map_err(RbPolarsErr::from)?;
259
265
  Ok(RbDataFrame::new(df))
260
266
  }
@@ -332,7 +338,7 @@ impl RbDataFrame {
332
338
  let df = self
333
339
  .df
334
340
  .borrow()
335
- .with_row_index(&name, offset)
341
+ .with_row_index(name.into(), offset)
336
342
  .map_err(RbPolarsErr::from)?;
337
343
  Ok(df.into())
338
344
  }
@@ -349,8 +355,8 @@ impl RbDataFrame {
349
355
  variable_name: Option<String>,
350
356
  ) -> RbResult<Self> {
351
357
  let args = UnpivotArgsIR {
352
- on: strings_to_smartstrings(on),
353
- index: strings_to_smartstrings(index),
358
+ on: strings_to_pl_smallstr(on),
359
+ index: strings_to_pl_smallstr(index),
354
360
  value_name: value_name.map(|s| s.into()),
355
361
  variable_name: variable_name.map(|s| s.into()),
356
362
  };
@@ -410,7 +416,7 @@ impl RbDataFrame {
410
416
  .borrow()
411
417
  .max_horizontal()
412
418
  .map_err(RbPolarsErr::from)?;
413
- Ok(s.map(|s| s.into()))
419
+ Ok(s.map(|s| s.take_materialized_series().into()))
414
420
  }
415
421
 
416
422
  pub fn min_horizontal(&self) -> RbResult<Option<RbSeries>> {
@@ -419,7 +425,7 @@ impl RbDataFrame {
419
425
  .borrow()
420
426
  .min_horizontal()
421
427
  .map_err(RbPolarsErr::from)?;
422
- Ok(s.map(|s| s.into()))
428
+ Ok(s.map(|s| s.take_materialized_series().into()))
423
429
  }
424
430
 
425
431
  pub fn sum_horizontal(&self, ignore_nulls: bool) -> RbResult<Option<RbSeries>> {
@@ -581,7 +587,7 @@ impl RbDataFrame {
581
587
  }
582
588
 
583
589
  pub fn to_struct(&self, name: String) -> RbSeries {
584
- let s = self.df.borrow().clone().into_struct(&name);
590
+ let s = self.df.borrow().clone().into_struct(name.into());
585
591
  s.into_series().into()
586
592
  }
587
593