polars-df 0.14.0 → 0.16.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (87) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +35 -0
  3. data/Cargo.lock +1523 -378
  4. data/LICENSE.txt +1 -0
  5. data/README.md +38 -4
  6. data/ext/polars/Cargo.toml +15 -5
  7. data/ext/polars/src/batched_csv.rs +7 -10
  8. data/ext/polars/src/conversion/any_value.rs +31 -21
  9. data/ext/polars/src/conversion/mod.rs +155 -48
  10. data/ext/polars/src/dataframe/construction.rs +0 -3
  11. data/ext/polars/src/dataframe/export.rs +9 -2
  12. data/ext/polars/src/dataframe/general.rs +15 -57
  13. data/ext/polars/src/dataframe/io.rs +77 -169
  14. data/ext/polars/src/dataframe/mod.rs +1 -0
  15. data/ext/polars/src/dataframe/serde.rs +15 -0
  16. data/ext/polars/src/error.rs +31 -48
  17. data/ext/polars/src/exceptions.rs +24 -0
  18. data/ext/polars/src/expr/binary.rs +4 -42
  19. data/ext/polars/src/expr/datetime.rs +5 -4
  20. data/ext/polars/src/expr/general.rs +16 -22
  21. data/ext/polars/src/expr/list.rs +18 -11
  22. data/ext/polars/src/expr/meta.rs +6 -2
  23. data/ext/polars/src/expr/rolling.rs +6 -7
  24. data/ext/polars/src/expr/string.rs +9 -36
  25. data/ext/polars/src/file.rs +78 -23
  26. data/ext/polars/src/functions/aggregation.rs +4 -4
  27. data/ext/polars/src/functions/business.rs +15 -0
  28. data/ext/polars/src/functions/io.rs +34 -13
  29. data/ext/polars/src/functions/lazy.rs +22 -12
  30. data/ext/polars/src/functions/meta.rs +1 -1
  31. data/ext/polars/src/functions/mod.rs +1 -0
  32. data/ext/polars/src/interop/arrow/mod.rs +1 -0
  33. data/ext/polars/src/interop/arrow/to_ruby.rs +83 -0
  34. data/ext/polars/src/interop/mod.rs +1 -0
  35. data/ext/polars/src/lazyframe/general.rs +920 -0
  36. data/ext/polars/src/lazyframe/mod.rs +3 -827
  37. data/ext/polars/src/lazyframe/serde.rs +31 -0
  38. data/ext/polars/src/lib.rs +54 -27
  39. data/ext/polars/src/map/dataframe.rs +10 -6
  40. data/ext/polars/src/map/lazy.rs +65 -4
  41. data/ext/polars/src/map/mod.rs +9 -8
  42. data/ext/polars/src/on_startup.rs +1 -1
  43. data/ext/polars/src/series/aggregation.rs +1 -5
  44. data/ext/polars/src/series/arithmetic.rs +10 -10
  45. data/ext/polars/src/series/construction.rs +2 -2
  46. data/ext/polars/src/series/export.rs +1 -1
  47. data/ext/polars/src/series/general.rs +631 -0
  48. data/ext/polars/src/series/import.rs +55 -0
  49. data/ext/polars/src/series/mod.rs +11 -638
  50. data/ext/polars/src/series/scatter.rs +2 -2
  51. data/ext/polars/src/utils.rs +0 -20
  52. data/lib/polars/batched_csv_reader.rb +0 -2
  53. data/lib/polars/binary_expr.rb +133 -9
  54. data/lib/polars/binary_name_space.rb +101 -6
  55. data/lib/polars/config.rb +4 -0
  56. data/lib/polars/data_frame.rb +452 -101
  57. data/lib/polars/data_type_group.rb +28 -0
  58. data/lib/polars/data_types.rb +3 -1
  59. data/lib/polars/date_time_expr.rb +244 -0
  60. data/lib/polars/date_time_name_space.rb +87 -0
  61. data/lib/polars/expr.rb +103 -2
  62. data/lib/polars/functions/aggregation/horizontal.rb +10 -4
  63. data/lib/polars/functions/as_datatype.rb +51 -2
  64. data/lib/polars/functions/col.rb +1 -1
  65. data/lib/polars/functions/eager.rb +1 -3
  66. data/lib/polars/functions/lazy.rb +95 -13
  67. data/lib/polars/functions/range/time_range.rb +21 -21
  68. data/lib/polars/io/csv.rb +14 -16
  69. data/lib/polars/io/database.rb +2 -2
  70. data/lib/polars/io/delta.rb +126 -0
  71. data/lib/polars/io/ipc.rb +14 -4
  72. data/lib/polars/io/ndjson.rb +10 -0
  73. data/lib/polars/io/parquet.rb +168 -111
  74. data/lib/polars/lazy_frame.rb +684 -20
  75. data/lib/polars/list_name_space.rb +169 -0
  76. data/lib/polars/selectors.rb +1226 -0
  77. data/lib/polars/series.rb +465 -35
  78. data/lib/polars/string_cache.rb +27 -1
  79. data/lib/polars/string_expr.rb +0 -1
  80. data/lib/polars/string_name_space.rb +73 -3
  81. data/lib/polars/struct_name_space.rb +31 -7
  82. data/lib/polars/utils/various.rb +5 -1
  83. data/lib/polars/utils.rb +45 -10
  84. data/lib/polars/version.rb +1 -1
  85. data/lib/polars.rb +17 -1
  86. metadata +16 -9
  87. data/lib/polars/functions.rb +0 -57
@@ -2,25 +2,30 @@ pub(crate) mod any_value;
2
2
  mod chunked_array;
3
3
 
4
4
  use std::fmt::{Debug, Display, Formatter};
5
+ use std::fs::File;
5
6
  use std::hash::{Hash, Hasher};
6
7
  use std::num::NonZeroUsize;
8
+ use std::path::PathBuf;
7
9
 
8
10
  use magnus::{
9
- class, exception, prelude::*, r_hash::ForEach, value::Opaque, IntoValue, Module, RArray, RHash,
10
- Ruby, Symbol, TryConvert, Value,
11
+ class, exception, prelude::*, r_hash::ForEach, try_convert::TryConvertOwned, value::Opaque,
12
+ IntoValue, Module, RArray, RHash, Ruby, Symbol, TryConvert, Value,
11
13
  };
12
14
  use polars::chunked_array::object::PolarsObjectSafe;
13
15
  use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
14
16
  use polars::datatypes::AnyValue;
15
17
  use polars::frame::row::Row;
16
- use polars::frame::NullStrategy;
17
18
  use polars::io::avro::AvroCompression;
19
+ use polars::io::cloud::CloudOptions;
18
20
  use polars::prelude::*;
19
21
  use polars::series::ops::NullBehavior;
20
22
  use polars_core::utils::arrow::array::Array;
21
23
  use polars_core::utils::materialize_dyn_int;
24
+ use polars_plan::plans::ScanSources;
25
+ use polars_utils::mmap::MemSlice;
22
26
  use polars_utils::total_ord::{TotalEq, TotalHash};
23
27
 
28
+ use crate::file::{get_ruby_scan_source_input, RubyScanSourceInput};
24
29
  use crate::object::OBJECT_NAME;
25
30
  use crate::rb_modules::series;
26
31
  use crate::{RbDataFrame, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbTypeError, RbValueError};
@@ -104,9 +109,10 @@ impl TryConvert for Wrap<NullValues> {
104
109
  .collect(),
105
110
  )))
106
111
  } else {
107
- Err(RbPolarsErr::other(
108
- "could not extract value from null_values argument".into(),
109
- ))
112
+ Err(
113
+ RbPolarsErr::Other("could not extract value from null_values argument".into())
114
+ .into(),
115
+ )
110
116
  }
111
117
  }
112
118
  }
@@ -298,29 +304,31 @@ impl TryConvert for Wrap<DataType> {
298
304
  let dtype = if ob.is_kind_of(class::class()) {
299
305
  let name = ob.funcall::<_, _, String>("name", ())?;
300
306
  match name.as_str() {
301
- "Polars::UInt8" => DataType::UInt8,
302
- "Polars::UInt16" => DataType::UInt16,
303
- "Polars::UInt32" => DataType::UInt32,
304
- "Polars::UInt64" => DataType::UInt64,
305
307
  "Polars::Int8" => DataType::Int8,
306
308
  "Polars::Int16" => DataType::Int16,
307
309
  "Polars::Int32" => DataType::Int32,
308
310
  "Polars::Int64" => DataType::Int64,
311
+ "Polars::UInt8" => DataType::UInt8,
312
+ "Polars::UInt16" => DataType::UInt16,
313
+ "Polars::UInt32" => DataType::UInt32,
314
+ "Polars::UInt64" => DataType::UInt64,
315
+ "Polars::Float32" => DataType::Float32,
316
+ "Polars::Float64" => DataType::Float64,
317
+ "Polars::Boolean" => DataType::Boolean,
309
318
  "Polars::String" => DataType::String,
310
319
  "Polars::Binary" => DataType::Binary,
311
- "Polars::Boolean" => DataType::Boolean,
312
320
  "Polars::Categorical" => DataType::Categorical(None, Default::default()),
313
321
  "Polars::Enum" => DataType::Enum(None, Default::default()),
314
322
  "Polars::Date" => DataType::Date,
315
- "Polars::Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
316
323
  "Polars::Time" => DataType::Time,
324
+ "Polars::Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
317
325
  "Polars::Duration" => DataType::Duration(TimeUnit::Microseconds),
318
326
  "Polars::Decimal" => DataType::Decimal(None, None),
319
- "Polars::Float32" => DataType::Float32,
320
- "Polars::Float64" => DataType::Float64,
321
- "Polars::Object" => DataType::Object(OBJECT_NAME, None),
322
327
  "Polars::List" => DataType::List(Box::new(DataType::Null)),
328
+ "Polars::Array" => DataType::Array(Box::new(DataType::Null), 0),
329
+ "Polars::Struct" => DataType::Struct(vec![]),
323
330
  "Polars::Null" => DataType::Null,
331
+ "Polars::Object" => DataType::Object(OBJECT_NAME, None),
324
332
  "Polars::Unknown" => DataType::Unknown(Default::default()),
325
333
  dt => {
326
334
  return Err(RbValueError::new_err(format!(
@@ -328,7 +336,6 @@ impl TryConvert for Wrap<DataType> {
328
336
  )))
329
337
  }
330
338
  }
331
- // TODO improve
332
339
  } else if String::try_convert(ob).is_err() {
333
340
  let name = unsafe { ob.class().name() }.into_owned();
334
341
  match name.as_str() {
@@ -340,9 +347,11 @@ impl TryConvert for Wrap<DataType> {
340
347
  "Polars::UInt16" => DataType::UInt16,
341
348
  "Polars::UInt32" => DataType::UInt32,
342
349
  "Polars::UInt64" => DataType::UInt64,
350
+ "Polars::Float32" => DataType::Float32,
351
+ "Polars::Float64" => DataType::Float64,
352
+ "Polars::Boolean" => DataType::Boolean,
343
353
  "Polars::String" => DataType::String,
344
354
  "Polars::Binary" => DataType::Binary,
345
- "Polars::Boolean" => DataType::Boolean,
346
355
  "Polars::Categorical" => {
347
356
  let ordering = ob
348
357
  .funcall::<_, _, Wrap<CategoricalOrdering>>("ordering", ())?
@@ -358,21 +367,17 @@ impl TryConvert for Wrap<DataType> {
358
367
  }
359
368
  "Polars::Date" => DataType::Date,
360
369
  "Polars::Time" => DataType::Time,
361
- "Polars::Float32" => DataType::Float32,
362
- "Polars::Float64" => DataType::Float64,
363
- "Polars::Null" => DataType::Null,
364
- "Polars::Unknown" => DataType::Unknown(Default::default()),
365
- "Polars::Duration" => {
366
- let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
367
- let time_unit = Wrap::<TimeUnit>::try_convert(time_unit)?.0;
368
- DataType::Duration(time_unit)
369
- }
370
370
  "Polars::Datetime" => {
371
371
  let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
372
372
  let time_unit = Wrap::<TimeUnit>::try_convert(time_unit)?.0;
373
373
  let time_zone: Option<String> = ob.funcall("time_zone", ())?;
374
374
  DataType::Datetime(time_unit, time_zone.as_deref().map(|x| x.into()))
375
375
  }
376
+ "Polars::Duration" => {
377
+ let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
378
+ let time_unit = Wrap::<TimeUnit>::try_convert(time_unit)?.0;
379
+ DataType::Duration(time_unit)
380
+ }
376
381
  "Polars::Decimal" => {
377
382
  let precision = ob.funcall("precision", ())?;
378
383
  let scale = ob.funcall("scale", ())?;
@@ -383,6 +388,13 @@ impl TryConvert for Wrap<DataType> {
383
388
  let inner = Wrap::<DataType>::try_convert(inner)?;
384
389
  DataType::List(Box::new(inner.0))
385
390
  }
391
+ "Polars::Array" => {
392
+ let inner: Value = ob.funcall("inner", ()).unwrap();
393
+ let size: Value = ob.funcall("size", ()).unwrap();
394
+ let inner = Wrap::<DataType>::try_convert(inner)?;
395
+ let size = usize::try_convert(size)?;
396
+ DataType::Array(Box::new(inner.0), size)
397
+ }
386
398
  "Polars::Struct" => {
387
399
  let arr: RArray = ob.funcall("fields", ())?;
388
400
  let mut fields = Vec::with_capacity(arr.len());
@@ -391,6 +403,9 @@ impl TryConvert for Wrap<DataType> {
391
403
  }
392
404
  DataType::Struct(fields)
393
405
  }
406
+ "Polars::Null" => DataType::Null,
407
+ "Object" => DataType::Object(OBJECT_NAME, None),
408
+ "Polars::Unknown" => DataType::Unknown(Default::default()),
394
409
  dt => {
395
410
  return Err(RbTypeError::new_err(format!(
396
411
  "A {dt} object is not a correct polars DataType. \
@@ -434,6 +449,8 @@ impl TryConvert for Wrap<DataType> {
434
449
  }
435
450
  }
436
451
 
452
+ unsafe impl TryConvertOwned for Wrap<DataType> {}
453
+
437
454
  impl TryConvert for Wrap<StatisticsOptions> {
438
455
  fn try_convert(ob: Value) -> RbResult<Self> {
439
456
  let mut statistics = StatisticsOptions::empty();
@@ -452,8 +469,7 @@ impl TryConvert for Wrap<StatisticsOptions> {
452
469
  }
453
470
  }
454
471
  Ok(ForEach::Continue)
455
- })
456
- .unwrap();
472
+ })?;
457
473
 
458
474
  Ok(Wrap(statistics))
459
475
  }
@@ -478,13 +494,75 @@ impl TryConvert for Wrap<Schema> {
478
494
  dict.foreach(|key: String, val: Wrap<DataType>| {
479
495
  schema.push(Ok(Field::new((&*key).into(), val.0)));
480
496
  Ok(ForEach::Continue)
481
- })
482
- .unwrap();
497
+ })?;
483
498
 
484
499
  Ok(Wrap(schema.into_iter().collect::<RbResult<Schema>>()?))
485
500
  }
486
501
  }
487
502
 
503
+ impl TryConvert for Wrap<ScanSources> {
504
+ fn try_convert(ob: Value) -> RbResult<Self> {
505
+ let list = RArray::try_convert(ob)?;
506
+
507
+ if list.is_empty() {
508
+ return Ok(Wrap(ScanSources::default()));
509
+ }
510
+
511
+ enum MutableSources {
512
+ Paths(Vec<PathBuf>),
513
+ Files(Vec<File>),
514
+ Buffers(Vec<MemSlice>),
515
+ }
516
+
517
+ let num_items = list.len();
518
+ let mut iter = list
519
+ .into_iter()
520
+ .map(|val| get_ruby_scan_source_input(val, false));
521
+
522
+ let Some(first) = iter.next() else {
523
+ return Ok(Wrap(ScanSources::default()));
524
+ };
525
+
526
+ let mut sources = match first? {
527
+ RubyScanSourceInput::Path(path) => {
528
+ let mut sources = Vec::with_capacity(num_items);
529
+ sources.push(path);
530
+ MutableSources::Paths(sources)
531
+ }
532
+ RubyScanSourceInput::File(file) => {
533
+ let mut sources = Vec::with_capacity(num_items);
534
+ sources.push(file);
535
+ MutableSources::Files(sources)
536
+ }
537
+ RubyScanSourceInput::Buffer(buffer) => {
538
+ let mut sources = Vec::with_capacity(num_items);
539
+ sources.push(buffer);
540
+ MutableSources::Buffers(sources)
541
+ }
542
+ };
543
+
544
+ for source in iter {
545
+ match (&mut sources, source?) {
546
+ (MutableSources::Paths(v), RubyScanSourceInput::Path(p)) => v.push(p),
547
+ (MutableSources::Files(v), RubyScanSourceInput::File(f)) => v.push(f),
548
+ (MutableSources::Buffers(v), RubyScanSourceInput::Buffer(f)) => v.push(f),
549
+ _ => {
550
+ return Err(RbTypeError::new_err(
551
+ "Cannot combine in-memory bytes, paths and files for scan sources"
552
+ .to_string(),
553
+ ))
554
+ }
555
+ }
556
+ }
557
+
558
+ Ok(Wrap(match sources {
559
+ MutableSources::Paths(i) => ScanSources::Paths(i.into()),
560
+ MutableSources::Files(i) => ScanSources::Files(i.into()),
561
+ MutableSources::Buffers(i) => ScanSources::Buffers(i.into()),
562
+ }))
563
+ }
564
+ }
565
+
488
566
  #[derive(Clone)]
489
567
  pub struct ObjectValue {
490
568
  pub inner: Opaque<Value>,
@@ -493,7 +571,7 @@ pub struct ObjectValue {
493
571
  impl Debug for ObjectValue {
494
572
  fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
495
573
  f.debug_struct("ObjectValue")
496
- .field("inner", &self.to_object())
574
+ .field("inner", &self.to_value())
497
575
  .finish()
498
576
  }
499
577
  }
@@ -501,7 +579,7 @@ impl Debug for ObjectValue {
501
579
  impl Hash for ObjectValue {
502
580
  fn hash<H: Hasher>(&self, state: &mut H) {
503
581
  let h = self
504
- .to_object()
582
+ .to_value()
505
583
  .funcall::<_, _, isize>("hash", ())
506
584
  .expect("should be hashable");
507
585
  state.write_isize(h)
@@ -512,7 +590,7 @@ impl Eq for ObjectValue {}
512
590
 
513
591
  impl PartialEq for ObjectValue {
514
592
  fn eq(&self, other: &Self) -> bool {
515
- self.to_object().eql(other.to_object()).unwrap_or(false)
593
+ self.to_value().eql(other.to_value()).unwrap_or(false)
516
594
  }
517
595
  }
518
596
 
@@ -533,7 +611,7 @@ impl TotalHash for ObjectValue {
533
611
 
534
612
  impl Display for ObjectValue {
535
613
  fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
536
- write!(f, "{}", self.to_object())
614
+ write!(f, "{}", self.to_value())
537
615
  }
538
616
  }
539
617
 
@@ -561,16 +639,15 @@ impl From<&dyn PolarsObjectSafe> for &ObjectValue {
561
639
  }
562
640
  }
563
641
 
564
- // TODO remove
565
642
  impl ObjectValue {
566
- pub fn to_object(&self) -> Value {
567
- Ruby::get().unwrap().get_inner(self.inner)
643
+ pub fn to_value(&self) -> Value {
644
+ self.clone().into_value()
568
645
  }
569
646
  }
570
647
 
571
648
  impl IntoValue for ObjectValue {
572
- fn into_value_with(self, _: &Ruby) -> Value {
573
- self.to_object()
649
+ fn into_value_with(self, ruby: &Ruby) -> Value {
650
+ ruby.get_inner(self.inner)
574
651
  }
575
652
  }
576
653
 
@@ -587,10 +664,10 @@ impl TryConvert for Wrap<AsofStrategy> {
587
664
  let parsed = match String::try_convert(ob)?.as_str() {
588
665
  "backward" => AsofStrategy::Backward,
589
666
  "forward" => AsofStrategy::Forward,
667
+ "nearest" => AsofStrategy::Nearest,
590
668
  v => {
591
669
  return Err(RbValueError::new_err(format!(
592
- "strategy must be one of {{'backward', 'forward'}}, got {}",
593
- v
670
+ "asof `strategy` must be one of {{'backward', 'forward', 'nearest'}}, got {v}",
594
671
  )))
595
672
  }
596
673
  };
@@ -830,14 +907,14 @@ impl TryConvert for Wrap<ParallelStrategy> {
830
907
  }
831
908
  }
832
909
 
833
- impl TryConvert for Wrap<QuantileInterpolOptions> {
910
+ impl TryConvert for Wrap<QuantileMethod> {
834
911
  fn try_convert(ob: Value) -> RbResult<Self> {
835
912
  let parsed = match String::try_convert(ob)?.as_str() {
836
- "lower" => QuantileInterpolOptions::Lower,
837
- "higher" => QuantileInterpolOptions::Higher,
838
- "nearest" => QuantileInterpolOptions::Nearest,
839
- "linear" => QuantileInterpolOptions::Linear,
840
- "midpoint" => QuantileInterpolOptions::Midpoint,
913
+ "lower" => QuantileMethod::Lower,
914
+ "higher" => QuantileMethod::Higher,
915
+ "nearest" => QuantileMethod::Nearest,
916
+ "linear" => QuantileMethod::Linear,
917
+ "midpoint" => QuantileMethod::Midpoint,
841
918
  v => {
842
919
  return Err(RbValueError::new_err(format!(
843
920
  "interpolation must be one of {{'lower', 'higher', 'nearest', 'linear', 'midpoint'}}, got {}",
@@ -1001,6 +1078,11 @@ impl TryConvert for Wrap<QuoteStyle> {
1001
1078
  }
1002
1079
  }
1003
1080
 
1081
+ pub(crate) fn parse_cloud_options(uri: &str, kv: Vec<(String, String)>) -> RbResult<CloudOptions> {
1082
+ let out = CloudOptions::from_untyped_config(uri, kv).map_err(RbPolarsErr::from)?;
1083
+ Ok(out)
1084
+ }
1085
+
1004
1086
  pub fn parse_fill_null_strategy(
1005
1087
  strategy: &str,
1006
1088
  limit: FillNullLimit,
@@ -1071,7 +1153,7 @@ impl TryConvert for Wrap<NonZeroUsize> {
1071
1153
  let v = usize::try_convert(ob)?;
1072
1154
  NonZeroUsize::new(v)
1073
1155
  .map(Wrap)
1074
- .ok_or(RbValueError::new_err("must be non-zero".into()))
1156
+ .ok_or(RbValueError::new_err("must be non-zero"))
1075
1157
  }
1076
1158
  }
1077
1159
 
@@ -1085,3 +1167,28 @@ where
1085
1167
  .map(|s| PlSmallStr::from_str(s.as_ref()))
1086
1168
  .collect()
1087
1169
  }
1170
+
1171
+ #[derive(Debug, Copy, Clone)]
1172
+ pub struct RbCompatLevel(pub CompatLevel);
1173
+
1174
+ impl TryConvert for RbCompatLevel {
1175
+ fn try_convert(ob: Value) -> RbResult<Self> {
1176
+ Ok(RbCompatLevel(if let Ok(level) = u16::try_convert(ob) {
1177
+ if let Ok(compat_level) = CompatLevel::with_level(level) {
1178
+ compat_level
1179
+ } else {
1180
+ return Err(RbValueError::new_err("invalid compat level".to_string()));
1181
+ }
1182
+ } else if let Ok(future) = bool::try_convert(ob) {
1183
+ if future {
1184
+ CompatLevel::newest()
1185
+ } else {
1186
+ CompatLevel::oldest()
1187
+ }
1188
+ } else {
1189
+ return Err(RbTypeError::new_err(
1190
+ "'compat_level' argument accepts int or bool".to_string(),
1191
+ ));
1192
+ }))
1193
+ }
1194
+ }
@@ -54,9 +54,6 @@ fn finish_from_rows(
54
54
  schema_overrides: Option<Schema>,
55
55
  infer_schema_length: Option<usize>,
56
56
  ) -> RbResult<RbDataFrame> {
57
- // Object builder must be registered
58
- crate::on_startup::register_object_builder();
59
-
60
57
  let mut schema = if let Some(mut schema) = schema {
61
58
  resolve_schema_overrides(&mut schema, schema_overrides);
62
59
  update_schema_from_rows(&mut schema, &rows, infer_schema_length)?;
@@ -2,6 +2,8 @@ use magnus::{prelude::*, IntoValue, RArray, Value};
2
2
 
3
3
  use super::*;
4
4
  use crate::conversion::{ObjectValue, Wrap};
5
+ use crate::interop::arrow::to_ruby::dataframe_to_stream;
6
+ use crate::RbResult;
5
7
 
6
8
  impl RbDataFrame {
7
9
  pub fn row_tuple(&self, idx: i64) -> Value {
@@ -18,7 +20,7 @@ impl RbDataFrame {
18
20
  .map(|s| match s.dtype() {
19
21
  DataType::Object(_, _) => {
20
22
  let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
21
- obj.unwrap().to_object()
23
+ obj.unwrap().to_value()
22
24
  }
23
25
  _ => Wrap(s.get(idx).unwrap()).into_value(),
24
26
  }),
@@ -37,7 +39,7 @@ impl RbDataFrame {
37
39
  .map(|s| match s.dtype() {
38
40
  DataType::Object(_, _) => {
39
41
  let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
40
- obj.unwrap().to_object()
42
+ obj.unwrap().to_value()
41
43
  }
42
44
  _ => Wrap(s.get(idx).unwrap()).into_value(),
43
45
  }),
@@ -45,4 +47,9 @@ impl RbDataFrame {
45
47
  }))
46
48
  .as_value()
47
49
  }
50
+
51
+ pub fn __arrow_c_stream__(&self) -> RbResult<Value> {
52
+ self.df.borrow_mut().align_chunks();
53
+ dataframe_to_stream(&self.df.borrow())
54
+ }
48
55
  }
@@ -1,6 +1,5 @@
1
1
  use either::Either;
2
2
  use magnus::{prelude::*, typed_data::Obj, IntoValue, RArray, Value};
3
- use polars::frame::NullStrategy;
4
3
  use polars::prelude::pivot::{pivot, pivot_stable};
5
4
  use polars::prelude::*;
6
5
 
@@ -10,14 +9,14 @@ use crate::map::dataframe::{
10
9
  apply_lambda_with_utf8_out_type,
11
10
  };
12
11
  use crate::prelude::strings_to_pl_smallstr;
13
- use crate::series::{to_rbseries_collection, to_series_collection};
12
+ use crate::series::{to_rbseries, to_series};
14
13
  use crate::{RbDataFrame, RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
15
14
 
16
15
  impl RbDataFrame {
17
16
  pub fn init(columns: RArray) -> RbResult<Self> {
18
17
  let mut cols = Vec::new();
19
18
  for i in columns.into_iter() {
20
- cols.push(<&RbSeries>::try_convert(i)?.series.borrow().clone());
19
+ cols.push(<&RbSeries>::try_convert(i)?.series.borrow().clone().into());
21
20
  }
22
21
  let df = DataFrame::new(cols).map_err(RbPolarsErr::from)?;
23
22
  Ok(RbDataFrame::new(df))
@@ -128,7 +127,7 @@ impl RbDataFrame {
128
127
 
129
128
  pub fn get_columns(&self) -> RArray {
130
129
  let cols = self.df.borrow().get_columns().to_vec();
131
- to_rbseries_collection(cols)
130
+ to_rbseries(cols)
132
131
  }
133
132
 
134
133
  pub fn columns(&self) -> Vec<String> {
@@ -158,7 +157,7 @@ impl RbDataFrame {
158
157
  }
159
158
 
160
159
  pub fn n_chunks(&self) -> usize {
161
- self.df.borrow().n_chunks()
160
+ self.df.borrow().first_col_n_chunks()
162
161
  }
163
162
 
164
163
  pub fn shape(&self) -> (usize, usize) {
@@ -174,7 +173,8 @@ impl RbDataFrame {
174
173
  }
175
174
 
176
175
  pub fn hstack(&self, columns: RArray) -> RbResult<Self> {
177
- let columns = to_series_collection(columns)?;
176
+ let columns = to_series(columns)?;
177
+ let columns = columns.into_iter().map(Into::into).collect::<Vec<_>>();
178
178
  let df = self
179
179
  .df
180
180
  .borrow()
@@ -184,7 +184,8 @@ impl RbDataFrame {
184
184
  }
185
185
 
186
186
  pub fn hstack_mut(&self, columns: RArray) -> RbResult<()> {
187
- let columns = to_series_collection(columns)?;
187
+ let columns = to_series(columns)?;
188
+ let columns = columns.into_iter().map(Into::into).collect::<Vec<_>>();
188
189
  self.df
189
190
  .borrow_mut()
190
191
  .hstack_mut(&columns)
@@ -223,6 +224,7 @@ impl RbDataFrame {
223
224
  .borrow_mut()
224
225
  .drop_in_place(&name)
225
226
  .map_err(RbPolarsErr::from)?;
227
+ let s = s.take_materialized_series();
226
228
  Ok(RbSeries::new(s))
227
229
  }
228
230
 
@@ -230,7 +232,7 @@ impl RbDataFrame {
230
232
  self.df
231
233
  .borrow()
232
234
  .select_at_idx(idx)
233
- .map(|s| RbSeries::new(s.clone()))
235
+ .map(|s| RbSeries::new(s.as_materialized_series().clone()))
234
236
  }
235
237
 
236
238
  pub fn get_column_index(&self, name: String) -> Option<usize> {
@@ -238,11 +240,13 @@ impl RbDataFrame {
238
240
  }
239
241
 
240
242
  pub fn get_column(&self, name: String) -> RbResult<RbSeries> {
241
- self.df
243
+ let series = self
244
+ .df
242
245
  .borrow()
243
246
  .column(&name)
244
- .map(|s| RbSeries::new(s.clone()))
245
- .map_err(RbPolarsErr::from)
247
+ .map(|s| RbSeries::new(s.as_materialized_series().clone()))
248
+ .map_err(RbPolarsErr::from)?;
249
+ Ok(series)
246
250
  }
247
251
 
248
252
  pub fn select(&self, selection: Vec<String>) -> RbResult<Self> {
@@ -405,52 +409,6 @@ impl RbDataFrame {
405
409
  self.df.borrow().clone().lazy().into()
406
410
  }
407
411
 
408
- pub fn max_horizontal(&self) -> RbResult<Option<RbSeries>> {
409
- let s = self
410
- .df
411
- .borrow()
412
- .max_horizontal()
413
- .map_err(RbPolarsErr::from)?;
414
- Ok(s.map(|s| s.into()))
415
- }
416
-
417
- pub fn min_horizontal(&self) -> RbResult<Option<RbSeries>> {
418
- let s = self
419
- .df
420
- .borrow()
421
- .min_horizontal()
422
- .map_err(RbPolarsErr::from)?;
423
- Ok(s.map(|s| s.into()))
424
- }
425
-
426
- pub fn sum_horizontal(&self, ignore_nulls: bool) -> RbResult<Option<RbSeries>> {
427
- let null_strategy = if ignore_nulls {
428
- NullStrategy::Ignore
429
- } else {
430
- NullStrategy::Propagate
431
- };
432
- let s = self
433
- .df
434
- .borrow()
435
- .sum_horizontal(null_strategy)
436
- .map_err(RbPolarsErr::from)?;
437
- Ok(s.map(|s| s.into()))
438
- }
439
-
440
- pub fn mean_horizontal(&self, ignore_nulls: bool) -> RbResult<Option<RbSeries>> {
441
- let null_strategy = if ignore_nulls {
442
- NullStrategy::Ignore
443
- } else {
444
- NullStrategy::Propagate
445
- };
446
- let s = self
447
- .df
448
- .borrow()
449
- .mean_horizontal(null_strategy)
450
- .map_err(RbPolarsErr::from)?;
451
- Ok(s.map(|s| s.into()))
452
- }
453
-
454
412
  pub fn to_dummies(
455
413
  &self,
456
414
  columns: Option<Vec<String>>,