polars-df 0.14.0 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87):
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +35 -0
  3. data/Cargo.lock +1523 -378
  4. data/LICENSE.txt +1 -0
  5. data/README.md +38 -4
  6. data/ext/polars/Cargo.toml +15 -5
  7. data/ext/polars/src/batched_csv.rs +7 -10
  8. data/ext/polars/src/conversion/any_value.rs +31 -21
  9. data/ext/polars/src/conversion/mod.rs +155 -48
  10. data/ext/polars/src/dataframe/construction.rs +0 -3
  11. data/ext/polars/src/dataframe/export.rs +9 -2
  12. data/ext/polars/src/dataframe/general.rs +15 -57
  13. data/ext/polars/src/dataframe/io.rs +77 -169
  14. data/ext/polars/src/dataframe/mod.rs +1 -0
  15. data/ext/polars/src/dataframe/serde.rs +15 -0
  16. data/ext/polars/src/error.rs +31 -48
  17. data/ext/polars/src/exceptions.rs +24 -0
  18. data/ext/polars/src/expr/binary.rs +4 -42
  19. data/ext/polars/src/expr/datetime.rs +5 -4
  20. data/ext/polars/src/expr/general.rs +16 -22
  21. data/ext/polars/src/expr/list.rs +18 -11
  22. data/ext/polars/src/expr/meta.rs +6 -2
  23. data/ext/polars/src/expr/rolling.rs +6 -7
  24. data/ext/polars/src/expr/string.rs +9 -36
  25. data/ext/polars/src/file.rs +78 -23
  26. data/ext/polars/src/functions/aggregation.rs +4 -4
  27. data/ext/polars/src/functions/business.rs +15 -0
  28. data/ext/polars/src/functions/io.rs +34 -13
  29. data/ext/polars/src/functions/lazy.rs +22 -12
  30. data/ext/polars/src/functions/meta.rs +1 -1
  31. data/ext/polars/src/functions/mod.rs +1 -0
  32. data/ext/polars/src/interop/arrow/mod.rs +1 -0
  33. data/ext/polars/src/interop/arrow/to_ruby.rs +83 -0
  34. data/ext/polars/src/interop/mod.rs +1 -0
  35. data/ext/polars/src/lazyframe/general.rs +920 -0
  36. data/ext/polars/src/lazyframe/mod.rs +3 -827
  37. data/ext/polars/src/lazyframe/serde.rs +31 -0
  38. data/ext/polars/src/lib.rs +54 -27
  39. data/ext/polars/src/map/dataframe.rs +10 -6
  40. data/ext/polars/src/map/lazy.rs +65 -4
  41. data/ext/polars/src/map/mod.rs +9 -8
  42. data/ext/polars/src/on_startup.rs +1 -1
  43. data/ext/polars/src/series/aggregation.rs +1 -5
  44. data/ext/polars/src/series/arithmetic.rs +10 -10
  45. data/ext/polars/src/series/construction.rs +2 -2
  46. data/ext/polars/src/series/export.rs +1 -1
  47. data/ext/polars/src/series/general.rs +631 -0
  48. data/ext/polars/src/series/import.rs +55 -0
  49. data/ext/polars/src/series/mod.rs +11 -638
  50. data/ext/polars/src/series/scatter.rs +2 -2
  51. data/ext/polars/src/utils.rs +0 -20
  52. data/lib/polars/batched_csv_reader.rb +0 -2
  53. data/lib/polars/binary_expr.rb +133 -9
  54. data/lib/polars/binary_name_space.rb +101 -6
  55. data/lib/polars/config.rb +4 -0
  56. data/lib/polars/data_frame.rb +452 -101
  57. data/lib/polars/data_type_group.rb +28 -0
  58. data/lib/polars/data_types.rb +3 -1
  59. data/lib/polars/date_time_expr.rb +244 -0
  60. data/lib/polars/date_time_name_space.rb +87 -0
  61. data/lib/polars/expr.rb +103 -2
  62. data/lib/polars/functions/aggregation/horizontal.rb +10 -4
  63. data/lib/polars/functions/as_datatype.rb +51 -2
  64. data/lib/polars/functions/col.rb +1 -1
  65. data/lib/polars/functions/eager.rb +1 -3
  66. data/lib/polars/functions/lazy.rb +95 -13
  67. data/lib/polars/functions/range/time_range.rb +21 -21
  68. data/lib/polars/io/csv.rb +14 -16
  69. data/lib/polars/io/database.rb +2 -2
  70. data/lib/polars/io/delta.rb +126 -0
  71. data/lib/polars/io/ipc.rb +14 -4
  72. data/lib/polars/io/ndjson.rb +10 -0
  73. data/lib/polars/io/parquet.rb +168 -111
  74. data/lib/polars/lazy_frame.rb +684 -20
  75. data/lib/polars/list_name_space.rb +169 -0
  76. data/lib/polars/selectors.rb +1226 -0
  77. data/lib/polars/series.rb +465 -35
  78. data/lib/polars/string_cache.rb +27 -1
  79. data/lib/polars/string_expr.rb +0 -1
  80. data/lib/polars/string_name_space.rb +73 -3
  81. data/lib/polars/struct_name_space.rb +31 -7
  82. data/lib/polars/utils/various.rb +5 -1
  83. data/lib/polars/utils.rb +45 -10
  84. data/lib/polars/version.rb +1 -1
  85. data/lib/polars.rb +17 -1
  86. metadata +16 -9
  87. data/lib/polars/functions.rb +0 -57
@@ -2,25 +2,30 @@ pub(crate) mod any_value;
2
2
  mod chunked_array;
3
3
 
4
4
  use std::fmt::{Debug, Display, Formatter};
5
+ use std::fs::File;
5
6
  use std::hash::{Hash, Hasher};
6
7
  use std::num::NonZeroUsize;
8
+ use std::path::PathBuf;
7
9
 
8
10
  use magnus::{
9
- class, exception, prelude::*, r_hash::ForEach, value::Opaque, IntoValue, Module, RArray, RHash,
10
- Ruby, Symbol, TryConvert, Value,
11
+ class, exception, prelude::*, r_hash::ForEach, try_convert::TryConvertOwned, value::Opaque,
12
+ IntoValue, Module, RArray, RHash, Ruby, Symbol, TryConvert, Value,
11
13
  };
12
14
  use polars::chunked_array::object::PolarsObjectSafe;
13
15
  use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy};
14
16
  use polars::datatypes::AnyValue;
15
17
  use polars::frame::row::Row;
16
- use polars::frame::NullStrategy;
17
18
  use polars::io::avro::AvroCompression;
19
+ use polars::io::cloud::CloudOptions;
18
20
  use polars::prelude::*;
19
21
  use polars::series::ops::NullBehavior;
20
22
  use polars_core::utils::arrow::array::Array;
21
23
  use polars_core::utils::materialize_dyn_int;
24
+ use polars_plan::plans::ScanSources;
25
+ use polars_utils::mmap::MemSlice;
22
26
  use polars_utils::total_ord::{TotalEq, TotalHash};
23
27
 
28
+ use crate::file::{get_ruby_scan_source_input, RubyScanSourceInput};
24
29
  use crate::object::OBJECT_NAME;
25
30
  use crate::rb_modules::series;
26
31
  use crate::{RbDataFrame, RbLazyFrame, RbPolarsErr, RbResult, RbSeries, RbTypeError, RbValueError};
@@ -104,9 +109,10 @@ impl TryConvert for Wrap<NullValues> {
104
109
  .collect(),
105
110
  )))
106
111
  } else {
107
- Err(RbPolarsErr::other(
108
- "could not extract value from null_values argument".into(),
109
- ))
112
+ Err(
113
+ RbPolarsErr::Other("could not extract value from null_values argument".into())
114
+ .into(),
115
+ )
110
116
  }
111
117
  }
112
118
  }
@@ -298,29 +304,31 @@ impl TryConvert for Wrap<DataType> {
298
304
  let dtype = if ob.is_kind_of(class::class()) {
299
305
  let name = ob.funcall::<_, _, String>("name", ())?;
300
306
  match name.as_str() {
301
- "Polars::UInt8" => DataType::UInt8,
302
- "Polars::UInt16" => DataType::UInt16,
303
- "Polars::UInt32" => DataType::UInt32,
304
- "Polars::UInt64" => DataType::UInt64,
305
307
  "Polars::Int8" => DataType::Int8,
306
308
  "Polars::Int16" => DataType::Int16,
307
309
  "Polars::Int32" => DataType::Int32,
308
310
  "Polars::Int64" => DataType::Int64,
311
+ "Polars::UInt8" => DataType::UInt8,
312
+ "Polars::UInt16" => DataType::UInt16,
313
+ "Polars::UInt32" => DataType::UInt32,
314
+ "Polars::UInt64" => DataType::UInt64,
315
+ "Polars::Float32" => DataType::Float32,
316
+ "Polars::Float64" => DataType::Float64,
317
+ "Polars::Boolean" => DataType::Boolean,
309
318
  "Polars::String" => DataType::String,
310
319
  "Polars::Binary" => DataType::Binary,
311
- "Polars::Boolean" => DataType::Boolean,
312
320
  "Polars::Categorical" => DataType::Categorical(None, Default::default()),
313
321
  "Polars::Enum" => DataType::Enum(None, Default::default()),
314
322
  "Polars::Date" => DataType::Date,
315
- "Polars::Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
316
323
  "Polars::Time" => DataType::Time,
324
+ "Polars::Datetime" => DataType::Datetime(TimeUnit::Microseconds, None),
317
325
  "Polars::Duration" => DataType::Duration(TimeUnit::Microseconds),
318
326
  "Polars::Decimal" => DataType::Decimal(None, None),
319
- "Polars::Float32" => DataType::Float32,
320
- "Polars::Float64" => DataType::Float64,
321
- "Polars::Object" => DataType::Object(OBJECT_NAME, None),
322
327
  "Polars::List" => DataType::List(Box::new(DataType::Null)),
328
+ "Polars::Array" => DataType::Array(Box::new(DataType::Null), 0),
329
+ "Polars::Struct" => DataType::Struct(vec![]),
323
330
  "Polars::Null" => DataType::Null,
331
+ "Polars::Object" => DataType::Object(OBJECT_NAME, None),
324
332
  "Polars::Unknown" => DataType::Unknown(Default::default()),
325
333
  dt => {
326
334
  return Err(RbValueError::new_err(format!(
@@ -328,7 +336,6 @@ impl TryConvert for Wrap<DataType> {
328
336
  )))
329
337
  }
330
338
  }
331
- // TODO improve
332
339
  } else if String::try_convert(ob).is_err() {
333
340
  let name = unsafe { ob.class().name() }.into_owned();
334
341
  match name.as_str() {
@@ -340,9 +347,11 @@ impl TryConvert for Wrap<DataType> {
340
347
  "Polars::UInt16" => DataType::UInt16,
341
348
  "Polars::UInt32" => DataType::UInt32,
342
349
  "Polars::UInt64" => DataType::UInt64,
350
+ "Polars::Float32" => DataType::Float32,
351
+ "Polars::Float64" => DataType::Float64,
352
+ "Polars::Boolean" => DataType::Boolean,
343
353
  "Polars::String" => DataType::String,
344
354
  "Polars::Binary" => DataType::Binary,
345
- "Polars::Boolean" => DataType::Boolean,
346
355
  "Polars::Categorical" => {
347
356
  let ordering = ob
348
357
  .funcall::<_, _, Wrap<CategoricalOrdering>>("ordering", ())?
@@ -358,21 +367,17 @@ impl TryConvert for Wrap<DataType> {
358
367
  }
359
368
  "Polars::Date" => DataType::Date,
360
369
  "Polars::Time" => DataType::Time,
361
- "Polars::Float32" => DataType::Float32,
362
- "Polars::Float64" => DataType::Float64,
363
- "Polars::Null" => DataType::Null,
364
- "Polars::Unknown" => DataType::Unknown(Default::default()),
365
- "Polars::Duration" => {
366
- let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
367
- let time_unit = Wrap::<TimeUnit>::try_convert(time_unit)?.0;
368
- DataType::Duration(time_unit)
369
- }
370
370
  "Polars::Datetime" => {
371
371
  let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
372
372
  let time_unit = Wrap::<TimeUnit>::try_convert(time_unit)?.0;
373
373
  let time_zone: Option<String> = ob.funcall("time_zone", ())?;
374
374
  DataType::Datetime(time_unit, time_zone.as_deref().map(|x| x.into()))
375
375
  }
376
+ "Polars::Duration" => {
377
+ let time_unit: Value = ob.funcall("time_unit", ()).unwrap();
378
+ let time_unit = Wrap::<TimeUnit>::try_convert(time_unit)?.0;
379
+ DataType::Duration(time_unit)
380
+ }
376
381
  "Polars::Decimal" => {
377
382
  let precision = ob.funcall("precision", ())?;
378
383
  let scale = ob.funcall("scale", ())?;
@@ -383,6 +388,13 @@ impl TryConvert for Wrap<DataType> {
383
388
  let inner = Wrap::<DataType>::try_convert(inner)?;
384
389
  DataType::List(Box::new(inner.0))
385
390
  }
391
+ "Polars::Array" => {
392
+ let inner: Value = ob.funcall("inner", ()).unwrap();
393
+ let size: Value = ob.funcall("size", ()).unwrap();
394
+ let inner = Wrap::<DataType>::try_convert(inner)?;
395
+ let size = usize::try_convert(size)?;
396
+ DataType::Array(Box::new(inner.0), size)
397
+ }
386
398
  "Polars::Struct" => {
387
399
  let arr: RArray = ob.funcall("fields", ())?;
388
400
  let mut fields = Vec::with_capacity(arr.len());
@@ -391,6 +403,9 @@ impl TryConvert for Wrap<DataType> {
391
403
  }
392
404
  DataType::Struct(fields)
393
405
  }
406
+ "Polars::Null" => DataType::Null,
407
+ "Object" => DataType::Object(OBJECT_NAME, None),
408
+ "Polars::Unknown" => DataType::Unknown(Default::default()),
394
409
  dt => {
395
410
  return Err(RbTypeError::new_err(format!(
396
411
  "A {dt} object is not a correct polars DataType. \
@@ -434,6 +449,8 @@ impl TryConvert for Wrap<DataType> {
434
449
  }
435
450
  }
436
451
 
452
+ unsafe impl TryConvertOwned for Wrap<DataType> {}
453
+
437
454
  impl TryConvert for Wrap<StatisticsOptions> {
438
455
  fn try_convert(ob: Value) -> RbResult<Self> {
439
456
  let mut statistics = StatisticsOptions::empty();
@@ -452,8 +469,7 @@ impl TryConvert for Wrap<StatisticsOptions> {
452
469
  }
453
470
  }
454
471
  Ok(ForEach::Continue)
455
- })
456
- .unwrap();
472
+ })?;
457
473
 
458
474
  Ok(Wrap(statistics))
459
475
  }
@@ -478,13 +494,75 @@ impl TryConvert for Wrap<Schema> {
478
494
  dict.foreach(|key: String, val: Wrap<DataType>| {
479
495
  schema.push(Ok(Field::new((&*key).into(), val.0)));
480
496
  Ok(ForEach::Continue)
481
- })
482
- .unwrap();
497
+ })?;
483
498
 
484
499
  Ok(Wrap(schema.into_iter().collect::<RbResult<Schema>>()?))
485
500
  }
486
501
  }
487
502
 
503
+ impl TryConvert for Wrap<ScanSources> {
504
+ fn try_convert(ob: Value) -> RbResult<Self> {
505
+ let list = RArray::try_convert(ob)?;
506
+
507
+ if list.is_empty() {
508
+ return Ok(Wrap(ScanSources::default()));
509
+ }
510
+
511
+ enum MutableSources {
512
+ Paths(Vec<PathBuf>),
513
+ Files(Vec<File>),
514
+ Buffers(Vec<MemSlice>),
515
+ }
516
+
517
+ let num_items = list.len();
518
+ let mut iter = list
519
+ .into_iter()
520
+ .map(|val| get_ruby_scan_source_input(val, false));
521
+
522
+ let Some(first) = iter.next() else {
523
+ return Ok(Wrap(ScanSources::default()));
524
+ };
525
+
526
+ let mut sources = match first? {
527
+ RubyScanSourceInput::Path(path) => {
528
+ let mut sources = Vec::with_capacity(num_items);
529
+ sources.push(path);
530
+ MutableSources::Paths(sources)
531
+ }
532
+ RubyScanSourceInput::File(file) => {
533
+ let mut sources = Vec::with_capacity(num_items);
534
+ sources.push(file);
535
+ MutableSources::Files(sources)
536
+ }
537
+ RubyScanSourceInput::Buffer(buffer) => {
538
+ let mut sources = Vec::with_capacity(num_items);
539
+ sources.push(buffer);
540
+ MutableSources::Buffers(sources)
541
+ }
542
+ };
543
+
544
+ for source in iter {
545
+ match (&mut sources, source?) {
546
+ (MutableSources::Paths(v), RubyScanSourceInput::Path(p)) => v.push(p),
547
+ (MutableSources::Files(v), RubyScanSourceInput::File(f)) => v.push(f),
548
+ (MutableSources::Buffers(v), RubyScanSourceInput::Buffer(f)) => v.push(f),
549
+ _ => {
550
+ return Err(RbTypeError::new_err(
551
+ "Cannot combine in-memory bytes, paths and files for scan sources"
552
+ .to_string(),
553
+ ))
554
+ }
555
+ }
556
+ }
557
+
558
+ Ok(Wrap(match sources {
559
+ MutableSources::Paths(i) => ScanSources::Paths(i.into()),
560
+ MutableSources::Files(i) => ScanSources::Files(i.into()),
561
+ MutableSources::Buffers(i) => ScanSources::Buffers(i.into()),
562
+ }))
563
+ }
564
+ }
565
+
488
566
  #[derive(Clone)]
489
567
  pub struct ObjectValue {
490
568
  pub inner: Opaque<Value>,
@@ -493,7 +571,7 @@ pub struct ObjectValue {
493
571
  impl Debug for ObjectValue {
494
572
  fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
495
573
  f.debug_struct("ObjectValue")
496
- .field("inner", &self.to_object())
574
+ .field("inner", &self.to_value())
497
575
  .finish()
498
576
  }
499
577
  }
@@ -501,7 +579,7 @@ impl Debug for ObjectValue {
501
579
  impl Hash for ObjectValue {
502
580
  fn hash<H: Hasher>(&self, state: &mut H) {
503
581
  let h = self
504
- .to_object()
582
+ .to_value()
505
583
  .funcall::<_, _, isize>("hash", ())
506
584
  .expect("should be hashable");
507
585
  state.write_isize(h)
@@ -512,7 +590,7 @@ impl Eq for ObjectValue {}
512
590
 
513
591
  impl PartialEq for ObjectValue {
514
592
  fn eq(&self, other: &Self) -> bool {
515
- self.to_object().eql(other.to_object()).unwrap_or(false)
593
+ self.to_value().eql(other.to_value()).unwrap_or(false)
516
594
  }
517
595
  }
518
596
 
@@ -533,7 +611,7 @@ impl TotalHash for ObjectValue {
533
611
 
534
612
  impl Display for ObjectValue {
535
613
  fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
536
- write!(f, "{}", self.to_object())
614
+ write!(f, "{}", self.to_value())
537
615
  }
538
616
  }
539
617
 
@@ -561,16 +639,15 @@ impl From<&dyn PolarsObjectSafe> for &ObjectValue {
561
639
  }
562
640
  }
563
641
 
564
- // TODO remove
565
642
  impl ObjectValue {
566
- pub fn to_object(&self) -> Value {
567
- Ruby::get().unwrap().get_inner(self.inner)
643
+ pub fn to_value(&self) -> Value {
644
+ self.clone().into_value()
568
645
  }
569
646
  }
570
647
 
571
648
  impl IntoValue for ObjectValue {
572
- fn into_value_with(self, _: &Ruby) -> Value {
573
- self.to_object()
649
+ fn into_value_with(self, ruby: &Ruby) -> Value {
650
+ ruby.get_inner(self.inner)
574
651
  }
575
652
  }
576
653
 
@@ -587,10 +664,10 @@ impl TryConvert for Wrap<AsofStrategy> {
587
664
  let parsed = match String::try_convert(ob)?.as_str() {
588
665
  "backward" => AsofStrategy::Backward,
589
666
  "forward" => AsofStrategy::Forward,
667
+ "nearest" => AsofStrategy::Nearest,
590
668
  v => {
591
669
  return Err(RbValueError::new_err(format!(
592
- "strategy must be one of {{'backward', 'forward'}}, got {}",
593
- v
670
+ "asof `strategy` must be one of {{'backward', 'forward', 'nearest'}}, got {v}",
594
671
  )))
595
672
  }
596
673
  };
@@ -830,14 +907,14 @@ impl TryConvert for Wrap<ParallelStrategy> {
830
907
  }
831
908
  }
832
909
 
833
- impl TryConvert for Wrap<QuantileInterpolOptions> {
910
+ impl TryConvert for Wrap<QuantileMethod> {
834
911
  fn try_convert(ob: Value) -> RbResult<Self> {
835
912
  let parsed = match String::try_convert(ob)?.as_str() {
836
- "lower" => QuantileInterpolOptions::Lower,
837
- "higher" => QuantileInterpolOptions::Higher,
838
- "nearest" => QuantileInterpolOptions::Nearest,
839
- "linear" => QuantileInterpolOptions::Linear,
840
- "midpoint" => QuantileInterpolOptions::Midpoint,
913
+ "lower" => QuantileMethod::Lower,
914
+ "higher" => QuantileMethod::Higher,
915
+ "nearest" => QuantileMethod::Nearest,
916
+ "linear" => QuantileMethod::Linear,
917
+ "midpoint" => QuantileMethod::Midpoint,
841
918
  v => {
842
919
  return Err(RbValueError::new_err(format!(
843
920
  "interpolation must be one of {{'lower', 'higher', 'nearest', 'linear', 'midpoint'}}, got {}",
@@ -1001,6 +1078,11 @@ impl TryConvert for Wrap<QuoteStyle> {
1001
1078
  }
1002
1079
  }
1003
1080
 
1081
+ pub(crate) fn parse_cloud_options(uri: &str, kv: Vec<(String, String)>) -> RbResult<CloudOptions> {
1082
+ let out = CloudOptions::from_untyped_config(uri, kv).map_err(RbPolarsErr::from)?;
1083
+ Ok(out)
1084
+ }
1085
+
1004
1086
  pub fn parse_fill_null_strategy(
1005
1087
  strategy: &str,
1006
1088
  limit: FillNullLimit,
@@ -1071,7 +1153,7 @@ impl TryConvert for Wrap<NonZeroUsize> {
1071
1153
  let v = usize::try_convert(ob)?;
1072
1154
  NonZeroUsize::new(v)
1073
1155
  .map(Wrap)
1074
- .ok_or(RbValueError::new_err("must be non-zero".into()))
1156
+ .ok_or(RbValueError::new_err("must be non-zero"))
1075
1157
  }
1076
1158
  }
1077
1159
 
@@ -1085,3 +1167,28 @@ where
1085
1167
  .map(|s| PlSmallStr::from_str(s.as_ref()))
1086
1168
  .collect()
1087
1169
  }
1170
+
1171
+ #[derive(Debug, Copy, Clone)]
1172
+ pub struct RbCompatLevel(pub CompatLevel);
1173
+
1174
+ impl TryConvert for RbCompatLevel {
1175
+ fn try_convert(ob: Value) -> RbResult<Self> {
1176
+ Ok(RbCompatLevel(if let Ok(level) = u16::try_convert(ob) {
1177
+ if let Ok(compat_level) = CompatLevel::with_level(level) {
1178
+ compat_level
1179
+ } else {
1180
+ return Err(RbValueError::new_err("invalid compat level".to_string()));
1181
+ }
1182
+ } else if let Ok(future) = bool::try_convert(ob) {
1183
+ if future {
1184
+ CompatLevel::newest()
1185
+ } else {
1186
+ CompatLevel::oldest()
1187
+ }
1188
+ } else {
1189
+ return Err(RbTypeError::new_err(
1190
+ "'compat_level' argument accepts int or bool".to_string(),
1191
+ ));
1192
+ }))
1193
+ }
1194
+ }
@@ -54,9 +54,6 @@ fn finish_from_rows(
54
54
  schema_overrides: Option<Schema>,
55
55
  infer_schema_length: Option<usize>,
56
56
  ) -> RbResult<RbDataFrame> {
57
- // Object builder must be registered
58
- crate::on_startup::register_object_builder();
59
-
60
57
  let mut schema = if let Some(mut schema) = schema {
61
58
  resolve_schema_overrides(&mut schema, schema_overrides);
62
59
  update_schema_from_rows(&mut schema, &rows, infer_schema_length)?;
@@ -2,6 +2,8 @@ use magnus::{prelude::*, IntoValue, RArray, Value};
2
2
 
3
3
  use super::*;
4
4
  use crate::conversion::{ObjectValue, Wrap};
5
+ use crate::interop::arrow::to_ruby::dataframe_to_stream;
6
+ use crate::RbResult;
5
7
 
6
8
  impl RbDataFrame {
7
9
  pub fn row_tuple(&self, idx: i64) -> Value {
@@ -18,7 +20,7 @@ impl RbDataFrame {
18
20
  .map(|s| match s.dtype() {
19
21
  DataType::Object(_, _) => {
20
22
  let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
21
- obj.unwrap().to_object()
23
+ obj.unwrap().to_value()
22
24
  }
23
25
  _ => Wrap(s.get(idx).unwrap()).into_value(),
24
26
  }),
@@ -37,7 +39,7 @@ impl RbDataFrame {
37
39
  .map(|s| match s.dtype() {
38
40
  DataType::Object(_, _) => {
39
41
  let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
40
- obj.unwrap().to_object()
42
+ obj.unwrap().to_value()
41
43
  }
42
44
  _ => Wrap(s.get(idx).unwrap()).into_value(),
43
45
  }),
@@ -45,4 +47,9 @@ impl RbDataFrame {
45
47
  }))
46
48
  .as_value()
47
49
  }
50
+
51
+ pub fn __arrow_c_stream__(&self) -> RbResult<Value> {
52
+ self.df.borrow_mut().align_chunks();
53
+ dataframe_to_stream(&self.df.borrow())
54
+ }
48
55
  }
@@ -1,6 +1,5 @@
1
1
  use either::Either;
2
2
  use magnus::{prelude::*, typed_data::Obj, IntoValue, RArray, Value};
3
- use polars::frame::NullStrategy;
4
3
  use polars::prelude::pivot::{pivot, pivot_stable};
5
4
  use polars::prelude::*;
6
5
 
@@ -10,14 +9,14 @@ use crate::map::dataframe::{
10
9
  apply_lambda_with_utf8_out_type,
11
10
  };
12
11
  use crate::prelude::strings_to_pl_smallstr;
13
- use crate::series::{to_rbseries_collection, to_series_collection};
12
+ use crate::series::{to_rbseries, to_series};
14
13
  use crate::{RbDataFrame, RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
15
14
 
16
15
  impl RbDataFrame {
17
16
  pub fn init(columns: RArray) -> RbResult<Self> {
18
17
  let mut cols = Vec::new();
19
18
  for i in columns.into_iter() {
20
- cols.push(<&RbSeries>::try_convert(i)?.series.borrow().clone());
19
+ cols.push(<&RbSeries>::try_convert(i)?.series.borrow().clone().into());
21
20
  }
22
21
  let df = DataFrame::new(cols).map_err(RbPolarsErr::from)?;
23
22
  Ok(RbDataFrame::new(df))
@@ -128,7 +127,7 @@ impl RbDataFrame {
128
127
 
129
128
  pub fn get_columns(&self) -> RArray {
130
129
  let cols = self.df.borrow().get_columns().to_vec();
131
- to_rbseries_collection(cols)
130
+ to_rbseries(cols)
132
131
  }
133
132
 
134
133
  pub fn columns(&self) -> Vec<String> {
@@ -158,7 +157,7 @@ impl RbDataFrame {
158
157
  }
159
158
 
160
159
  pub fn n_chunks(&self) -> usize {
161
- self.df.borrow().n_chunks()
160
+ self.df.borrow().first_col_n_chunks()
162
161
  }
163
162
 
164
163
  pub fn shape(&self) -> (usize, usize) {
@@ -174,7 +173,8 @@ impl RbDataFrame {
174
173
  }
175
174
 
176
175
  pub fn hstack(&self, columns: RArray) -> RbResult<Self> {
177
- let columns = to_series_collection(columns)?;
176
+ let columns = to_series(columns)?;
177
+ let columns = columns.into_iter().map(Into::into).collect::<Vec<_>>();
178
178
  let df = self
179
179
  .df
180
180
  .borrow()
@@ -184,7 +184,8 @@ impl RbDataFrame {
184
184
  }
185
185
 
186
186
  pub fn hstack_mut(&self, columns: RArray) -> RbResult<()> {
187
- let columns = to_series_collection(columns)?;
187
+ let columns = to_series(columns)?;
188
+ let columns = columns.into_iter().map(Into::into).collect::<Vec<_>>();
188
189
  self.df
189
190
  .borrow_mut()
190
191
  .hstack_mut(&columns)
@@ -223,6 +224,7 @@ impl RbDataFrame {
223
224
  .borrow_mut()
224
225
  .drop_in_place(&name)
225
226
  .map_err(RbPolarsErr::from)?;
227
+ let s = s.take_materialized_series();
226
228
  Ok(RbSeries::new(s))
227
229
  }
228
230
 
@@ -230,7 +232,7 @@ impl RbDataFrame {
230
232
  self.df
231
233
  .borrow()
232
234
  .select_at_idx(idx)
233
- .map(|s| RbSeries::new(s.clone()))
235
+ .map(|s| RbSeries::new(s.as_materialized_series().clone()))
234
236
  }
235
237
 
236
238
  pub fn get_column_index(&self, name: String) -> Option<usize> {
@@ -238,11 +240,13 @@ impl RbDataFrame {
238
240
  }
239
241
 
240
242
  pub fn get_column(&self, name: String) -> RbResult<RbSeries> {
241
- self.df
243
+ let series = self
244
+ .df
242
245
  .borrow()
243
246
  .column(&name)
244
- .map(|s| RbSeries::new(s.clone()))
245
- .map_err(RbPolarsErr::from)
247
+ .map(|s| RbSeries::new(s.as_materialized_series().clone()))
248
+ .map_err(RbPolarsErr::from)?;
249
+ Ok(series)
246
250
  }
247
251
 
248
252
  pub fn select(&self, selection: Vec<String>) -> RbResult<Self> {
@@ -405,52 +409,6 @@ impl RbDataFrame {
405
409
  self.df.borrow().clone().lazy().into()
406
410
  }
407
411
 
408
- pub fn max_horizontal(&self) -> RbResult<Option<RbSeries>> {
409
- let s = self
410
- .df
411
- .borrow()
412
- .max_horizontal()
413
- .map_err(RbPolarsErr::from)?;
414
- Ok(s.map(|s| s.into()))
415
- }
416
-
417
- pub fn min_horizontal(&self) -> RbResult<Option<RbSeries>> {
418
- let s = self
419
- .df
420
- .borrow()
421
- .min_horizontal()
422
- .map_err(RbPolarsErr::from)?;
423
- Ok(s.map(|s| s.into()))
424
- }
425
-
426
- pub fn sum_horizontal(&self, ignore_nulls: bool) -> RbResult<Option<RbSeries>> {
427
- let null_strategy = if ignore_nulls {
428
- NullStrategy::Ignore
429
- } else {
430
- NullStrategy::Propagate
431
- };
432
- let s = self
433
- .df
434
- .borrow()
435
- .sum_horizontal(null_strategy)
436
- .map_err(RbPolarsErr::from)?;
437
- Ok(s.map(|s| s.into()))
438
- }
439
-
440
- pub fn mean_horizontal(&self, ignore_nulls: bool) -> RbResult<Option<RbSeries>> {
441
- let null_strategy = if ignore_nulls {
442
- NullStrategy::Ignore
443
- } else {
444
- NullStrategy::Propagate
445
- };
446
- let s = self
447
- .df
448
- .borrow()
449
- .mean_horizontal(null_strategy)
450
- .map_err(RbPolarsErr::from)?;
451
- Ok(s.map(|s| s.into()))
452
- }
453
-
454
412
  pub fn to_dummies(
455
413
  &self,
456
414
  columns: Option<Vec<String>>,