polars-df 0.6.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +24 -0
  3. data/Cargo.lock +597 -599
  4. data/Cargo.toml +1 -0
  5. data/README.md +8 -7
  6. data/ext/polars/Cargo.toml +20 -10
  7. data/ext/polars/src/batched_csv.rs +27 -28
  8. data/ext/polars/src/conversion.rs +135 -106
  9. data/ext/polars/src/dataframe.rs +140 -131
  10. data/ext/polars/src/error.rs +0 -5
  11. data/ext/polars/src/expr/binary.rs +18 -6
  12. data/ext/polars/src/expr/categorical.rs +8 -1
  13. data/ext/polars/src/expr/datetime.rs +10 -12
  14. data/ext/polars/src/expr/general.rs +129 -286
  15. data/ext/polars/src/expr/list.rs +17 -9
  16. data/ext/polars/src/{expr.rs → expr/mod.rs} +4 -2
  17. data/ext/polars/src/expr/name.rs +44 -0
  18. data/ext/polars/src/expr/rolling.rs +201 -0
  19. data/ext/polars/src/expr/string.rs +94 -67
  20. data/ext/polars/src/file.rs +3 -3
  21. data/ext/polars/src/functions/aggregation.rs +35 -0
  22. data/ext/polars/src/functions/eager.rs +7 -31
  23. data/ext/polars/src/functions/io.rs +10 -10
  24. data/ext/polars/src/functions/lazy.rs +66 -41
  25. data/ext/polars/src/functions/meta.rs +30 -0
  26. data/ext/polars/src/functions/misc.rs +8 -0
  27. data/ext/polars/src/functions/mod.rs +5 -0
  28. data/ext/polars/src/functions/random.rs +6 -0
  29. data/ext/polars/src/functions/range.rs +41 -0
  30. data/ext/polars/src/functions/string_cache.rs +11 -0
  31. data/ext/polars/src/functions/whenthen.rs +7 -7
  32. data/ext/polars/src/lazyframe.rs +74 -60
  33. data/ext/polars/src/lib.rs +175 -91
  34. data/ext/polars/src/{apply → map}/dataframe.rs +29 -34
  35. data/ext/polars/src/{apply → map}/mod.rs +5 -5
  36. data/ext/polars/src/{apply → map}/series.rs +18 -22
  37. data/ext/polars/src/object.rs +0 -30
  38. data/ext/polars/src/on_startup.rs +32 -0
  39. data/ext/polars/src/rb_modules.rs +22 -7
  40. data/ext/polars/src/series/aggregation.rs +3 -0
  41. data/ext/polars/src/series/construction.rs +5 -5
  42. data/ext/polars/src/series/export.rs +4 -4
  43. data/ext/polars/src/{series.rs → series/mod.rs} +28 -45
  44. data/ext/polars/src/series/{set_at_idx.rs → scatter.rs} +38 -22
  45. data/ext/polars/src/sql.rs +46 -0
  46. data/ext/polars/src/utils.rs +1 -1
  47. data/lib/polars/config.rb +530 -0
  48. data/lib/polars/data_frame.rb +182 -145
  49. data/lib/polars/data_types.rb +4 -1
  50. data/lib/polars/date_time_expr.rb +23 -28
  51. data/lib/polars/date_time_name_space.rb +17 -37
  52. data/lib/polars/dynamic_group_by.rb +2 -2
  53. data/lib/polars/expr.rb +398 -110
  54. data/lib/polars/functions.rb +29 -37
  55. data/lib/polars/group_by.rb +38 -55
  56. data/lib/polars/io.rb +40 -5
  57. data/lib/polars/lazy_frame.rb +116 -89
  58. data/lib/polars/lazy_functions.rb +40 -68
  59. data/lib/polars/lazy_group_by.rb +7 -8
  60. data/lib/polars/list_expr.rb +12 -8
  61. data/lib/polars/list_name_space.rb +2 -2
  62. data/lib/polars/name_expr.rb +198 -0
  63. data/lib/polars/rolling_group_by.rb +2 -2
  64. data/lib/polars/series.rb +315 -43
  65. data/lib/polars/sql_context.rb +194 -0
  66. data/lib/polars/string_expr.rb +114 -60
  67. data/lib/polars/string_name_space.rb +19 -4
  68. data/lib/polars/struct_expr.rb +1 -1
  69. data/lib/polars/struct_name_space.rb +1 -1
  70. data/lib/polars/utils.rb +25 -13
  71. data/lib/polars/version.rb +1 -1
  72. data/lib/polars.rb +3 -0
  73. metadata +23 -11
  74. data/ext/polars/src/{apply → map}/lazy.rs +0 -0
@@ -1,4 +1,7 @@
1
- use magnus::{r_hash::ForEach, IntoValue, RArray, RHash, RString, Value};
1
+ use either::Either;
2
+ use magnus::{
3
+ prelude::*, r_hash::ForEach, typed_data::Obj, IntoValue, RArray, RHash, RString, Value,
4
+ };
2
5
  use polars::frame::row::{rows_to_schema_supertypes, Row};
3
6
  use polars::frame::NullStrategy;
4
7
  use polars::io::avro::AvroCompression;
@@ -11,12 +14,12 @@ use std::cell::RefCell;
11
14
  use std::io::{BufWriter, Cursor};
12
15
  use std::ops::Deref;
13
16
 
14
- use crate::apply::dataframe::{
17
+ use crate::conversion::*;
18
+ use crate::file::{get_file_like, get_mmap_bytes_reader};
19
+ use crate::map::dataframe::{
15
20
  apply_lambda_unknown, apply_lambda_with_bool_out_type, apply_lambda_with_primitive_out_type,
16
21
  apply_lambda_with_utf8_out_type,
17
22
  };
18
- use crate::conversion::*;
19
- use crate::file::{get_file_like, get_mmap_bytes_reader};
20
23
  use crate::rb_modules;
21
24
  use crate::series::{to_rbseries_collection, to_series_collection};
22
25
  use crate::{RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
@@ -45,7 +48,7 @@ impl RbDataFrame {
45
48
  schema_overwrite: Option<Schema>,
46
49
  ) -> RbResult<Self> {
47
50
  // object builder must be registered.
48
- crate::object::register_object_builder();
51
+ crate::on_startup::register_object_builder();
49
52
 
50
53
  let schema =
51
54
  rows_to_schema_supertypes(&rows, infer_schema_length.map(|n| std::cmp::max(1, n)))
@@ -53,7 +56,7 @@ impl RbDataFrame {
53
56
  // replace inferred nulls with boolean
54
57
  let fields = schema.iter_fields().map(|mut fld| match fld.data_type() {
55
58
  DataType::Null => {
56
- fld.coerce(DataType::Boolean);
59
+ // fld.coerce(DataType::Boolean);
57
60
  fld
58
61
  }
59
62
  DataType::Decimal(_, _) => {
@@ -86,7 +89,7 @@ impl RbDataFrame {
86
89
  pub fn init(columns: RArray) -> RbResult<Self> {
87
90
  let mut cols = Vec::new();
88
91
  for i in columns.each() {
89
- cols.push(i?.try_convert::<&RbSeries>()?.series.borrow().clone());
92
+ cols.push(<&RbSeries>::try_convert(i?)?.series.borrow().clone());
90
93
  }
91
94
  let df = DataFrame::new(cols).map_err(RbPolarsErr::from)?;
92
95
  Ok(RbDataFrame::new(df))
@@ -99,36 +102,35 @@ impl RbDataFrame {
99
102
  pub fn read_csv(arguments: &[Value]) -> RbResult<Self> {
100
103
  // start arguments
101
104
  // this pattern is needed for more than 16
102
- let rb_f: Value = arguments[0].try_convert()?;
103
- let infer_schema_length: Option<usize> = arguments[1].try_convert()?;
104
- let chunk_size: usize = arguments[2].try_convert()?;
105
- let has_header: bool = arguments[3].try_convert()?;
106
- let ignore_errors: bool = arguments[4].try_convert()?;
107
- let n_rows: Option<usize> = arguments[5].try_convert()?;
108
- let skip_rows: usize = arguments[6].try_convert()?;
109
- let projection: Option<Vec<usize>> = arguments[7].try_convert()?;
110
- let sep: String = arguments[8].try_convert()?;
111
- let rechunk: bool = arguments[9].try_convert()?;
112
- let columns: Option<Vec<String>> = arguments[10].try_convert()?;
113
- let encoding: Wrap<CsvEncoding> = arguments[11].try_convert()?;
114
- let n_threads: Option<usize> = arguments[12].try_convert()?;
115
- let path: Option<String> = arguments[13].try_convert()?;
116
- let overwrite_dtype: Option<Vec<(String, Wrap<DataType>)>> = arguments[14].try_convert()?;
105
+ let rb_f = arguments[0];
106
+ let infer_schema_length = Option::<usize>::try_convert(arguments[1])?;
107
+ let chunk_size = usize::try_convert(arguments[2])?;
108
+ let has_header = bool::try_convert(arguments[3])?;
109
+ let ignore_errors = bool::try_convert(arguments[4])?;
110
+ let n_rows = Option::<usize>::try_convert(arguments[5])?;
111
+ let skip_rows = usize::try_convert(arguments[6])?;
112
+ let projection = Option::<Vec<usize>>::try_convert(arguments[7])?;
113
+ let separator = String::try_convert(arguments[8])?;
114
+ let rechunk = bool::try_convert(arguments[9])?;
115
+ let columns = Option::<Vec<String>>::try_convert(arguments[10])?;
116
+ let encoding = Wrap::<CsvEncoding>::try_convert(arguments[11])?;
117
+ let n_threads = Option::<usize>::try_convert(arguments[12])?;
118
+ let path = Option::<String>::try_convert(arguments[13])?;
119
+ let overwrite_dtype = Option::<Vec<(String, Wrap<DataType>)>>::try_convert(arguments[14])?;
117
120
  // TODO fix
118
- let overwrite_dtype_slice: Option<Vec<Wrap<DataType>>> = None; // arguments[15].try_convert()?;
119
- let low_memory: bool = arguments[16].try_convert()?;
120
- let comment_char: Option<String> = arguments[17].try_convert()?;
121
- let quote_char: Option<String> = arguments[18].try_convert()?;
122
- let null_values: Option<Wrap<NullValues>> = arguments[19].try_convert()?;
123
- let try_parse_dates: bool = arguments[20].try_convert()?;
124
- let skip_rows_after_header: usize = arguments[21].try_convert()?;
125
- let row_count: Option<(String, IdxSize)> = arguments[22].try_convert()?;
126
- let sample_size: usize = arguments[23].try_convert()?;
127
- let eol_char: String = arguments[24].try_convert()?;
121
+ let overwrite_dtype_slice = Option::<Vec<Wrap<DataType>>>::None; // Option::<Vec<Wrap<DataType>>>::try_convert(arguments[15])?;
122
+ let low_memory = bool::try_convert(arguments[16])?;
123
+ let comment_prefix = Option::<String>::try_convert(arguments[17])?;
124
+ let quote_char = Option::<String>::try_convert(arguments[18])?;
125
+ let null_values = Option::<Wrap<NullValues>>::try_convert(arguments[19])?;
126
+ let try_parse_dates = bool::try_convert(arguments[20])?;
127
+ let skip_rows_after_header = usize::try_convert(arguments[21])?;
128
+ let row_count = Option::<(String, IdxSize)>::try_convert(arguments[22])?;
129
+ let sample_size = usize::try_convert(arguments[23])?;
130
+ let eol_char = String::try_convert(arguments[24])?;
128
131
  // end arguments
129
132
 
130
133
  let null_values = null_values.map(|w| w.0);
131
- let comment_char = comment_char.map(|s| s.as_bytes()[0]);
132
134
  let eol_char = eol_char.as_bytes()[0];
133
135
 
134
136
  let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
@@ -165,7 +167,7 @@ impl RbDataFrame {
165
167
  .infer_schema(infer_schema_length)
166
168
  .has_header(has_header)
167
169
  .with_n_rows(n_rows)
168
- .with_delimiter(sep.as_bytes()[0])
170
+ .with_separator(separator.as_bytes()[0])
169
171
  .with_skip_rows(skip_rows)
170
172
  .with_ignore_errors(ignore_errors)
171
173
  .with_projection(projection)
@@ -178,7 +180,7 @@ impl RbDataFrame {
178
180
  .with_dtypes(overwrite_dtype.map(Arc::new))
179
181
  .with_dtypes_slice(overwrite_dtype_slice.as_deref())
180
182
  .low_memory(low_memory)
181
- .with_comment_char(comment_char)
183
+ .with_comment_prefix(comment_prefix.as_deref())
182
184
  .with_null_values(null_values)
183
185
  .with_try_parse_dates(try_parse_dates)
184
186
  .with_quote_char(quote_char)
@@ -265,7 +267,7 @@ impl RbDataFrame {
265
267
  ) -> RbResult<()> {
266
268
  use polars::io::avro::AvroWriter;
267
269
 
268
- if let Ok(s) = rb_f.try_convert::<String>() {
270
+ if let Ok(s) = String::try_convert(rb_f) {
269
271
  let f = std::fs::File::create(s).unwrap();
270
272
  AvroWriter::new(f)
271
273
  .with_compression(compression.0)
@@ -294,12 +296,18 @@ impl RbDataFrame {
294
296
  Ok(df) => Ok(df.into()),
295
297
  // try arrow json reader instead
296
298
  // this is row oriented
297
- Err(_) => {
298
- let out = JsonReader::new(mmap_bytes_r)
299
- .with_json_format(JsonFormat::Json)
300
- .finish()
301
- .map_err(|e| RbPolarsErr::other(format!("{:?}", e)))?;
302
- Ok(out.into())
299
+ Err(e) => {
300
+ let msg = format!("{e}");
301
+ if msg.contains("successful parse invalid data") {
302
+ let e = RbPolarsErr::from(PolarsError::ComputeError(msg.into()));
303
+ Err(e)
304
+ } else {
305
+ let out = JsonReader::new(mmap_bytes_r)
306
+ .with_json_format(JsonFormat::Json)
307
+ .finish()
308
+ .map_err(|e| RbPolarsErr::other(format!("{:?}", e)))?;
309
+ Ok(out.into())
310
+ }
303
311
  }
304
312
  }
305
313
  }
@@ -341,6 +349,27 @@ impl RbDataFrame {
341
349
  Ok(())
342
350
  }
343
351
 
352
+ pub fn read_rows(
353
+ rb_rows: RArray,
354
+ infer_schema_length: Option<usize>,
355
+ schema_overwrite: Option<Wrap<Schema>>,
356
+ ) -> RbResult<Self> {
357
+ let mut rows = Vec::with_capacity(rb_rows.len());
358
+ for v in rb_rows.each() {
359
+ let rb_row = RArray::try_convert(v?)?;
360
+ let mut row = Vec::with_capacity(rb_row.len());
361
+ for val in rb_row.each() {
362
+ row.push(Wrap::<AnyValue>::try_convert(val?)?.0);
363
+ }
364
+ rows.push(Row(row));
365
+ }
366
+ Self::finish_from_rows(
367
+ rows,
368
+ infer_schema_length,
369
+ schema_overwrite.map(|wrap| wrap.0),
370
+ )
371
+ }
372
+
344
373
  pub fn read_hashes(
345
374
  dicts: Value,
346
375
  infer_schema_length: Option<usize>,
@@ -395,9 +424,9 @@ impl RbDataFrame {
395
424
  pub fn write_csv(
396
425
  &self,
397
426
  rb_f: Value,
398
- has_header: bool,
399
- sep: u8,
400
- quote: u8,
427
+ include_header: bool,
428
+ separator: u8,
429
+ quote_char: u8,
401
430
  batch_size: usize,
402
431
  datetime_format: Option<String>,
403
432
  date_format: Option<String>,
@@ -407,13 +436,13 @@ impl RbDataFrame {
407
436
  ) -> RbResult<()> {
408
437
  let null = null_value.unwrap_or_default();
409
438
 
410
- if let Ok(s) = rb_f.try_convert::<String>() {
439
+ if let Ok(s) = String::try_convert(rb_f) {
411
440
  let f = std::fs::File::create(s).unwrap();
412
441
  // no need for a buffered writer, because the csv writer does internal buffering
413
442
  CsvWriter::new(f)
414
- .has_header(has_header)
415
- .with_delimiter(sep)
416
- .with_quoting_char(quote)
443
+ .include_header(include_header)
444
+ .with_separator(separator)
445
+ .with_quote_char(quote_char)
417
446
  .with_batch_size(batch_size)
418
447
  .with_datetime_format(datetime_format)
419
448
  .with_date_format(date_format)
@@ -425,9 +454,9 @@ impl RbDataFrame {
425
454
  } else {
426
455
  let mut buf = Cursor::new(Vec::new());
427
456
  CsvWriter::new(&mut buf)
428
- .has_header(has_header)
429
- .with_delimiter(sep)
430
- .with_quoting_char(quote)
457
+ .include_header(include_header)
458
+ .with_separator(separator)
459
+ .with_quote_char(quote_char)
431
460
  .with_batch_size(batch_size)
432
461
  .with_datetime_format(datetime_format)
433
462
  .with_date_format(date_format)
@@ -449,7 +478,7 @@ impl RbDataFrame {
449
478
  rb_f: Value,
450
479
  compression: Wrap<Option<IpcCompression>>,
451
480
  ) -> RbResult<()> {
452
- if let Ok(s) = rb_f.try_convert::<String>() {
481
+ if let Ok(s) = String::try_convert(rb_f) {
453
482
  let f = std::fs::File::create(s).unwrap();
454
483
  IpcWriter::new(f)
455
484
  .with_compression(compression.0)
@@ -480,14 +509,14 @@ impl RbDataFrame {
480
509
  .get_columns()
481
510
  .iter()
482
511
  .map(|s| match s.dtype() {
483
- DataType::Object(_) => {
512
+ DataType::Object(_, _) => {
484
513
  let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
485
514
  obj.unwrap().to_object()
486
515
  }
487
516
  _ => Wrap(s.get(idx).unwrap()).into_value(),
488
517
  }),
489
518
  )
490
- .into()
519
+ .as_value()
491
520
  }
492
521
 
493
522
  pub fn row_tuples(&self) -> Value {
@@ -499,7 +528,7 @@ impl RbDataFrame {
499
528
  .get_columns()
500
529
  .iter()
501
530
  .map(|s| match s.dtype() {
502
- DataType::Object(_) => {
531
+ DataType::Object(_, _) => {
503
532
  let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
504
533
  obj.unwrap().to_object()
505
534
  }
@@ -507,7 +536,7 @@ impl RbDataFrame {
507
536
  }),
508
537
  )
509
538
  }))
510
- .into()
539
+ .as_value()
511
540
  }
512
541
 
513
542
  pub fn to_numo(&self) -> Option<Value> {
@@ -537,7 +566,7 @@ impl RbDataFrame {
537
566
  ) -> RbResult<()> {
538
567
  let compression = parse_parquet_compression(&compression, compression_level)?;
539
568
 
540
- if let Ok(s) = rb_f.try_convert::<String>() {
569
+ if let Ok(s) = String::try_convert(rb_f) {
541
570
  let f = std::fs::File::create(s).unwrap();
542
571
  ParquetWriter::new(f)
543
572
  .with_compression(compression)
@@ -604,7 +633,7 @@ impl RbDataFrame {
604
633
 
605
634
  pub fn sample_n(
606
635
  &self,
607
- n: usize,
636
+ n: &RbSeries,
608
637
  with_replacement: bool,
609
638
  shuffle: bool,
610
639
  seed: Option<u64>,
@@ -612,14 +641,14 @@ impl RbDataFrame {
612
641
  let df = self
613
642
  .df
614
643
  .borrow()
615
- .sample_n(n, with_replacement, shuffle, seed)
644
+ .sample_n(&n.series.borrow(), with_replacement, shuffle, seed)
616
645
  .map_err(RbPolarsErr::from)?;
617
646
  Ok(df.into())
618
647
  }
619
648
 
620
649
  pub fn sample_frac(
621
650
  &self,
622
- frac: f64,
651
+ frac: &RbSeries,
623
652
  with_replacement: bool,
624
653
  shuffle: bool,
625
654
  seed: Option<u64>,
@@ -627,7 +656,7 @@ impl RbDataFrame {
627
656
  let df = self
628
657
  .df
629
658
  .borrow()
630
- .sample_frac(frac, with_replacement, shuffle, seed)
659
+ .sample_frac(&frac.series.borrow(), with_replacement, shuffle, seed)
631
660
  .map_err(RbPolarsErr::from)?;
632
661
  Ok(df.into())
633
662
  }
@@ -761,8 +790,8 @@ impl RbDataFrame {
761
790
  .map(|s| RbSeries::new(s.clone()))
762
791
  }
763
792
 
764
- pub fn find_idx_by_name(&self, name: String) -> Option<usize> {
765
- self.df.borrow().find_idx_by_name(&name)
793
+ pub fn get_column_index(&self, name: String) -> Option<usize> {
794
+ self.df.borrow().get_column_index(&name)
766
795
  }
767
796
 
768
797
  // TODO remove clone
@@ -804,18 +833,18 @@ impl RbDataFrame {
804
833
  Ok(())
805
834
  }
806
835
 
807
- pub fn replace_at_idx(&self, index: usize, new_col: &RbSeries) -> RbResult<()> {
836
+ pub fn replace_column(&self, index: usize, new_col: &RbSeries) -> RbResult<()> {
808
837
  self.df
809
838
  .borrow_mut()
810
- .replace_at_idx(index, new_col.series.borrow().clone())
839
+ .replace_column(index, new_col.series.borrow().clone())
811
840
  .map_err(RbPolarsErr::from)?;
812
841
  Ok(())
813
842
  }
814
843
 
815
- pub fn insert_at_idx(&self, index: usize, new_col: &RbSeries) -> RbResult<()> {
844
+ pub fn insert_column(&self, index: usize, new_col: &RbSeries) -> RbResult<()> {
816
845
  self.df
817
846
  .borrow_mut()
818
- .insert_at_idx(index, new_col.series.borrow().clone())
847
+ .insert_column(index, new_col.series.borrow().clone())
819
848
  .map_err(RbPolarsErr::from)?;
820
849
  Ok(())
821
850
  }
@@ -850,11 +879,11 @@ impl RbDataFrame {
850
879
  Ok(mask.into_series().into())
851
880
  }
852
881
 
853
- pub fn frame_equal(&self, other: &RbDataFrame, null_equal: bool) -> bool {
882
+ pub fn equals(&self, other: &RbDataFrame, null_equal: bool) -> bool {
854
883
  if null_equal {
855
- self.df.borrow().frame_equal_missing(&other.df.borrow())
884
+ self.df.borrow().equals_missing(&other.df.borrow())
856
885
  } else {
857
- self.df.borrow().frame_equal(&other.df.borrow())
886
+ self.df.borrow().equals(&other.df.borrow())
858
887
  }
859
888
  }
860
889
 
@@ -942,73 +971,50 @@ impl RbDataFrame {
942
971
  self.df.borrow().clone().lazy().into()
943
972
  }
944
973
 
945
- pub fn max(&self) -> Self {
946
- self.df.borrow().max().into()
947
- }
948
-
949
- pub fn min(&self) -> Self {
950
- self.df.borrow().min().into()
951
- }
952
-
953
- pub fn sum(&self) -> Self {
954
- self.df.borrow().sum().into()
955
- }
956
-
957
- pub fn mean(&self) -> Self {
958
- self.df.borrow().mean().into()
959
- }
960
-
961
- pub fn std(&self, ddof: u8) -> Self {
962
- self.df.borrow().std(ddof).into()
963
- }
964
-
965
- pub fn var(&self, ddof: u8) -> Self {
966
- self.df.borrow().var(ddof).into()
967
- }
968
-
969
- pub fn median(&self) -> Self {
970
- self.df.borrow().median().into()
971
- }
972
-
973
- pub fn hmean(&self, null_strategy: Wrap<NullStrategy>) -> RbResult<Option<RbSeries>> {
974
+ pub fn max_horizontal(&self) -> RbResult<Option<RbSeries>> {
974
975
  let s = self
975
976
  .df
976
977
  .borrow()
977
- .hmean(null_strategy.0)
978
+ .max_horizontal()
978
979
  .map_err(RbPolarsErr::from)?;
979
980
  Ok(s.map(|s| s.into()))
980
981
  }
981
982
 
982
- pub fn hmax(&self) -> RbResult<Option<RbSeries>> {
983
- let s = self.df.borrow().hmax().map_err(RbPolarsErr::from)?;
984
- Ok(s.map(|s| s.into()))
985
- }
986
-
987
- pub fn hmin(&self) -> RbResult<Option<RbSeries>> {
988
- let s = self.df.borrow().hmin().map_err(RbPolarsErr::from)?;
983
+ pub fn min_horizontal(&self) -> RbResult<Option<RbSeries>> {
984
+ let s = self
985
+ .df
986
+ .borrow()
987
+ .min_horizontal()
988
+ .map_err(RbPolarsErr::from)?;
989
989
  Ok(s.map(|s| s.into()))
990
990
  }
991
991
 
992
- pub fn hsum(&self, null_strategy: Wrap<NullStrategy>) -> RbResult<Option<RbSeries>> {
992
+ pub fn sum_horizontal(&self, ignore_nulls: bool) -> RbResult<Option<RbSeries>> {
993
+ let null_strategy = if ignore_nulls {
994
+ NullStrategy::Ignore
995
+ } else {
996
+ NullStrategy::Propagate
997
+ };
993
998
  let s = self
994
999
  .df
995
1000
  .borrow()
996
- .hsum(null_strategy.0)
1001
+ .sum_horizontal(null_strategy)
997
1002
  .map_err(RbPolarsErr::from)?;
998
1003
  Ok(s.map(|s| s.into()))
999
1004
  }
1000
1005
 
1001
- pub fn quantile(
1002
- &self,
1003
- quantile: f64,
1004
- interpolation: Wrap<QuantileInterpolOptions>,
1005
- ) -> RbResult<Self> {
1006
- let df = self
1006
+ pub fn mean_horizontal(&self, ignore_nulls: bool) -> RbResult<Option<RbSeries>> {
1007
+ let null_strategy = if ignore_nulls {
1008
+ NullStrategy::Ignore
1009
+ } else {
1010
+ NullStrategy::Propagate
1011
+ };
1012
+ let s = self
1007
1013
  .df
1008
1014
  .borrow()
1009
- .quantile(quantile, interpolation.0)
1015
+ .mean_horizontal(null_strategy)
1010
1016
  .map_err(RbPolarsErr::from)?;
1011
- Ok(df.into())
1017
+ Ok(s.map(|s| s.into()))
1012
1018
  }
1013
1019
 
1014
1020
  pub fn to_dummies(
@@ -1082,13 +1088,13 @@ impl RbDataFrame {
1082
1088
  .into_datetime(tu, tz)
1083
1089
  .into_series()
1084
1090
  }
1085
- Some(DataType::Utf8) => {
1091
+ Some(DataType::String) => {
1086
1092
  apply_lambda_with_utf8_out_type(df, lambda, 0, None).into_series()
1087
1093
  }
1088
1094
  _ => return apply_lambda_unknown(df, lambda, inference_size),
1089
1095
  };
1090
1096
 
1091
- Ok((RbSeries::from(out).into(), false))
1097
+ Ok((Obj::wrap(RbSeries::from(out)).as_value(), false))
1092
1098
  }
1093
1099
 
1094
1100
  pub fn shrink_to_fit(&self) {
@@ -1105,17 +1111,20 @@ impl RbDataFrame {
1105
1111
  Ok(hash.into_series().into())
1106
1112
  }
1107
1113
 
1108
- pub fn transpose(&self, include_header: bool, names: String) -> RbResult<Self> {
1109
- let mut df = self.df.borrow().transpose().map_err(RbPolarsErr::from)?;
1110
- if include_header {
1111
- let s = Utf8Chunked::from_iter_values(
1112
- &names,
1113
- self.df.borrow().get_columns().iter().map(|s| s.name()),
1114
- )
1115
- .into_series();
1116
- df.insert_at_idx(0, s).unwrap();
1117
- }
1118
- Ok(df.into())
1114
+ pub fn transpose(&self, keep_names_as: Option<String>, column_names: Value) -> RbResult<Self> {
1115
+ let new_col_names = if let Ok(name) = <Vec<String>>::try_convert(column_names) {
1116
+ Some(Either::Right(name))
1117
+ } else if let Ok(name) = String::try_convert(column_names) {
1118
+ Some(Either::Left(name))
1119
+ } else {
1120
+ None
1121
+ };
1122
+ Ok(self
1123
+ .df
1124
+ .borrow()
1125
+ .transpose(keep_names_as.as_deref(), new_col_names)
1126
+ .map_err(RbPolarsErr::from)?
1127
+ .into())
1119
1128
  }
1120
1129
 
1121
1130
  pub fn upsample(
@@ -1,6 +1,5 @@
1
1
  use magnus::exception;
2
2
  use magnus::Error;
3
- use polars::error::ArrowError;
4
3
  use polars::prelude::PolarsError;
5
4
 
6
5
  pub struct RbPolarsErr {}
@@ -11,10 +10,6 @@ impl RbPolarsErr {
11
10
  Error::new(exception::runtime_error(), e.to_string())
12
11
  }
13
12
 
14
- pub fn arrow(e: ArrowError) -> Error {
15
- Error::new(exception::runtime_error(), e.to_string())
16
- }
17
-
18
13
  pub fn io(e: std::io::Error) -> Error {
19
14
  Error::new(exception::runtime_error(), e.to_string())
20
15
  }
@@ -3,16 +3,28 @@ use polars::prelude::*;
3
3
  use crate::RbExpr;
4
4
 
5
5
  impl RbExpr {
6
- pub fn bin_contains(&self, lit: Vec<u8>) -> Self {
7
- self.inner.clone().binary().contains_literal(lit).into()
6
+ pub fn bin_contains(&self, lit: &RbExpr) -> Self {
7
+ self.inner
8
+ .clone()
9
+ .binary()
10
+ .contains_literal(lit.inner.clone())
11
+ .into()
8
12
  }
9
13
 
10
- pub fn bin_ends_with(&self, sub: Vec<u8>) -> Self {
11
- self.inner.clone().binary().ends_with(sub).into()
14
+ pub fn bin_ends_with(&self, sub: &RbExpr) -> Self {
15
+ self.inner
16
+ .clone()
17
+ .binary()
18
+ .ends_with(sub.inner.clone())
19
+ .into()
12
20
  }
13
21
 
14
- pub fn bin_starts_with(&self, sub: Vec<u8>) -> Self {
15
- self.inner.clone().binary().starts_with(sub).into()
22
+ pub fn bin_starts_with(&self, sub: &RbExpr) -> Self {
23
+ self.inner
24
+ .clone()
25
+ .binary()
26
+ .starts_with(sub.inner.clone())
27
+ .into()
16
28
  }
17
29
 
18
30
  pub fn bin_hex_decode(&self, strict: bool) -> Self {
@@ -5,6 +5,13 @@ use crate::RbExpr;
5
5
 
6
6
  impl RbExpr {
7
7
  pub fn cat_set_ordering(&self, ordering: Wrap<CategoricalOrdering>) -> Self {
8
- self.inner.clone().cat().set_ordering(ordering.0).into()
8
+ self.inner
9
+ .clone()
10
+ .cast(DataType::Categorical(None, ordering.0))
11
+ .into()
12
+ }
13
+
14
+ pub fn cat_get_categories(&self) -> Self {
15
+ self.inner.clone().cat().get_categories().into()
9
16
  }
10
17
  }
@@ -8,9 +8,8 @@ impl RbExpr {
8
8
  self.inner.clone().dt().to_string(&format).into()
9
9
  }
10
10
 
11
- pub fn dt_offset_by(&self, by: String) -> Self {
12
- let by = Duration::parse(&by);
13
- self.inner.clone().dt().offset_by(by).into()
11
+ pub fn dt_offset_by(&self, by: &RbExpr) -> Self {
12
+ self.inner.clone().dt().offset_by(by.inner.clone()).into()
14
13
  }
15
14
 
16
15
  pub fn dt_epoch_seconds(&self) -> Self {
@@ -38,21 +37,20 @@ impl RbExpr {
38
37
  self.inner.clone().dt().cast_time_unit(tu.0).into()
39
38
  }
40
39
 
41
- pub fn dt_replace_time_zone(&self, tz: Option<String>, use_earliest: Option<bool>) -> Self {
40
+ pub fn dt_replace_time_zone(&self, time_zone: Option<String>, ambiguous: &Self) -> Self {
42
41
  self.inner
43
42
  .clone()
44
43
  .dt()
45
- .replace_time_zone(tz, use_earliest)
44
+ .replace_time_zone(time_zone, ambiguous.inner.clone())
46
45
  .into()
47
46
  }
48
47
 
49
- #[allow(deprecated)]
50
- pub fn dt_tz_localize(&self, tz: String) -> Self {
51
- self.inner.clone().dt().tz_localize(tz).into()
52
- }
53
-
54
- pub fn dt_truncate(&self, every: String, offset: String) -> Self {
55
- self.inner.clone().dt().truncate(&every, &offset).into()
48
+ pub fn dt_truncate(&self, every: &Self, offset: String) -> Self {
49
+ self.inner
50
+ .clone()
51
+ .dt()
52
+ .truncate(every.inner.clone(), offset)
53
+ .into()
56
54
  }
57
55
 
58
56
  pub fn dt_month_start(&self) -> Self {