polars-df 0.6.0 → 0.8.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (74) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +24 -0
  3. data/Cargo.lock +597 -599
  4. data/Cargo.toml +1 -0
  5. data/README.md +8 -7
  6. data/ext/polars/Cargo.toml +20 -10
  7. data/ext/polars/src/batched_csv.rs +27 -28
  8. data/ext/polars/src/conversion.rs +135 -106
  9. data/ext/polars/src/dataframe.rs +140 -131
  10. data/ext/polars/src/error.rs +0 -5
  11. data/ext/polars/src/expr/binary.rs +18 -6
  12. data/ext/polars/src/expr/categorical.rs +8 -1
  13. data/ext/polars/src/expr/datetime.rs +10 -12
  14. data/ext/polars/src/expr/general.rs +129 -286
  15. data/ext/polars/src/expr/list.rs +17 -9
  16. data/ext/polars/src/{expr.rs → expr/mod.rs} +4 -2
  17. data/ext/polars/src/expr/name.rs +44 -0
  18. data/ext/polars/src/expr/rolling.rs +201 -0
  19. data/ext/polars/src/expr/string.rs +94 -67
  20. data/ext/polars/src/file.rs +3 -3
  21. data/ext/polars/src/functions/aggregation.rs +35 -0
  22. data/ext/polars/src/functions/eager.rs +7 -31
  23. data/ext/polars/src/functions/io.rs +10 -10
  24. data/ext/polars/src/functions/lazy.rs +66 -41
  25. data/ext/polars/src/functions/meta.rs +30 -0
  26. data/ext/polars/src/functions/misc.rs +8 -0
  27. data/ext/polars/src/functions/mod.rs +5 -0
  28. data/ext/polars/src/functions/random.rs +6 -0
  29. data/ext/polars/src/functions/range.rs +41 -0
  30. data/ext/polars/src/functions/string_cache.rs +11 -0
  31. data/ext/polars/src/functions/whenthen.rs +7 -7
  32. data/ext/polars/src/lazyframe.rs +74 -60
  33. data/ext/polars/src/lib.rs +175 -91
  34. data/ext/polars/src/{apply → map}/dataframe.rs +29 -34
  35. data/ext/polars/src/{apply → map}/mod.rs +5 -5
  36. data/ext/polars/src/{apply → map}/series.rs +18 -22
  37. data/ext/polars/src/object.rs +0 -30
  38. data/ext/polars/src/on_startup.rs +32 -0
  39. data/ext/polars/src/rb_modules.rs +22 -7
  40. data/ext/polars/src/series/aggregation.rs +3 -0
  41. data/ext/polars/src/series/construction.rs +5 -5
  42. data/ext/polars/src/series/export.rs +4 -4
  43. data/ext/polars/src/{series.rs → series/mod.rs} +28 -45
  44. data/ext/polars/src/series/{set_at_idx.rs → scatter.rs} +38 -22
  45. data/ext/polars/src/sql.rs +46 -0
  46. data/ext/polars/src/utils.rs +1 -1
  47. data/lib/polars/config.rb +530 -0
  48. data/lib/polars/data_frame.rb +182 -145
  49. data/lib/polars/data_types.rb +4 -1
  50. data/lib/polars/date_time_expr.rb +23 -28
  51. data/lib/polars/date_time_name_space.rb +17 -37
  52. data/lib/polars/dynamic_group_by.rb +2 -2
  53. data/lib/polars/expr.rb +398 -110
  54. data/lib/polars/functions.rb +29 -37
  55. data/lib/polars/group_by.rb +38 -55
  56. data/lib/polars/io.rb +40 -5
  57. data/lib/polars/lazy_frame.rb +116 -89
  58. data/lib/polars/lazy_functions.rb +40 -68
  59. data/lib/polars/lazy_group_by.rb +7 -8
  60. data/lib/polars/list_expr.rb +12 -8
  61. data/lib/polars/list_name_space.rb +2 -2
  62. data/lib/polars/name_expr.rb +198 -0
  63. data/lib/polars/rolling_group_by.rb +2 -2
  64. data/lib/polars/series.rb +315 -43
  65. data/lib/polars/sql_context.rb +194 -0
  66. data/lib/polars/string_expr.rb +114 -60
  67. data/lib/polars/string_name_space.rb +19 -4
  68. data/lib/polars/struct_expr.rb +1 -1
  69. data/lib/polars/struct_name_space.rb +1 -1
  70. data/lib/polars/utils.rb +25 -13
  71. data/lib/polars/version.rb +1 -1
  72. data/lib/polars.rb +3 -0
  73. metadata +23 -11
  74. /data/ext/polars/src/{apply → map}/lazy.rs +0 -0
@@ -1,4 +1,7 @@
1
- use magnus::{r_hash::ForEach, IntoValue, RArray, RHash, RString, Value};
1
+ use either::Either;
2
+ use magnus::{
3
+ prelude::*, r_hash::ForEach, typed_data::Obj, IntoValue, RArray, RHash, RString, Value,
4
+ };
2
5
  use polars::frame::row::{rows_to_schema_supertypes, Row};
3
6
  use polars::frame::NullStrategy;
4
7
  use polars::io::avro::AvroCompression;
@@ -11,12 +14,12 @@ use std::cell::RefCell;
11
14
  use std::io::{BufWriter, Cursor};
12
15
  use std::ops::Deref;
13
16
 
14
- use crate::apply::dataframe::{
17
+ use crate::conversion::*;
18
+ use crate::file::{get_file_like, get_mmap_bytes_reader};
19
+ use crate::map::dataframe::{
15
20
  apply_lambda_unknown, apply_lambda_with_bool_out_type, apply_lambda_with_primitive_out_type,
16
21
  apply_lambda_with_utf8_out_type,
17
22
  };
18
- use crate::conversion::*;
19
- use crate::file::{get_file_like, get_mmap_bytes_reader};
20
23
  use crate::rb_modules;
21
24
  use crate::series::{to_rbseries_collection, to_series_collection};
22
25
  use crate::{RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
@@ -45,7 +48,7 @@ impl RbDataFrame {
45
48
  schema_overwrite: Option<Schema>,
46
49
  ) -> RbResult<Self> {
47
50
  // object builder must be registered.
48
- crate::object::register_object_builder();
51
+ crate::on_startup::register_object_builder();
49
52
 
50
53
  let schema =
51
54
  rows_to_schema_supertypes(&rows, infer_schema_length.map(|n| std::cmp::max(1, n)))
@@ -53,7 +56,7 @@ impl RbDataFrame {
53
56
  // replace inferred nulls with boolean
54
57
  let fields = schema.iter_fields().map(|mut fld| match fld.data_type() {
55
58
  DataType::Null => {
56
- fld.coerce(DataType::Boolean);
59
+ // fld.coerce(DataType::Boolean);
57
60
  fld
58
61
  }
59
62
  DataType::Decimal(_, _) => {
@@ -86,7 +89,7 @@ impl RbDataFrame {
86
89
  pub fn init(columns: RArray) -> RbResult<Self> {
87
90
  let mut cols = Vec::new();
88
91
  for i in columns.each() {
89
- cols.push(i?.try_convert::<&RbSeries>()?.series.borrow().clone());
92
+ cols.push(<&RbSeries>::try_convert(i?)?.series.borrow().clone());
90
93
  }
91
94
  let df = DataFrame::new(cols).map_err(RbPolarsErr::from)?;
92
95
  Ok(RbDataFrame::new(df))
@@ -99,36 +102,35 @@ impl RbDataFrame {
99
102
  pub fn read_csv(arguments: &[Value]) -> RbResult<Self> {
100
103
  // start arguments
101
104
  // this pattern is needed for more than 16
102
- let rb_f: Value = arguments[0].try_convert()?;
103
- let infer_schema_length: Option<usize> = arguments[1].try_convert()?;
104
- let chunk_size: usize = arguments[2].try_convert()?;
105
- let has_header: bool = arguments[3].try_convert()?;
106
- let ignore_errors: bool = arguments[4].try_convert()?;
107
- let n_rows: Option<usize> = arguments[5].try_convert()?;
108
- let skip_rows: usize = arguments[6].try_convert()?;
109
- let projection: Option<Vec<usize>> = arguments[7].try_convert()?;
110
- let sep: String = arguments[8].try_convert()?;
111
- let rechunk: bool = arguments[9].try_convert()?;
112
- let columns: Option<Vec<String>> = arguments[10].try_convert()?;
113
- let encoding: Wrap<CsvEncoding> = arguments[11].try_convert()?;
114
- let n_threads: Option<usize> = arguments[12].try_convert()?;
115
- let path: Option<String> = arguments[13].try_convert()?;
116
- let overwrite_dtype: Option<Vec<(String, Wrap<DataType>)>> = arguments[14].try_convert()?;
105
+ let rb_f = arguments[0];
106
+ let infer_schema_length = Option::<usize>::try_convert(arguments[1])?;
107
+ let chunk_size = usize::try_convert(arguments[2])?;
108
+ let has_header = bool::try_convert(arguments[3])?;
109
+ let ignore_errors = bool::try_convert(arguments[4])?;
110
+ let n_rows = Option::<usize>::try_convert(arguments[5])?;
111
+ let skip_rows = usize::try_convert(arguments[6])?;
112
+ let projection = Option::<Vec<usize>>::try_convert(arguments[7])?;
113
+ let separator = String::try_convert(arguments[8])?;
114
+ let rechunk = bool::try_convert(arguments[9])?;
115
+ let columns = Option::<Vec<String>>::try_convert(arguments[10])?;
116
+ let encoding = Wrap::<CsvEncoding>::try_convert(arguments[11])?;
117
+ let n_threads = Option::<usize>::try_convert(arguments[12])?;
118
+ let path = Option::<String>::try_convert(arguments[13])?;
119
+ let overwrite_dtype = Option::<Vec<(String, Wrap<DataType>)>>::try_convert(arguments[14])?;
117
120
  // TODO fix
118
- let overwrite_dtype_slice: Option<Vec<Wrap<DataType>>> = None; // arguments[15].try_convert()?;
119
- let low_memory: bool = arguments[16].try_convert()?;
120
- let comment_char: Option<String> = arguments[17].try_convert()?;
121
- let quote_char: Option<String> = arguments[18].try_convert()?;
122
- let null_values: Option<Wrap<NullValues>> = arguments[19].try_convert()?;
123
- let try_parse_dates: bool = arguments[20].try_convert()?;
124
- let skip_rows_after_header: usize = arguments[21].try_convert()?;
125
- let row_count: Option<(String, IdxSize)> = arguments[22].try_convert()?;
126
- let sample_size: usize = arguments[23].try_convert()?;
127
- let eol_char: String = arguments[24].try_convert()?;
121
+ let overwrite_dtype_slice = Option::<Vec<Wrap<DataType>>>::None; // Option::<Vec<Wrap<DataType>>>::try_convert(arguments[15])?;
122
+ let low_memory = bool::try_convert(arguments[16])?;
123
+ let comment_prefix = Option::<String>::try_convert(arguments[17])?;
124
+ let quote_char = Option::<String>::try_convert(arguments[18])?;
125
+ let null_values = Option::<Wrap<NullValues>>::try_convert(arguments[19])?;
126
+ let try_parse_dates = bool::try_convert(arguments[20])?;
127
+ let skip_rows_after_header = usize::try_convert(arguments[21])?;
128
+ let row_count = Option::<(String, IdxSize)>::try_convert(arguments[22])?;
129
+ let sample_size = usize::try_convert(arguments[23])?;
130
+ let eol_char = String::try_convert(arguments[24])?;
128
131
  // end arguments
129
132
 
130
133
  let null_values = null_values.map(|w| w.0);
131
- let comment_char = comment_char.map(|s| s.as_bytes()[0]);
132
134
  let eol_char = eol_char.as_bytes()[0];
133
135
 
134
136
  let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
@@ -165,7 +167,7 @@ impl RbDataFrame {
165
167
  .infer_schema(infer_schema_length)
166
168
  .has_header(has_header)
167
169
  .with_n_rows(n_rows)
168
- .with_delimiter(sep.as_bytes()[0])
170
+ .with_separator(separator.as_bytes()[0])
169
171
  .with_skip_rows(skip_rows)
170
172
  .with_ignore_errors(ignore_errors)
171
173
  .with_projection(projection)
@@ -178,7 +180,7 @@ impl RbDataFrame {
178
180
  .with_dtypes(overwrite_dtype.map(Arc::new))
179
181
  .with_dtypes_slice(overwrite_dtype_slice.as_deref())
180
182
  .low_memory(low_memory)
181
- .with_comment_char(comment_char)
183
+ .with_comment_prefix(comment_prefix.as_deref())
182
184
  .with_null_values(null_values)
183
185
  .with_try_parse_dates(try_parse_dates)
184
186
  .with_quote_char(quote_char)
@@ -265,7 +267,7 @@ impl RbDataFrame {
265
267
  ) -> RbResult<()> {
266
268
  use polars::io::avro::AvroWriter;
267
269
 
268
- if let Ok(s) = rb_f.try_convert::<String>() {
270
+ if let Ok(s) = String::try_convert(rb_f) {
269
271
  let f = std::fs::File::create(s).unwrap();
270
272
  AvroWriter::new(f)
271
273
  .with_compression(compression.0)
@@ -294,12 +296,18 @@ impl RbDataFrame {
294
296
  Ok(df) => Ok(df.into()),
295
297
  // try arrow json reader instead
296
298
  // this is row oriented
297
- Err(_) => {
298
- let out = JsonReader::new(mmap_bytes_r)
299
- .with_json_format(JsonFormat::Json)
300
- .finish()
301
- .map_err(|e| RbPolarsErr::other(format!("{:?}", e)))?;
302
- Ok(out.into())
299
+ Err(e) => {
300
+ let msg = format!("{e}");
301
+ if msg.contains("successful parse invalid data") {
302
+ let e = RbPolarsErr::from(PolarsError::ComputeError(msg.into()));
303
+ Err(e)
304
+ } else {
305
+ let out = JsonReader::new(mmap_bytes_r)
306
+ .with_json_format(JsonFormat::Json)
307
+ .finish()
308
+ .map_err(|e| RbPolarsErr::other(format!("{:?}", e)))?;
309
+ Ok(out.into())
310
+ }
303
311
  }
304
312
  }
305
313
  }
@@ -341,6 +349,27 @@ impl RbDataFrame {
341
349
  Ok(())
342
350
  }
343
351
 
352
+ pub fn read_rows(
353
+ rb_rows: RArray,
354
+ infer_schema_length: Option<usize>,
355
+ schema_overwrite: Option<Wrap<Schema>>,
356
+ ) -> RbResult<Self> {
357
+ let mut rows = Vec::with_capacity(rb_rows.len());
358
+ for v in rb_rows.each() {
359
+ let rb_row = RArray::try_convert(v?)?;
360
+ let mut row = Vec::with_capacity(rb_row.len());
361
+ for val in rb_row.each() {
362
+ row.push(Wrap::<AnyValue>::try_convert(val?)?.0);
363
+ }
364
+ rows.push(Row(row));
365
+ }
366
+ Self::finish_from_rows(
367
+ rows,
368
+ infer_schema_length,
369
+ schema_overwrite.map(|wrap| wrap.0),
370
+ )
371
+ }
372
+
344
373
  pub fn read_hashes(
345
374
  dicts: Value,
346
375
  infer_schema_length: Option<usize>,
@@ -395,9 +424,9 @@ impl RbDataFrame {
395
424
  pub fn write_csv(
396
425
  &self,
397
426
  rb_f: Value,
398
- has_header: bool,
399
- sep: u8,
400
- quote: u8,
427
+ include_header: bool,
428
+ separator: u8,
429
+ quote_char: u8,
401
430
  batch_size: usize,
402
431
  datetime_format: Option<String>,
403
432
  date_format: Option<String>,
@@ -407,13 +436,13 @@ impl RbDataFrame {
407
436
  ) -> RbResult<()> {
408
437
  let null = null_value.unwrap_or_default();
409
438
 
410
- if let Ok(s) = rb_f.try_convert::<String>() {
439
+ if let Ok(s) = String::try_convert(rb_f) {
411
440
  let f = std::fs::File::create(s).unwrap();
412
441
  // no need for a buffered writer, because the csv writer does internal buffering
413
442
  CsvWriter::new(f)
414
- .has_header(has_header)
415
- .with_delimiter(sep)
416
- .with_quoting_char(quote)
443
+ .include_header(include_header)
444
+ .with_separator(separator)
445
+ .with_quote_char(quote_char)
417
446
  .with_batch_size(batch_size)
418
447
  .with_datetime_format(datetime_format)
419
448
  .with_date_format(date_format)
@@ -425,9 +454,9 @@ impl RbDataFrame {
425
454
  } else {
426
455
  let mut buf = Cursor::new(Vec::new());
427
456
  CsvWriter::new(&mut buf)
428
- .has_header(has_header)
429
- .with_delimiter(sep)
430
- .with_quoting_char(quote)
457
+ .include_header(include_header)
458
+ .with_separator(separator)
459
+ .with_quote_char(quote_char)
431
460
  .with_batch_size(batch_size)
432
461
  .with_datetime_format(datetime_format)
433
462
  .with_date_format(date_format)
@@ -449,7 +478,7 @@ impl RbDataFrame {
449
478
  rb_f: Value,
450
479
  compression: Wrap<Option<IpcCompression>>,
451
480
  ) -> RbResult<()> {
452
- if let Ok(s) = rb_f.try_convert::<String>() {
481
+ if let Ok(s) = String::try_convert(rb_f) {
453
482
  let f = std::fs::File::create(s).unwrap();
454
483
  IpcWriter::new(f)
455
484
  .with_compression(compression.0)
@@ -480,14 +509,14 @@ impl RbDataFrame {
480
509
  .get_columns()
481
510
  .iter()
482
511
  .map(|s| match s.dtype() {
483
- DataType::Object(_) => {
512
+ DataType::Object(_, _) => {
484
513
  let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
485
514
  obj.unwrap().to_object()
486
515
  }
487
516
  _ => Wrap(s.get(idx).unwrap()).into_value(),
488
517
  }),
489
518
  )
490
- .into()
519
+ .as_value()
491
520
  }
492
521
 
493
522
  pub fn row_tuples(&self) -> Value {
@@ -499,7 +528,7 @@ impl RbDataFrame {
499
528
  .get_columns()
500
529
  .iter()
501
530
  .map(|s| match s.dtype() {
502
- DataType::Object(_) => {
531
+ DataType::Object(_, _) => {
503
532
  let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
504
533
  obj.unwrap().to_object()
505
534
  }
@@ -507,7 +536,7 @@ impl RbDataFrame {
507
536
  }),
508
537
  )
509
538
  }))
510
- .into()
539
+ .as_value()
511
540
  }
512
541
 
513
542
  pub fn to_numo(&self) -> Option<Value> {
@@ -537,7 +566,7 @@ impl RbDataFrame {
537
566
  ) -> RbResult<()> {
538
567
  let compression = parse_parquet_compression(&compression, compression_level)?;
539
568
 
540
- if let Ok(s) = rb_f.try_convert::<String>() {
569
+ if let Ok(s) = String::try_convert(rb_f) {
541
570
  let f = std::fs::File::create(s).unwrap();
542
571
  ParquetWriter::new(f)
543
572
  .with_compression(compression)
@@ -604,7 +633,7 @@ impl RbDataFrame {
604
633
 
605
634
  pub fn sample_n(
606
635
  &self,
607
- n: usize,
636
+ n: &RbSeries,
608
637
  with_replacement: bool,
609
638
  shuffle: bool,
610
639
  seed: Option<u64>,
@@ -612,14 +641,14 @@ impl RbDataFrame {
612
641
  let df = self
613
642
  .df
614
643
  .borrow()
615
- .sample_n(n, with_replacement, shuffle, seed)
644
+ .sample_n(&n.series.borrow(), with_replacement, shuffle, seed)
616
645
  .map_err(RbPolarsErr::from)?;
617
646
  Ok(df.into())
618
647
  }
619
648
 
620
649
  pub fn sample_frac(
621
650
  &self,
622
- frac: f64,
651
+ frac: &RbSeries,
623
652
  with_replacement: bool,
624
653
  shuffle: bool,
625
654
  seed: Option<u64>,
@@ -627,7 +656,7 @@ impl RbDataFrame {
627
656
  let df = self
628
657
  .df
629
658
  .borrow()
630
- .sample_frac(frac, with_replacement, shuffle, seed)
659
+ .sample_frac(&frac.series.borrow(), with_replacement, shuffle, seed)
631
660
  .map_err(RbPolarsErr::from)?;
632
661
  Ok(df.into())
633
662
  }
@@ -761,8 +790,8 @@ impl RbDataFrame {
761
790
  .map(|s| RbSeries::new(s.clone()))
762
791
  }
763
792
 
764
- pub fn find_idx_by_name(&self, name: String) -> Option<usize> {
765
- self.df.borrow().find_idx_by_name(&name)
793
+ pub fn get_column_index(&self, name: String) -> Option<usize> {
794
+ self.df.borrow().get_column_index(&name)
766
795
  }
767
796
 
768
797
  // TODO remove clone
@@ -804,18 +833,18 @@ impl RbDataFrame {
804
833
  Ok(())
805
834
  }
806
835
 
807
- pub fn replace_at_idx(&self, index: usize, new_col: &RbSeries) -> RbResult<()> {
836
+ pub fn replace_column(&self, index: usize, new_col: &RbSeries) -> RbResult<()> {
808
837
  self.df
809
838
  .borrow_mut()
810
- .replace_at_idx(index, new_col.series.borrow().clone())
839
+ .replace_column(index, new_col.series.borrow().clone())
811
840
  .map_err(RbPolarsErr::from)?;
812
841
  Ok(())
813
842
  }
814
843
 
815
- pub fn insert_at_idx(&self, index: usize, new_col: &RbSeries) -> RbResult<()> {
844
+ pub fn insert_column(&self, index: usize, new_col: &RbSeries) -> RbResult<()> {
816
845
  self.df
817
846
  .borrow_mut()
818
- .insert_at_idx(index, new_col.series.borrow().clone())
847
+ .insert_column(index, new_col.series.borrow().clone())
819
848
  .map_err(RbPolarsErr::from)?;
820
849
  Ok(())
821
850
  }
@@ -850,11 +879,11 @@ impl RbDataFrame {
850
879
  Ok(mask.into_series().into())
851
880
  }
852
881
 
853
- pub fn frame_equal(&self, other: &RbDataFrame, null_equal: bool) -> bool {
882
+ pub fn equals(&self, other: &RbDataFrame, null_equal: bool) -> bool {
854
883
  if null_equal {
855
- self.df.borrow().frame_equal_missing(&other.df.borrow())
884
+ self.df.borrow().equals_missing(&other.df.borrow())
856
885
  } else {
857
- self.df.borrow().frame_equal(&other.df.borrow())
886
+ self.df.borrow().equals(&other.df.borrow())
858
887
  }
859
888
  }
860
889
 
@@ -942,73 +971,50 @@ impl RbDataFrame {
942
971
  self.df.borrow().clone().lazy().into()
943
972
  }
944
973
 
945
- pub fn max(&self) -> Self {
946
- self.df.borrow().max().into()
947
- }
948
-
949
- pub fn min(&self) -> Self {
950
- self.df.borrow().min().into()
951
- }
952
-
953
- pub fn sum(&self) -> Self {
954
- self.df.borrow().sum().into()
955
- }
956
-
957
- pub fn mean(&self) -> Self {
958
- self.df.borrow().mean().into()
959
- }
960
-
961
- pub fn std(&self, ddof: u8) -> Self {
962
- self.df.borrow().std(ddof).into()
963
- }
964
-
965
- pub fn var(&self, ddof: u8) -> Self {
966
- self.df.borrow().var(ddof).into()
967
- }
968
-
969
- pub fn median(&self) -> Self {
970
- self.df.borrow().median().into()
971
- }
972
-
973
- pub fn hmean(&self, null_strategy: Wrap<NullStrategy>) -> RbResult<Option<RbSeries>> {
974
+ pub fn max_horizontal(&self) -> RbResult<Option<RbSeries>> {
974
975
  let s = self
975
976
  .df
976
977
  .borrow()
977
- .hmean(null_strategy.0)
978
+ .max_horizontal()
978
979
  .map_err(RbPolarsErr::from)?;
979
980
  Ok(s.map(|s| s.into()))
980
981
  }
981
982
 
982
- pub fn hmax(&self) -> RbResult<Option<RbSeries>> {
983
- let s = self.df.borrow().hmax().map_err(RbPolarsErr::from)?;
984
- Ok(s.map(|s| s.into()))
985
- }
986
-
987
- pub fn hmin(&self) -> RbResult<Option<RbSeries>> {
988
- let s = self.df.borrow().hmin().map_err(RbPolarsErr::from)?;
983
+ pub fn min_horizontal(&self) -> RbResult<Option<RbSeries>> {
984
+ let s = self
985
+ .df
986
+ .borrow()
987
+ .min_horizontal()
988
+ .map_err(RbPolarsErr::from)?;
989
989
  Ok(s.map(|s| s.into()))
990
990
  }
991
991
 
992
- pub fn hsum(&self, null_strategy: Wrap<NullStrategy>) -> RbResult<Option<RbSeries>> {
992
+ pub fn sum_horizontal(&self, ignore_nulls: bool) -> RbResult<Option<RbSeries>> {
993
+ let null_strategy = if ignore_nulls {
994
+ NullStrategy::Ignore
995
+ } else {
996
+ NullStrategy::Propagate
997
+ };
993
998
  let s = self
994
999
  .df
995
1000
  .borrow()
996
- .hsum(null_strategy.0)
1001
+ .sum_horizontal(null_strategy)
997
1002
  .map_err(RbPolarsErr::from)?;
998
1003
  Ok(s.map(|s| s.into()))
999
1004
  }
1000
1005
 
1001
- pub fn quantile(
1002
- &self,
1003
- quantile: f64,
1004
- interpolation: Wrap<QuantileInterpolOptions>,
1005
- ) -> RbResult<Self> {
1006
- let df = self
1006
+ pub fn mean_horizontal(&self, ignore_nulls: bool) -> RbResult<Option<RbSeries>> {
1007
+ let null_strategy = if ignore_nulls {
1008
+ NullStrategy::Ignore
1009
+ } else {
1010
+ NullStrategy::Propagate
1011
+ };
1012
+ let s = self
1007
1013
  .df
1008
1014
  .borrow()
1009
- .quantile(quantile, interpolation.0)
1015
+ .mean_horizontal(null_strategy)
1010
1016
  .map_err(RbPolarsErr::from)?;
1011
- Ok(df.into())
1017
+ Ok(s.map(|s| s.into()))
1012
1018
  }
1013
1019
 
1014
1020
  pub fn to_dummies(
@@ -1082,13 +1088,13 @@ impl RbDataFrame {
1082
1088
  .into_datetime(tu, tz)
1083
1089
  .into_series()
1084
1090
  }
1085
- Some(DataType::Utf8) => {
1091
+ Some(DataType::String) => {
1086
1092
  apply_lambda_with_utf8_out_type(df, lambda, 0, None).into_series()
1087
1093
  }
1088
1094
  _ => return apply_lambda_unknown(df, lambda, inference_size),
1089
1095
  };
1090
1096
 
1091
- Ok((RbSeries::from(out).into(), false))
1097
+ Ok((Obj::wrap(RbSeries::from(out)).as_value(), false))
1092
1098
  }
1093
1099
 
1094
1100
  pub fn shrink_to_fit(&self) {
@@ -1105,17 +1111,20 @@ impl RbDataFrame {
1105
1111
  Ok(hash.into_series().into())
1106
1112
  }
1107
1113
 
1108
- pub fn transpose(&self, include_header: bool, names: String) -> RbResult<Self> {
1109
- let mut df = self.df.borrow().transpose().map_err(RbPolarsErr::from)?;
1110
- if include_header {
1111
- let s = Utf8Chunked::from_iter_values(
1112
- &names,
1113
- self.df.borrow().get_columns().iter().map(|s| s.name()),
1114
- )
1115
- .into_series();
1116
- df.insert_at_idx(0, s).unwrap();
1117
- }
1118
- Ok(df.into())
1114
+ pub fn transpose(&self, keep_names_as: Option<String>, column_names: Value) -> RbResult<Self> {
1115
+ let new_col_names = if let Ok(name) = <Vec<String>>::try_convert(column_names) {
1116
+ Some(Either::Right(name))
1117
+ } else if let Ok(name) = String::try_convert(column_names) {
1118
+ Some(Either::Left(name))
1119
+ } else {
1120
+ None
1121
+ };
1122
+ Ok(self
1123
+ .df
1124
+ .borrow()
1125
+ .transpose(keep_names_as.as_deref(), new_col_names)
1126
+ .map_err(RbPolarsErr::from)?
1127
+ .into())
1119
1128
  }
1120
1129
 
1121
1130
  pub fn upsample(
@@ -1,6 +1,5 @@
1
1
  use magnus::exception;
2
2
  use magnus::Error;
3
- use polars::error::ArrowError;
4
3
  use polars::prelude::PolarsError;
5
4
 
6
5
  pub struct RbPolarsErr {}
@@ -11,10 +10,6 @@ impl RbPolarsErr {
11
10
  Error::new(exception::runtime_error(), e.to_string())
12
11
  }
13
12
 
14
- pub fn arrow(e: ArrowError) -> Error {
15
- Error::new(exception::runtime_error(), e.to_string())
16
- }
17
-
18
13
  pub fn io(e: std::io::Error) -> Error {
19
14
  Error::new(exception::runtime_error(), e.to_string())
20
15
  }
@@ -3,16 +3,28 @@ use polars::prelude::*;
3
3
  use crate::RbExpr;
4
4
 
5
5
  impl RbExpr {
6
- pub fn bin_contains(&self, lit: Vec<u8>) -> Self {
7
- self.inner.clone().binary().contains_literal(lit).into()
6
+ pub fn bin_contains(&self, lit: &RbExpr) -> Self {
7
+ self.inner
8
+ .clone()
9
+ .binary()
10
+ .contains_literal(lit.inner.clone())
11
+ .into()
8
12
  }
9
13
 
10
- pub fn bin_ends_with(&self, sub: Vec<u8>) -> Self {
11
- self.inner.clone().binary().ends_with(sub).into()
14
+ pub fn bin_ends_with(&self, sub: &RbExpr) -> Self {
15
+ self.inner
16
+ .clone()
17
+ .binary()
18
+ .ends_with(sub.inner.clone())
19
+ .into()
12
20
  }
13
21
 
14
- pub fn bin_starts_with(&self, sub: Vec<u8>) -> Self {
15
- self.inner.clone().binary().starts_with(sub).into()
22
+ pub fn bin_starts_with(&self, sub: &RbExpr) -> Self {
23
+ self.inner
24
+ .clone()
25
+ .binary()
26
+ .starts_with(sub.inner.clone())
27
+ .into()
16
28
  }
17
29
 
18
30
  pub fn bin_hex_decode(&self, strict: bool) -> Self {
@@ -5,6 +5,13 @@ use crate::RbExpr;
5
5
 
6
6
  impl RbExpr {
7
7
  pub fn cat_set_ordering(&self, ordering: Wrap<CategoricalOrdering>) -> Self {
8
- self.inner.clone().cat().set_ordering(ordering.0).into()
8
+ self.inner
9
+ .clone()
10
+ .cast(DataType::Categorical(None, ordering.0))
11
+ .into()
12
+ }
13
+
14
+ pub fn cat_get_categories(&self) -> Self {
15
+ self.inner.clone().cat().get_categories().into()
9
16
  }
10
17
  }
@@ -8,9 +8,8 @@ impl RbExpr {
8
8
  self.inner.clone().dt().to_string(&format).into()
9
9
  }
10
10
 
11
- pub fn dt_offset_by(&self, by: String) -> Self {
12
- let by = Duration::parse(&by);
13
- self.inner.clone().dt().offset_by(by).into()
11
+ pub fn dt_offset_by(&self, by: &RbExpr) -> Self {
12
+ self.inner.clone().dt().offset_by(by.inner.clone()).into()
14
13
  }
15
14
 
16
15
  pub fn dt_epoch_seconds(&self) -> Self {
@@ -38,21 +37,20 @@ impl RbExpr {
38
37
  self.inner.clone().dt().cast_time_unit(tu.0).into()
39
38
  }
40
39
 
41
- pub fn dt_replace_time_zone(&self, tz: Option<String>, use_earliest: Option<bool>) -> Self {
40
+ pub fn dt_replace_time_zone(&self, time_zone: Option<String>, ambiguous: &Self) -> Self {
42
41
  self.inner
43
42
  .clone()
44
43
  .dt()
45
- .replace_time_zone(tz, use_earliest)
44
+ .replace_time_zone(time_zone, ambiguous.inner.clone())
46
45
  .into()
47
46
  }
48
47
 
49
- #[allow(deprecated)]
50
- pub fn dt_tz_localize(&self, tz: String) -> Self {
51
- self.inner.clone().dt().tz_localize(tz).into()
52
- }
53
-
54
- pub fn dt_truncate(&self, every: String, offset: String) -> Self {
55
- self.inner.clone().dt().truncate(&every, &offset).into()
48
+ pub fn dt_truncate(&self, every: &Self, offset: String) -> Self {
49
+ self.inner
50
+ .clone()
51
+ .dt()
52
+ .truncate(every.inner.clone(), offset)
53
+ .into()
56
54
  }
57
55
 
58
56
  pub fn dt_month_start(&self) -> Self {