polars-df 0.7.0 → 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (83) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +41 -0
  3. data/Cargo.lock +353 -237
  4. data/Cargo.toml +0 -3
  5. data/LICENSE.txt +1 -1
  6. data/README.md +2 -2
  7. data/ext/polars/Cargo.toml +17 -6
  8. data/ext/polars/src/batched_csv.rs +6 -7
  9. data/ext/polars/src/conversion/anyvalue.rs +185 -0
  10. data/ext/polars/src/conversion/chunked_array.rs +140 -0
  11. data/ext/polars/src/{conversion.rs → conversion/mod.rs} +268 -347
  12. data/ext/polars/src/dataframe.rs +96 -116
  13. data/ext/polars/src/expr/array.rs +74 -0
  14. data/ext/polars/src/expr/categorical.rs +8 -1
  15. data/ext/polars/src/expr/datetime.rs +22 -56
  16. data/ext/polars/src/expr/general.rs +124 -37
  17. data/ext/polars/src/expr/list.rs +52 -4
  18. data/ext/polars/src/expr/meta.rs +48 -0
  19. data/ext/polars/src/expr/rolling.rs +16 -10
  20. data/ext/polars/src/expr/string.rs +68 -17
  21. data/ext/polars/src/expr/struct.rs +8 -4
  22. data/ext/polars/src/functions/aggregation.rs +6 -0
  23. data/ext/polars/src/functions/lazy.rs +103 -48
  24. data/ext/polars/src/functions/meta.rs +45 -1
  25. data/ext/polars/src/functions/range.rs +5 -10
  26. data/ext/polars/src/functions/string_cache.rs +14 -0
  27. data/ext/polars/src/{lazyframe.rs → lazyframe/mod.rs} +166 -41
  28. data/ext/polars/src/lib.rs +245 -187
  29. data/ext/polars/src/map/dataframe.rs +1 -1
  30. data/ext/polars/src/map/mod.rs +2 -2
  31. data/ext/polars/src/map/series.rs +6 -6
  32. data/ext/polars/src/object.rs +0 -30
  33. data/ext/polars/src/on_startup.rs +32 -0
  34. data/ext/polars/src/series/aggregation.rs +23 -0
  35. data/ext/polars/src/series/construction.rs +1 -1
  36. data/ext/polars/src/series/export.rs +2 -2
  37. data/ext/polars/src/{series.rs → series/mod.rs} +45 -21
  38. data/ext/polars/src/series/{set_at_idx.rs → scatter.rs} +18 -18
  39. data/ext/polars/src/utils.rs +1 -1
  40. data/lib/polars/array_expr.rb +449 -0
  41. data/lib/polars/array_name_space.rb +346 -0
  42. data/lib/polars/cat_expr.rb +24 -0
  43. data/lib/polars/cat_name_space.rb +75 -0
  44. data/lib/polars/config.rb +2 -2
  45. data/lib/polars/data_frame.rb +248 -108
  46. data/lib/polars/data_types.rb +195 -29
  47. data/lib/polars/date_time_expr.rb +41 -24
  48. data/lib/polars/date_time_name_space.rb +12 -12
  49. data/lib/polars/exceptions.rb +12 -1
  50. data/lib/polars/expr.rb +1080 -195
  51. data/lib/polars/functions/aggregation/horizontal.rb +246 -0
  52. data/lib/polars/functions/aggregation/vertical.rb +282 -0
  53. data/lib/polars/functions/as_datatype.rb +248 -0
  54. data/lib/polars/functions/col.rb +47 -0
  55. data/lib/polars/functions/eager.rb +182 -0
  56. data/lib/polars/functions/lazy.rb +1280 -0
  57. data/lib/polars/functions/len.rb +49 -0
  58. data/lib/polars/functions/lit.rb +35 -0
  59. data/lib/polars/functions/random.rb +16 -0
  60. data/lib/polars/functions/range/date_range.rb +103 -0
  61. data/lib/polars/functions/range/int_range.rb +51 -0
  62. data/lib/polars/functions/repeat.rb +144 -0
  63. data/lib/polars/functions/whenthen.rb +27 -0
  64. data/lib/polars/functions.rb +29 -416
  65. data/lib/polars/group_by.rb +3 -3
  66. data/lib/polars/io.rb +21 -28
  67. data/lib/polars/lazy_frame.rb +390 -76
  68. data/lib/polars/list_expr.rb +152 -6
  69. data/lib/polars/list_name_space.rb +102 -0
  70. data/lib/polars/meta_expr.rb +175 -7
  71. data/lib/polars/series.rb +557 -59
  72. data/lib/polars/sql_context.rb +1 -1
  73. data/lib/polars/string_cache.rb +75 -0
  74. data/lib/polars/string_expr.rb +412 -96
  75. data/lib/polars/string_name_space.rb +4 -4
  76. data/lib/polars/struct_expr.rb +1 -1
  77. data/lib/polars/struct_name_space.rb +1 -1
  78. data/lib/polars/testing.rb +507 -0
  79. data/lib/polars/utils.rb +64 -20
  80. data/lib/polars/version.rb +1 -1
  81. data/lib/polars.rb +15 -2
  82. metadata +40 -9
  83. data/lib/polars/lazy_functions.rb +0 -1197
@@ -6,12 +6,13 @@ use polars::frame::row::{rows_to_schema_supertypes, Row};
6
6
  use polars::frame::NullStrategy;
7
7
  use polars::io::avro::AvroCompression;
8
8
  use polars::io::mmap::ReaderBytes;
9
- use polars::io::RowCount;
9
+ use polars::io::RowIndex;
10
10
  use polars::prelude::pivot::{pivot, pivot_stable};
11
11
  use polars::prelude::*;
12
12
  use polars_core::utils::try_get_supertype;
13
13
  use std::cell::RefCell;
14
14
  use std::io::{BufWriter, Cursor};
15
+ use std::num::NonZeroUsize;
15
16
  use std::ops::Deref;
16
17
 
17
18
  use crate::conversion::*;
@@ -45,44 +46,51 @@ impl RbDataFrame {
45
46
  fn finish_from_rows(
46
47
  rows: Vec<Row>,
47
48
  infer_schema_length: Option<usize>,
48
- schema_overwrite: Option<Schema>,
49
+ schema: Option<Schema>,
50
+ schema_overrides_by_idx: Option<Vec<(usize, DataType)>>,
49
51
  ) -> RbResult<Self> {
50
- // object builder must be registered.
51
- crate::object::register_object_builder();
52
+ // Object builder must be registered
53
+ crate::on_startup::register_object_builder();
52
54
 
53
- let schema =
55
+ let mut final_schema =
54
56
  rows_to_schema_supertypes(&rows, infer_schema_length.map(|n| std::cmp::max(1, n)))
55
57
  .map_err(RbPolarsErr::from)?;
56
- // replace inferred nulls with boolean
57
- let fields = schema.iter_fields().map(|mut fld| match fld.data_type() {
58
- DataType::Null => {
59
- // fld.coerce(DataType::Boolean);
60
- fld
61
- }
62
- DataType::Decimal(_, _) => {
63
- fld.coerce(DataType::Decimal(None, None));
64
- fld
58
+
59
+ // Erase scale from inferred decimals.
60
+ for dtype in final_schema.iter_dtypes_mut() {
61
+ if let DataType::Decimal(_, _) = dtype {
62
+ *dtype = DataType::Decimal(None, None)
65
63
  }
66
- _ => fld,
67
- });
68
- let mut schema = Schema::from_iter(fields);
64
+ }
69
65
 
70
- if let Some(schema_overwrite) = schema_overwrite {
71
- for (i, (name, dtype)) in schema_overwrite.into_iter().enumerate() {
72
- if let Some((name_, dtype_)) = schema.get_at_index_mut(i) {
66
+ // Integrate explicit/inferred schema.
67
+ if let Some(schema) = schema {
68
+ for (i, (name, dtype)) in schema.into_iter().enumerate() {
69
+ if let Some((name_, dtype_)) = final_schema.get_at_index_mut(i) {
73
70
  *name_ = name;
74
71
 
75
- // if user sets dtype unknown, we use the inferred datatype
72
+ // If schema dtype is Unknown, overwrite with inferred datatype.
76
73
  if !matches!(dtype, DataType::Unknown) {
77
74
  *dtype_ = dtype;
78
75
  }
79
76
  } else {
80
- schema.with_column(name, dtype);
77
+ final_schema.with_column(name, dtype);
81
78
  }
82
79
  }
83
80
  }
84
81
 
85
- let df = DataFrame::from_rows_and_schema(&rows, &schema).map_err(RbPolarsErr::from)?;
82
+ // Optional per-field overrides; these supersede default/inferred dtypes.
83
+ if let Some(overrides) = schema_overrides_by_idx {
84
+ for (i, dtype) in overrides {
85
+ if let Some((_, dtype_)) = final_schema.get_at_index_mut(i) {
86
+ if !matches!(dtype, DataType::Unknown) {
87
+ *dtype_ = dtype;
88
+ }
89
+ }
90
+ }
91
+ }
92
+ let df =
93
+ DataFrame::from_rows_and_schema(&rows, &final_schema).map_err(RbPolarsErr::from)?;
86
94
  Ok(df.into())
87
95
  }
88
96
 
@@ -120,21 +128,20 @@ impl RbDataFrame {
120
128
  // TODO fix
121
129
  let overwrite_dtype_slice = Option::<Vec<Wrap<DataType>>>::None; // Option::<Vec<Wrap<DataType>>>::try_convert(arguments[15])?;
122
130
  let low_memory = bool::try_convert(arguments[16])?;
123
- let comment_char = Option::<String>::try_convert(arguments[17])?;
131
+ let comment_prefix = Option::<String>::try_convert(arguments[17])?;
124
132
  let quote_char = Option::<String>::try_convert(arguments[18])?;
125
133
  let null_values = Option::<Wrap<NullValues>>::try_convert(arguments[19])?;
126
134
  let try_parse_dates = bool::try_convert(arguments[20])?;
127
135
  let skip_rows_after_header = usize::try_convert(arguments[21])?;
128
- let row_count = Option::<(String, IdxSize)>::try_convert(arguments[22])?;
136
+ let row_index = Option::<(String, IdxSize)>::try_convert(arguments[22])?;
129
137
  let sample_size = usize::try_convert(arguments[23])?;
130
138
  let eol_char = String::try_convert(arguments[24])?;
131
139
  // end arguments
132
140
 
133
141
  let null_values = null_values.map(|w| w.0);
134
- let comment_char = comment_char.map(|s| s.as_bytes()[0]);
135
142
  let eol_char = eol_char.as_bytes()[0];
136
143
 
137
- let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
144
+ let row_index = row_index.map(|(name, offset)| RowIndex { name, offset });
138
145
 
139
146
  let quote_char = if let Some(s) = quote_char {
140
147
  if s.is_empty() {
@@ -181,13 +188,13 @@ impl RbDataFrame {
181
188
  .with_dtypes(overwrite_dtype.map(Arc::new))
182
189
  .with_dtypes_slice(overwrite_dtype_slice.as_deref())
183
190
  .low_memory(low_memory)
184
- .with_comment_char(comment_char)
191
+ .with_comment_prefix(comment_prefix.as_deref())
185
192
  .with_null_values(null_values)
186
193
  .with_try_parse_dates(try_parse_dates)
187
194
  .with_quote_char(quote_char)
188
195
  .with_end_of_line_char(eol_char)
189
196
  .with_skip_rows_after_header(skip_rows_after_header)
190
- .with_row_count(row_count)
197
+ .with_row_index(row_index)
191
198
  .sample_size(sample_size)
192
199
  .finish()
193
200
  .map_err(RbPolarsErr::from)?;
@@ -201,19 +208,19 @@ impl RbDataFrame {
201
208
  projection: Option<Vec<usize>>,
202
209
  n_rows: Option<usize>,
203
210
  parallel: Wrap<ParallelStrategy>,
204
- row_count: Option<(String, IdxSize)>,
211
+ row_index: Option<(String, IdxSize)>,
205
212
  low_memory: bool,
206
213
  use_statistics: bool,
207
214
  rechunk: bool,
208
215
  ) -> RbResult<Self> {
209
- let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
216
+ let row_index = row_index.map(|(name, offset)| RowIndex { name, offset });
210
217
  let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
211
218
  let df = ParquetReader::new(mmap_bytes_r)
212
219
  .with_projection(projection)
213
220
  .with_columns(columns)
214
221
  .read_parallel(parallel.0)
215
222
  .with_n_rows(n_rows)
216
- .with_row_count(row_count)
223
+ .with_row_index(row_index)
217
224
  .set_low_memory(low_memory)
218
225
  .use_statistics(use_statistics)
219
226
  .set_rechunk(rechunk)
@@ -227,16 +234,16 @@ impl RbDataFrame {
227
234
  columns: Option<Vec<String>>,
228
235
  projection: Option<Vec<usize>>,
229
236
  n_rows: Option<usize>,
230
- row_count: Option<(String, IdxSize)>,
237
+ row_index: Option<(String, IdxSize)>,
231
238
  memory_map: bool,
232
239
  ) -> RbResult<Self> {
233
- let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
240
+ let row_index = row_index.map(|(name, offset)| RowIndex { name, offset });
234
241
  let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
235
242
  let df = IpcReader::new(mmap_bytes_r)
236
243
  .with_projection(projection)
237
244
  .with_columns(columns)
238
245
  .with_n_rows(n_rows)
239
- .with_row_count(row_count)
246
+ .with_row_index(row_index)
240
247
  .memory_mapped(memory_map)
241
248
  .finish()
242
249
  .map_err(RbPolarsErr::from)?;
@@ -297,12 +304,18 @@ impl RbDataFrame {
297
304
  Ok(df) => Ok(df.into()),
298
305
  // try arrow json reader instead
299
306
  // this is row oriented
300
- Err(_) => {
301
- let out = JsonReader::new(mmap_bytes_r)
302
- .with_json_format(JsonFormat::Json)
303
- .finish()
304
- .map_err(|e| RbPolarsErr::other(format!("{:?}", e)))?;
305
- Ok(out.into())
307
+ Err(e) => {
308
+ let msg = format!("{e}");
309
+ if msg.contains("successful parse invalid data") {
310
+ let e = RbPolarsErr::from(PolarsError::ComputeError(msg.into()));
311
+ Err(e)
312
+ } else {
313
+ let out = JsonReader::new(mmap_bytes_r)
314
+ .with_json_format(JsonFormat::Json)
315
+ .finish()
316
+ .map_err(|e| RbPolarsErr::other(format!("{:?}", e)))?;
317
+ Ok(out.into())
318
+ }
306
319
  }
307
320
  }
308
321
  }
@@ -347,7 +360,7 @@ impl RbDataFrame {
347
360
  pub fn read_rows(
348
361
  rb_rows: RArray,
349
362
  infer_schema_length: Option<usize>,
350
- schema_overwrite: Option<Wrap<Schema>>,
363
+ schema: Option<Wrap<Schema>>,
351
364
  ) -> RbResult<Self> {
352
365
  let mut rows = Vec::with_capacity(rb_rows.len());
353
366
  for v in rb_rows.each() {
@@ -358,30 +371,34 @@ impl RbDataFrame {
358
371
  }
359
372
  rows.push(Row(row));
360
373
  }
361
- Self::finish_from_rows(
362
- rows,
363
- infer_schema_length,
364
- schema_overwrite.map(|wrap| wrap.0),
365
- )
374
+ Self::finish_from_rows(rows, infer_schema_length, schema.map(|wrap| wrap.0), None)
366
375
  }
367
376
 
368
377
  pub fn read_hashes(
369
378
  dicts: Value,
370
379
  infer_schema_length: Option<usize>,
371
- schema_overwrite: Option<Wrap<Schema>>,
380
+ schema: Option<Wrap<Schema>>,
381
+ schema_overrides: Option<Wrap<Schema>>,
372
382
  ) -> RbResult<Self> {
373
- let (rows, mut names) = dicts_to_rows(&dicts, infer_schema_length.unwrap_or(50))?;
383
+ let mut schema_columns = PlIndexSet::new();
384
+ if let Some(s) = &schema {
385
+ schema_columns.extend(s.0.iter_names().map(|n| n.to_string()))
386
+ }
387
+ let (rows, names) = dicts_to_rows(&dicts, infer_schema_length, schema_columns)?;
374
388
 
375
- // ensure the new names are used
376
- if let Some(schema) = &schema_overwrite {
377
- for (new_name, name) in schema.0.iter_names().zip(names.iter_mut()) {
378
- *name = new_name.to_string();
389
+ let mut schema_overrides_by_idx: Vec<(usize, DataType)> = Vec::new();
390
+ if let Some(overrides) = schema_overrides {
391
+ for (idx, name) in names.iter().enumerate() {
392
+ if let Some(dtype) = overrides.0.get(name) {
393
+ schema_overrides_by_idx.push((idx, dtype.clone()));
394
+ }
379
395
  }
380
396
  }
381
397
  let rbdf = Self::finish_from_rows(
382
398
  rows,
383
399
  infer_schema_length,
384
- schema_overwrite.map(|wrap| wrap.0),
400
+ schema.map(|wrap| wrap.0),
401
+ Some(schema_overrides_by_idx),
385
402
  )?;
386
403
 
387
404
  unsafe {
@@ -422,13 +439,14 @@ impl RbDataFrame {
422
439
  include_header: bool,
423
440
  separator: u8,
424
441
  quote_char: u8,
425
- batch_size: usize,
442
+ batch_size: Wrap<NonZeroUsize>,
426
443
  datetime_format: Option<String>,
427
444
  date_format: Option<String>,
428
445
  time_format: Option<String>,
429
446
  float_precision: Option<usize>,
430
447
  null_value: Option<String>,
431
448
  ) -> RbResult<()> {
449
+ let batch_size = batch_size.0;
432
450
  let null = null_value.unwrap_or_default();
433
451
 
434
452
  if let Ok(s) = String::try_convert(rb_f) {
@@ -504,7 +522,7 @@ impl RbDataFrame {
504
522
  .get_columns()
505
523
  .iter()
506
524
  .map(|s| match s.dtype() {
507
- DataType::Object(_) => {
525
+ DataType::Object(_, _) => {
508
526
  let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
509
527
  obj.unwrap().to_object()
510
528
  }
@@ -523,7 +541,7 @@ impl RbDataFrame {
523
541
  .get_columns()
524
542
  .iter()
525
543
  .map(|s| match s.dtype() {
526
- DataType::Object(_) => {
544
+ DataType::Object(_, _) => {
527
545
  let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
528
546
  obj.unwrap().to_object()
529
547
  }
@@ -785,16 +803,15 @@ impl RbDataFrame {
785
803
  .map(|s| RbSeries::new(s.clone()))
786
804
  }
787
805
 
788
- pub fn find_idx_by_name(&self, name: String) -> Option<usize> {
789
- self.df.borrow().find_idx_by_name(&name)
806
+ pub fn get_column_index(&self, name: String) -> Option<usize> {
807
+ self.df.borrow().get_column_index(&name)
790
808
  }
791
809
 
792
- // TODO remove clone
793
- pub fn column(&self, name: String) -> RbResult<RbSeries> {
810
+ pub fn get_column(&self, name: String) -> RbResult<RbSeries> {
794
811
  self.df
795
812
  .borrow()
796
813
  .column(&name)
797
- .map(|v| v.clone().into())
814
+ .map(|s| RbSeries::new(s.clone()))
798
815
  .map_err(RbPolarsErr::from)
799
816
  }
800
817
 
@@ -828,18 +845,18 @@ impl RbDataFrame {
828
845
  Ok(())
829
846
  }
830
847
 
831
- pub fn replace_at_idx(&self, index: usize, new_col: &RbSeries) -> RbResult<()> {
848
+ pub fn replace_column(&self, index: usize, new_col: &RbSeries) -> RbResult<()> {
832
849
  self.df
833
850
  .borrow_mut()
834
- .replace_at_idx(index, new_col.series.borrow().clone())
851
+ .replace_column(index, new_col.series.borrow().clone())
835
852
  .map_err(RbPolarsErr::from)?;
836
853
  Ok(())
837
854
  }
838
855
 
839
- pub fn insert_at_idx(&self, index: usize, new_col: &RbSeries) -> RbResult<()> {
856
+ pub fn insert_column(&self, index: usize, new_col: &RbSeries) -> RbResult<()> {
840
857
  self.df
841
858
  .borrow_mut()
842
- .insert_at_idx(index, new_col.series.borrow().clone())
859
+ .insert_column(index, new_col.series.borrow().clone())
843
860
  .map_err(RbPolarsErr::from)?;
844
861
  Ok(())
845
862
  }
@@ -874,19 +891,19 @@ impl RbDataFrame {
874
891
  Ok(mask.into_series().into())
875
892
  }
876
893
 
877
- pub fn frame_equal(&self, other: &RbDataFrame, null_equal: bool) -> bool {
894
+ pub fn equals(&self, other: &RbDataFrame, null_equal: bool) -> bool {
878
895
  if null_equal {
879
- self.df.borrow().frame_equal_missing(&other.df.borrow())
896
+ self.df.borrow().equals_missing(&other.df.borrow())
880
897
  } else {
881
- self.df.borrow().frame_equal(&other.df.borrow())
898
+ self.df.borrow().equals(&other.df.borrow())
882
899
  }
883
900
  }
884
901
 
885
- pub fn with_row_count(&self, name: String, offset: Option<IdxSize>) -> RbResult<Self> {
902
+ pub fn with_row_index(&self, name: String, offset: Option<IdxSize>) -> RbResult<Self> {
886
903
  let df = self
887
904
  .df
888
905
  .borrow()
889
- .with_row_count(&name, offset)
906
+ .with_row_index(&name, offset)
890
907
  .map_err(RbPolarsErr::from)?;
891
908
  Ok(df.into())
892
909
  }
@@ -917,9 +934,9 @@ impl RbDataFrame {
917
934
  #[allow(clippy::too_many_arguments)]
918
935
  pub fn pivot_expr(
919
936
  &self,
920
- values: Vec<String>,
921
937
  index: Vec<String>,
922
938
  columns: Vec<String>,
939
+ values: Option<Vec<String>>,
923
940
  maintain_order: bool,
924
941
  sort_columns: bool,
925
942
  aggregate_expr: Option<&RbExpr>,
@@ -932,9 +949,9 @@ impl RbDataFrame {
932
949
  let agg_expr = aggregate_expr.map(|aggregate_expr| aggregate_expr.inner.clone());
933
950
  let df = fun(
934
951
  &self.df.borrow(),
935
- values,
936
952
  index,
937
953
  columns,
954
+ values,
938
955
  sort_columns,
939
956
  agg_expr,
940
957
  separator.as_deref(),
@@ -966,34 +983,6 @@ impl RbDataFrame {
966
983
  self.df.borrow().clone().lazy().into()
967
984
  }
968
985
 
969
- pub fn max(&self) -> Self {
970
- self.df.borrow().max().into()
971
- }
972
-
973
- pub fn min(&self) -> Self {
974
- self.df.borrow().min().into()
975
- }
976
-
977
- pub fn sum(&self) -> Self {
978
- self.df.borrow().sum().into()
979
- }
980
-
981
- pub fn mean(&self) -> Self {
982
- self.df.borrow().mean().into()
983
- }
984
-
985
- pub fn std(&self, ddof: u8) -> Self {
986
- self.df.borrow().std(ddof).into()
987
- }
988
-
989
- pub fn var(&self, ddof: u8) -> Self {
990
- self.df.borrow().var(ddof).into()
991
- }
992
-
993
- pub fn median(&self) -> Self {
994
- self.df.borrow().median().into()
995
- }
996
-
997
986
  pub fn max_horizontal(&self) -> RbResult<Option<RbSeries>> {
998
987
  let s = self
999
988
  .df
@@ -1040,19 +1029,6 @@ impl RbDataFrame {
1040
1029
  Ok(s.map(|s| s.into()))
1041
1030
  }
1042
1031
 
1043
- pub fn quantile(
1044
- &self,
1045
- quantile: f64,
1046
- interpolation: Wrap<QuantileInterpolOptions>,
1047
- ) -> RbResult<Self> {
1048
- let df = self
1049
- .df
1050
- .borrow()
1051
- .quantile(quantile, interpolation.0)
1052
- .map_err(RbPolarsErr::from)?;
1053
- Ok(df.into())
1054
- }
1055
-
1056
1032
  pub fn to_dummies(
1057
1033
  &self,
1058
1034
  columns: Option<Vec<String>>,
@@ -1124,7 +1100,7 @@ impl RbDataFrame {
1124
1100
  .into_datetime(tu, tz)
1125
1101
  .into_series()
1126
1102
  }
1127
- Some(DataType::Utf8) => {
1103
+ Some(DataType::String) => {
1128
1104
  apply_lambda_with_utf8_out_type(df, lambda, 0, None).into_series()
1129
1105
  }
1130
1106
  _ => return apply_lambda_unknown(df, lambda, inference_size),
@@ -1157,7 +1133,7 @@ impl RbDataFrame {
1157
1133
  };
1158
1134
  Ok(self
1159
1135
  .df
1160
- .borrow()
1136
+ .borrow_mut()
1161
1137
  .transpose(keep_names_as.as_deref(), new_col_names)
1162
1138
  .map_err(RbPolarsErr::from)?
1163
1139
  .into())
@@ -1199,4 +1175,8 @@ impl RbDataFrame {
1199
1175
  let df = self.df.borrow().unnest(names).map_err(RbPolarsErr::from)?;
1200
1176
  Ok(df.into())
1201
1177
  }
1178
+
1179
+ pub fn clear(&self) -> Self {
1180
+ self.df.borrow().clear().into()
1181
+ }
1202
1182
  }
@@ -1,3 +1,5 @@
1
+ use polars::prelude::*;
2
+
1
3
  use crate::RbExpr;
2
4
 
3
5
  impl RbExpr {
@@ -12,4 +14,76 @@ impl RbExpr {
12
14
  pub fn array_sum(&self) -> Self {
13
15
  self.inner.clone().arr().sum().into()
14
16
  }
17
+
18
+ pub fn arr_unique(&self, maintain_order: bool) -> Self {
19
+ if maintain_order {
20
+ self.inner.clone().arr().unique_stable().into()
21
+ } else {
22
+ self.inner.clone().arr().unique().into()
23
+ }
24
+ }
25
+
26
+ pub fn arr_to_list(&self) -> Self {
27
+ self.inner.clone().arr().to_list().into()
28
+ }
29
+
30
+ pub fn arr_all(&self) -> Self {
31
+ self.inner.clone().arr().all().into()
32
+ }
33
+
34
+ pub fn arr_any(&self) -> Self {
35
+ self.inner.clone().arr().any().into()
36
+ }
37
+
38
+ pub fn arr_sort(&self, descending: bool, nulls_last: bool) -> Self {
39
+ self.inner
40
+ .clone()
41
+ .arr()
42
+ .sort(SortOptions {
43
+ descending,
44
+ nulls_last,
45
+ ..Default::default()
46
+ })
47
+ .into()
48
+ }
49
+
50
+ pub fn arr_reverse(&self) -> Self {
51
+ self.inner.clone().arr().reverse().into()
52
+ }
53
+
54
+ pub fn arr_arg_min(&self) -> Self {
55
+ self.inner.clone().arr().arg_min().into()
56
+ }
57
+
58
+ pub fn arr_arg_max(&self) -> Self {
59
+ self.inner.clone().arr().arg_max().into()
60
+ }
61
+
62
+ pub fn arr_get(&self, index: &RbExpr) -> Self {
63
+ self.inner.clone().arr().get(index.inner.clone()).into()
64
+ }
65
+
66
+ pub fn arr_join(&self, separator: &RbExpr, ignore_nulls: bool) -> Self {
67
+ self.inner
68
+ .clone()
69
+ .arr()
70
+ .join(separator.inner.clone(), ignore_nulls)
71
+ .into()
72
+ }
73
+
74
+ pub fn arr_contains(&self, other: &RbExpr) -> Self {
75
+ self.inner
76
+ .clone()
77
+ .arr()
78
+ .contains(other.inner.clone())
79
+ .into()
80
+ }
81
+
82
+ pub fn arr_count_matches(&self, expr: &RbExpr) -> Self {
83
+ self.inner
84
+ .clone()
85
+ .arr()
86
+ .count_matches(expr.inner.clone())
87
+ .into()
88
+ }
15
89
  }
@@ -5,6 +5,13 @@ use crate::RbExpr;
5
5
 
6
6
  impl RbExpr {
7
7
  pub fn cat_set_ordering(&self, ordering: Wrap<CategoricalOrdering>) -> Self {
8
- self.inner.clone().cat().set_ordering(ordering.0).into()
8
+ self.inner
9
+ .clone()
10
+ .cast(DataType::Categorical(None, ordering.0))
11
+ .into()
12
+ }
13
+
14
+ pub fn cat_get_categories(&self) -> Self {
15
+ self.inner.clone().cat().get_categories().into()
9
16
  }
10
17
  }
@@ -61,6 +61,14 @@ impl RbExpr {
61
61
  self.inner.clone().dt().month_end().into()
62
62
  }
63
63
 
64
+ pub fn dt_base_utc_offset(&self) -> Self {
65
+ self.inner.clone().dt().base_utc_offset().into()
66
+ }
67
+
68
+ pub fn dt_dst_offset(&self) -> Self {
69
+ self.inner.clone().dt().dst_offset().into()
70
+ }
71
+
64
72
  pub fn dt_round(&self, every: String, offset: String) -> Self {
65
73
  self.inner.clone().dt().round(&every, &offset).into()
66
74
  }
@@ -149,73 +157,31 @@ impl RbExpr {
149
157
  self.inner.clone().dt().timestamp(tu.0).into()
150
158
  }
151
159
 
152
- pub fn duration_days(&self) -> Self {
153
- self.inner
154
- .clone()
155
- .map(
156
- |s| Ok(Some(s.duration()?.days().into_series())),
157
- GetOutput::from_type(DataType::Int64),
158
- )
159
- .into()
160
+ pub fn dt_total_days(&self) -> Self {
161
+ self.inner.clone().dt().total_days().into()
160
162
  }
161
163
 
162
- pub fn duration_hours(&self) -> Self {
163
- self.inner
164
- .clone()
165
- .map(
166
- |s| Ok(Some(s.duration()?.hours().into_series())),
167
- GetOutput::from_type(DataType::Int64),
168
- )
169
- .into()
164
+ pub fn dt_total_hours(&self) -> Self {
165
+ self.inner.clone().dt().total_hours().into()
170
166
  }
171
167
 
172
- pub fn duration_minutes(&self) -> Self {
173
- self.inner
174
- .clone()
175
- .map(
176
- |s| Ok(Some(s.duration()?.minutes().into_series())),
177
- GetOutput::from_type(DataType::Int64),
178
- )
179
- .into()
168
+ pub fn dt_total_minutes(&self) -> Self {
169
+ self.inner.clone().dt().total_minutes().into()
180
170
  }
181
171
 
182
- pub fn duration_seconds(&self) -> Self {
183
- self.inner
184
- .clone()
185
- .map(
186
- |s| Ok(Some(s.duration()?.seconds().into_series())),
187
- GetOutput::from_type(DataType::Int64),
188
- )
189
- .into()
172
+ pub fn dt_total_seconds(&self) -> Self {
173
+ self.inner.clone().dt().total_seconds().into()
190
174
  }
191
175
 
192
- pub fn duration_milliseconds(&self) -> Self {
193
- self.inner
194
- .clone()
195
- .map(
196
- |s| Ok(Some(s.duration()?.milliseconds().into_series())),
197
- GetOutput::from_type(DataType::Int64),
198
- )
199
- .into()
176
+ pub fn dt_total_milliseconds(&self) -> Self {
177
+ self.inner.clone().dt().total_milliseconds().into()
200
178
  }
201
179
 
202
- pub fn duration_microseconds(&self) -> Self {
203
- self.inner
204
- .clone()
205
- .map(
206
- |s| Ok(Some(s.duration()?.microseconds().into_series())),
207
- GetOutput::from_type(DataType::Int64),
208
- )
209
- .into()
180
+ pub fn dt_total_microseconds(&self) -> Self {
181
+ self.inner.clone().dt().total_microseconds().into()
210
182
  }
211
183
 
212
- pub fn duration_nanoseconds(&self) -> Self {
213
- self.inner
214
- .clone()
215
- .map(
216
- |s| Ok(Some(s.duration()?.nanoseconds().into_series())),
217
- GetOutput::from_type(DataType::Int64),
218
- )
219
- .into()
184
+ pub fn dt_total_nanoseconds(&self) -> Self {
185
+ self.inner.clone().dt().total_nanoseconds().into()
220
186
  }
221
187
  }