polars-df 0.7.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +41 -0
  3. data/Cargo.lock +353 -237
  4. data/Cargo.toml +0 -3
  5. data/LICENSE.txt +1 -1
  6. data/README.md +2 -2
  7. data/ext/polars/Cargo.toml +17 -6
  8. data/ext/polars/src/batched_csv.rs +6 -7
  9. data/ext/polars/src/conversion/anyvalue.rs +185 -0
  10. data/ext/polars/src/conversion/chunked_array.rs +140 -0
  11. data/ext/polars/src/{conversion.rs → conversion/mod.rs} +268 -347
  12. data/ext/polars/src/dataframe.rs +96 -116
  13. data/ext/polars/src/expr/array.rs +74 -0
  14. data/ext/polars/src/expr/categorical.rs +8 -1
  15. data/ext/polars/src/expr/datetime.rs +22 -56
  16. data/ext/polars/src/expr/general.rs +124 -37
  17. data/ext/polars/src/expr/list.rs +52 -4
  18. data/ext/polars/src/expr/meta.rs +48 -0
  19. data/ext/polars/src/expr/rolling.rs +16 -10
  20. data/ext/polars/src/expr/string.rs +68 -17
  21. data/ext/polars/src/expr/struct.rs +8 -4
  22. data/ext/polars/src/functions/aggregation.rs +6 -0
  23. data/ext/polars/src/functions/lazy.rs +103 -48
  24. data/ext/polars/src/functions/meta.rs +45 -1
  25. data/ext/polars/src/functions/range.rs +5 -10
  26. data/ext/polars/src/functions/string_cache.rs +14 -0
  27. data/ext/polars/src/{lazyframe.rs → lazyframe/mod.rs} +166 -41
  28. data/ext/polars/src/lib.rs +245 -187
  29. data/ext/polars/src/map/dataframe.rs +1 -1
  30. data/ext/polars/src/map/mod.rs +2 -2
  31. data/ext/polars/src/map/series.rs +6 -6
  32. data/ext/polars/src/object.rs +0 -30
  33. data/ext/polars/src/on_startup.rs +32 -0
  34. data/ext/polars/src/series/aggregation.rs +23 -0
  35. data/ext/polars/src/series/construction.rs +1 -1
  36. data/ext/polars/src/series/export.rs +2 -2
  37. data/ext/polars/src/{series.rs → series/mod.rs} +45 -21
  38. data/ext/polars/src/series/{set_at_idx.rs → scatter.rs} +18 -18
  39. data/ext/polars/src/utils.rs +1 -1
  40. data/lib/polars/array_expr.rb +449 -0
  41. data/lib/polars/array_name_space.rb +346 -0
  42. data/lib/polars/cat_expr.rb +24 -0
  43. data/lib/polars/cat_name_space.rb +75 -0
  44. data/lib/polars/config.rb +2 -2
  45. data/lib/polars/data_frame.rb +248 -108
  46. data/lib/polars/data_types.rb +195 -29
  47. data/lib/polars/date_time_expr.rb +41 -24
  48. data/lib/polars/date_time_name_space.rb +12 -12
  49. data/lib/polars/exceptions.rb +12 -1
  50. data/lib/polars/expr.rb +1080 -195
  51. data/lib/polars/functions/aggregation/horizontal.rb +246 -0
  52. data/lib/polars/functions/aggregation/vertical.rb +282 -0
  53. data/lib/polars/functions/as_datatype.rb +248 -0
  54. data/lib/polars/functions/col.rb +47 -0
  55. data/lib/polars/functions/eager.rb +182 -0
  56. data/lib/polars/functions/lazy.rb +1280 -0
  57. data/lib/polars/functions/len.rb +49 -0
  58. data/lib/polars/functions/lit.rb +35 -0
  59. data/lib/polars/functions/random.rb +16 -0
  60. data/lib/polars/functions/range/date_range.rb +103 -0
  61. data/lib/polars/functions/range/int_range.rb +51 -0
  62. data/lib/polars/functions/repeat.rb +144 -0
  63. data/lib/polars/functions/whenthen.rb +27 -0
  64. data/lib/polars/functions.rb +29 -416
  65. data/lib/polars/group_by.rb +3 -3
  66. data/lib/polars/io.rb +21 -28
  67. data/lib/polars/lazy_frame.rb +390 -76
  68. data/lib/polars/list_expr.rb +152 -6
  69. data/lib/polars/list_name_space.rb +102 -0
  70. data/lib/polars/meta_expr.rb +175 -7
  71. data/lib/polars/series.rb +557 -59
  72. data/lib/polars/sql_context.rb +1 -1
  73. data/lib/polars/string_cache.rb +75 -0
  74. data/lib/polars/string_expr.rb +412 -96
  75. data/lib/polars/string_name_space.rb +4 -4
  76. data/lib/polars/struct_expr.rb +1 -1
  77. data/lib/polars/struct_name_space.rb +1 -1
  78. data/lib/polars/testing.rb +507 -0
  79. data/lib/polars/utils.rb +64 -20
  80. data/lib/polars/version.rb +1 -1
  81. data/lib/polars.rb +15 -2
  82. metadata +40 -9
  83. data/lib/polars/lazy_functions.rb +0 -1197
@@ -6,12 +6,13 @@ use polars::frame::row::{rows_to_schema_supertypes, Row};
6
6
  use polars::frame::NullStrategy;
7
7
  use polars::io::avro::AvroCompression;
8
8
  use polars::io::mmap::ReaderBytes;
9
- use polars::io::RowCount;
9
+ use polars::io::RowIndex;
10
10
  use polars::prelude::pivot::{pivot, pivot_stable};
11
11
  use polars::prelude::*;
12
12
  use polars_core::utils::try_get_supertype;
13
13
  use std::cell::RefCell;
14
14
  use std::io::{BufWriter, Cursor};
15
+ use std::num::NonZeroUsize;
15
16
  use std::ops::Deref;
16
17
 
17
18
  use crate::conversion::*;
@@ -45,44 +46,51 @@ impl RbDataFrame {
45
46
  fn finish_from_rows(
46
47
  rows: Vec<Row>,
47
48
  infer_schema_length: Option<usize>,
48
- schema_overwrite: Option<Schema>,
49
+ schema: Option<Schema>,
50
+ schema_overrides_by_idx: Option<Vec<(usize, DataType)>>,
49
51
  ) -> RbResult<Self> {
50
- // object builder must be registered.
51
- crate::object::register_object_builder();
52
+ // Object builder must be registered
53
+ crate::on_startup::register_object_builder();
52
54
 
53
- let schema =
55
+ let mut final_schema =
54
56
  rows_to_schema_supertypes(&rows, infer_schema_length.map(|n| std::cmp::max(1, n)))
55
57
  .map_err(RbPolarsErr::from)?;
56
- // replace inferred nulls with boolean
57
- let fields = schema.iter_fields().map(|mut fld| match fld.data_type() {
58
- DataType::Null => {
59
- // fld.coerce(DataType::Boolean);
60
- fld
61
- }
62
- DataType::Decimal(_, _) => {
63
- fld.coerce(DataType::Decimal(None, None));
64
- fld
58
+
59
+ // Erase scale from inferred decimals.
60
+ for dtype in final_schema.iter_dtypes_mut() {
61
+ if let DataType::Decimal(_, _) = dtype {
62
+ *dtype = DataType::Decimal(None, None)
65
63
  }
66
- _ => fld,
67
- });
68
- let mut schema = Schema::from_iter(fields);
64
+ }
69
65
 
70
- if let Some(schema_overwrite) = schema_overwrite {
71
- for (i, (name, dtype)) in schema_overwrite.into_iter().enumerate() {
72
- if let Some((name_, dtype_)) = schema.get_at_index_mut(i) {
66
+ // Integrate explicit/inferred schema.
67
+ if let Some(schema) = schema {
68
+ for (i, (name, dtype)) in schema.into_iter().enumerate() {
69
+ if let Some((name_, dtype_)) = final_schema.get_at_index_mut(i) {
73
70
  *name_ = name;
74
71
 
75
- // if user sets dtype unknown, we use the inferred datatype
72
+ // If schema dtype is Unknown, overwrite with inferred datatype.
76
73
  if !matches!(dtype, DataType::Unknown) {
77
74
  *dtype_ = dtype;
78
75
  }
79
76
  } else {
80
- schema.with_column(name, dtype);
77
+ final_schema.with_column(name, dtype);
81
78
  }
82
79
  }
83
80
  }
84
81
 
85
- let df = DataFrame::from_rows_and_schema(&rows, &schema).map_err(RbPolarsErr::from)?;
82
+ // Optional per-field overrides; these supersede default/inferred dtypes.
83
+ if let Some(overrides) = schema_overrides_by_idx {
84
+ for (i, dtype) in overrides {
85
+ if let Some((_, dtype_)) = final_schema.get_at_index_mut(i) {
86
+ if !matches!(dtype, DataType::Unknown) {
87
+ *dtype_ = dtype;
88
+ }
89
+ }
90
+ }
91
+ }
92
+ let df =
93
+ DataFrame::from_rows_and_schema(&rows, &final_schema).map_err(RbPolarsErr::from)?;
86
94
  Ok(df.into())
87
95
  }
88
96
 
@@ -120,21 +128,20 @@ impl RbDataFrame {
120
128
  // TODO fix
121
129
  let overwrite_dtype_slice = Option::<Vec<Wrap<DataType>>>::None; // Option::<Vec<Wrap<DataType>>>::try_convert(arguments[15])?;
122
130
  let low_memory = bool::try_convert(arguments[16])?;
123
- let comment_char = Option::<String>::try_convert(arguments[17])?;
131
+ let comment_prefix = Option::<String>::try_convert(arguments[17])?;
124
132
  let quote_char = Option::<String>::try_convert(arguments[18])?;
125
133
  let null_values = Option::<Wrap<NullValues>>::try_convert(arguments[19])?;
126
134
  let try_parse_dates = bool::try_convert(arguments[20])?;
127
135
  let skip_rows_after_header = usize::try_convert(arguments[21])?;
128
- let row_count = Option::<(String, IdxSize)>::try_convert(arguments[22])?;
136
+ let row_index = Option::<(String, IdxSize)>::try_convert(arguments[22])?;
129
137
  let sample_size = usize::try_convert(arguments[23])?;
130
138
  let eol_char = String::try_convert(arguments[24])?;
131
139
  // end arguments
132
140
 
133
141
  let null_values = null_values.map(|w| w.0);
134
- let comment_char = comment_char.map(|s| s.as_bytes()[0]);
135
142
  let eol_char = eol_char.as_bytes()[0];
136
143
 
137
- let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
144
+ let row_index = row_index.map(|(name, offset)| RowIndex { name, offset });
138
145
 
139
146
  let quote_char = if let Some(s) = quote_char {
140
147
  if s.is_empty() {
@@ -181,13 +188,13 @@ impl RbDataFrame {
181
188
  .with_dtypes(overwrite_dtype.map(Arc::new))
182
189
  .with_dtypes_slice(overwrite_dtype_slice.as_deref())
183
190
  .low_memory(low_memory)
184
- .with_comment_char(comment_char)
191
+ .with_comment_prefix(comment_prefix.as_deref())
185
192
  .with_null_values(null_values)
186
193
  .with_try_parse_dates(try_parse_dates)
187
194
  .with_quote_char(quote_char)
188
195
  .with_end_of_line_char(eol_char)
189
196
  .with_skip_rows_after_header(skip_rows_after_header)
190
- .with_row_count(row_count)
197
+ .with_row_index(row_index)
191
198
  .sample_size(sample_size)
192
199
  .finish()
193
200
  .map_err(RbPolarsErr::from)?;
@@ -201,19 +208,19 @@ impl RbDataFrame {
201
208
  projection: Option<Vec<usize>>,
202
209
  n_rows: Option<usize>,
203
210
  parallel: Wrap<ParallelStrategy>,
204
- row_count: Option<(String, IdxSize)>,
211
+ row_index: Option<(String, IdxSize)>,
205
212
  low_memory: bool,
206
213
  use_statistics: bool,
207
214
  rechunk: bool,
208
215
  ) -> RbResult<Self> {
209
- let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
216
+ let row_index = row_index.map(|(name, offset)| RowIndex { name, offset });
210
217
  let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
211
218
  let df = ParquetReader::new(mmap_bytes_r)
212
219
  .with_projection(projection)
213
220
  .with_columns(columns)
214
221
  .read_parallel(parallel.0)
215
222
  .with_n_rows(n_rows)
216
- .with_row_count(row_count)
223
+ .with_row_index(row_index)
217
224
  .set_low_memory(low_memory)
218
225
  .use_statistics(use_statistics)
219
226
  .set_rechunk(rechunk)
@@ -227,16 +234,16 @@ impl RbDataFrame {
227
234
  columns: Option<Vec<String>>,
228
235
  projection: Option<Vec<usize>>,
229
236
  n_rows: Option<usize>,
230
- row_count: Option<(String, IdxSize)>,
237
+ row_index: Option<(String, IdxSize)>,
231
238
  memory_map: bool,
232
239
  ) -> RbResult<Self> {
233
- let row_count = row_count.map(|(name, offset)| RowCount { name, offset });
240
+ let row_index = row_index.map(|(name, offset)| RowIndex { name, offset });
234
241
  let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
235
242
  let df = IpcReader::new(mmap_bytes_r)
236
243
  .with_projection(projection)
237
244
  .with_columns(columns)
238
245
  .with_n_rows(n_rows)
239
- .with_row_count(row_count)
246
+ .with_row_index(row_index)
240
247
  .memory_mapped(memory_map)
241
248
  .finish()
242
249
  .map_err(RbPolarsErr::from)?;
@@ -297,12 +304,18 @@ impl RbDataFrame {
297
304
  Ok(df) => Ok(df.into()),
298
305
  // try arrow json reader instead
299
306
  // this is row oriented
300
- Err(_) => {
301
- let out = JsonReader::new(mmap_bytes_r)
302
- .with_json_format(JsonFormat::Json)
303
- .finish()
304
- .map_err(|e| RbPolarsErr::other(format!("{:?}", e)))?;
305
- Ok(out.into())
307
+ Err(e) => {
308
+ let msg = format!("{e}");
309
+ if msg.contains("successful parse invalid data") {
310
+ let e = RbPolarsErr::from(PolarsError::ComputeError(msg.into()));
311
+ Err(e)
312
+ } else {
313
+ let out = JsonReader::new(mmap_bytes_r)
314
+ .with_json_format(JsonFormat::Json)
315
+ .finish()
316
+ .map_err(|e| RbPolarsErr::other(format!("{:?}", e)))?;
317
+ Ok(out.into())
318
+ }
306
319
  }
307
320
  }
308
321
  }
@@ -347,7 +360,7 @@ impl RbDataFrame {
347
360
  pub fn read_rows(
348
361
  rb_rows: RArray,
349
362
  infer_schema_length: Option<usize>,
350
- schema_overwrite: Option<Wrap<Schema>>,
363
+ schema: Option<Wrap<Schema>>,
351
364
  ) -> RbResult<Self> {
352
365
  let mut rows = Vec::with_capacity(rb_rows.len());
353
366
  for v in rb_rows.each() {
@@ -358,30 +371,34 @@ impl RbDataFrame {
358
371
  }
359
372
  rows.push(Row(row));
360
373
  }
361
- Self::finish_from_rows(
362
- rows,
363
- infer_schema_length,
364
- schema_overwrite.map(|wrap| wrap.0),
365
- )
374
+ Self::finish_from_rows(rows, infer_schema_length, schema.map(|wrap| wrap.0), None)
366
375
  }
367
376
 
368
377
  pub fn read_hashes(
369
378
  dicts: Value,
370
379
  infer_schema_length: Option<usize>,
371
- schema_overwrite: Option<Wrap<Schema>>,
380
+ schema: Option<Wrap<Schema>>,
381
+ schema_overrides: Option<Wrap<Schema>>,
372
382
  ) -> RbResult<Self> {
373
- let (rows, mut names) = dicts_to_rows(&dicts, infer_schema_length.unwrap_or(50))?;
383
+ let mut schema_columns = PlIndexSet::new();
384
+ if let Some(s) = &schema {
385
+ schema_columns.extend(s.0.iter_names().map(|n| n.to_string()))
386
+ }
387
+ let (rows, names) = dicts_to_rows(&dicts, infer_schema_length, schema_columns)?;
374
388
 
375
- // ensure the new names are used
376
- if let Some(schema) = &schema_overwrite {
377
- for (new_name, name) in schema.0.iter_names().zip(names.iter_mut()) {
378
- *name = new_name.to_string();
389
+ let mut schema_overrides_by_idx: Vec<(usize, DataType)> = Vec::new();
390
+ if let Some(overrides) = schema_overrides {
391
+ for (idx, name) in names.iter().enumerate() {
392
+ if let Some(dtype) = overrides.0.get(name) {
393
+ schema_overrides_by_idx.push((idx, dtype.clone()));
394
+ }
379
395
  }
380
396
  }
381
397
  let rbdf = Self::finish_from_rows(
382
398
  rows,
383
399
  infer_schema_length,
384
- schema_overwrite.map(|wrap| wrap.0),
400
+ schema.map(|wrap| wrap.0),
401
+ Some(schema_overrides_by_idx),
385
402
  )?;
386
403
 
387
404
  unsafe {
@@ -422,13 +439,14 @@ impl RbDataFrame {
422
439
  include_header: bool,
423
440
  separator: u8,
424
441
  quote_char: u8,
425
- batch_size: usize,
442
+ batch_size: Wrap<NonZeroUsize>,
426
443
  datetime_format: Option<String>,
427
444
  date_format: Option<String>,
428
445
  time_format: Option<String>,
429
446
  float_precision: Option<usize>,
430
447
  null_value: Option<String>,
431
448
  ) -> RbResult<()> {
449
+ let batch_size = batch_size.0;
432
450
  let null = null_value.unwrap_or_default();
433
451
 
434
452
  if let Ok(s) = String::try_convert(rb_f) {
@@ -504,7 +522,7 @@ impl RbDataFrame {
504
522
  .get_columns()
505
523
  .iter()
506
524
  .map(|s| match s.dtype() {
507
- DataType::Object(_) => {
525
+ DataType::Object(_, _) => {
508
526
  let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
509
527
  obj.unwrap().to_object()
510
528
  }
@@ -523,7 +541,7 @@ impl RbDataFrame {
523
541
  .get_columns()
524
542
  .iter()
525
543
  .map(|s| match s.dtype() {
526
- DataType::Object(_) => {
544
+ DataType::Object(_, _) => {
527
545
  let obj: Option<&ObjectValue> = s.get_object(idx).map(|any| any.into());
528
546
  obj.unwrap().to_object()
529
547
  }
@@ -785,16 +803,15 @@ impl RbDataFrame {
785
803
  .map(|s| RbSeries::new(s.clone()))
786
804
  }
787
805
 
788
- pub fn find_idx_by_name(&self, name: String) -> Option<usize> {
789
- self.df.borrow().find_idx_by_name(&name)
806
+ pub fn get_column_index(&self, name: String) -> Option<usize> {
807
+ self.df.borrow().get_column_index(&name)
790
808
  }
791
809
 
792
- // TODO remove clone
793
- pub fn column(&self, name: String) -> RbResult<RbSeries> {
810
+ pub fn get_column(&self, name: String) -> RbResult<RbSeries> {
794
811
  self.df
795
812
  .borrow()
796
813
  .column(&name)
797
- .map(|v| v.clone().into())
814
+ .map(|s| RbSeries::new(s.clone()))
798
815
  .map_err(RbPolarsErr::from)
799
816
  }
800
817
 
@@ -828,18 +845,18 @@ impl RbDataFrame {
828
845
  Ok(())
829
846
  }
830
847
 
831
- pub fn replace_at_idx(&self, index: usize, new_col: &RbSeries) -> RbResult<()> {
848
+ pub fn replace_column(&self, index: usize, new_col: &RbSeries) -> RbResult<()> {
832
849
  self.df
833
850
  .borrow_mut()
834
- .replace_at_idx(index, new_col.series.borrow().clone())
851
+ .replace_column(index, new_col.series.borrow().clone())
835
852
  .map_err(RbPolarsErr::from)?;
836
853
  Ok(())
837
854
  }
838
855
 
839
- pub fn insert_at_idx(&self, index: usize, new_col: &RbSeries) -> RbResult<()> {
856
+ pub fn insert_column(&self, index: usize, new_col: &RbSeries) -> RbResult<()> {
840
857
  self.df
841
858
  .borrow_mut()
842
- .insert_at_idx(index, new_col.series.borrow().clone())
859
+ .insert_column(index, new_col.series.borrow().clone())
843
860
  .map_err(RbPolarsErr::from)?;
844
861
  Ok(())
845
862
  }
@@ -874,19 +891,19 @@ impl RbDataFrame {
874
891
  Ok(mask.into_series().into())
875
892
  }
876
893
 
877
- pub fn frame_equal(&self, other: &RbDataFrame, null_equal: bool) -> bool {
894
+ pub fn equals(&self, other: &RbDataFrame, null_equal: bool) -> bool {
878
895
  if null_equal {
879
- self.df.borrow().frame_equal_missing(&other.df.borrow())
896
+ self.df.borrow().equals_missing(&other.df.borrow())
880
897
  } else {
881
- self.df.borrow().frame_equal(&other.df.borrow())
898
+ self.df.borrow().equals(&other.df.borrow())
882
899
  }
883
900
  }
884
901
 
885
- pub fn with_row_count(&self, name: String, offset: Option<IdxSize>) -> RbResult<Self> {
902
+ pub fn with_row_index(&self, name: String, offset: Option<IdxSize>) -> RbResult<Self> {
886
903
  let df = self
887
904
  .df
888
905
  .borrow()
889
- .with_row_count(&name, offset)
906
+ .with_row_index(&name, offset)
890
907
  .map_err(RbPolarsErr::from)?;
891
908
  Ok(df.into())
892
909
  }
@@ -917,9 +934,9 @@ impl RbDataFrame {
917
934
  #[allow(clippy::too_many_arguments)]
918
935
  pub fn pivot_expr(
919
936
  &self,
920
- values: Vec<String>,
921
937
  index: Vec<String>,
922
938
  columns: Vec<String>,
939
+ values: Option<Vec<String>>,
923
940
  maintain_order: bool,
924
941
  sort_columns: bool,
925
942
  aggregate_expr: Option<&RbExpr>,
@@ -932,9 +949,9 @@ impl RbDataFrame {
932
949
  let agg_expr = aggregate_expr.map(|aggregate_expr| aggregate_expr.inner.clone());
933
950
  let df = fun(
934
951
  &self.df.borrow(),
935
- values,
936
952
  index,
937
953
  columns,
954
+ values,
938
955
  sort_columns,
939
956
  agg_expr,
940
957
  separator.as_deref(),
@@ -966,34 +983,6 @@ impl RbDataFrame {
966
983
  self.df.borrow().clone().lazy().into()
967
984
  }
968
985
 
969
- pub fn max(&self) -> Self {
970
- self.df.borrow().max().into()
971
- }
972
-
973
- pub fn min(&self) -> Self {
974
- self.df.borrow().min().into()
975
- }
976
-
977
- pub fn sum(&self) -> Self {
978
- self.df.borrow().sum().into()
979
- }
980
-
981
- pub fn mean(&self) -> Self {
982
- self.df.borrow().mean().into()
983
- }
984
-
985
- pub fn std(&self, ddof: u8) -> Self {
986
- self.df.borrow().std(ddof).into()
987
- }
988
-
989
- pub fn var(&self, ddof: u8) -> Self {
990
- self.df.borrow().var(ddof).into()
991
- }
992
-
993
- pub fn median(&self) -> Self {
994
- self.df.borrow().median().into()
995
- }
996
-
997
986
  pub fn max_horizontal(&self) -> RbResult<Option<RbSeries>> {
998
987
  let s = self
999
988
  .df
@@ -1040,19 +1029,6 @@ impl RbDataFrame {
1040
1029
  Ok(s.map(|s| s.into()))
1041
1030
  }
1042
1031
 
1043
- pub fn quantile(
1044
- &self,
1045
- quantile: f64,
1046
- interpolation: Wrap<QuantileInterpolOptions>,
1047
- ) -> RbResult<Self> {
1048
- let df = self
1049
- .df
1050
- .borrow()
1051
- .quantile(quantile, interpolation.0)
1052
- .map_err(RbPolarsErr::from)?;
1053
- Ok(df.into())
1054
- }
1055
-
1056
1032
  pub fn to_dummies(
1057
1033
  &self,
1058
1034
  columns: Option<Vec<String>>,
@@ -1124,7 +1100,7 @@ impl RbDataFrame {
1124
1100
  .into_datetime(tu, tz)
1125
1101
  .into_series()
1126
1102
  }
1127
- Some(DataType::Utf8) => {
1103
+ Some(DataType::String) => {
1128
1104
  apply_lambda_with_utf8_out_type(df, lambda, 0, None).into_series()
1129
1105
  }
1130
1106
  _ => return apply_lambda_unknown(df, lambda, inference_size),
@@ -1157,7 +1133,7 @@ impl RbDataFrame {
1157
1133
  };
1158
1134
  Ok(self
1159
1135
  .df
1160
- .borrow()
1136
+ .borrow_mut()
1161
1137
  .transpose(keep_names_as.as_deref(), new_col_names)
1162
1138
  .map_err(RbPolarsErr::from)?
1163
1139
  .into())
@@ -1199,4 +1175,8 @@ impl RbDataFrame {
1199
1175
  let df = self.df.borrow().unnest(names).map_err(RbPolarsErr::from)?;
1200
1176
  Ok(df.into())
1201
1177
  }
1178
+
1179
+ pub fn clear(&self) -> Self {
1180
+ self.df.borrow().clear().into()
1181
+ }
1202
1182
  }
@@ -1,3 +1,5 @@
1
+ use polars::prelude::*;
2
+
1
3
  use crate::RbExpr;
2
4
 
3
5
  impl RbExpr {
@@ -12,4 +14,76 @@ impl RbExpr {
12
14
  pub fn array_sum(&self) -> Self {
13
15
  self.inner.clone().arr().sum().into()
14
16
  }
17
+
18
+ pub fn arr_unique(&self, maintain_order: bool) -> Self {
19
+ if maintain_order {
20
+ self.inner.clone().arr().unique_stable().into()
21
+ } else {
22
+ self.inner.clone().arr().unique().into()
23
+ }
24
+ }
25
+
26
+ pub fn arr_to_list(&self) -> Self {
27
+ self.inner.clone().arr().to_list().into()
28
+ }
29
+
30
+ pub fn arr_all(&self) -> Self {
31
+ self.inner.clone().arr().all().into()
32
+ }
33
+
34
+ pub fn arr_any(&self) -> Self {
35
+ self.inner.clone().arr().any().into()
36
+ }
37
+
38
+ pub fn arr_sort(&self, descending: bool, nulls_last: bool) -> Self {
39
+ self.inner
40
+ .clone()
41
+ .arr()
42
+ .sort(SortOptions {
43
+ descending,
44
+ nulls_last,
45
+ ..Default::default()
46
+ })
47
+ .into()
48
+ }
49
+
50
+ pub fn arr_reverse(&self) -> Self {
51
+ self.inner.clone().arr().reverse().into()
52
+ }
53
+
54
+ pub fn arr_arg_min(&self) -> Self {
55
+ self.inner.clone().arr().arg_min().into()
56
+ }
57
+
58
+ pub fn arr_arg_max(&self) -> Self {
59
+ self.inner.clone().arr().arg_max().into()
60
+ }
61
+
62
+ pub fn arr_get(&self, index: &RbExpr) -> Self {
63
+ self.inner.clone().arr().get(index.inner.clone()).into()
64
+ }
65
+
66
+ pub fn arr_join(&self, separator: &RbExpr, ignore_nulls: bool) -> Self {
67
+ self.inner
68
+ .clone()
69
+ .arr()
70
+ .join(separator.inner.clone(), ignore_nulls)
71
+ .into()
72
+ }
73
+
74
+ pub fn arr_contains(&self, other: &RbExpr) -> Self {
75
+ self.inner
76
+ .clone()
77
+ .arr()
78
+ .contains(other.inner.clone())
79
+ .into()
80
+ }
81
+
82
+ pub fn arr_count_matches(&self, expr: &RbExpr) -> Self {
83
+ self.inner
84
+ .clone()
85
+ .arr()
86
+ .count_matches(expr.inner.clone())
87
+ .into()
88
+ }
15
89
  }
@@ -5,6 +5,13 @@ use crate::RbExpr;
5
5
 
6
6
  impl RbExpr {
7
7
  pub fn cat_set_ordering(&self, ordering: Wrap<CategoricalOrdering>) -> Self {
8
- self.inner.clone().cat().set_ordering(ordering.0).into()
8
+ self.inner
9
+ .clone()
10
+ .cast(DataType::Categorical(None, ordering.0))
11
+ .into()
12
+ }
13
+
14
+ pub fn cat_get_categories(&self) -> Self {
15
+ self.inner.clone().cat().get_categories().into()
9
16
  }
10
17
  }
@@ -61,6 +61,14 @@ impl RbExpr {
61
61
  self.inner.clone().dt().month_end().into()
62
62
  }
63
63
 
64
+ pub fn dt_base_utc_offset(&self) -> Self {
65
+ self.inner.clone().dt().base_utc_offset().into()
66
+ }
67
+
68
+ pub fn dt_dst_offset(&self) -> Self {
69
+ self.inner.clone().dt().dst_offset().into()
70
+ }
71
+
64
72
  pub fn dt_round(&self, every: String, offset: String) -> Self {
65
73
  self.inner.clone().dt().round(&every, &offset).into()
66
74
  }
@@ -149,73 +157,31 @@ impl RbExpr {
149
157
  self.inner.clone().dt().timestamp(tu.0).into()
150
158
  }
151
159
 
152
- pub fn duration_days(&self) -> Self {
153
- self.inner
154
- .clone()
155
- .map(
156
- |s| Ok(Some(s.duration()?.days().into_series())),
157
- GetOutput::from_type(DataType::Int64),
158
- )
159
- .into()
160
+ pub fn dt_total_days(&self) -> Self {
161
+ self.inner.clone().dt().total_days().into()
160
162
  }
161
163
 
162
- pub fn duration_hours(&self) -> Self {
163
- self.inner
164
- .clone()
165
- .map(
166
- |s| Ok(Some(s.duration()?.hours().into_series())),
167
- GetOutput::from_type(DataType::Int64),
168
- )
169
- .into()
164
+ pub fn dt_total_hours(&self) -> Self {
165
+ self.inner.clone().dt().total_hours().into()
170
166
  }
171
167
 
172
- pub fn duration_minutes(&self) -> Self {
173
- self.inner
174
- .clone()
175
- .map(
176
- |s| Ok(Some(s.duration()?.minutes().into_series())),
177
- GetOutput::from_type(DataType::Int64),
178
- )
179
- .into()
168
+ pub fn dt_total_minutes(&self) -> Self {
169
+ self.inner.clone().dt().total_minutes().into()
180
170
  }
181
171
 
182
- pub fn duration_seconds(&self) -> Self {
183
- self.inner
184
- .clone()
185
- .map(
186
- |s| Ok(Some(s.duration()?.seconds().into_series())),
187
- GetOutput::from_type(DataType::Int64),
188
- )
189
- .into()
172
+ pub fn dt_total_seconds(&self) -> Self {
173
+ self.inner.clone().dt().total_seconds().into()
190
174
  }
191
175
 
192
- pub fn duration_milliseconds(&self) -> Self {
193
- self.inner
194
- .clone()
195
- .map(
196
- |s| Ok(Some(s.duration()?.milliseconds().into_series())),
197
- GetOutput::from_type(DataType::Int64),
198
- )
199
- .into()
176
+ pub fn dt_total_milliseconds(&self) -> Self {
177
+ self.inner.clone().dt().total_milliseconds().into()
200
178
  }
201
179
 
202
- pub fn duration_microseconds(&self) -> Self {
203
- self.inner
204
- .clone()
205
- .map(
206
- |s| Ok(Some(s.duration()?.microseconds().into_series())),
207
- GetOutput::from_type(DataType::Int64),
208
- )
209
- .into()
180
+ pub fn dt_total_microseconds(&self) -> Self {
181
+ self.inner.clone().dt().total_microseconds().into()
210
182
  }
211
183
 
212
- pub fn duration_nanoseconds(&self) -> Self {
213
- self.inner
214
- .clone()
215
- .map(
216
- |s| Ok(Some(s.duration()?.nanoseconds().into_series())),
217
- GetOutput::from_type(DataType::Int64),
218
- )
219
- .into()
184
+ pub fn dt_total_nanoseconds(&self) -> Self {
185
+ self.inner.clone().dt().total_nanoseconds().into()
220
186
  }
221
187
  }