polars-df 0.3.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,7 +10,7 @@ use crate::conversion::*;
10
10
  use crate::lazy::apply::*;
11
11
  use crate::lazy::utils::rb_exprs_to_exprs;
12
12
  use crate::utils::reinterpret;
13
- use crate::{RbResult, RbSeries};
13
+ use crate::{RbPolarsErr, RbResult, RbSeries};
14
14
 
15
15
  #[magnus::wrap(class = "Polars::RbExpr")]
16
16
  #[derive(Clone)]
@@ -715,6 +715,18 @@ impl RbExpr {
715
715
  .into()
716
716
  }
717
717
 
718
+ pub fn binary_contains(&self, lit: Vec<u8>) -> Self {
719
+ self.inner.clone().binary().contains_literal(lit).into()
720
+ }
721
+
722
+ pub fn binary_ends_with(&self, sub: Vec<u8>) -> Self {
723
+ self.inner.clone().binary().ends_with(sub).into()
724
+ }
725
+
726
+ pub fn binary_starts_with(&self, sub: Vec<u8>) -> Self {
727
+ self.inner.clone().binary().starts_with(sub).into()
728
+ }
729
+
718
730
  pub fn str_hex_encode(&self) -> Self {
719
731
  self.clone()
720
732
  .inner
@@ -763,6 +775,58 @@ impl RbExpr {
763
775
  .into()
764
776
  }
765
777
 
778
+ pub fn binary_hex_encode(&self) -> Self {
779
+ self.clone()
780
+ .inner
781
+ .map(
782
+ move |s| s.binary().map(|s| Some(s.hex_encode().into_series())),
783
+ GetOutput::same_type(),
784
+ )
785
+ .with_fmt("binary.hex_encode")
786
+ .into()
787
+ }
788
+
789
+ pub fn binary_hex_decode(&self, strict: bool) -> Self {
790
+ self.clone()
791
+ .inner
792
+ .map(
793
+ move |s| {
794
+ s.binary()?
795
+ .hex_decode(strict)
796
+ .map(|s| Some(s.into_series()))
797
+ },
798
+ GetOutput::same_type(),
799
+ )
800
+ .with_fmt("binary.hex_decode")
801
+ .into()
802
+ }
803
+
804
+ pub fn binary_base64_encode(&self) -> Self {
805
+ self.clone()
806
+ .inner
807
+ .map(
808
+ move |s| s.binary().map(|s| Some(s.base64_encode().into_series())),
809
+ GetOutput::same_type(),
810
+ )
811
+ .with_fmt("binary.base64_encode")
812
+ .into()
813
+ }
814
+
815
+ pub fn binary_base64_decode(&self, strict: bool) -> Self {
816
+ self.clone()
817
+ .inner
818
+ .map(
819
+ move |s| {
820
+ s.binary()?
821
+ .base64_decode(strict)
822
+ .map(|s| Some(s.into_series()))
823
+ },
824
+ GetOutput::same_type(),
825
+ )
826
+ .with_fmt("binary.base64_decode")
827
+ .into()
828
+ }
829
+
766
830
  pub fn str_json_path_match(&self, pat: String) -> Self {
767
831
  let function = move |s: Series| {
768
832
  let ca = s.utf8()?;
@@ -1654,9 +1718,9 @@ pub fn cov(a: &RbExpr, b: &RbExpr) -> RbExpr {
1654
1718
  polars::lazy::dsl::cov(a.inner.clone(), b.inner.clone()).into()
1655
1719
  }
1656
1720
 
1657
- pub fn argsort_by(by: RArray, reverse: Vec<bool>) -> RbResult<RbExpr> {
1721
+ pub fn arg_sort_by(by: RArray, reverse: Vec<bool>) -> RbResult<RbExpr> {
1658
1722
  let by = rb_exprs_to_exprs(by)?;
1659
- Ok(polars::lazy::dsl::argsort_by(by, &reverse).into())
1723
+ Ok(polars::lazy::dsl::arg_sort_by(by, &reverse).into())
1660
1724
  }
1661
1725
 
1662
1726
  #[magnus::wrap(class = "Polars::RbWhen")]
@@ -1706,5 +1770,6 @@ pub fn concat_str(s: RArray, sep: String) -> RbResult<RbExpr> {
1706
1770
 
1707
1771
  pub fn concat_lst(s: RArray) -> RbResult<RbExpr> {
1708
1772
  let s = rb_exprs_to_exprs(s)?;
1709
- Ok(dsl::concat_lst(s).into())
1773
+ let expr = dsl::concat_lst(s).map_err(RbPolarsErr::from)?;
1774
+ Ok(expr.into())
1710
1775
  }
@@ -73,7 +73,7 @@ fn init() -> RbResult<()> {
73
73
  let class = module.define_class("RbDataFrame", Default::default())?;
74
74
  class.define_singleton_method("new", function!(RbDataFrame::init, 1))?;
75
75
  class.define_singleton_method("read_csv", function!(RbDataFrame::read_csv, -1))?;
76
- class.define_singleton_method("read_parquet", function!(RbDataFrame::read_parquet, 7))?;
76
+ class.define_singleton_method("read_parquet", function!(RbDataFrame::read_parquet, 9))?;
77
77
  class.define_singleton_method("read_ipc", function!(RbDataFrame::read_ipc, 6))?;
78
78
  class.define_singleton_method("read_avro", function!(RbDataFrame::read_avro, 4))?;
79
79
  class.define_singleton_method("read_hashes", function!(RbDataFrame::read_hashes, 3))?;
@@ -151,7 +151,6 @@ fn init() -> RbResult<()> {
151
151
  class.define_method("pivot_expr", method!(RbDataFrame::pivot_expr, 7))?;
152
152
  class.define_method("partition_by", method!(RbDataFrame::partition_by, 2))?;
153
153
  class.define_method("shift", method!(RbDataFrame::shift, 1))?;
154
- class.define_method("unique", method!(RbDataFrame::unique, 3))?;
155
154
  class.define_method("lazy", method!(RbDataFrame::lazy, 0))?;
156
155
  class.define_method("max", method!(RbDataFrame::max, 0))?;
157
156
  class.define_method("min", method!(RbDataFrame::min, 0))?;
@@ -304,10 +303,23 @@ fn init() -> RbResult<()> {
304
303
  class.define_method("str_contains", method!(RbExpr::str_contains, 3))?;
305
304
  class.define_method("str_ends_with", method!(RbExpr::str_ends_with, 1))?;
306
305
  class.define_method("str_starts_with", method!(RbExpr::str_starts_with, 1))?;
306
+ class.define_method("binary_contains", method!(RbExpr::binary_contains, 1))?;
307
+ class.define_method("binary_ends_with", method!(RbExpr::binary_ends_with, 1))?;
308
+ class.define_method("binary_starts_with", method!(RbExpr::binary_starts_with, 1))?;
307
309
  class.define_method("str_hex_encode", method!(RbExpr::str_hex_encode, 0))?;
308
310
  class.define_method("str_hex_decode", method!(RbExpr::str_hex_decode, 1))?;
309
311
  class.define_method("str_base64_encode", method!(RbExpr::str_base64_encode, 0))?;
310
312
  class.define_method("str_base64_decode", method!(RbExpr::str_base64_decode, 1))?;
313
+ class.define_method("binary_hex_encode", method!(RbExpr::binary_hex_encode, 0))?;
314
+ class.define_method("binary_hex_decode", method!(RbExpr::binary_hex_decode, 1))?;
315
+ class.define_method(
316
+ "binary_base64_encode",
317
+ method!(RbExpr::binary_base64_encode, 0),
318
+ )?;
319
+ class.define_method(
320
+ "binary_base64_decode",
321
+ method!(RbExpr::binary_base64_decode, 1),
322
+ )?;
311
323
  class.define_method(
312
324
  "str_json_path_match",
313
325
  method!(RbExpr::str_json_path_match, 1),
@@ -473,7 +485,7 @@ fn init() -> RbResult<()> {
473
485
  function!(crate::lazy::dsl::spearman_rank_corr, 4),
474
486
  )?;
475
487
  class.define_singleton_method("cov", function!(crate::lazy::dsl::cov, 2))?;
476
- class.define_singleton_method("argsort_by", function!(crate::lazy::dsl::argsort_by, 2))?;
488
+ class.define_singleton_method("arg_sort_by", function!(crate::lazy::dsl::arg_sort_by, 2))?;
477
489
  class.define_singleton_method("when", function!(crate::lazy::dsl::when, 1))?;
478
490
  class.define_singleton_method("concat_str", function!(crate::lazy::dsl::concat_str, 2))?;
479
491
  class.define_singleton_method("concat_lst", function!(crate::lazy::dsl::concat_lst, 1))?;
@@ -487,7 +499,7 @@ fn init() -> RbResult<()> {
487
499
  class.define_singleton_method("new_from_csv", function!(RbLazyFrame::new_from_csv, -1))?;
488
500
  class.define_singleton_method(
489
501
  "new_from_parquet",
490
- function!(RbLazyFrame::new_from_parquet, 7),
502
+ function!(RbLazyFrame::new_from_parquet, 8),
491
503
  )?;
492
504
  class.define_singleton_method("new_from_ipc", function!(RbLazyFrame::new_from_ipc, 6))?;
493
505
  class.define_method("write_json", method!(RbLazyFrame::write_json, 1))?;
@@ -504,6 +516,7 @@ fn init() -> RbResult<()> {
504
516
  class.define_method("sort_by_exprs", method!(RbLazyFrame::sort_by_exprs, 3))?;
505
517
  class.define_method("cache", method!(RbLazyFrame::cache, 0))?;
506
518
  class.define_method("collect", method!(RbLazyFrame::collect, 0))?;
519
+ class.define_method("sink_parquet", method!(RbLazyFrame::sink_parquet, 7))?;
507
520
  class.define_method("fetch", method!(RbLazyFrame::fetch, 1))?;
508
521
  class.define_method("filter", method!(RbLazyFrame::filter, 1))?;
509
522
  class.define_method("select", method!(RbLazyFrame::select, 1))?;
@@ -532,7 +545,7 @@ fn init() -> RbResult<()> {
532
545
  class.define_method("drop_nulls", method!(RbLazyFrame::drop_nulls, 1))?;
533
546
  class.define_method("slice", method!(RbLazyFrame::slice, 2))?;
534
547
  class.define_method("tail", method!(RbLazyFrame::tail, 1))?;
535
- class.define_method("melt", method!(RbLazyFrame::melt, 4))?;
548
+ class.define_method("melt", method!(RbLazyFrame::melt, 5))?;
536
549
  class.define_method("with_row_count", method!(RbLazyFrame::with_row_count, 2))?;
537
550
  class.define_method("drop_columns", method!(RbLazyFrame::drop_columns, 1))?;
538
551
  class.define_method("_clone", method!(RbLazyFrame::clone, 0))?;
@@ -560,6 +573,7 @@ fn init() -> RbResult<()> {
560
573
  class.define_singleton_method("new_opt_f32", function!(RbSeries::new_opt_f32, 3))?;
561
574
  class.define_singleton_method("new_opt_f64", function!(RbSeries::new_opt_f64, 3))?;
562
575
  class.define_singleton_method("new_str", function!(RbSeries::new_str, 3))?;
576
+ class.define_singleton_method("new_binary", function!(RbSeries::new_binary, 3))?;
563
577
  class.define_singleton_method("new_object", function!(RbSeries::new_object, 3))?;
564
578
  class.define_singleton_method("new_list", function!(RbSeries::new_list, 3))?;
565
579
  class.define_singleton_method("new_opt_date", function!(RbSeries::new_opt_date, 3))?;
@@ -125,6 +125,12 @@ impl RbSeries {
125
125
  RbSeries::new(s)
126
126
  }
127
127
 
128
+ pub fn new_binary(name: String, val: Wrap<BinaryChunked>, _strict: bool) -> Self {
129
+ let mut s = val.0.into_series();
130
+ s.rename(&name);
131
+ RbSeries::new(s)
132
+ }
133
+
128
134
  pub fn new_object(name: String, val: RArray, _strict: bool) -> RbResult<Self> {
129
135
  let val = val
130
136
  .each()
@@ -504,7 +510,6 @@ impl RbSeries {
504
510
  DataType::Int64 => RArray::from_iter(series.i64().unwrap()),
505
511
  DataType::Float32 => RArray::from_iter(series.f32().unwrap()),
506
512
  DataType::Float64 => RArray::from_iter(series.f64().unwrap()),
507
- DataType::Decimal128(_) => todo!(),
508
513
  DataType::Categorical(_) => {
509
514
  RArray::from_iter(series.categorical().unwrap().iter_str())
510
515
  }
@@ -526,6 +531,13 @@ impl RbSeries {
526
531
  let ca = series.utf8().unwrap();
527
532
  return RArray::from_iter(ca);
528
533
  }
534
+ DataType::Binary => {
535
+ let a = RArray::with_capacity(series.len());
536
+ for v in series.iter() {
537
+ a.push::<Value>(Wrap(v).into_value()).unwrap();
538
+ }
539
+ return a;
540
+ }
529
541
  DataType::Null | DataType::Unknown => {
530
542
  panic!("to_a not implemented for null/unknown")
531
543
  }
@@ -30,7 +30,7 @@ module Polars
30
30
  new_columns: nil
31
31
  )
32
32
  if Utils.pathlike?(file)
33
- path = Utils.format_path(file)
33
+ path = Utils.normalise_filepath(file)
34
34
  end
35
35
 
36
36
  dtype_list = nil
@@ -0,0 +1,77 @@
1
module Polars
  # Namespace for binary related expressions.
  class BinaryExpr
    # @private
    attr_accessor :_rbexpr

    # @private
    def initialize(expr)
      self._rbexpr = expr._rbexpr
    end

    # Check if binaries in Series contain a binary substring.
    #
    # @param lit [String]
    #   The binary substring to look for
    #
    # @return [Expr]
    def contains(lit)
      Utils.wrap_expr(_rbexpr.binary_contains(lit))
    end

    # Check if values end with a binary substring.
    #
    # @param sub [String]
    #   Suffix substring.
    #
    # @return [Expr]
    def ends_with(sub)
      Utils.wrap_expr(_rbexpr.binary_ends_with(sub))
    end

    # Check if values start with a binary substring.
    #
    # @param sub [String]
    #   Prefix substring.
    #
    # @return [Expr]
    def starts_with(sub)
      Utils.wrap_expr(_rbexpr.binary_starts_with(sub))
    end

    # Decode a value using the provided encoding.
    #
    # @param encoding ["hex", "base64"]
    #   The encoding to use.
    # @param strict [Boolean]
    #   Raise an error if the underlying value cannot be decoded,
    #   otherwise mask out with a null value.
    #
    # @return [Expr]
    #
    # @raise [ArgumentError]
    #   If `encoding` is neither "hex" nor "base64".
    def decode(encoding, strict: true)
      if encoding == "hex"
        Utils.wrap_expr(_rbexpr.binary_hex_decode(strict))
      elsif encoding == "base64"
        Utils.wrap_expr(_rbexpr.binary_base64_decode(strict))
      else
        # Single braces: Ruby strings have no `{{` escape, so the doubled
        # braces were printed literally in the message.
        raise ArgumentError, "encoding must be one of {'hex', 'base64'}, got #{encoding}"
      end
    end

    # Encode a value using the provided encoding.
    #
    # @param encoding ["hex", "base64"]
    #   The encoding to use.
    #
    # @return [Expr]
    #
    # @raise [ArgumentError]
    #   If `encoding` is neither "hex" nor "base64".
    def encode(encoding)
      if encoding == "hex"
        Utils.wrap_expr(_rbexpr.binary_hex_encode)
      elsif encoding == "base64"
        Utils.wrap_expr(_rbexpr.binary_base64_encode)
      else
        # Single braces: Ruby strings have no `{{` escape, so the doubled
        # braces were printed literally in the message.
        raise ArgumentError, "encoding must be one of {'hex', 'base64'}, got #{encoding}"
      end
    end
  end
end
@@ -0,0 +1,66 @@
1
module Polars
  # Series.bin namespace.
  class BinaryNameSpace
    include ExprDispatch

    self._accessor = "bin"

    # @private
    def initialize(series)
      self._s = series._s
    end

    # Test whether the binaries in the Series contain a binary substring.
    #
    # @param lit [String]
    #   The binary substring to look for
    #
    # @return [Series]
    def contains(lit)
      super(lit)
    end

    # Test whether string values end with a binary substring.
    #
    # @param sub [String]
    #   Suffix substring.
    #
    # @return [Series]
    def ends_with(sub)
      super(sub)
    end

    # Test whether values start with a binary substring.
    #
    # @param sub [String]
    #   Prefix substring.
    #
    # @return [Series]
    def starts_with(sub)
      super(sub)
    end

    # Decode values with the given encoding.
    #
    # @param encoding ["hex", "base64"]
    #   The encoding to use.
    # @param strict [Boolean]
    #   Raise an error if the underlying value cannot be decoded,
    #   otherwise mask out with a null value.
    #
    # @return [Series]
    def decode(encoding, strict: true)
      super(encoding, strict: strict)
    end

    # Encode values with the given encoding.
    #
    # @param encoding ["hex", "base64"]
    #   The encoding to use.
    #
    # @return [Series]
    def encode(encoding)
      super(encoding)
    end
  end
end
@@ -97,7 +97,7 @@ module Polars
97
97
  eol_char: "\n"
98
98
  )
99
99
  if Utils.pathlike?(file)
100
- path = Utils.format_path(file)
100
+ path = Utils.normalise_filepath(file)
101
101
  else
102
102
  path = nil
103
103
  # if defined?(StringIO) && file.is_a?(StringIO)
@@ -196,32 +196,56 @@ module Polars
196
196
 
197
197
  # @private
198
198
  def self._read_parquet(
199
- file,
199
+ source,
200
200
  columns: nil,
201
201
  n_rows: nil,
202
202
  parallel: "auto",
203
203
  row_count_name: nil,
204
204
  row_count_offset: 0,
205
- low_memory: false
205
+ low_memory: false,
206
+ use_statistics: true,
207
+ rechunk: true
206
208
  )
207
- if Utils.pathlike?(file)
208
- file = Utils.format_path(file)
209
+ if Utils.pathlike?(source)
210
+ source = Utils.normalise_filepath(source)
211
+ end
212
+ if columns.is_a?(String)
213
+ columns = [columns]
209
214
  end
210
215
 
211
- if file.is_a?(String) && file.include?("*")
212
- raise Todo
216
+ if source.is_a?(String) && source.include?("*") && Utils.local_file?(source)
217
+ scan =
218
+ Polars.scan_parquet(
219
+ source,
220
+ n_rows: n_rows,
221
+ rechunk: true,
222
+ parallel: parallel,
223
+ row_count_name: row_count_name,
224
+ row_count_offset: row_count_offset,
225
+ low_memory: low_memory
226
+ )
227
+
228
+ if columns.nil?
229
+ return self._from_rbdf(scan.collect._df)
230
+ elsif Utils.is_str_sequence(columns, allow_str: false)
231
+ return self._from_rbdf(scan.select(columns).collect._df)
232
+ else
233
+ raise ArgumentError, "cannot use glob patterns and integer based projection as `columns` argument; Use columns: Array[String]"
234
+ end
213
235
  end
214
236
 
215
237
  projection, columns = Utils.handle_projection_columns(columns)
216
238
  _from_rbdf(
217
239
  RbDataFrame.read_parquet(
218
- file,
240
+ source,
219
241
  columns,
220
242
  projection,
221
243
  n_rows,
222
244
  parallel,
223
245
  Utils._prepare_row_count_args(row_count_name, row_count_offset),
224
- low_memory
246
+ low_memory,
247
+ use_statistics,
248
+ rechunk
225
249
  )
226
250
  )
227
251
  end
@@ -229,7 +253,7 @@ module Polars
229
253
  # @private
230
254
  def self._read_avro(file, columns: nil, n_rows: nil)
231
255
  if Utils.pathlike?(file)
232
- file = Utils.format_path(file)
256
+ file = Utils.normalise_filepath(file)
233
257
  end
234
258
  projection, columns = Utils.handle_projection_columns(columns)
235
259
  _from_rbdf(RbDataFrame.read_avro(file, columns, projection, n_rows))
@@ -246,7 +270,7 @@ module Polars
246
270
  memory_map: true
247
271
  )
248
272
  if Utils.pathlike?(file)
249
- file = Utils.format_path(file)
273
+ file = Utils.normalise_filepath(file)
250
274
  end
251
275
  if columns.is_a?(String)
252
276
  columns = [columns]
@@ -272,7 +296,7 @@ module Polars
272
296
  # @private
273
297
  def self._read_json(file)
274
298
  if Utils.pathlike?(file)
275
- file = Utils.format_path(file)
299
+ file = Utils.normalise_filepath(file)
276
300
  end
277
301
 
278
302
  _from_rbdf(RbDataFrame.read_json(file))
@@ -281,7 +305,7 @@ module Polars
281
305
  # @private
282
306
  def self._read_ndjson(file)
283
307
  if Utils.pathlike?(file)
284
- file = Utils.format_path(file)
308
+ file = Utils.normalise_filepath(file)
285
309
  end
286
310
 
287
311
  _from_rbdf(RbDataFrame.read_ndjson(file))
@@ -774,7 +798,7 @@ module Polars
774
798
  row_oriented: false
775
799
  )
776
800
  if Utils.pathlike?(file)
777
- file = Utils.format_path(file)
801
+ file = Utils.normalise_filepath(file)
778
802
  end
779
803
 
780
804
  _df.write_json(file, pretty, row_oriented)
@@ -789,7 +813,7 @@ module Polars
789
813
  # @return [nil]
790
814
  def write_ndjson(file)
791
815
  if Utils.pathlike?(file)
792
- file = Utils.format_path(file)
816
+ file = Utils.normalise_filepath(file)
793
817
  end
794
818
 
795
819
  _df.write_ndjson(file)
@@ -879,7 +903,7 @@ module Polars
879
903
  end
880
904
 
881
905
  if Utils.pathlike?(file)
882
- file = Utils.format_path(file)
906
+ file = Utils.normalise_filepath(file)
883
907
  end
884
908
 
885
909
  _df.write_csv(
@@ -917,7 +941,7 @@ module Polars
917
941
  compression = "uncompressed"
918
942
  end
919
943
  if Utils.pathlike?(file)
920
- file = Utils.format_path(file)
944
+ file = Utils.normalise_filepath(file)
921
945
  end
922
946
 
923
947
  _df.write_avro(file, compression)
@@ -936,7 +960,7 @@ module Polars
936
960
  compression = "uncompressed"
937
961
  end
938
962
  if Utils.pathlike?(file)
939
- file = Utils.format_path(file)
963
+ file = Utils.normalise_filepath(file)
940
964
  end
941
965
 
942
966
  _df.write_ipc(file, compression)
@@ -978,7 +1002,7 @@ module Polars
978
1002
  compression = "uncompressed"
979
1003
  end
980
1004
  if Utils.pathlike?(file)
981
- file = Utils.format_path(file)
1005
+ file = Utils.normalise_filepath(file)
982
1006
  end
983
1007
 
984
1008
  _df.write_parquet(
@@ -3042,24 +3066,28 @@ module Polars
3042
3066
  if aggregate_fn.is_a?(String)
3043
3067
  case aggregate_fn
3044
3068
  when "first"
3045
- aggregate_fn = Polars.element.first
3069
+ aggregate_expr = Polars.element.first._rbexpr
3046
3070
  when "sum"
3047
- aggregate_fn = Polars.element.sum
3071
+ aggregate_expr = Polars.element.sum._rbexpr
3048
3072
  when "max"
3049
- aggregate_fn = Polars.element.max
3073
+ aggregate_expr = Polars.element.max._rbexpr
3050
3074
  when "min"
3051
- aggregate_fn = Polars.element.min
3075
+ aggregate_expr = Polars.element.min._rbexpr
3052
3076
  when "mean"
3053
- aggregate_fn = Polars.element.mean
3077
+ aggregate_expr = Polars.element.mean._rbexpr
3054
3078
  when "median"
3055
- aggregate_fn = Polars.element.median
3079
+ aggregate_expr = Polars.element.median._rbexpr
3056
3080
  when "last"
3057
- aggregate_fn = Polars.element.last
3081
+ aggregate_expr = Polars.element.last._rbexpr
3058
3082
  when "count"
3059
- aggregate_fn = Polars.count
3083
+ aggregate_expr = Polars.count._rbexpr
3060
3084
  else
3061
3085
  raise ArgumentError, "Argument aggregate fn: '#{aggregate_fn}' was not expected."
3062
3086
  end
3087
+ elsif aggregate_fn.nil?
3088
+ aggregate_expr = nil
3089
+ else
3090
+ aggregate_expr = aggregate_fn._rbexpr # aggregate_fn is neither a String nor nil here; `aggregate_function` was an undefined name
3063
3091
  end
3064
3092
 
3065
3093
  _from_rbdf(
@@ -3067,9 +3095,9 @@ module Polars
3067
3095
  values,
3068
3096
  index,
3069
3097
  columns,
3070
- aggregate_fn._rbexpr,
3071
3098
  maintain_order,
3072
3099
  sort_columns,
3100
+ aggregate_expr,
3073
3101
  separator
3074
3102
  )
3075
3103
  )
@@ -3174,7 +3202,7 @@ module Polars
3174
3202
  # # │ B ┆ 1 │
3175
3203
  # # │ C ┆ 2 │
3176
3204
  # # │ D ┆ 3 │
3177
- # # │ ... ...
3205
+ # # │
3178
3206
  # # │ F ┆ 5 │
3179
3207
  # # │ G ┆ 6 │
3180
3208
  # # │ H ┆ 7 │
@@ -4053,15 +4081,12 @@ module Polars
4053
4081
  # # │ 5 ┆ 3.0 ┆ true │
4054
4082
  # # └─────┴─────┴───────┘
4055
4083
  def unique(maintain_order: true, subset: nil, keep: "first")
4056
- if !subset.nil?
4057
- if subset.is_a?(String)
4058
- subset = [subset]
4059
- elsif !subset.is_a?(Array)
4060
- subset = subset.to_a
4061
- end
4062
- end
4063
-
4064
- _from_rbdf(_df.unique(maintain_order, subset, keep))
4084
+ self._from_rbdf(
4085
+ lazy
4086
+ .unique(maintain_order: maintain_order, subset: subset, keep: keep)
4087
+ .collect(no_optimization: true)
4088
+ ._df
4089
+ )
4065
4090
  end
4066
4091
 
4067
4092
  # Return the number of unique rows, or the number of unique row-subsets.
@@ -1130,7 +1130,7 @@ module Polars
1130
1130
  # ]
1131
1131
  # )
1132
1132
  # # =>
1133
- # # shape: (1001, 2)
1133
+ # # shape: (1_001, 2)
1134
1134
  # # ┌─────────────────────────┬───────────────────┐
1135
1135
  # # │ date ┆ milliseconds_diff │
1136
1136
  # # │ --- ┆ --- │
@@ -1140,7 +1140,7 @@ module Polars
1140
1140
  # # │ 2020-01-01 00:00:00.001 ┆ 1 │
1141
1141
  # # │ 2020-01-01 00:00:00.002 ┆ 1 │
1142
1142
  # # │ 2020-01-01 00:00:00.003 ┆ 1 │
1143
- # # │ ... ...
1143
+ # # │
1144
1144
  # # │ 2020-01-01 00:00:00.997 ┆ 1 │
1145
1145
  # # │ 2020-01-01 00:00:00.998 ┆ 1 │
1146
1146
  # # │ 2020-01-01 00:00:00.999 ┆ 1 │
@@ -1169,7 +1169,7 @@ module Polars
1169
1169
  # ]
1170
1170
  # )
1171
1171
  # # =>
1172
- # # shape: (1001, 2)
1172
+ # # shape: (1_001, 2)
1173
1173
  # # ┌─────────────────────────┬───────────────────┐
1174
1174
  # # │ date ┆ microseconds_diff │
1175
1175
  # # │ --- ┆ --- │
@@ -1179,7 +1179,7 @@ module Polars
1179
1179
  # # │ 2020-01-01 00:00:00.001 ┆ 1000 │
1180
1180
  # # │ 2020-01-01 00:00:00.002 ┆ 1000 │
1181
1181
  # # │ 2020-01-01 00:00:00.003 ┆ 1000 │
1182
- # # │ ... ...
1182
+ # # │
1183
1183
  # # │ 2020-01-01 00:00:00.997 ┆ 1000 │
1184
1184
  # # │ 2020-01-01 00:00:00.998 ┆ 1000 │
1185
1185
  # # │ 2020-01-01 00:00:00.999 ┆ 1000 │
@@ -1208,7 +1208,7 @@ module Polars
1208
1208
  # ]
1209
1209
  # )
1210
1210
  # # =>
1211
- # # shape: (1001, 2)
1211
+ # # shape: (1_001, 2)
1212
1212
  # # ┌─────────────────────────┬──────────────────┐
1213
1213
  # # │ date ┆ nanoseconds_diff │
1214
1214
  # # │ --- ┆ --- │
@@ -1218,7 +1218,7 @@ module Polars
1218
1218
  # # │ 2020-01-01 00:00:00.001 ┆ 1000000 │
1219
1219
  # # │ 2020-01-01 00:00:00.002 ┆ 1000000 │
1220
1220
  # # │ 2020-01-01 00:00:00.003 ┆ 1000000 │
1221
- # # │ ... ...
1221
+ # # │
1222
1222
  # # │ 2020-01-01 00:00:00.997 ┆ 1000000 │
1223
1223
  # # │ 2020-01-01 00:00:00.998 ┆ 1000000 │
1224
1224
  # # │ 2020-01-01 00:00:00.999 ┆ 1000000 │