polars-df 0.3.1 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -10,7 +10,7 @@ use crate::conversion::*;
10
10
  use crate::lazy::apply::*;
11
11
  use crate::lazy::utils::rb_exprs_to_exprs;
12
12
  use crate::utils::reinterpret;
13
- use crate::{RbResult, RbSeries};
13
+ use crate::{RbPolarsErr, RbResult, RbSeries};
14
14
 
15
15
  #[magnus::wrap(class = "Polars::RbExpr")]
16
16
  #[derive(Clone)]
@@ -715,6 +715,18 @@ impl RbExpr {
715
715
  .into()
716
716
  }
717
717
 
718
+ pub fn binary_contains(&self, lit: Vec<u8>) -> Self {
719
+ self.inner.clone().binary().contains_literal(lit).into()
720
+ }
721
+
722
+ pub fn binary_ends_with(&self, sub: Vec<u8>) -> Self {
723
+ self.inner.clone().binary().ends_with(sub).into()
724
+ }
725
+
726
+ pub fn binary_starts_with(&self, sub: Vec<u8>) -> Self {
727
+ self.inner.clone().binary().starts_with(sub).into()
728
+ }
729
+
718
730
  pub fn str_hex_encode(&self) -> Self {
719
731
  self.clone()
720
732
  .inner
@@ -763,6 +775,58 @@ impl RbExpr {
763
775
  .into()
764
776
  }
765
777
 
778
+ pub fn binary_hex_encode(&self) -> Self {
779
+ self.clone()
780
+ .inner
781
+ .map(
782
+ move |s| s.binary().map(|s| Some(s.hex_encode().into_series())),
783
+ GetOutput::same_type(),
784
+ )
785
+ .with_fmt("binary.hex_encode")
786
+ .into()
787
+ }
788
+
789
+ pub fn binary_hex_decode(&self, strict: bool) -> Self {
790
+ self.clone()
791
+ .inner
792
+ .map(
793
+ move |s| {
794
+ s.binary()?
795
+ .hex_decode(strict)
796
+ .map(|s| Some(s.into_series()))
797
+ },
798
+ GetOutput::same_type(),
799
+ )
800
+ .with_fmt("binary.hex_decode")
801
+ .into()
802
+ }
803
+
804
+ pub fn binary_base64_encode(&self) -> Self {
805
+ self.clone()
806
+ .inner
807
+ .map(
808
+ move |s| s.binary().map(|s| Some(s.base64_encode().into_series())),
809
+ GetOutput::same_type(),
810
+ )
811
+ .with_fmt("binary.base64_encode")
812
+ .into()
813
+ }
814
+
815
+ pub fn binary_base64_decode(&self, strict: bool) -> Self {
816
+ self.clone()
817
+ .inner
818
+ .map(
819
+ move |s| {
820
+ s.binary()?
821
+ .base64_decode(strict)
822
+ .map(|s| Some(s.into_series()))
823
+ },
824
+ GetOutput::same_type(),
825
+ )
826
+ .with_fmt("binary.base64_decode")
827
+ .into()
828
+ }
829
+
766
830
  pub fn str_json_path_match(&self, pat: String) -> Self {
767
831
  let function = move |s: Series| {
768
832
  let ca = s.utf8()?;
@@ -1654,9 +1718,9 @@ pub fn cov(a: &RbExpr, b: &RbExpr) -> RbExpr {
1654
1718
  polars::lazy::dsl::cov(a.inner.clone(), b.inner.clone()).into()
1655
1719
  }
1656
1720
 
1657
- pub fn argsort_by(by: RArray, reverse: Vec<bool>) -> RbResult<RbExpr> {
1721
+ pub fn arg_sort_by(by: RArray, reverse: Vec<bool>) -> RbResult<RbExpr> {
1658
1722
  let by = rb_exprs_to_exprs(by)?;
1659
- Ok(polars::lazy::dsl::argsort_by(by, &reverse).into())
1723
+ Ok(polars::lazy::dsl::arg_sort_by(by, &reverse).into())
1660
1724
  }
1661
1725
 
1662
1726
  #[magnus::wrap(class = "Polars::RbWhen")]
@@ -1706,5 +1770,6 @@ pub fn concat_str(s: RArray, sep: String) -> RbResult<RbExpr> {
1706
1770
 
1707
1771
  pub fn concat_lst(s: RArray) -> RbResult<RbExpr> {
1708
1772
  let s = rb_exprs_to_exprs(s)?;
1709
- Ok(dsl::concat_lst(s).into())
1773
+ let expr = dsl::concat_lst(s).map_err(RbPolarsErr::from)?;
1774
+ Ok(expr.into())
1710
1775
  }
@@ -73,7 +73,7 @@ fn init() -> RbResult<()> {
73
73
  let class = module.define_class("RbDataFrame", Default::default())?;
74
74
  class.define_singleton_method("new", function!(RbDataFrame::init, 1))?;
75
75
  class.define_singleton_method("read_csv", function!(RbDataFrame::read_csv, -1))?;
76
- class.define_singleton_method("read_parquet", function!(RbDataFrame::read_parquet, 7))?;
76
+ class.define_singleton_method("read_parquet", function!(RbDataFrame::read_parquet, 9))?;
77
77
  class.define_singleton_method("read_ipc", function!(RbDataFrame::read_ipc, 6))?;
78
78
  class.define_singleton_method("read_avro", function!(RbDataFrame::read_avro, 4))?;
79
79
  class.define_singleton_method("read_hashes", function!(RbDataFrame::read_hashes, 3))?;
@@ -151,7 +151,6 @@ fn init() -> RbResult<()> {
151
151
  class.define_method("pivot_expr", method!(RbDataFrame::pivot_expr, 7))?;
152
152
  class.define_method("partition_by", method!(RbDataFrame::partition_by, 2))?;
153
153
  class.define_method("shift", method!(RbDataFrame::shift, 1))?;
154
- class.define_method("unique", method!(RbDataFrame::unique, 3))?;
155
154
  class.define_method("lazy", method!(RbDataFrame::lazy, 0))?;
156
155
  class.define_method("max", method!(RbDataFrame::max, 0))?;
157
156
  class.define_method("min", method!(RbDataFrame::min, 0))?;
@@ -304,10 +303,23 @@ fn init() -> RbResult<()> {
304
303
  class.define_method("str_contains", method!(RbExpr::str_contains, 3))?;
305
304
  class.define_method("str_ends_with", method!(RbExpr::str_ends_with, 1))?;
306
305
  class.define_method("str_starts_with", method!(RbExpr::str_starts_with, 1))?;
306
+ class.define_method("binary_contains", method!(RbExpr::binary_contains, 1))?;
307
+ class.define_method("binary_ends_with", method!(RbExpr::binary_ends_with, 1))?;
308
+ class.define_method("binary_starts_with", method!(RbExpr::binary_starts_with, 1))?;
307
309
  class.define_method("str_hex_encode", method!(RbExpr::str_hex_encode, 0))?;
308
310
  class.define_method("str_hex_decode", method!(RbExpr::str_hex_decode, 1))?;
309
311
  class.define_method("str_base64_encode", method!(RbExpr::str_base64_encode, 0))?;
310
312
  class.define_method("str_base64_decode", method!(RbExpr::str_base64_decode, 1))?;
313
+ class.define_method("binary_hex_encode", method!(RbExpr::binary_hex_encode, 0))?;
314
+ class.define_method("binary_hex_decode", method!(RbExpr::binary_hex_decode, 1))?;
315
+ class.define_method(
316
+ "binary_base64_encode",
317
+ method!(RbExpr::binary_base64_encode, 0),
318
+ )?;
319
+ class.define_method(
320
+ "binary_base64_decode",
321
+ method!(RbExpr::binary_base64_decode, 1),
322
+ )?;
311
323
  class.define_method(
312
324
  "str_json_path_match",
313
325
  method!(RbExpr::str_json_path_match, 1),
@@ -473,7 +485,7 @@ fn init() -> RbResult<()> {
473
485
  function!(crate::lazy::dsl::spearman_rank_corr, 4),
474
486
  )?;
475
487
  class.define_singleton_method("cov", function!(crate::lazy::dsl::cov, 2))?;
476
- class.define_singleton_method("argsort_by", function!(crate::lazy::dsl::argsort_by, 2))?;
488
+ class.define_singleton_method("arg_sort_by", function!(crate::lazy::dsl::arg_sort_by, 2))?;
477
489
  class.define_singleton_method("when", function!(crate::lazy::dsl::when, 1))?;
478
490
  class.define_singleton_method("concat_str", function!(crate::lazy::dsl::concat_str, 2))?;
479
491
  class.define_singleton_method("concat_lst", function!(crate::lazy::dsl::concat_lst, 1))?;
@@ -487,7 +499,7 @@ fn init() -> RbResult<()> {
487
499
  class.define_singleton_method("new_from_csv", function!(RbLazyFrame::new_from_csv, -1))?;
488
500
  class.define_singleton_method(
489
501
  "new_from_parquet",
490
- function!(RbLazyFrame::new_from_parquet, 7),
502
+ function!(RbLazyFrame::new_from_parquet, 8),
491
503
  )?;
492
504
  class.define_singleton_method("new_from_ipc", function!(RbLazyFrame::new_from_ipc, 6))?;
493
505
  class.define_method("write_json", method!(RbLazyFrame::write_json, 1))?;
@@ -504,6 +516,7 @@ fn init() -> RbResult<()> {
504
516
  class.define_method("sort_by_exprs", method!(RbLazyFrame::sort_by_exprs, 3))?;
505
517
  class.define_method("cache", method!(RbLazyFrame::cache, 0))?;
506
518
  class.define_method("collect", method!(RbLazyFrame::collect, 0))?;
519
+ class.define_method("sink_parquet", method!(RbLazyFrame::sink_parquet, 7))?;
507
520
  class.define_method("fetch", method!(RbLazyFrame::fetch, 1))?;
508
521
  class.define_method("filter", method!(RbLazyFrame::filter, 1))?;
509
522
  class.define_method("select", method!(RbLazyFrame::select, 1))?;
@@ -532,7 +545,7 @@ fn init() -> RbResult<()> {
532
545
  class.define_method("drop_nulls", method!(RbLazyFrame::drop_nulls, 1))?;
533
546
  class.define_method("slice", method!(RbLazyFrame::slice, 2))?;
534
547
  class.define_method("tail", method!(RbLazyFrame::tail, 1))?;
535
- class.define_method("melt", method!(RbLazyFrame::melt, 4))?;
548
+ class.define_method("melt", method!(RbLazyFrame::melt, 5))?;
536
549
  class.define_method("with_row_count", method!(RbLazyFrame::with_row_count, 2))?;
537
550
  class.define_method("drop_columns", method!(RbLazyFrame::drop_columns, 1))?;
538
551
  class.define_method("_clone", method!(RbLazyFrame::clone, 0))?;
@@ -560,6 +573,7 @@ fn init() -> RbResult<()> {
560
573
  class.define_singleton_method("new_opt_f32", function!(RbSeries::new_opt_f32, 3))?;
561
574
  class.define_singleton_method("new_opt_f64", function!(RbSeries::new_opt_f64, 3))?;
562
575
  class.define_singleton_method("new_str", function!(RbSeries::new_str, 3))?;
576
+ class.define_singleton_method("new_binary", function!(RbSeries::new_binary, 3))?;
563
577
  class.define_singleton_method("new_object", function!(RbSeries::new_object, 3))?;
564
578
  class.define_singleton_method("new_list", function!(RbSeries::new_list, 3))?;
565
579
  class.define_singleton_method("new_opt_date", function!(RbSeries::new_opt_date, 3))?;
@@ -125,6 +125,12 @@ impl RbSeries {
125
125
  RbSeries::new(s)
126
126
  }
127
127
 
128
+ pub fn new_binary(name: String, val: Wrap<BinaryChunked>, _strict: bool) -> Self {
129
+ let mut s = val.0.into_series();
130
+ s.rename(&name);
131
+ RbSeries::new(s)
132
+ }
133
+
128
134
  pub fn new_object(name: String, val: RArray, _strict: bool) -> RbResult<Self> {
129
135
  let val = val
130
136
  .each()
@@ -504,7 +510,6 @@ impl RbSeries {
504
510
  DataType::Int64 => RArray::from_iter(series.i64().unwrap()),
505
511
  DataType::Float32 => RArray::from_iter(series.f32().unwrap()),
506
512
  DataType::Float64 => RArray::from_iter(series.f64().unwrap()),
507
- DataType::Decimal128(_) => todo!(),
508
513
  DataType::Categorical(_) => {
509
514
  RArray::from_iter(series.categorical().unwrap().iter_str())
510
515
  }
@@ -526,6 +531,13 @@ impl RbSeries {
526
531
  let ca = series.utf8().unwrap();
527
532
  return RArray::from_iter(ca);
528
533
  }
534
+ DataType::Binary => {
535
+ let a = RArray::with_capacity(series.len());
536
+ for v in series.iter() {
537
+ a.push::<Value>(Wrap(v).into_value()).unwrap();
538
+ }
539
+ return a;
540
+ }
529
541
  DataType::Null | DataType::Unknown => {
530
542
  panic!("to_a not implemented for null/unknown")
531
543
  }
@@ -30,7 +30,7 @@ module Polars
30
30
  new_columns: nil
31
31
  )
32
32
  if Utils.pathlike?(file)
33
- path = Utils.format_path(file)
33
+ path = Utils.normalise_filepath(file)
34
34
  end
35
35
 
36
36
  dtype_list = nil
@@ -0,0 +1,77 @@
1
+ module Polars
2
+ # Namespace for binary related expressions.
3
+ class BinaryExpr
4
+ # @private
5
+ attr_accessor :_rbexpr
6
+
7
+ # @private
8
+ def initialize(expr)
9
+ self._rbexpr = expr._rbexpr
10
+ end
11
+
12
+ # Check if binaries in Series contain a binary substring.
13
+ #
14
+ # @param lit [String]
15
+ # The binary substring to look for
16
+ #
17
+ # @return [Expr]
18
+ def contains(lit)
19
+ Utils.wrap_expr(_rbexpr.binary_contains(lit))
20
+ end
21
+
22
+ # Check if binary values end with a binary substring.
23
+ #
24
+ # @param sub [String]
25
+ # Suffix substring.
26
+ #
27
+ # @return [Expr]
28
+ def ends_with(sub)
29
+ Utils.wrap_expr(_rbexpr.binary_ends_with(sub))
30
+ end
31
+
32
+ # Check if values start with a binary substring.
33
+ #
34
+ # @param sub [String]
35
+ # Prefix substring.
36
+ #
37
+ # @return [Expr]
38
+ def starts_with(sub)
39
+ Utils.wrap_expr(_rbexpr.binary_starts_with(sub))
40
+ end
41
+
42
+ # Decode a value using the provided encoding.
43
+ #
44
+ # @param encoding ["hex", "base64"]
45
+ # The encoding to use.
46
+ # @param strict [Boolean]
47
+ # Raise an error if the underlying value cannot be decoded,
48
+ # otherwise mask out with a null value.
49
+ #
50
+ # @return [Expr]
51
+ def decode(encoding, strict: true)
52
+ if encoding == "hex"
53
+ Utils.wrap_expr(_rbexpr.binary_hex_decode(strict))
54
+ elsif encoding == "base64"
55
+ Utils.wrap_expr(_rbexpr.binary_base64_decode(strict))
56
+ else
57
+ raise ArgumentError, "encoding must be one of {{'hex', 'base64'}}, got #{encoding}"
58
+ end
59
+ end
60
+
61
+ # Encode a value using the provided encoding.
62
+ #
63
+ # @param encoding ["hex", "base64"]
64
+ # The encoding to use.
65
+ #
66
+ # @return [Expr]
67
+ def encode(encoding)
68
+ if encoding == "hex"
69
+ Utils.wrap_expr(_rbexpr.binary_hex_encode)
70
+ elsif encoding == "base64"
71
+ Utils.wrap_expr(_rbexpr.binary_base64_encode)
72
+ else
73
+ raise ArgumentError, "encoding must be one of {{'hex', 'base64'}}, got #{encoding}"
74
+ end
75
+ end
76
+ end
77
+ end
@@ -0,0 +1,66 @@
1
+ module Polars
2
+ # Series.bin namespace.
3
+ class BinaryNameSpace
4
+ include ExprDispatch
5
+
6
+ self._accessor = "bin"
7
+
8
+ # @private
9
+ def initialize(series)
10
+ self._s = series._s
11
+ end
12
+
13
+ # Check if binaries in Series contain a binary substring.
14
+ #
15
+ # @param lit [String]
16
+ # The binary substring to look for
17
+ #
18
+ # @return [Series]
19
+ def contains(lit)
20
+ super
21
+ end
22
+
23
+ # Check if binary values end with a binary substring.
24
+ #
25
+ # @param sub [String]
26
+ # Suffix substring.
27
+ #
28
+ # @return [Series]
29
+ def ends_with(sub)
30
+ super
31
+ end
32
+
33
+ # Check if values start with a binary substring.
34
+ #
35
+ # @param sub [String]
36
+ # Prefix substring.
37
+ #
38
+ # @return [Series]
39
+ def starts_with(sub)
40
+ super
41
+ end
42
+
43
+ # Decode a value using the provided encoding.
44
+ #
45
+ # @param encoding ["hex", "base64"]
46
+ # The encoding to use.
47
+ # @param strict [Boolean]
48
+ # Raise an error if the underlying value cannot be decoded,
49
+ # otherwise mask out with a null value.
50
+ #
51
+ # @return [Series]
52
+ def decode(encoding, strict: true)
53
+ super
54
+ end
55
+
56
+ # Encode a value using the provided encoding.
57
+ #
58
+ # @param encoding ["hex", "base64"]
59
+ # The encoding to use.
60
+ #
61
+ # @return [Series]
62
+ def encode(encoding)
63
+ super
64
+ end
65
+ end
66
+ end
@@ -97,7 +97,7 @@ module Polars
97
97
  eol_char: "\n"
98
98
  )
99
99
  if Utils.pathlike?(file)
100
- path = Utils.format_path(file)
100
+ path = Utils.normalise_filepath(file)
101
101
  else
102
102
  path = nil
103
103
  # if defined?(StringIO) && file.is_a?(StringIO)
@@ -196,32 +196,56 @@ module Polars
196
196
 
197
197
  # @private
198
198
  def self._read_parquet(
199
- file,
199
+ source,
200
200
  columns: nil,
201
201
  n_rows: nil,
202
202
  parallel: "auto",
203
203
  row_count_name: nil,
204
204
  row_count_offset: 0,
205
- low_memory: false
205
+ low_memory: false,
206
+ use_statistics: true,
207
+ rechunk: true
206
208
  )
207
- if Utils.pathlike?(file)
208
- file = Utils.format_path(file)
209
+ if Utils.pathlike?(source)
210
+ source = Utils.normalise_filepath(source)
211
+ end
212
+ if columns.is_a?(String)
213
+ columns = [columns]
209
214
  end
210
215
 
211
- if file.is_a?(String) && file.include?("*")
212
- raise Todo
216
+ if source.is_a?(String) && source.include?("*") && Utils.local_file?(source)
217
+ scan =
218
+ Polars.scan_parquet(
219
+ source,
220
+ n_rows: n_rows,
221
+ rechunk: true,
222
+ parallel: parallel,
223
+ row_count_name: row_count_name,
224
+ row_count_offset: row_count_offset,
225
+ low_memory: low_memory
226
+ )
227
+
228
+ if columns.nil?
229
+ return self._from_rbdf(scan.collect._df)
230
+ elsif Utils.is_str_sequence(columns, allow_str: false)
231
+ return self._from_rbdf(scan.select(columns).collect._df)
232
+ else
233
+ raise ArgumentError, "cannot use glob patterns and integer based projection as `columns` argument; Use columns: Array[String]"
234
+ end
213
235
  end
214
236
 
215
237
  projection, columns = Utils.handle_projection_columns(columns)
216
238
  _from_rbdf(
217
239
  RbDataFrame.read_parquet(
218
- file,
240
+ source,
219
241
  columns,
220
242
  projection,
221
243
  n_rows,
222
244
  parallel,
223
245
  Utils._prepare_row_count_args(row_count_name, row_count_offset),
224
- low_memory
246
+ low_memory,
247
+ use_statistics,
248
+ rechunk
225
249
  )
226
250
  )
227
251
  end
@@ -229,7 +253,7 @@ module Polars
229
253
  # @private
230
254
  def self._read_avro(file, columns: nil, n_rows: nil)
231
255
  if Utils.pathlike?(file)
232
- file = Utils.format_path(file)
256
+ file = Utils.normalise_filepath(file)
233
257
  end
234
258
  projection, columns = Utils.handle_projection_columns(columns)
235
259
  _from_rbdf(RbDataFrame.read_avro(file, columns, projection, n_rows))
@@ -246,7 +270,7 @@ module Polars
246
270
  memory_map: true
247
271
  )
248
272
  if Utils.pathlike?(file)
249
- file = Utils.format_path(file)
273
+ file = Utils.normalise_filepath(file)
250
274
  end
251
275
  if columns.is_a?(String)
252
276
  columns = [columns]
@@ -272,7 +296,7 @@ module Polars
272
296
  # @private
273
297
  def self._read_json(file)
274
298
  if Utils.pathlike?(file)
275
- file = Utils.format_path(file)
299
+ file = Utils.normalise_filepath(file)
276
300
  end
277
301
 
278
302
  _from_rbdf(RbDataFrame.read_json(file))
@@ -281,7 +305,7 @@ module Polars
281
305
  # @private
282
306
  def self._read_ndjson(file)
283
307
  if Utils.pathlike?(file)
284
- file = Utils.format_path(file)
308
+ file = Utils.normalise_filepath(file)
285
309
  end
286
310
 
287
311
  _from_rbdf(RbDataFrame.read_ndjson(file))
@@ -774,7 +798,7 @@ module Polars
774
798
  row_oriented: false
775
799
  )
776
800
  if Utils.pathlike?(file)
777
- file = Utils.format_path(file)
801
+ file = Utils.normalise_filepath(file)
778
802
  end
779
803
 
780
804
  _df.write_json(file, pretty, row_oriented)
@@ -789,7 +813,7 @@ module Polars
789
813
  # @return [nil]
790
814
  def write_ndjson(file)
791
815
  if Utils.pathlike?(file)
792
- file = Utils.format_path(file)
816
+ file = Utils.normalise_filepath(file)
793
817
  end
794
818
 
795
819
  _df.write_ndjson(file)
@@ -879,7 +903,7 @@ module Polars
879
903
  end
880
904
 
881
905
  if Utils.pathlike?(file)
882
- file = Utils.format_path(file)
906
+ file = Utils.normalise_filepath(file)
883
907
  end
884
908
 
885
909
  _df.write_csv(
@@ -917,7 +941,7 @@ module Polars
917
941
  compression = "uncompressed"
918
942
  end
919
943
  if Utils.pathlike?(file)
920
- file = Utils.format_path(file)
944
+ file = Utils.normalise_filepath(file)
921
945
  end
922
946
 
923
947
  _df.write_avro(file, compression)
@@ -936,7 +960,7 @@ module Polars
936
960
  compression = "uncompressed"
937
961
  end
938
962
  if Utils.pathlike?(file)
939
- file = Utils.format_path(file)
963
+ file = Utils.normalise_filepath(file)
940
964
  end
941
965
 
942
966
  _df.write_ipc(file, compression)
@@ -978,7 +1002,7 @@ module Polars
978
1002
  compression = "uncompressed"
979
1003
  end
980
1004
  if Utils.pathlike?(file)
981
- file = Utils.format_path(file)
1005
+ file = Utils.normalise_filepath(file)
982
1006
  end
983
1007
 
984
1008
  _df.write_parquet(
@@ -3042,24 +3066,28 @@ module Polars
3042
3066
  if aggregate_fn.is_a?(String)
3043
3067
  case aggregate_fn
3044
3068
  when "first"
3045
- aggregate_fn = Polars.element.first
3069
+ aggregate_expr = Polars.element.first._rbexpr
3046
3070
  when "sum"
3047
- aggregate_fn = Polars.element.sum
3071
+ aggregate_expr = Polars.element.sum._rbexpr
3048
3072
  when "max"
3049
- aggregate_fn = Polars.element.max
3073
+ aggregate_expr = Polars.element.max._rbexpr
3050
3074
  when "min"
3051
- aggregate_fn = Polars.element.min
3075
+ aggregate_expr = Polars.element.min._rbexpr
3052
3076
  when "mean"
3053
- aggregate_fn = Polars.element.mean
3077
+ aggregate_expr = Polars.element.mean._rbexpr
3054
3078
  when "median"
3055
- aggregate_fn = Polars.element.median
3079
+ aggregate_expr = Polars.element.median._rbexpr
3056
3080
  when "last"
3057
- aggregate_fn = Polars.element.last
3081
+ aggregate_expr = Polars.element.last._rbexpr
3058
3082
  when "count"
3059
- aggregate_fn = Polars.count
3083
+ aggregate_expr = Polars.count._rbexpr
3060
3084
  else
3061
3085
  raise ArgumentError, "Argument aggregate fn: '#{aggregate_fn}' was not expected."
3062
3086
  end
3087
+ elsif aggregate_fn.nil?
3088
+ aggregate_expr = nil
3089
+ else
3090
+ aggregate_expr = aggregate_function._rbexpr
3063
3091
  end
3064
3092
 
3065
3093
  _from_rbdf(
@@ -3067,9 +3095,9 @@ module Polars
3067
3095
  values,
3068
3096
  index,
3069
3097
  columns,
3070
- aggregate_fn._rbexpr,
3071
3098
  maintain_order,
3072
3099
  sort_columns,
3100
+ aggregate_expr,
3073
3101
  separator
3074
3102
  )
3075
3103
  )
@@ -3174,7 +3202,7 @@ module Polars
3174
3202
  # # │ B ┆ 1 │
3175
3203
  # # │ C ┆ 2 │
3176
3204
  # # │ D ┆ 3 │
3177
- # # │ ... ┆ ... │
3205
+ # # │ … ┆ … │
3178
3206
  # # │ F ┆ 5 │
3179
3207
  # # │ G ┆ 6 │
3180
3208
  # # │ H ┆ 7 │
@@ -4053,15 +4081,12 @@ module Polars
4053
4081
  # # │ 5 ┆ 3.0 ┆ true │
4054
4082
  # # └─────┴─────┴───────┘
4055
4083
  def unique(maintain_order: true, subset: nil, keep: "first")
4056
- if !subset.nil?
4057
- if subset.is_a?(String)
4058
- subset = [subset]
4059
- elsif !subset.is_a?(Array)
4060
- subset = subset.to_a
4061
- end
4062
- end
4063
-
4064
- _from_rbdf(_df.unique(maintain_order, subset, keep))
4084
+ self._from_rbdf(
4085
+ lazy
4086
+ .unique(maintain_order: maintain_order, subset: subset, keep: keep)
4087
+ .collect(no_optimization: true)
4088
+ ._df
4089
+ )
4065
4090
  end
4066
4091
 
4067
4092
  # Return the number of unique rows, or the number of unique row-subsets.
@@ -1130,7 +1130,7 @@ module Polars
1130
1130
  # ]
1131
1131
  # )
1132
1132
  # # =>
1133
- # # shape: (1001, 2)
1133
+ # # shape: (1_001, 2)
1134
1134
  # # ┌─────────────────────────┬───────────────────┐
1135
1135
  # # │ date ┆ milliseconds_diff │
1136
1136
  # # │ --- ┆ --- │
@@ -1140,7 +1140,7 @@ module Polars
1140
1140
  # # │ 2020-01-01 00:00:00.001 ┆ 1 │
1141
1141
  # # │ 2020-01-01 00:00:00.002 ┆ 1 │
1142
1142
  # # │ 2020-01-01 00:00:00.003 ┆ 1 │
1143
- # # │ ... ┆ ... │
1143
+ # # │ … ┆ … │
1144
1144
  # # │ 2020-01-01 00:00:00.997 ┆ 1 │
1145
1145
  # # │ 2020-01-01 00:00:00.998 ┆ 1 │
1146
1146
  # # │ 2020-01-01 00:00:00.999 ┆ 1 │
@@ -1169,7 +1169,7 @@ module Polars
1169
1169
  # ]
1170
1170
  # )
1171
1171
  # # =>
1172
- # # shape: (1001, 2)
1172
+ # # shape: (1_001, 2)
1173
1173
  # # ┌─────────────────────────┬───────────────────┐
1174
1174
  # # │ date ┆ microseconds_diff │
1175
1175
  # # │ --- ┆ --- │
@@ -1179,7 +1179,7 @@ module Polars
1179
1179
  # # │ 2020-01-01 00:00:00.001 ┆ 1000 │
1180
1180
  # # │ 2020-01-01 00:00:00.002 ┆ 1000 │
1181
1181
  # # │ 2020-01-01 00:00:00.003 ┆ 1000 │
1182
- # # │ ... ┆ ... │
1182
+ # # │ … ┆ … │
1183
1183
  # # │ 2020-01-01 00:00:00.997 ┆ 1000 │
1184
1184
  # # │ 2020-01-01 00:00:00.998 ┆ 1000 │
1185
1185
  # # │ 2020-01-01 00:00:00.999 ┆ 1000 │
@@ -1208,7 +1208,7 @@ module Polars
1208
1208
  # ]
1209
1209
  # )
1210
1210
  # # =>
1211
- # # shape: (1001, 2)
1211
+ # # shape: (1_001, 2)
1212
1212
  # # ┌─────────────────────────┬──────────────────┐
1213
1213
  # # │ date ┆ nanoseconds_diff │
1214
1214
  # # │ --- ┆ --- │
@@ -1218,7 +1218,7 @@ module Polars
1218
1218
  # # │ 2020-01-01 00:00:00.001 ┆ 1000000 │
1219
1219
  # # │ 2020-01-01 00:00:00.002 ┆ 1000000 │
1220
1220
  # # │ 2020-01-01 00:00:00.003 ┆ 1000000 │
1221
- # # │ ... ┆ ... │
1221
+ # # │ … ┆ … │
1222
1222
  # # │ 2020-01-01 00:00:00.997 ┆ 1000000 │
1223
1223
  # # │ 2020-01-01 00:00:00.998 ┆ 1000000 │
1224
1224
  # # │ 2020-01-01 00:00:00.999 ┆ 1000000 │