polars-df 0.15.0 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -330,6 +330,7 @@ impl RbLazyFrame {
330
330
  nulls_last: vec![nulls_last],
331
331
  multithreaded,
332
332
  maintain_order,
333
+ limit: None,
333
334
  },
334
335
  )
335
336
  .into()
@@ -353,6 +354,7 @@ impl RbLazyFrame {
353
354
  nulls_last,
354
355
  maintain_order,
355
356
  multithreaded,
357
+ limit: None,
356
358
  },
357
359
  )
358
360
  .into())
@@ -379,6 +381,8 @@ impl RbLazyFrame {
379
381
  row_group_size: Option<usize>,
380
382
  data_page_size: Option<usize>,
381
383
  maintain_order: bool,
384
+ cloud_options: Option<Vec<(String, String)>>,
385
+ retries: usize,
382
386
  ) -> RbResult<()> {
383
387
  let compression = parse_parquet_compression(&compression, compression_level)?;
384
388
 
@@ -390,8 +394,15 @@ impl RbLazyFrame {
390
394
  maintain_order,
391
395
  };
392
396
 
397
+ let cloud_options = {
398
+ let cloud_options =
399
+ parse_cloud_options(path.to_str().unwrap(), cloud_options.unwrap_or_default())?;
400
+ Some(cloud_options.with_max_retries(retries))
401
+ };
402
+
393
403
  let ldf = self.ldf.borrow().clone();
394
- ldf.sink_parquet(path, options).map_err(RbPolarsErr::from)?;
404
+ ldf.sink_parquet(&path, options, cloud_options)
405
+ .map_err(RbPolarsErr::from)?;
395
406
  Ok(())
396
407
  }
397
408
 
@@ -400,14 +411,23 @@ impl RbLazyFrame {
400
411
  path: PathBuf,
401
412
  compression: Option<Wrap<IpcCompression>>,
402
413
  maintain_order: bool,
414
+ cloud_options: Option<Vec<(String, String)>>,
415
+ retries: usize,
403
416
  ) -> RbResult<()> {
404
417
  let options = IpcWriterOptions {
405
418
  compression: compression.map(|c| c.0),
406
419
  maintain_order,
407
420
  };
408
421
 
422
+ let cloud_options = {
423
+ let cloud_options =
424
+ parse_cloud_options(path.to_str().unwrap(), cloud_options.unwrap_or_default())?;
425
+ Some(cloud_options.with_max_retries(retries))
426
+ };
427
+
409
428
  let ldf = self.ldf.borrow().clone();
410
- ldf.sink_ipc(path, options).map_err(RbPolarsErr::from)?;
429
+ ldf.sink_ipc(&path, options, cloud_options)
430
+ .map_err(RbPolarsErr::from)?;
411
431
  Ok(())
412
432
  }
413
433
 
@@ -430,6 +450,9 @@ impl RbLazyFrame {
430
450
  quote_style: Option<Wrap<QuoteStyle>>,
431
451
  maintain_order: bool,
432
452
  ) -> RbResult<()> {
453
+ // TODO
454
+ let cloud_options = None;
455
+
433
456
  let quote_style = quote_style.map_or(QuoteStyle::default(), |wrap| wrap.0);
434
457
  let null_value = null_value.unwrap_or(SerializeOptions::default().null);
435
458
 
@@ -454,16 +477,36 @@ impl RbLazyFrame {
454
477
  serialize_options,
455
478
  };
456
479
 
480
+ let cloud_options = {
481
+ let cloud_options =
482
+ parse_cloud_options(path.to_str().unwrap(), cloud_options.unwrap_or_default())?;
483
+ Some(cloud_options)
484
+ };
485
+
457
486
  let ldf = self.ldf.borrow().clone();
458
- ldf.sink_csv(path, options).map_err(RbPolarsErr::from)?;
487
+ ldf.sink_csv(&path, options, cloud_options)
488
+ .map_err(RbPolarsErr::from)?;
459
489
  Ok(())
460
490
  }
461
491
 
462
- pub fn sink_json(&self, path: PathBuf, maintain_order: bool) -> RbResult<()> {
492
+ pub fn sink_json(
493
+ &self,
494
+ path: PathBuf,
495
+ maintain_order: bool,
496
+ cloud_options: Option<Vec<(String, String)>>,
497
+ retries: usize,
498
+ ) -> RbResult<()> {
463
499
  let options = JsonWriterOptions { maintain_order };
464
500
 
501
+ let cloud_options = {
502
+ let cloud_options =
503
+ parse_cloud_options(path.to_str().unwrap(), cloud_options.unwrap_or_default())?;
504
+ Some(cloud_options.with_max_retries(retries))
505
+ };
506
+
465
507
  let ldf = self.ldf.borrow().clone();
466
- ldf.sink_json(path, options).map_err(RbPolarsErr::from)?;
508
+ ldf.sink_json(&path, options, cloud_options)
509
+ .map_err(RbPolarsErr::from)?;
467
510
  Ok(())
468
511
  }
469
512
 
@@ -590,6 +633,8 @@ impl RbLazyFrame {
590
633
  tolerance: Option<Wrap<AnyValue<'_>>>,
591
634
  tolerance_str: Option<String>,
592
635
  coalesce: bool,
636
+ allow_eq: bool,
637
+ check_sortedness: bool,
593
638
  ) -> RbResult<Self> {
594
639
  let coalesce = if coalesce {
595
640
  JoinCoalesce::CoalesceColumns
@@ -614,6 +659,8 @@ impl RbLazyFrame {
614
659
  right_by: right_by.map(strings_to_pl_smallstr),
615
660
  tolerance: tolerance.map(|t| t.0.into_static()),
616
661
  tolerance_str: tolerance_str.map(|s| s.into()),
662
+ allow_eq,
663
+ check_sortedness,
617
664
  }))
618
665
  .suffix(suffix)
619
666
  .finish()
@@ -69,10 +69,10 @@ fn init(ruby: &Ruby) -> RbResult<()> {
69
69
  class.define_method("estimated_size", method!(RbDataFrame::estimated_size, 0))?;
70
70
  class.define_method("dtype_strings", method!(RbDataFrame::dtype_strings, 0))?;
71
71
  class.define_method("write_avro", method!(RbDataFrame::write_avro, 3))?;
72
- class.define_method("write_json", method!(RbDataFrame::write_json, 3))?;
72
+ class.define_method("write_json", method!(RbDataFrame::write_json, 1))?;
73
73
  class.define_method("write_ndjson", method!(RbDataFrame::write_ndjson, 1))?;
74
74
  class.define_method("write_csv", method!(RbDataFrame::write_csv, 10))?;
75
- class.define_method("write_ipc", method!(RbDataFrame::write_ipc, 3))?;
75
+ class.define_method("write_ipc", method!(RbDataFrame::write_ipc, 5))?;
76
76
  class.define_method(
77
77
  "write_ipc_stream",
78
78
  method!(RbDataFrame::write_ipc_stream, 3),
@@ -143,10 +143,6 @@ fn init(ruby: &Ruby) -> RbResult<()> {
143
143
  class.define_method("pivot_expr", method!(RbDataFrame::pivot_expr, 7))?;
144
144
  class.define_method("partition_by", method!(RbDataFrame::partition_by, 3))?;
145
145
  class.define_method("lazy", method!(RbDataFrame::lazy, 0))?;
146
- class.define_method("mean_horizontal", method!(RbDataFrame::mean_horizontal, 1))?;
147
- class.define_method("max_horizontal", method!(RbDataFrame::max_horizontal, 0))?;
148
- class.define_method("min_horizontal", method!(RbDataFrame::min_horizontal, 0))?;
149
- class.define_method("sum_horizontal", method!(RbDataFrame::sum_horizontal, 1))?;
150
146
  class.define_method("to_dummies", method!(RbDataFrame::to_dummies, 3))?;
151
147
  class.define_method("null_count", method!(RbDataFrame::null_count, 0))?;
152
148
  class.define_method("map_rows", method!(RbDataFrame::map_rows, 3))?;
@@ -568,10 +564,10 @@ fn init(ruby: &Ruby) -> RbResult<()> {
568
564
  class.define_singleton_method("int_range", function!(functions::range::int_range, 4))?;
569
565
  class.define_singleton_method("int_ranges", function!(functions::range::int_ranges, 4))?;
570
566
  class.define_singleton_method("repeat", function!(functions::lazy::repeat, 3))?;
571
- class.define_singleton_method("pearson_corr", function!(functions::lazy::pearson_corr, 3))?;
567
+ class.define_singleton_method("pearson_corr", function!(functions::lazy::pearson_corr, 2))?;
572
568
  class.define_singleton_method(
573
569
  "spearman_rank_corr",
574
- function!(functions::lazy::spearman_rank_corr, 4),
570
+ function!(functions::lazy::spearman_rank_corr, 3),
575
571
  )?;
576
572
  class.define_singleton_method("sql_expr", function!(functions::lazy::sql_expr, 1))?;
577
573
  class.define_singleton_method("cov", function!(functions::lazy::cov, 3))?;
@@ -605,11 +601,11 @@ fn init(ruby: &Ruby) -> RbResult<()> {
605
601
  )?;
606
602
  class.define_singleton_method(
607
603
  "sum_horizontal",
608
- function!(functions::aggregation::sum_horizontal, 1),
604
+ function!(functions::aggregation::sum_horizontal, 2),
609
605
  )?;
610
606
  class.define_singleton_method(
611
607
  "mean_horizontal",
612
- function!(functions::aggregation::mean_horizontal, 1),
608
+ function!(functions::aggregation::mean_horizontal, 2),
613
609
  )?;
614
610
  class.define_singleton_method("as_struct", function!(functions::lazy::as_struct, 1))?;
615
611
  class.define_singleton_method("coalesce", function!(functions::lazy::coalesce, 1))?;
@@ -660,8 +656,8 @@ fn init(ruby: &Ruby) -> RbResult<()> {
660
656
  function!(functions::meta::get_index_type, 0),
661
657
  )?;
662
658
  class.define_singleton_method(
663
- "threadpool_size",
664
- function!(functions::meta::threadpool_size, 0),
659
+ "thread_pool_size",
660
+ function!(functions::meta::thread_pool_size, 0),
665
661
  )?;
666
662
  class.define_singleton_method(
667
663
  "enable_string_cache",
@@ -747,10 +743,10 @@ fn init(ruby: &Ruby) -> RbResult<()> {
747
743
  class.define_method("sort_by_exprs", method!(RbLazyFrame::sort_by_exprs, 5))?;
748
744
  class.define_method("cache", method!(RbLazyFrame::cache, 0))?;
749
745
  class.define_method("collect", method!(RbLazyFrame::collect, 0))?;
750
- class.define_method("sink_parquet", method!(RbLazyFrame::sink_parquet, 7))?;
751
- class.define_method("sink_ipc", method!(RbLazyFrame::sink_ipc, 3))?;
746
+ class.define_method("sink_parquet", method!(RbLazyFrame::sink_parquet, 9))?;
747
+ class.define_method("sink_ipc", method!(RbLazyFrame::sink_ipc, 5))?;
752
748
  class.define_method("sink_csv", method!(RbLazyFrame::sink_csv, 15))?;
753
- class.define_method("sink_json", method!(RbLazyFrame::sink_json, 2))?;
749
+ class.define_method("sink_json", method!(RbLazyFrame::sink_json, 4))?;
754
750
  class.define_method("fetch", method!(RbLazyFrame::fetch, 1))?;
755
751
  class.define_method("filter", method!(RbLazyFrame::filter, 1))?;
756
752
  class.define_method("select", method!(RbLazyFrame::select, 1))?;
@@ -762,7 +758,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
762
758
  method!(RbLazyFrame::group_by_dynamic, 9),
763
759
  )?;
764
760
  class.define_method("with_context", method!(RbLazyFrame::with_context, 1))?;
765
- class.define_method("join_asof", method!(RbLazyFrame::join_asof, 12))?;
761
+ class.define_method("join_asof", method!(RbLazyFrame::join_asof, 14))?;
766
762
  class.define_method("join", method!(RbLazyFrame::join, 10))?;
767
763
  class.define_method("with_column", method!(RbLazyFrame::with_column, 1))?;
768
764
  class.define_method("with_columns", method!(RbLazyFrame::with_columns, 1))?;
@@ -1109,7 +1105,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
1109
1105
  class.define_singleton_method("hold", function!(RbStringCacheHolder::hold, 0))?;
1110
1106
 
1111
1107
  // arrow array stream
1112
- let class = module.define_class("RbArrowArrayStream", ruby.class_object())?;
1108
+ let class = module.define_class("ArrowArrayStream", ruby.class_object())?;
1113
1109
  class.define_method("to_i", method!(RbArrowArrayStream::to_i, 0))?;
1114
1110
 
1115
1111
  Ok(())
@@ -5,9 +5,9 @@ pub mod series;
5
5
  use magnus::{prelude::*, RHash, Value};
6
6
  use polars::chunked_array::builder::get_list_builder;
7
7
  use polars::prelude::*;
8
- use polars_core::export::rayon::prelude::*;
9
8
  use polars_core::utils::CustomIterTools;
10
9
  use polars_core::POOL;
10
+ use rayon::prelude::*;
11
11
 
12
12
  use crate::{ObjectValue, RbPolarsErr, RbResult, RbSeries, Wrap};
13
13
 
@@ -21,6 +21,7 @@ impl RbSeries {
21
21
  DataType::Int16 => RArray::from_iter(series.i16().unwrap()).into_value(),
22
22
  DataType::Int32 => RArray::from_iter(series.i32().unwrap()).into_value(),
23
23
  DataType::Int64 => RArray::from_iter(series.i64().unwrap()).into_value(),
24
+ DataType::Int128 => todo!(),
24
25
  DataType::Float32 => RArray::from_iter(series.f32().unwrap()).into_value(),
25
26
  DataType::Float64 => RArray::from_iter(series.f64().unwrap()).into_value(),
26
27
  DataType::Categorical(_, _) | DataType::Enum(_, _) => {
@@ -97,29 +97,17 @@ impl RbSeries {
97
97
  }
98
98
 
99
99
  pub fn bitand(&self, other: &RbSeries) -> RbResult<Self> {
100
- let out = self
101
- .series
102
- .borrow()
103
- .bitand(&other.series.borrow())
104
- .map_err(RbPolarsErr::from)?;
100
+ let out = (&*self.series.borrow() & &*other.series.borrow()).map_err(RbPolarsErr::from)?;
105
101
  Ok(out.into())
106
102
  }
107
103
 
108
104
  pub fn bitor(&self, other: &RbSeries) -> RbResult<Self> {
109
- let out = self
110
- .series
111
- .borrow()
112
- .bitor(&other.series.borrow())
113
- .map_err(RbPolarsErr::from)?;
105
+ let out = (&*self.series.borrow() | &*other.series.borrow()).map_err(RbPolarsErr::from)?;
114
106
  Ok(out.into())
115
107
  }
116
108
 
117
109
  pub fn bitxor(&self, other: &RbSeries) -> RbResult<Self> {
118
- let out = self
119
- .series
120
- .borrow()
121
- .bitxor(&other.series.borrow())
122
- .map_err(RbPolarsErr::from)?;
110
+ let out = (&*self.series.borrow() ^ &*other.series.borrow()).map_err(RbPolarsErr::from)?;
123
111
  Ok(out.into())
124
112
  }
125
113
 
@@ -1,7 +1,7 @@
1
+ use arrow::array::Array;
2
+ use arrow::ffi::{ArrowArrayStream, ArrowArrayStreamReader};
1
3
  use magnus::prelude::*;
2
4
  use magnus::Value;
3
- use polars::export::arrow::array::Array;
4
- use polars::export::arrow::ffi::{ArrowArrayStream, ArrowArrayStreamReader};
5
5
  use polars::prelude::*;
6
6
 
7
7
  use super::RbSeries;
@@ -39,7 +39,7 @@ pub(crate) fn import_stream_rbcapsule(capsule: Value) -> RbResult<RbSeries> {
39
39
 
40
40
  // Series::try_from fails for an empty vec of chunks
41
41
  let s = if produced_arrays.is_empty() {
42
- let polars_dt = DataType::from_arrow(stream.field().dtype(), false);
42
+ let polars_dt = DataType::from_arrow_field(stream.field());
43
43
  Series::new_empty(stream.field().name.clone(), &polars_dt)
44
44
  } else {
45
45
  Series::try_from((stream.field(), produced_arrays)).unwrap()
@@ -1,4 +1,4 @@
1
- use polars::export::arrow::array::Array;
1
+ use arrow::array::Array;
2
2
  use polars::prelude::*;
3
3
 
4
4
  use crate::error::RbPolarsErr;
@@ -604,10 +604,6 @@ module Polars
604
604
  #
605
605
  # @param file [String]
606
606
  # File path to which the result should be written.
607
- # @param pretty [Boolean]
608
- # Pretty serialize json.
609
- # @param row_oriented [Boolean]
610
- # Write to row oriented json. This is slower, but more common.
611
607
  #
612
608
  # @return [nil]
613
609
  #
@@ -619,16 +615,8 @@ module Polars
619
615
  # }
620
616
  # )
621
617
  # df.write_json
622
- # # => "{\"columns\":[{\"name\":\"foo\",\"datatype\":\"Int64\",\"bit_settings\":\"\",\"values\":[1,2,3]},{\"name\":\"bar\",\"datatype\":\"Int64\",\"bit_settings\":\"\",\"values\":[6,7,8]}]}"
623
- #
624
- # @example
625
- # df.write_json(row_oriented: true)
626
618
  # # => "[{\"foo\":1,\"bar\":6},{\"foo\":2,\"bar\":7},{\"foo\":3,\"bar\":8}]"
627
- def write_json(
628
- file = nil,
629
- pretty: false,
630
- row_oriented: false
631
- )
619
+ def write_json(file = nil)
632
620
  if Utils.pathlike?(file)
633
621
  file = Utils.normalize_filepath(file)
634
622
  end
@@ -636,7 +624,7 @@ module Polars
636
624
  if file.nil? || to_string_io
637
625
  buf = StringIO.new
638
626
  buf.set_encoding(Encoding::BINARY)
639
- _df.write_json(buf, pretty, row_oriented)
627
+ _df.write_json(buf)
640
628
  json_bytes = buf.string
641
629
 
642
630
  json_str = json_bytes.force_encoding(Encoding::UTF_8)
@@ -646,7 +634,7 @@ module Polars
646
634
  return json_str
647
635
  end
648
636
  else
649
- _df.write_json(file, pretty, row_oriented)
637
+ _df.write_json(file)
650
638
  end
651
639
  nil
652
640
  end
@@ -831,7 +819,13 @@ module Polars
831
819
  # Compression method. Defaults to "uncompressed".
832
820
  #
833
821
  # @return [nil]
834
- def write_ipc(file, compression: "uncompressed", compat_level: nil)
822
+ def write_ipc(
823
+ file,
824
+ compression: "uncompressed",
825
+ compat_level: nil,
826
+ storage_options: nil,
827
+ retries: 2
828
+ )
835
829
  return_bytes = file.nil?
836
830
  if return_bytes
837
831
  file = StringIO.new
@@ -849,7 +843,13 @@ module Polars
849
843
  compression = "uncompressed"
850
844
  end
851
845
 
852
- _df.write_ipc(file, compression, compat_level)
846
+ if storage_options&.any?
847
+ storage_options = storage_options.to_a
848
+ else
849
+ storage_options = nil
850
+ end
851
+
852
+ _df.write_ipc(file, compression, compat_level, storage_options, retries)
853
853
  return_bytes ? file.string : nil
854
854
  end
855
855
 
@@ -961,6 +961,61 @@ module Polars
961
961
  )
962
962
  end
963
963
 
964
+ # Write DataFrame as delta table.
965
+ #
966
+ # @param target [Object]
967
+ # URI of a table or a DeltaTable object.
968
+ # @param mode ["error", "append", "overwrite", "ignore", "merge"]
969
+ # How to handle existing data.
970
+ # @param storage_options [Hash]
971
+ # Extra options for the storage backends supported by `deltalake-rb`.
972
+ # @param delta_write_options [Hash]
973
+ # Additional keyword arguments while writing a Delta lake Table.
974
+ # @param delta_merge_options [Hash]
975
+ # Keyword arguments which are required to `MERGE` a Delta lake Table.
976
+ #
977
+ # @return [nil]
978
+ def write_delta(
979
+ target,
980
+ mode: "error",
981
+ storage_options: nil,
982
+ delta_write_options: nil,
983
+ delta_merge_options: nil
984
+ )
985
+ Polars.send(:_check_if_delta_available)
986
+
987
+ if Utils.pathlike?(target)
988
+ target = Polars.send(:_resolve_delta_lake_uri, target.to_s, strict: false)
989
+ end
990
+
991
+ data = self
992
+
993
+ if mode == "merge"
994
+ if delta_merge_options.nil?
995
+ msg = "You need to pass delta_merge_options with at least a given predicate for `MERGE` to work."
996
+ raise ArgumentError, msg
997
+ end
998
+ if target.is_a?(::String)
999
+ dt = DeltaLake::Table.new(target, storage_options: storage_options)
1000
+ else
1001
+ dt = target
1002
+ end
1003
+
1004
+ predicate = delta_merge_options.delete(:predicate)
1005
+ dt.merge(data, predicate, **delta_merge_options)
1006
+ else
1007
+ delta_write_options ||= {}
1008
+
1009
+ DeltaLake.write(
1010
+ target,
1011
+ data,
1012
+ mode: mode,
1013
+ storage_options: storage_options,
1014
+ **delta_write_options
1015
+ )
1016
+ end
1017
+ end
1018
+
964
1019
  # Return an estimation of the total (heap) allocated size of the DataFrame.
965
1020
  #
966
1021
  # Estimated size is given in the specified unit (bytes by default).
@@ -2227,6 +2282,14 @@ module Polars
2227
2282
  # keys are within this distance. If an asof join is done on columns of dtype
2228
2283
  # "Date", "Datetime", "Duration" or "Time" you use the following string
2229
2284
  # language:
2285
+ # @param allow_exact_matches [Boolean]
2286
+ # Whether exact matches are valid join predicates.
2287
+ # - If true, allow matching with the same `on` value (i.e. less-than-or-equal-to / greater-than-or-equal-to).
2288
+ # - If false, don't match the same `on` value (i.e., strictly less-than / strictly greater-than).
2289
+ # @param check_sortedness [Boolean]
2290
+ # Check the sortedness of the asof keys. If the keys are not sorted Polars
2291
+ # will error, or in case of 'by' argument raise a warning. This might become
2292
+ # a hard error in the future.
2230
2293
  #
2231
2294
  # - 1ns (1 nanosecond)
2232
2295
  # - 1us (1 microsecond)
@@ -2308,7 +2371,9 @@ module Polars
2308
2371
  tolerance: nil,
2309
2372
  allow_parallel: true,
2310
2373
  force_parallel: false,
2311
- coalesce: true
2374
+ coalesce: true,
2375
+ allow_exact_matches: true,
2376
+ check_sortedness: true
2312
2377
  )
2313
2378
  lazy
2314
2379
  .join_asof(
@@ -2324,7 +2389,9 @@ module Polars
2324
2389
  tolerance: tolerance,
2325
2390
  allow_parallel: allow_parallel,
2326
2391
  force_parallel: force_parallel,
2327
- coalesce: coalesce
2392
+ coalesce: coalesce,
2393
+ allow_exact_matches: allow_exact_matches,
2394
+ check_sortedness: check_sortedness
2328
2395
  )
2329
2396
  .collect(no_optimization: true)
2330
2397
  end
@@ -3939,14 +4006,32 @@ module Polars
3939
4006
  # # ╞═════╪═════╪═════╡
3940
4007
  # # │ 3 ┆ 8 ┆ c │
3941
4008
  # # └─────┴─────┴─────┘
3942
- def max(axis: 0)
3943
- if axis == 0
3944
- lazy.max.collect(_eager: true)
3945
- elsif axis == 1
3946
- Utils.wrap_s(_df.max_horizontal)
3947
- else
3948
- raise ArgumentError, "Axis should be 0 or 1."
3949
- end
4009
+ def max
4010
+ lazy.max.collect(_eager: true)
4011
+ end
4012
+
4013
+ # Get the maximum value horizontally across columns.
4014
+ #
4015
+ # @return [Series]
4016
+ #
4017
+ # @example
4018
+ # df = Polars::DataFrame.new(
4019
+ # {
4020
+ # "foo" => [1, 2, 3],
4021
+ # "bar" => [4.0, 5.0, 6.0]
4022
+ # }
4023
+ # )
4024
+ # df.max_horizontal
4025
+ # # =>
4026
+ # # shape: (3,)
4027
+ # # Series: 'max' [f64]
4028
+ # # [
4029
+ # # 4.0
4030
+ # # 5.0
4031
+ # # 6.0
4032
+ # # ]
4033
+ def max_horizontal
4034
+ select(max: F.max_horizontal(F.all)).to_series
3950
4035
  end
3951
4036
 
3952
4037
  # Aggregate the columns of this DataFrame to their minimum value.
@@ -3971,22 +4056,35 @@ module Polars
3971
4056
  # # ╞═════╪═════╪═════╡
3972
4057
  # # │ 1 ┆ 6 ┆ a │
3973
4058
  # # └─────┴─────┴─────┘
3974
- def min(axis: 0)
3975
- if axis == 0
3976
- lazy.min.collect(_eager: true)
3977
- elsif axis == 1
3978
- Utils.wrap_s(_df.min_horizontal)
3979
- else
3980
- raise ArgumentError, "Axis should be 0 or 1."
3981
- end
4059
+ def min
4060
+ lazy.min.collect(_eager: true)
3982
4061
  end
3983
4062
 
3984
- # Aggregate the columns of this DataFrame to their sum value.
4063
+ # Get the minimum value horizontally across columns.
3985
4064
  #
3986
- # @param axis [Integer]
3987
- # Either 0 or 1.
3988
- # @param null_strategy ["ignore", "propagate"]
3989
- # This argument is only used if axis == 1.
4065
+ # @return [Series]
4066
+ #
4067
+ # @example
4068
+ # df = Polars::DataFrame.new(
4069
+ # {
4070
+ # "foo" => [1, 2, 3],
4071
+ # "bar" => [4.0, 5.0, 6.0]
4072
+ # }
4073
+ # )
4074
+ # df.min_horizontal
4075
+ # # =>
4076
+ # # shape: (3,)
4077
+ # # Series: 'min' [f64]
4078
+ # # [
4079
+ # # 1.0
4080
+ # # 2.0
4081
+ # # 3.0
4082
+ # # ]
4083
+ def min_horizontal
4084
+ select(min: F.min_horizontal(F.all)).to_series
4085
+ end
4086
+
4087
+ # Aggregate the columns of this DataFrame to their sum value.
3990
4088
  #
3991
4089
  # @return [DataFrame]
3992
4090
  #
@@ -4008,35 +4106,42 @@ module Polars
4008
4106
  # # ╞═════╪═════╪══════╡
4009
4107
  # # │ 6 ┆ 21 ┆ null │
4010
4108
  # # └─────┴─────┴──────┘
4109
+ def sum
4110
+ lazy.sum.collect(_eager: true)
4111
+ end
4112
+
4113
+ # Sum all values horizontally across columns.
4114
+ #
4115
+ # @param ignore_nulls [Boolean]
4116
+ # Ignore null values (default).
4117
+ # If set to `false`, any null value in the input will lead to a null output.
4118
+ #
4119
+ # @return [Series]
4011
4120
  #
4012
4121
  # @example
4013
- # df.sum(axis: 1)
4122
+ # df = Polars::DataFrame.new(
4123
+ # {
4124
+ # "foo" => [1, 2, 3],
4125
+ # "bar" => [4.0, 5.0, 6.0]
4126
+ # }
4127
+ # )
4128
+ # df.sum_horizontal
4014
4129
  # # =>
4015
4130
  # # shape: (3,)
4016
- # # Series: 'foo' [str]
4131
+ # # Series: 'sum' [f64]
4017
4132
  # # [
4018
- # # "16a"
4019
- # # "27b"
4020
- # # "38c"
4133
+ # # 5.0
4134
+ # # 7.0
4135
+ # # 9.0
4021
4136
  # # ]
4022
- def sum(axis: 0, null_strategy: "ignore")
4023
- case axis
4024
- when 0
4025
- lazy.sum.collect(_eager: true)
4026
- when 1
4027
- Utils.wrap_s(_df.sum_horizontal(null_strategy))
4028
- else
4029
- raise ArgumentError, "Axis should be 0 or 1."
4030
- end
4137
+ def sum_horizontal(ignore_nulls: true)
4138
+ select(
4139
+ sum: F.sum_horizontal(F.all, ignore_nulls: ignore_nulls)
4140
+ ).to_series
4031
4141
  end
4032
4142
 
4033
4143
  # Aggregate the columns of this DataFrame to their mean value.
4034
4144
  #
4035
- # @param axis [Integer]
4036
- # Either 0 or 1.
4037
- # @param null_strategy ["ignore", "propagate"]
4038
- # This argument is only used if axis == 1.
4039
- #
4040
4145
  # @return [DataFrame]
4041
4146
  #
4042
4147
  # @example
@@ -4057,15 +4162,38 @@ module Polars
4057
4162
  # # ╞═════╪═════╪══════╡
4058
4163
  # # │ 2.0 ┆ 7.0 ┆ null │
4059
4164
  # # └─────┴─────┴──────┘
4060
- def mean(axis: 0, null_strategy: "ignore")
4061
- case axis
4062
- when 0
4063
- lazy.mean.collect(_eager: true)
4064
- when 1
4065
- Utils.wrap_s(_df.mean_horizontal(null_strategy))
4066
- else
4067
- raise ArgumentError, "Axis should be 0 or 1."
4068
- end
4165
+ def mean
4166
+ lazy.mean.collect(_eager: true)
4167
+ end
4168
+
4169
+ # Take the mean of all values horizontally across columns.
4170
+ #
4171
+ # @param ignore_nulls [Boolean]
4172
+ # Ignore null values (default).
4173
+ # If set to `false`, any null value in the input will lead to a null output.
4174
+ #
4175
+ # @return [Series]
4176
+ #
4177
+ # @example
4178
+ # df = Polars::DataFrame.new(
4179
+ # {
4180
+ # "foo" => [1, 2, 3],
4181
+ # "bar" => [4.0, 5.0, 6.0]
4182
+ # }
4183
+ # )
4184
+ # df.mean_horizontal
4185
+ # # =>
4186
+ # # shape: (3,)
4187
+ # # Series: 'mean' [f64]
4188
+ # # [
4189
+ # # 2.5
4190
+ # # 3.5
4191
+ # # 4.5
4192
+ # # ]
4193
+ def mean_horizontal(ignore_nulls: true)
4194
+ select(
4195
+ mean: F.mean_horizontal(F.all, ignore_nulls: ignore_nulls)
4196
+ ).to_series
4069
4197
  end
4070
4198
 
4071
4199
  # Aggregate the columns of this DataFrame to their standard deviation value.