polars-df 0.15.0 → 0.16.0
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/Cargo.lock +588 -456
- data/README.md +37 -2
- data/ext/polars/Cargo.toml +7 -7
- data/ext/polars/src/conversion/mod.rs +31 -21
- data/ext/polars/src/dataframe/general.rs +1 -48
- data/ext/polars/src/dataframe/io.rs +13 -9
- data/ext/polars/src/expr/general.rs +3 -0
- data/ext/polars/src/expr/meta.rs +6 -2
- data/ext/polars/src/file.rs +21 -3
- data/ext/polars/src/functions/aggregation.rs +4 -4
- data/ext/polars/src/functions/io.rs +34 -13
- data/ext/polars/src/functions/lazy.rs +5 -4
- data/ext/polars/src/functions/meta.rs +1 -1
- data/ext/polars/src/interop/arrow/to_ruby.rs +2 -2
- data/ext/polars/src/lazyframe/general.rs +48 -5
- data/ext/polars/src/lib.rs +11 -15
- data/ext/polars/src/series/general.rs +3 -15
- data/ext/polars/src/series/import.rs +1 -1
- data/lib/polars/data_frame.rb +179 -51
- data/lib/polars/data_types.rb +1 -1
- data/lib/polars/functions/aggregation/horizontal.rb +10 -4
- data/lib/polars/functions/lazy.rb +7 -3
- data/lib/polars/io/delta.rb +126 -0
- data/lib/polars/lazy_frame.rb +35 -5
- data/lib/polars/selectors.rb +85 -3
- data/lib/polars/version.rb +1 -1
- data/lib/polars.rb +15 -0
- metadata +5 -8
data/ext/polars/src/lazyframe/general.rs
CHANGED
@@ -330,6 +330,7 @@ impl RbLazyFrame {
                 nulls_last: vec![nulls_last],
                 multithreaded,
                 maintain_order,
+                limit: None,
             },
         )
         .into()
@@ -353,6 +354,7 @@ impl RbLazyFrame {
                 nulls_last,
                 maintain_order,
                 multithreaded,
+                limit: None,
             },
         )
         .into())
@@ -379,6 +381,8 @@ impl RbLazyFrame {
         row_group_size: Option<usize>,
         data_page_size: Option<usize>,
         maintain_order: bool,
+        cloud_options: Option<Vec<(String, String)>>,
+        retries: usize,
     ) -> RbResult<()> {
         let compression = parse_parquet_compression(&compression, compression_level)?;
 
@@ -390,8 +394,15 @@ impl RbLazyFrame {
             maintain_order,
         };
 
+        let cloud_options = {
+            let cloud_options =
+                parse_cloud_options(path.to_str().unwrap(), cloud_options.unwrap_or_default())?;
+            Some(cloud_options.with_max_retries(retries))
+        };
+
         let ldf = self.ldf.borrow().clone();
-        ldf.sink_parquet(path, options)
+        ldf.sink_parquet(&path, options, cloud_options)
+            .map_err(RbPolarsErr::from)?;
         Ok(())
     }
 
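The `sink_*` writers on the Rust side now accept `cloud_options` and a retry count and parse them into cloud options for the writer. A minimal sketch of what this enables from Ruby, assuming `LazyFrame#sink_parquet` in 0.16.0 exposes matching `storage_options:` and `retries:` keywords (the bucket URI and option keys below are placeholders):

```ruby
require "polars-df"

lf = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => ["x", "y", "z"]}).lazy

# Stream the query result to object storage without collecting it into
# memory first; storage_options/retries are forwarded to the Rust sink.
lf.filter(Polars.col("a") > 1).sink_parquet(
  "s3://my-bucket/out.parquet",                    # placeholder URI
  storage_options: {"aws_region" => "us-east-1"},  # placeholder keys
  retries: 2
)
```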
@@ -400,14 +411,23 @@ impl RbLazyFrame {
         path: PathBuf,
         compression: Option<Wrap<IpcCompression>>,
         maintain_order: bool,
+        cloud_options: Option<Vec<(String, String)>>,
+        retries: usize,
     ) -> RbResult<()> {
         let options = IpcWriterOptions {
             compression: compression.map(|c| c.0),
             maintain_order,
         };
 
+        let cloud_options = {
+            let cloud_options =
+                parse_cloud_options(path.to_str().unwrap(), cloud_options.unwrap_or_default())?;
+            Some(cloud_options.with_max_retries(retries))
+        };
+
         let ldf = self.ldf.borrow().clone();
-        ldf.sink_ipc(path, options)
+        ldf.sink_ipc(&path, options, cloud_options)
+            .map_err(RbPolarsErr::from)?;
         Ok(())
     }
 
@@ -430,6 +450,9 @@ impl RbLazyFrame {
         quote_style: Option<Wrap<QuoteStyle>>,
         maintain_order: bool,
     ) -> RbResult<()> {
+        // TODO
+        let cloud_options = None;
+
         let quote_style = quote_style.map_or(QuoteStyle::default(), |wrap| wrap.0);
         let null_value = null_value.unwrap_or(SerializeOptions::default().null);
 
@@ -454,16 +477,36 @@ impl RbLazyFrame {
             serialize_options,
         };
 
+        let cloud_options = {
+            let cloud_options =
+                parse_cloud_options(path.to_str().unwrap(), cloud_options.unwrap_or_default())?;
+            Some(cloud_options)
+        };
+
         let ldf = self.ldf.borrow().clone();
-        ldf.sink_csv(path, options)
+        ldf.sink_csv(&path, options, cloud_options)
+            .map_err(RbPolarsErr::from)?;
         Ok(())
     }
 
-    pub fn sink_json(
+    pub fn sink_json(
+        &self,
+        path: PathBuf,
+        maintain_order: bool,
+        cloud_options: Option<Vec<(String, String)>>,
+        retries: usize,
+    ) -> RbResult<()> {
        let options = JsonWriterOptions { maintain_order };
 
+        let cloud_options = {
+            let cloud_options =
+                parse_cloud_options(path.to_str().unwrap(), cloud_options.unwrap_or_default())?;
+            Some(cloud_options.with_max_retries(retries))
+        };
+
         let ldf = self.ldf.borrow().clone();
-        ldf.sink_json(path, options)
+        ldf.sink_json(&path, options, cloud_options)
+            .map_err(RbPolarsErr::from)?;
         Ok(())
     }
 
data/ext/polars/src/lib.rs
CHANGED
@@ -72,7 +72,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
     class.define_method("write_json", method!(RbDataFrame::write_json, 3))?;
     class.define_method("write_ndjson", method!(RbDataFrame::write_ndjson, 1))?;
     class.define_method("write_csv", method!(RbDataFrame::write_csv, 10))?;
-    class.define_method("write_ipc", method!(RbDataFrame::write_ipc,
+    class.define_method("write_ipc", method!(RbDataFrame::write_ipc, 5))?;
     class.define_method(
         "write_ipc_stream",
         method!(RbDataFrame::write_ipc_stream, 3),
@@ -143,10 +143,6 @@ fn init(ruby: &Ruby) -> RbResult<()> {
     class.define_method("pivot_expr", method!(RbDataFrame::pivot_expr, 7))?;
     class.define_method("partition_by", method!(RbDataFrame::partition_by, 3))?;
     class.define_method("lazy", method!(RbDataFrame::lazy, 0))?;
-    class.define_method("mean_horizontal", method!(RbDataFrame::mean_horizontal, 1))?;
-    class.define_method("max_horizontal", method!(RbDataFrame::max_horizontal, 0))?;
-    class.define_method("min_horizontal", method!(RbDataFrame::min_horizontal, 0))?;
-    class.define_method("sum_horizontal", method!(RbDataFrame::sum_horizontal, 1))?;
     class.define_method("to_dummies", method!(RbDataFrame::to_dummies, 3))?;
     class.define_method("null_count", method!(RbDataFrame::null_count, 0))?;
     class.define_method("map_rows", method!(RbDataFrame::map_rows, 3))?;
@@ -568,10 +564,10 @@ fn init(ruby: &Ruby) -> RbResult<()> {
     class.define_singleton_method("int_range", function!(functions::range::int_range, 4))?;
     class.define_singleton_method("int_ranges", function!(functions::range::int_ranges, 4))?;
     class.define_singleton_method("repeat", function!(functions::lazy::repeat, 3))?;
-    class.define_singleton_method("pearson_corr", function!(functions::lazy::pearson_corr,
+    class.define_singleton_method("pearson_corr", function!(functions::lazy::pearson_corr, 2))?;
     class.define_singleton_method(
         "spearman_rank_corr",
-        function!(functions::lazy::spearman_rank_corr,
+        function!(functions::lazy::spearman_rank_corr, 3),
     )?;
     class.define_singleton_method("sql_expr", function!(functions::lazy::sql_expr, 1))?;
     class.define_singleton_method("cov", function!(functions::lazy::cov, 3))?;
@@ -605,11 +601,11 @@ fn init(ruby: &Ruby) -> RbResult<()> {
     )?;
     class.define_singleton_method(
         "sum_horizontal",
-        function!(functions::aggregation::sum_horizontal,
+        function!(functions::aggregation::sum_horizontal, 2),
     )?;
     class.define_singleton_method(
         "mean_horizontal",
-        function!(functions::aggregation::mean_horizontal,
+        function!(functions::aggregation::mean_horizontal, 2),
     )?;
     class.define_singleton_method("as_struct", function!(functions::lazy::as_struct, 1))?;
     class.define_singleton_method("coalesce", function!(functions::lazy::coalesce, 1))?;
@@ -660,8 +656,8 @@ fn init(ruby: &Ruby) -> RbResult<()> {
         function!(functions::meta::get_index_type, 0),
     )?;
     class.define_singleton_method(
-        "
-        function!(functions::meta::
+        "thread_pool_size",
+        function!(functions::meta::thread_pool_size, 0),
     )?;
     class.define_singleton_method(
         "enable_string_cache",
@@ -747,10 +743,10 @@ fn init(ruby: &Ruby) -> RbResult<()> {
     class.define_method("sort_by_exprs", method!(RbLazyFrame::sort_by_exprs, 5))?;
     class.define_method("cache", method!(RbLazyFrame::cache, 0))?;
     class.define_method("collect", method!(RbLazyFrame::collect, 0))?;
-    class.define_method("sink_parquet", method!(RbLazyFrame::sink_parquet,
-    class.define_method("sink_ipc", method!(RbLazyFrame::sink_ipc,
+    class.define_method("sink_parquet", method!(RbLazyFrame::sink_parquet, 9))?;
+    class.define_method("sink_ipc", method!(RbLazyFrame::sink_ipc, 5))?;
     class.define_method("sink_csv", method!(RbLazyFrame::sink_csv, 15))?;
-    class.define_method("sink_json", method!(RbLazyFrame::sink_json,
+    class.define_method("sink_json", method!(RbLazyFrame::sink_json, 4))?;
     class.define_method("fetch", method!(RbLazyFrame::fetch, 1))?;
     class.define_method("filter", method!(RbLazyFrame::filter, 1))?;
     class.define_method("select", method!(RbLazyFrame::select, 1))?;
@@ -1109,7 +1105,7 @@ fn init(ruby: &Ruby) -> RbResult<()> {
     class.define_singleton_method("hold", function!(RbStringCacheHolder::hold, 0))?;
 
     // arrow array stream
-    let class = module.define_class("
+    let class = module.define_class("ArrowArrayStream", ruby.class_object())?;
     class.define_method("to_i", method!(RbArrowArrayStream::to_i, 0))?;
 
     Ok(())
data/ext/polars/src/series/general.rs
CHANGED
@@ -97,29 +97,17 @@ impl RbSeries {
     }
 
     pub fn bitand(&self, other: &RbSeries) -> RbResult<Self> {
-        let out = self
-            .series
-            .borrow()
-            .bitand(&other.series.borrow())
-            .map_err(RbPolarsErr::from)?;
+        let out = (&*self.series.borrow() & &*other.series.borrow()).map_err(RbPolarsErr::from)?;
         Ok(out.into())
     }
 
     pub fn bitor(&self, other: &RbSeries) -> RbResult<Self> {
-        let out = self
-            .series
-            .borrow()
-            .bitor(&other.series.borrow())
-            .map_err(RbPolarsErr::from)?;
+        let out = (&*self.series.borrow() | &*other.series.borrow()).map_err(RbPolarsErr::from)?;
         Ok(out.into())
     }
 
     pub fn bitxor(&self, other: &RbSeries) -> RbResult<Self> {
-        let out = self
-            .series
-            .borrow()
-            .bitxor(&other.series.borrow())
-            .map_err(RbPolarsErr::from)?;
+        let out = (&*self.series.borrow() ^ &*other.series.borrow()).map_err(RbPolarsErr::from)?;
         Ok(out.into())
     }
 
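This is purely an internal cleanup: the explicit `bitand`/`bitor`/`bitxor` calls are replaced by the equivalent `&`, `|`, and `^` operator forms on the underlying `Series`, so behavior seen from Ruby should be unchanged. A quick sanity check of the kind of call these bindings back, assuming boolean `Series#&` and `Series#|` behave as they did in 0.15.0:

```ruby
require "polars-df"

a = Polars::Series.new("a", [true, true, false])
b = Polars::Series.new("b", [true, false, false])

# Element-wise boolean AND / OR, dispatched to the bitand/bitor bindings above.
p (a & b).to_a  # => [true, false, false]
p (a | b).to_a  # => [true, true, false]
```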
data/ext/polars/src/series/import.rs
CHANGED
@@ -39,7 +39,7 @@ pub(crate) fn import_stream_rbcapsule(capsule: Value) -> RbResult<RbSeries> {
 
     // Series::try_from fails for an empty vec of chunks
     let s = if produced_arrays.is_empty() {
-        let polars_dt = DataType::
+        let polars_dt = DataType::from_arrow_field(stream.field());
         Series::new_empty(stream.field().name.clone(), &polars_dt)
     } else {
         Series::try_from((stream.field(), produced_arrays)).unwrap()
data/lib/polars/data_frame.rb
CHANGED
@@ -831,7 +831,13 @@ module Polars
    #   Compression method. Defaults to "uncompressed".
    #
    # @return [nil]
-    def write_ipc(
+    def write_ipc(
+      file,
+      compression: "uncompressed",
+      compat_level: nil,
+      storage_options: nil,
+      retries: 2
+    )
      return_bytes = file.nil?
      if return_bytes
        file = StringIO.new
@@ -849,7 +855,13 @@ module Polars
        compression = "uncompressed"
      end
 
-
+      if storage_options&.any?
+        storage_options = storage_options.to_a
+      else
+        storage_options = nil
+      end
+
+      _df.write_ipc(file, compression, compat_level, storage_options, retries)
      return_bytes ? file.string : nil
    end
 
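On the Ruby side, `write_ipc` gains `storage_options:` and `retries:` and normalizes the options hash to an array of pairs before handing it to the binding. A sketch of local and (assumed) cloud usage; the URI and option keys are placeholders:

```ruby
require "polars-df"

df = Polars::DataFrame.new({"a" => [1, 2, 3]})

# Local write behaves exactly as before.
df.write_ipc("data.arrow", compression: "zstd")

# With storage_options the same call can target object storage; which keys
# are understood depends on the backend, so these are placeholders.
df.write_ipc(
  "s3://my-bucket/data.arrow",
  storage_options: {"aws_region" => "us-east-1"},
  retries: 2
)
```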
@@ -961,6 +973,61 @@ module Polars
      )
    end
 
+    # Write DataFrame as delta table.
+    #
+    # @param target [Object]
+    #   URI of a table or a DeltaTable object.
+    # @param mode ["error", "append", "overwrite", "ignore", "merge"]
+    #   How to handle existing data.
+    # @param storage_options [Hash]
+    #   Extra options for the storage backends supported by `deltalake-rb`.
+    # @param delta_write_options [Hash]
+    #   Additional keyword arguments while writing a Delta lake Table.
+    # @param delta_merge_options [Hash]
+    #   Keyword arguments which are required to `MERGE` a Delta lake Table.
+    #
+    # @return [nil]
+    def write_delta(
+      target,
+      mode: "error",
+      storage_options: nil,
+      delta_write_options: nil,
+      delta_merge_options: nil
+    )
+      Polars.send(:_check_if_delta_available)
+
+      if Utils.pathlike?(target)
+        target = Polars.send(:_resolve_delta_lake_uri, target.to_s, strict: false)
+      end
+
+      data = self
+
+      if mode == "merge"
+        if delta_merge_options.nil?
+          msg = "You need to pass delta_merge_options with at least a given predicate for `MERGE` to work."
+          raise ArgumentError, msg
+        end
+        if target.is_a?(::String)
+          dt = DeltaLake::Table.new(target, storage_options: storage_options)
+        else
+          dt = target
+        end
+
+        predicate = delta_merge_options.delete(:predicate)
+        dt.merge(data, predicate, **delta_merge_options)
+      else
+        delta_write_options ||= {}
+
+        DeltaLake.write(
+          target,
+          data,
+          mode: mode,
+          storage_options: storage_options,
+          **delta_write_options
+        )
+      end
+    end
+
    # Return an estimation of the total (heap) allocated size of the DataFrame.
    #
    # Estimated size is given in the specified unit (bytes by default).
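The new `write_delta` delegates to the `deltalake-rb` gem: non-merge modes go through `DeltaLake.write`, while `mode: "merge"` opens (or reuses) a `DeltaLake::Table`, pulls `:predicate` out of `delta_merge_options`, and passes the remaining keys to `merge`. A sketch of both paths; the table path is a placeholder and the merge-builder chain at the end assumes deltalake-rb mirrors the delta-rs API:

```ruby
require "polars-df"
# Requires the deltalake-rb gem to be installed.

df = Polars::DataFrame.new({"id" => [1, 2, 3], "value" => ["a", "b", "c"]})

# Plain write: mode can be "error", "append", "overwrite", or "ignore".
df.write_delta("./delta/events", mode: "append")

# MERGE (upsert): :predicate is required, remaining keys go to Table#merge.
merger = df.write_delta(
  "./delta/events",
  mode: "merge",
  delta_merge_options: {
    predicate: "s.id = t.id",
    source_alias: "s",
    target_alias: "t"
  }
)
# Assumed deltalake-rb merge-builder chain (mirrors delta-rs):
merger.when_matched_update_all.when_not_matched_insert_all.execute
```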
@@ -3939,14 +4006,32 @@ module Polars
    #   # ╞═════╪═════╪═════╡
    #   # │ 3 ┆ 8 ┆ c │
    #   # └─────┴─────┴─────┘
-    def max
-
-
-
-
-
-
-
+    def max
+      lazy.max.collect(_eager: true)
+    end
+
+    # Get the maximum value horizontally across columns.
+    #
+    # @return [Series]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [4.0, 5.0, 6.0]
+    #     }
+    #   )
+    #   df.max_horizontal
+    #   # =>
+    #   # shape: (3,)
+    #   # Series: 'max' [f64]
+    #   # [
+    #   #         4.0
+    #   #         5.0
+    #   #         6.0
+    #   # ]
+    def max_horizontal
+      select(max: F.max_horizontal(F.all)).to_series
    end
 
    # Aggregate the columns of this DataFrame to their minimum value.
@@ -3971,22 +4056,35 @@ module Polars
    #   # ╞═════╪═════╪═════╡
    #   # │ 1 ┆ 6 ┆ a │
    #   # └─────┴─────┴─────┘
-    def min
-
-      lazy.min.collect(_eager: true)
-    elsif axis == 1
-      Utils.wrap_s(_df.min_horizontal)
-    else
-      raise ArgumentError, "Axis should be 0 or 1."
-    end
+    def min
+      lazy.min.collect(_eager: true)
    end
 
-    #
+    # Get the minimum value horizontally across columns.
    #
-    # @
-    #
-    # @
-    #
+    # @return [Series]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [4.0, 5.0, 6.0]
+    #     }
+    #   )
+    #   df.min_horizontal
+    #   # =>
+    #   # shape: (3,)
+    #   # Series: 'min' [f64]
+    #   # [
+    #   #         1.0
+    #   #         2.0
+    #   #         3.0
+    #   # ]
+    def min_horizontal
+      select(min: F.min_horizontal(F.all)).to_series
+    end
+
+    # Aggregate the columns of this DataFrame to their sum value.
    #
    # @return [DataFrame]
    #
@@ -4008,35 +4106,42 @@ module Polars
    #   # ╞═════╪═════╪══════╡
    #   # │ 6 ┆ 21 ┆ null │
    #   # └─────┴─────┴──────┘
+    def sum
+      lazy.sum.collect(_eager: true)
+    end
+
+    # Sum all values horizontally across columns.
+    #
+    # @param ignore_nulls [Boolean]
+    #   Ignore null values (default).
+    #   If set to `false`, any null value in the input will lead to a null output.
+    #
+    # @return [Series]
    #
    # @example
-    #   df.
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [4.0, 5.0, 6.0]
+    #     }
+    #   )
+    #   df.sum_horizontal
    #   # =>
    #   # shape: (3,)
-    #   # Series: '
+    #   # Series: 'sum' [f64]
    #   # [
-    #   #
-    #   #
-    #   #
+    #   #         5.0
+    #   #         7.0
+    #   #         9.0
    #   # ]
-    def
-
-
-
-      when 1
-        Utils.wrap_s(_df.sum_horizontal(null_strategy))
-      else
-        raise ArgumentError, "Axis should be 0 or 1."
-      end
+    def sum_horizontal(ignore_nulls: true)
+      select(
+        sum: F.sum_horizontal(F.all, ignore_nulls: ignore_nulls)
+      ).to_series
    end
 
    # Aggregate the columns of this DataFrame to their mean value.
    #
-    # @param axis [Integer]
-    #   Either 0 or 1.
-    # @param null_strategy ["ignore", "propagate"]
-    #   This argument is only used if axis == 1.
-    #
    # @return [DataFrame]
    #
    # @example
@@ -4057,15 +4162,38 @@ module Polars
    #   # ╞═════╪═════╪══════╡
    #   # │ 2.0 ┆ 7.0 ┆ null │
    #   # └─────┴─────┴──────┘
-    def mean
-
-
-
-
-
-
-
-
+    def mean
+      lazy.mean.collect(_eager: true)
+    end
+
+    # Take the mean of all values horizontally across columns.
+    #
+    # @param ignore_nulls [Boolean]
+    #   Ignore null values (default).
+    #   If set to `false`, any null value in the input will lead to a null output.
+    #
+    # @return [Series]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [4.0, 5.0, 6.0]
+    #     }
+    #   )
+    #   df.mean_horizontal
+    #   # =>
+    #   # shape: (3,)
+    #   # Series: 'mean' [f64]
+    #   # [
+    #   #         2.5
+    #   #         3.5
+    #   #         4.5
+    #   # ]
+    def mean_horizontal(ignore_nulls: true)
+      select(
+        mean: F.mean_horizontal(F.all, ignore_nulls: ignore_nulls)
+      ).to_series
    end
 
    # Aggregate the columns of this DataFrame to their standard deviation value.
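The old `axis:`/`null_strategy:` arguments on `max`, `min`, `sum`, and `mean` are gone; row-wise reductions now live in dedicated `*_horizontal` methods built on the lazy horizontal expressions, and null handling is controlled with `ignore_nulls:`. For example:

```ruby
require "polars-df"

df = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => [10, nil, 30]})

df.sum_horizontal.to_a                       # => [11, 2, 33]   (nulls ignored)
df.sum_horizontal(ignore_nulls: false).to_a  # => [11, nil, 33] (nulls propagate)
df.max_horizontal.to_a                       # => [10, 2, 30]
```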
data/lib/polars/data_types.rb
CHANGED
data/lib/polars/functions/aggregation/horizontal.rb
CHANGED
@@ -143,6 +143,9 @@ module Polars
    # @param exprs [Array]
    #   Column(s) to use in the aggregation. Accepts expression input. Strings are
    #   parsed as column names, other non-expression inputs are parsed as literals.
+    # @param ignore_nulls [Boolean]
+    #   Ignore null values (default).
+    #   If set to `false`, any null value in the input will lead to a null output.
    #
    # @return [Expr]
    #
@@ -166,9 +169,9 @@ module Polars
    #   # │ 8 ┆ 5 ┆ y ┆ 13 │
    #   # │ 3 ┆ null ┆ z ┆ 3 │
    #   # └─────┴──────┴─────┴─────┘
-    def sum_horizontal(*exprs)
+    def sum_horizontal(*exprs, ignore_nulls: true)
      rbexprs = Utils.parse_into_list_of_expressions(*exprs)
-      Utils.wrap_expr(Plr.sum_horizontal(rbexprs))
+      Utils.wrap_expr(Plr.sum_horizontal(rbexprs, ignore_nulls))
    end
 
    # Compute the mean of all values horizontally across columns.
@@ -176,6 +179,9 @@ module Polars
    # @param exprs [Array]
    #   Column(s) to use in the aggregation. Accepts expression input. Strings are
    #   parsed as column names, other non-expression inputs are parsed as literals.
+    # @param ignore_nulls [Boolean]
+    #   Ignore null values (default).
+    #   If set to `false`, any null value in the input will lead to a null output.
    #
    # @return [Expr]
    #
@@ -199,9 +205,9 @@ module Polars
    #   # │ 8 ┆ 5 ┆ y ┆ 6.5 │
    #   # │ 3 ┆ null ┆ z ┆ 3.0 │
    #   # └─────┴──────┴─────┴──────┘
-    def mean_horizontal(*exprs)
+    def mean_horizontal(*exprs, ignore_nulls: true)
      rbexprs = Utils.parse_into_list_of_expressions(*exprs)
-      Utils.wrap_expr(Plr.mean_horizontal(rbexprs))
+      Utils.wrap_expr(Plr.mean_horizontal(rbexprs, ignore_nulls))
    end
 
    # Cumulatively sum all values horizontally across columns.
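The expression-level helpers gain the same `ignore_nulls:` flag, passed straight through to the updated two-argument Rust bindings:

```ruby
require "polars-df"

df = Polars::DataFrame.new({"a" => [1, 8, 3], "b" => [4, 5, nil]})

df.select(
  Polars.sum_horizontal("a", "b").alias("sum"),
  Polars.sum_horizontal("a", "b", ignore_nulls: false).alias("sum_strict"),
  Polars.mean_horizontal("a", "b").alias("mean")
)
# sum:        [5, 13, 3]
# sum_strict: [5, 13, null]
# mean:       [2.5, 6.5, 3.0]
```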
data/lib/polars/functions/lazy.rb
CHANGED
@@ -729,16 +729,20 @@
      a,
      b,
      method: "pearson",
-      ddof:
+      ddof: nil,
      propagate_nans: false
    )
+      if !ddof.nil?
+        warn "The `ddof` parameter has no effect. Do not use it."
+      end
+
      a = Utils.parse_into_expression(a)
      b = Utils.parse_into_expression(b)
 
      if method == "pearson"
-        Utils.wrap_expr(Plr.pearson_corr(a, b
+        Utils.wrap_expr(Plr.pearson_corr(a, b))
      elsif method == "spearman"
-        Utils.wrap_expr(Plr.spearman_rank_corr(a, b,
+        Utils.wrap_expr(Plr.spearman_rank_corr(a, b, propagate_nans))
      else
        msg = "method must be one of {{'pearson', 'spearman'}}, got #{method}"
        raise ArgumentError, msg
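`Polars.corr` keeps its signature, but `ddof:` now only triggers a warning and the Pearson/Spearman bindings are called without it. Usage is otherwise unchanged:

```ruby
require "polars-df"

df = Polars::DataFrame.new({"a" => [1, 2, 3, 4], "b" => [1, 3, 2, 4]})

df.select(
  Polars.corr("a", "b").alias("pearson"),
  Polars.corr("a", "b", method: "spearman").alias("spearman")
)
# Passing ddof: still works syntactically, but only prints
# "The `ddof` parameter has no effect. Do not use it."
```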