enumerable-statistics 2.0.6 → 2.0.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/ci.yml +20 -3
- data/CHANGELOG.md +10 -0
- data/ext/enumerable/statistics/extension/statistics.c +242 -104
- data/lib/enumerable_statistics/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 815b5f1b539e61935a709b7f859aeae73dd379959d8aa5e56551b650d20c7abc
|
4
|
+
data.tar.gz: be09795e4203a023e23a20b99c2bdd051ace5292ea7c67d6000cda0c3ed7c822
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3b2af05ed047b86529af90b7c18091d04055b1ded985614a6e4c7422706bd1453bb993a3b84e4b2821c8ff725efc62edb09505e1f3e59865faa71921296cd45d
|
7
|
+
data.tar.gz: 97d2f6044a4ebb1a1ca4d068f999192c3255bf8213262c6d038007404be79ca36cc6eaeefab0830e0aa45a658d4e083024ad3a116040ee75e9acbda31984b69a
|
data/.github/workflows/ci.yml
CHANGED
@@ -16,6 +16,9 @@ jobs:
|
|
16
16
|
- macos-latest
|
17
17
|
- windows-latest
|
18
18
|
ruby:
|
19
|
+
- 3.3
|
20
|
+
- 3.2
|
21
|
+
- 3.1
|
19
22
|
- 3.0
|
20
23
|
- 2.7
|
21
24
|
- 2.6
|
@@ -23,8 +26,10 @@ jobs:
|
|
23
26
|
- 2.4
|
24
27
|
- debug
|
25
28
|
exclude:
|
26
|
-
- os:
|
27
|
-
ruby:
|
29
|
+
- os: macos-latest
|
30
|
+
ruby: 2.5
|
31
|
+
- os: macos-latest
|
32
|
+
ruby: 2.4
|
28
33
|
- os: windows-latest
|
29
34
|
ruby: debug
|
30
35
|
|
@@ -36,7 +41,19 @@ jobs:
|
|
36
41
|
with:
|
37
42
|
ruby-version: ${{ matrix.ruby }}
|
38
43
|
|
39
|
-
-
|
44
|
+
- name: Detect installable bundler version
|
45
|
+
run: |
|
46
|
+
case "${{ matrix.ruby }}" in
|
47
|
+
2.7|2.6) bundler_version="2.4.22" ;;
|
48
|
+
2.5|2.4) bundler_version="2.3.27" ;;
|
49
|
+
*) bundler_version="" ;;
|
50
|
+
esac
|
51
|
+
echo "bundler_version=$bundler_version" >> $GITHUB_ENV
|
52
|
+
shell: bash
|
53
|
+
|
54
|
+
- run: gem install bundler${bundler_version:+ -v $bundler_version}
|
55
|
+
shell: bash
|
56
|
+
|
40
57
|
- run: bundle install
|
41
58
|
|
42
59
|
- run: rake --trace compile
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,13 @@
|
|
1
|
+
# 2.0.8
|
2
|
+
|
3
|
+
- Prohibit the use of both `nbins` and `edges` kwargs simultaneously in the `histogram` method.
|
4
|
+
- Support `skip_na` kwarg in `sum` and related methods.
|
5
|
+
- Support Ruby 3.4+.
|
6
|
+
|
7
|
+
# 2.0.7
|
8
|
+
|
9
|
+
- Fix a bug in `histogram` that occurred when the bin range is smaller than the value range
|
10
|
+
|
1
11
|
# 2.0.6
|
2
12
|
|
3
13
|
- Add edges parameter in histogram
|
@@ -96,11 +96,14 @@ static VALUE half_in_rational;
|
|
96
96
|
static ID idPow, idPLUS, idMINUS, idSTAR, idDIV, idGE;
|
97
97
|
static ID id_eqeq_p, id_idiv, id_negate, id_to_f, id_cmp, id_nan_p;
|
98
98
|
static ID id_each, id_real_p, id_sum, id_population, id_closed, id_edge;
|
99
|
+
static ID id_skip_na;
|
99
100
|
|
100
|
-
static VALUE sym_auto, sym_left, sym_right;
|
101
|
+
static VALUE sym_auto, sym_left, sym_right, sym_sturges;
|
101
102
|
|
102
103
|
static VALUE cHistogram;
|
103
104
|
|
105
|
+
static VALUE orig_enum_sum, orig_ary_sum;
|
106
|
+
|
104
107
|
inline static VALUE
|
105
108
|
f_add(VALUE x, VALUE y)
|
106
109
|
{
|
@@ -135,7 +138,7 @@ complex_new(VALUE klass, VALUE real, VALUE imag)
|
|
135
138
|
{
|
136
139
|
assert(!RB_TYPE_P(real, T_COMPLEX));
|
137
140
|
|
138
|
-
|
141
|
+
VALUE obj = rb_get_alloc_func(klass)(klass);
|
139
142
|
|
140
143
|
RCOMPLEX_SET_REAL(obj, real);
|
141
144
|
RCOMPLEX_SET_IMAG(obj, imag);
|
@@ -535,9 +538,11 @@ f_gcd(VALUE x, VALUE y)
|
|
535
538
|
inline static VALUE
|
536
539
|
nurat_s_new_internal(VALUE klass, VALUE num, VALUE den)
|
537
540
|
{
|
538
|
-
|
541
|
+
VALUE obj = rb_get_alloc_func(klass)(klass);
|
542
|
+
|
539
543
|
RRATIONAL_SET_NUM(obj, num);
|
540
544
|
RRATIONAL_SET_DEN(obj, den);
|
545
|
+
|
541
546
|
return (VALUE)obj;
|
542
547
|
}
|
543
548
|
|
@@ -632,39 +637,67 @@ rb_rational_plus(VALUE self, VALUE other)
|
|
632
637
|
}
|
633
638
|
#endif
|
634
639
|
|
635
|
-
|
636
|
-
|
637
|
-
|
638
|
-
|
639
|
-
|
640
|
-
|
641
|
-
|
642
|
-
|
643
|
-
|
644
|
-
|
645
|
-
|
646
|
-
|
647
|
-
|
648
|
-
|
640
|
+
static inline int
|
641
|
+
is_na(VALUE v)
|
642
|
+
{
|
643
|
+
if (NIL_P(v))
|
644
|
+
return 1;
|
645
|
+
|
646
|
+
if (RB_FLOAT_TYPE_P(v) && isnan(RFLOAT_VALUE(v)))
|
647
|
+
return 1;
|
648
|
+
|
649
|
+
if (rb_respond_to(v, id_nan_p) && RTEST(rb_funcall(v, id_nan_p, 0)))
|
650
|
+
return 1;
|
651
|
+
|
652
|
+
return 0;
|
653
|
+
}
|
654
|
+
|
655
|
+
static int opt_skip_na(VALUE opts)
|
656
|
+
{
|
657
|
+
VALUE skip_na = Qfalse;
|
658
|
+
|
659
|
+
if (!NIL_P(opts)) {
|
660
|
+
#ifdef HAVE_RB_GET_KWARGS
|
661
|
+
ID kwargs = id_skip_na;
|
662
|
+
rb_get_kwargs(opts, &kwargs, 0, 1, &skip_na);
|
663
|
+
#else
|
664
|
+
VALUE val = rb_hash_aref(opts, ID2SYM(id_skip_na));
|
665
|
+
skip_na = NIL_P(val) ? skip_na : val;
|
666
|
+
#endif
|
667
|
+
}
|
668
|
+
|
669
|
+
return RTEST(skip_na);
|
670
|
+
}
|
671
|
+
|
672
|
+
VALUE
|
673
|
+
ary_calculate_sum(VALUE ary, VALUE init, int skip_na, long *na_count_out)
|
649
674
|
{
|
650
675
|
VALUE e, v, r;
|
651
676
|
long i, n;
|
652
677
|
int block_given;
|
653
|
-
|
654
|
-
if (rb_scan_args(argc, argv, "01", &v) == 0)
|
655
|
-
v = LONG2FIX(0);
|
678
|
+
long na_count = 0;
|
656
679
|
|
657
680
|
block_given = rb_block_given_p();
|
658
681
|
|
659
|
-
if (RARRAY_LEN(ary) == 0)
|
660
|
-
|
682
|
+
if (RARRAY_LEN(ary) == 0) {
|
683
|
+
if (na_count_out != NULL) {
|
684
|
+
*na_count_out = 0;
|
685
|
+
}
|
686
|
+
return init;
|
687
|
+
}
|
661
688
|
|
662
689
|
n = 0;
|
663
690
|
r = Qundef;
|
691
|
+
v = init;
|
664
692
|
for (i = 0; i < RARRAY_LEN(ary); i++) {
|
665
693
|
e = RARRAY_AREF(ary, i);
|
666
694
|
if (block_given)
|
667
695
|
e = rb_yield(e);
|
696
|
+
if (skip_na && is_na(e)) {
|
697
|
+
++na_count;
|
698
|
+
continue;
|
699
|
+
}
|
700
|
+
|
668
701
|
if (FIXNUM_P(e)) {
|
669
702
|
n += FIX2LONG(e); /* should not overflow long type */
|
670
703
|
if (!FIXABLE(n)) {
|
@@ -688,7 +721,7 @@ ary_sum(int argc, VALUE* argv, VALUE ary)
|
|
688
721
|
v = rb_fix_plus(LONG2FIX(n), v);
|
689
722
|
if (r != Qundef)
|
690
723
|
v = rb_rational_plus(r, v);
|
691
|
-
|
724
|
+
goto finish;
|
692
725
|
|
693
726
|
not_exact:
|
694
727
|
if (n != 0)
|
@@ -708,6 +741,11 @@ not_exact:
|
|
708
741
|
e = RARRAY_AREF(ary, i);
|
709
742
|
if (block_given)
|
710
743
|
e = rb_yield(e);
|
744
|
+
if (skip_na && is_na(e)) {
|
745
|
+
++na_count;
|
746
|
+
continue;
|
747
|
+
}
|
748
|
+
|
711
749
|
if (RB_FLOAT_TYPE_P(e))
|
712
750
|
has_float_value:
|
713
751
|
x = RFLOAT_VALUE(e);
|
@@ -725,7 +763,9 @@ not_exact:
|
|
725
763
|
c = (t - f) - y;
|
726
764
|
f = t;
|
727
765
|
}
|
728
|
-
|
766
|
+
|
767
|
+
v = DBL2NUM(f);
|
768
|
+
goto finish;
|
729
769
|
|
730
770
|
not_float:
|
731
771
|
v = DBL2NUM(f);
|
@@ -736,13 +776,53 @@ not_exact:
|
|
736
776
|
e = RARRAY_AREF(ary, i);
|
737
777
|
if (block_given)
|
738
778
|
e = rb_yield(e);
|
779
|
+
if (skip_na && is_na(e)) {
|
780
|
+
++na_count;
|
781
|
+
continue;
|
782
|
+
}
|
739
783
|
has_some_value:
|
740
784
|
v = rb_funcall(v, idPLUS, 1, e);
|
741
785
|
}
|
742
786
|
|
787
|
+
finish:
|
788
|
+
if (na_count_out != NULL) {
|
789
|
+
*na_count_out = na_count;
|
790
|
+
}
|
743
791
|
return v;
|
744
792
|
}
|
745
793
|
|
794
|
+
/* call-seq:
|
795
|
+
* ary.sum(skip_na: false)
|
796
|
+
*
|
797
|
+
* Calculate the sum of the values in `ary`.
|
798
|
+
* This method utilizes
|
799
|
+
* [Kahan summation algorithm](https://en.wikipedia.org/wiki/Kahan_summation_algorithm)
|
800
|
+
* to compensate for the loss of precision in the result when `ary` includes Float values.
|
801
|
+
*
|
802
|
+
* Note that this library does not redefine the `sum` method introduced in Ruby 2.4.
|
803
|
+
*
|
804
|
+
* @return [Number] A summation value
|
805
|
+
*/
|
806
|
+
static VALUE
|
807
|
+
ary_sum(int argc, VALUE* argv, VALUE ary)
|
808
|
+
{
|
809
|
+
VALUE v, opts;
|
810
|
+
int skip_na;
|
811
|
+
|
812
|
+
if (rb_scan_args(argc, argv, "01:", &v, &opts) == 0) {
|
813
|
+
v = LONG2FIX(0);
|
814
|
+
}
|
815
|
+
skip_na = opt_skip_na(opts);
|
816
|
+
|
817
|
+
#ifndef HAVE_ENUM_SUM
|
818
|
+
if (!skip_na) {
|
819
|
+
return rb_funcall(orig_ary_sum, rb_intern("call"), argc, &v);
|
820
|
+
}
|
821
|
+
#endif
|
822
|
+
|
823
|
+
return ary_calculate_sum(ary, v, skip_na, NULL);
|
824
|
+
}
|
825
|
+
|
746
826
|
static void
|
747
827
|
calculate_and_set_mean(VALUE *mean_ptr, VALUE sum, long const n)
|
748
828
|
{
|
@@ -771,9 +851,10 @@ calculate_and_set_mean(VALUE *mean_ptr, VALUE sum, long const n)
|
|
771
851
|
}
|
772
852
|
|
773
853
|
static void
|
774
|
-
ary_mean_variance(VALUE ary, VALUE *mean_ptr, VALUE *variance_ptr, size_t ddof)
|
854
|
+
ary_mean_variance(VALUE ary, VALUE *mean_ptr, VALUE *variance_ptr, size_t ddof, int skip_na)
|
775
855
|
{
|
776
856
|
long i;
|
857
|
+
long na_count;
|
777
858
|
size_t n = 0;
|
778
859
|
double m = 0.0, m2 = 0.0, f = 0.0, c = 0.0;
|
779
860
|
|
@@ -797,8 +878,8 @@ ary_mean_variance(VALUE ary, VALUE *mean_ptr, VALUE *variance_ptr, size_t ddof)
|
|
797
878
|
|
798
879
|
if (variance_ptr == NULL) {
|
799
880
|
VALUE init = DBL2NUM(0.0);
|
800
|
-
VALUE const sum =
|
801
|
-
long const n = RARRAY_LEN(ary);
|
881
|
+
VALUE const sum = ary_calculate_sum(ary, init, skip_na, &na_count);
|
882
|
+
long const n = RARRAY_LEN(ary) - na_count;
|
802
883
|
calculate_and_set_mean(mean_ptr, sum, n);
|
803
884
|
return;
|
804
885
|
}
|
@@ -839,26 +920,46 @@ ary_mean_variance(VALUE ary, VALUE *mean_ptr, VALUE *variance_ptr, size_t ddof)
|
|
839
920
|
}
|
840
921
|
}
|
841
922
|
|
842
|
-
|
843
|
-
|
923
|
+
struct variance_opts {
|
924
|
+
int population;
|
925
|
+
int skip_na;
|
926
|
+
};
|
927
|
+
|
928
|
+
static void
|
929
|
+
get_variance_opts(VALUE opts, struct variance_opts *out)
|
844
930
|
{
|
845
|
-
|
931
|
+
assert(out != NULL);
|
932
|
+
|
933
|
+
out->population = 0;
|
934
|
+
out->skip_na = 0;
|
846
935
|
|
847
936
|
if (!NIL_P(opts)) {
|
848
937
|
#ifdef HAVE_RB_GET_KWARGS
|
849
|
-
ID
|
850
|
-
|
938
|
+
static ID kwarg_keys[2];
|
939
|
+
VALUE kwarg_vals;
|
940
|
+
|
941
|
+
if (!kwarg_keys[0]) {
|
942
|
+
kwarg_keys[0] = id_population;
|
943
|
+
kwarg_keys[1] = id_skip_na;
|
944
|
+
}
|
945
|
+
|
946
|
+
rb_get_kwargs(opts, &kwarg_keys, 0, 2, kwarg_vals);
|
947
|
+
out->population = (kwarg_vals[0] != Qundef) ? RTEST(kwarg_vals[0]) : out->population;
|
948
|
+
out->skip_na = (kwarg_vals[1] != Qundef) ? RTEST(kwarg_vals[1]) : out->skip_na;
|
851
949
|
#else
|
852
|
-
VALUE val
|
853
|
-
|
950
|
+
VALUE val;
|
951
|
+
|
952
|
+
val = rb_hash_aref(opts, ID2SYM(id_population));
|
953
|
+
out->population = NIL_P(val) ? out->population : RTEST(val);
|
954
|
+
|
955
|
+
val = rb_hash_aref(opts, ID2SYM(id_skip_na));
|
956
|
+
out->skip_na = NIL_P(val) ? out->skip_na : RTEST(val);
|
854
957
|
#endif
|
855
958
|
}
|
856
|
-
|
857
|
-
return RTEST(population);
|
858
959
|
}
|
859
960
|
|
860
961
|
/* call-seq:
|
861
|
-
* ary.mean_variance(population: false)
|
962
|
+
* ary.mean_variance(population: false, skip_na: false)
|
862
963
|
*
|
863
964
|
* Calculate a mean and a variance of the values in `ary`.
|
864
965
|
* The first element of the result array is the mean, and the second is the variance.
|
@@ -876,19 +977,21 @@ opt_population_p(VALUE opts)
|
|
876
977
|
static VALUE
|
877
978
|
ary_mean_variance_m(int argc, VALUE* argv, VALUE ary)
|
878
979
|
{
|
879
|
-
|
980
|
+
struct variance_opts options;
|
981
|
+
VALUE opts, mean = Qnil, variance = Qnil;
|
880
982
|
size_t ddof = 1;
|
881
983
|
|
882
984
|
rb_scan_args(argc, argv, "0:", &opts);
|
883
|
-
|
985
|
+
get_variance_opts(opts, &options);
|
986
|
+
if (options.population)
|
884
987
|
ddof = 0;
|
885
988
|
|
886
|
-
ary_mean_variance(ary, &mean, &variance, ddof);
|
989
|
+
ary_mean_variance(ary, &mean, &variance, ddof, options.skip_na);
|
887
990
|
return rb_assoc_new(mean, variance);
|
888
991
|
}
|
889
992
|
|
890
993
|
/* call-seq:
|
891
|
-
* ary.mean
|
994
|
+
* ary.mean(skip_na: false)
|
892
995
|
*
|
893
996
|
* Calculate a mean of the values in `ary`.
|
894
997
|
* This method utilizes
|
@@ -898,15 +1001,20 @@ ary_mean_variance_m(int argc, VALUE* argv, VALUE ary)
|
|
898
1001
|
* @return [Number] A mean value
|
899
1002
|
*/
|
900
1003
|
static VALUE
|
901
|
-
ary_mean(VALUE ary)
|
1004
|
+
ary_mean(int argc, VALUE *argv, VALUE ary)
|
902
1005
|
{
|
903
|
-
VALUE mean;
|
904
|
-
|
1006
|
+
VALUE mean = Qnil, opts;
|
1007
|
+
int skip_na;
|
1008
|
+
|
1009
|
+
rb_scan_args(argc, argv, ":", &opts);
|
1010
|
+
skip_na = opt_skip_na(opts);
|
1011
|
+
|
1012
|
+
ary_mean_variance(ary, &mean, NULL, 1, skip_na);
|
905
1013
|
return mean;
|
906
1014
|
}
|
907
1015
|
|
908
1016
|
/* call-seq:
|
909
|
-
* ary.variance(population: false)
|
1017
|
+
* ary.variance(population: false, skip_na: false)
|
910
1018
|
*
|
911
1019
|
* Calculate a variance of the values in `ary`.
|
912
1020
|
* This method scans the values in `ary` only once,
|
@@ -922,14 +1030,16 @@ ary_mean(VALUE ary)
|
|
922
1030
|
static VALUE
|
923
1031
|
ary_variance(int argc, VALUE* argv, VALUE ary)
|
924
1032
|
{
|
1033
|
+
struct variance_opts options;
|
925
1034
|
VALUE opts, variance;
|
926
1035
|
size_t ddof = 1;
|
927
1036
|
|
928
1037
|
rb_scan_args(argc, argv, "0:", &opts);
|
929
|
-
|
1038
|
+
get_variance_opts(opts, &options);
|
1039
|
+
if (options.population)
|
930
1040
|
ddof = 0;
|
931
1041
|
|
932
|
-
ary_mean_variance(ary, NULL, &variance, ddof);
|
1042
|
+
ary_mean_variance(ary, NULL, &variance, ddof, options.skip_na);
|
933
1043
|
return variance;
|
934
1044
|
}
|
935
1045
|
|
@@ -943,6 +1053,7 @@ struct enum_sum_memo {
|
|
943
1053
|
double f, c;
|
944
1054
|
int block_given;
|
945
1055
|
int float_value;
|
1056
|
+
int skip_na;
|
946
1057
|
};
|
947
1058
|
|
948
1059
|
static void
|
@@ -956,8 +1067,12 @@ sum_iter(VALUE e, struct enum_sum_memo *memo)
|
|
956
1067
|
double f = memo->f;
|
957
1068
|
double c = memo->c;
|
958
1069
|
|
959
|
-
if (memo->block_given)
|
1070
|
+
if (memo->block_given) {
|
960
1071
|
e = rb_yield(e);
|
1072
|
+
}
|
1073
|
+
if (memo->skip_na && is_na(e)) {
|
1074
|
+
return;
|
1075
|
+
}
|
961
1076
|
|
962
1077
|
memo->count += 1;
|
963
1078
|
|
@@ -1090,7 +1205,7 @@ int_range_sum_count(VALUE beg, VALUE end, int excl,
|
|
1090
1205
|
}
|
1091
1206
|
|
1092
1207
|
static void
|
1093
|
-
enum_sum_count(VALUE obj, VALUE init, VALUE *sum_ptr, long *count_ptr)
|
1208
|
+
enum_sum_count(VALUE obj, VALUE init, int skip_na, VALUE *sum_ptr, long *count_ptr)
|
1094
1209
|
{
|
1095
1210
|
struct enum_sum_memo memo;
|
1096
1211
|
VALUE beg, end;
|
@@ -1101,6 +1216,7 @@ enum_sum_count(VALUE obj, VALUE init, VALUE *sum_ptr, long *count_ptr)
|
|
1101
1216
|
memo.block_given = rb_block_given_p();
|
1102
1217
|
memo.n = 0;
|
1103
1218
|
memo.r = Qundef;
|
1219
|
+
memo.skip_na = skip_na;
|
1104
1220
|
|
1105
1221
|
if ((memo.float_value = RB_FLOAT_TYPE_P(memo.v))) {
|
1106
1222
|
memo.f = RFLOAT_VALUE(memo.v);
|
@@ -1138,9 +1254,8 @@ enum_sum_count(VALUE obj, VALUE init, VALUE *sum_ptr, long *count_ptr)
|
|
1138
1254
|
*count_ptr = memo.count;
|
1139
1255
|
}
|
1140
1256
|
|
1141
|
-
#ifndef HAVE_ENUM_SUM
|
1142
1257
|
/* call-seq:
|
1143
|
-
* enum.sum
|
1258
|
+
* enum.sum(skip_na: false)
|
1144
1259
|
*
|
1145
1260
|
* Calculate the sum of the values in `enum`.
|
1146
1261
|
* This method utilizes
|
@@ -1154,16 +1269,27 @@ enum_sum_count(VALUE obj, VALUE init, VALUE *sum_ptr, long *count_ptr)
|
|
1154
1269
|
static VALUE
|
1155
1270
|
enum_sum(int argc, VALUE* argv, VALUE obj)
|
1156
1271
|
{
|
1157
|
-
VALUE sum, init;
|
1272
|
+
VALUE sum, init, opts;
|
1273
|
+
int skip_na;
|
1158
1274
|
|
1159
|
-
if (rb_scan_args(argc, argv, "01", &init) == 0)
|
1275
|
+
if (rb_scan_args(argc, argv, "01:", &init, &opts) == 0) {
|
1160
1276
|
init = LONG2FIX(0);
|
1277
|
+
}
|
1278
|
+
skip_na = opt_skip_na(opts);
|
1161
1279
|
|
1162
|
-
|
1280
|
+
#ifndef HAVE_ENUM_SUM
|
1281
|
+
if (skip_na) {
|
1282
|
+
enum_sum_count(obj, init, skip_na, &sum, NULL);
|
1283
|
+
}
|
1284
|
+
else {
|
1285
|
+
rb_funcall(orig_enum_sum, rb_intern("call"), argc, &init);
|
1286
|
+
}
|
1287
|
+
#else
|
1288
|
+
enum_sum_count(obj, init, skip_na, &sum, NULL);
|
1289
|
+
#endif
|
1163
1290
|
|
1164
1291
|
return sum;
|
1165
1292
|
}
|
1166
|
-
#endif
|
1167
1293
|
|
1168
1294
|
struct enum_mean_variance_memo {
|
1169
1295
|
int block_given;
|
@@ -1253,7 +1379,7 @@ enum_mean_variance(VALUE obj, VALUE *mean_ptr, VALUE *variance_ptr, size_t ddof)
|
|
1253
1379
|
long n;
|
1254
1380
|
VALUE sum;
|
1255
1381
|
VALUE init = DBL2NUM(0.0);
|
1256
|
-
enum_sum_count(obj, init, &sum, &n);
|
1382
|
+
enum_sum_count(obj, init, 0, &sum, &n); /* TODO: skip_na */
|
1257
1383
|
if (n > 0)
|
1258
1384
|
calculate_and_set_mean(mean_ptr, sum, n);
|
1259
1385
|
return;
|
@@ -1303,11 +1429,13 @@ enum_mean_variance(VALUE obj, VALUE *mean_ptr, VALUE *variance_ptr, size_t ddof)
|
|
1303
1429
|
static VALUE
|
1304
1430
|
enum_mean_variance_m(int argc, VALUE* argv, VALUE obj)
|
1305
1431
|
{
|
1432
|
+
struct variance_opts options;
|
1306
1433
|
VALUE opts, mean, variance;
|
1307
1434
|
size_t ddof = 1;
|
1308
1435
|
|
1309
1436
|
rb_scan_args(argc, argv, "0:", &opts);
|
1310
|
-
|
1437
|
+
get_variance_opts(opts, &options);
|
1438
|
+
if (options.population)
|
1311
1439
|
ddof = 0;
|
1312
1440
|
|
1313
1441
|
enum_mean_variance(obj, &mean, &variance, ddof);
|
@@ -1349,11 +1477,13 @@ enum_mean(VALUE obj)
|
|
1349
1477
|
static VALUE
|
1350
1478
|
enum_variance(int argc, VALUE* argv, VALUE obj)
|
1351
1479
|
{
|
1480
|
+
struct variance_opts options;
|
1352
1481
|
VALUE opts, variance;
|
1353
1482
|
size_t ddof = 1;
|
1354
1483
|
|
1355
1484
|
rb_scan_args(argc, argv, "0:", &opts);
|
1356
|
-
|
1485
|
+
get_variance_opts(opts, &options);
|
1486
|
+
if (options.population)
|
1357
1487
|
ddof = 0;
|
1358
1488
|
|
1359
1489
|
enum_mean_variance(obj, NULL, &variance, ddof);
|
@@ -1392,11 +1522,13 @@ sqrt_value(VALUE x)
|
|
1392
1522
|
static VALUE
|
1393
1523
|
enum_mean_stdev(int argc, VALUE* argv, VALUE obj)
|
1394
1524
|
{
|
1525
|
+
struct variance_opts options;
|
1395
1526
|
VALUE opts, mean, variance;
|
1396
1527
|
size_t ddof = 1;
|
1397
1528
|
|
1398
1529
|
rb_scan_args(argc, argv, "0:", &opts);
|
1399
|
-
|
1530
|
+
get_variance_opts(opts, &options);
|
1531
|
+
if (options.population)
|
1400
1532
|
ddof = 0;
|
1401
1533
|
|
1402
1534
|
enum_mean_variance(obj, &mean, &variance, ddof);
|
@@ -1446,14 +1578,16 @@ enum_stdev(int argc, VALUE* argv, VALUE obj)
|
|
1446
1578
|
static VALUE
|
1447
1579
|
ary_mean_stdev(int argc, VALUE* argv, VALUE ary)
|
1448
1580
|
{
|
1581
|
+
struct variance_opts options;
|
1449
1582
|
VALUE opts, mean, variance;
|
1450
1583
|
size_t ddof = 1;
|
1451
1584
|
|
1452
1585
|
rb_scan_args(argc, argv, "0:", &opts);
|
1453
|
-
|
1586
|
+
get_variance_opts(opts, &options);
|
1587
|
+
if (options.population)
|
1454
1588
|
ddof = 0;
|
1455
1589
|
|
1456
|
-
ary_mean_variance(ary, &mean, &variance, ddof);
|
1590
|
+
ary_mean_variance(ary, &mean, &variance, ddof, options.skip_na);
|
1457
1591
|
VALUE stdev = sqrt_value(variance);
|
1458
1592
|
return rb_assoc_new(mean, stdev);
|
1459
1593
|
}
|
@@ -1479,21 +1613,6 @@ ary_stdev(int argc, VALUE* argv, VALUE ary)
|
|
1479
1613
|
return stdev;
|
1480
1614
|
}
|
1481
1615
|
|
1482
|
-
static inline int
|
1483
|
-
is_na(VALUE v)
|
1484
|
-
{
|
1485
|
-
if (NIL_P(v))
|
1486
|
-
return 1;
|
1487
|
-
|
1488
|
-
if (RB_FLOAT_TYPE_P(v) && isnan(RFLOAT_VALUE(v)))
|
1489
|
-
return 1;
|
1490
|
-
|
1491
|
-
if (rb_respond_to(v, id_nan_p) && RTEST(rb_funcall(v, id_nan_p, 0)))
|
1492
|
-
return 1;
|
1493
|
-
|
1494
|
-
return 0;
|
1495
|
-
}
|
1496
|
-
|
1497
1616
|
static int
|
1498
1617
|
ary_percentile_sort_cmp(const void *ap, const void *bp, void *dummy)
|
1499
1618
|
{
|
@@ -1900,7 +2019,7 @@ any_value_counts(int argc, VALUE *argv, VALUE obj,
|
|
1900
2019
|
struct value_counts_opts opts;
|
1901
2020
|
struct value_counts_memo memo;
|
1902
2021
|
|
1903
|
-
rb_scan_args(argc, argv, ":", &kwargs);
|
2022
|
+
rb_scan_args(argc, argv, "0:", &kwargs);
|
1904
2023
|
value_counts_extract_opts(kwargs, &opts);
|
1905
2024
|
|
1906
2025
|
memo.result = rb_hash_new();
|
@@ -2104,10 +2223,15 @@ histogram_edge_bin_index(VALUE edge, VALUE rb_x, int left_p)
|
|
2104
2223
|
static void
|
2105
2224
|
histogram_weights_push_values(VALUE bin_weights, VALUE edge, VALUE values, VALUE weight_array, int left_p)
|
2106
2225
|
{
|
2226
|
+
const VALUE one = INT2FIX(1);
|
2227
|
+
long bi, i, n, n_bins, weighted = 0;
|
2107
2228
|
VALUE x, cur;
|
2108
|
-
|
2229
|
+
|
2230
|
+
assert(RB_TYPE_P(edge, T_ARRAY));
|
2231
|
+
assert(RB_TYPE_P(values, T_ARRAY));
|
2109
2232
|
|
2110
2233
|
n = RARRAY_LEN(values);
|
2234
|
+
n_bins = RARRAY_LEN(edge) - 1;
|
2111
2235
|
|
2112
2236
|
if (! NIL_P(weight_array)) {
|
2113
2237
|
assert(RB_TYPE_P(weight_array, T_ARRAY));
|
@@ -2115,7 +2239,6 @@ histogram_weights_push_values(VALUE bin_weights, VALUE edge, VALUE values, VALUE
|
|
2115
2239
|
weighted = 1;
|
2116
2240
|
}
|
2117
2241
|
|
2118
|
-
one = INT2FIX(1);
|
2119
2242
|
for (i = 0; i < n; ++i) {
|
2120
2243
|
x = RARRAY_AREF(values, i);
|
2121
2244
|
|
@@ -2143,15 +2266,17 @@ histogram_weights_push_values(VALUE bin_weights, VALUE edge, VALUE values, VALUE
|
|
2143
2266
|
|
2144
2267
|
bi = histogram_edge_bin_index(edge, x, left_p);
|
2145
2268
|
|
2146
|
-
|
2147
|
-
|
2148
|
-
cur
|
2149
|
-
|
2150
|
-
|
2151
|
-
|
2152
|
-
|
2269
|
+
if (0 <= bi && bi < n_bins) {
|
2270
|
+
cur = rb_ary_entry(bin_weights, bi);
|
2271
|
+
if (NIL_P(cur)) {
|
2272
|
+
cur = w;
|
2273
|
+
}
|
2274
|
+
else {
|
2275
|
+
cur = rb_funcall(cur, idPLUS, 1, w);
|
2276
|
+
}
|
2153
2277
|
|
2154
|
-
|
2278
|
+
rb_ary_store(bin_weights, bi, cur);
|
2279
|
+
}
|
2155
2280
|
}
|
2156
2281
|
return;
|
2157
2282
|
|
@@ -2249,9 +2374,9 @@ ary_histogram_calculate_edge_lo_hi(const double lo, const double hi, const long
|
|
2249
2374
|
}
|
2250
2375
|
|
2251
2376
|
static VALUE
|
2252
|
-
ary_histogram_calculate_edge(VALUE ary,
|
2377
|
+
ary_histogram_calculate_edge(VALUE ary, VALUE arg0, const int left_p)
|
2253
2378
|
{
|
2254
|
-
long n;
|
2379
|
+
long n, nbins;
|
2255
2380
|
VALUE minmax;
|
2256
2381
|
VALUE edge = Qnil;
|
2257
2382
|
double lo, hi;
|
@@ -2259,6 +2384,22 @@ ary_histogram_calculate_edge(VALUE ary, const long nbins, const int left_p)
|
|
2259
2384
|
Check_Type(ary, T_ARRAY);
|
2260
2385
|
n = RARRAY_LEN(ary);
|
2261
2386
|
|
2387
|
+
if (NIL_P(arg0)) {
|
2388
|
+
arg0 = sym_auto;
|
2389
|
+
}
|
2390
|
+
|
2391
|
+
if (RB_TYPE_P(arg0, T_SYMBOL)) {
|
2392
|
+
if (arg0 != sym_auto && arg0 != sym_sturges) {
|
2393
|
+
rb_raise(rb_eArgError, "Unknown method to calculate bin width: %+"PRIsVALUE, arg0);
|
2394
|
+
}
|
2395
|
+
else {
|
2396
|
+
nbins = sturges(n);
|
2397
|
+
}
|
2398
|
+
}
|
2399
|
+
else {
|
2400
|
+
nbins = NUM2LONG(arg0);
|
2401
|
+
}
|
2402
|
+
|
2262
2403
|
if (n == 0 && nbins < 0) {
|
2263
2404
|
rb_raise(rb_eArgError, "nbins must be >= 0 for an empty array, got %ld", nbins);
|
2264
2405
|
}
|
@@ -2331,19 +2472,13 @@ static VALUE
|
|
2331
2472
|
ary_histogram(int argc, VALUE *argv, VALUE ary)
|
2332
2473
|
{
|
2333
2474
|
VALUE arg0, kwargs, bin_weights;
|
2334
|
-
long
|
2475
|
+
long n_bin_weights, i;
|
2335
2476
|
|
2336
2477
|
VALUE weight_array = Qnil;
|
2337
2478
|
VALUE edges = Qnil;
|
2338
2479
|
int left_p = 1;
|
2339
2480
|
|
2340
2481
|
rb_scan_args(argc, argv, "01:", &arg0, &kwargs);
|
2341
|
-
if (NIL_P(arg0) || arg0 == sym_auto) {
|
2342
|
-
nbins = sturges(RARRAY_LEN(ary));
|
2343
|
-
}
|
2344
|
-
else {
|
2345
|
-
nbins = NUM2LONG(arg0);
|
2346
|
-
}
|
2347
2482
|
|
2348
2483
|
if (!NIL_P(kwargs)) {
|
2349
2484
|
enum { kw_weights, kw_edges, kw_closed };
|
@@ -2364,7 +2499,10 @@ ary_histogram(int argc, VALUE *argv, VALUE ary)
|
|
2364
2499
|
}
|
2365
2500
|
|
2366
2501
|
if (NIL_P(edges)) {
|
2367
|
-
edges = ary_histogram_calculate_edge(ary,
|
2502
|
+
edges = ary_histogram_calculate_edge(ary, arg0, left_p);
|
2503
|
+
}
|
2504
|
+
else if (! NIL_P(arg0)) {
|
2505
|
+
rb_raise(rb_eArgError, "Unable to use both `nbins` and `edges` together");
|
2368
2506
|
}
|
2369
2507
|
|
2370
2508
|
n_bin_weights = RARRAY_LEN(edges) - 1;
|
@@ -2389,10 +2527,12 @@ Init_extension(void)
|
|
2389
2527
|
rb_ext_ractor_safe(true);
|
2390
2528
|
#endif
|
2391
2529
|
|
2392
|
-
|
2393
|
-
|
2394
|
-
|
2530
|
+
mEnumerableStatistics = rb_const_get_at(rb_cObject, rb_intern("EnumerableStatistics"));
|
2531
|
+
|
2532
|
+
orig_enum_sum = rb_funcall(rb_mEnumerable, rb_intern("public_instance_method"), 1, rb_str_new_cstr("sum"));
|
2533
|
+
orig_ary_sum = rb_funcall(rb_cArray, rb_intern("public_instance_method"), 1, rb_str_new_cstr("sum"));
|
2395
2534
|
|
2535
|
+
rb_define_method(rb_mEnumerable, "sum", enum_sum, -1);
|
2396
2536
|
rb_define_method(rb_mEnumerable, "mean_variance", enum_mean_variance_m, -1);
|
2397
2537
|
rb_define_method(rb_mEnumerable, "mean", enum_mean, 0);
|
2398
2538
|
rb_define_method(rb_mEnumerable, "variance", enum_variance, -1);
|
@@ -2400,11 +2540,9 @@ Init_extension(void)
|
|
2400
2540
|
rb_define_method(rb_mEnumerable, "stdev", enum_stdev, -1);
|
2401
2541
|
rb_define_method(rb_mEnumerable, "value_counts", enum_value_counts, -1);
|
2402
2542
|
|
2403
|
-
#ifndef HAVE_ARRAY_SUM
|
2404
2543
|
rb_define_method(rb_cArray, "sum", ary_sum, -1);
|
2405
|
-
#endif
|
2406
2544
|
rb_define_method(rb_cArray, "mean_variance", ary_mean_variance_m, -1);
|
2407
|
-
rb_define_method(rb_cArray, "mean", ary_mean,
|
2545
|
+
rb_define_method(rb_cArray, "mean", ary_mean, -1);
|
2408
2546
|
rb_define_method(rb_cArray, "variance", ary_variance, -1);
|
2409
2547
|
rb_define_method(rb_cArray, "mean_stdev", ary_mean_stdev, -1);
|
2410
2548
|
rb_define_method(rb_cArray, "stdev", ary_stdev, -1);
|
@@ -2417,7 +2555,6 @@ Init_extension(void)
|
|
2417
2555
|
half_in_rational = nurat_s_new_internal(rb_cRational, INT2FIX(1), INT2FIX(2));
|
2418
2556
|
rb_gc_register_mark_object(half_in_rational);
|
2419
2557
|
|
2420
|
-
mEnumerableStatistics = rb_const_get_at(rb_cObject, rb_intern("EnumerableStatistics"));
|
2421
2558
|
cHistogram = rb_const_get_at(mEnumerableStatistics, rb_intern("Histogram"));
|
2422
2559
|
|
2423
2560
|
rb_define_method(rb_cArray, "histogram", ary_histogram, -1);
|
@@ -2443,6 +2580,7 @@ Init_extension(void)
|
|
2443
2580
|
id_population = rb_intern("population");
|
2444
2581
|
id_closed = rb_intern("closed");
|
2445
2582
|
id_edge = rb_intern("edge");
|
2583
|
+
id_skip_na = rb_intern("skip_na");
|
2446
2584
|
|
2447
2585
|
sym_auto = ID2SYM(rb_intern("auto"));
|
2448
2586
|
sym_left = ID2SYM(rb_intern("left"));
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: enumerable-statistics
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.
|
4
|
+
version: 2.0.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kenta Murata
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-05-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -289,7 +289,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
289
289
|
- !ruby/object:Gem::Version
|
290
290
|
version: '0'
|
291
291
|
requirements: []
|
292
|
-
rubygems_version: 3.
|
292
|
+
rubygems_version: 3.5.9
|
293
293
|
signing_key:
|
294
294
|
specification_version: 4
|
295
295
|
summary: Statistics features for Enumerable
|