enumerable-statistics 2.0.7 → 2.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: caf1b8203b177aef325480c7d7d5a4c0ddb7cecdad3d12944d4b2caa71eebe13
4
- data.tar.gz: 00d91f25e099eb2577eb1f6078dafb5c884538fe3863eb1b6795de58c0c3b642
3
+ metadata.gz: 79ac26fd5a37cf391fdc61c8cea73c3b29723ce5b6847f226d131f19279743af
4
+ data.tar.gz: 239e35a3a9e93f442057e77542611e7c53448ae2c5d418f46441c4c9b0aeecb1
5
5
  SHA512:
6
- metadata.gz: 2c9ab8a29752eb46cd00bf8f3f9787d6f151bc10e06d2179fa5f80e3ebb4625b96144a49fd907eb52c02e8f88767f9da1717761643d87e1489677a9eab227af9
7
- data.tar.gz: de2d3d348ead4d6e99cffdb66f95a56ac7cfe603a4450db23deaddb6eeb82e454f79e84c236ea89142ea334999a82c6c307e16ac7085b1afe2895c63a6a8c5d4
6
+ metadata.gz: eb0310d69650f1f5c6e9803356e04ba15f2a67741134fb7acdf50462afb95a51c4a1e6d43cd6820f8ab890ebd1b5ceca4b2b0648fd0eb19c1c2a907d2629cc98
7
+ data.tar.gz: c840a3d0ec6a7393999408d756778aafa2f0d14f271f176cdac590a16f1e81832c220939f311613ec7db05141754f733e561ab318dcae87f3da53e4c052eb35c
@@ -0,0 +1,6 @@
1
+ version: 2
2
+ updates:
3
+ - package-ecosystem: "github-actions"
4
+ directory: "/"
5
+ schedule:
6
+ interval: "weekly"
@@ -1,7 +1,11 @@
1
1
  name: CI
2
2
 
3
3
  on:
4
- - push
4
+ push:
5
+ branches:
6
+ - "**"
7
+ - "!dependabot/**"
8
+ pull_request:
5
9
 
6
10
  jobs:
7
11
  cruby:
@@ -12,36 +16,54 @@ jobs:
12
16
  fail-fast: false
13
17
  matrix:
14
18
  os:
15
- - ubuntu-latest
16
- - macos-latest
17
- - windows-latest
19
+ - ubuntu-latest
20
+ - macos-latest
21
+ - windows-latest
18
22
  ruby:
19
- - 3.0
20
- - 2.7
21
- - 2.6
22
- - 2.5
23
- - 2.4
24
- - debug
23
+ - 3.4
24
+ - 3.3
25
+ - 3.2
26
+ - 3.1
27
+ - 3.0
28
+ - 2.7
29
+ - 2.6
30
+ - 2.5
31
+ - 2.4
32
+ - debug
25
33
  exclude:
26
- - os: windows-latest
27
- ruby: 3.0
28
- - os: windows-latest
29
- ruby: debug
34
+ - os: macos-latest
35
+ ruby: 2.5
36
+ - os: macos-latest
37
+ ruby: 2.4
38
+ - os: windows-latest
39
+ ruby: debug
30
40
 
31
41
  steps:
32
- - uses: actions/checkout@v2
42
+ - uses: actions/checkout@v6
33
43
 
34
- - name: Setup Ruby
35
- uses: ruby/setup-ruby@v1
36
- with:
37
- ruby-version: ${{ matrix.ruby }}
44
+ - name: Setup Ruby
45
+ uses: ruby/setup-ruby@v1
46
+ with:
47
+ ruby-version: ${{ matrix.ruby }}
38
48
 
39
- - run: gem install bundler
40
- - run: bundle install
49
+ - name: Detect installable bundler version
50
+ run: |
51
+ case "${{ matrix.ruby }}" in
52
+ 2.7|2.6) bundler_version="2.4.22" ;;
53
+ 2.5|2.4) bundler_version="2.3.27" ;;
54
+ *) bundler_version="" ;;
55
+ esac
56
+ echo "bundler_version=$bundler_version" >> $GITHUB_ENV
57
+ shell: bash
41
58
 
42
- - run: rake --trace compile
59
+ - run: gem install bundler${bundler_version:+ -v $bundler_version}
60
+ shell: bash
43
61
 
44
- - run: rake build
45
- - run: gem install pkg/*gem
62
+ - run: bundle install
46
63
 
47
- - run: rake
64
+ - run: rake --trace compile
65
+
66
+ - run: rake build
67
+ - run: gem install pkg/*gem
68
+
69
+ - run: rake
@@ -0,0 +1,46 @@
1
+ name: Release
2
+ on:
3
+ push:
4
+ tags:
5
+ - "*"
6
+ jobs:
7
+ github:
8
+ name: GitHub
9
+ runs-on: ubuntu-latest
10
+ timeout-minutes: 10
11
+ steps:
12
+ - uses: actions/checkout@v6
13
+ - name: Extract release note
14
+ run: |
15
+ ruby \
16
+ -e 'print("## Enumerable::Statistics: "); \
17
+ puts(ARGF.read.split(/^# /)[1].strip)' \
18
+ CHANGELOG.md > release-note.md
19
+ - name: Upload to release
20
+ run: |
21
+ title=$(head -n1 release-note.md | sed -e 's/^## //')
22
+ tail -n +2 release-note.md > release-note-without-version.md
23
+ gh release create ${GITHUB_REF_NAME} \
24
+ --discussion-category Announcements \
25
+ --notes-file release-note-without-version.md \
26
+ --title "${title}"
27
+ env:
28
+ GH_TOKEN: ${{ github.token }}
29
+
30
+ rubygems:
31
+ name: RubyGems
32
+ runs-on: ubuntu-latest
33
+ timeout-minutes: 10
34
+ permissions:
35
+ id-token: write
36
+ environment: release
37
+ steps:
38
+ - uses: actions/checkout@v6
39
+ - uses: ruby/setup-ruby@v1
40
+ with:
41
+ ruby-version: ruby
42
+ bundler-cache: true
43
+ - uses: rubygems/configure-rubygems-credentials@v1.0.0
44
+ - name: Push gems
45
+ run: |
46
+ bundle exec rake release:rubygem_push
data/CHANGELOG.md CHANGED
@@ -1,3 +1,26 @@
1
+ # 2.0.9
2
+
3
+ - Add missing `#include <float.h>`
4
+ - [GH-35](https://github.com/red-data-tools/enumerable-statistics/issues/35)
5
+ - Patch by Tadashi Saito
6
+ - Suppress `method redefined` warnings
7
+ - [GH-43](https://github.com/red-data-tools/enumerable-statistics/issues/43)
8
+ - [GH-46](https://github.com/red-data-tools/enumerable-statistics/issues/46)
9
+ - Reported by forthrin
10
+ - Patch by kojix2
11
+
12
+ ## Thanks
13
+
14
+ - Tadashi Saito
15
+ - forthrin
16
+ - kojix2
17
+
18
+ # 2.0.8
19
+
20
+ - Prohibit the use of both `nbins` and `edges` kwargs simultaneously in the `histogram` method.
21
+ - Support `skip_na` kwarg in `sum` and related methods.
22
+ - Support Ruby 3.4+.
23
+
1
24
  # 2.0.7
2
25
 
3
26
  - Fix the bug of histogram with bin range that is smaller than value range
data/Gemfile CHANGED
@@ -2,3 +2,12 @@ source 'https://rubygems.org'
2
2
 
3
3
  # Specify your gem's dependencies in enumerable-statistics.gemspec
4
4
  gemspec
5
+
6
+ gem "benchmark-driver"
7
+ gem "bundler"
8
+ gem "fuubar"
9
+ gem "rake"
10
+ gem "rake-compiler", ">= 0.9.8"
11
+ gem "rspec", ">= 3.4"
12
+ gem "test-unit"
13
+ gem "yard"
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Enumerable::Statistics
2
2
 
3
- [![Build Status](https://travis-ci.org/mrkn/enumerable-statistics.svg?branch=master)](https://travis-ci.org/mrkn/enumerable-statistics)
3
+ [![Build Status](https://github.com/red-data-tools/enumerable-statistics/actions/workflows/ci.yml/badge.svg)](https://github.com/red-data-tools/enumerable-statistics/actions/workflows/ci.yml)
4
4
 
5
5
  Enumerable::Statistics provides some methods to calculate statistical summary in arrays and enumerables.
6
6
 
@@ -30,6 +30,9 @@ require 'enumerable/statistics'
30
30
 
31
31
  The following methods are supplied by this library:
32
32
 
33
+ - `Array#sum`, `Enumerable#sum`
34
+ - Calculates a sum of values in an array or an enumerable
35
+ - Supports `skip_na: true` to skip `nil` and `NaN` values
33
36
  - `Array#mean`, `Enumerable#mean`
34
37
  - Calculates a mean of values in an array or an enumerable
35
38
  - `Array#variance`, `Enumerable#variance`
@@ -96,5 +99,5 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
96
99
 
97
100
  ## Contributing
98
101
 
99
- Bug reports and pull requests are welcome on GitHub at https://github.com/mrkn/enumerable-statistics.
102
+ Bug reports and pull requests are welcome on GitHub at https://github.com/red-data-tools/enumerable-statistics.
100
103
 
data/Rakefile CHANGED
@@ -29,3 +29,13 @@ task :bench do
29
29
  end
30
30
 
31
31
  task default: [:test, :spec]
32
+
33
+ release_task = Rake.application["release"]
34
+ # We use Trusted Publishing.
35
+ release_task.prerequisites.delete("build")
36
+ release_task.prerequisites.delete("release:rubygem_push")
37
+ release_task_comment = release_task.comment
38
+ if release_task_comment
39
+ release_task.clear_comments
40
+ release_task.comment = release_task_comment.gsub(/ and build.*$/, "")
41
+ end
@@ -17,7 +17,7 @@ Gem::Specification.new do |spec|
17
17
 
18
18
  spec.summary = %q{Statistics features for Enumerable}
19
19
  spec.description = %q{This library provides statistics features for Enumerable}
20
- spec.homepage = "https://github.com/mrkn/enumerable-statistics"
20
+ spec.homepage = "https://github.com/red-data-tools/enumerable-statistics"
21
21
 
22
22
  spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
23
23
  spec.bindir = "exe"
@@ -27,12 +27,11 @@ Gem::Specification.new do |spec|
27
27
 
28
28
  spec.required_ruby_version = '>= 2.4'
29
29
 
30
- spec.add_development_dependency "bundler"
31
- spec.add_development_dependency "rake"
32
- spec.add_development_dependency "rake-compiler", ">= 0.9.8"
33
- spec.add_development_dependency "rspec", ">= 3.4"
34
- spec.add_development_dependency "test-unit"
35
- spec.add_development_dependency "fuubar"
36
- spec.add_development_dependency "yard"
37
- spec.add_development_dependency "benchmark-driver"
30
+ spec.metadata = {
31
+ "bug_tracker_uri" => "https://github.com/red-data-tools/enumerable-statistics/issues",
32
+ "changelog_uri" =>
33
+ "https://github.com/red-data-tools/enumerable-statistics/releases/tag/v#{spec.version}",
34
+ "homepage_uri" => spec.homepage,
35
+ "source_code_uri" => "https://github.com/red-data-tools/enumerable-statistics.git",
36
+ }
38
37
  end
@@ -2,6 +2,7 @@
2
2
  #include <ruby/util.h>
3
3
  #include <ruby/version.h>
4
4
  #include <assert.h>
5
+ #include <float.h>
5
6
  #include <math.h>
6
7
 
7
8
  #if RUBY_API_VERSION_CODE >= 20400
@@ -96,11 +97,14 @@ static VALUE half_in_rational;
96
97
  static ID idPow, idPLUS, idMINUS, idSTAR, idDIV, idGE;
97
98
  static ID id_eqeq_p, id_idiv, id_negate, id_to_f, id_cmp, id_nan_p;
98
99
  static ID id_each, id_real_p, id_sum, id_population, id_closed, id_edge;
100
+ static ID id_skip_na;
99
101
 
100
- static VALUE sym_auto, sym_left, sym_right;
102
+ static VALUE sym_auto, sym_left, sym_right, sym_sturges;
101
103
 
102
104
  static VALUE cHistogram;
103
105
 
106
+ static ID id_builtin_sum;
107
+
104
108
  inline static VALUE
105
109
  f_add(VALUE x, VALUE y)
106
110
  {
@@ -135,7 +139,7 @@ complex_new(VALUE klass, VALUE real, VALUE imag)
135
139
  {
136
140
  assert(!RB_TYPE_P(real, T_COMPLEX));
137
141
 
138
- NEWOBJ_OF(obj, struct RComplex, klass, T_COMPLEX | (RGENGC_WB_PROTECTED_COMPLEX ? FL_WB_PROTECTED : 0));
142
+ VALUE obj = rb_get_alloc_func(klass)(klass);
139
143
 
140
144
  RCOMPLEX_SET_REAL(obj, real);
141
145
  RCOMPLEX_SET_IMAG(obj, imag);
@@ -535,9 +539,11 @@ f_gcd(VALUE x, VALUE y)
535
539
  inline static VALUE
536
540
  nurat_s_new_internal(VALUE klass, VALUE num, VALUE den)
537
541
  {
538
- NEWOBJ_OF(obj, struct RRational, klass, T_RATIONAL | (RGENGC_WB_PROTECTED_RATIONAL ? FL_WB_PROTECTED : 0));
542
+ VALUE obj = rb_get_alloc_func(klass)(klass);
543
+
539
544
  RRATIONAL_SET_NUM(obj, num);
540
545
  RRATIONAL_SET_DEN(obj, den);
546
+
541
547
  return (VALUE)obj;
542
548
  }
543
549
 
@@ -632,39 +638,67 @@ rb_rational_plus(VALUE self, VALUE other)
632
638
  }
633
639
  #endif
634
640
 
635
- /* call-seq:
636
- * ary.sum
637
- *
638
- * Calculate the sum of the values in `ary`.
639
- * This method utilizes
640
- * [Kahan summation algorithm](https://en.wikipedia.org/wiki/Kahan_summation_algorithm)
641
- * to compensate the result precision when the `ary` includes Float values.
642
- *
643
- * Note that This library does not redefine `sum` method introduced in Ruby 2.4.
644
- *
645
- * @return [Number] A summation value
646
- */
647
- static VALUE
648
- ary_sum(int argc, VALUE* argv, VALUE ary)
641
+ static inline int
642
+ is_na(VALUE v)
643
+ {
644
+ if (NIL_P(v))
645
+ return 1;
646
+
647
+ if (RB_FLOAT_TYPE_P(v) && isnan(RFLOAT_VALUE(v)))
648
+ return 1;
649
+
650
+ if (rb_respond_to(v, id_nan_p) && RTEST(rb_funcall(v, id_nan_p, 0)))
651
+ return 1;
652
+
653
+ return 0;
654
+ }
655
+
656
+ static int opt_skip_na(VALUE opts)
657
+ {
658
+ VALUE skip_na = Qfalse;
659
+
660
+ if (!NIL_P(opts)) {
661
+ #ifdef HAVE_RB_GET_KWARGS
662
+ ID kwargs = id_skip_na;
663
+ rb_get_kwargs(opts, &kwargs, 0, 1, &skip_na);
664
+ #else
665
+ VALUE val = rb_hash_aref(opts, ID2SYM(id_skip_na));
666
+ skip_na = NIL_P(val) ? skip_na : val;
667
+ #endif
668
+ }
669
+
670
+ return RTEST(skip_na);
671
+ }
672
+
673
+ VALUE
674
+ ary_calculate_sum(VALUE ary, VALUE init, int skip_na, long *na_count_out)
649
675
  {
650
676
  VALUE e, v, r;
651
677
  long i, n;
652
678
  int block_given;
653
-
654
- if (rb_scan_args(argc, argv, "01", &v) == 0)
655
- v = LONG2FIX(0);
679
+ long na_count = 0;
656
680
 
657
681
  block_given = rb_block_given_p();
658
682
 
659
- if (RARRAY_LEN(ary) == 0)
660
- return v;
683
+ if (RARRAY_LEN(ary) == 0) {
684
+ if (na_count_out != NULL) {
685
+ *na_count_out = 0;
686
+ }
687
+ return init;
688
+ }
661
689
 
662
690
  n = 0;
663
691
  r = Qundef;
692
+ v = init;
664
693
  for (i = 0; i < RARRAY_LEN(ary); i++) {
665
694
  e = RARRAY_AREF(ary, i);
666
695
  if (block_given)
667
696
  e = rb_yield(e);
697
+ if (skip_na && is_na(e)) {
698
+ ++na_count;
699
+ continue;
700
+ }
701
+
668
702
  if (FIXNUM_P(e)) {
669
703
  n += FIX2LONG(e); /* should not overflow long type */
670
704
  if (!FIXABLE(n)) {
@@ -688,7 +722,7 @@ ary_sum(int argc, VALUE* argv, VALUE ary)
688
722
  v = rb_fix_plus(LONG2FIX(n), v);
689
723
  if (r != Qundef)
690
724
  v = rb_rational_plus(r, v);
691
- return v;
725
+ goto finish;
692
726
 
693
727
  not_exact:
694
728
  if (n != 0)
@@ -708,6 +742,11 @@ not_exact:
708
742
  e = RARRAY_AREF(ary, i);
709
743
  if (block_given)
710
744
  e = rb_yield(e);
745
+ if (skip_na && is_na(e)) {
746
+ ++na_count;
747
+ continue;
748
+ }
749
+
711
750
  if (RB_FLOAT_TYPE_P(e))
712
751
  has_float_value:
713
752
  x = RFLOAT_VALUE(e);
@@ -725,7 +764,9 @@ not_exact:
725
764
  c = (t - f) - y;
726
765
  f = t;
727
766
  }
728
- return DBL2NUM(f);
767
+
768
+ v = DBL2NUM(f);
769
+ goto finish;
729
770
 
730
771
  not_float:
731
772
  v = DBL2NUM(f);
@@ -736,13 +777,53 @@ not_exact:
736
777
  e = RARRAY_AREF(ary, i);
737
778
  if (block_given)
738
779
  e = rb_yield(e);
780
+ if (skip_na && is_na(e)) {
781
+ ++na_count;
782
+ continue;
783
+ }
739
784
  has_some_value:
740
785
  v = rb_funcall(v, idPLUS, 1, e);
741
786
  }
742
787
 
788
+ finish:
789
+ if (na_count_out != NULL) {
790
+ *na_count_out = na_count;
791
+ }
743
792
  return v;
744
793
  }
745
794
 
795
+ /* call-seq:
796
+ * ary.sum(skip_na: false)
797
+ *
798
+ * Calculate the sum of the values in `ary`.
799
+ * This method utilizes
800
+ * [Kahan summation algorithm](https://en.wikipedia.org/wiki/Kahan_summation_algorithm)
801
+ * to compensate the result precision when the `ary` includes Float values.
802
+ *
803
+ * Redefines `sum` (Ruby >= 2.4). Original is aliased as `__sum__`.
804
+ *
805
+ * @return [Number] A summation value
806
+ */
807
+ static VALUE
808
+ ary_sum(int argc, VALUE* argv, VALUE ary) /* Array#sum([init], skip_na: false) entry point */
809
+ {
810
+ VALUE v, opts;
811
+ int skip_na;
812
+
813
+ if (rb_scan_args(argc, argv, "01:", &v, &opts) == 0) {
814
+ v = LONG2FIX(0); /* default initial value when no positional argument is given */
815
+ }
816
+ skip_na = opt_skip_na(opts);
817
+
818
+ #ifndef HAVE_ENUM_SUM
819
+ if (!skip_na) {
820
+ return rb_funcall_passing_block(ary, id_builtin_sum, 1, &v); /* argv-style call: pass exactly the init value (argc may also count the keyword hash) and forward the caller's block */
821
+ }
822
+ #endif
823
+
824
+ return ary_calculate_sum(ary, v, skip_na, NULL); /* NULL: the NA count is not needed here */
825
+ }
826
+
746
827
  static void
747
828
  calculate_and_set_mean(VALUE *mean_ptr, VALUE sum, long const n)
748
829
  {
@@ -771,9 +852,10 @@ calculate_and_set_mean(VALUE *mean_ptr, VALUE sum, long const n)
771
852
  }
772
853
 
773
854
  static void
774
- ary_mean_variance(VALUE ary, VALUE *mean_ptr, VALUE *variance_ptr, size_t ddof)
855
+ ary_mean_variance(VALUE ary, VALUE *mean_ptr, VALUE *variance_ptr, size_t ddof, int skip_na)
775
856
  {
776
857
  long i;
858
+ long na_count;
777
859
  size_t n = 0;
778
860
  double m = 0.0, m2 = 0.0, f = 0.0, c = 0.0;
779
861
 
@@ -797,8 +879,8 @@ ary_mean_variance(VALUE ary, VALUE *mean_ptr, VALUE *variance_ptr, size_t ddof)
797
879
 
798
880
  if (variance_ptr == NULL) {
799
881
  VALUE init = DBL2NUM(0.0);
800
- VALUE const sum = ary_sum(1, &init, ary);
801
- long const n = RARRAY_LEN(ary);
882
+ VALUE const sum = ary_calculate_sum(ary, init, skip_na, &na_count);
883
+ long const n = RARRAY_LEN(ary) - na_count;
802
884
  calculate_and_set_mean(mean_ptr, sum, n);
803
885
  return;
804
886
  }
@@ -839,26 +921,46 @@ ary_mean_variance(VALUE ary, VALUE *mean_ptr, VALUE *variance_ptr, size_t ddof)
839
921
  }
840
922
  }
841
923
 
842
- static int
843
- opt_population_p(VALUE opts)
924
+ struct variance_opts {
925
+ int population;
926
+ int skip_na;
927
+ };
928
+
929
+ static void
930
+ get_variance_opts(VALUE opts, struct variance_opts *out) /* Read `population:` and `skip_na:` from opts into *out */
844
931
  {
845
- VALUE population = Qfalse;
932
+ assert(out != NULL);
933
+
934
+ out->population = 0; /* defaults kept when the keyword is absent */
935
+ out->skip_na = 0;
846
936
 
847
937
  if (!NIL_P(opts)) {
848
938
  #ifdef HAVE_RB_GET_KWARGS
849
- ID kwargs = id_population;
850
- rb_get_kwargs(opts, &kwargs, 0, 1, &population);
939
+ static ID kwarg_keys[2];
940
+ VALUE kwarg_vals[2]; /* one output slot per entry in kwarg_keys */
941
+
942
+ if (!kwarg_keys[0]) {
943
+ kwarg_keys[0] = id_population;
944
+ kwarg_keys[1] = id_skip_na;
945
+ }
946
+
947
+ rb_get_kwargs(opts, kwarg_keys, 0, 2, kwarg_vals); /* both keywords optional; the array decays to the expected const ID * */
948
+ out->population = (kwarg_vals[0] != Qundef) ? RTEST(kwarg_vals[0]) : out->population;
949
+ out->skip_na = (kwarg_vals[1] != Qundef) ? RTEST(kwarg_vals[1]) : out->skip_na;
851
950
  #else
852
- VALUE val = rb_hash_aref(opts, ID2SYM(id_population));
853
- population = NIL_P(val) ? population : val;
951
+ VALUE val;
952
+
953
+ val = rb_hash_aref(opts, ID2SYM(id_population));
954
+ out->population = NIL_P(val) ? out->population : RTEST(val);
955
+
956
+ val = rb_hash_aref(opts, ID2SYM(id_skip_na));
957
+ out->skip_na = NIL_P(val) ? out->skip_na : RTEST(val);
854
958
  #endif
855
959
  }
856
-
857
- return RTEST(population);
858
960
  }
859
961
 
860
962
  /* call-seq:
861
- * ary.mean_variance(population: false)
963
+ * ary.mean_variance(population: false, skip_na: false)
862
964
  *
863
965
  * Calculate a mean and a variance of the values in `ary`.
864
966
  * The first element of the result array is the mean, and the second is the variance.
@@ -876,19 +978,21 @@ opt_population_p(VALUE opts)
876
978
  static VALUE
877
979
  ary_mean_variance_m(int argc, VALUE* argv, VALUE ary)
878
980
  {
879
- VALUE opts, mean, variance;
981
+ struct variance_opts options;
982
+ VALUE opts, mean = Qnil, variance = Qnil;
880
983
  size_t ddof = 1;
881
984
 
882
985
  rb_scan_args(argc, argv, "0:", &opts);
883
- if (opt_population_p(opts))
986
+ get_variance_opts(opts, &options);
987
+ if (options.population)
884
988
  ddof = 0;
885
989
 
886
- ary_mean_variance(ary, &mean, &variance, ddof);
990
+ ary_mean_variance(ary, &mean, &variance, ddof, options.skip_na);
887
991
  return rb_assoc_new(mean, variance);
888
992
  }
889
993
 
890
994
  /* call-seq:
891
- * ary.mean
995
+ * ary.mean(skip_na: false)
892
996
  *
893
997
  * Calculate a mean of the values in `ary`.
894
998
  * This method utilizes
@@ -898,15 +1002,20 @@ ary_mean_variance_m(int argc, VALUE* argv, VALUE ary)
898
1002
  * @return [Number] A mean value
899
1003
  */
900
1004
  static VALUE
901
- ary_mean(VALUE ary)
1005
+ ary_mean(int argc, VALUE *argv, VALUE ary)
902
1006
  {
903
- VALUE mean;
904
- ary_mean_variance(ary, &mean, NULL, 1);
1007
+ VALUE mean = Qnil, opts;
1008
+ int skip_na;
1009
+
1010
+ rb_scan_args(argc, argv, ":", &opts);
1011
+ skip_na = opt_skip_na(opts);
1012
+
1013
+ ary_mean_variance(ary, &mean, NULL, 1, skip_na);
905
1014
  return mean;
906
1015
  }
907
1016
 
908
1017
  /* call-seq:
909
- * ary.variance(population: false)
1018
+ * ary.variance(population: false, skip_na: false)
910
1019
  *
911
1020
  * Calculate a variance of the values in `ary`.
912
1021
  * This method scan values in `ary` only once,
@@ -922,14 +1031,16 @@ ary_mean(VALUE ary)
922
1031
  static VALUE
923
1032
  ary_variance(int argc, VALUE* argv, VALUE ary)
924
1033
  {
1034
+ struct variance_opts options;
925
1035
  VALUE opts, variance;
926
1036
  size_t ddof = 1;
927
1037
 
928
1038
  rb_scan_args(argc, argv, "0:", &opts);
929
- if (opt_population_p(opts))
1039
+ get_variance_opts(opts, &options);
1040
+ if (options.population)
930
1041
  ddof = 0;
931
1042
 
932
- ary_mean_variance(ary, NULL, &variance, ddof);
1043
+ ary_mean_variance(ary, NULL, &variance, ddof, options.skip_na);
933
1044
  return variance;
934
1045
  }
935
1046
 
@@ -943,6 +1054,7 @@ struct enum_sum_memo {
943
1054
  double f, c;
944
1055
  int block_given;
945
1056
  int float_value;
1057
+ int skip_na;
946
1058
  };
947
1059
 
948
1060
  static void
@@ -956,8 +1068,12 @@ sum_iter(VALUE e, struct enum_sum_memo *memo)
956
1068
  double f = memo->f;
957
1069
  double c = memo->c;
958
1070
 
959
- if (memo->block_given)
1071
+ if (memo->block_given) {
960
1072
  e = rb_yield(e);
1073
+ }
1074
+ if (memo->skip_na && is_na(e)) {
1075
+ return;
1076
+ }
961
1077
 
962
1078
  memo->count += 1;
963
1079
 
@@ -1090,7 +1206,7 @@ int_range_sum_count(VALUE beg, VALUE end, int excl,
1090
1206
  }
1091
1207
 
1092
1208
  static void
1093
- enum_sum_count(VALUE obj, VALUE init, VALUE *sum_ptr, long *count_ptr)
1209
+ enum_sum_count(VALUE obj, VALUE init, int skip_na, VALUE *sum_ptr, long *count_ptr)
1094
1210
  {
1095
1211
  struct enum_sum_memo memo;
1096
1212
  VALUE beg, end;
@@ -1101,6 +1217,7 @@ enum_sum_count(VALUE obj, VALUE init, VALUE *sum_ptr, long *count_ptr)
1101
1217
  memo.block_given = rb_block_given_p();
1102
1218
  memo.n = 0;
1103
1219
  memo.r = Qundef;
1220
+ memo.skip_na = skip_na;
1104
1221
 
1105
1222
  if ((memo.float_value = RB_FLOAT_TYPE_P(memo.v))) {
1106
1223
  memo.f = RFLOAT_VALUE(memo.v);
@@ -1138,32 +1255,42 @@ enum_sum_count(VALUE obj, VALUE init, VALUE *sum_ptr, long *count_ptr)
1138
1255
  *count_ptr = memo.count;
1139
1256
  }
1140
1257
 
1141
- #ifndef HAVE_ENUM_SUM
1142
1258
  /* call-seq:
1143
- * enum.sum
1259
+ * enum.sum(skip_na: false)
1144
1260
  *
1145
1261
  * Calculate the sum of the values in `enum`.
1146
1262
  * This method utilizes
1147
1263
  * [Kahan summation algorithm](https://en.wikipedia.org/wiki/Kahan_summation_algorithm)
1148
1264
  * to compensate the result precision when the `enum` includes Float values.
1149
1265
  *
1150
- * Note that This library does not redefine `sum` method introduced in Ruby 2.4.
1266
+ * Redefines `sum` (Ruby >= 2.4). Original is aliased as `__sum__`.
1151
1267
  *
1152
1268
  * @return [Number] A summation value
1153
1269
  */
1154
1270
  static VALUE
1155
1271
  enum_sum(int argc, VALUE* argv, VALUE obj)
1156
1272
  {
1157
- VALUE sum, init;
1273
+ VALUE sum, init, opts;
1274
+ int skip_na;
1158
1275
 
1159
- if (rb_scan_args(argc, argv, "01", &init) == 0)
1276
+ if (rb_scan_args(argc, argv, "01:", &init, &opts) == 0) {
1160
1277
  init = LONG2FIX(0);
1278
+ }
1279
+ skip_na = opt_skip_na(opts);
1161
1280
 
1162
- enum_sum_count(obj, init, &sum, NULL);
1281
+ #ifndef HAVE_ENUM_SUM
1282
+ if (skip_na) {
1283
+ enum_sum_count(obj, init, skip_na, &sum, NULL);
1284
+ }
1285
+ else {
1286
+ sum = rb_funcall(obj, id_builtin_sum, argc, &init);
1287
+ }
1288
+ #else
1289
+ enum_sum_count(obj, init, skip_na, &sum, NULL);
1290
+ #endif
1163
1291
 
1164
1292
  return sum;
1165
1293
  }
1166
- #endif
1167
1294
 
1168
1295
  struct enum_mean_variance_memo {
1169
1296
  int block_given;
@@ -1253,7 +1380,7 @@ enum_mean_variance(VALUE obj, VALUE *mean_ptr, VALUE *variance_ptr, size_t ddof)
1253
1380
  long n;
1254
1381
  VALUE sum;
1255
1382
  VALUE init = DBL2NUM(0.0);
1256
- enum_sum_count(obj, init, &sum, &n);
1383
+ enum_sum_count(obj, init, 0, &sum, &n); /* TODO: skip_na */
1257
1384
  if (n > 0)
1258
1385
  calculate_and_set_mean(mean_ptr, sum, n);
1259
1386
  return;
@@ -1303,11 +1430,13 @@ enum_mean_variance(VALUE obj, VALUE *mean_ptr, VALUE *variance_ptr, size_t ddof)
1303
1430
  static VALUE
1304
1431
  enum_mean_variance_m(int argc, VALUE* argv, VALUE obj)
1305
1432
  {
1433
+ struct variance_opts options;
1306
1434
  VALUE opts, mean, variance;
1307
1435
  size_t ddof = 1;
1308
1436
 
1309
1437
  rb_scan_args(argc, argv, "0:", &opts);
1310
- if (opt_population_p(opts))
1438
+ get_variance_opts(opts, &options);
1439
+ if (options.population)
1311
1440
  ddof = 0;
1312
1441
 
1313
1442
  enum_mean_variance(obj, &mean, &variance, ddof);
@@ -1349,11 +1478,13 @@ enum_mean(VALUE obj)
1349
1478
  static VALUE
1350
1479
  enum_variance(int argc, VALUE* argv, VALUE obj)
1351
1480
  {
1481
+ struct variance_opts options;
1352
1482
  VALUE opts, variance;
1353
1483
  size_t ddof = 1;
1354
1484
 
1355
1485
  rb_scan_args(argc, argv, "0:", &opts);
1356
- if (opt_population_p(opts))
1486
+ get_variance_opts(opts, &options);
1487
+ if (options.population)
1357
1488
  ddof = 0;
1358
1489
 
1359
1490
  enum_mean_variance(obj, NULL, &variance, ddof);
@@ -1392,11 +1523,13 @@ sqrt_value(VALUE x)
1392
1523
  static VALUE
1393
1524
  enum_mean_stdev(int argc, VALUE* argv, VALUE obj)
1394
1525
  {
1526
+ struct variance_opts options;
1395
1527
  VALUE opts, mean, variance;
1396
1528
  size_t ddof = 1;
1397
1529
 
1398
1530
  rb_scan_args(argc, argv, "0:", &opts);
1399
- if (opt_population_p(opts))
1531
+ get_variance_opts(opts, &options);
1532
+ if (options.population)
1400
1533
  ddof = 0;
1401
1534
 
1402
1535
  enum_mean_variance(obj, &mean, &variance, ddof);
@@ -1446,14 +1579,16 @@ enum_stdev(int argc, VALUE* argv, VALUE obj)
1446
1579
  static VALUE
1447
1580
  ary_mean_stdev(int argc, VALUE* argv, VALUE ary)
1448
1581
  {
1582
+ struct variance_opts options;
1449
1583
  VALUE opts, mean, variance;
1450
1584
  size_t ddof = 1;
1451
1585
 
1452
1586
  rb_scan_args(argc, argv, "0:", &opts);
1453
- if (opt_population_p(opts))
1587
+ get_variance_opts(opts, &options);
1588
+ if (options.population)
1454
1589
  ddof = 0;
1455
1590
 
1456
- ary_mean_variance(ary, &mean, &variance, ddof);
1591
+ ary_mean_variance(ary, &mean, &variance, ddof, options.skip_na);
1457
1592
  VALUE stdev = sqrt_value(variance);
1458
1593
  return rb_assoc_new(mean, stdev);
1459
1594
  }
@@ -1479,21 +1614,6 @@ ary_stdev(int argc, VALUE* argv, VALUE ary)
1479
1614
  return stdev;
1480
1615
  }
1481
1616
 
1482
- static inline int
1483
- is_na(VALUE v)
1484
- {
1485
- if (NIL_P(v))
1486
- return 1;
1487
-
1488
- if (RB_FLOAT_TYPE_P(v) && isnan(RFLOAT_VALUE(v)))
1489
- return 1;
1490
-
1491
- if (rb_respond_to(v, id_nan_p) && RTEST(rb_funcall(v, id_nan_p, 0)))
1492
- return 1;
1493
-
1494
- return 0;
1495
- }
1496
-
1497
1617
  static int
1498
1618
  ary_percentile_sort_cmp(const void *ap, const void *bp, void *dummy)
1499
1619
  {
@@ -1900,7 +2020,7 @@ any_value_counts(int argc, VALUE *argv, VALUE obj,
1900
2020
  struct value_counts_opts opts;
1901
2021
  struct value_counts_memo memo;
1902
2022
 
1903
- rb_scan_args(argc, argv, ":", &kwargs);
2023
+ rb_scan_args(argc, argv, "0:", &kwargs);
1904
2024
  value_counts_extract_opts(kwargs, &opts);
1905
2025
 
1906
2026
  memo.result = rb_hash_new();
@@ -2255,9 +2375,9 @@ ary_histogram_calculate_edge_lo_hi(const double lo, const double hi, const long
2255
2375
  }
2256
2376
 
2257
2377
  static VALUE
2258
- ary_histogram_calculate_edge(VALUE ary, const long nbins, const int left_p)
2378
+ ary_histogram_calculate_edge(VALUE ary, VALUE arg0, const int left_p)
2259
2379
  {
2260
- long n;
2380
+ long n, nbins;
2261
2381
  VALUE minmax;
2262
2382
  VALUE edge = Qnil;
2263
2383
  double lo, hi;
@@ -2265,6 +2385,22 @@ ary_histogram_calculate_edge(VALUE ary, const long nbins, const int left_p)
2265
2385
  Check_Type(ary, T_ARRAY);
2266
2386
  n = RARRAY_LEN(ary);
2267
2387
 
2388
+ if (NIL_P(arg0)) {
2389
+ arg0 = sym_auto;
2390
+ }
2391
+
2392
+ if (RB_TYPE_P(arg0, T_SYMBOL)) {
2393
+ if (arg0 != sym_auto && arg0 != sym_sturges) {
2394
+ rb_raise(rb_eArgError, "Unknown method to calculate bin width: %+"PRIsVALUE, arg0);
2395
+ }
2396
+ else {
2397
+ nbins = sturges(n);
2398
+ }
2399
+ }
2400
+ else {
2401
+ nbins = NUM2LONG(arg0);
2402
+ }
2403
+
2268
2404
  if (n == 0 && nbins < 0) {
2269
2405
  rb_raise(rb_eArgError, "nbins must be >= 0 for an empty array, got %ld", nbins);
2270
2406
  }
@@ -2337,19 +2473,13 @@ static VALUE
2337
2473
  ary_histogram(int argc, VALUE *argv, VALUE ary)
2338
2474
  {
2339
2475
  VALUE arg0, kwargs, bin_weights;
2340
- long nbins, n_bin_weights, i;
2476
+ long n_bin_weights, i;
2341
2477
 
2342
2478
  VALUE weight_array = Qnil;
2343
2479
  VALUE edges = Qnil;
2344
2480
  int left_p = 1;
2345
2481
 
2346
2482
  rb_scan_args(argc, argv, "01:", &arg0, &kwargs);
2347
- if (NIL_P(arg0) || arg0 == sym_auto) {
2348
- nbins = sturges(RARRAY_LEN(ary));
2349
- }
2350
- else {
2351
- nbins = NUM2LONG(arg0);
2352
- }
2353
2483
 
2354
2484
  if (!NIL_P(kwargs)) {
2355
2485
  enum { kw_weights, kw_edges, kw_closed };
@@ -2370,7 +2500,10 @@ ary_histogram(int argc, VALUE *argv, VALUE ary)
2370
2500
  }
2371
2501
 
2372
2502
  if (NIL_P(edges)) {
2373
- edges = ary_histogram_calculate_edge(ary, nbins, left_p);
2503
+ edges = ary_histogram_calculate_edge(ary, arg0, left_p);
2504
+ }
2505
+ else if (! NIL_P(arg0)) {
2506
+ rb_raise(rb_eArgError, "Unable to use both `nbins` and `edges` together");
2374
2507
  }
2375
2508
 
2376
2509
  n_bin_weights = RARRAY_LEN(edges) - 1;
@@ -2395,10 +2528,12 @@ Init_extension(void)
2395
2528
  rb_ext_ractor_safe(true);
2396
2529
  #endif
2397
2530
 
2398
- #ifndef HAVE_ENUM_SUM
2399
- rb_define_method(rb_mEnumerable, "sum", enum_sum, -1);
2400
- #endif
2531
+ mEnumerableStatistics = rb_const_get_at(rb_cObject, rb_intern("EnumerableStatistics"));
2401
2532
 
2533
+ id_builtin_sum = rb_intern("__sum__");
2534
+
2535
+ rb_define_alias(rb_mEnumerable, "__sum__", "sum");
2536
+ rb_define_method(rb_mEnumerable, "sum", enum_sum, -1);
2402
2537
  rb_define_method(rb_mEnumerable, "mean_variance", enum_mean_variance_m, -1);
2403
2538
  rb_define_method(rb_mEnumerable, "mean", enum_mean, 0);
2404
2539
  rb_define_method(rb_mEnumerable, "variance", enum_variance, -1);
@@ -2406,11 +2541,10 @@ Init_extension(void)
2406
2541
  rb_define_method(rb_mEnumerable, "stdev", enum_stdev, -1);
2407
2542
  rb_define_method(rb_mEnumerable, "value_counts", enum_value_counts, -1);
2408
2543
 
2409
- #ifndef HAVE_ARRAY_SUM
2544
+ rb_define_alias(rb_cArray, "__sum__", "sum");
2410
2545
  rb_define_method(rb_cArray, "sum", ary_sum, -1);
2411
- #endif
2412
2546
  rb_define_method(rb_cArray, "mean_variance", ary_mean_variance_m, -1);
2413
- rb_define_method(rb_cArray, "mean", ary_mean, 0);
2547
+ rb_define_method(rb_cArray, "mean", ary_mean, -1);
2414
2548
  rb_define_method(rb_cArray, "variance", ary_variance, -1);
2415
2549
  rb_define_method(rb_cArray, "mean_stdev", ary_mean_stdev, -1);
2416
2550
  rb_define_method(rb_cArray, "stdev", ary_stdev, -1);
@@ -2423,7 +2557,6 @@ Init_extension(void)
2423
2557
  half_in_rational = nurat_s_new_internal(rb_cRational, INT2FIX(1), INT2FIX(2));
2424
2558
  rb_gc_register_mark_object(half_in_rational);
2425
2559
 
2426
- mEnumerableStatistics = rb_const_get_at(rb_cObject, rb_intern("EnumerableStatistics"));
2427
2560
  cHistogram = rb_const_get_at(mEnumerableStatistics, rb_intern("Histogram"));
2428
2561
 
2429
2562
  rb_define_method(rb_cArray, "histogram", ary_histogram, -1);
@@ -2449,6 +2582,7 @@ Init_extension(void)
2449
2582
  id_population = rb_intern("population");
2450
2583
  id_closed = rb_intern("closed");
2451
2584
  id_edge = rb_intern("edge");
2585
+ id_skip_na = rb_intern("skip_na");
2452
2586
 
2453
2587
  sym_auto = ID2SYM(rb_intern("auto"));
2454
2588
  sym_left = ID2SYM(rb_intern("left"));
@@ -1,5 +1,5 @@
1
1
  module EnumerableStatistics
2
- VERSION = '2.0.7'
2
+ VERSION = '2.0.9'
3
3
 
4
4
  module Version
5
5
  numbers, TAG = VERSION.split('-', 2)
metadata CHANGED
@@ -1,127 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: enumerable-statistics
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.7
4
+ version: 2.0.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kenta Murata
8
- autorequire:
9
8
  bindir: exe
10
9
  cert_chain: []
11
- date: 2021-06-24 00:00:00.000000000 Z
12
- dependencies:
13
- - !ruby/object:Gem::Dependency
14
- name: bundler
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - ">="
18
- - !ruby/object:Gem::Version
19
- version: '0'
20
- type: :development
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - ">="
25
- - !ruby/object:Gem::Version
26
- version: '0'
27
- - !ruby/object:Gem::Dependency
28
- name: rake
29
- requirement: !ruby/object:Gem::Requirement
30
- requirements:
31
- - - ">="
32
- - !ruby/object:Gem::Version
33
- version: '0'
34
- type: :development
35
- prerelease: false
36
- version_requirements: !ruby/object:Gem::Requirement
37
- requirements:
38
- - - ">="
39
- - !ruby/object:Gem::Version
40
- version: '0'
41
- - !ruby/object:Gem::Dependency
42
- name: rake-compiler
43
- requirement: !ruby/object:Gem::Requirement
44
- requirements:
45
- - - ">="
46
- - !ruby/object:Gem::Version
47
- version: 0.9.8
48
- type: :development
49
- prerelease: false
50
- version_requirements: !ruby/object:Gem::Requirement
51
- requirements:
52
- - - ">="
53
- - !ruby/object:Gem::Version
54
- version: 0.9.8
55
- - !ruby/object:Gem::Dependency
56
- name: rspec
57
- requirement: !ruby/object:Gem::Requirement
58
- requirements:
59
- - - ">="
60
- - !ruby/object:Gem::Version
61
- version: '3.4'
62
- type: :development
63
- prerelease: false
64
- version_requirements: !ruby/object:Gem::Requirement
65
- requirements:
66
- - - ">="
67
- - !ruby/object:Gem::Version
68
- version: '3.4'
69
- - !ruby/object:Gem::Dependency
70
- name: test-unit
71
- requirement: !ruby/object:Gem::Requirement
72
- requirements:
73
- - - ">="
74
- - !ruby/object:Gem::Version
75
- version: '0'
76
- type: :development
77
- prerelease: false
78
- version_requirements: !ruby/object:Gem::Requirement
79
- requirements:
80
- - - ">="
81
- - !ruby/object:Gem::Version
82
- version: '0'
83
- - !ruby/object:Gem::Dependency
84
- name: fuubar
85
- requirement: !ruby/object:Gem::Requirement
86
- requirements:
87
- - - ">="
88
- - !ruby/object:Gem::Version
89
- version: '0'
90
- type: :development
91
- prerelease: false
92
- version_requirements: !ruby/object:Gem::Requirement
93
- requirements:
94
- - - ">="
95
- - !ruby/object:Gem::Version
96
- version: '0'
97
- - !ruby/object:Gem::Dependency
98
- name: yard
99
- requirement: !ruby/object:Gem::Requirement
100
- requirements:
101
- - - ">="
102
- - !ruby/object:Gem::Version
103
- version: '0'
104
- type: :development
105
- prerelease: false
106
- version_requirements: !ruby/object:Gem::Requirement
107
- requirements:
108
- - - ">="
109
- - !ruby/object:Gem::Version
110
- version: '0'
111
- - !ruby/object:Gem::Dependency
112
- name: benchmark-driver
113
- requirement: !ruby/object:Gem::Requirement
114
- requirements:
115
- - - ">="
116
- - !ruby/object:Gem::Version
117
- version: '0'
118
- type: :development
119
- prerelease: false
120
- version_requirements: !ruby/object:Gem::Requirement
121
- requirements:
122
- - - ">="
123
- - !ruby/object:Gem::Version
124
- version: '0'
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
+ dependencies: []
125
12
  description: This library provides statistics features for Enumerable
126
13
  email:
127
14
  - mrkn@mrkn.jp
@@ -130,7 +17,9 @@ extensions:
130
17
  - ext/enumerable/statistics/extension/extconf.rb
131
18
  extra_rdoc_files: []
132
19
  files:
20
+ - ".github/dependabot.yml"
133
21
  - ".github/workflows/ci.yml"
22
+ - ".github/workflows/release.yml"
134
23
  - ".gitignore"
135
24
  - ".rspec"
136
25
  - ".yardopts"
@@ -270,10 +159,13 @@ files:
270
159
  - yard/templates/mathjax/tags/text/overload.erb
271
160
  - yard/templates/mathjax/tags/text/see.erb
272
161
  - yard/templates/mathjax/tags/text/tag.erb
273
- homepage: https://github.com/mrkn/enumerable-statistics
162
+ homepage: https://github.com/red-data-tools/enumerable-statistics
274
163
  licenses: []
275
- metadata: {}
276
- post_install_message:
164
+ metadata:
165
+ bug_tracker_uri: https://github.com/red-data-tools/enumerable-statistics/issues
166
+ changelog_uri: https://github.com/red-data-tools/enumerable-statistics/releases/tag/v2.0.9
167
+ homepage_uri: https://github.com/red-data-tools/enumerable-statistics
168
+ source_code_uri: https://github.com/red-data-tools/enumerable-statistics.git
277
169
  rdoc_options: []
278
170
  require_paths:
279
171
  - ext
@@ -289,8 +181,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
289
181
  - !ruby/object:Gem::Version
290
182
  version: '0'
291
183
  requirements: []
292
- rubygems_version: 3.2.21
293
- signing_key:
184
+ rubygems_version: 4.0.3
294
185
  specification_version: 4
295
186
  summary: Statistics features for Enumerable
296
187
  test_files: []