RubyGems - enumerable-statistics - Versions diffs - 1.0.1 → 2.0.0.pre - Mend

enumerable-statistics 1.0.1 → 2.0.0.pre

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

checksums.yaml +5 -5
data/.travis.yml +21 -8
data/.yardopts +1 -0
data/CHANGELOG.md +7 -0
data/README.md +8 -0
data/Rakefile +5 -3
data/bench/array_value_counts.yml +42 -0
data/bench/enum_value_counts.yml +42 -0
data/bench/hash_value_counts.yml +42 -0
data/bench/mean.yml +30 -0
data/bench/sum.yml +29 -0
data/bench/variance.yml +39 -0
data/enumerable-statistics.gemspec +16 -7
data/ext/-bench-/extconf.rb +3 -0
data/ext/enumerable/statistics/extension/extconf.rb +12 -0
data/ext/enumerable/statistics/extension/statistics.c +865 -29
data/lib/enumerable/statistics.rb +1 -1
data/lib/enumerable_statistics.rb +2 -0
data/lib/enumerable_statistics/histogram.rb +5 -0
data/lib/enumerable_statistics/version.rb +9 -0
data/templates/default/layout/html/headers.erb +36 -0
metadata +45 -24
data/bench/mean.rb +0 -27
data/bench/sum.rb +0 -26
data/bench/variance.rb +0 -30
data/lib/enumerable/statistics/version.rb +0 -5

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
-SHA1:
-  metadata.gz: 01d8583f76df44d84e20ab090a5589e44e33dacf
-  data.tar.gz: c070a1c7b007646ee0769b9e4924c0af0914341d
+SHA256:
+  metadata.gz: d632cae80814e40cb6247d76cdfa74999e30effa4f969935a2deef35937c4f5e
+  data.tar.gz: 85c9ba3067efd94649e01836ebf04cf0ddebc2a69fea06f65a4efd067a9dffe7
 SHA512:
-  metadata.gz: 7b3027aea941fa441b21c53ff8570b712cd576b42d97d9b7e3c421c92ad9c79552860a4f1c3bfd86da5c7f654d5396ab23415a0f861a4290ac1971ced29372e4
-  data.tar.gz: d082b4b5c2dd4c91b40400ccbbcd68859e6811bc14add0416133c8a70d67c97b4af10ae4f53fef4fc73efcc139d7f3d224baa5e7b983c4428880bc671039a935
+  metadata.gz: 1ee5828934ed01b5bc2f3173816bfe24914f36641df6a99b2aff355478d74f13fd36201547776c06d8b4150784176420be8ceea1bbdd2fa9397d6930522caff1
+  data.tar.gz: 7a4a87570189ff29bef98fdeef30f2640eebbea550419229b9ffeb89c9d032fbe72082e6be00cd5fb9a85be2e515d6211b057f5fd1c4834ad6cb8c5153135a5f

data/.travis.yml CHANGED

@@ -1,19 +1,32 @@
 ---
-language: ruby
+# Travis CI reads build-notification settings from the `notifications` key.
+notifications:
+  email:
+  - mrkn@ruby-lang.org
-rvm:
-  - ruby-head
-  - 2.3.0
-  - 2.2.4
-  - 2.1
+language: ruby
 before_install:
   - gem update --system
-  - gem update bundler
+  - gem install bundler
 install:
   - bundle install
 script:
-  - bundle exec rake clobber compile
+  - bundle exec rake --trace clobber compile
   - bundle exec rake spec
+matrix:
+  include:
+    - name: "2.3"
+      rvm: 2.3
+    - name: "2.4"
+      rvm: 2.4.5
+    - name: "2.5"
+      rvm: 2.5.2
+    - name: "2.6"
+      rvm: 2.6
+    - name: "trunk"
+      rvm: ruby-head
+  allow_failures:
+    - rvm: 2.3

data/.yardopts CHANGED

	@@ -1 +1,2 @@
1 1	--markup markdown
2	+ -p templates

data/CHANGELOG.md CHANGED

@@ -1,3 +1,10 @@
+# 2.0.0-pre
+- Add `value_counts` method in Array, Hash, and Enumerable
+- Add `median` method in Array
+- Add `percentile` method in Array
+- Add `histogram` method in Array
 # 1.0.1
 - Add `mean_variance` method in Array class and Enumerable module

data/README.md CHANGED

@@ -40,6 +40,14 @@ The following methods are supplied by this library:
   - Calculates a mean and a variance simultaneously
 - `Array#mean_stdev`, `Enumerable#mean_stdev`
   - Calculates a mean and a standard deviation simultaneously
+- `Array#median`
+  - Calculates a median of values in an array
+- `Array#percentile(q)`
+  - Calculates a percentile or percentiles of values in an array
+- `Array#value_counts`, `Enumerable#value_counts`, and `Hash#value_counts`
+  - Counts the number of occurrences of each value in the container
+- `Array#histogram`
+  - Calculates a histogram of the values in the array
 Moreover, for Ruby < 2.4, `Array#sum` and `Enumerable#sum` are provided.

data/Rakefile CHANGED

@@ -6,15 +6,17 @@ task :default => :spec
 Rake::ExtensionTask.new('enumerable/statistics/extension')
+directory 'lib/enumerable/statistics'
 RSpec::Core::RakeTask.new(:spec)
 task :bench do
   puts "# sum\n"
-  system('ruby bench/sum.rb')
+  system('benchmark-driver bench/sum.yml')
   puts "# mean\n"
-  system('ruby bench/mean.rb')
+  system('benchmark-driver bench/mean.yml')
   puts "# variance\n"
-  system('ruby bench/variance.rb')
+  system('benchmark-driver bench/variance.yml')
 end

data/bench/array_value_counts.yml ADDED

@@ -0,0 +1,42 @@
+contexts: # one entry per gem setup to benchmark
+  - name: "1.1.0.dev"
+    gems:
+      enumerable-statistics: "1.1.0.dev"
+    require: false
+    prelude: |-
+      require 'enumerable/statistics'
+  - name: "HEAD"
+    prelude: |-
+      require 'bundler/setup'
+      require 'enumerable/statistics'
+prelude: |- # shared setup: an Array of 1000 letters sampled from 'a'..'m'
+  n = 1000
+  chars = ('a'..'m').to_a
+  ary = Array.new(n) { chars.sample }
+benchmark: # `inject` is the plain-Ruby baseline; the rest vary the value_counts keywords
+  inject: |-
+    ary.inject(Hash.new(0)) { |h, x| h[x] += 1; h }
+  unsort_keepna: |-
+    ary.value_counts(sort: false, dropna: false)
+  unsort_dropna: |-
+    ary.value_counts(sort: false, dropna: true)
+  sort_keepna: |-
+    ary.value_counts(sort: true, dropna: false)
+  sort_dropna: |-
+    ary.value_counts(sort: true, dropna: true)
+  norm_unsort_keepna: |-
+    ary.value_counts(normalize: true, sort: false, dropna: false)
+  norm_unsort_dropna: |-
+    ary.value_counts(normalize: true, sort: false, dropna: true)
+  norm_sort_keepna: |-
+    ary.value_counts(normalize: true, sort: true, dropna: false)
+  norm_sort_dropna: |-
+    ary.value_counts(normalize: true, sort: true, dropna: true)
+  sort_asc_keepna: |-
+    ary.value_counts(sort: true, ascending: true, dropna: false)
+  sort_asc_dropna: |-
+    ary.value_counts(sort: true, ascending: true, dropna: true)
+  norm_sort_asc_keepna: |-
+    ary.value_counts(normalize: true, sort: true, ascending: true, dropna: false)
+  norm_sort_asc_dropna: |-
+    ary.value_counts(normalize: true, sort: true, ascending: true, dropna: true)

data/bench/enum_value_counts.yml ADDED

@@ -0,0 +1,42 @@
+contexts: # one entry per gem setup to benchmark
+  - name: "1.1.0.dev"
+    gems:
+      enumerable-statistics: "1.1.0.dev"
+    require: false
+    prelude: |-
+      require 'enumerable/statistics'
+  - name: "HEAD"
+    prelude: |-
+      require 'bundler/setup'
+      require 'enumerable/statistics'
+prelude: |- # shared setup: an Enumerator over 1000 letters sampled from 'a'..'m'
+  n = 1000
+  chars = ('a'..'m').to_a
+  enum = Array.new(n) { chars.sample }.each
+benchmark: # `inject` is the plain-Ruby baseline; the rest vary the value_counts keywords
+  inject: |-
+    enum.inject(Hash.new(0)) { |h, x| h[x] += 1; h }
+  unsort_keepna: |-
+    enum.value_counts(sort: false, dropna: false)
+  unsort_dropna: |-
+    enum.value_counts(sort: false, dropna: true)
+  sort_keepna: |-
+    enum.value_counts(sort: true, dropna: false)
+  sort_dropna: |-
+    enum.value_counts(sort: true, dropna: true)
+  norm_unsort_keepna: |-
+    enum.value_counts(normalize: true, sort: false, dropna: false)
+  norm_unsort_dropna: |-
+    enum.value_counts(normalize: true, sort: false, dropna: true)
+  norm_sort_keepna: |-
+    enum.value_counts(normalize: true, sort: true, dropna: false)
+  norm_sort_dropna: |-
+    enum.value_counts(normalize: true, sort: true, dropna: true)
+  sort_asc_keepna: |-
+    enum.value_counts(sort: true, ascending: true, dropna: false)
+  sort_asc_dropna: |-
+    enum.value_counts(sort: true, ascending: true, dropna: true)
+  norm_sort_asc_keepna: |-
+    enum.value_counts(normalize: true, sort: true, ascending: true, dropna: false)
+  norm_sort_asc_dropna: |-
+    enum.value_counts(normalize: true, sort: true, ascending: true, dropna: true)

data/bench/hash_value_counts.yml ADDED

@@ -0,0 +1,42 @@
+contexts: # one entry per gem setup to benchmark
+  - name: "1.1.0.dev"
+    gems:
+      enumerable-statistics: "1.1.0.dev"
+    require: false
+    prelude: |-
+      require 'enumerable/statistics'
+  - name: "HEAD"
+    prelude: |-
+      require 'bundler/setup'
+      require 'enumerable/statistics'
+# Shared setup: a Hash of 1000 entries mapping index => letter, so the hash
+# *values* (what value_counts tallies) repeat across the 13 letters.
+prelude: |-
+  n = 1000
+  chars = ('a'..'m').to_a
+  hash = Array.new(n) { |i| [i, chars.sample] }.to_h
+benchmark: # `inject` is the plain-Ruby baseline; the rest vary the value_counts keywords
+  inject: |-
+    hash.inject(Hash.new(0)) { |h, (k, v)| h[v] += 1; h }
+  unsort_keepna: |-
+    hash.value_counts(sort: false, dropna: false)
+  unsort_dropna: |-
+    hash.value_counts(sort: false, dropna: true)
+  sort_keepna: |-
+    hash.value_counts(sort: true, dropna: false)
+  sort_dropna: |-
+    hash.value_counts(sort: true, dropna: true)
+  norm_unsort_keepna: |-
+    hash.value_counts(normalize: true, sort: false, dropna: false)
+  norm_unsort_dropna: |-
+    hash.value_counts(normalize: true, sort: false, dropna: true)
+  norm_sort_keepna: |-
+    hash.value_counts(normalize: true, sort: true, dropna: false)
+  norm_sort_dropna: |-
+    hash.value_counts(normalize: true, sort: true, dropna: true)
+  sort_asc_keepna: |-
+    hash.value_counts(sort: true, ascending: true, dropna: false)
+  sort_asc_dropna: |-
+    hash.value_counts(sort: true, ascending: true, dropna: true)
+  norm_sort_asc_keepna: |-
+    hash.value_counts(normalize: true, sort: true, ascending: true, dropna: false)
+  norm_sort_asc_dropna: |-
+    hash.value_counts(normalize: true, sort: true, ascending: true, dropna: true)

data/bench/mean.yml ADDED

@@ -0,0 +1,30 @@
+contexts:
+  - name: "master"
+    prelude: |-
+      require 'bundler/setup'
+      require 'enumerable/statistics'
+# Shared setup: 1000 random Floats.
+prelude: |-
+  n = 1000
+  ary = Array.new(n) { rand }
+benchmark:
+  inject: mean = ary.inject(:+) / n.to_f
+  while: |-
+    i, mean = 0, 0
+    while i < n
+      mean += ary[i]
+      i += 1
+    end
+    mean /= n.to_f
+  # Kahan compensated summation written in plain Ruby.
+  pure_ruby: |-
+    i, f, c = 0, 0.0, 0.0
+    while i < n
+      x = ary[i]
+      y = x - c
+      t = f + y
+      c = (t - f) - y
+      f = t
+      i += 1
+    end
+    mean = f / n
+  c_ext: mean = ary.mean

data/bench/sum.yml ADDED

@@ -0,0 +1,29 @@
+contexts:
+  - name: "master"
+    prelude: |-
+      require 'bundler/setup'
+      require 'enumerable/statistics'
+# Shared setup: 1000 random Floats.
+prelude: |-
+  n = 1000
+  ary = Array.new(n) { rand }
+benchmark:
+  inject: sum = ary.inject(:+)
+  while: |-
+    i, sum = 0, 0
+    while i < n
+      sum += ary[i]
+      i += 1
+    end
+  # Kahan compensated summation written in plain Ruby.
+  pure_ruby: |-
+    i, f, c = 0, 0.0, 0.0
+    while i < n
+      x = ary[i]
+      y = x - c
+      t = f + y
+      c = (t - f) - y
+      f = t
+      i += 1
+    end
+    sum = f
+  sum: sum = ary.sum

data/bench/variance.yml ADDED

@@ -0,0 +1,39 @@
+contexts:
+  - name: "master"
+    prelude: |-
+      require 'bundler/setup'
+      require 'enumerable/statistics'
+# Shared setup: 1000 random Floats.
+prelude: |-
+  n = 1000
+  ary = Array.new(n) { rand }
+# Every variant divides by (n - 1) so that all of them compute the same
+# (sample) variance.
+benchmark:
+  inject: |-
+    mean = ary.mean
+    var = ary.inject(0.0) { |sum, x|
+      sum += (x - mean) ** 2
+    } / (n - 1).to_f
+  while: |-
+    mean = ary.mean
+    i, var = 0, 0
+    while i < n
+      var += (ary[i] - mean) ** 2
+      i += 1
+    end
+    var /= (n - 1).to_f
+  # Running mean / sum of squared deviations; `i` is advanced before it is
+  # used as the divisor so the first update divides by 1, not 0.
+  pure_ruby: |-
+    i, m, m2, f, c = 0, 0.0, 0.0, 0.0, 0.0
+    while i < n
+      x = ary[i]
+      i += 1
+      y = x - c
+      t = f + y
+      c = (t - f) - y
+      f = t
+      delta = x - m
+      m += delta / i
+      m2 += delta * (x - m)
+    end
+    var = m2 / (n - 1)
+  c_ext: var = ary.variance

data/enumerable-statistics.gemspec CHANGED

@@ -1,11 +1,17 @@
 # coding: utf-8
 lib = File.expand_path('../lib', __FILE__)
 $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
-require 'enumerable/statistics/version'
+require 'enumerable_statistics/version'
 Gem::Specification.new do |spec|
   spec.name          = "enumerable-statistics"
-  spec.version       = Enumerable::Statistics::VERSION
+  spec.version       = [
+                         EnumerableStatistics::Version::MAJOR,
+                         EnumerableStatistics::Version::MINOR,
+                         EnumerableStatistics::Version::MICRO,
+                         EnumerableStatistics::Version::TAG
+                       ].compact.join('.')
   spec.authors       = ["Kenta Murata"]
   spec.email         = ["mrkn@mrkn.jp"]
@@ -19,10 +25,13 @@ Gem::Specification.new do |spec|
   spec.require_paths = ["ext", "lib"]
   spec.extensions    = Dir['ext/**/extconf.rb']
-  spec.add_development_dependency "bundler", "~> 1.11"
-  spec.add_development_dependency "rake", "~> 10.0"
-  spec.add_development_dependency "rake-compiler", "~> 0.9.8"
-  spec.add_development_dependency "rspec", "~> 3.4"
+  spec.required_ruby_version = '>= 2.4'
+  spec.add_development_dependency "bundler", ">= 1.17.2"
+  spec.add_development_dependency "rake"
+  spec.add_development_dependency "rake-compiler", ">= 0.9.8"
+  spec.add_development_dependency "rspec", ">= 3.4"
   spec.add_development_dependency "fuubar"
-  spec.add_development_dependency "benchmark-ips"
+  spec.add_development_dependency "yard"
+  spec.add_development_dependency "benchmark-driver"
 end

data/ext/-bench-/extconf.rb ADDED

@@ -0,0 +1,3 @@
+require 'mkmf' # brings create_makefile into scope
+create_makefile('-bench-') # writes the Makefile for the '-bench-' extension target

data/ext/enumerable/statistics/extension/extconf.rb CHANGED

@@ -1,5 +1,17 @@
 require 'mkmf'
 have_type('struct RRational')
+have_func('rb_rational_new')
+have_func('rb_rational_num')
+have_func('rb_rational_den')
+have_func('rb_rational_plus')
 have_type('struct RComplex')
+have_func('rb_complex_raw')
+have_func('rb_complex_real')
+have_func('rb_complex_imag')
+have_func('rb_complex_plus')
+have_func('rb_complex_div')
+have_func('rb_dbl_complex_new')
 create_makefile('enumerable/statistics/extension')

data/ext/enumerable/statistics/extension/statistics.c CHANGED

@@ -1,6 +1,8 @@
 #include <ruby/ruby.h>
+#include <ruby/util.h>
 #include <ruby/version.h>
 #include <assert.h>
+#include <math.h>
 #if RUBY_API_VERSION_CODE >= 20400
 /* for 2.4.0 or higher */
@@ -16,6 +18,12 @@
 # undef HAVE_RB_RATIONAL_PLUS
 #endif
+#ifdef HAVE_RB_ARITHMETIC_SEQUENCE_EXTRACT
+# define HAVE_ARITHMETIC_SEQUENCE
+#else
+# undef HAVE_ARITHMETIC_SEQUENCE
+#endif
 #ifndef RB_INTEGER_TYPE_P
 # define RB_INTEGER_TYPE_P(obj) enum_stat_integer_type_p(obj)
 static inline int
@@ -86,8 +94,12 @@ struct RComplex {
 static VALUE half_in_rational;
 static ID idPow, idPLUS, idMINUS, idSTAR, idDIV, idGE;
-static ID id_eqeq_p, id_idiv, id_negate, id_to_f, id_cmp;
-static ID id_each, id_real_p, id_sum, id_population;
+static ID id_eqeq_p, id_idiv, id_negate, id_to_f, id_cmp, id_nan_p;
+static ID id_each, id_real_p, id_sum, id_population, id_closed, id_edge;
+static VALUE sym_left, sym_right;
+static VALUE cHistogram;
 inline static VALUE
 f_add(VALUE x, VALUE y)
@@ -131,28 +143,6 @@ complex_new(VALUE klass, VALUE real, VALUE imag)
   return (VALUE)obj;
 }
-static VALUE
-complex_caonicalize_new(VALUE klass, VALUE real, VALUE imag)
-{
-  if (f_real_p(real) && f_real_p(imag))
-    return complex_new(klass, real, imag);
-  else if (f_real_p(imag)) {
-    VALUE new_imag;
-    new_imag = f_add(RCOMPLEX(real)->imag, imag);
-    return complex_new(klass, RCOMPLEX(real)->real, new_imag);
-  }
-  else {
-    VALUE new_real, new_imag;
-    new_real = f_sub(RCOMPLEX(real)->real, RCOMPLEX(imag)->imag);
-    new_imag = f_add(RCOMPLEX(real)->imag, RCOMPLEX(imag)->real);
-    return complex_new(klass, new_real, new_imag);
-  }
-}
 static VALUE
 complex_add(VALUE self, VALUE other)
 {
@@ -623,7 +613,7 @@ rb_rational_plus(VALUE self, VALUE other)
     VALUE num = RRATIONAL(self)->num;
     VALUE den = RRATIONAL(self)->den;
-    return f_addsub(self, num, den, other, ONE, idPLUS);
+    return f_addsub(self, num, den, other, ONE, '+');
   }
   else if (RB_TYPE_P(other, T_FLOAT)) {
     return f_add(f_to_f(self), other);
@@ -852,11 +842,11 @@ ary_mean_variance(VALUE ary, VALUE *mean_ptr, VALUE *variance_ptr, size_t ddof)
 static int
 opt_population_p(VALUE opts)
 {
-  ID kwargs = id_population;
   VALUE population = Qfalse;
   if (!NIL_P(opts)) {
 #ifdef HAVE_RB_GET_KWARGS
+    ID kwargs = id_population;
     rb_get_kwargs(opts, &kwargs, 0, 1, &population);
 #else
     VALUE val = rb_hash_aref(opts, ID2SYM(id_population));
@@ -868,7 +858,7 @@ opt_population_p(VALUE opts)
 }
 /* call-seq:
- *    eary.mean_variance(population: false)
+ *    ary.mean_variance(population: false)
  *
  * Calculate a mean and a variance of the values in `ary`.
  * The first element of the result array is the mean, and the second is the variance.
@@ -1148,6 +1138,7 @@ enum_sum_count(VALUE obj, VALUE init, VALUE *sum_ptr, long *count_ptr)
     *count_ptr = memo.count;
 }
+#ifndef HAVE_ENUM_SUM
 /* call-seq:
  *    enum.sum
  *
@@ -1172,10 +1163,11 @@ enum_sum(int argc, VALUE* argv, VALUE obj)
   return sum;
 }
+#endif
 struct enum_mean_variance_memo {
   int block_given;
-  long n;
+  size_t n;
   double m, m2, f, c;
 };
@@ -1229,7 +1221,7 @@ enum_mean_variance_iter_i(RB_BLOCK_CALL_FUNC_ARGLIST(e, args))
 {
   struct enum_mean_variance_memo *memo = (struct enum_mean_variance_memo *)args;
   ENUM_WANT_SVALUE();
-  mean_variance_iter(e, (struct enum_sum_memo *) args);
+  mean_variance_iter(e, memo);
   return Qnil;
 }
@@ -1487,9 +1479,836 @@ ary_stdev(int argc, VALUE* argv, VALUE ary)
   return stdev;
 }
+/* Returns 1 when `v` counts as a missing value (NA), 0 otherwise.
+ * NA means: `nil`, a Float NaN, or any object that responds to `nan?`
+ * and answers it with a truthy value.  The checks run in that order. */
+static inline int
+is_na(VALUE v)
+{
+  return NIL_P(v)
+      || (RB_FLOAT_TYPE_P(v) && isnan(RFLOAT_VALUE(v)))
+      || (rb_respond_to(v, id_nan_p) && RTEST(rb_funcall(v, id_nan_p, 0)));
+}
+/* ruby_qsort comparator used when ordering values for percentile/median.
+ * NA values (see is_na) sort before every other value; two NA values compare
+ * equal.  All other values are ordered through their `<=>` method.
+ * `dummy` is the unused ruby_qsort context pointer. */
+static int
+ary_percentile_sort_cmp(const void *ap, const void *bp, void *dummy)
+{
+  VALUE a = *(const VALUE *)ap, b = *(const VALUE *)bp;
+  VALUE cmp;
+  const int a_na = is_na(a);
+  const int b_na = is_na(b);
+  if (a_na || b_na) {
+    /* -1 when only a is NA, 1 when only b is NA, 0 when both are:
+     * the result stays consistent whichever way the pair is compared. */
+    return b_na - a_na;
+  }
+  /* TODO: optimize */
+  cmp = rb_funcall(a, id_cmp, 1, b);
+  return rb_cmpint(cmp, a, b);
+}
+/* Returns a temporary array (rb_ary_tmp_new) holding the elements of `ary`
+ * ordered by ary_percentile_sort_cmp.  `ary` itself is not modified. */
+static VALUE
+ary_percentile_make_sorted(VALUE ary)
+{
+  const long len = RARRAY_LEN(ary);
+  VALUE copy = rb_ary_tmp_new(len);
+  long idx = 0;
+  while (idx < len) {
+    rb_ary_push(copy, RARRAY_AREF(ary, idx));
+    ++idx;
+  }
+  RARRAY_PTR_USE(copy, elems, {
+    ruby_qsort(elems, len, sizeof(VALUE),
+               ary_percentile_sort_cmp, NULL);
+  });
+  return copy;
+}
+/* Computes the `d`-th percentile of `sorted`, an array already ordered by
+ * ary_percentile_sort_cmp, whose length is `n` (> 0).
+ *
+ * The position (n - 1) * d / 100 is split into an integer index and a
+ * fraction; when the fraction is non-zero the result is the linear
+ * interpolation between the element at the index and its successor.
+ *
+ * Returns a Float NaN when the first (smallest) element is NA.
+ * Raises ArgumentError when `d` is NaN or outside 0..100. */
+static inline VALUE
+ary_percentile_single_sorted(VALUE sorted, long n, double d)
+{
+  VALUE x0, x1;
+  double i, f;
+  long l;
+  assert(RB_TYPE_P(sorted, T_ARRAY));
+  assert(n == RARRAY_LEN(sorted));
+  assert(n > 0);
+  /* Written as a negated range test so that a NaN percentile is rejected
+   * too instead of reaching the integer conversion below. */
+  if (!(0 <= d && d <= 100)) {
+    rb_raise(rb_eArgError, "percentile out of bounds");
+  }
+  if (is_na(RARRAY_AREF(sorted, 0))) {
+    return DBL2NUM(nan(""));
+  }
+  n = RARRAY_LEN(sorted);
+  if (n == 1) {
+    return RARRAY_AREF(sorted, 0);
+  }
+  d = (n - 1) * d / 100.0;
+  f = modf(d, &i);
+  l = (long)i;
+  x0 = RARRAY_AREF(sorted, l);
+  /* Exact hit, or already at the last element: nothing to interpolate with. */
+  if (f == 0 || l == n - 1) {
+    return x0;
+  }
+  x0 = rb_funcall(x0, idSTAR, 1, DBL2NUM(1 - f));
+  x1 = RARRAY_AREF(sorted, l + 1);
+  x1 = rb_funcall(x1, idSTAR, 1, DBL2NUM(f));
+  return rb_funcall(x0, idPLUS, 1, x1);
+}
+/* Computes a single percentile `q` of the non-empty array `ary`.
+ *
+ * `q` is converted to a C double first (raising if it is not numeric).
+ * A one-element array yields that element directly; otherwise a sorted
+ * copy is built and handed to ary_percentile_single_sorted. */
+static VALUE
+ary_percentile_single(VALUE ary, VALUE q)
+{
+  long n;
+  double d;
+  VALUE sorted;
+  assert(RB_TYPE_P(ary, T_ARRAY));
+  n = RARRAY_LEN(ary);
+  assert(n > 0);
+  switch (TYPE(q)) {
+    case T_FIXNUM:
+      d = (double)FIX2LONG(q);
+      break;
+    case T_BIGNUM:
+      d = rb_big2dbl(q);
+      break;
+    case T_FLOAT:
+      d = RFLOAT_VALUE(q);
+      break;
+    case T_RATIONAL:
+      /* fall through */
+    default:
+      /* NUM2DBL already yields a C double; it must not be stored in a
+       * VALUE and unwrapped again with RFLOAT_VALUE. */
+      d = NUM2DBL(q);
+      break;
+  }
+  if (n == 1) {
+    return RARRAY_AREF(ary, 0);
+  }
+  sorted = ary_percentile_make_sorted(ary);
+  return ary_percentile_single_sorted(sorted, n, d);
+}
+/* call-seq:
+ *    ary.percentile(q) -> value or array
+ *
+ * Calculate specified percentiles of the values in `ary`.
+ *
+ * @param [Number, Array] q  percentile or array of percentiles to compute,
+ *   which must be between 0 and 100 inclusive.
+ *
+ * @return [Float, Array] A percentile value(s); an array is returned when
+ *   `q` is array-like.
+ *
+ * @raise [ArgumentError] if `ary` is empty
+ */
+static VALUE
+ary_percentile(VALUE ary, VALUE q)
+{
+  long n, m, i;
+  double d;
+  VALUE qs, sorted, res;
+  n = RARRAY_LEN(ary);
+  if (n == 0) {
+    rb_raise(rb_eArgError, "unable to compute percentile(s) for an empty array");
+  }
+  /* nil here means q is not array-like: treat it as a single percentile. */
+  qs = rb_check_convert_type(q, T_ARRAY, "Array", "to_ary");
+  if (NIL_P(qs)) {
+    return ary_percentile_single(ary, q);
+  }
+  m = RARRAY_LEN(qs);
+  res = rb_ary_new_capa(m);
+  if (m == 1) {
+    q = RARRAY_AREF(qs, 0);
+    rb_ary_push(res, ary_percentile_single(ary, q));
+  }
+  else {
+    /* Sort once and reuse the sorted copy for every requested percentile. */
+    sorted = ary_percentile_make_sorted(ary);
+    for (i = 0; i < m; ++i) {
+      VALUE x;
+      q = RARRAY_AREF(qs, i);
+      switch (TYPE(q)) {
+        case T_FIXNUM:
+          d = (double)FIX2LONG(q);
+          break;
+        case T_BIGNUM:
+          d = rb_big2dbl(q);
+          break;
+        case T_FLOAT:
+          d = RFLOAT_VALUE(q);
+          break;
+        case T_RATIONAL:
+          /* fall through */
+        default:
+          /* NUM2DBL already yields a C double; it must not be stored in a
+           * VALUE and unwrapped again with RFLOAT_VALUE. */
+          d = NUM2DBL(q);
+          break;
+      }
+      x = ary_percentile_single_sorted(sorted, n, d);
+      rb_ary_push(res, x);
+    }
+  }
+  return res;
+}
+/* call-seq:
+ *    ary.median -> float
+ *
+ * Calculate a median of the values in `ary`.
+ *
+ * An empty array yields NaN and a one-element array yields that element
+ * unchanged.  For longer arrays NaN is returned as soon as any element is
+ * NA (`nil`, NaN, or an object whose `nan?` is true); otherwise the middle
+ * element (odd length) or the mean of the two middle elements (even
+ * length) is returned.
+ *
+ * @return [Float] A median value
+ */
+static VALUE
+ary_median(VALUE ary)
+{
+  const long n = RARRAY_LEN(ary);
+  VALUE sorted, lower, upper, sum;
+  if (n == 0) {
+    return DBL2NUM(nan(""));
+  }
+  if (n == 1) {
+    return RARRAY_AREF(ary, 0);
+  }
+  if (n == 2) {
+    /* No sorting needed: the median is the mean of both elements. */
+    lower = RARRAY_AREF(ary, 0);
+    upper = RARRAY_AREF(ary, 1);
+    /* Same NA rule as the general case below. */
+    if (is_na(lower) || is_na(upper)) {
+      return DBL2NUM(nan(""));
+    }
+  }
+  else {
+    sorted = ary_percentile_make_sorted(ary);
+    /* NA values sort first, so checking the first element is enough. */
+    if (is_na(RARRAY_AREF(sorted, 0))) {
+      return DBL2NUM(nan(""));
+    }
+    upper = RARRAY_AREF(sorted, n / 2);
+    if (n % 2 == 1) {
+      return upper;
+    }
+    lower = RARRAY_AREF(sorted, n / 2 - 1);
+  }
+  sum = rb_funcall(lower, idPLUS, 1, upper); /* TODO: optimize */
+  if (RB_INTEGER_TYPE_P(sum) || RB_FLOAT_TYPE_P(sum) || RB_TYPE_P(sum, T_RATIONAL)) {
+    double d = NUM2DBL(sum);
+    return DBL2NUM(d / 2.0);
+  }
+  /* Other numeric types: let the object divide itself by 2.0. */
+  return rb_funcall(sum, idDIV, 1, DBL2NUM(2.0));
+}
+struct value_counts_opts { /* keyword options accepted by value_counts */
+  int normalize_p; /* non-zero: report relative frequencies instead of counts */
+  int sort_p; /* non-zero: sort the result by count */
+  int ascending_p; /* non-zero: sort in ascending order */
+  int dropna_p; /* non-zero: leave the NA count out of the result */
+};
+/* Fills `opts` from the keyword hash `kwargs` (nil when no keywords were
+ * given).  Defaults are normalize: false, sort: true, ascending: false,
+ * dropna: true; a keyword only overrides its default when it is actually
+ * present in `kwargs`. */
+static inline void
+value_counts_extract_opts(VALUE kwargs, struct value_counts_opts *opts)
+{
+  assert(opts != NULL);
+  /* default values */
+  opts->normalize_p = 0;
+  opts->sort_p = 1;
+  opts->ascending_p = 0;
+  opts->dropna_p = 1;
+  if (!NIL_P(kwargs)) {
+    enum { kw_normalize, kw_sort, kw_ascending, kw_dropna };
+    static ID kwarg_keys[4];
+    VALUE kwarg_vals[4];
+    /* Intern the keyword names once and reuse them on later calls. */
+    if (!kwarg_keys[0]) {
+      kwarg_keys[kw_normalize] = rb_intern("normalize");
+      kwarg_keys[kw_sort]      = rb_intern("sort");
+      kwarg_keys[kw_ascending] = rb_intern("ascending");
+      kwarg_keys[kw_dropna]    = rb_intern("dropna");
+    }
+    rb_get_kwargs(kwargs, kwarg_keys, 0, 4, kwarg_vals);
+    /* Qundef marks a keyword that was not passed: keep its default. */
+    if (kwarg_vals[kw_normalize] != Qundef) {
+      opts->normalize_p = RTEST(kwarg_vals[kw_normalize]);
+    }
+    if (kwarg_vals[kw_sort] != Qundef) {
+      opts->sort_p = RTEST(kwarg_vals[kw_sort]);
+    }
+    if (kwarg_vals[kw_ascending] != Qundef) {
+      opts->ascending_p = RTEST(kwarg_vals[kw_ascending]);
+    }
+    if (kwarg_vals[kw_dropna] != Qundef) {
+      opts->dropna_p = RTEST(kwarg_vals[kw_dropna]);
+    }
+  }
+}
+/* rb_hash_foreach callback: appends the pair [key, val] to the array `ary`. */
+static int
+value_counts_result_to_assoc_array_i(VALUE key, VALUE val, VALUE ary)
+{
+  VALUE pair = rb_ary_tmp_new(2);
+  rb_ary_push(pair, key);
+  rb_ary_push(pair, val);
+  rb_ary_push(ary, pair);
+  return ST_CONTINUE;
+}
+/* ruby_qsort comparator over [key, count] pairs: ascending by count,
+ * compared through `<=>`.  `dummy` is unused. */
+static int
+value_counts_sort_cmp_asc(const void *ap, const void *bp, void *dummy)
+{
+  const VALUE lhs_pair = *(const VALUE *)ap;
+  const VALUE rhs_pair = *(const VALUE *)bp;
+  const VALUE lhs = RARRAY_AREF(lhs_pair, 1);
+  const VALUE rhs = RARRAY_AREF(rhs_pair, 1);
+  /* TODO: optimize */
+  return rb_cmpint(rb_funcall(lhs, id_cmp, 1, rhs), lhs, rhs);
+}
+/* ruby_qsort comparator over [key, count] pairs: descending by count,
+ * obtained by comparing the operands in swapped order.  `dummy` is unused. */
+static int
+value_counts_sort_cmp_desc(const void *ap, const void *bp, void *dummy)
+{
+  const VALUE lhs_pair = *(const VALUE *)ap;
+  const VALUE rhs_pair = *(const VALUE *)bp;
+  const VALUE lhs = RARRAY_AREF(lhs_pair, 1);
+  const VALUE rhs = RARRAY_AREF(rhs_pair, 1);
+  /* TODO: optimize */
+  return rb_cmpint(rb_funcall(rhs, id_cmp, 1, lhs), rhs, lhs);
+}
+/* Returns a new hash holding the entries of `result` ordered by count.
+ *
+ * An empty `result` is returned as is.  When `dropna_p` is zero, the entry
+ * under the `nil` key (the NA count) is removed from `result` before
+ * sorting and re-inserted afterwards: first when `ascending_p` is set,
+ * last otherwise. */
+static VALUE
+value_counts_sort_result(VALUE result, const int dropna_p, const int ascending_p)
+{
+  int (*compare)(const void *, const void *, void *);
+  VALUE na_count = Qundef, pairs, sorted;
+  long idx, len;
+  int has_na;
+  if (RHASH_SIZE(result) < 1) {
+    return result;
+  }
+  if (!dropna_p) {
+    na_count = rb_hash_lookup2(result, Qnil, Qundef);
+    if (na_count != Qundef) {
+      rb_hash_delete(result, Qnil);
+    }
+  }
+  has_na = (na_count != Qundef);
+  len = (long)RHASH_SIZE(result);
+  /* Flatten the hash into [key, count] pairs so they can be sorted. */
+  pairs = rb_ary_tmp_new(len);
+  rb_hash_foreach(result, value_counts_result_to_assoc_array_i, pairs);
+  compare = ascending_p ? value_counts_sort_cmp_asc : value_counts_sort_cmp_desc;
+  RARRAY_PTR_USE(pairs, ptr, {
+    ruby_qsort(ptr, RARRAY_LEN(pairs), sizeof(VALUE),
+               compare, NULL);
+  });
+#ifdef HAVE_RB_HASH_NEW_WITH_SIZE
+  sorted = rb_hash_new_with_size(len);
+#else
+  sorted = rb_hash_new();
+#endif
+  if (has_na && ascending_p) {
+    rb_hash_aset(sorted, Qnil, na_count);
+  }
+  for (idx = 0; idx < len; ++idx) {
+    VALUE pair = RARRAY_AREF(pairs, idx);
+    rb_hash_aset(sorted, RARRAY_AREF(pair, 0), RARRAY_AREF(pair, 1));
+  }
+  if (has_na && !ascending_p) {
+    rb_hash_aset(sorted, Qnil, na_count);
+  }
+  return sorted;
+}
+struct value_counts_normalize_params { /* argument block for value_counts_normalize_i */
+  VALUE result; /* hash whose values are replaced by relative frequencies */
+  long total; /* divisor applied to every count */
+};
+/* rb_hash_foreach callback: stores `val / total` (as a Float) back under
+ * `key` in the hash referenced by the value_counts_normalize_params that
+ * `arg` points to. */
+static int
+value_counts_normalize_i(VALUE key, VALUE val, VALUE arg)
+{
+  struct value_counts_normalize_params *const norm =
+    (struct value_counts_normalize_params *)arg;
+  const double ratio = NUM2DBL(val) / norm->total;
+  rb_hash_aset(norm->result, key, DBL2NUM(ratio));
+  return ST_CONTINUE;
+}
+struct value_counts_memo { /* state shared between any_value_counts and its counter */
+  int dropna_p; /* copy of the dropna option */
+  long total; /* number of items visited, NA included */
+  long na_count; /* number of NA items visited */
+  VALUE result; /* hash mapping each non-NA value to its count */
+};
+/* Shared implementation behind the value_counts methods.
+ *
+ * Parses the keyword options from argc/argv, lets `counter` tally the
+ * items of `obj` into a value_counts_memo, then applies the NA handling,
+ * the optional sort and the optional normalization, in that order.
+ * Returns the resulting hash. */
+static VALUE
+any_value_counts(int argc, VALUE *argv, VALUE obj,
+                 void (* counter)(VALUE, struct value_counts_memo *))
+{
+  VALUE kwargs;
+  struct value_counts_opts opts;
+  struct value_counts_memo memo;
+  rb_scan_args(argc, argv, ":", &kwargs);
+  value_counts_extract_opts(kwargs, &opts);
+  memo.dropna_p = opts.dropna_p;
+  memo.na_count = 0;
+  memo.total = 0;
+  memo.result = rb_hash_new();
+  if (!opts.dropna_p) {
+    /* reserve the room for NA */
+    rb_hash_aset(memo.result, Qnil, INT2FIX(0));
+  }
+  counter(obj, &memo);
+  if (!opts.dropna_p) {
+    /* Replace the placeholder by the real NA count, or drop it if unused. */
+    if (memo.na_count != 0) {
+      rb_hash_aset(memo.result, Qnil, LONG2NUM(memo.na_count));
+    }
+    else {
+      rb_hash_delete(memo.result, Qnil);
+    }
+  }
+  if (opts.sort_p) {
+    memo.result = value_counts_sort_result(memo.result, opts.dropna_p, opts.ascending_p);
+  }
+  if (opts.normalize_p) {
+    struct value_counts_normalize_params params;
+    /* Dropped NA items must not count towards the divisor. */
+    params.total = opts.dropna_p ? memo.total - memo.na_count : memo.total;
+    params.result = memo.result;
+    rb_hash_foreach(memo.result, value_counts_normalize_i, (VALUE)&params);
+  }
+  return memo.result;
+}
+/* Block callback for `each`: tallies one yielded element into the
+ * value_counts_memo passed through `args`.  NA elements only bump
+ * na_count; every element bumps total. */
+static VALUE
+enum_value_counts_without_sort_i(RB_BLOCK_CALL_FUNC_ARGLIST(e, args))
+{
+  struct value_counts_memo *const counts = (struct value_counts_memo *)args;
+  ENUM_WANT_SVALUE();
+  if (!is_na(e)) {
+    const VALUE seen = rb_hash_lookup2(counts->result, e, INT2FIX(0));
+    rb_hash_aset(counts->result, e, rb_int_plus(seen, INT2FIX(1)));
+  }
+  else {
+    ++counts->na_count;
+  }
+  ++counts->total;
+  return Qnil;
+}
+/* Counter for generic enumerables: iterates `obj` through its `each`
+ * method and tallies every element into `memo`. */
+static void
+enum_value_counts_without_sort(VALUE obj, struct value_counts_memo *memo)
+{
+  const VALUE memo_arg = (VALUE)memo;
+  rb_block_call(obj, id_each, 0, 0, enum_value_counts_without_sort_i, memo_arg);
+}
+/* value_counts entry point for enumerables: delegates to any_value_counts
+ * with the `each`-based counter. */
+static VALUE
+enum_value_counts(int argc, VALUE* argv, VALUE obj)
+{
+  VALUE counts = any_value_counts(argc, argv, obj, enum_value_counts_without_sort);
+  return counts;
+}
+/* Counter for arrays: walks `ary` by index, tallying non-NA elements into
+ * memo->result and counting NA elements separately.  On return,
+ * memo->total is the array length and memo->na_count the NA tally. */
+static void
+ary_value_counts_without_sort(VALUE ary, struct value_counts_memo *memo)
+{
+  const long len = RARRAY_LEN(ary);
+  long idx = 0, missing = 0;
+  while (idx < len) {
+    const VALUE item = RARRAY_AREF(ary, idx);
+    if (!is_na(item)) {
+      const VALUE seen = rb_hash_lookup2(memo->result, item, INT2FIX(0));
+      rb_hash_aset(memo->result, item, rb_int_plus(seen, INT2FIX(1)));
+    }
+    else {
+      ++missing;
+    }
+    ++idx;
+  }
+  memo->total = len;
+  memo->na_count = missing;
+}
+/* call-seq:
+ *    ary.value_counts(normalize: false, sort: true, ascending: false, dropna: true) -> hash
+ *
+ * Returns a hash that maps each value in `ary` to the number of times it
+ * occurs.
+ *
+ * `nil` and NaN (any object that answers `true` to `nan?`) are treated as
+ * the same thing, and their combined count is stored under the `nil` key.
+ *
+ * @param [false,true] normalize  If `true`, the result contains the relative
+ *                                frequencies of the unique values.
+ * @param [true,false] sort  Sort by values.
+ * @param [false,true] ascending  Sort in ascending order.
+ * @param [true,false] dropna  Don't include counts of NAs.
+ *
+ * @return [Hash] A hash consisting of the counts of the values
+ */
+static VALUE
+ary_value_counts(int argc, VALUE* argv, VALUE ary)
+{
+  VALUE counts = any_value_counts(argc, argv, ary, ary_value_counts_without_sort);
+  return counts;
+}
+/* rb_hash_foreach callback: tallies the entry's value `val` into the
+ * value_counts_memo that `arg` points to.  NA values only bump na_count.
+ * The entry's `key` is not used.  Iteration always continues. */
+static int
+hash_value_counts_without_sort_i(VALUE key, VALUE val, VALUE arg)
+{
+  struct value_counts_memo *const counts = (struct value_counts_memo *)arg;
+  if (is_na(val)) {
+    ++counts->na_count;
+  }
+  else {
+    const VALUE seen = rb_hash_lookup2(counts->result, val, INT2FIX(0));
+    rb_hash_aset(counts->result, val, rb_int_plus(seen, INT2FIX(1)));
+  }
+  return ST_CONTINUE;
+}
+/* Counter for hashes: tallies the values of every entry of `hash` into
+ * `memo`, then records the number of entries as memo->total. */
+static void
+hash_value_counts_without_sort(VALUE hash, struct value_counts_memo *memo)
+{
+  const VALUE memo_arg = (VALUE)memo;
+  rb_hash_foreach(hash, hash_value_counts_without_sort_i, memo_arg);
+  memo->total = RHASH_SIZE(hash);
+}
+/* call-seq:
+ *    hash.value_counts(normalize: false, sort: true, ascending: false, dropna: true) -> hash
+ *
+ * Returns a hash that maps each value stored in `hash` to the number of
+ * times it occurs.
+ *
+ * `nil` and NaN (any object that answers `true` to `nan?`) are treated as
+ * the same thing, and their combined count is stored under the `nil` key.
+ *
+ * @param [false,true] normalize  If `true`, the result contains the relative
+ *                                frequencies of the unique values.
+ * @param [true,false] sort  Sort by values.
+ * @param [false,true] ascending  Sort in ascending order.
+ * @param [true,false] dropna  Don't include counts of NAs.
+ *
+ * @return [Hash] A hash consisting of the counts of the values
+ */
+static VALUE
+hash_value_counts(int argc, VALUE* argv, VALUE hash)
+{
+  VALUE counts = any_value_counts(argc, argv, hash, hash_value_counts_without_sort);
+  return counts;
+}
+/* Binary-searches the array `edge` for the bin that `rb_x` falls into.
+ * NOTE(review): assumes `edge` is sorted in ascending order - confirm
+ * against the caller that builds it.
+ *
+ * With `left_p` set, the result is the index of the last edge that is
+ * <= x; otherwise it is the index of the last edge that is < x.
+ * Returns -1 when no edge qualifies (including an empty `edge`). */
+static long
+histogram_edge_bin_index(VALUE edge, VALUE rb_x, int left_p)
+{
+  const double x = NUM2DBL(rb_x);
+  long below = -1;
+  long above = RARRAY_LEN(edge);
+  /* Invariant: edges at indices <= below qualify, edges at >= above do not. */
+  while (above - below > 1) {
+    const long mid = below + (above - below)/2;
+    const double y = NUM2DBL(RARRAY_AREF(edge, mid));
+    const int qualifies = left_p ? (y <= x) : (y < x);
+    if (qualifies) {
+      below = mid;
+    }
+    else {
+      above = mid;
+    }
+  }
+  return left_p ? below : above - 1;
+}
+/* Adds one to the entry of `weights` for the bin of every element of
+ * `values`, using histogram_edge_bin_index to locate the bin.  A bin whose
+ * entry is still nil starts at 1. */
+static void
+histogram_weights_push_values(VALUE weights, VALUE edge, VALUE values, int left_p)
+{
+  const long count = RARRAY_LEN(values);
+  long idx;
+  for (idx = 0; idx < count; ++idx) {
+    const VALUE item = RARRAY_AREF(values, idx);
+    const long bin = histogram_edge_bin_index(edge, item, left_p);
+    const VALUE old_weight = rb_ary_entry(weights, bin);
+    const VALUE new_weight = NIL_P(old_weight)
+      ? INT2FIX(1)
+      : rb_funcall(old_weight, idPLUS, 1, INT2FIX(1));
+    rb_ary_store(weights, bin, new_weight);
+  }
+}
+/* Reads the `:closed` keyword from the option hash `opts` (nil when no
+ * keywords were given).
+ *
+ * Returns 1 for `:left` or when the keyword is absent, 0 for `:right`.
+ * Raises ArgumentError for any other value. */
+static int
+opt_closed_left_p(VALUE opts)
+{
+  VALUE closed = Qundef;
+  if (NIL_P(opts)) {
+    return 1;
+  }
+#ifdef HAVE_RB_GET_KWARGS
+  {
+    ID kwargs = id_closed;
+    /* Leaves `closed` as Qundef when the keyword is not present. */
+    rb_get_kwargs(opts, &kwargs, 0, 1, &closed);
+  }
+#else
+  closed = rb_hash_lookup2(opts, ID2SYM(id_closed), Qundef);
+#endif
+  /* An absent keyword means the default (:left); Qundef must never reach
+   * the error message below. */
+  if (closed == Qundef || closed == sym_left) {
+    return 1;
+  }
+  if (closed == sym_right) {
+    return 0;
+  }
+  rb_raise(rb_eArgError, "invalid value for :closed keyword "
+           "(%"PRIsVALUE" for :left or :right)", closed);
+  return 1; /* not reached */
+}
+/* Number of bins for `n` observations: ceil(log2(n)) + 1, with a single
+ * bin when there are no observations. */
+static inline long
+sturges(long n)
+{
+  return (n == 0) ? 1L : (long)(ceil(log2(n)) + 1);
+}
+/* Build the array of bin edges for data spanning lo .. hi.
+ *
+ * The raw bin width (hi - lo) / nbins is rounded to 1, 2, 5 or 10 times a
+ * power of ten, so `nbins` is only approximate. Edges are emitted as
+ * (start + i*step) / divisor: for widths >= 1 `divisor` is 1 and `step`
+ * carries the width; for widths < 1 `step` is 1 and `divisor` carries the
+ * reciprocal of the width.
+ *
+ * @param lo      lower end of the data range
+ * @param hi      upper end of the data range
+ * @param nbins   requested number of bins; used as a divisor when hi != lo
+ *                NOTE(review): presumably callers guarantee nbins >= 1 -- confirm
+ * @param left_p  non-zero for left-closed bins, zero for right-closed bins
+ *
+ * @return a new Array of Floats holding the edges in ascending order
+ */
+static VALUE
+ary_histogram_calculate_edge_lo_hi(const double lo, const double hi, const long nbins, const int left_p)
+{
+  VALUE edge;
+  double bw, lbw, start, step, divisor, r;
+  long i, len;
+  if (hi == lo) {
+    /* Degenerate range: use unit-width steps starting at the single value. */
+    start = hi;
+    step = 1;
+    divisor = 1;
+    len = 1;
+  }
+  else {
+    bw = (hi - lo) / nbins;  /* raw, unrounded bin width */
+    lbw = log10(bw);
+    if (lbw >= 0) {
+      /* Width >= 1: round the width itself and keep divisor at 1. */
+      step = pow(10, floor(lbw));
+      r = bw / step;  /* ratio of the raw width to the power of ten below it */
+      if (r <= 1.1) {
+        /* do nothing */
+      }
+      else if (r <= 2.2) {
+        step *= 2;
+      }
+      else if (r <= 5.5) {
+        step *= 5;
+      }
+      else {
+        step *= 10;
+      }
+      divisor = 1.0;
+      start = step * floor(lo / step);  /* largest multiple of step not above lo */
+      len = (long)ceil((hi - start) / step);
+    }
+    else {
+      /* Width < 1: work in units of 1/divisor with a step of 1. */
+      divisor = pow(10, -floor(lbw));
+      r = bw * divisor;
+      if (r <= 1.1) {
+        /* do nothing */
+      }
+      else if (r <= 2.2) {
+        divisor /= 2;
+      }
+      else if (r <= 5.5) {
+        divisor /= 5;
+      }
+      else {
+        divisor /= 10;
+      }
+      step = 1.0;
+      start = floor(lo * divisor);
+      len = (long)ceil(hi * divisor - start);
+    }
+  }
+  if (left_p) {
+    /* Left-closed bins: move the first edge down until it is <= lo and
+     * add edges until the last one is > hi. */
+    while (lo < start/divisor) {
+      start -= step;
+    }
+    while ((start + (len - 1)*step)/divisor <= hi) {
+      ++len;
+    }
+  }
+  else {
+    /* Right-closed bins: move the first edge down until it is < lo and
+     * add edges until the last one is >= hi. */
+    while (lo <= start/divisor) {
+      start -= step;
+    }
+    while ((start + (len - 1)*step)/divisor < hi) {
+      ++len;
+    }
+  }
+  edge = rb_ary_new_capa(len);
+  for (i = 0; i < len; ++i) {
+    rb_ary_push(edge, DBL2NUM(start/divisor));
+    start += step;
+  }
+  return edge;
+}
+/* Compute the bin edges for the numeric values in `ary`.
+ *
+ * An empty array yields the single edge [0.0]. Otherwise the minimum and
+ * maximum (obtained with Array#minmax) are converted to doubles and handed
+ * to ary_histogram_calculate_edge_lo_hi().
+ *
+ * @param ary     the Array to analyse (type-checked with Check_Type)
+ * @param nbins   requested number of bins; must be >= 0 for an empty array
+ *                and >= 1 otherwise
+ * @param left_p  non-zero for left-closed bins, zero for right-closed bins
+ *
+ * @return a new Array of Float edges
+ *
+ * @raise ArgumentError when `nbins` is out of range, or when the minimum or
+ *   maximum of `ary` is NaN or infinite
+ */
+static VALUE
+ary_histogram_calculate_edge(VALUE ary, const long nbins, const int left_p)
+{
+  long n;
+  VALUE minmax;
+  double lo, hi;
+  Check_Type(ary, T_ARRAY);
+  n = RARRAY_LEN(ary);
+  if (n == 0 && nbins < 0) {
+    rb_raise(rb_eArgError, "nbins must be >= 0 for an empty array, got %ld", nbins);
+  }
+  if (n > 0 && nbins < 1) {
+    rb_raise(rb_eArgError, "nbins must be >= 1 for a non-empty array, got %ld", nbins);
+  }
+  if (n == 0) {
+    VALUE empty_edge = rb_ary_new_capa(1);
+    rb_ary_push(empty_edge, DBL2NUM(0.0));
+    return empty_edge;
+  }
+  minmax = rb_funcall(ary, rb_intern("minmax"), 0);
+  lo = NUM2DBL(RARRAY_AREF(minmax, 0));
+  hi = NUM2DBL(RARRAY_AREF(minmax, 1));
+  /* The edge computation cannot handle a non-finite range: with
+   * hi == lo == Infinity its edge-extension loop never terminates, and NaN
+   * ends up in a double-to-long conversion whose result is undefined. */
+  if (!isfinite(lo) || !isfinite(hi)) {
+    rb_raise(rb_eArgError, "the minimum and maximum must be finite to compute histogram edges");
+  }
+  return ary_histogram_calculate_edge_lo_hi(lo, hi, nbins, left_p);
+}
+/* call-seq:
+ *    ary.histogram(nbins=:auto, closed: :left)
+ *
+ * @param [Integer, :auto, nil] nbins  The approximate number of bins.
+ *   When omitted, `nil` or `:auto`, the number of bins is chosen from the
+ *   array length as ceil(log2(n)) + 1.
+ * @param [:left, :right] closed
+ *   If :left (the default), the bin intervals are left-closed.
+ *   If :right, the bin intervals are right-closed.
+ *
+ * @return [EnumerableStatistics::Histogram] The histogram struct.
+ */
+static VALUE
+ary_histogram(int argc, VALUE *argv, VALUE ary)
+{
+  VALUE arg0, opts, edge, weights;
+  int left_p;
+  long nbins, n_bins, i;
+  rb_scan_args(argc, argv, "01:", &arg0, &opts);
+  /* Accept the documented :auto as well as an omitted/nil argument. */
+  if (NIL_P(arg0) || arg0 == ID2SYM(rb_intern("auto"))) {
+    nbins = sturges(RARRAY_LEN(ary));
+  }
+  else {
+    nbins = NUM2LONG(arg0);
+  }
+  left_p = opt_closed_left_p(opts);
+  edge = ary_histogram_calculate_edge(ary, nbins, left_p);
+  /* There is one bin between each pair of adjacent edges. Start every bin
+   * at 0 so that bins receiving no value report 0 instead of nil. */
+  n_bins = RARRAY_LEN(edge) - 1;
+  weights = rb_ary_new_capa(n_bins);
+  for (i = 0; i < n_bins; ++i) {
+    rb_ary_push(weights, INT2FIX(0));
+  }
+  histogram_weights_push_values(weights, edge, ary, left_p);
+  return rb_struct_new(cHistogram, edge, weights,
+                       left_p ? sym_left : sym_right,
+                       Qfalse);
+}
 void
 Init_extension(void)
 {
+  VALUE mEnumerableStatistics;
 #ifndef HAVE_ENUM_SUM
   rb_define_method(rb_mEnumerable, "sum", enum_sum, -1);
 #endif
@@ -1499,6 +2318,7 @@ Init_extension(void)
   rb_define_method(rb_mEnumerable, "variance", enum_variance, -1);
   rb_define_method(rb_mEnumerable, "mean_stdev", enum_mean_stdev, -1);
   rb_define_method(rb_mEnumerable, "stdev", enum_stdev, -1);
+  rb_define_method(rb_mEnumerable, "value_counts", enum_value_counts, -1);
 #ifndef HAVE_ARRAY_SUM
   rb_define_method(rb_cArray, "sum", ary_sum, -1);
@@ -1508,10 +2328,20 @@ Init_extension(void)
   rb_define_method(rb_cArray, "variance", ary_variance, -1);
   rb_define_method(rb_cArray, "mean_stdev", ary_mean_stdev, -1);
   rb_define_method(rb_cArray, "stdev", ary_stdev, -1);
+  rb_define_method(rb_cArray, "percentile", ary_percentile, 1);
+  rb_define_method(rb_cArray, "median", ary_median, 0);
+  rb_define_method(rb_cArray, "value_counts", ary_value_counts, -1);
+  rb_define_method(rb_cHash, "value_counts", hash_value_counts, -1);
   half_in_rational = nurat_s_new_internal(rb_cRational, INT2FIX(1), INT2FIX(2));
   rb_gc_register_mark_object(half_in_rational);
+  mEnumerableStatistics = rb_const_get_at(rb_cObject, rb_intern("EnumerableStatistics"));
+  cHistogram = rb_const_get_at(mEnumerableStatistics, rb_intern("Histogram"));
+  rb_define_method(rb_cArray, "histogram", ary_histogram, -1);
   idPLUS = '+';
   idMINUS = '-';
   idSTAR = '*';
@@ -1523,8 +2353,14 @@ Init_extension(void)
   id_negate = rb_intern("-@");
   id_to_f = rb_intern("to_f");
   id_cmp = rb_intern("<=>");
+  id_nan_p = rb_intern("nan?");
   id_each = rb_intern("each");
   id_real_p = rb_intern("real?");
   id_sum = rb_intern("sum");
   id_population = rb_intern("population");
+  id_closed = rb_intern("closed");
+  id_edge = rb_intern("edge");
+  sym_left = ID2SYM(rb_intern("left"));
+  sym_right = ID2SYM(rb_intern("right"));
 }