enumerable-statistics 1.0.1 → 2.0.0.pre

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 01d8583f76df44d84e20ab090a5589e44e33dacf
4
- data.tar.gz: c070a1c7b007646ee0769b9e4924c0af0914341d
2
+ SHA256:
3
+ metadata.gz: d632cae80814e40cb6247d76cdfa74999e30effa4f969935a2deef35937c4f5e
4
+ data.tar.gz: 85c9ba3067efd94649e01836ebf04cf0ddebc2a69fea06f65a4efd067a9dffe7
5
5
  SHA512:
6
- metadata.gz: 7b3027aea941fa441b21c53ff8570b712cd576b42d97d9b7e3c421c92ad9c79552860a4f1c3bfd86da5c7f654d5396ab23415a0f861a4290ac1971ced29372e4
7
- data.tar.gz: d082b4b5c2dd4c91b40400ccbbcd68859e6811bc14add0416133c8a70d67c97b4af10ae4f53fef4fc73efcc139d7f3d224baa5e7b983c4428880bc671039a935
6
+ metadata.gz: 1ee5828934ed01b5bc2f3173816bfe24914f36641df6a99b2aff355478d74f13fd36201547776c06d8b4150784176420be8ceea1bbdd2fa9397d6930522caff1
7
+ data.tar.gz: 7a4a87570189ff29bef98fdeef30f2640eebbea550419229b9ffeb89c9d032fbe72082e6be00cd5fb9a85be2e515d6211b057f5fd1c4834ad6cb8c5153135a5f
@@ -1,19 +1,32 @@
1
1
  ---
2
- language: ruby
2
+ notification:
3
+ email:
4
+ - mrkn@ruby-lang.org
3
5
 
4
- rvm:
5
- - ruby-head
6
- - 2.3.0
7
- - 2.2.4
8
- - 2.1
6
+ language: ruby
9
7
 
10
8
  before_install:
11
9
  - gem update --system
12
- - gem update bundler
10
+ - gem install bundler
13
11
 
14
12
  install:
15
13
  - bundle install
16
14
 
17
15
  script:
18
- - bundle exec rake clobber compile
16
+ - bundle exec rake --trace clobber compile
19
17
  - bundle exec rake spec
18
+
19
+ matrix:
20
+ include:
21
+ - name: "2.3"
22
+ rvm: 2.3
23
+ - name: "2.4"
24
+ rvm: 2.4.5
25
+ - name: "2.5"
26
+ rvm: 2.5.2
27
+ - name: "2.6"
28
+ rvm: 2.6
29
+ - name: "trunk"
30
+ rvm: ruby-head
31
+ allow_failures:
32
+ - rvm: 2.3
data/.yardopts CHANGED
@@ -1 +1,2 @@
1
1
  --markup markdown
2
+ -p templates
@@ -1,3 +1,10 @@
1
+ # 2.0.0-pre
2
+
3
+ - Add `value_counts` method in Array, Hash, and Enumerable
4
+ - Add `median` method in Array
5
+ - Add `percentile` method in Array
6
+ - Add `histogram` method in Array
7
+
1
8
  # 1.0.1
2
9
 
3
10
  - Add `mean_variance` method in Array class and Enumerable module
data/README.md CHANGED
@@ -40,6 +40,14 @@ The following methods are supplied by this library:
40
40
  - Calculates a mean and a variance simultaneously
41
41
  - `Array#mean_stdev`, `Enumerable#mean_stdev`
42
42
  - Calculates a mean and a standard deviation simultaneously
43
+ - `Array#median`
44
+ - Calculates a median of values in an array
45
+ - `Array#percentile(q)`
46
+ - Calculates a percentile or percentiles of values in an array
47
+ - `Array#value_counts`, `Enumerable#value_counts`, and `Hash#value_counts`
48
+ - Count how many items for each value in the container
49
+ - `Array#histogram`
50
+ - Calculate histogram of the values in the array
43
51
 
44
52
  Moreover, for Ruby < 2.4, `Array#sum` and `Enumerable#sum` are provided.
45
53
 
data/Rakefile CHANGED
@@ -6,15 +6,17 @@ task :default => :spec
6
6
 
7
7
  Rake::ExtensionTask.new('enumerable/statistics/extension')
8
8
 
9
+ directory 'lib/enumerable/statistics'
10
+
9
11
  RSpec::Core::RakeTask.new(:spec)
10
12
 
11
13
  task :bench do
12
14
  puts "# sum\n"
13
- system('ruby bench/sum.rb')
15
+ system('benchmark-driver bench/sum.yml')
14
16
 
15
17
  puts "# mean\n"
16
- system('ruby bench/mean.rb')
18
+ system('benchmark-driver bench/mean.yml')
17
19
 
18
20
  puts "# variance\n"
19
- system('ruby bench/variance.rb')
21
+ system('benchmark-driver bench/variance.yml')
20
22
  end
@@ -0,0 +1,42 @@
1
+ contexts:
2
+ - name: "1.1.0.dev"
3
+ gems:
4
+ enumerable-statistics: "1.1.0.dev"
5
+ require: false
6
+ prelude: |-
7
+ require 'enumerable/statistics'
8
+ - name: "HEAD"
9
+ prelude: |-
10
+ require 'bundler/setup'
11
+ require 'enumerable/statistics'
12
+ prelude: |-
13
+ n = 1000
14
+ chars = ('a'..'m').to_a
15
+ ary = Array.new(n) { chars.sample }
16
+ benchmark:
17
+ inject: |-
18
+ ary.inject(Hash.new(0)) { |h, x| h[x] += 1; h }
19
+ unsort_keepna: |-
20
+ ary.value_counts(sort: false, dropna: false)
21
+ unsort_dropna: |-
22
+ ary.value_counts(sort: false, dropna: true)
23
+ sort_keepna: |-
24
+ ary.value_counts(sort: true, dropna: false)
25
+ sort_dropna: |-
26
+ ary.value_counts(sort: true, dropna: true)
27
+ norm_unsort_keepna: |-
28
+ ary.value_counts(normalize: true, sort: false, dropna: false)
29
+ norm_unsort_dropna: |-
30
+ ary.value_counts(normalize: true, sort: false, dropna: true)
31
+ norm_sort_keepna: |-
32
+ ary.value_counts(normalize: true, sort: true, dropna: false)
33
+ norm_sort_dropna: |-
34
+ ary.value_counts(normalize: true, sort: true, dropna: true)
35
+ sort_asc_keepna: |-
36
+ ary.value_counts(sort: true, ascending: true, dropna: false)
37
+ sort_asc_dropna: |-
38
+ ary.value_counts(sort: true, ascending: true, dropna: true)
39
+ norm_sort_asc_keepna: |-
40
+ ary.value_counts(normalize: true, sort: true, ascending: true, dropna: false)
41
+ norm_sort_asc_dropna: |-
42
+ ary.value_counts(normalize: true, sort: true, ascending: true, dropna: true)
@@ -0,0 +1,42 @@
1
+ contexts:
2
+ - name: "1.1.0.dev"
3
+ gems:
4
+ enumerable-statistics: "1.1.0.dev"
5
+ require: false
6
+ prelude: |-
7
+ require 'enumerable/statistics'
8
+ - name: "HEAD"
9
+ prelude: |-
10
+ require 'bundler/setup'
11
+ require 'enumerable/statistics'
12
+ prelude: |-
13
+ n = 1000
14
+ chars = ('a'..'m').to_a
15
+ enum = Array.new(n) { chars.sample }.each
16
+ benchmark:
17
+ inject: |-
18
+ enum.inject(Hash.new(0)) { |h, x| h[x] += 1; h }
19
+ unsort_keepna: |-
20
+ enum.value_counts(sort: false, dropna: false)
21
+ unsort_dropna: |-
22
+ enum.value_counts(sort: false, dropna: true)
23
+ sort_keepna: |-
24
+ enum.value_counts(sort: true, dropna: false)
25
+ sort_dropna: |-
26
+ enum.value_counts(sort: true, dropna: true)
27
+ norm_unsort_keepna: |-
28
+ enum.value_counts(normalize: true, sort: false, dropna: false)
29
+ norm_unsort_dropna: |-
30
+ enum.value_counts(normalize: true, sort: false, dropna: true)
31
+ norm_sort_keepna: |-
32
+ enum.value_counts(normalize: true, sort: true, dropna: false)
33
+ norm_sort_dropna: |-
34
+ enum.value_counts(normalize: true, sort: true, dropna: true)
35
+ sort_asc_keepna: |-
36
+ enum.value_counts(sort: true, ascending: true, dropna: false)
37
+ sort_asc_dropna: |-
38
+ enum.value_counts(sort: true, ascending: true, dropna: true)
39
+ norm_sort_asc_keepna: |-
40
+ enum.value_counts(normalize: true, sort: true, ascending: true, dropna: false)
41
+ norm_sort_asc_dropna: |-
42
+ enum.value_counts(normalize: true, sort: true, ascending: true, dropna: true)
@@ -0,0 +1,42 @@
1
+ contexts:
2
+ - name: "1.1.0.dev"
3
+ gems:
4
+ enumerable-statistics: "1.1.0.dev"
5
+ require: false
6
+ prelude: |-
7
+ require 'enumerable/statistics'
8
+ - name: "HEAD"
9
+ prelude: |-
10
+ require 'bundler/setup'
11
+ require 'enumerable/statistics'
12
+ prelude: |-
13
+ n = 1000
14
+ chars = ('a'..'m').to_a
15
+ hash = Array.new(n) { chars.sample }.each_with_index.to_h
16
+ benchmark:
17
+ inject: |-
18
+ hash.inject(Hash.new(0)) { |h, (k, v)| h[v] += 1; h }
19
+ unsort_keepna: |-
20
+ hash.value_counts(sort: false, dropna: false)
21
+ unsort_dropna: |-
22
+ hash.value_counts(sort: false, dropna: true)
23
+ sort_keepna: |-
24
+ hash.value_counts(sort: true, dropna: false)
25
+ sort_dropna: |-
26
+ hash.value_counts(sort: true, dropna: true)
27
+ norm_unsort_keepna: |-
28
+ hash.value_counts(normalize: true, sort: false, dropna: false)
29
+ norm_unsort_dropna: |-
30
+ hash.value_counts(normalize: true, sort: false, dropna: true)
31
+ norm_sort_keepna: |-
32
+ hash.value_counts(normalize: true, sort: true, dropna: false)
33
+ norm_sort_dropna: |-
34
+ hash.value_counts(normalize: true, sort: true, dropna: true)
35
+ sort_asc_keepna: |-
36
+ hash.value_counts(sort: true, ascending: true, dropna: false)
37
+ sort_asc_dropna: |-
38
+ hash.value_counts(sort: true, ascending: true, dropna: true)
39
+ norm_sort_asc_keepna: |-
40
+ hash.value_counts(normalize: true, sort: true, ascending: true, dropna: false)
41
+ norm_sort_asc_dropna: |-
42
+ hash.value_counts(normalize: true, sort: true, ascending: true, dropna: true)
@@ -0,0 +1,30 @@
1
+ contexts:
2
+ - name: "master"
3
+ prelude: |-
4
+ require 'bundler/setup'
5
+ require 'enumerable/statistics'
6
+ prelude: |-
7
+ n = 1000
8
+ ary = Array.new(n) { rand }
9
+ benchmark:
10
+ inject: mean = ary.inject(:+) / n.to_f
11
+ while: |-
12
+ i, mean = 0, 0
13
+ while i < n
14
+ mean += ary[i]
15
+ i += 1
16
+ end
17
+ mean /= n.to_f
18
+ pure_ruby: |-
19
+ i, f, c = 0, 0.0, 0.0, 0.0, 0.0
20
+ while i < n
21
+ x = ary[i]
22
+ y = x - c
23
+ t = f + y
24
+ c = (t - f) - y
25
+ f = t
26
+
27
+ i += 1
28
+ end
29
+ mean = f / n
30
+ c_ext: mean = ary.mean
@@ -0,0 +1,29 @@
1
+ contexts:
2
+ - name: "master"
3
+ prelude: |-
4
+ require 'bundler/setup'
5
+ require 'enumerable/statistics'
6
+ prelude: |-
7
+ n = 1000
8
+ ary = Array.new(n) { rand }
9
+ benchmark:
10
+ inject: sum = ary.inject(:+)
11
+ while: |-
12
+ i, sum = 0, 0
13
+ while i < n
14
+ sum += ary[i]
15
+ i += 1
16
+ end
17
+ pure_ruby: |-
18
+ i, f, c = 0, 0.0, 0.0, 0.0, 0.0
19
+ while i < n
20
+ x = ary[i]
21
+ y = x - c
22
+ t = f + y
23
+ c = (t - f) - y
24
+ f = t
25
+
26
+ i += 1
27
+ end
28
+ sum = f
29
+ sum: sum = ary.sum
@@ -0,0 +1,39 @@
1
+ contexts:
2
+ - name: "master"
3
+ prelude: |-
4
+ require 'bundler/setup'
5
+ require 'enumerable/statistics'
6
+ prelude: |-
7
+ n = 1000
8
+ ary = Array.new(n) { rand }
9
+ benchmark:
10
+ inject: |-
11
+ mean = ary.mean
12
+ var = ary.inject(0.0) { |sum, x|
13
+ sum += (x - mean) ** 2
14
+ } / (n - 1).to_f
15
+ while: |-
16
+ mean = ary.mean
17
+ i, var = 0, 0
18
+ while i < n
19
+ var += (ary[i] - mean) ** 2
20
+ i += 1
21
+ end
22
+ var /= n.to_f
23
+ pure_ruby: |-
24
+ i, m, m2, f, c = 0, 0.0, 0.0, 0.0, 0.0
25
+ while i < n
26
+ x = ary[i]
27
+ y = x - c
28
+ t = f + y
29
+ c = (t - f) - y
30
+ f = t
31
+
32
+ delta = x - m
33
+ m += delta / i
34
+ m2 += delta * (x - m)
35
+
36
+ i += 1
37
+ end
38
+ var = m2 / n
39
+ c_ext: var = ary.variance
@@ -1,11 +1,17 @@
1
1
  # coding: utf-8
2
2
  lib = File.expand_path('../lib', __FILE__)
3
3
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
- require 'enumerable/statistics/version'
4
+
5
+ require 'enumerable_statistics/version'
5
6
 
6
7
  Gem::Specification.new do |spec|
7
8
  spec.name = "enumerable-statistics"
8
- spec.version = Enumerable::Statistics::VERSION
9
+ spec.version = [
10
+ EnumerableStatistics::Version::MAJOR,
11
+ EnumerableStatistics::Version::MINOR,
12
+ EnumerableStatistics::Version::MICRO,
13
+ EnumerableStatistics::Version::TAG
14
+ ].compact.join('.')
9
15
  spec.authors = ["Kenta Murata"]
10
16
  spec.email = ["mrkn@mrkn.jp"]
11
17
 
@@ -19,10 +25,13 @@ Gem::Specification.new do |spec|
19
25
  spec.require_paths = ["ext", "lib"]
20
26
  spec.extensions = Dir['ext/**/extconf.rb']
21
27
 
22
- spec.add_development_dependency "bundler", "~> 1.11"
23
- spec.add_development_dependency "rake", "~> 10.0"
24
- spec.add_development_dependency "rake-compiler", "~> 0.9.8"
25
- spec.add_development_dependency "rspec", "~> 3.4"
28
+ spec.required_ruby_version = '>= 2.4'
29
+
30
+ spec.add_development_dependency "bundler", ">= 1.17.2"
31
+ spec.add_development_dependency "rake"
32
+ spec.add_development_dependency "rake-compiler", ">= 0.9.8"
33
+ spec.add_development_dependency "rspec", ">= 3.4"
26
34
  spec.add_development_dependency "fuubar"
27
- spec.add_development_dependency "benchmark-ips"
35
+ spec.add_development_dependency "yard"
36
+ spec.add_development_dependency "benchmark-driver"
28
37
  end
@@ -0,0 +1,3 @@
1
+ require 'mkmf'
2
+
3
+ create_makefile('-bench-')
@@ -1,5 +1,17 @@
1
1
  require 'mkmf'
2
2
 
3
3
  have_type('struct RRational')
4
+ have_func('rb_rational_new')
5
+ have_func('rb_rational_num')
6
+ have_func('rb_rational_den')
7
+ have_func('rb_rational_plus')
8
+
4
9
  have_type('struct RComplex')
10
+ have_func('rb_complex_raw')
11
+ have_func('rb_complex_real')
12
+ have_func('rb_complex_imag')
13
+ have_func('rb_complex_plus')
14
+ have_func('rb_complex_div')
15
+ have_func('rb_dbl_complex_new')
16
+
5
17
  create_makefile('enumerable/statistics/extension')
@@ -1,6 +1,8 @@
1
1
  #include <ruby/ruby.h>
2
+ #include <ruby/util.h>
2
3
  #include <ruby/version.h>
3
4
  #include <assert.h>
5
+ #include <math.h>
4
6
 
5
7
  #if RUBY_API_VERSION_CODE >= 20400
6
8
  /* for 2.4.0 or higher */
@@ -16,6 +18,12 @@
16
18
  # undef HAVE_RB_RATIONAL_PLUS
17
19
  #endif
18
20
 
21
+ #ifdef HAVE_RB_ARITHMETIC_SEQUENCE_EXTRACT
22
+ # define HAVE_ARITHMETIC_SEQUENCE
23
+ #else
24
+ # undef HAVE_ARITHMETIC_SEQUENCE
25
+ #endif
26
+
19
27
  #ifndef RB_INTEGER_TYPE_P
20
28
  # define RB_INTEGER_TYPE_P(obj) enum_stat_integer_type_p(obj)
21
29
  static inline int
@@ -86,8 +94,12 @@ struct RComplex {
86
94
  static VALUE half_in_rational;
87
95
 
88
96
  static ID idPow, idPLUS, idMINUS, idSTAR, idDIV, idGE;
89
- static ID id_eqeq_p, id_idiv, id_negate, id_to_f, id_cmp;
90
- static ID id_each, id_real_p, id_sum, id_population;
97
+ static ID id_eqeq_p, id_idiv, id_negate, id_to_f, id_cmp, id_nan_p;
98
+ static ID id_each, id_real_p, id_sum, id_population, id_closed, id_edge;
99
+
100
+ static VALUE sym_left, sym_right;
101
+
102
+ static VALUE cHistogram;
91
103
 
92
104
  inline static VALUE
93
105
  f_add(VALUE x, VALUE y)
@@ -131,28 +143,6 @@ complex_new(VALUE klass, VALUE real, VALUE imag)
131
143
  return (VALUE)obj;
132
144
  }
133
145
 
134
- static VALUE
135
- complex_caonicalize_new(VALUE klass, VALUE real, VALUE imag)
136
- {
137
- if (f_real_p(real) && f_real_p(imag))
138
- return complex_new(klass, real, imag);
139
- else if (f_real_p(imag)) {
140
- VALUE new_imag;
141
-
142
- new_imag = f_add(RCOMPLEX(real)->imag, imag);
143
-
144
- return complex_new(klass, RCOMPLEX(real)->real, new_imag);
145
- }
146
- else {
147
- VALUE new_real, new_imag;
148
-
149
- new_real = f_sub(RCOMPLEX(real)->real, RCOMPLEX(imag)->imag);
150
- new_imag = f_add(RCOMPLEX(real)->imag, RCOMPLEX(imag)->real);
151
-
152
- return complex_new(klass, new_real, new_imag);
153
- }
154
- }
155
-
156
146
  static VALUE
157
147
  complex_add(VALUE self, VALUE other)
158
148
  {
@@ -623,7 +613,7 @@ rb_rational_plus(VALUE self, VALUE other)
623
613
  VALUE num = RRATIONAL(self)->num;
624
614
  VALUE den = RRATIONAL(self)->den;
625
615
 
626
- return f_addsub(self, num, den, other, ONE, idPLUS);
616
+ return f_addsub(self, num, den, other, ONE, '+');
627
617
  }
628
618
  else if (RB_TYPE_P(other, T_FLOAT)) {
629
619
  return f_add(f_to_f(self), other);
@@ -852,11 +842,11 @@ ary_mean_variance(VALUE ary, VALUE *mean_ptr, VALUE *variance_ptr, size_t ddof)
852
842
  static int
853
843
  opt_population_p(VALUE opts)
854
844
  {
855
- ID kwargs = id_population;
856
845
  VALUE population = Qfalse;
857
846
 
858
847
  if (!NIL_P(opts)) {
859
848
  #ifdef HAVE_RB_GET_KWARGS
849
+ ID kwargs = id_population;
860
850
  rb_get_kwargs(opts, &kwargs, 0, 1, &population);
861
851
  #else
862
852
  VALUE val = rb_hash_aref(opts, ID2SYM(id_population));
@@ -868,7 +858,7 @@ opt_population_p(VALUE opts)
868
858
  }
869
859
 
870
860
  /* call-seq:
871
- * eary.mean_variance(population: false)
861
+ * ary.mean_variance(population: false)
872
862
  *
873
863
  * Calculate a mean and a variance of the values in `ary`.
874
864
  * The first element of the result array is the mean, and the second is the variance.
@@ -1148,6 +1138,7 @@ enum_sum_count(VALUE obj, VALUE init, VALUE *sum_ptr, long *count_ptr)
1148
1138
  *count_ptr = memo.count;
1149
1139
  }
1150
1140
 
1141
+ #ifndef HAVE_ENUM_SUM
1151
1142
  /* call-seq:
1152
1143
  * enum.sum
1153
1144
  *
@@ -1172,10 +1163,11 @@ enum_sum(int argc, VALUE* argv, VALUE obj)
1172
1163
 
1173
1164
  return sum;
1174
1165
  }
1166
+ #endif
1175
1167
 
1176
1168
  struct enum_mean_variance_memo {
1177
1169
  int block_given;
1178
- long n;
1170
+ size_t n;
1179
1171
  double m, m2, f, c;
1180
1172
  };
1181
1173
 
@@ -1229,7 +1221,7 @@ enum_mean_variance_iter_i(RB_BLOCK_CALL_FUNC_ARGLIST(e, args))
1229
1221
  {
1230
1222
  struct enum_mean_variance_memo *memo = (struct enum_mean_variance_memo *)args;
1231
1223
  ENUM_WANT_SVALUE();
1232
- mean_variance_iter(e, (struct enum_sum_memo *) args);
1224
+ mean_variance_iter(e, memo);
1233
1225
  return Qnil;
1234
1226
  }
1235
1227
 
@@ -1487,9 +1479,836 @@ ary_stdev(int argc, VALUE* argv, VALUE ary)
1487
1479
  return stdev;
1488
1480
  }
1489
1481
 
1482
+ static inline int
1483
+ is_na(VALUE v)
1484
+ {
1485
+ if (NIL_P(v))
1486
+ return 1;
1487
+
1488
+ if (RB_FLOAT_TYPE_P(v) && isnan(RFLOAT_VALUE(v)))
1489
+ return 1;
1490
+
1491
+ if (rb_respond_to(v, id_nan_p) && RTEST(rb_funcall(v, id_nan_p, 0)))
1492
+ return 1;
1493
+
1494
+ return 0;
1495
+ }
1496
+
1497
+ static int
1498
+ ary_percentile_sort_cmp(const void *ap, const void *bp, void *dummy)
1499
+ {
1500
+ VALUE a = *(const VALUE *)ap, b = *(const VALUE *)bp;
1501
+ VALUE cmp;
1502
+
1503
+ if (is_na(a)) {
1504
+ return -1;
1505
+ }
1506
+ else if (is_na(b)) {
1507
+ return 1;
1508
+ }
1509
+
1510
+ /* TODO: optimize */
1511
+ cmp = rb_funcall(a, id_cmp, 1, b);
1512
+ return rb_cmpint(cmp, a, b);
1513
+ }
1514
+
1515
+ static VALUE
1516
+ ary_percentile_make_sorted(VALUE ary)
1517
+ {
1518
+ long n, i;
1519
+ VALUE sorted;
1520
+
1521
+ n = RARRAY_LEN(ary);
1522
+ sorted = rb_ary_tmp_new(n);
1523
+ for (i = 0; i < n; ++i) {
1524
+ rb_ary_push(sorted, RARRAY_AREF(ary, i));
1525
+ }
1526
+ RARRAY_PTR_USE(sorted, ptr, {
1527
+ ruby_qsort(ptr, n, sizeof(VALUE),
1528
+ ary_percentile_sort_cmp, NULL);
1529
+ });
1530
+ return sorted;
1531
+ }
1532
+
1533
+ static inline VALUE
1534
+ ary_percentile_single_sorted(VALUE sorted, long n, double d)
1535
+ {
1536
+ VALUE x0, x1;
1537
+ double i, f;
1538
+ long l;
1539
+
1540
+ assert(RB_TYPE_P(sorted, T_ARRAY));
1541
+ assert(n == RARRAY_LEN(sorted));
1542
+ assert(n > 0);
1543
+
1544
+ if (d < 0 || 100 < d) {
1545
+ rb_raise(rb_eArgError, "percentile out of bounds");
1546
+ }
1547
+
1548
+ if (is_na(RARRAY_AREF(sorted, 0))) {
1549
+ return DBL2NUM(nan(""));
1550
+ }
1551
+
1552
+ n = RARRAY_LEN(sorted);
1553
+ if (n == 1) {
1554
+ return RARRAY_AREF(sorted, 0);
1555
+ }
1556
+
1557
+ d = (n - 1) * d / 100.0;
1558
+ f = modf(d, &i);
1559
+ l = (long)i;
1560
+
1561
+ x0 = RARRAY_AREF(sorted, l);
1562
+ if (f == 0 || l == n - 1) {
1563
+ return x0;
1564
+ }
1565
+
1566
+ x0 = rb_funcall(x0, idSTAR, 1, DBL2NUM(1 - f));
1567
+ x1 = RARRAY_AREF(sorted, l + 1);
1568
+ x1 = rb_funcall(x1, idSTAR, 1, DBL2NUM(f));
1569
+
1570
+ return rb_funcall(x0, idPLUS, 1, x1);
1571
+ }
1572
+
1573
+ static VALUE
1574
+ ary_percentile_single(VALUE ary, VALUE q)
1575
+ {
1576
+ long n;
1577
+ double d;
1578
+ VALUE qf, sorted;
1579
+
1580
+ assert(RB_TYPE_P(ary, T_ARRAY));
1581
+
1582
+ n = RARRAY_LEN(ary);
1583
+ assert(n > 0);
1584
+
1585
+ switch (TYPE(q)) {
1586
+ case T_FIXNUM:
1587
+ d = (double)FIX2LONG(q);
1588
+ break;
1589
+ case T_BIGNUM:
1590
+ d = rb_big2dbl(q);
1591
+ break;
1592
+
1593
+ case T_RATIONAL:
1594
+ /* fall through */
1595
+ default:
1596
+ qf = NUM2DBL(q);
1597
+ goto float_percentile;
1598
+
1599
+ case T_FLOAT:
1600
+ qf = q;
1601
+ float_percentile:
1602
+ d = RFLOAT_VALUE(qf);
1603
+ break;
1604
+ }
1605
+
1606
+ if (n == 1) {
1607
+ return RARRAY_AREF(ary, 0);
1608
+ }
1609
+
1610
+ sorted = ary_percentile_make_sorted(ary);
1611
+
1612
+ return ary_percentile_single_sorted(sorted, n, d);
1613
+ }
1614
+
1615
+ /* call-seq:
1616
+ * ary.percentile(q) -> float
1617
+ *
1618
+ * Calculate specified percentiles of the values in `ary`.
1619
+ *
1620
+ * @param [Number, Array] percentile or array of percentiles to compute,
1621
+ * which must be between 0 and 100 inclusive.
1622
+ *
1623
+ * @return [Float, Array] A percentile value(s)
1624
+ */
1625
+ static VALUE
1626
+ ary_percentile(VALUE ary, VALUE q)
1627
+ {
1628
+ long n, m, i;
1629
+ double d;
1630
+ VALUE qf, qs, sorted, res;
1631
+
1632
+ n = RARRAY_LEN(ary);
1633
+ if (n == 0) {
1634
+ rb_raise(rb_eArgError, "unable to compute percentile(s) for an empty array");
1635
+ }
1636
+
1637
+ qs = rb_check_convert_type(q, T_ARRAY, "Array", "to_ary");
1638
+ if (NIL_P(qs)) {
1639
+ return ary_percentile_single(ary, q);
1640
+ }
1641
+
1642
+ m = RARRAY_LEN(qs);
1643
+ res = rb_ary_new_capa(m);
1644
+
1645
+ if (m == 1) {
1646
+ q = RARRAY_AREF(qs, 0);
1647
+ rb_ary_push(res, ary_percentile_single(ary, q));
1648
+ }
1649
+ else {
1650
+ sorted = ary_percentile_make_sorted(ary);
1651
+
1652
+ for (i = 0; i < m; ++i) {
1653
+ VALUE x;
1654
+
1655
+ q = RARRAY_AREF(qs, i);
1656
+ switch (TYPE(q)) {
1657
+ case T_FIXNUM:
1658
+ d = (double)FIX2LONG(q);
1659
+ break;
1660
+ case T_BIGNUM:
1661
+ d = rb_big2dbl(q);
1662
+ break;
1663
+
1664
+ case T_RATIONAL:
1665
+ /* fall through */
1666
+ default:
1667
+ qf = NUM2DBL(q);
1668
+ goto float_percentile;
1669
+
1670
+ case T_FLOAT:
1671
+ qf = q;
1672
+ float_percentile:
1673
+ d = RFLOAT_VALUE(qf);
1674
+ break;
1675
+ }
1676
+
1677
+ x = ary_percentile_single_sorted(sorted, n, d);
1678
+ rb_ary_push(res, x);
1679
+ }
1680
+ }
1681
+
1682
+ return res;
1683
+ }
1684
+
1685
+ /* call-seq:
1686
+ * ary.median -> float
1687
+ *
1688
+ * Calculate a median of the values in `ary`.
1689
+ *
1690
+ * @return [Float] A median value
1691
+ */
1692
+ static VALUE
1693
+ ary_median(VALUE ary)
1694
+ {
1695
+ long n;
1696
+ VALUE sorted, a0, a1;
1697
+
1698
+ n = RARRAY_LEN(ary);
1699
+ switch (n) {
1700
+ case 0:
1701
+ goto return_nan;
1702
+ case 1:
1703
+ return RARRAY_AREF(ary, 0);
1704
+ case 2:
1705
+ a0 = RARRAY_AREF(ary, 0);
1706
+ a1 = RARRAY_AREF(ary, 1);
1707
+ goto mean_two;
1708
+ default:
1709
+ break;
1710
+ }
1711
+
1712
+ sorted = ary_percentile_make_sorted(ary);
1713
+
1714
+ a0 = RARRAY_AREF(sorted, 0);
1715
+ if (is_na(a0)) {
1716
+ return_nan:
1717
+ return DBL2NUM(nan(""));
1718
+ }
1719
+
1720
+ a1 = RARRAY_AREF(sorted, n / 2);
1721
+ if (n % 2 == 1) {
1722
+ return a1;
1723
+ }
1724
+ else {
1725
+ a0 = RARRAY_AREF(sorted, n / 2 - 1);
1726
+
1727
+ mean_two:
1728
+ a0 = rb_funcall(a0, idPLUS, 1, a1); /* TODO: optimize */
1729
+ if (RB_INTEGER_TYPE_P(a0) || RB_FLOAT_TYPE_P(a0) || RB_TYPE_P(a0, T_RATIONAL)) {
1730
+ double d = NUM2DBL(a0);
1731
+ return DBL2NUM(d / 2.0);
1732
+ }
1733
+
1734
+ return rb_funcall(a0, idDIV, 1, DBL2NUM(2.0));
1735
+ }
1736
+ }
1737
+
1738
+ struct value_counts_opts {
1739
+ int normalize_p;
1740
+ int sort_p;
1741
+ int ascending_p;
1742
+ int dropna_p;
1743
+ };
1744
+
1745
+ static inline void
1746
+ value_counts_extract_opts(VALUE kwargs, struct value_counts_opts *opts)
1747
+ {
1748
+ assert(opts != NULL);
1749
+
1750
+ /* default values */
1751
+ opts->normalize_p = 0;
1752
+ opts->sort_p = 1;
1753
+ opts->ascending_p = 0;
1754
+ opts->dropna_p = 1;
1755
+
1756
+ if (!NIL_P(kwargs)) {
1757
+ enum { kw_normalize, kw_sort, kw_ascending, kw_dropna };
1758
+ static ID kwarg_keys[4];
1759
+ VALUE kwarg_vals[4];
1760
+
1761
+ if (!kwarg_keys[0]) {
1762
+ kwarg_keys[kw_normalize] = rb_intern("normalize");
1763
+ kwarg_keys[kw_sort] = rb_intern("sort");
1764
+ kwarg_keys[kw_ascending] = rb_intern("ascending");
1765
+ kwarg_keys[kw_dropna] = rb_intern("dropna");
1766
+ }
1767
+
1768
+ rb_get_kwargs(kwargs, kwarg_keys, 0, 4, kwarg_vals);
1769
+ opts->normalize_p = (kwarg_vals[kw_normalize] != Qundef) && RTEST(kwarg_vals[kw_normalize]);
1770
+ opts->sort_p = (kwarg_vals[kw_sort] != Qundef) && RTEST(kwarg_vals[kw_sort]);
1771
+ opts->ascending_p = (kwarg_vals[kw_ascending] != Qundef) && RTEST(kwarg_vals[kw_ascending]);
1772
+ opts->dropna_p = (kwarg_vals[kw_dropna] != Qundef) && RTEST(kwarg_vals[kw_dropna]);
1773
+ }
1774
+ }
1775
+
1776
+ static int
1777
+ value_counts_result_to_assoc_array_i(VALUE key, VALUE val, VALUE ary)
1778
+ {
1779
+ VALUE assoc = rb_ary_tmp_new(2);
1780
+ rb_ary_push(assoc, key);
1781
+ rb_ary_push(assoc, val);
1782
+ rb_ary_push(ary, assoc);
1783
+ return ST_CONTINUE;
1784
+ }
1785
+
1786
+ static int
1787
+ value_counts_sort_cmp_asc(const void *ap, const void *bp, void *dummy)
1788
+ {
1789
+ VALUE a = *(const VALUE *)ap, b = *(const VALUE *)bp;
1790
+ VALUE av, bv, cmp;
1791
+
1792
+ av = RARRAY_AREF(a, 1);
1793
+ bv = RARRAY_AREF(b, 1);
1794
+
1795
+ /* TODO: optimize */
1796
+ cmp = rb_funcall(av, id_cmp, 1, bv);
1797
+ return rb_cmpint(cmp, av, bv);
1798
+ }
1799
+
1800
+ static int
1801
+ value_counts_sort_cmp_desc(const void *ap, const void *bp, void *dummy)
1802
+ {
1803
+ VALUE a = *(const VALUE *)ap, b = *(const VALUE *)bp;
1804
+ VALUE av, bv, cmp;
1805
+
1806
+ av = RARRAY_AREF(a, 1);
1807
+ bv = RARRAY_AREF(b, 1);
1808
+
1809
+ /* TODO: optimize */
1810
+ cmp = rb_funcall(bv, id_cmp, 1, av);
1811
+ return rb_cmpint(cmp, bv, av);
1812
+ }
1813
+
1814
+ static VALUE
1815
+ value_counts_sort_result(VALUE result, const int dropna_p, const int ascending_p)
1816
+ {
1817
+ VALUE na_count = Qundef, ary, sorted;
1818
+ long i;
1819
+
1820
+ if (RHASH_SIZE(result) < 1) {
1821
+ return result;
1822
+ }
1823
+
1824
+ if (!dropna_p) {
1825
+ na_count = rb_hash_lookup2(result, Qnil, Qundef);
1826
+ if (na_count != Qundef) {
1827
+ rb_hash_delete(result, Qnil);
1828
+ }
1829
+ }
1830
+
1831
+ const long len = (long)RHASH_SIZE(result);
1832
+ ary = rb_ary_tmp_new(len);
1833
+ rb_hash_foreach(result, value_counts_result_to_assoc_array_i, ary);
1834
+ if (ascending_p) {
1835
+ RARRAY_PTR_USE(ary, ptr, {
1836
+ ruby_qsort(ptr, RARRAY_LEN(ary), sizeof(VALUE),
1837
+ value_counts_sort_cmp_asc, NULL);
1838
+ });
1839
+ }
1840
+ else {
1841
+ RARRAY_PTR_USE(ary, ptr, {
1842
+ ruby_qsort(ptr, RARRAY_LEN(ary), sizeof(VALUE),
1843
+ value_counts_sort_cmp_desc, NULL);
1844
+ });
1845
+ }
1846
+
1847
+ #ifdef HAVE_RB_HASH_NEW_WITH_SIZE
1848
+ sorted = rb_hash_new_with_size(len);
1849
+ #else
1850
+ sorted = rb_hash_new();
1851
+ #endif
1852
+
1853
+ if (na_count != Qundef && ascending_p) {
1854
+ rb_hash_aset(sorted, Qnil, na_count);
1855
+ }
1856
+
1857
+ for (i = 0; i < len; ++i) {
1858
+ VALUE a = RARRAY_AREF(ary, i);
1859
+ VALUE k = RARRAY_AREF(a, 0);
1860
+ VALUE v = RARRAY_AREF(a, 1);
1861
+ rb_hash_aset(sorted, k, v);
1862
+ }
1863
+
1864
+ if (na_count != Qundef && !ascending_p) {
1865
+ rb_hash_aset(sorted, Qnil, na_count);
1866
+ }
1867
+
1868
+ return sorted;
1869
+ }
1870
+
1871
+ struct value_counts_normalize_params {
1872
+ VALUE result;
1873
+ long total;
1874
+ };
1875
+
1876
+ static int
1877
+ value_counts_normalize_i(VALUE key, VALUE val, VALUE arg)
1878
+ {
1879
+ struct value_counts_normalize_params *params = (struct value_counts_normalize_params *)arg;
1880
+ double new_val;
1881
+
1882
+ new_val = NUM2DBL(val) / params->total;
1883
+ rb_hash_aset(params->result, key, DBL2NUM(new_val));
1884
+
1885
+ return ST_CONTINUE;
1886
+ }
1887
+
1888
+ struct value_counts_memo {
1889
+ int dropna_p;
1890
+ long total;
1891
+ long na_count;
1892
+ VALUE result;
1893
+ };
1894
+
1895
+ static VALUE
1896
+ any_value_counts(int argc, VALUE *argv, VALUE obj,
1897
+ void (* counter)(VALUE, struct value_counts_memo *))
1898
+ {
1899
+ VALUE kwargs;
1900
+ struct value_counts_opts opts;
1901
+ struct value_counts_memo memo;
1902
+
1903
+ rb_scan_args(argc, argv, ":", &kwargs);
1904
+ value_counts_extract_opts(kwargs, &opts);
1905
+
1906
+ memo.result = rb_hash_new();
1907
+ memo.total = 0;
1908
+ memo.na_count = 0;
1909
+ memo.dropna_p = opts.dropna_p;
1910
+
1911
+ if (!opts.dropna_p) {
1912
+ rb_hash_aset(memo.result, Qnil, INT2FIX(0)); // reserve the room for NA
1913
+ }
1914
+
1915
+ counter(obj, &memo);
1916
+
1917
+ if (!opts.dropna_p) {
1918
+ if (memo.na_count == 0)
1919
+ rb_hash_delete(memo.result, Qnil);
1920
+ else
1921
+ rb_hash_aset(memo.result, Qnil, LONG2NUM(memo.na_count));
1922
+ }
1923
+
1924
+ if (opts.sort_p) {
1925
+ memo.result = value_counts_sort_result(memo.result, opts.dropna_p, opts.ascending_p);
1926
+ }
1927
+
1928
+ if (opts.normalize_p) {
1929
+ struct value_counts_normalize_params params;
1930
+ params.result = memo.result;
1931
+ params.total = memo.total - (opts.dropna_p ? memo.na_count : 0);
1932
+ rb_hash_foreach(memo.result, value_counts_normalize_i, (VALUE)&params);
1933
+ }
1934
+
1935
+ return memo.result;
1936
+ }
1937
+
1938
+ static VALUE
1939
+ enum_value_counts_without_sort_i(RB_BLOCK_CALL_FUNC_ARGLIST(e, args))
1940
+ {
1941
+ struct value_counts_memo *memo = (struct value_counts_memo *)args;
1942
+
1943
+ ENUM_WANT_SVALUE();
1944
+
1945
+ if (is_na(e)) {
1946
+ ++memo->na_count;
1947
+ }
1948
+ else {
1949
+ VALUE cnt = rb_hash_lookup2(memo->result, e, INT2FIX(0));
1950
+ rb_hash_aset(memo->result, e, rb_int_plus(cnt, INT2FIX(1)));
1951
+ }
1952
+
1953
+ ++memo->total;
1954
+
1955
+ return Qnil;
1956
+ }
1957
+
1958
+ static void
1959
+ enum_value_counts_without_sort(VALUE obj, struct value_counts_memo *memo)
1960
+ {
1961
+ rb_block_call(obj, id_each, 0, 0, enum_value_counts_without_sort_i, (VALUE)memo);
1962
+ }
1963
+
1964
+ static VALUE
1965
+ enum_value_counts(int argc, VALUE* argv, VALUE obj)
1966
+ {
1967
+ return any_value_counts(argc, argv, obj, enum_value_counts_without_sort);
1968
+ }
1969
+
1970
+ static void
1971
+ ary_value_counts_without_sort(VALUE ary, struct value_counts_memo *memo)
1972
+ {
1973
+ const VALUE zero = INT2FIX(0);
1974
+ const VALUE one = INT2FIX(1);
1975
+ long i, na_count = 0;
1976
+ long const n = RARRAY_LEN(ary);
1977
+
1978
+ for (i = 0; i < n; ++i) {
1979
+ VALUE val = RARRAY_AREF(ary, i);
1980
+
1981
+ if (is_na(val)) {
1982
+ ++na_count;
1983
+ }
1984
+ else {
1985
+ VALUE cnt = rb_hash_lookup2(memo->result, val, zero);
1986
+ rb_hash_aset(memo->result, val, rb_int_plus(cnt, one));
1987
+ }
1988
+ }
1989
+
1990
+ memo->total = n;
1991
+ memo->na_count = na_count;
1992
+ }
1993
+
1994
+ /* call-seq:
1995
+ * ary.value_counts(normalize: false, sort: true, ascending: false, dropna: true) -> hash
1996
+ *
1997
+ * Returns a hash that contains the counts of values in `ary`.
1998
+ *
1999
+ * This method treats `nil` and NaN, the objects who respond `true` to `nan?`,
2000
+ * as the same thing, and stores the count of them as the value for `nil`.
2001
+ *
2002
+ * @param [false,true] normalize If `true`, the result contains the relative
2003
+ * frequencies of the unique values.
2004
+ * @param [true,false] sort Sort by values.
2005
+ * @param [false,true] ascending Sort in ascending order.
2006
+ * @param [true,false] dropna Don't include counts of NAs.
2007
+ *
2008
+ * @return [Hash] A hash consists of the counts of the values
2009
+ */
2010
+ static VALUE
2011
+ ary_value_counts(int argc, VALUE* argv, VALUE ary)
2012
+ {
2013
+ return any_value_counts(argc, argv, ary, ary_value_counts_without_sort);
2014
+ }
2015
+
2016
+ static int
2017
+ hash_value_counts_without_sort_i(VALUE key, VALUE val, VALUE arg)
2018
+ {
2019
+ struct value_counts_memo *memo = (struct value_counts_memo *)arg;
2020
+
2021
+ if (is_na(val)) {
2022
+ ++memo->na_count;
2023
+
2024
+ if (memo->dropna_p) {
2025
+ return ST_CONTINUE;
2026
+ }
2027
+ }
2028
+ else {
2029
+ VALUE cnt = rb_hash_lookup2(memo->result, val, INT2FIX(0));
2030
+ rb_hash_aset(memo->result, val, rb_int_plus(cnt, INT2FIX(1)));
2031
+ }
2032
+
2033
+ return ST_CONTINUE;
2034
+ }
2035
+
2036
+ static void
2037
+ hash_value_counts_without_sort(VALUE hash, struct value_counts_memo *memo)
2038
+ {
2039
+ rb_hash_foreach(hash, hash_value_counts_without_sort_i, (VALUE)memo);
2040
+ memo->total = RHASH_SIZE(hash);
2041
+ }
2042
+
2043
+ /* call-seq:
2044
+ * hash.value_counts(normalize: false, sort: true, ascending: false, dropna: true) -> hash
2045
+ *
2046
+ * Returns a hash that contains the counts of values in `hash`.
2047
+ *
2048
+ * This method treats `nil` and NaN, the objects who respond `true` to `nan?`,
2049
+ * as the same thing, and stores the count of them as the value for `nil`.
2050
+ *
2051
+ * @param [false,true] normalize If `true`, the result contains the relative
2052
+ * frequencies of the unique values.
2053
+ * @param [true,false] sort Sort by values.
2054
+ * @param [false,true] ascending Sort in ascending order.
2055
+ * @param [true,false] dropna Don't include counts of NAs.
2056
+ *
2057
+ * @return [Hash] A hash consists of the counts of the values
2058
+ */
2059
+ static VALUE
2060
+ hash_value_counts(int argc, VALUE* argv, VALUE hash)
2061
+ {
2062
+ return any_value_counts(argc, argv, hash, hash_value_counts_without_sort);
2063
+ }
2064
+
2065
+ static long
2066
+ histogram_edge_bin_index(VALUE edge, VALUE rb_x, int left_p)
2067
+ {
2068
+ double x, y;
2069
+ long lo, hi, mid;
2070
+
2071
+ x = NUM2DBL(rb_x);
2072
+
2073
+ lo = -1;
2074
+ hi = RARRAY_LEN(edge);
2075
+
2076
+ if (left_p) {
2077
+ while (hi - lo > 1) {
2078
+ mid = lo + (hi - lo)/2;
2079
+ y = NUM2DBL(RARRAY_AREF(edge, mid));
2080
+ if (y <= x) {
2081
+ lo = mid;
2082
+ }
2083
+ else {
2084
+ hi = mid;
2085
+ }
2086
+ }
2087
+ return lo;
2088
+ }
2089
+ else {
2090
+ while (hi - lo > 1) {
2091
+ mid = lo + (hi - lo)/2;
2092
+ y = NUM2DBL(RARRAY_AREF(edge, mid));
2093
+ if (y < x) {
2094
+ lo = mid;
2095
+ }
2096
+ else {
2097
+ hi = mid;
2098
+ }
2099
+ }
2100
+ return hi - 1;
2101
+ }
2102
+ }
2103
+
2104
+ static void
2105
+ histogram_weights_push_values(VALUE weights, VALUE edge, VALUE values, int left_p)
2106
+ {
2107
+ VALUE x, cur;
2108
+ long i, n, bi;
2109
+
2110
+ n = RARRAY_LEN(values);
2111
+ for (i = 0; i < n; ++i) {
2112
+ x = RARRAY_AREF(values, i);
2113
+
2114
+ bi = histogram_edge_bin_index(edge, x, left_p);
2115
+
2116
+ cur = rb_ary_entry(weights, bi);
2117
+ if (NIL_P(cur)) {
2118
+ cur = INT2FIX(1);
2119
+ }
2120
+ else {
2121
+ cur = rb_funcall(cur, idPLUS, 1, INT2FIX(1));
2122
+ }
2123
+
2124
+ rb_ary_store(weights, bi, cur);
2125
+ }
2126
+ }
2127
+
2128
+ static int
2129
+ opt_closed_left_p(VALUE opts)
2130
+ {
2131
+ int left_p = 1;
2132
+
2133
+ if (!NIL_P(opts)) {
2134
+ VALUE closed;
2135
+ #ifdef HAVE_RB_GET_KWARGS
2136
+ ID kwargs = id_closed;
2137
+ rb_get_kwargs(opts, &kwargs, 0, 1, &closed);
2138
+ #else
2139
+ closed = rb_hash_lookup2(opts, ID2SYM(id_closed), sym_left);
2140
+ #endif
2141
+ left_p = (closed != sym_right);
2142
+ if (left_p && closed != sym_left) {
2143
+ rb_raise(rb_eArgError, "invalid value for :closed keyword "
2144
+ "(%"PRIsVALUE" for :left or :right)", closed);
2145
+ }
2146
+ }
2147
+
2148
+ return left_p;
2149
+ }
2150
+
2151
+ static inline long
2152
+ sturges(long n)
2153
+ {
2154
+ if (n == 0) return 1L;
2155
+ return (long)(ceil(log2(n)) + 1);
2156
+ }
2157
+
2158
+ static VALUE
2159
+ ary_histogram_calculate_edge_lo_hi(const double lo, const double hi, const long nbins, const int left_p)
2160
+ {
2161
+ VALUE edge;
2162
+ double bw, lbw, start, step, divisor, r;
2163
+ long i, len;
2164
+
2165
+ if (hi == lo) {
2166
+ start = hi;
2167
+ step = 1;
2168
+ divisor = 1;
2169
+ len = 1;
2170
+ }
2171
+ else {
2172
+ bw = (hi - lo) / nbins;
2173
+ lbw = log10(bw);
2174
+ if (lbw >= 0) {
2175
+ step = pow(10, floor(lbw));
2176
+ r = bw / step;
2177
+ if (r <= 1.1) {
2178
+ /* do nothing */
2179
+ }
2180
+ else if (r <= 2.2) {
2181
+ step *= 2;
2182
+ }
2183
+ else if (r <= 5.5) {
2184
+ step *= 5;
2185
+ }
2186
+ else {
2187
+ step *= 10;
2188
+ }
2189
+ divisor = 1.0;
2190
+ start = step * floor(lo / step);
2191
+ len = (long)ceil((hi - start) / step);
2192
+ }
2193
+ else {
2194
+ divisor = pow(10, -floor(lbw));
2195
+ r = bw * divisor;
2196
+ if (r <= 1.1) {
2197
+ /* do nothing */
2198
+ }
2199
+ else if (r <= 2.2) {
2200
+ divisor /= 2;
2201
+ }
2202
+ else if (r <= 5.5) {
2203
+ divisor /= 5;
2204
+ }
2205
+ else {
2206
+ divisor /= 10;
2207
+ }
2208
+ step = 1.0;
2209
+ start = floor(lo * divisor);
2210
+ len = (long)ceil(hi * divisor - start);
2211
+ }
2212
+ }
2213
+
2214
+ if (left_p) {
2215
+ while (lo < start/divisor) {
2216
+ start -= step;
2217
+ }
2218
+ while ((start + (len - 1)*step)/divisor <= hi) {
2219
+ ++len;
2220
+ }
2221
+ }
2222
+ else {
2223
+ while (lo <= start/divisor) {
2224
+ start -= step;
2225
+ }
2226
+ while ((start + (len - 1)*step)/divisor < hi) {
2227
+ ++len;
2228
+ }
2229
+ }
2230
+
2231
+ edge = rb_ary_new_capa(len);
2232
+ for (i = 0; i < len; ++i) {
2233
+ rb_ary_push(edge, DBL2NUM(start/divisor));
2234
+ start += step;
2235
+ }
2236
+
2237
+ return edge;
2238
+ }
2239
+
2240
+ static VALUE
2241
+ ary_histogram_calculate_edge(VALUE ary, const long nbins, const int left_p)
2242
+ {
2243
+ long n;
2244
+ VALUE minmax;
2245
+ VALUE edge = Qnil;
2246
+ double lo, hi;
2247
+
2248
+ Check_Type(ary, T_ARRAY);
2249
+ n = RARRAY_LEN(ary);
2250
+
2251
+ if (n == 0 && nbins < 0) {
2252
+ rb_raise(rb_eArgError, "nbins must be >= 0 for an empty array, got %ld", nbins);
2253
+ }
2254
+ else if (n > 0 && nbins < 1) {
2255
+ rb_raise(rb_eArgError, "nbins must be >= 1 for a non-empty array, got %ld", nbins);
2256
+ }
2257
+ else if (n == 0) {
2258
+ edge = rb_ary_new_capa(1);
2259
+ rb_ary_push(edge, DBL2NUM(0.0));
2260
+ return edge;
2261
+ }
2262
+
2263
+ minmax = rb_funcall(ary, rb_intern("minmax"), 0);
2264
+ lo = NUM2DBL(RARRAY_AREF(minmax, 0));
2265
+ hi = NUM2DBL(RARRAY_AREF(minmax, 1));
2266
+
2267
+ edge = ary_histogram_calculate_edge_lo_hi(lo, hi, nbins, left_p);
2268
+
2269
+ return edge;
2270
+ }
2271
+
2272
+ /* call-seq:
2273
+ * ary.histogram(nbins=:auto, closed: :left)
2274
+ *
2275
+ * @param [Integer] nbins The approximate number of bins
2276
+ * @param [:left, :right] closed
2277
+ * If :left (the default), the bin interval are left-closed.
2278
+ * If :right, the bin interval are right-closed.
2279
+ *
2280
+ * @return [EnumerableStatistics::Histogram] The histogram struct.
2281
+ */
2282
+ static VALUE
2283
+ ary_histogram(int argc, VALUE *argv, VALUE ary)
2284
+ {
2285
+ VALUE arg0, opts, edge, weights;
2286
+ int left_p;
2287
+ long nbins;
2288
+
2289
+ rb_scan_args(argc, argv, "01:", &arg0, &opts);
2290
+ if (NIL_P(arg0)) {
2291
+ nbins = sturges(RARRAY_LEN(ary));
2292
+ }
2293
+ else {
2294
+ nbins = NUM2LONG(arg0);
2295
+ }
2296
+ left_p = opt_closed_left_p(opts);
2297
+
2298
+ edge = ary_histogram_calculate_edge(ary, nbins, left_p);
2299
+ weights = rb_ary_new_capa(RARRAY_LEN(edge) - 1);
2300
+ histogram_weights_push_values(weights, edge, ary, left_p);
2301
+
2302
+ return rb_struct_new(cHistogram, edge, weights,
2303
+ left_p ? sym_left : sym_right,
2304
+ Qfalse);
2305
+ }
2306
+
1490
2307
  void
1491
2308
  Init_extension(void)
1492
2309
  {
2310
+ VALUE mEnumerableStatistics;
2311
+
1493
2312
  #ifndef HAVE_ENUM_SUM
1494
2313
  rb_define_method(rb_mEnumerable, "sum", enum_sum, -1);
1495
2314
  #endif
@@ -1499,6 +2318,7 @@ Init_extension(void)
1499
2318
  rb_define_method(rb_mEnumerable, "variance", enum_variance, -1);
1500
2319
  rb_define_method(rb_mEnumerable, "mean_stdev", enum_mean_stdev, -1);
1501
2320
  rb_define_method(rb_mEnumerable, "stdev", enum_stdev, -1);
2321
+ rb_define_method(rb_mEnumerable, "value_counts", enum_value_counts, -1);
1502
2322
 
1503
2323
  #ifndef HAVE_ARRAY_SUM
1504
2324
  rb_define_method(rb_cArray, "sum", ary_sum, -1);
@@ -1508,10 +2328,20 @@ Init_extension(void)
1508
2328
  rb_define_method(rb_cArray, "variance", ary_variance, -1);
1509
2329
  rb_define_method(rb_cArray, "mean_stdev", ary_mean_stdev, -1);
1510
2330
  rb_define_method(rb_cArray, "stdev", ary_stdev, -1);
2331
+ rb_define_method(rb_cArray, "percentile", ary_percentile, 1);
2332
+ rb_define_method(rb_cArray, "median", ary_median, 0);
2333
+ rb_define_method(rb_cArray, "value_counts", ary_value_counts, -1);
2334
+
2335
+ rb_define_method(rb_cHash, "value_counts", hash_value_counts, -1);
1511
2336
 
1512
2337
  half_in_rational = nurat_s_new_internal(rb_cRational, INT2FIX(1), INT2FIX(2));
1513
2338
  rb_gc_register_mark_object(half_in_rational);
1514
2339
 
2340
+ mEnumerableStatistics = rb_const_get_at(rb_cObject, rb_intern("EnumerableStatistics"));
2341
+ cHistogram = rb_const_get_at(mEnumerableStatistics, rb_intern("Histogram"));
2342
+
2343
+ rb_define_method(rb_cArray, "histogram", ary_histogram, -1);
2344
+
1515
2345
  idPLUS = '+';
1516
2346
  idMINUS = '-';
1517
2347
  idSTAR = '*';
@@ -1523,8 +2353,14 @@ Init_extension(void)
1523
2353
  id_negate = rb_intern("-@");
1524
2354
  id_to_f = rb_intern("to_f");
1525
2355
  id_cmp = rb_intern("<=>");
2356
+ id_nan_p = rb_intern("nan?");
1526
2357
  id_each = rb_intern("each");
1527
2358
  id_real_p = rb_intern("real?");
1528
2359
  id_sum = rb_intern("sum");
1529
2360
  id_population = rb_intern("population");
2361
+ id_closed = rb_intern("closed");
2362
+ id_edge = rb_intern("edge");
2363
+
2364
+ sym_left = ID2SYM(rb_intern("left"));
2365
+ sym_right = ID2SYM(rb_intern("right"));
1530
2366
  }