enumerable-statistics 1.0.1 → 2.0.0.pre

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 01d8583f76df44d84e20ab090a5589e44e33dacf
4
- data.tar.gz: c070a1c7b007646ee0769b9e4924c0af0914341d
2
+ SHA256:
3
+ metadata.gz: d632cae80814e40cb6247d76cdfa74999e30effa4f969935a2deef35937c4f5e
4
+ data.tar.gz: 85c9ba3067efd94649e01836ebf04cf0ddebc2a69fea06f65a4efd067a9dffe7
5
5
  SHA512:
6
- metadata.gz: 7b3027aea941fa441b21c53ff8570b712cd576b42d97d9b7e3c421c92ad9c79552860a4f1c3bfd86da5c7f654d5396ab23415a0f861a4290ac1971ced29372e4
7
- data.tar.gz: d082b4b5c2dd4c91b40400ccbbcd68859e6811bc14add0416133c8a70d67c97b4af10ae4f53fef4fc73efcc139d7f3d224baa5e7b983c4428880bc671039a935
6
+ metadata.gz: 1ee5828934ed01b5bc2f3173816bfe24914f36641df6a99b2aff355478d74f13fd36201547776c06d8b4150784176420be8ceea1bbdd2fa9397d6930522caff1
7
+ data.tar.gz: 7a4a87570189ff29bef98fdeef30f2640eebbea550419229b9ffeb89c9d032fbe72082e6be00cd5fb9a85be2e515d6211b057f5fd1c4834ad6cb8c5153135a5f
@@ -1,19 +1,32 @@
1
1
  ---
2
- language: ruby
2
+ notifications: # Travis CI reads the plural key `notifications`
3
+   email:
4
+     - mrkn@ruby-lang.org
3
5
 
4
- rvm:
5
- - ruby-head
6
- - 2.3.0
7
- - 2.2.4
8
- - 2.1
6
+ language: ruby
9
7
 
10
8
  before_install:
11
9
  - gem update --system
12
- - gem update bundler
10
+ - gem install bundler
13
11
 
14
12
  install:
15
13
  - bundle install
16
14
 
17
15
  script:
18
- - bundle exec rake clobber compile
16
+ - bundle exec rake --trace clobber compile
19
17
  - bundle exec rake spec
18
+
19
+ matrix:
20
+ include:
21
+ - name: "2.3"
22
+ rvm: 2.3
23
+ - name: "2.4"
24
+ rvm: 2.4.5
25
+ - name: "2.5"
26
+ rvm: 2.5.2
27
+ - name: "2.6"
28
+ rvm: 2.6
29
+ - name: "trunk"
30
+ rvm: ruby-head
31
+ allow_failures:
32
+ - rvm: 2.3
data/.yardopts CHANGED
@@ -1 +1,2 @@
1
1
  --markup markdown
2
+ -p templates
@@ -1,3 +1,10 @@
1
+ # 2.0.0-pre
2
+
3
+ - Add `value_counts` method in Array, Hash, and Enumerable
4
+ - Add `median` method in Array
5
+ - Add `percentile` method in Array
6
+ - Add `histogram` method in Array
7
+
1
8
  # 1.0.1
2
9
 
3
10
  - Add `mean_variance` method in Array class and Enumerable module
data/README.md CHANGED
@@ -40,6 +40,14 @@ The following methods are supplied by this library:
40
40
  - Calculates a mean and a variance simultaneously
41
41
  - `Array#mean_stdev`, `Enumerable#mean_stdev`
42
42
  - Calculates a mean and a standard deviation simultaneously
43
+ - `Array#median`
44
+ - Calculates a median of values in an array
45
+ - `Array#percentile(q)`
46
+ - Calculates a percentile or percentiles of values in an array
47
+ - `Array#value_counts`, `Enumerable#value_counts`, and `Hash#value_counts`
48
+ - Counts how many times each value occurs in the container
49
+ - `Array#histogram`
50
+ - Calculates a histogram of the values in the array
43
51
 
44
52
  Moreover, for Ruby < 2.4, `Array#sum` and `Enumerable#sum` are provided.
45
53
 
data/Rakefile CHANGED
@@ -6,15 +6,17 @@ task :default => :spec
6
6
 
7
7
  Rake::ExtensionTask.new('enumerable/statistics/extension')
8
8
 
9
+ directory 'lib/enumerable/statistics'
10
+
9
11
  RSpec::Core::RakeTask.new(:spec)
10
12
 
11
13
  task :bench do
12
14
  puts "# sum\n"
13
- system('ruby bench/sum.rb')
15
+ system('benchmark-driver bench/sum.yml')
14
16
 
15
17
  puts "# mean\n"
16
- system('ruby bench/mean.rb')
18
+ system('benchmark-driver bench/mean.yml')
17
19
 
18
20
  puts "# variance\n"
19
- system('ruby bench/variance.rb')
21
+ system('benchmark-driver bench/variance.yml')
20
22
  end
@@ -0,0 +1,42 @@
1
+ contexts:
2
+ - name: "1.1.0.dev"
3
+ gems:
4
+ enumerable-statistics: "1.1.0.dev"
5
+ require: false
6
+ prelude: |-
7
+ require 'enumerable/statistics'
8
+ - name: "HEAD"
9
+ prelude: |-
10
+ require 'bundler/setup'
11
+ require 'enumerable/statistics'
12
+ prelude: |-
13
+ n = 1000
14
+ chars = ('a'..'m').to_a
15
+ ary = Array.new(n) { chars.sample }
16
+ benchmark:
17
+ inject: |-
18
+ ary.inject(Hash.new(0)) { |h, x| h[x] += 1; h }
19
+ unsort_keepna: |-
20
+ ary.value_counts(sort: false, dropna: false)
21
+ unsort_dropna: |-
22
+ ary.value_counts(sort: false, dropna: true)
23
+ sort_keepna: |-
24
+ ary.value_counts(sort: true, dropna: false)
25
+ sort_dropna: |-
26
+ ary.value_counts(sort: true, dropna: true)
27
+ norm_unsort_keepna: |-
28
+ ary.value_counts(normalize: true, sort: false, dropna: false)
29
+ norm_unsort_dropna: |-
30
+ ary.value_counts(normalize: true, sort: false, dropna: true)
31
+ norm_sort_keepna: |-
32
+ ary.value_counts(normalize: true, sort: true, dropna: false)
33
+ norm_sort_dropna: |-
34
+ ary.value_counts(normalize: true, sort: true, dropna: true)
35
+ sort_asc_keepna: |-
36
+ ary.value_counts(sort: true, ascending: true, dropna: false)
37
+ sort_asc_dropna: |-
38
+ ary.value_counts(sort: true, ascending: true, dropna: true)
39
+ norm_sort_asc_keepna: |-
40
+ ary.value_counts(normalize: true, sort: true, ascending: true, dropna: false)
41
+ norm_sort_asc_dropna: |-
42
+ ary.value_counts(normalize: true, sort: true, ascending: true, dropna: true)
@@ -0,0 +1,42 @@
1
+ contexts:
2
+ - name: "1.1.0.dev"
3
+ gems:
4
+ enumerable-statistics: "1.1.0.dev"
5
+ require: false
6
+ prelude: |-
7
+ require 'enumerable/statistics'
8
+ - name: "HEAD"
9
+ prelude: |-
10
+ require 'bundler/setup'
11
+ require 'enumerable/statistics'
12
+ prelude: |-
13
+ n = 1000
14
+ chars = ('a'..'m').to_a
15
+ enum = Array.new(n) { chars.sample }.each
16
+ benchmark:
17
+ inject: |-
18
+ enum.inject(Hash.new(0)) { |h, x| h[x] += 1; h }
19
+ unsort_keepna: |-
20
+ enum.value_counts(sort: false, dropna: false)
21
+ unsort_dropna: |-
22
+ enum.value_counts(sort: false, dropna: true)
23
+ sort_keepna: |-
24
+ enum.value_counts(sort: true, dropna: false)
25
+ sort_dropna: |-
26
+ enum.value_counts(sort: true, dropna: true)
27
+ norm_unsort_keepna: |-
28
+ enum.value_counts(normalize: true, sort: false, dropna: false)
29
+ norm_unsort_dropna: |-
30
+ enum.value_counts(normalize: true, sort: false, dropna: true)
31
+ norm_sort_keepna: |-
32
+ enum.value_counts(normalize: true, sort: true, dropna: false)
33
+ norm_sort_dropna: |-
34
+ enum.value_counts(normalize: true, sort: true, dropna: true)
35
+ sort_asc_keepna: |-
36
+ enum.value_counts(sort: true, ascending: true, dropna: false)
37
+ sort_asc_dropna: |-
38
+ enum.value_counts(sort: true, ascending: true, dropna: true)
39
+ norm_sort_asc_keepna: |-
40
+ enum.value_counts(normalize: true, sort: true, ascending: true, dropna: false)
41
+ norm_sort_asc_dropna: |-
42
+ enum.value_counts(normalize: true, sort: true, ascending: true, dropna: true)
@@ -0,0 +1,42 @@
1
+ contexts:
2
+ - name: "1.1.0.dev"
3
+ gems:
4
+ enumerable-statistics: "1.1.0.dev"
5
+ require: false
6
+ prelude: |-
7
+ require 'enumerable/statistics'
8
+ - name: "HEAD"
9
+ prelude: |-
10
+ require 'bundler/setup'
11
+ require 'enumerable/statistics'
12
+ prelude: |-
13
+ n = 1000
14
+ chars = ('a'..'m').to_a
15
+ hash = Array.new(n) { chars.sample }.each_with_index.to_h
16
+ benchmark:
17
+ inject: |-
18
+ hash.inject(Hash.new(0)) { |h, (k, v)| h[v] += 1; h }
19
+ unsort_keepna: |-
20
+ hash.value_counts(sort: false, dropna: false)
21
+ unsort_dropna: |-
22
+ hash.value_counts(sort: false, dropna: true)
23
+ sort_keepna: |-
24
+ hash.value_counts(sort: true, dropna: false)
25
+ sort_dropna: |-
26
+ hash.value_counts(sort: true, dropna: true)
27
+ norm_unsort_keepna: |-
28
+ hash.value_counts(normalize: true, sort: false, dropna: false)
29
+ norm_unsort_dropna: |-
30
+ hash.value_counts(normalize: true, sort: false, dropna: true)
31
+ norm_sort_keepna: |-
32
+ hash.value_counts(normalize: true, sort: true, dropna: false)
33
+ norm_sort_dropna: |-
34
+ hash.value_counts(normalize: true, sort: true, dropna: true)
35
+ sort_asc_keepna: |-
36
+ hash.value_counts(sort: true, ascending: true, dropna: false)
37
+ sort_asc_dropna: |-
38
+ hash.value_counts(sort: true, ascending: true, dropna: true)
39
+ norm_sort_asc_keepna: |-
40
+ hash.value_counts(normalize: true, sort: true, ascending: true, dropna: false)
41
+ norm_sort_asc_dropna: |-
42
+ hash.value_counts(normalize: true, sort: true, ascending: true, dropna: true)
@@ -0,0 +1,30 @@
1
+ contexts:
2
+   - name: "master"
3
+     prelude: |-
4
+       require 'bundler/setup'
5
+       require 'enumerable/statistics'
6
+ prelude: |- # shared setup: ary holds n values produced by rand
7
+   n = 1000
8
+   ary = Array.new(n) { rand }
9
+ benchmark: # each entry computes the mean of ary in a different way
10
+   inject: mean = ary.inject(:+) / n.to_f
11
+   while: |-
12
+     i, mean = 0, 0
13
+     while i < n
14
+       mean += ary[i]
15
+       i += 1
16
+     end
17
+     mean /= n.to_f
18
+   pure_ruby: |- # compensated (Kahan) summation, then divide by n
19
+     i, f, c = 0, 0.0, 0.0 # index, running sum, compensation term
20
+     while i < n
21
+       x = ary[i]
22
+       y = x - c
23
+       t = f + y
24
+       c = (t - f) - y
25
+       f = t
26
+
27
+       i += 1
28
+     end
29
+     mean = f / n
30
+   c_ext: mean = ary.mean
@@ -0,0 +1,29 @@
1
+ contexts:
2
+   - name: "master"
3
+     prelude: |-
4
+       require 'bundler/setup'
5
+       require 'enumerable/statistics'
6
+ prelude: |- # shared setup: ary holds n values produced by rand
7
+   n = 1000
8
+   ary = Array.new(n) { rand }
9
+ benchmark: # each entry computes the sum of ary in a different way
10
+   inject: sum = ary.inject(:+)
11
+   while: |-
12
+     i, sum = 0, 0
13
+     while i < n
14
+       sum += ary[i]
15
+       i += 1
16
+     end
17
+   pure_ruby: |- # compensated (Kahan) summation written in plain Ruby
18
+     i, f, c = 0, 0.0, 0.0 # index, running sum, compensation term
19
+     while i < n
20
+       x = ary[i]
21
+       y = x - c
22
+       t = f + y
23
+       c = (t - f) - y
24
+       f = t
25
+
26
+       i += 1
27
+     end
28
+     sum = f
29
+   sum: sum = ary.sum
@@ -0,0 +1,39 @@
1
+ contexts:
2
+   - name: "master"
3
+     prelude: |-
4
+       require 'bundler/setup'
5
+       require 'enumerable/statistics'
6
+ prelude: |- # shared setup: ary holds n values produced by rand
7
+   n = 1000
8
+   ary = Array.new(n) { rand }
9
+ benchmark: # each entry computes the variance of ary in a different way
10
+   inject: |-
11
+     mean = ary.mean
12
+     var = ary.inject(0.0) { |sum, x|
13
+       sum += (x - mean) ** 2
14
+     } / (n - 1).to_f
15
+   while: |-
16
+     mean = ary.mean
17
+     i, var = 0, 0
18
+     while i < n
19
+       var += (ary[i] - mean) ** 2
20
+       i += 1
21
+     end
22
+     var /= (n - 1).to_f # n - 1 denominator, same as the inject variant
23
+   pure_ruby: |- # single pass: running mean (m) and sum of squared deviations (m2)
24
+     i, m, m2, f, c = 0, 0.0, 0.0, 0.0, 0.0
25
+     while i < n
26
+       x = ary[i]
27
+       y = x - c
28
+       t = f + y
29
+       c = (t - f) - y
30
+       f = t
31
+
32
+       delta = x - m
33
+       m += delta / (i + 1) # i is 0-based: i + 1 values seen so far; dividing by i was a division by zero on the first pass
34
+       m2 += delta * (x - m)
35
+
36
+       i += 1
37
+     end
38
+     var = m2 / (n - 1) # n - 1 denominator, same as the inject variant
39
+   c_ext: var = ary.variance
@@ -1,11 +1,17 @@
1
1
  # coding: utf-8
2
2
  lib = File.expand_path('../lib', __FILE__)
3
3
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
- require 'enumerable/statistics/version'
4
+
5
+ require 'enumerable_statistics/version'
5
6
 
6
7
  Gem::Specification.new do |spec|
7
8
  spec.name = "enumerable-statistics"
8
- spec.version = Enumerable::Statistics::VERSION
9
+ spec.version = [
10
+ EnumerableStatistics::Version::MAJOR,
11
+ EnumerableStatistics::Version::MINOR,
12
+ EnumerableStatistics::Version::MICRO,
13
+ EnumerableStatistics::Version::TAG
14
+ ].compact.join('.')
9
15
  spec.authors = ["Kenta Murata"]
10
16
  spec.email = ["mrkn@mrkn.jp"]
11
17
 
@@ -19,10 +25,13 @@ Gem::Specification.new do |spec|
19
25
  spec.require_paths = ["ext", "lib"]
20
26
  spec.extensions = Dir['ext/**/extconf.rb']
21
27
 
22
- spec.add_development_dependency "bundler", "~> 1.11"
23
- spec.add_development_dependency "rake", "~> 10.0"
24
- spec.add_development_dependency "rake-compiler", "~> 0.9.8"
25
- spec.add_development_dependency "rspec", "~> 3.4"
28
+ spec.required_ruby_version = '>= 2.4'
29
+
30
+ spec.add_development_dependency "bundler", ">= 1.17.2"
31
+ spec.add_development_dependency "rake"
32
+ spec.add_development_dependency "rake-compiler", ">= 0.9.8"
33
+ spec.add_development_dependency "rspec", ">= 3.4"
26
34
  spec.add_development_dependency "fuubar"
27
- spec.add_development_dependency "benchmark-ips"
35
+ spec.add_development_dependency "yard"
36
+ spec.add_development_dependency "benchmark-driver"
28
37
  end
@@ -0,0 +1,3 @@
1
+ require 'mkmf'
2
+
3
+ create_makefile('-bench-')
@@ -1,5 +1,17 @@
1
1
  require 'mkmf'
2
2
 
3
3
  have_type('struct RRational')
4
+ have_func('rb_rational_new')
5
+ have_func('rb_rational_num')
6
+ have_func('rb_rational_den')
7
+ have_func('rb_rational_plus')
8
+
4
9
  have_type('struct RComplex')
10
+ have_func('rb_complex_raw')
11
+ have_func('rb_complex_real')
12
+ have_func('rb_complex_imag')
13
+ have_func('rb_complex_plus')
14
+ have_func('rb_complex_div')
15
+ have_func('rb_dbl_complex_new')
16
+
5
17
  create_makefile('enumerable/statistics/extension')
@@ -1,6 +1,8 @@
1
1
  #include <ruby/ruby.h>
2
+ #include <ruby/util.h>
2
3
  #include <ruby/version.h>
3
4
  #include <assert.h>
5
+ #include <math.h>
4
6
 
5
7
  #if RUBY_API_VERSION_CODE >= 20400
6
8
  /* for 2.4.0 or higher */
@@ -16,6 +18,12 @@
16
18
  # undef HAVE_RB_RATIONAL_PLUS
17
19
  #endif
18
20
 
21
+ #ifdef HAVE_RB_ARITHMETIC_SEQUENCE_EXTRACT
22
+ # define HAVE_ARITHMETIC_SEQUENCE
23
+ #else
24
+ # undef HAVE_ARITHMETIC_SEQUENCE
25
+ #endif
26
+
19
27
  #ifndef RB_INTEGER_TYPE_P
20
28
  # define RB_INTEGER_TYPE_P(obj) enum_stat_integer_type_p(obj)
21
29
  static inline int
@@ -86,8 +94,12 @@ struct RComplex {
86
94
  static VALUE half_in_rational;
87
95
 
88
96
  static ID idPow, idPLUS, idMINUS, idSTAR, idDIV, idGE;
89
- static ID id_eqeq_p, id_idiv, id_negate, id_to_f, id_cmp;
90
- static ID id_each, id_real_p, id_sum, id_population;
97
+ static ID id_eqeq_p, id_idiv, id_negate, id_to_f, id_cmp, id_nan_p;
98
+ static ID id_each, id_real_p, id_sum, id_population, id_closed, id_edge;
99
+
100
+ static VALUE sym_left, sym_right;
101
+
102
+ static VALUE cHistogram;
91
103
 
92
104
  inline static VALUE
93
105
  f_add(VALUE x, VALUE y)
@@ -131,28 +143,6 @@ complex_new(VALUE klass, VALUE real, VALUE imag)
131
143
  return (VALUE)obj;
132
144
  }
133
145
 
134
- static VALUE
135
- complex_caonicalize_new(VALUE klass, VALUE real, VALUE imag)
136
- {
137
- if (f_real_p(real) && f_real_p(imag))
138
- return complex_new(klass, real, imag);
139
- else if (f_real_p(imag)) {
140
- VALUE new_imag;
141
-
142
- new_imag = f_add(RCOMPLEX(real)->imag, imag);
143
-
144
- return complex_new(klass, RCOMPLEX(real)->real, new_imag);
145
- }
146
- else {
147
- VALUE new_real, new_imag;
148
-
149
- new_real = f_sub(RCOMPLEX(real)->real, RCOMPLEX(imag)->imag);
150
- new_imag = f_add(RCOMPLEX(real)->imag, RCOMPLEX(imag)->real);
151
-
152
- return complex_new(klass, new_real, new_imag);
153
- }
154
- }
155
-
156
146
  static VALUE
157
147
  complex_add(VALUE self, VALUE other)
158
148
  {
@@ -623,7 +613,7 @@ rb_rational_plus(VALUE self, VALUE other)
623
613
  VALUE num = RRATIONAL(self)->num;
624
614
  VALUE den = RRATIONAL(self)->den;
625
615
 
626
- return f_addsub(self, num, den, other, ONE, idPLUS);
616
+ return f_addsub(self, num, den, other, ONE, '+');
627
617
  }
628
618
  else if (RB_TYPE_P(other, T_FLOAT)) {
629
619
  return f_add(f_to_f(self), other);
@@ -852,11 +842,11 @@ ary_mean_variance(VALUE ary, VALUE *mean_ptr, VALUE *variance_ptr, size_t ddof)
852
842
  static int
853
843
  opt_population_p(VALUE opts)
854
844
  {
855
- ID kwargs = id_population;
856
845
  VALUE population = Qfalse;
857
846
 
858
847
  if (!NIL_P(opts)) {
859
848
  #ifdef HAVE_RB_GET_KWARGS
849
+ ID kwargs = id_population;
860
850
  rb_get_kwargs(opts, &kwargs, 0, 1, &population);
861
851
  #else
862
852
  VALUE val = rb_hash_aref(opts, ID2SYM(id_population));
@@ -868,7 +858,7 @@ opt_population_p(VALUE opts)
868
858
  }
869
859
 
870
860
  /* call-seq:
871
- * eary.mean_variance(population: false)
861
+ * ary.mean_variance(population: false)
872
862
  *
873
863
  * Calculate a mean and a variance of the values in `ary`.
874
864
  * The first element of the result array is the mean, and the second is the variance.
@@ -1148,6 +1138,7 @@ enum_sum_count(VALUE obj, VALUE init, VALUE *sum_ptr, long *count_ptr)
1148
1138
  *count_ptr = memo.count;
1149
1139
  }
1150
1140
 
1141
+ #ifndef HAVE_ENUM_SUM
1151
1142
  /* call-seq:
1152
1143
  * enum.sum
1153
1144
  *
@@ -1172,10 +1163,11 @@ enum_sum(int argc, VALUE* argv, VALUE obj)
1172
1163
 
1173
1164
  return sum;
1174
1165
  }
1166
+ #endif
1175
1167
 
1176
1168
  struct enum_mean_variance_memo {
1177
1169
  int block_given;
1178
- long n;
1170
+ size_t n;
1179
1171
  double m, m2, f, c;
1180
1172
  };
1181
1173
 
@@ -1229,7 +1221,7 @@ enum_mean_variance_iter_i(RB_BLOCK_CALL_FUNC_ARGLIST(e, args))
1229
1221
  {
1230
1222
  struct enum_mean_variance_memo *memo = (struct enum_mean_variance_memo *)args;
1231
1223
  ENUM_WANT_SVALUE();
1232
- mean_variance_iter(e, (struct enum_sum_memo *) args);
1224
+ mean_variance_iter(e, memo);
1233
1225
  return Qnil;
1234
1226
  }
1235
1227
 
@@ -1487,9 +1479,836 @@ ary_stdev(int argc, VALUE* argv, VALUE ary)
1487
1479
  return stdev;
1488
1480
  }
1489
1481
 
1482
+ static inline int
1483
+ is_na(VALUE v)
1484
+ {
1485
+ if (NIL_P(v))
1486
+ return 1;
1487
+
1488
+ if (RB_FLOAT_TYPE_P(v) && isnan(RFLOAT_VALUE(v)))
1489
+ return 1;
1490
+
1491
+ if (rb_respond_to(v, id_nan_p) && RTEST(rb_funcall(v, id_nan_p, 0)))
1492
+ return 1;
1493
+
1494
+ return 0;
1495
+ }
1496
+
1497
+ static int
1498
+ ary_percentile_sort_cmp(const void *ap, const void *bp, void *dummy)
1499
+ {
1500
+ VALUE a = *(const VALUE *)ap, b = *(const VALUE *)bp;
1501
+ VALUE cmp;
1502
+
1503
+ if (is_na(a)) {
1504
+ return -1;
1505
+ }
1506
+ else if (is_na(b)) {
1507
+ return 1;
1508
+ }
1509
+
1510
+ /* TODO: optimize */
1511
+ cmp = rb_funcall(a, id_cmp, 1, b);
1512
+ return rb_cmpint(cmp, a, b);
1513
+ }
1514
+
1515
+ static VALUE
1516
+ ary_percentile_make_sorted(VALUE ary)
1517
+ {
1518
+ long n, i;
1519
+ VALUE sorted;
1520
+
1521
+ n = RARRAY_LEN(ary);
1522
+ sorted = rb_ary_tmp_new(n);
1523
+ for (i = 0; i < n; ++i) {
1524
+ rb_ary_push(sorted, RARRAY_AREF(ary, i));
1525
+ }
1526
+ RARRAY_PTR_USE(sorted, ptr, {
1527
+ ruby_qsort(ptr, n, sizeof(VALUE),
1528
+ ary_percentile_sort_cmp, NULL);
1529
+ });
1530
+ return sorted;
1531
+ }
1532
+
1533
+ static inline VALUE
1534
+ ary_percentile_single_sorted(VALUE sorted, long n, double d)
1535
+ {
1536
+ VALUE x0, x1;
1537
+ double i, f;
1538
+ long l;
1539
+
1540
+ assert(RB_TYPE_P(sorted, T_ARRAY));
1541
+ assert(n == RARRAY_LEN(sorted));
1542
+ assert(n > 0);
1543
+
1544
+ if (d < 0 || 100 < d) {
1545
+ rb_raise(rb_eArgError, "percentile out of bounds");
1546
+ }
1547
+
1548
+ if (is_na(RARRAY_AREF(sorted, 0))) {
1549
+ return DBL2NUM(nan(""));
1550
+ }
1551
+
1552
+ n = RARRAY_LEN(sorted);
1553
+ if (n == 1) {
1554
+ return RARRAY_AREF(sorted, 0);
1555
+ }
1556
+
1557
+ d = (n - 1) * d / 100.0;
1558
+ f = modf(d, &i);
1559
+ l = (long)i;
1560
+
1561
+ x0 = RARRAY_AREF(sorted, l);
1562
+ if (f == 0 || l == n - 1) {
1563
+ return x0;
1564
+ }
1565
+
1566
+ x0 = rb_funcall(x0, idSTAR, 1, DBL2NUM(1 - f));
1567
+ x1 = RARRAY_AREF(sorted, l + 1);
1568
+ x1 = rb_funcall(x1, idSTAR, 1, DBL2NUM(f));
1569
+
1570
+ return rb_funcall(x0, idPLUS, 1, x1);
1571
+ }
1572
+
1573
+ static VALUE
1574
+ ary_percentile_single(VALUE ary, VALUE q)
1575
+ {
1576
+   long n;
1577
+   double d;
1578
+   VALUE qf, sorted;
1579
+   /* Returns the percentile q of the non-empty Array `ary`; q is converted to a double first. */
1580
+   assert(RB_TYPE_P(ary, T_ARRAY));
1581
+
1582
+   n = RARRAY_LEN(ary);
1583
+   assert(n > 0);
1584
+   /* Convert q to the C double d according to its Ruby type. */
1585
+   switch (TYPE(q)) {
1586
+     case T_FIXNUM:
1587
+       d = (double)FIX2LONG(q);
1588
+       break;
1589
+     case T_BIGNUM:
1590
+       d = rb_big2dbl(q);
1591
+       break;
1592
+
1593
+     case T_RATIONAL:
1594
+       /* fall through */
1595
+     default:
1596
+       qf = DBL2NUM(NUM2DBL(q)); /* box the double: RFLOAT_VALUE below needs a Float object, not a raw double */
1597
+       goto float_percentile;
1598
+
1599
+     case T_FLOAT:
1600
+       qf = q;
1601
+     float_percentile:
1602
+       d = RFLOAT_VALUE(qf);
1603
+       break;
1604
+   }
1605
+   /* NOTE(review): this shortcut returns before the 0..100 range check in ary_percentile_single_sorted. */
1606
+   if (n == 1) {
1607
+     return RARRAY_AREF(ary, 0);
1608
+   }
1609
+
1610
+   sorted = ary_percentile_make_sorted(ary);
1611
+
1612
+   return ary_percentile_single_sorted(sorted, n, d);
1613
+ }
1614
+
1615
+ /* call-seq:
1616
+  *   ary.percentile(q) -> float or array
1617
+  *
1618
+  * Calculate the specified percentile(s) of the values in `ary`.
1619
+  *
1620
+  * @param [Number, Array] q percentile or array of percentiles to compute,
1621
+  *   which must be between 0 and 100 inclusive.
1622
+  * @raise [ArgumentError] if `ary` is empty
1623
+  * @return [Float, Array] A percentile value, or an array of them when `q` is an array
1624
+  */
1625
+ static VALUE
1626
+ ary_percentile(VALUE ary, VALUE q)
1627
+ {
1628
+   long n, m, i;
1629
+   double d;
1630
+   VALUE qf, qs, sorted, res;
1631
+
1632
+   n = RARRAY_LEN(ary);
1633
+   if (n == 0) {
1634
+     rb_raise(rb_eArgError, "unable to compute percentile(s) for an empty array");
1635
+   }
1636
+   /* When q is not array-like, qs is nil and q is handled as one percentile. */
1637
+   qs = rb_check_convert_type(q, T_ARRAY, "Array", "to_ary");
1638
+   if (NIL_P(qs)) {
1639
+     return ary_percentile_single(ary, q);
1640
+   }
1641
+
1642
+   m = RARRAY_LEN(qs);
1643
+   res = rb_ary_new_capa(m);
1644
+
1645
+   if (m == 1) {
1646
+     q = RARRAY_AREF(qs, 0);
1647
+     rb_ary_push(res, ary_percentile_single(ary, q));
1648
+   }
1649
+   else {
1650
+     sorted = ary_percentile_make_sorted(ary);
1651
+     /* The sorted copy is built once and reused for every requested percentile. */
1652
+     for (i = 0; i < m; ++i) {
1653
+       VALUE x;
1654
+
1655
+       q = RARRAY_AREF(qs, i);
1656
+       switch (TYPE(q)) {
1657
+         case T_FIXNUM:
1658
+           d = (double)FIX2LONG(q);
1659
+           break;
1660
+         case T_BIGNUM:
1661
+           d = rb_big2dbl(q);
1662
+           break;
1663
+
1664
+         case T_RATIONAL:
1665
+           /* fall through */
1666
+         default:
1667
+           qf = DBL2NUM(NUM2DBL(q)); /* box the double: RFLOAT_VALUE below needs a Float object, not a raw double */
1668
+           goto float_percentile;
1669
+
1670
+         case T_FLOAT:
1671
+           qf = q;
1672
+         float_percentile:
1673
+           d = RFLOAT_VALUE(qf);
1674
+           break;
1675
+       }
1676
+
1677
+       x = ary_percentile_single_sorted(sorted, n, d);
1678
+       rb_ary_push(res, x);
1679
+     }
1680
+   }
1681
+
1682
+   return res;
1683
+ }
1684
+
1685
+ /* call-seq:
1686
+ * ary.median -> float
1687
+ *
1688
+ * Calculate a median of the values in `ary`.
1689
+ *
1690
+ * @return [Float] A median value
1691
+ */
1692
+ static VALUE
1693
+ ary_median(VALUE ary)
1694
+ {
1695
+ long n;
1696
+ VALUE sorted, a0, a1;
1697
+
1698
+ n = RARRAY_LEN(ary);
1699
+ switch (n) {
1700
+ case 0:
1701
+ goto return_nan;
1702
+ case 1:
1703
+ return RARRAY_AREF(ary, 0);
1704
+ case 2:
1705
+ a0 = RARRAY_AREF(ary, 0);
1706
+ a1 = RARRAY_AREF(ary, 1);
1707
+ goto mean_two;
1708
+ default:
1709
+ break;
1710
+ }
1711
+
1712
+ sorted = ary_percentile_make_sorted(ary);
1713
+
1714
+ a0 = RARRAY_AREF(sorted, 0);
1715
+ if (is_na(a0)) {
1716
+ return_nan:
1717
+ return DBL2NUM(nan(""));
1718
+ }
1719
+
1720
+ a1 = RARRAY_AREF(sorted, n / 2);
1721
+ if (n % 2 == 1) {
1722
+ return a1;
1723
+ }
1724
+ else {
1725
+ a0 = RARRAY_AREF(sorted, n / 2 - 1);
1726
+
1727
+ mean_two:
1728
+ a0 = rb_funcall(a0, idPLUS, 1, a1); /* TODO: optimize */
1729
+ if (RB_INTEGER_TYPE_P(a0) || RB_FLOAT_TYPE_P(a0) || RB_TYPE_P(a0, T_RATIONAL)) {
1730
+ double d = NUM2DBL(a0);
1731
+ return DBL2NUM(d / 2.0);
1732
+ }
1733
+
1734
+ return rb_funcall(a0, idDIV, 1, DBL2NUM(2.0));
1735
+ }
1736
+ }
1737
+
1738
+ struct value_counts_opts {
1739
+ int normalize_p;
1740
+ int sort_p;
1741
+ int ascending_p;
1742
+ int dropna_p;
1743
+ };
1744
+
1745
+ static inline void
1746
+ value_counts_extract_opts(VALUE kwargs, struct value_counts_opts *opts)
1747
+ {
1748
+   assert(opts != NULL);
1749
+   /* Fills *opts from the keyword hash `kwargs`; a nil `kwargs` leaves the defaults. */
1750
+   /* default values, kept for every keyword the caller does not pass */
1751
+   opts->normalize_p = 0;
1752
+   opts->sort_p = 1;
1753
+   opts->ascending_p = 0;
1754
+   opts->dropna_p = 1;
1755
+
1756
+   if (!NIL_P(kwargs)) {
1757
+     enum { kw_normalize, kw_sort, kw_ascending, kw_dropna };
1758
+     static ID kwarg_keys[4];
1759
+     VALUE kwarg_vals[4];
1760
+     /* Intern the keyword names on first use; the static array starts zeroed. */
1761
+     if (!kwarg_keys[0]) {
1762
+       kwarg_keys[kw_normalize] = rb_intern("normalize");
1763
+       kwarg_keys[kw_sort] = rb_intern("sort");
1764
+       kwarg_keys[kw_ascending] = rb_intern("ascending");
1765
+       kwarg_keys[kw_dropna] = rb_intern("dropna");
1766
+     }
1767
+     /* An omitted keyword comes back as Qundef; only keywords actually given override a default. */
1768
+     rb_get_kwargs(kwargs, kwarg_keys, 0, 4, kwarg_vals);
1769
+     if (kwarg_vals[kw_normalize] != Qundef) opts->normalize_p = RTEST(kwarg_vals[kw_normalize]);
1770
+     if (kwarg_vals[kw_sort] != Qundef) opts->sort_p = RTEST(kwarg_vals[kw_sort]);
1771
+     if (kwarg_vals[kw_ascending] != Qundef) opts->ascending_p = RTEST(kwarg_vals[kw_ascending]);
1772
+     if (kwarg_vals[kw_dropna] != Qundef) opts->dropna_p = RTEST(kwarg_vals[kw_dropna]);
1773
+   }
1774
+ }
1775
+
1776
+ static int
1777
+ value_counts_result_to_assoc_array_i(VALUE key, VALUE val, VALUE ary)
1778
+ {
1779
+ VALUE assoc = rb_ary_tmp_new(2);
1780
+ rb_ary_push(assoc, key);
1781
+ rb_ary_push(assoc, val);
1782
+ rb_ary_push(ary, assoc);
1783
+ return ST_CONTINUE;
1784
+ }
1785
+
1786
+ static int
1787
+ value_counts_sort_cmp_asc(const void *ap, const void *bp, void *dummy)
1788
+ {
1789
+ VALUE a = *(const VALUE *)ap, b = *(const VALUE *)bp;
1790
+ VALUE av, bv, cmp;
1791
+
1792
+ av = RARRAY_AREF(a, 1);
1793
+ bv = RARRAY_AREF(b, 1);
1794
+
1795
+ /* TODO: optimize */
1796
+ cmp = rb_funcall(av, id_cmp, 1, bv);
1797
+ return rb_cmpint(cmp, av, bv);
1798
+ }
1799
+
1800
+ static int
1801
+ value_counts_sort_cmp_desc(const void *ap, const void *bp, void *dummy)
1802
+ {
1803
+ VALUE a = *(const VALUE *)ap, b = *(const VALUE *)bp;
1804
+ VALUE av, bv, cmp;
1805
+
1806
+ av = RARRAY_AREF(a, 1);
1807
+ bv = RARRAY_AREF(b, 1);
1808
+
1809
+ /* TODO: optimize */
1810
+ cmp = rb_funcall(bv, id_cmp, 1, av);
1811
+ return rb_cmpint(cmp, bv, av);
1812
+ }
1813
+
1814
+ static VALUE
1815
+ value_counts_sort_result(VALUE result, const int dropna_p, const int ascending_p)
1816
+ {
1817
+ VALUE na_count = Qundef, ary, sorted;
1818
+ long i;
1819
+
1820
+ if (RHASH_SIZE(result) < 1) {
1821
+ return result;
1822
+ }
1823
+
1824
+ if (!dropna_p) {
1825
+ na_count = rb_hash_lookup2(result, Qnil, Qundef);
1826
+ if (na_count != Qundef) {
1827
+ rb_hash_delete(result, Qnil);
1828
+ }
1829
+ }
1830
+
1831
+ const long len = (long)RHASH_SIZE(result);
1832
+ ary = rb_ary_tmp_new(len);
1833
+ rb_hash_foreach(result, value_counts_result_to_assoc_array_i, ary);
1834
+ if (ascending_p) {
1835
+ RARRAY_PTR_USE(ary, ptr, {
1836
+ ruby_qsort(ptr, RARRAY_LEN(ary), sizeof(VALUE),
1837
+ value_counts_sort_cmp_asc, NULL);
1838
+ });
1839
+ }
1840
+ else {
1841
+ RARRAY_PTR_USE(ary, ptr, {
1842
+ ruby_qsort(ptr, RARRAY_LEN(ary), sizeof(VALUE),
1843
+ value_counts_sort_cmp_desc, NULL);
1844
+ });
1845
+ }
1846
+
1847
+ #ifdef HAVE_RB_HASH_NEW_WITH_SIZE
1848
+ sorted = rb_hash_new_with_size(len);
1849
+ #else
1850
+ sorted = rb_hash_new();
1851
+ #endif
1852
+
1853
+ if (na_count != Qundef && ascending_p) {
1854
+ rb_hash_aset(sorted, Qnil, na_count);
1855
+ }
1856
+
1857
+ for (i = 0; i < len; ++i) {
1858
+ VALUE a = RARRAY_AREF(ary, i);
1859
+ VALUE k = RARRAY_AREF(a, 0);
1860
+ VALUE v = RARRAY_AREF(a, 1);
1861
+ rb_hash_aset(sorted, k, v);
1862
+ }
1863
+
1864
+ if (na_count != Qundef && !ascending_p) {
1865
+ rb_hash_aset(sorted, Qnil, na_count);
1866
+ }
1867
+
1868
+ return sorted;
1869
+ }
1870
+
1871
+ struct value_counts_normalize_params {
1872
+ VALUE result;
1873
+ long total;
1874
+ };
1875
+
1876
+ static int
1877
+ value_counts_normalize_i(VALUE key, VALUE val, VALUE arg)
1878
+ {
1879
+ struct value_counts_normalize_params *params = (struct value_counts_normalize_params *)arg;
1880
+ double new_val;
1881
+
1882
+ new_val = NUM2DBL(val) / params->total;
1883
+ rb_hash_aset(params->result, key, DBL2NUM(new_val));
1884
+
1885
+ return ST_CONTINUE;
1886
+ }
1887
+
1888
+ struct value_counts_memo {
1889
+ int dropna_p;
1890
+ long total;
1891
+ long na_count;
1892
+ VALUE result;
1893
+ };
1894
+
1895
+ static VALUE
1896
+ any_value_counts(int argc, VALUE *argv, VALUE obj,
1897
+ void (* counter)(VALUE, struct value_counts_memo *))
1898
+ {
1899
+ VALUE kwargs;
1900
+ struct value_counts_opts opts;
1901
+ struct value_counts_memo memo;
1902
+
1903
+ rb_scan_args(argc, argv, ":", &kwargs);
1904
+ value_counts_extract_opts(kwargs, &opts);
1905
+
1906
+ memo.result = rb_hash_new();
1907
+ memo.total = 0;
1908
+ memo.na_count = 0;
1909
+ memo.dropna_p = opts.dropna_p;
1910
+
1911
+ if (!opts.dropna_p) {
1912
+ rb_hash_aset(memo.result, Qnil, INT2FIX(0)); // reserve the room for NA
1913
+ }
1914
+
1915
+ counter(obj, &memo);
1916
+
1917
+ if (!opts.dropna_p) {
1918
+ if (memo.na_count == 0)
1919
+ rb_hash_delete(memo.result, Qnil);
1920
+ else
1921
+ rb_hash_aset(memo.result, Qnil, LONG2NUM(memo.na_count));
1922
+ }
1923
+
1924
+ if (opts.sort_p) {
1925
+ memo.result = value_counts_sort_result(memo.result, opts.dropna_p, opts.ascending_p);
1926
+ }
1927
+
1928
+ if (opts.normalize_p) {
1929
+ struct value_counts_normalize_params params;
1930
+ params.result = memo.result;
1931
+ params.total = memo.total - (opts.dropna_p ? memo.na_count : 0);
1932
+ rb_hash_foreach(memo.result, value_counts_normalize_i, (VALUE)&params);
1933
+ }
1934
+
1935
+ return memo.result;
1936
+ }
1937
+
1938
+ static VALUE
1939
+ enum_value_counts_without_sort_i(RB_BLOCK_CALL_FUNC_ARGLIST(e, args))
1940
+ {
1941
+ struct value_counts_memo *memo = (struct value_counts_memo *)args;
1942
+
1943
+ ENUM_WANT_SVALUE();
1944
+
1945
+ if (is_na(e)) {
1946
+ ++memo->na_count;
1947
+ }
1948
+ else {
1949
+ VALUE cnt = rb_hash_lookup2(memo->result, e, INT2FIX(0));
1950
+ rb_hash_aset(memo->result, e, rb_int_plus(cnt, INT2FIX(1)));
1951
+ }
1952
+
1953
+ ++memo->total;
1954
+
1955
+ return Qnil;
1956
+ }
1957
+
1958
+ static void
1959
+ enum_value_counts_without_sort(VALUE obj, struct value_counts_memo *memo)
1960
+ {
1961
+ rb_block_call(obj, id_each, 0, 0, enum_value_counts_without_sort_i, (VALUE)memo);
1962
+ }
1963
+
1964
+ static VALUE
1965
+ enum_value_counts(int argc, VALUE* argv, VALUE obj)
1966
+ {
1967
+ return any_value_counts(argc, argv, obj, enum_value_counts_without_sort);
1968
+ }
1969
+
1970
+ static void
1971
+ ary_value_counts_without_sort(VALUE ary, struct value_counts_memo *memo)
1972
+ {
1973
+ const VALUE zero = INT2FIX(0);
1974
+ const VALUE one = INT2FIX(1);
1975
+ long i, na_count = 0;
1976
+ long const n = RARRAY_LEN(ary);
1977
+
1978
+ for (i = 0; i < n; ++i) {
1979
+ VALUE val = RARRAY_AREF(ary, i);
1980
+
1981
+ if (is_na(val)) {
1982
+ ++na_count;
1983
+ }
1984
+ else {
1985
+ VALUE cnt = rb_hash_lookup2(memo->result, val, zero);
1986
+ rb_hash_aset(memo->result, val, rb_int_plus(cnt, one));
1987
+ }
1988
+ }
1989
+
1990
+ memo->total = n;
1991
+ memo->na_count = na_count;
1992
+ }
1993
+
1994
+ /* call-seq:
1995
+ * ary.value_counts(normalize: false, sort: true, ascending: false, dropna: true) -> hash
1996
+ *
1997
+ * Returns a hash that contains the counts of values in `ary`.
1998
+ *
1999
+ * This method treats `nil` and NaN (any object that responds `true` to `nan?`)
2000
+ * as the same thing, and stores the count of them as the value for `nil`.
2001
+ *
2002
+ * @param [false,true] normalize If `true`, the result contains the relative
2003
+ * frequencies of the unique values.
2004
+ * @param [true,false] sort Sort by values.
2005
+ * @param [false,true] ascending Sort in ascending order.
2006
+ * @param [true,false] dropna Don't include counts of NAs.
2007
+ *
2008
+ * @return [Hash] A hash consisting of the counts of the values
2009
+ */
2010
+ static VALUE
2011
+ ary_value_counts(int argc, VALUE* argv, VALUE ary)
2012
+ {
2013
+ return any_value_counts(argc, argv, ary, ary_value_counts_without_sort);
2014
+ }
2015
+
2016
+ static int
2017
+ hash_value_counts_without_sort_i(VALUE key, VALUE val, VALUE arg)
2018
+ {
2019
+ struct value_counts_memo *memo = (struct value_counts_memo *)arg;
2020
+
2021
+ if (is_na(val)) {
2022
+ ++memo->na_count;
2023
+
2024
+ if (memo->dropna_p) {
2025
+ return ST_CONTINUE;
2026
+ }
2027
+ }
2028
+ else {
2029
+ VALUE cnt = rb_hash_lookup2(memo->result, val, INT2FIX(0));
2030
+ rb_hash_aset(memo->result, val, rb_int_plus(cnt, INT2FIX(1)));
2031
+ }
2032
+
2033
+ return ST_CONTINUE;
2034
+ }
2035
+
2036
+ static void
2037
+ hash_value_counts_without_sort(VALUE hash, struct value_counts_memo *memo)
2038
+ {
2039
+ rb_hash_foreach(hash, hash_value_counts_without_sort_i, (VALUE)memo);
2040
+ memo->total = RHASH_SIZE(hash);
2041
+ }
2042
+
2043
+ /* call-seq:
2044
+ * hash.value_counts(normalize: false, sort: true, ascending: false, dropna: true) -> hash
2045
+ *
2046
+ * Returns a hash that contains the counts of values in `hash`.
2047
+ *
2048
+ * This method treats `nil` and NaN, the objects who respond `true` to `nan?`,
2049
+ * as the same thing, and stores the count of them as the value for `nil`.
2050
+ *
2051
+ * @param [false,true] normalize If `true`, the result contains the relative
2052
+ * frequencies of the unique values.
2053
+ * @param [true,false] sort Sort by values.
2054
+ * @param [false,true] ascending Sort in ascending order.
2055
+ * @param [true,false] dropna Don't include counts of NAs.
2056
+ *
2057
+ * @return [Hash] A hash consists of the counts of the values
2058
+ */
2059
+ static VALUE
2060
+ hash_value_counts(int argc, VALUE* argv, VALUE hash)
2061
+ {
2062
+ return any_value_counts(argc, argv, hash, hash_value_counts_without_sort);
2063
+ }
2064
+
2065
+ static long
2066
+ histogram_edge_bin_index(VALUE edge, VALUE rb_x, int left_p)
2067
+ {
2068
+ double x, y;
2069
+ long lo, hi, mid;
2070
+
2071
+ x = NUM2DBL(rb_x);
2072
+
2073
+ lo = -1;
2074
+ hi = RARRAY_LEN(edge);
2075
+
2076
+ if (left_p) {
2077
+ while (hi - lo > 1) {
2078
+ mid = lo + (hi - lo)/2;
2079
+ y = NUM2DBL(RARRAY_AREF(edge, mid));
2080
+ if (y <= x) {
2081
+ lo = mid;
2082
+ }
2083
+ else {
2084
+ hi = mid;
2085
+ }
2086
+ }
2087
+ return lo;
2088
+ }
2089
+ else {
2090
+ while (hi - lo > 1) {
2091
+ mid = lo + (hi - lo)/2;
2092
+ y = NUM2DBL(RARRAY_AREF(edge, mid));
2093
+ if (y < x) {
2094
+ lo = mid;
2095
+ }
2096
+ else {
2097
+ hi = mid;
2098
+ }
2099
+ }
2100
+ return hi - 1;
2101
+ }
2102
+ }
2103
+
2104
+ static void
2105
+ histogram_weights_push_values(VALUE weights, VALUE edge, VALUE values, int left_p)
2106
+ {
2107
+ VALUE x, cur;
2108
+ long i, n, bi;
2109
+
2110
+ n = RARRAY_LEN(values);
2111
+ for (i = 0; i < n; ++i) {
2112
+ x = RARRAY_AREF(values, i);
2113
+
2114
+ bi = histogram_edge_bin_index(edge, x, left_p);
2115
+
2116
+ cur = rb_ary_entry(weights, bi);
2117
+ if (NIL_P(cur)) {
2118
+ cur = INT2FIX(1);
2119
+ }
2120
+ else {
2121
+ cur = rb_funcall(cur, idPLUS, 1, INT2FIX(1));
2122
+ }
2123
+
2124
+ rb_ary_store(weights, bi, cur);
2125
+ }
2126
+ }
2127
+
2128
+ static int
2129
+ opt_closed_left_p(VALUE opts)
2130
+ {
2131
+ int left_p = 1;
2132
+
2133
+ if (!NIL_P(opts)) {
2134
+ VALUE closed;
2135
+ #ifdef HAVE_RB_GET_KWARGS
2136
+ ID kwargs = id_closed;
2137
+ rb_get_kwargs(opts, &kwargs, 0, 1, &closed);
2138
+ #else
2139
+ closed = rb_hash_lookup2(opts, ID2SYM(id_closed), sym_left);
2140
+ #endif
2141
+ left_p = (closed != sym_right);
2142
+ if (left_p && closed != sym_left) {
2143
+ rb_raise(rb_eArgError, "invalid value for :closed keyword "
2144
+ "(%"PRIsVALUE" for :left or :right)", closed);
2145
+ }
2146
+ }
2147
+
2148
+ return left_p;
2149
+ }
2150
+
2151
+ static inline long
2152
+ sturges(long n)
2153
+ {
2154
+ if (n == 0) return 1L;
2155
+ return (long)(ceil(log2(n)) + 1);
2156
+ }
2157
+
2158
+ static VALUE
2159
+ ary_histogram_calculate_edge_lo_hi(const double lo, const double hi, const long nbins, const int left_p)
2160
+ {
2161
+ VALUE edge;
2162
+ double bw, lbw, start, step, divisor, r;
2163
+ long i, len;
2164
+
2165
+ if (hi == lo) {
2166
+ start = hi;
2167
+ step = 1;
2168
+ divisor = 1;
2169
+ len = 1;
2170
+ }
2171
+ else {
2172
+ bw = (hi - lo) / nbins;
2173
+ lbw = log10(bw);
2174
+ if (lbw >= 0) {
2175
+ step = pow(10, floor(lbw));
2176
+ r = bw / step;
2177
+ if (r <= 1.1) {
2178
+ /* do nothing */
2179
+ }
2180
+ else if (r <= 2.2) {
2181
+ step *= 2;
2182
+ }
2183
+ else if (r <= 5.5) {
2184
+ step *= 5;
2185
+ }
2186
+ else {
2187
+ step *= 10;
2188
+ }
2189
+ divisor = 1.0;
2190
+ start = step * floor(lo / step);
2191
+ len = (long)ceil((hi - start) / step);
2192
+ }
2193
+ else {
2194
+ divisor = pow(10, -floor(lbw));
2195
+ r = bw * divisor;
2196
+ if (r <= 1.1) {
2197
+ /* do nothing */
2198
+ }
2199
+ else if (r <= 2.2) {
2200
+ divisor /= 2;
2201
+ }
2202
+ else if (r <= 5.5) {
2203
+ divisor /= 5;
2204
+ }
2205
+ else {
2206
+ divisor /= 10;
2207
+ }
2208
+ step = 1.0;
2209
+ start = floor(lo * divisor);
2210
+ len = (long)ceil(hi * divisor - start);
2211
+ }
2212
+ }
2213
+
2214
+ if (left_p) {
2215
+ while (lo < start/divisor) {
2216
+ start -= step;
2217
+ }
2218
+ while ((start + (len - 1)*step)/divisor <= hi) {
2219
+ ++len;
2220
+ }
2221
+ }
2222
+ else {
2223
+ while (lo <= start/divisor) {
2224
+ start -= step;
2225
+ }
2226
+ while ((start + (len - 1)*step)/divisor < hi) {
2227
+ ++len;
2228
+ }
2229
+ }
2230
+
2231
+ edge = rb_ary_new_capa(len);
2232
+ for (i = 0; i < len; ++i) {
2233
+ rb_ary_push(edge, DBL2NUM(start/divisor));
2234
+ start += step;
2235
+ }
2236
+
2237
+ return edge;
2238
+ }
2239
+
2240
+ static VALUE
2241
+ ary_histogram_calculate_edge(VALUE ary, const long nbins, const int left_p)
2242
+ {
2243
+ long n;
2244
+ VALUE minmax;
2245
+ VALUE edge = Qnil;
2246
+ double lo, hi;
2247
+
2248
+ Check_Type(ary, T_ARRAY);
2249
+ n = RARRAY_LEN(ary);
2250
+
2251
+ if (n == 0 && nbins < 0) {
2252
+ rb_raise(rb_eArgError, "nbins must be >= 0 for an empty array, got %ld", nbins);
2253
+ }
2254
+ else if (n > 0 && nbins < 1) {
2255
+ rb_raise(rb_eArgError, "nbins must be >= 1 for a non-empty array, got %ld", nbins);
2256
+ }
2257
+ else if (n == 0) {
2258
+ edge = rb_ary_new_capa(1);
2259
+ rb_ary_push(edge, DBL2NUM(0.0));
2260
+ return edge;
2261
+ }
2262
+
2263
+ minmax = rb_funcall(ary, rb_intern("minmax"), 0);
2264
+ lo = NUM2DBL(RARRAY_AREF(minmax, 0));
2265
+ hi = NUM2DBL(RARRAY_AREF(minmax, 1));
2266
+
2267
+ edge = ary_histogram_calculate_edge_lo_hi(lo, hi, nbins, left_p);
2268
+
2269
+ return edge;
2270
+ }
2271
+
2272
+ /* call-seq:
2273
+ * ary.histogram(nbins=:auto, closed: :left)
2274
+ *
2275
+ * @param [Integer] nbins The approximate number of bins
2276
+ * @param [:left, :right] closed
2277
+ * If :left (the default), the bin interval are left-closed.
2278
+ * If :right, the bin interval are right-closed.
2279
+ *
2280
+ * @return [EnumerableStatistics::Histogram] The histogram struct.
2281
+ */
2282
+ static VALUE
2283
+ ary_histogram(int argc, VALUE *argv, VALUE ary)
2284
+ {
2285
+ VALUE arg0, opts, edge, weights;
2286
+ int left_p;
2287
+ long nbins;
2288
+
2289
+ rb_scan_args(argc, argv, "01:", &arg0, &opts);
2290
+ if (NIL_P(arg0)) {
2291
+ nbins = sturges(RARRAY_LEN(ary));
2292
+ }
2293
+ else {
2294
+ nbins = NUM2LONG(arg0);
2295
+ }
2296
+ left_p = opt_closed_left_p(opts);
2297
+
2298
+ edge = ary_histogram_calculate_edge(ary, nbins, left_p);
2299
+ weights = rb_ary_new_capa(RARRAY_LEN(edge) - 1);
2300
+ histogram_weights_push_values(weights, edge, ary, left_p);
2301
+
2302
+ return rb_struct_new(cHistogram, edge, weights,
2303
+ left_p ? sym_left : sym_right,
2304
+ Qfalse);
2305
+ }
2306
+
1490
2307
  void
1491
2308
  Init_extension(void)
1492
2309
  {
2310
+ VALUE mEnumerableStatistics;
2311
+
1493
2312
  #ifndef HAVE_ENUM_SUM
1494
2313
  rb_define_method(rb_mEnumerable, "sum", enum_sum, -1);
1495
2314
  #endif
@@ -1499,6 +2318,7 @@ Init_extension(void)
1499
2318
  rb_define_method(rb_mEnumerable, "variance", enum_variance, -1);
1500
2319
  rb_define_method(rb_mEnumerable, "mean_stdev", enum_mean_stdev, -1);
1501
2320
  rb_define_method(rb_mEnumerable, "stdev", enum_stdev, -1);
2321
+ rb_define_method(rb_mEnumerable, "value_counts", enum_value_counts, -1);
1502
2322
 
1503
2323
  #ifndef HAVE_ARRAY_SUM
1504
2324
  rb_define_method(rb_cArray, "sum", ary_sum, -1);
@@ -1508,10 +2328,20 @@ Init_extension(void)
1508
2328
  rb_define_method(rb_cArray, "variance", ary_variance, -1);
1509
2329
  rb_define_method(rb_cArray, "mean_stdev", ary_mean_stdev, -1);
1510
2330
  rb_define_method(rb_cArray, "stdev", ary_stdev, -1);
2331
+ rb_define_method(rb_cArray, "percentile", ary_percentile, 1);
2332
+ rb_define_method(rb_cArray, "median", ary_median, 0);
2333
+ rb_define_method(rb_cArray, "value_counts", ary_value_counts, -1);
2334
+
2335
+ rb_define_method(rb_cHash, "value_counts", hash_value_counts, -1);
1511
2336
 
1512
2337
  half_in_rational = nurat_s_new_internal(rb_cRational, INT2FIX(1), INT2FIX(2));
1513
2338
  rb_gc_register_mark_object(half_in_rational);
1514
2339
 
2340
+ mEnumerableStatistics = rb_const_get_at(rb_cObject, rb_intern("EnumerableStatistics"));
2341
+ cHistogram = rb_const_get_at(mEnumerableStatistics, rb_intern("Histogram"));
2342
+
2343
+ rb_define_method(rb_cArray, "histogram", ary_histogram, -1);
2344
+
1515
2345
  idPLUS = '+';
1516
2346
  idMINUS = '-';
1517
2347
  idSTAR = '*';
@@ -1523,8 +2353,14 @@ Init_extension(void)
1523
2353
  id_negate = rb_intern("-@");
1524
2354
  id_to_f = rb_intern("to_f");
1525
2355
  id_cmp = rb_intern("<=>");
2356
+ id_nan_p = rb_intern("nan?");
1526
2357
  id_each = rb_intern("each");
1527
2358
  id_real_p = rb_intern("real?");
1528
2359
  id_sum = rb_intern("sum");
1529
2360
  id_population = rb_intern("population");
2361
+ id_closed = rb_intern("closed");
2362
+ id_edge = rb_intern("edge");
2363
+
2364
+ sym_left = ID2SYM(rb_intern("left"));
2365
+ sym_right = ID2SYM(rb_intern("right"));
1530
2366
  }