enumerable-statistics 1.0.1 → 2.0.0.pre
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.travis.yml +21 -8
- data/.yardopts +1 -0
- data/CHANGELOG.md +7 -0
- data/README.md +8 -0
- data/Rakefile +5 -3
- data/bench/array_value_counts.yml +42 -0
- data/bench/enum_value_counts.yml +42 -0
- data/bench/hash_value_counts.yml +42 -0
- data/bench/mean.yml +30 -0
- data/bench/sum.yml +29 -0
- data/bench/variance.yml +39 -0
- data/enumerable-statistics.gemspec +16 -7
- data/ext/-bench-/extconf.rb +3 -0
- data/ext/enumerable/statistics/extension/extconf.rb +12 -0
- data/ext/enumerable/statistics/extension/statistics.c +865 -29
- data/lib/enumerable/statistics.rb +1 -1
- data/lib/enumerable_statistics.rb +2 -0
- data/lib/enumerable_statistics/histogram.rb +5 -0
- data/lib/enumerable_statistics/version.rb +9 -0
- data/templates/default/layout/html/headers.erb +36 -0
- metadata +45 -24
- data/bench/mean.rb +0 -27
- data/bench/sum.rb +0 -26
- data/bench/variance.rb +0 -30
- data/lib/enumerable/statistics/version.rb +0 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: d632cae80814e40cb6247d76cdfa74999e30effa4f969935a2deef35937c4f5e
|
4
|
+
data.tar.gz: 85c9ba3067efd94649e01836ebf04cf0ddebc2a69fea06f65a4efd067a9dffe7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1ee5828934ed01b5bc2f3173816bfe24914f36641df6a99b2aff355478d74f13fd36201547776c06d8b4150784176420be8ceea1bbdd2fa9397d6930522caff1
|
7
|
+
data.tar.gz: 7a4a87570189ff29bef98fdeef30f2640eebbea550419229b9ffeb89c9d032fbe72082e6be00cd5fb9a85be2e515d6211b057f5fd1c4834ad6cb8c5153135a5f
|
data/.travis.yml
CHANGED
@@ -1,19 +1,32 @@
|
|
1
1
|
---
|
2
|
-
|
2
|
+
notification:
|
3
|
+
email:
|
4
|
+
- mrkn@ruby-lang.org
|
3
5
|
|
4
|
-
|
5
|
-
- ruby-head
|
6
|
-
- 2.3.0
|
7
|
-
- 2.2.4
|
8
|
-
- 2.1
|
6
|
+
language: ruby
|
9
7
|
|
10
8
|
before_install:
|
11
9
|
- gem update --system
|
12
|
-
- gem
|
10
|
+
- gem install bundler
|
13
11
|
|
14
12
|
install:
|
15
13
|
- bundle install
|
16
14
|
|
17
15
|
script:
|
18
|
-
- bundle exec rake clobber compile
|
16
|
+
- bundle exec rake --trace clobber compile
|
19
17
|
- bundle exec rake spec
|
18
|
+
|
19
|
+
matrix:
|
20
|
+
include:
|
21
|
+
- name: "2.3"
|
22
|
+
rvm: 2.3
|
23
|
+
- name: "2.4"
|
24
|
+
rvm: 2.4.5
|
25
|
+
- name: "2.5"
|
26
|
+
rvm: 2.5.2
|
27
|
+
- name: "2.6"
|
28
|
+
rvm: 2.6
|
29
|
+
- name: "trunk"
|
30
|
+
rvm: ruby-head
|
31
|
+
allow_failures:
|
32
|
+
- rvm: 2.3
|
data/.yardopts
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,10 @@
|
|
1
|
+
# 2.0.0-pre
|
2
|
+
|
3
|
+
- Add `value_counts` method in Array, Hash, and Enumerable
|
4
|
+
- Add `median` method in Array
|
5
|
+
- Add `percentile` method in Array
|
6
|
+
- Add `histogram` method in Array
|
7
|
+
|
1
8
|
# 1.0.1
|
2
9
|
|
3
10
|
- Add `mean_variance` method in Array class and Enumerable module
|
data/README.md
CHANGED
@@ -40,6 +40,14 @@ The following methods are supplied by this library:
|
|
40
40
|
- Calculates a mean and a variance simultaneously
|
41
41
|
- `Array#mean_stdev`, `Enumerable#mean_stdev`
|
42
42
|
- Calculates a mean and a standard deviation simultaneously
|
43
|
+
- `Array#median`
|
44
|
+
- Calculates a median of values in an array
|
45
|
+
- `Array#percentile(q)`
|
46
|
+
- Calculates a percentile or percentiles of values in an array
|
47
|
+
- `Array#value_counts`, `Enumerable#value_counts`, and `Hash#value_counts`
|
48
|
+
- Count how many items for each value in the container
|
49
|
+
- `Array#histogram`
|
50
|
+
- Calculate histogram of the values in the array
|
43
51
|
|
44
52
|
Moreover, for Ruby < 2.4, `Array#sum` and `Enumerable#sum` are provided.
|
45
53
|
|
data/Rakefile
CHANGED
@@ -6,15 +6,17 @@ task :default => :spec
|
|
6
6
|
|
7
7
|
Rake::ExtensionTask.new('enumerable/statistics/extension')
|
8
8
|
|
9
|
+
directory 'lib/enumerable/statistics'
|
10
|
+
|
9
11
|
RSpec::Core::RakeTask.new(:spec)
|
10
12
|
|
11
13
|
task :bench do
|
12
14
|
puts "# sum\n"
|
13
|
-
system('
|
15
|
+
system('benchmark-driver bench/sum.yml')
|
14
16
|
|
15
17
|
puts "# mean\n"
|
16
|
-
system('
|
18
|
+
system('benchmark-driver bench/mean.yml')
|
17
19
|
|
18
20
|
puts "# variance\n"
|
19
|
-
system('
|
21
|
+
system('benchmark-driver bench/variance.yml')
|
20
22
|
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
contexts:
|
2
|
+
- name: "1.1.0.dev"
|
3
|
+
gems:
|
4
|
+
enumerable-statistics: "1.1.0.dev"
|
5
|
+
require: false
|
6
|
+
prelude: |-
|
7
|
+
require 'enumerable/statistics'
|
8
|
+
- name: "HEAD"
|
9
|
+
prelude: |-
|
10
|
+
require 'bundler/setup'
|
11
|
+
require 'enumerable/statistics'
|
12
|
+
prelude: |-
|
13
|
+
n = 1000
|
14
|
+
chars = ('a'..'m').to_a
|
15
|
+
ary = Array.new(n) { chars.sample }
|
16
|
+
benchmark:
|
17
|
+
inject: |-
|
18
|
+
ary.inject(Hash.new(0)) { |h, x| h[x] += 1; h }
|
19
|
+
unsort_keepna: |-
|
20
|
+
ary.value_counts(sort: false, dropna: false)
|
21
|
+
unsort_dropna: |-
|
22
|
+
ary.value_counts(sort: false, dropna: true)
|
23
|
+
sort_keepna: |-
|
24
|
+
ary.value_counts(sort: true, dropna: false)
|
25
|
+
sort_dropna: |-
|
26
|
+
ary.value_counts(sort: true, dropna: true)
|
27
|
+
norm_unsort_keepna: |-
|
28
|
+
ary.value_counts(normalize: true, sort: false, dropna: false)
|
29
|
+
norm_unsort_dropna: |-
|
30
|
+
ary.value_counts(normalize: true, sort: false, dropna: true)
|
31
|
+
norm_sort_keepna: |-
|
32
|
+
ary.value_counts(normalize: true, sort: true, dropna: false)
|
33
|
+
norm_sort_dropna: |-
|
34
|
+
ary.value_counts(normalize: true, sort: true, dropna: true)
|
35
|
+
sort_asc_keepna: |-
|
36
|
+
ary.value_counts(sort: true, ascending: true, dropna: false)
|
37
|
+
sort_asc_dropna: |-
|
38
|
+
ary.value_counts(sort: true, ascending: true, dropna: true)
|
39
|
+
norm_sort_asc_keepna: |-
|
40
|
+
ary.value_counts(normalize: true, sort: true, ascending: true, dropna: false)
|
41
|
+
norm_sort_asc_dropna: |-
|
42
|
+
ary.value_counts(normalize: true, sort: true, ascending: true, dropna: true)
|
@@ -0,0 +1,42 @@
|
|
1
|
+
contexts:
|
2
|
+
- name: "1.1.0.dev"
|
3
|
+
gems:
|
4
|
+
enumerable-statistics: "1.1.0.dev"
|
5
|
+
require: false
|
6
|
+
prelude: |-
|
7
|
+
require 'enumerable/statistics'
|
8
|
+
- name: "HEAD"
|
9
|
+
prelude: |-
|
10
|
+
require 'bundler/setup'
|
11
|
+
require 'enumerable/statistics'
|
12
|
+
prelude: |-
|
13
|
+
n = 1000
|
14
|
+
chars = ('a'..'m').to_a
|
15
|
+
enum = Array.new(n) { chars.sample }.each
|
16
|
+
benchmark:
|
17
|
+
inject: |-
|
18
|
+
enum.inject(Hash.new(0)) { |h, x| h[x] += 1; h }
|
19
|
+
unsort_keepna: |-
|
20
|
+
enum.value_counts(sort: false, dropna: false)
|
21
|
+
unsort_dropna: |-
|
22
|
+
enum.value_counts(sort: false, dropna: true)
|
23
|
+
sort_keepna: |-
|
24
|
+
enum.value_counts(sort: true, dropna: false)
|
25
|
+
sort_dropna: |-
|
26
|
+
enum.value_counts(sort: true, dropna: true)
|
27
|
+
norm_unsort_keepna: |-
|
28
|
+
enum.value_counts(normalize: true, sort: false, dropna: false)
|
29
|
+
norm_unsort_dropna: |-
|
30
|
+
enum.value_counts(normalize: true, sort: false, dropna: true)
|
31
|
+
norm_sort_keepna: |-
|
32
|
+
enum.value_counts(normalize: true, sort: true, dropna: false)
|
33
|
+
norm_sort_dropna: |-
|
34
|
+
enum.value_counts(normalize: true, sort: true, dropna: true)
|
35
|
+
sort_asc_keepna: |-
|
36
|
+
enum.value_counts(sort: true, ascending: true, dropna: false)
|
37
|
+
sort_asc_dropna: |-
|
38
|
+
enum.value_counts(sort: true, ascending: true, dropna: true)
|
39
|
+
norm_sort_asc_keepna: |-
|
40
|
+
enum.value_counts(normalize: true, sort: true, ascending: true, dropna: false)
|
41
|
+
norm_sort_asc_dropna: |-
|
42
|
+
enum.value_counts(normalize: true, sort: true, ascending: true, dropna: true)
|
@@ -0,0 +1,42 @@
|
|
1
|
+
contexts:
|
2
|
+
- name: "1.1.0.dev"
|
3
|
+
gems:
|
4
|
+
enumerable-statistics: "1.1.0.dev"
|
5
|
+
require: false
|
6
|
+
prelude: |-
|
7
|
+
require 'enumerable/statistics'
|
8
|
+
- name: "HEAD"
|
9
|
+
prelude: |-
|
10
|
+
require 'bundler/setup'
|
11
|
+
require 'enumerable/statistics'
|
12
|
+
prelude: |-
|
13
|
+
n = 1000
|
14
|
+
chars = ('a'..'m').to_a
|
15
|
+
hash = Array.new(n) { chars.sample }.each_with_index.to_h
|
16
|
+
benchmark:
|
17
|
+
inject: |-
|
18
|
+
hash.inject(Hash.new(0)) { |h, (k, v)| h[v] += 1; h }
|
19
|
+
unsort_keepna: |-
|
20
|
+
hash.value_counts(sort: false, dropna: false)
|
21
|
+
unsort_dropna: |-
|
22
|
+
hash.value_counts(sort: false, dropna: true)
|
23
|
+
sort_keepna: |-
|
24
|
+
hash.value_counts(sort: true, dropna: false)
|
25
|
+
sort_dropna: |-
|
26
|
+
hash.value_counts(sort: true, dropna: true)
|
27
|
+
norm_unsort_keepna: |-
|
28
|
+
hash.value_counts(normalize: true, sort: false, dropna: false)
|
29
|
+
norm_unsort_dropna: |-
|
30
|
+
hash.value_counts(normalize: true, sort: false, dropna: true)
|
31
|
+
norm_sort_keepna: |-
|
32
|
+
hash.value_counts(normalize: true, sort: true, dropna: false)
|
33
|
+
norm_sort_dropna: |-
|
34
|
+
hash.value_counts(normalize: true, sort: true, dropna: true)
|
35
|
+
sort_asc_keepna: |-
|
36
|
+
hash.value_counts(sort: true, ascending: true, dropna: false)
|
37
|
+
sort_asc_dropna: |-
|
38
|
+
hash.value_counts(sort: true, ascending: true, dropna: true)
|
39
|
+
norm_sort_asc_keepna: |-
|
40
|
+
hash.value_counts(normalize: true, sort: true, ascending: true, dropna: false)
|
41
|
+
norm_sort_asc_dropna: |-
|
42
|
+
hash.value_counts(normalize: true, sort: true, ascending: true, dropna: true)
|
data/bench/mean.yml
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
contexts:
|
2
|
+
- name: "master"
|
3
|
+
prelude: |-
|
4
|
+
require 'bundler/setup'
|
5
|
+
require 'enumerable/statistics'
|
6
|
+
prelude: |-
|
7
|
+
n = 1000
|
8
|
+
ary = Array.new(n) { rand }
|
9
|
+
benchmark:
|
10
|
+
inject: mean = ary.inject(:+) / n.to_f
|
11
|
+
while: |-
|
12
|
+
i, mean = 0, 0
|
13
|
+
while i < n
|
14
|
+
mean += ary[i]
|
15
|
+
i += 1
|
16
|
+
end
|
17
|
+
mean /= n.to_f
|
18
|
+
pure_ruby: |-
|
19
|
+
i, f, c = 0, 0.0, 0.0, 0.0, 0.0
|
20
|
+
while i < n
|
21
|
+
x = ary[i]
|
22
|
+
y = x - c
|
23
|
+
t = f + y
|
24
|
+
c = (t - f) - y
|
25
|
+
f = t
|
26
|
+
|
27
|
+
i += 1
|
28
|
+
end
|
29
|
+
mean = f / n
|
30
|
+
c_ext: mean = ary.mean
|
data/bench/sum.yml
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
contexts:
|
2
|
+
- name: "master"
|
3
|
+
prelude: |-
|
4
|
+
require 'bundler/setup'
|
5
|
+
require 'enumerable/statistics'
|
6
|
+
prelude: |-
|
7
|
+
n = 1000
|
8
|
+
ary = Array.new(n) { rand }
|
9
|
+
benchmark:
|
10
|
+
inject: sum = ary.inject(:+)
|
11
|
+
while: |-
|
12
|
+
i, sum = 0, 0
|
13
|
+
while i < n
|
14
|
+
sum += ary[i]
|
15
|
+
i += 1
|
16
|
+
end
|
17
|
+
pure_ruby: |-
|
18
|
+
i, f, c = 0, 0.0, 0.0, 0.0, 0.0
|
19
|
+
while i < n
|
20
|
+
x = ary[i]
|
21
|
+
y = x - c
|
22
|
+
t = f + y
|
23
|
+
c = (t - f) - y
|
24
|
+
f = t
|
25
|
+
|
26
|
+
i += 1
|
27
|
+
end
|
28
|
+
sum = f
|
29
|
+
sum: sum = ary.sum
|
data/bench/variance.yml
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
contexts:
|
2
|
+
- name: "master"
|
3
|
+
prelude: |-
|
4
|
+
require 'bundler/setup'
|
5
|
+
require 'enumerable/statistics'
|
6
|
+
prelude: |-
|
7
|
+
n = 1000
|
8
|
+
ary = Array.new(n) { rand }
|
9
|
+
benchmark:
|
10
|
+
inject: |-
|
11
|
+
mean = ary.mean
|
12
|
+
var = ary.inject(0.0) { |sum, x|
|
13
|
+
sum += (x - mean) ** 2
|
14
|
+
} / (n - 1).to_f
|
15
|
+
while: |-
|
16
|
+
mean = ary.mean
|
17
|
+
i, var = 0, 0
|
18
|
+
while i < n
|
19
|
+
var += (ary[i] - mean) ** 2
|
20
|
+
i += 1
|
21
|
+
end
|
22
|
+
var /= n.to_f
|
23
|
+
pure_ruby: |-
|
24
|
+
i, m, m2, f, c = 0, 0.0, 0.0, 0.0, 0.0
|
25
|
+
while i < n
|
26
|
+
x = ary[i]
|
27
|
+
y = x - c
|
28
|
+
t = f + y
|
29
|
+
c = (t - f) - y
|
30
|
+
f = t
|
31
|
+
|
32
|
+
delta = x - m
|
33
|
+
m += delta / i
|
34
|
+
m2 += delta * (x - m)
|
35
|
+
|
36
|
+
i += 1
|
37
|
+
end
|
38
|
+
var = m2 / n
|
39
|
+
c_ext: var = ary.variance
|
@@ -1,11 +1,17 @@
|
|
1
1
|
# coding: utf-8
|
2
2
|
lib = File.expand_path('../lib', __FILE__)
|
3
3
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
-
|
4
|
+
|
5
|
+
require 'enumerable_statistics/version'
|
5
6
|
|
6
7
|
Gem::Specification.new do |spec|
|
7
8
|
spec.name = "enumerable-statistics"
|
8
|
-
spec.version =
|
9
|
+
spec.version = [
|
10
|
+
EnumerableStatistics::Version::MAJOR,
|
11
|
+
EnumerableStatistics::Version::MINOR,
|
12
|
+
EnumerableStatistics::Version::MICRO,
|
13
|
+
EnumerableStatistics::Version::TAG
|
14
|
+
].compact.join('.')
|
9
15
|
spec.authors = ["Kenta Murata"]
|
10
16
|
spec.email = ["mrkn@mrkn.jp"]
|
11
17
|
|
@@ -19,10 +25,13 @@ Gem::Specification.new do |spec|
|
|
19
25
|
spec.require_paths = ["ext", "lib"]
|
20
26
|
spec.extensions = Dir['ext/**/extconf.rb']
|
21
27
|
|
22
|
-
spec.
|
23
|
-
|
24
|
-
spec.add_development_dependency "
|
25
|
-
spec.add_development_dependency "
|
28
|
+
spec.required_ruby_version = '>= 2.4'
|
29
|
+
|
30
|
+
spec.add_development_dependency "bundler", ">= 1.17.2"
|
31
|
+
spec.add_development_dependency "rake"
|
32
|
+
spec.add_development_dependency "rake-compiler", ">= 0.9.8"
|
33
|
+
spec.add_development_dependency "rspec", ">= 3.4"
|
26
34
|
spec.add_development_dependency "fuubar"
|
27
|
-
spec.add_development_dependency "
|
35
|
+
spec.add_development_dependency "yard"
|
36
|
+
spec.add_development_dependency "benchmark-driver"
|
28
37
|
end
|
@@ -1,5 +1,17 @@
|
|
1
1
|
require 'mkmf'
|
2
2
|
|
3
3
|
have_type('struct RRational')
|
4
|
+
have_func('rb_rational_new')
|
5
|
+
have_func('rb_rational_num')
|
6
|
+
have_func('rb_rational_den')
|
7
|
+
have_func('rb_rational_plus')
|
8
|
+
|
4
9
|
have_type('struct RComplex')
|
10
|
+
have_func('rb_complex_raw')
|
11
|
+
have_func('rb_complex_real')
|
12
|
+
have_func('rb_complex_imag')
|
13
|
+
have_func('rb_complex_plus')
|
14
|
+
have_func('rb_complex_div')
|
15
|
+
have_func('rb_dbl_complex_new')
|
16
|
+
|
5
17
|
create_makefile('enumerable/statistics/extension')
|
@@ -1,6 +1,8 @@
|
|
1
1
|
#include <ruby/ruby.h>
|
2
|
+
#include <ruby/util.h>
|
2
3
|
#include <ruby/version.h>
|
3
4
|
#include <assert.h>
|
5
|
+
#include <math.h>
|
4
6
|
|
5
7
|
#if RUBY_API_VERSION_CODE >= 20400
|
6
8
|
/* for 2.4.0 or higher */
|
@@ -16,6 +18,12 @@
|
|
16
18
|
# undef HAVE_RB_RATIONAL_PLUS
|
17
19
|
#endif
|
18
20
|
|
21
|
+
#ifdef HAVE_RB_ARITHMETIC_SEQUENCE_EXTRACT
|
22
|
+
# define HAVE_ARITHMETIC_SEQUENCE
|
23
|
+
#else
|
24
|
+
# undef HAVE_ARITHMETIC_SEQUENCE
|
25
|
+
#endif
|
26
|
+
|
19
27
|
#ifndef RB_INTEGER_TYPE_P
|
20
28
|
# define RB_INTEGER_TYPE_P(obj) enum_stat_integer_type_p(obj)
|
21
29
|
static inline int
|
@@ -86,8 +94,12 @@ struct RComplex {
|
|
86
94
|
static VALUE half_in_rational;
|
87
95
|
|
88
96
|
static ID idPow, idPLUS, idMINUS, idSTAR, idDIV, idGE;
|
89
|
-
static ID id_eqeq_p, id_idiv, id_negate, id_to_f, id_cmp;
|
90
|
-
static ID id_each, id_real_p, id_sum, id_population;
|
97
|
+
static ID id_eqeq_p, id_idiv, id_negate, id_to_f, id_cmp, id_nan_p;
|
98
|
+
static ID id_each, id_real_p, id_sum, id_population, id_closed, id_edge;
|
99
|
+
|
100
|
+
static VALUE sym_left, sym_right;
|
101
|
+
|
102
|
+
static VALUE cHistogram;
|
91
103
|
|
92
104
|
inline static VALUE
|
93
105
|
f_add(VALUE x, VALUE y)
|
@@ -131,28 +143,6 @@ complex_new(VALUE klass, VALUE real, VALUE imag)
|
|
131
143
|
return (VALUE)obj;
|
132
144
|
}
|
133
145
|
|
134
|
-
static VALUE
|
135
|
-
complex_caonicalize_new(VALUE klass, VALUE real, VALUE imag)
|
136
|
-
{
|
137
|
-
if (f_real_p(real) && f_real_p(imag))
|
138
|
-
return complex_new(klass, real, imag);
|
139
|
-
else if (f_real_p(imag)) {
|
140
|
-
VALUE new_imag;
|
141
|
-
|
142
|
-
new_imag = f_add(RCOMPLEX(real)->imag, imag);
|
143
|
-
|
144
|
-
return complex_new(klass, RCOMPLEX(real)->real, new_imag);
|
145
|
-
}
|
146
|
-
else {
|
147
|
-
VALUE new_real, new_imag;
|
148
|
-
|
149
|
-
new_real = f_sub(RCOMPLEX(real)->real, RCOMPLEX(imag)->imag);
|
150
|
-
new_imag = f_add(RCOMPLEX(real)->imag, RCOMPLEX(imag)->real);
|
151
|
-
|
152
|
-
return complex_new(klass, new_real, new_imag);
|
153
|
-
}
|
154
|
-
}
|
155
|
-
|
156
146
|
static VALUE
|
157
147
|
complex_add(VALUE self, VALUE other)
|
158
148
|
{
|
@@ -623,7 +613,7 @@ rb_rational_plus(VALUE self, VALUE other)
|
|
623
613
|
VALUE num = RRATIONAL(self)->num;
|
624
614
|
VALUE den = RRATIONAL(self)->den;
|
625
615
|
|
626
|
-
return f_addsub(self, num, den, other, ONE,
|
616
|
+
return f_addsub(self, num, den, other, ONE, '+');
|
627
617
|
}
|
628
618
|
else if (RB_TYPE_P(other, T_FLOAT)) {
|
629
619
|
return f_add(f_to_f(self), other);
|
@@ -852,11 +842,11 @@ ary_mean_variance(VALUE ary, VALUE *mean_ptr, VALUE *variance_ptr, size_t ddof)
|
|
852
842
|
static int
|
853
843
|
opt_population_p(VALUE opts)
|
854
844
|
{
|
855
|
-
ID kwargs = id_population;
|
856
845
|
VALUE population = Qfalse;
|
857
846
|
|
858
847
|
if (!NIL_P(opts)) {
|
859
848
|
#ifdef HAVE_RB_GET_KWARGS
|
849
|
+
ID kwargs = id_population;
|
860
850
|
rb_get_kwargs(opts, &kwargs, 0, 1, &population);
|
861
851
|
#else
|
862
852
|
VALUE val = rb_hash_aref(opts, ID2SYM(id_population));
|
@@ -868,7 +858,7 @@ opt_population_p(VALUE opts)
|
|
868
858
|
}
|
869
859
|
|
870
860
|
/* call-seq:
|
871
|
-
*
|
861
|
+
* ary.mean_variance(population: false)
|
872
862
|
*
|
873
863
|
* Calculate a mean and a variance of the values in `ary`.
|
874
864
|
* The first element of the result array is the mean, and the second is the variance.
|
@@ -1148,6 +1138,7 @@ enum_sum_count(VALUE obj, VALUE init, VALUE *sum_ptr, long *count_ptr)
|
|
1148
1138
|
*count_ptr = memo.count;
|
1149
1139
|
}
|
1150
1140
|
|
1141
|
+
#ifndef HAVE_ENUM_SUM
|
1151
1142
|
/* call-seq:
|
1152
1143
|
* enum.sum
|
1153
1144
|
*
|
@@ -1172,10 +1163,11 @@ enum_sum(int argc, VALUE* argv, VALUE obj)
|
|
1172
1163
|
|
1173
1164
|
return sum;
|
1174
1165
|
}
|
1166
|
+
#endif
|
1175
1167
|
|
1176
1168
|
struct enum_mean_variance_memo {
|
1177
1169
|
int block_given;
|
1178
|
-
|
1170
|
+
size_t n;
|
1179
1171
|
double m, m2, f, c;
|
1180
1172
|
};
|
1181
1173
|
|
@@ -1229,7 +1221,7 @@ enum_mean_variance_iter_i(RB_BLOCK_CALL_FUNC_ARGLIST(e, args))
|
|
1229
1221
|
{
|
1230
1222
|
struct enum_mean_variance_memo *memo = (struct enum_mean_variance_memo *)args;
|
1231
1223
|
ENUM_WANT_SVALUE();
|
1232
|
-
mean_variance_iter(e,
|
1224
|
+
mean_variance_iter(e, memo);
|
1233
1225
|
return Qnil;
|
1234
1226
|
}
|
1235
1227
|
|
@@ -1487,9 +1479,836 @@ ary_stdev(int argc, VALUE* argv, VALUE ary)
|
|
1487
1479
|
return stdev;
|
1488
1480
|
}
|
1489
1481
|
|
1482
|
+
static inline int
|
1483
|
+
is_na(VALUE v)
|
1484
|
+
{
|
1485
|
+
if (NIL_P(v))
|
1486
|
+
return 1;
|
1487
|
+
|
1488
|
+
if (RB_FLOAT_TYPE_P(v) && isnan(RFLOAT_VALUE(v)))
|
1489
|
+
return 1;
|
1490
|
+
|
1491
|
+
if (rb_respond_to(v, id_nan_p) && RTEST(rb_funcall(v, id_nan_p, 0)))
|
1492
|
+
return 1;
|
1493
|
+
|
1494
|
+
return 0;
|
1495
|
+
}
|
1496
|
+
|
1497
|
+
static int
|
1498
|
+
ary_percentile_sort_cmp(const void *ap, const void *bp, void *dummy)
|
1499
|
+
{
|
1500
|
+
VALUE a = *(const VALUE *)ap, b = *(const VALUE *)bp;
|
1501
|
+
VALUE cmp;
|
1502
|
+
|
1503
|
+
if (is_na(a)) {
|
1504
|
+
return -1;
|
1505
|
+
}
|
1506
|
+
else if (is_na(b)) {
|
1507
|
+
return 1;
|
1508
|
+
}
|
1509
|
+
|
1510
|
+
/* TODO: optimize */
|
1511
|
+
cmp = rb_funcall(a, id_cmp, 1, b);
|
1512
|
+
return rb_cmpint(cmp, a, b);
|
1513
|
+
}
|
1514
|
+
|
1515
|
+
static VALUE
|
1516
|
+
ary_percentile_make_sorted(VALUE ary)
|
1517
|
+
{
|
1518
|
+
long n, i;
|
1519
|
+
VALUE sorted;
|
1520
|
+
|
1521
|
+
n = RARRAY_LEN(ary);
|
1522
|
+
sorted = rb_ary_tmp_new(n);
|
1523
|
+
for (i = 0; i < n; ++i) {
|
1524
|
+
rb_ary_push(sorted, RARRAY_AREF(ary, i));
|
1525
|
+
}
|
1526
|
+
RARRAY_PTR_USE(sorted, ptr, {
|
1527
|
+
ruby_qsort(ptr, n, sizeof(VALUE),
|
1528
|
+
ary_percentile_sort_cmp, NULL);
|
1529
|
+
});
|
1530
|
+
return sorted;
|
1531
|
+
}
|
1532
|
+
|
1533
|
+
static inline VALUE
|
1534
|
+
ary_percentile_single_sorted(VALUE sorted, long n, double d)
|
1535
|
+
{
|
1536
|
+
VALUE x0, x1;
|
1537
|
+
double i, f;
|
1538
|
+
long l;
|
1539
|
+
|
1540
|
+
assert(RB_TYPE_P(sorted, T_ARRAY));
|
1541
|
+
assert(n == RARRAY_LEN(sorted));
|
1542
|
+
assert(n > 0);
|
1543
|
+
|
1544
|
+
if (d < 0 || 100 < d) {
|
1545
|
+
rb_raise(rb_eArgError, "percentile out of bounds");
|
1546
|
+
}
|
1547
|
+
|
1548
|
+
if (is_na(RARRAY_AREF(sorted, 0))) {
|
1549
|
+
return DBL2NUM(nan(""));
|
1550
|
+
}
|
1551
|
+
|
1552
|
+
n = RARRAY_LEN(sorted);
|
1553
|
+
if (n == 1) {
|
1554
|
+
return RARRAY_AREF(sorted, 0);
|
1555
|
+
}
|
1556
|
+
|
1557
|
+
d = (n - 1) * d / 100.0;
|
1558
|
+
f = modf(d, &i);
|
1559
|
+
l = (long)i;
|
1560
|
+
|
1561
|
+
x0 = RARRAY_AREF(sorted, l);
|
1562
|
+
if (f == 0 || l == n - 1) {
|
1563
|
+
return x0;
|
1564
|
+
}
|
1565
|
+
|
1566
|
+
x0 = rb_funcall(x0, idSTAR, 1, DBL2NUM(1 - f));
|
1567
|
+
x1 = RARRAY_AREF(sorted, l + 1);
|
1568
|
+
x1 = rb_funcall(x1, idSTAR, 1, DBL2NUM(f));
|
1569
|
+
|
1570
|
+
return rb_funcall(x0, idPLUS, 1, x1);
|
1571
|
+
}
|
1572
|
+
|
1573
|
+
static VALUE
|
1574
|
+
ary_percentile_single(VALUE ary, VALUE q)
|
1575
|
+
{
|
1576
|
+
long n;
|
1577
|
+
double d;
|
1578
|
+
VALUE qf, sorted;
|
1579
|
+
|
1580
|
+
assert(RB_TYPE_P(ary, T_ARRAY));
|
1581
|
+
|
1582
|
+
n = RARRAY_LEN(ary);
|
1583
|
+
assert(n > 0);
|
1584
|
+
|
1585
|
+
switch (TYPE(q)) {
|
1586
|
+
case T_FIXNUM:
|
1587
|
+
d = (double)FIX2LONG(q);
|
1588
|
+
break;
|
1589
|
+
case T_BIGNUM:
|
1590
|
+
d = rb_big2dbl(q);
|
1591
|
+
break;
|
1592
|
+
|
1593
|
+
case T_RATIONAL:
|
1594
|
+
/* fall through */
|
1595
|
+
default:
|
1596
|
+
qf = NUM2DBL(q);
|
1597
|
+
goto float_percentile;
|
1598
|
+
|
1599
|
+
case T_FLOAT:
|
1600
|
+
qf = q;
|
1601
|
+
float_percentile:
|
1602
|
+
d = RFLOAT_VALUE(qf);
|
1603
|
+
break;
|
1604
|
+
}
|
1605
|
+
|
1606
|
+
if (n == 1) {
|
1607
|
+
return RARRAY_AREF(ary, 0);
|
1608
|
+
}
|
1609
|
+
|
1610
|
+
sorted = ary_percentile_make_sorted(ary);
|
1611
|
+
|
1612
|
+
return ary_percentile_single_sorted(sorted, n, d);
|
1613
|
+
}
|
1614
|
+
|
1615
|
+
/* call-seq:
|
1616
|
+
* ary.percentile(q) -> float
|
1617
|
+
*
|
1618
|
+
* Calculate specified percentiles of the values in `ary`.
|
1619
|
+
*
|
1620
|
+
* @param [Number, Array] percentile or array of percentiles to compute,
|
1621
|
+
* which must be between 0 and 100 inclusive.
|
1622
|
+
*
|
1623
|
+
* @return [Float, Array] A percentile value(s)
|
1624
|
+
*/
|
1625
|
+
static VALUE
|
1626
|
+
ary_percentile(VALUE ary, VALUE q)
|
1627
|
+
{
|
1628
|
+
long n, m, i;
|
1629
|
+
double d;
|
1630
|
+
VALUE qf, qs, sorted, res;
|
1631
|
+
|
1632
|
+
n = RARRAY_LEN(ary);
|
1633
|
+
if (n == 0) {
|
1634
|
+
rb_raise(rb_eArgError, "unable to compute percentile(s) for an empty array");
|
1635
|
+
}
|
1636
|
+
|
1637
|
+
qs = rb_check_convert_type(q, T_ARRAY, "Array", "to_ary");
|
1638
|
+
if (NIL_P(qs)) {
|
1639
|
+
return ary_percentile_single(ary, q);
|
1640
|
+
}
|
1641
|
+
|
1642
|
+
m = RARRAY_LEN(qs);
|
1643
|
+
res = rb_ary_new_capa(m);
|
1644
|
+
|
1645
|
+
if (m == 1) {
|
1646
|
+
q = RARRAY_AREF(qs, 0);
|
1647
|
+
rb_ary_push(res, ary_percentile_single(ary, q));
|
1648
|
+
}
|
1649
|
+
else {
|
1650
|
+
sorted = ary_percentile_make_sorted(ary);
|
1651
|
+
|
1652
|
+
for (i = 0; i < m; ++i) {
|
1653
|
+
VALUE x;
|
1654
|
+
|
1655
|
+
q = RARRAY_AREF(qs, i);
|
1656
|
+
switch (TYPE(q)) {
|
1657
|
+
case T_FIXNUM:
|
1658
|
+
d = (double)FIX2LONG(q);
|
1659
|
+
break;
|
1660
|
+
case T_BIGNUM:
|
1661
|
+
d = rb_big2dbl(q);
|
1662
|
+
break;
|
1663
|
+
|
1664
|
+
case T_RATIONAL:
|
1665
|
+
/* fall through */
|
1666
|
+
default:
|
1667
|
+
qf = NUM2DBL(q);
|
1668
|
+
goto float_percentile;
|
1669
|
+
|
1670
|
+
case T_FLOAT:
|
1671
|
+
qf = q;
|
1672
|
+
float_percentile:
|
1673
|
+
d = RFLOAT_VALUE(qf);
|
1674
|
+
break;
|
1675
|
+
}
|
1676
|
+
|
1677
|
+
x = ary_percentile_single_sorted(sorted, n, d);
|
1678
|
+
rb_ary_push(res, x);
|
1679
|
+
}
|
1680
|
+
}
|
1681
|
+
|
1682
|
+
return res;
|
1683
|
+
}
|
1684
|
+
|
1685
|
+
/* call-seq:
|
1686
|
+
* ary.median -> float
|
1687
|
+
*
|
1688
|
+
* Calculate a median of the values in `ary`.
|
1689
|
+
*
|
1690
|
+
* @return [Float] A median value
|
1691
|
+
*/
|
1692
|
+
static VALUE
|
1693
|
+
ary_median(VALUE ary)
|
1694
|
+
{
|
1695
|
+
long n;
|
1696
|
+
VALUE sorted, a0, a1;
|
1697
|
+
|
1698
|
+
n = RARRAY_LEN(ary);
|
1699
|
+
switch (n) {
|
1700
|
+
case 0:
|
1701
|
+
goto return_nan;
|
1702
|
+
case 1:
|
1703
|
+
return RARRAY_AREF(ary, 0);
|
1704
|
+
case 2:
|
1705
|
+
a0 = RARRAY_AREF(ary, 0);
|
1706
|
+
a1 = RARRAY_AREF(ary, 1);
|
1707
|
+
goto mean_two;
|
1708
|
+
default:
|
1709
|
+
break;
|
1710
|
+
}
|
1711
|
+
|
1712
|
+
sorted = ary_percentile_make_sorted(ary);
|
1713
|
+
|
1714
|
+
a0 = RARRAY_AREF(sorted, 0);
|
1715
|
+
if (is_na(a0)) {
|
1716
|
+
return_nan:
|
1717
|
+
return DBL2NUM(nan(""));
|
1718
|
+
}
|
1719
|
+
|
1720
|
+
a1 = RARRAY_AREF(sorted, n / 2);
|
1721
|
+
if (n % 2 == 1) {
|
1722
|
+
return a1;
|
1723
|
+
}
|
1724
|
+
else {
|
1725
|
+
a0 = RARRAY_AREF(sorted, n / 2 - 1);
|
1726
|
+
|
1727
|
+
mean_two:
|
1728
|
+
a0 = rb_funcall(a0, idPLUS, 1, a1); /* TODO: optimize */
|
1729
|
+
if (RB_INTEGER_TYPE_P(a0) || RB_FLOAT_TYPE_P(a0) || RB_TYPE_P(a0, T_RATIONAL)) {
|
1730
|
+
double d = NUM2DBL(a0);
|
1731
|
+
return DBL2NUM(d / 2.0);
|
1732
|
+
}
|
1733
|
+
|
1734
|
+
return rb_funcall(a0, idDIV, 1, DBL2NUM(2.0));
|
1735
|
+
}
|
1736
|
+
}
|
1737
|
+
|
1738
|
+
struct value_counts_opts {
|
1739
|
+
int normalize_p;
|
1740
|
+
int sort_p;
|
1741
|
+
int ascending_p;
|
1742
|
+
int dropna_p;
|
1743
|
+
};
|
1744
|
+
|
1745
|
+
static inline void
|
1746
|
+
value_counts_extract_opts(VALUE kwargs, struct value_counts_opts *opts)
|
1747
|
+
{
|
1748
|
+
assert(opts != NULL);
|
1749
|
+
|
1750
|
+
/* default values */
|
1751
|
+
opts->normalize_p = 0;
|
1752
|
+
opts->sort_p = 1;
|
1753
|
+
opts->ascending_p = 0;
|
1754
|
+
opts->dropna_p = 1;
|
1755
|
+
|
1756
|
+
if (!NIL_P(kwargs)) {
|
1757
|
+
enum { kw_normalize, kw_sort, kw_ascending, kw_dropna };
|
1758
|
+
static ID kwarg_keys[4];
|
1759
|
+
VALUE kwarg_vals[4];
|
1760
|
+
|
1761
|
+
if (!kwarg_keys[0]) {
|
1762
|
+
kwarg_keys[kw_normalize] = rb_intern("normalize");
|
1763
|
+
kwarg_keys[kw_sort] = rb_intern("sort");
|
1764
|
+
kwarg_keys[kw_ascending] = rb_intern("ascending");
|
1765
|
+
kwarg_keys[kw_dropna] = rb_intern("dropna");
|
1766
|
+
}
|
1767
|
+
|
1768
|
+
rb_get_kwargs(kwargs, kwarg_keys, 0, 4, kwarg_vals);
|
1769
|
+
opts->normalize_p = (kwarg_vals[kw_normalize] != Qundef) && RTEST(kwarg_vals[kw_normalize]);
|
1770
|
+
opts->sort_p = (kwarg_vals[kw_sort] != Qundef) && RTEST(kwarg_vals[kw_sort]);
|
1771
|
+
opts->ascending_p = (kwarg_vals[kw_ascending] != Qundef) && RTEST(kwarg_vals[kw_ascending]);
|
1772
|
+
opts->dropna_p = (kwarg_vals[kw_dropna] != Qundef) && RTEST(kwarg_vals[kw_dropna]);
|
1773
|
+
}
|
1774
|
+
}
|
1775
|
+
|
1776
|
+
static int
|
1777
|
+
value_counts_result_to_assoc_array_i(VALUE key, VALUE val, VALUE ary)
|
1778
|
+
{
|
1779
|
+
VALUE assoc = rb_ary_tmp_new(2);
|
1780
|
+
rb_ary_push(assoc, key);
|
1781
|
+
rb_ary_push(assoc, val);
|
1782
|
+
rb_ary_push(ary, assoc);
|
1783
|
+
return ST_CONTINUE;
|
1784
|
+
}
|
1785
|
+
|
1786
|
+
static int
|
1787
|
+
value_counts_sort_cmp_asc(const void *ap, const void *bp, void *dummy)
|
1788
|
+
{
|
1789
|
+
VALUE a = *(const VALUE *)ap, b = *(const VALUE *)bp;
|
1790
|
+
VALUE av, bv, cmp;
|
1791
|
+
|
1792
|
+
av = RARRAY_AREF(a, 1);
|
1793
|
+
bv = RARRAY_AREF(b, 1);
|
1794
|
+
|
1795
|
+
/* TODO: optimize */
|
1796
|
+
cmp = rb_funcall(av, id_cmp, 1, bv);
|
1797
|
+
return rb_cmpint(cmp, av, bv);
|
1798
|
+
}
|
1799
|
+
|
1800
|
+
static int
|
1801
|
+
value_counts_sort_cmp_desc(const void *ap, const void *bp, void *dummy)
|
1802
|
+
{
|
1803
|
+
VALUE a = *(const VALUE *)ap, b = *(const VALUE *)bp;
|
1804
|
+
VALUE av, bv, cmp;
|
1805
|
+
|
1806
|
+
av = RARRAY_AREF(a, 1);
|
1807
|
+
bv = RARRAY_AREF(b, 1);
|
1808
|
+
|
1809
|
+
/* TODO: optimize */
|
1810
|
+
cmp = rb_funcall(bv, id_cmp, 1, av);
|
1811
|
+
return rb_cmpint(cmp, bv, av);
|
1812
|
+
}
|
1813
|
+
|
1814
|
+
static VALUE
|
1815
|
+
value_counts_sort_result(VALUE result, const int dropna_p, const int ascending_p)
|
1816
|
+
{
|
1817
|
+
VALUE na_count = Qundef, ary, sorted;
|
1818
|
+
long i;
|
1819
|
+
|
1820
|
+
if (RHASH_SIZE(result) < 1) {
|
1821
|
+
return result;
|
1822
|
+
}
|
1823
|
+
|
1824
|
+
if (!dropna_p) {
|
1825
|
+
na_count = rb_hash_lookup2(result, Qnil, Qundef);
|
1826
|
+
if (na_count != Qundef) {
|
1827
|
+
rb_hash_delete(result, Qnil);
|
1828
|
+
}
|
1829
|
+
}
|
1830
|
+
|
1831
|
+
const long len = (long)RHASH_SIZE(result);
|
1832
|
+
ary = rb_ary_tmp_new(len);
|
1833
|
+
rb_hash_foreach(result, value_counts_result_to_assoc_array_i, ary);
|
1834
|
+
if (ascending_p) {
|
1835
|
+
RARRAY_PTR_USE(ary, ptr, {
|
1836
|
+
ruby_qsort(ptr, RARRAY_LEN(ary), sizeof(VALUE),
|
1837
|
+
value_counts_sort_cmp_asc, NULL);
|
1838
|
+
});
|
1839
|
+
}
|
1840
|
+
else {
|
1841
|
+
RARRAY_PTR_USE(ary, ptr, {
|
1842
|
+
ruby_qsort(ptr, RARRAY_LEN(ary), sizeof(VALUE),
|
1843
|
+
value_counts_sort_cmp_desc, NULL);
|
1844
|
+
});
|
1845
|
+
}
|
1846
|
+
|
1847
|
+
#ifdef HAVE_RB_HASH_NEW_WITH_SIZE
|
1848
|
+
sorted = rb_hash_new_with_size(len);
|
1849
|
+
#else
|
1850
|
+
sorted = rb_hash_new();
|
1851
|
+
#endif
|
1852
|
+
|
1853
|
+
if (na_count != Qundef && ascending_p) {
|
1854
|
+
rb_hash_aset(sorted, Qnil, na_count);
|
1855
|
+
}
|
1856
|
+
|
1857
|
+
for (i = 0; i < len; ++i) {
|
1858
|
+
VALUE a = RARRAY_AREF(ary, i);
|
1859
|
+
VALUE k = RARRAY_AREF(a, 0);
|
1860
|
+
VALUE v = RARRAY_AREF(a, 1);
|
1861
|
+
rb_hash_aset(sorted, k, v);
|
1862
|
+
}
|
1863
|
+
|
1864
|
+
if (na_count != Qundef && !ascending_p) {
|
1865
|
+
rb_hash_aset(sorted, Qnil, na_count);
|
1866
|
+
}
|
1867
|
+
|
1868
|
+
return sorted;
|
1869
|
+
}
|
1870
|
+
|
1871
|
+
struct value_counts_normalize_params {
|
1872
|
+
VALUE result;
|
1873
|
+
long total;
|
1874
|
+
};
|
1875
|
+
|
1876
|
+
static int
|
1877
|
+
value_counts_normalize_i(VALUE key, VALUE val, VALUE arg)
|
1878
|
+
{
|
1879
|
+
struct value_counts_normalize_params *params = (struct value_counts_normalize_params *)arg;
|
1880
|
+
double new_val;
|
1881
|
+
|
1882
|
+
new_val = NUM2DBL(val) / params->total;
|
1883
|
+
rb_hash_aset(params->result, key, DBL2NUM(new_val));
|
1884
|
+
|
1885
|
+
return ST_CONTINUE;
|
1886
|
+
}
|
1887
|
+
|
1888
|
+
struct value_counts_memo {
|
1889
|
+
int dropna_p;
|
1890
|
+
long total;
|
1891
|
+
long na_count;
|
1892
|
+
VALUE result;
|
1893
|
+
};
|
1894
|
+
|
1895
|
+
static VALUE
|
1896
|
+
any_value_counts(int argc, VALUE *argv, VALUE obj,
|
1897
|
+
void (* counter)(VALUE, struct value_counts_memo *))
|
1898
|
+
{
|
1899
|
+
VALUE kwargs;
|
1900
|
+
struct value_counts_opts opts;
|
1901
|
+
struct value_counts_memo memo;
|
1902
|
+
|
1903
|
+
rb_scan_args(argc, argv, ":", &kwargs);
|
1904
|
+
value_counts_extract_opts(kwargs, &opts);
|
1905
|
+
|
1906
|
+
memo.result = rb_hash_new();
|
1907
|
+
memo.total = 0;
|
1908
|
+
memo.na_count = 0;
|
1909
|
+
memo.dropna_p = opts.dropna_p;
|
1910
|
+
|
1911
|
+
if (!opts.dropna_p) {
|
1912
|
+
rb_hash_aset(memo.result, Qnil, INT2FIX(0)); // reserve the room for NA
|
1913
|
+
}
|
1914
|
+
|
1915
|
+
counter(obj, &memo);
|
1916
|
+
|
1917
|
+
if (!opts.dropna_p) {
|
1918
|
+
if (memo.na_count == 0)
|
1919
|
+
rb_hash_delete(memo.result, Qnil);
|
1920
|
+
else
|
1921
|
+
rb_hash_aset(memo.result, Qnil, LONG2NUM(memo.na_count));
|
1922
|
+
}
|
1923
|
+
|
1924
|
+
if (opts.sort_p) {
|
1925
|
+
memo.result = value_counts_sort_result(memo.result, opts.dropna_p, opts.ascending_p);
|
1926
|
+
}
|
1927
|
+
|
1928
|
+
if (opts.normalize_p) {
|
1929
|
+
struct value_counts_normalize_params params;
|
1930
|
+
params.result = memo.result;
|
1931
|
+
params.total = memo.total - (opts.dropna_p ? memo.na_count : 0);
|
1932
|
+
rb_hash_foreach(memo.result, value_counts_normalize_i, (VALUE)¶ms);
|
1933
|
+
}
|
1934
|
+
|
1935
|
+
return memo.result;
|
1936
|
+
}
|
1937
|
+
|
1938
|
+
static VALUE
|
1939
|
+
enum_value_counts_without_sort_i(RB_BLOCK_CALL_FUNC_ARGLIST(e, args))
|
1940
|
+
{
|
1941
|
+
struct value_counts_memo *memo = (struct value_counts_memo *)args;
|
1942
|
+
|
1943
|
+
ENUM_WANT_SVALUE();
|
1944
|
+
|
1945
|
+
if (is_na(e)) {
|
1946
|
+
++memo->na_count;
|
1947
|
+
}
|
1948
|
+
else {
|
1949
|
+
VALUE cnt = rb_hash_lookup2(memo->result, e, INT2FIX(0));
|
1950
|
+
rb_hash_aset(memo->result, e, rb_int_plus(cnt, INT2FIX(1)));
|
1951
|
+
}
|
1952
|
+
|
1953
|
+
++memo->total;
|
1954
|
+
|
1955
|
+
return Qnil;
|
1956
|
+
}
|
1957
|
+
|
1958
|
+
static void
|
1959
|
+
enum_value_counts_without_sort(VALUE obj, struct value_counts_memo *memo)
|
1960
|
+
{
|
1961
|
+
rb_block_call(obj, id_each, 0, 0, enum_value_counts_without_sort_i, (VALUE)memo);
|
1962
|
+
}
|
1963
|
+
|
1964
|
+
static VALUE
|
1965
|
+
enum_value_counts(int argc, VALUE* argv, VALUE obj)
|
1966
|
+
{
|
1967
|
+
return any_value_counts(argc, argv, obj, enum_value_counts_without_sort);
|
1968
|
+
}
|
1969
|
+
|
1970
|
+
static void
|
1971
|
+
ary_value_counts_without_sort(VALUE ary, struct value_counts_memo *memo)
|
1972
|
+
{
|
1973
|
+
const VALUE zero = INT2FIX(0);
|
1974
|
+
const VALUE one = INT2FIX(1);
|
1975
|
+
long i, na_count = 0;
|
1976
|
+
long const n = RARRAY_LEN(ary);
|
1977
|
+
|
1978
|
+
for (i = 0; i < n; ++i) {
|
1979
|
+
VALUE val = RARRAY_AREF(ary, i);
|
1980
|
+
|
1981
|
+
if (is_na(val)) {
|
1982
|
+
++na_count;
|
1983
|
+
}
|
1984
|
+
else {
|
1985
|
+
VALUE cnt = rb_hash_lookup2(memo->result, val, zero);
|
1986
|
+
rb_hash_aset(memo->result, val, rb_int_plus(cnt, one));
|
1987
|
+
}
|
1988
|
+
}
|
1989
|
+
|
1990
|
+
memo->total = n;
|
1991
|
+
memo->na_count = na_count;
|
1992
|
+
}
|
1993
|
+
|
1994
|
+
/* call-seq:
|
1995
|
+
* ary.value_counts(normalize: false, sort: true, ascending: false, dropna: true) -> hash
|
1996
|
+
*
|
1997
|
+
* Returns a hash that contains the counts of values in `ary`.
|
1998
|
+
*
|
1999
|
+
* This method treats `nil` and NaN, the objects who respond `true` to `nan?`,
|
2000
|
+
* as the same thing, and stores the count of them as the value for `nil`.
|
2001
|
+
*
|
2002
|
+
* @param [false,true] normalize If `true`, the result contains the relative
|
2003
|
+
* frequencies of the unique values.
|
2004
|
+
* @param [true,false] sort Sort by values.
|
2005
|
+
* @param [false,true] ascending Sort in ascending order.
|
2006
|
+
* @param [true,false] dropna Don't include counts of NAs.
|
2007
|
+
*
|
2008
|
+
* @return [Hash] A hash consists of the counts of the values
|
2009
|
+
*/
|
2010
|
+
static VALUE
|
2011
|
+
ary_value_counts(int argc, VALUE* argv, VALUE ary)
|
2012
|
+
{
|
2013
|
+
return any_value_counts(argc, argv, ary, ary_value_counts_without_sort);
|
2014
|
+
}
|
2015
|
+
|
2016
|
+
static int
|
2017
|
+
hash_value_counts_without_sort_i(VALUE key, VALUE val, VALUE arg)
|
2018
|
+
{
|
2019
|
+
struct value_counts_memo *memo = (struct value_counts_memo *)arg;
|
2020
|
+
|
2021
|
+
if (is_na(val)) {
|
2022
|
+
++memo->na_count;
|
2023
|
+
|
2024
|
+
if (memo->dropna_p) {
|
2025
|
+
return ST_CONTINUE;
|
2026
|
+
}
|
2027
|
+
}
|
2028
|
+
else {
|
2029
|
+
VALUE cnt = rb_hash_lookup2(memo->result, val, INT2FIX(0));
|
2030
|
+
rb_hash_aset(memo->result, val, rb_int_plus(cnt, INT2FIX(1)));
|
2031
|
+
}
|
2032
|
+
|
2033
|
+
return ST_CONTINUE;
|
2034
|
+
}
|
2035
|
+
|
2036
|
+
static void
|
2037
|
+
hash_value_counts_without_sort(VALUE hash, struct value_counts_memo *memo)
|
2038
|
+
{
|
2039
|
+
rb_hash_foreach(hash, hash_value_counts_without_sort_i, (VALUE)memo);
|
2040
|
+
memo->total = RHASH_SIZE(hash);
|
2041
|
+
}
|
2042
|
+
|
2043
|
+
/* call-seq:
|
2044
|
+
* hash.value_counts(normalize: false, sort: true, ascending: false, dropna: true) -> hash
|
2045
|
+
*
|
2046
|
+
* Returns a hash that contains the counts of values in `hash`.
|
2047
|
+
*
|
2048
|
+
* This method treats `nil` and NaN, the objects who respond `true` to `nan?`,
|
2049
|
+
* as the same thing, and stores the count of them as the value for `nil`.
|
2050
|
+
*
|
2051
|
+
* @param [false,true] normalize If `true`, the result contains the relative
|
2052
|
+
* frequencies of the unique values.
|
2053
|
+
* @param [true,false] sort Sort by values.
|
2054
|
+
* @param [false,true] ascending Sort in ascending order.
|
2055
|
+
* @param [true,false] dropna Don't include counts of NAs.
|
2056
|
+
*
|
2057
|
+
* @return [Hash] A hash consists of the counts of the values
|
2058
|
+
*/
|
2059
|
+
static VALUE
|
2060
|
+
hash_value_counts(int argc, VALUE* argv, VALUE hash)
|
2061
|
+
{
|
2062
|
+
return any_value_counts(argc, argv, hash, hash_value_counts_without_sort);
|
2063
|
+
}
|
2064
|
+
|
2065
|
+
static long
|
2066
|
+
histogram_edge_bin_index(VALUE edge, VALUE rb_x, int left_p)
|
2067
|
+
{
|
2068
|
+
double x, y;
|
2069
|
+
long lo, hi, mid;
|
2070
|
+
|
2071
|
+
x = NUM2DBL(rb_x);
|
2072
|
+
|
2073
|
+
lo = -1;
|
2074
|
+
hi = RARRAY_LEN(edge);
|
2075
|
+
|
2076
|
+
if (left_p) {
|
2077
|
+
while (hi - lo > 1) {
|
2078
|
+
mid = lo + (hi - lo)/2;
|
2079
|
+
y = NUM2DBL(RARRAY_AREF(edge, mid));
|
2080
|
+
if (y <= x) {
|
2081
|
+
lo = mid;
|
2082
|
+
}
|
2083
|
+
else {
|
2084
|
+
hi = mid;
|
2085
|
+
}
|
2086
|
+
}
|
2087
|
+
return lo;
|
2088
|
+
}
|
2089
|
+
else {
|
2090
|
+
while (hi - lo > 1) {
|
2091
|
+
mid = lo + (hi - lo)/2;
|
2092
|
+
y = NUM2DBL(RARRAY_AREF(edge, mid));
|
2093
|
+
if (y < x) {
|
2094
|
+
lo = mid;
|
2095
|
+
}
|
2096
|
+
else {
|
2097
|
+
hi = mid;
|
2098
|
+
}
|
2099
|
+
}
|
2100
|
+
return hi - 1;
|
2101
|
+
}
|
2102
|
+
}
|
2103
|
+
|
2104
|
+
static void
|
2105
|
+
histogram_weights_push_values(VALUE weights, VALUE edge, VALUE values, int left_p)
|
2106
|
+
{
|
2107
|
+
VALUE x, cur;
|
2108
|
+
long i, n, bi;
|
2109
|
+
|
2110
|
+
n = RARRAY_LEN(values);
|
2111
|
+
for (i = 0; i < n; ++i) {
|
2112
|
+
x = RARRAY_AREF(values, i);
|
2113
|
+
|
2114
|
+
bi = histogram_edge_bin_index(edge, x, left_p);
|
2115
|
+
|
2116
|
+
cur = rb_ary_entry(weights, bi);
|
2117
|
+
if (NIL_P(cur)) {
|
2118
|
+
cur = INT2FIX(1);
|
2119
|
+
}
|
2120
|
+
else {
|
2121
|
+
cur = rb_funcall(cur, idPLUS, 1, INT2FIX(1));
|
2122
|
+
}
|
2123
|
+
|
2124
|
+
rb_ary_store(weights, bi, cur);
|
2125
|
+
}
|
2126
|
+
}
|
2127
|
+
|
2128
|
+
static int
|
2129
|
+
opt_closed_left_p(VALUE opts)
|
2130
|
+
{
|
2131
|
+
int left_p = 1;
|
2132
|
+
|
2133
|
+
if (!NIL_P(opts)) {
|
2134
|
+
VALUE closed;
|
2135
|
+
#ifdef HAVE_RB_GET_KWARGS
|
2136
|
+
ID kwargs = id_closed;
|
2137
|
+
rb_get_kwargs(opts, &kwargs, 0, 1, &closed);
|
2138
|
+
#else
|
2139
|
+
closed = rb_hash_lookup2(opts, ID2SYM(id_closed), sym_left);
|
2140
|
+
#endif
|
2141
|
+
left_p = (closed != sym_right);
|
2142
|
+
if (left_p && closed != sym_left) {
|
2143
|
+
rb_raise(rb_eArgError, "invalid value for :closed keyword "
|
2144
|
+
"(%"PRIsVALUE" for :left or :right)", closed);
|
2145
|
+
}
|
2146
|
+
}
|
2147
|
+
|
2148
|
+
return left_p;
|
2149
|
+
}
|
2150
|
+
|
2151
|
+
static inline long
|
2152
|
+
sturges(long n)
|
2153
|
+
{
|
2154
|
+
if (n == 0) return 1L;
|
2155
|
+
return (long)(ceil(log2(n)) + 1);
|
2156
|
+
}
|
2157
|
+
|
2158
|
+
static VALUE
|
2159
|
+
ary_histogram_calculate_edge_lo_hi(const double lo, const double hi, const long nbins, const int left_p)
|
2160
|
+
{
|
2161
|
+
VALUE edge;
|
2162
|
+
double bw, lbw, start, step, divisor, r;
|
2163
|
+
long i, len;
|
2164
|
+
|
2165
|
+
if (hi == lo) {
|
2166
|
+
start = hi;
|
2167
|
+
step = 1;
|
2168
|
+
divisor = 1;
|
2169
|
+
len = 1;
|
2170
|
+
}
|
2171
|
+
else {
|
2172
|
+
bw = (hi - lo) / nbins;
|
2173
|
+
lbw = log10(bw);
|
2174
|
+
if (lbw >= 0) {
|
2175
|
+
step = pow(10, floor(lbw));
|
2176
|
+
r = bw / step;
|
2177
|
+
if (r <= 1.1) {
|
2178
|
+
/* do nothing */
|
2179
|
+
}
|
2180
|
+
else if (r <= 2.2) {
|
2181
|
+
step *= 2;
|
2182
|
+
}
|
2183
|
+
else if (r <= 5.5) {
|
2184
|
+
step *= 5;
|
2185
|
+
}
|
2186
|
+
else {
|
2187
|
+
step *= 10;
|
2188
|
+
}
|
2189
|
+
divisor = 1.0;
|
2190
|
+
start = step * floor(lo / step);
|
2191
|
+
len = (long)ceil((hi - start) / step);
|
2192
|
+
}
|
2193
|
+
else {
|
2194
|
+
divisor = pow(10, -floor(lbw));
|
2195
|
+
r = bw * divisor;
|
2196
|
+
if (r <= 1.1) {
|
2197
|
+
/* do nothing */
|
2198
|
+
}
|
2199
|
+
else if (r <= 2.2) {
|
2200
|
+
divisor /= 2;
|
2201
|
+
}
|
2202
|
+
else if (r <= 5.5) {
|
2203
|
+
divisor /= 5;
|
2204
|
+
}
|
2205
|
+
else {
|
2206
|
+
divisor /= 10;
|
2207
|
+
}
|
2208
|
+
step = 1.0;
|
2209
|
+
start = floor(lo * divisor);
|
2210
|
+
len = (long)ceil(hi * divisor - start);
|
2211
|
+
}
|
2212
|
+
}
|
2213
|
+
|
2214
|
+
if (left_p) {
|
2215
|
+
while (lo < start/divisor) {
|
2216
|
+
start -= step;
|
2217
|
+
}
|
2218
|
+
while ((start + (len - 1)*step)/divisor <= hi) {
|
2219
|
+
++len;
|
2220
|
+
}
|
2221
|
+
}
|
2222
|
+
else {
|
2223
|
+
while (lo <= start/divisor) {
|
2224
|
+
start -= step;
|
2225
|
+
}
|
2226
|
+
while ((start + (len - 1)*step)/divisor < hi) {
|
2227
|
+
++len;
|
2228
|
+
}
|
2229
|
+
}
|
2230
|
+
|
2231
|
+
edge = rb_ary_new_capa(len);
|
2232
|
+
for (i = 0; i < len; ++i) {
|
2233
|
+
rb_ary_push(edge, DBL2NUM(start/divisor));
|
2234
|
+
start += step;
|
2235
|
+
}
|
2236
|
+
|
2237
|
+
return edge;
|
2238
|
+
}
|
2239
|
+
|
2240
|
+
static VALUE
|
2241
|
+
ary_histogram_calculate_edge(VALUE ary, const long nbins, const int left_p)
|
2242
|
+
{
|
2243
|
+
long n;
|
2244
|
+
VALUE minmax;
|
2245
|
+
VALUE edge = Qnil;
|
2246
|
+
double lo, hi;
|
2247
|
+
|
2248
|
+
Check_Type(ary, T_ARRAY);
|
2249
|
+
n = RARRAY_LEN(ary);
|
2250
|
+
|
2251
|
+
if (n == 0 && nbins < 0) {
|
2252
|
+
rb_raise(rb_eArgError, "nbins must be >= 0 for an empty array, got %ld", nbins);
|
2253
|
+
}
|
2254
|
+
else if (n > 0 && nbins < 1) {
|
2255
|
+
rb_raise(rb_eArgError, "nbins must be >= 1 for a non-empty array, got %ld", nbins);
|
2256
|
+
}
|
2257
|
+
else if (n == 0) {
|
2258
|
+
edge = rb_ary_new_capa(1);
|
2259
|
+
rb_ary_push(edge, DBL2NUM(0.0));
|
2260
|
+
return edge;
|
2261
|
+
}
|
2262
|
+
|
2263
|
+
minmax = rb_funcall(ary, rb_intern("minmax"), 0);
|
2264
|
+
lo = NUM2DBL(RARRAY_AREF(minmax, 0));
|
2265
|
+
hi = NUM2DBL(RARRAY_AREF(minmax, 1));
|
2266
|
+
|
2267
|
+
edge = ary_histogram_calculate_edge_lo_hi(lo, hi, nbins, left_p);
|
2268
|
+
|
2269
|
+
return edge;
|
2270
|
+
}
|
2271
|
+
|
2272
|
+
/* call-seq:
|
2273
|
+
* ary.histogram(nbins=:auto, closed: :left)
|
2274
|
+
*
|
2275
|
+
* @param [Integer] nbins The approximate number of bins
|
2276
|
+
* @param [:left, :right] closed
|
2277
|
+
* If :left (the default), the bin interval are left-closed.
|
2278
|
+
* If :right, the bin interval are right-closed.
|
2279
|
+
*
|
2280
|
+
* @return [EnumerableStatistics::Histogram] The histogram struct.
|
2281
|
+
*/
|
2282
|
+
static VALUE
|
2283
|
+
ary_histogram(int argc, VALUE *argv, VALUE ary)
|
2284
|
+
{
|
2285
|
+
VALUE arg0, opts, edge, weights;
|
2286
|
+
int left_p;
|
2287
|
+
long nbins;
|
2288
|
+
|
2289
|
+
rb_scan_args(argc, argv, "01:", &arg0, &opts);
|
2290
|
+
if (NIL_P(arg0)) {
|
2291
|
+
nbins = sturges(RARRAY_LEN(ary));
|
2292
|
+
}
|
2293
|
+
else {
|
2294
|
+
nbins = NUM2LONG(arg0);
|
2295
|
+
}
|
2296
|
+
left_p = opt_closed_left_p(opts);
|
2297
|
+
|
2298
|
+
edge = ary_histogram_calculate_edge(ary, nbins, left_p);
|
2299
|
+
weights = rb_ary_new_capa(RARRAY_LEN(edge) - 1);
|
2300
|
+
histogram_weights_push_values(weights, edge, ary, left_p);
|
2301
|
+
|
2302
|
+
return rb_struct_new(cHistogram, edge, weights,
|
2303
|
+
left_p ? sym_left : sym_right,
|
2304
|
+
Qfalse);
|
2305
|
+
}
|
2306
|
+
|
1490
2307
|
void
|
1491
2308
|
Init_extension(void)
|
1492
2309
|
{
|
2310
|
+
VALUE mEnumerableStatistics;
|
2311
|
+
|
1493
2312
|
#ifndef HAVE_ENUM_SUM
|
1494
2313
|
rb_define_method(rb_mEnumerable, "sum", enum_sum, -1);
|
1495
2314
|
#endif
|
@@ -1499,6 +2318,7 @@ Init_extension(void)
|
|
1499
2318
|
rb_define_method(rb_mEnumerable, "variance", enum_variance, -1);
|
1500
2319
|
rb_define_method(rb_mEnumerable, "mean_stdev", enum_mean_stdev, -1);
|
1501
2320
|
rb_define_method(rb_mEnumerable, "stdev", enum_stdev, -1);
|
2321
|
+
rb_define_method(rb_mEnumerable, "value_counts", enum_value_counts, -1);
|
1502
2322
|
|
1503
2323
|
#ifndef HAVE_ARRAY_SUM
|
1504
2324
|
rb_define_method(rb_cArray, "sum", ary_sum, -1);
|
@@ -1508,10 +2328,20 @@ Init_extension(void)
|
|
1508
2328
|
rb_define_method(rb_cArray, "variance", ary_variance, -1);
|
1509
2329
|
rb_define_method(rb_cArray, "mean_stdev", ary_mean_stdev, -1);
|
1510
2330
|
rb_define_method(rb_cArray, "stdev", ary_stdev, -1);
|
2331
|
+
rb_define_method(rb_cArray, "percentile", ary_percentile, 1);
|
2332
|
+
rb_define_method(rb_cArray, "median", ary_median, 0);
|
2333
|
+
rb_define_method(rb_cArray, "value_counts", ary_value_counts, -1);
|
2334
|
+
|
2335
|
+
rb_define_method(rb_cHash, "value_counts", hash_value_counts, -1);
|
1511
2336
|
|
1512
2337
|
half_in_rational = nurat_s_new_internal(rb_cRational, INT2FIX(1), INT2FIX(2));
|
1513
2338
|
rb_gc_register_mark_object(half_in_rational);
|
1514
2339
|
|
2340
|
+
mEnumerableStatistics = rb_const_get_at(rb_cObject, rb_intern("EnumerableStatistics"));
|
2341
|
+
cHistogram = rb_const_get_at(mEnumerableStatistics, rb_intern("Histogram"));
|
2342
|
+
|
2343
|
+
rb_define_method(rb_cArray, "histogram", ary_histogram, -1);
|
2344
|
+
|
1515
2345
|
idPLUS = '+';
|
1516
2346
|
idMINUS = '-';
|
1517
2347
|
idSTAR = '*';
|
@@ -1523,8 +2353,14 @@ Init_extension(void)
|
|
1523
2353
|
id_negate = rb_intern("-@");
|
1524
2354
|
id_to_f = rb_intern("to_f");
|
1525
2355
|
id_cmp = rb_intern("<=>");
|
2356
|
+
id_nan_p = rb_intern("nan?");
|
1526
2357
|
id_each = rb_intern("each");
|
1527
2358
|
id_real_p = rb_intern("real?");
|
1528
2359
|
id_sum = rb_intern("sum");
|
1529
2360
|
id_population = rb_intern("population");
|
2361
|
+
id_closed = rb_intern("closed");
|
2362
|
+
id_edge = rb_intern("edge");
|
2363
|
+
|
2364
|
+
sym_left = ID2SYM(rb_intern("left"));
|
2365
|
+
sym_right = ID2SYM(rb_intern("right"));
|
1530
2366
|
}
|