enumerable-statistics 1.0.1 → 2.0.0.pre
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.travis.yml +21 -8
- data/.yardopts +1 -0
- data/CHANGELOG.md +7 -0
- data/README.md +8 -0
- data/Rakefile +5 -3
- data/bench/array_value_counts.yml +42 -0
- data/bench/enum_value_counts.yml +42 -0
- data/bench/hash_value_counts.yml +42 -0
- data/bench/mean.yml +30 -0
- data/bench/sum.yml +29 -0
- data/bench/variance.yml +39 -0
- data/enumerable-statistics.gemspec +16 -7
- data/ext/-bench-/extconf.rb +3 -0
- data/ext/enumerable/statistics/extension/extconf.rb +12 -0
- data/ext/enumerable/statistics/extension/statistics.c +865 -29
- data/lib/enumerable/statistics.rb +1 -1
- data/lib/enumerable_statistics.rb +2 -0
- data/lib/enumerable_statistics/histogram.rb +5 -0
- data/lib/enumerable_statistics/version.rb +9 -0
- data/templates/default/layout/html/headers.erb +36 -0
- metadata +45 -24
- data/bench/mean.rb +0 -27
- data/bench/sum.rb +0 -26
- data/bench/variance.rb +0 -30
- data/lib/enumerable/statistics/version.rb +0 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: d632cae80814e40cb6247d76cdfa74999e30effa4f969935a2deef35937c4f5e
|
4
|
+
data.tar.gz: 85c9ba3067efd94649e01836ebf04cf0ddebc2a69fea06f65a4efd067a9dffe7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1ee5828934ed01b5bc2f3173816bfe24914f36641df6a99b2aff355478d74f13fd36201547776c06d8b4150784176420be8ceea1bbdd2fa9397d6930522caff1
|
7
|
+
data.tar.gz: 7a4a87570189ff29bef98fdeef30f2640eebbea550419229b9ffeb89c9d032fbe72082e6be00cd5fb9a85be2e515d6211b057f5fd1c4834ad6cb8c5153135a5f
|
data/.travis.yml
CHANGED
@@ -1,19 +1,32 @@
|
|
1
1
|
---
|
2
|
-
|
2
|
+
notification:
|
3
|
+
email:
|
4
|
+
- mrkn@ruby-lang.org
|
3
5
|
|
4
|
-
|
5
|
-
- ruby-head
|
6
|
-
- 2.3.0
|
7
|
-
- 2.2.4
|
8
|
-
- 2.1
|
6
|
+
language: ruby
|
9
7
|
|
10
8
|
before_install:
|
11
9
|
- gem update --system
|
12
|
-
- gem
|
10
|
+
- gem install bundler
|
13
11
|
|
14
12
|
install:
|
15
13
|
- bundle install
|
16
14
|
|
17
15
|
script:
|
18
|
-
- bundle exec rake clobber compile
|
16
|
+
- bundle exec rake --trace clobber compile
|
19
17
|
- bundle exec rake spec
|
18
|
+
|
19
|
+
matrix:
|
20
|
+
include:
|
21
|
+
- name: "2.3"
|
22
|
+
rvm: 2.3
|
23
|
+
- name: "2.4"
|
24
|
+
rvm: 2.4.5
|
25
|
+
- name: "2.5"
|
26
|
+
rvm: 2.5.2
|
27
|
+
- name: "2.6"
|
28
|
+
rvm: 2.6
|
29
|
+
- name: "trunk"
|
30
|
+
rvm: ruby-head
|
31
|
+
allow_failures:
|
32
|
+
- rvm: 2.3
|
data/.yardopts
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,10 @@
|
|
1
|
+
# 2.0.0-pre
|
2
|
+
|
3
|
+
- Add `value_counts` method in Array, Hash, and Enumerable
|
4
|
+
- Add `median` method in Array
|
5
|
+
- Add `percentile` method in Array
|
6
|
+
- Add `histogram` method in Array
|
7
|
+
|
1
8
|
# 1.0.1
|
2
9
|
|
3
10
|
- Add `mean_variance` method in Array class and Enumerable module
|
data/README.md
CHANGED
@@ -40,6 +40,14 @@ The following methods are supplied by this library:
|
|
40
40
|
- Calculates a mean and a variance simultaneously
|
41
41
|
- `Array#mean_stdev`, `Enumerable#mean_stdev`
|
42
42
|
- Calculates a mean and a standard deviation simultaneously
|
43
|
+
- `Array#median`
|
44
|
+
- Calculates a median of values in an array
|
45
|
+
- `Array#percentile(q)`
|
46
|
+
- Calculates a percentile or percentiles of values in an array
|
47
|
+
- `Array#value_counts`, `Enumerable#value_counts`, and `Hash#value_counts`
|
48
|
+
- Count how many items for each value in the container
|
49
|
+
- `Array#histogram`
|
50
|
+
- Calculate histogram of the values in the array
|
43
51
|
|
44
52
|
Moreover, for Ruby < 2.4, `Array#sum` and `Enumerable#sum` are provided.
|
45
53
|
|
data/Rakefile
CHANGED
@@ -6,15 +6,17 @@ task :default => :spec
|
|
6
6
|
|
7
7
|
Rake::ExtensionTask.new('enumerable/statistics/extension')
|
8
8
|
|
9
|
+
directory 'lib/enumerable/statistics'
|
10
|
+
|
9
11
|
RSpec::Core::RakeTask.new(:spec)
|
10
12
|
|
11
13
|
task :bench do
|
12
14
|
puts "# sum\n"
|
13
|
-
system('
|
15
|
+
system('benchmark-driver bench/sum.yml')
|
14
16
|
|
15
17
|
puts "# mean\n"
|
16
|
-
system('
|
18
|
+
system('benchmark-driver bench/mean.yml')
|
17
19
|
|
18
20
|
puts "# variance\n"
|
19
|
-
system('
|
21
|
+
system('benchmark-driver bench/variance.yml')
|
20
22
|
end
|
data/bench/array_value_counts.yml
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
contexts:
|
2
|
+
- name: "1.1.0.dev"
|
3
|
+
gems:
|
4
|
+
enumerable-statistics: "1.1.0.dev"
|
5
|
+
require: false
|
6
|
+
prelude: |-
|
7
|
+
require 'enumerable/statistics'
|
8
|
+
- name: "HEAD"
|
9
|
+
prelude: |-
|
10
|
+
require 'bundler/setup'
|
11
|
+
require 'enumerable/statistics'
|
12
|
+
prelude: |-
|
13
|
+
n = 1000
|
14
|
+
chars = ('a'..'m').to_a
|
15
|
+
ary = Array.new(n) { chars.sample }
|
16
|
+
benchmark:
|
17
|
+
inject: |-
|
18
|
+
ary.inject(Hash.new(0)) { |h, x| h[x] += 1; h }
|
19
|
+
unsort_keepna: |-
|
20
|
+
ary.value_counts(sort: false, dropna: false)
|
21
|
+
unsort_dropna: |-
|
22
|
+
ary.value_counts(sort: false, dropna: true)
|
23
|
+
sort_keepna: |-
|
24
|
+
ary.value_counts(sort: true, dropna: false)
|
25
|
+
sort_dropna: |-
|
26
|
+
ary.value_counts(sort: true, dropna: true)
|
27
|
+
norm_unsort_keepna: |-
|
28
|
+
ary.value_counts(normalize: true, sort: false, dropna: false)
|
29
|
+
norm_unsort_dropna: |-
|
30
|
+
ary.value_counts(normalize: true, sort: false, dropna: true)
|
31
|
+
norm_sort_keepna: |-
|
32
|
+
ary.value_counts(normalize: true, sort: true, dropna: false)
|
33
|
+
norm_sort_dropna: |-
|
34
|
+
ary.value_counts(normalize: true, sort: true, dropna: true)
|
35
|
+
sort_asc_keepna: |-
|
36
|
+
ary.value_counts(sort: true, ascending: true, dropna: false)
|
37
|
+
sort_asc_dropna: |-
|
38
|
+
ary.value_counts(sort: true, ascending: true, dropna: true)
|
39
|
+
norm_sort_asc_keepna: |-
|
40
|
+
ary.value_counts(normalize: true, sort: true, ascending: true, dropna: false)
|
41
|
+
norm_sort_asc_dropna: |-
|
42
|
+
ary.value_counts(normalize: true, sort: true, ascending: true, dropna: true)
|
data/bench/enum_value_counts.yml
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
contexts:
|
2
|
+
- name: "1.1.0.dev"
|
3
|
+
gems:
|
4
|
+
enumerable-statistics: "1.1.0.dev"
|
5
|
+
require: false
|
6
|
+
prelude: |-
|
7
|
+
require 'enumerable/statistics'
|
8
|
+
- name: "HEAD"
|
9
|
+
prelude: |-
|
10
|
+
require 'bundler/setup'
|
11
|
+
require 'enumerable/statistics'
|
12
|
+
prelude: |-
|
13
|
+
n = 1000
|
14
|
+
chars = ('a'..'m').to_a
|
15
|
+
enum = Array.new(n) { chars.sample }.each
|
16
|
+
benchmark:
|
17
|
+
inject: |-
|
18
|
+
enum.inject(Hash.new(0)) { |h, x| h[x] += 1; h }
|
19
|
+
unsort_keepna: |-
|
20
|
+
enum.value_counts(sort: false, dropna: false)
|
21
|
+
unsort_dropna: |-
|
22
|
+
enum.value_counts(sort: false, dropna: true)
|
23
|
+
sort_keepna: |-
|
24
|
+
enum.value_counts(sort: true, dropna: false)
|
25
|
+
sort_dropna: |-
|
26
|
+
enum.value_counts(sort: true, dropna: true)
|
27
|
+
norm_unsort_keepna: |-
|
28
|
+
enum.value_counts(normalize: true, sort: false, dropna: false)
|
29
|
+
norm_unsort_dropna: |-
|
30
|
+
enum.value_counts(normalize: true, sort: false, dropna: true)
|
31
|
+
norm_sort_keepna: |-
|
32
|
+
enum.value_counts(normalize: true, sort: true, dropna: false)
|
33
|
+
norm_sort_dropna: |-
|
34
|
+
enum.value_counts(normalize: true, sort: true, dropna: true)
|
35
|
+
sort_asc_keepna: |-
|
36
|
+
enum.value_counts(sort: true, ascending: true, dropna: false)
|
37
|
+
sort_asc_dropna: |-
|
38
|
+
enum.value_counts(sort: true, ascending: true, dropna: true)
|
39
|
+
norm_sort_asc_keepna: |-
|
40
|
+
enum.value_counts(normalize: true, sort: true, ascending: true, dropna: false)
|
41
|
+
norm_sort_asc_dropna: |-
|
42
|
+
enum.value_counts(normalize: true, sort: true, ascending: true, dropna: true)
|
data/bench/hash_value_counts.yml
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
contexts:
|
2
|
+
- name: "1.1.0.dev"
|
3
|
+
gems:
|
4
|
+
enumerable-statistics: "1.1.0.dev"
|
5
|
+
require: false
|
6
|
+
prelude: |-
|
7
|
+
require 'enumerable/statistics'
|
8
|
+
- name: "HEAD"
|
9
|
+
prelude: |-
|
10
|
+
require 'bundler/setup'
|
11
|
+
require 'enumerable/statistics'
|
12
|
+
prelude: |-
|
13
|
+
n = 1000
|
14
|
+
chars = ('a'..'m').to_a
|
15
|
+
hash = Array.new(n) { chars.sample }.each_with_index.to_h
|
16
|
+
benchmark:
|
17
|
+
inject: |-
|
18
|
+
hash.inject(Hash.new(0)) { |h, (k, v)| h[v] += 1; h }
|
19
|
+
unsort_keepna: |-
|
20
|
+
hash.value_counts(sort: false, dropna: false)
|
21
|
+
unsort_dropna: |-
|
22
|
+
hash.value_counts(sort: false, dropna: true)
|
23
|
+
sort_keepna: |-
|
24
|
+
hash.value_counts(sort: true, dropna: false)
|
25
|
+
sort_dropna: |-
|
26
|
+
hash.value_counts(sort: true, dropna: true)
|
27
|
+
norm_unsort_keepna: |-
|
28
|
+
hash.value_counts(normalize: true, sort: false, dropna: false)
|
29
|
+
norm_unsort_dropna: |-
|
30
|
+
hash.value_counts(normalize: true, sort: false, dropna: true)
|
31
|
+
norm_sort_keepna: |-
|
32
|
+
hash.value_counts(normalize: true, sort: true, dropna: false)
|
33
|
+
norm_sort_dropna: |-
|
34
|
+
hash.value_counts(normalize: true, sort: true, dropna: true)
|
35
|
+
sort_asc_keepna: |-
|
36
|
+
hash.value_counts(sort: true, ascending: true, dropna: false)
|
37
|
+
sort_asc_dropna: |-
|
38
|
+
hash.value_counts(sort: true, ascending: true, dropna: true)
|
39
|
+
norm_sort_asc_keepna: |-
|
40
|
+
hash.value_counts(normalize: true, sort: true, ascending: true, dropna: false)
|
41
|
+
norm_sort_asc_dropna: |-
|
42
|
+
hash.value_counts(normalize: true, sort: true, ascending: true, dropna: true)
|
data/bench/mean.yml
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
contexts:
|
2
|
+
- name: "master"
|
3
|
+
prelude: |-
|
4
|
+
require 'bundler/setup'
|
5
|
+
require 'enumerable/statistics'
|
6
|
+
prelude: |-
|
7
|
+
n = 1000
|
8
|
+
ary = Array.new(n) { rand }
|
9
|
+
benchmark:
|
10
|
+
inject: mean = ary.inject(:+) / n.to_f
|
11
|
+
while: |-
|
12
|
+
i, mean = 0, 0
|
13
|
+
while i < n
|
14
|
+
mean += ary[i]
|
15
|
+
i += 1
|
16
|
+
end
|
17
|
+
mean /= n.to_f
|
18
|
+
pure_ruby: |-
|
19
|
+
i, f, c = 0, 0.0, 0.0, 0.0, 0.0
|
20
|
+
while i < n
|
21
|
+
x = ary[i]
|
22
|
+
y = x - c
|
23
|
+
t = f + y
|
24
|
+
c = (t - f) - y
|
25
|
+
f = t
|
26
|
+
|
27
|
+
i += 1
|
28
|
+
end
|
29
|
+
mean = f / n
|
30
|
+
c_ext: mean = ary.mean
|
data/bench/sum.yml
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
contexts:
|
2
|
+
- name: "master"
|
3
|
+
prelude: |-
|
4
|
+
require 'bundler/setup'
|
5
|
+
require 'enumerable/statistics'
|
6
|
+
prelude: |-
|
7
|
+
n = 1000
|
8
|
+
ary = Array.new(n) { rand }
|
9
|
+
benchmark:
|
10
|
+
inject: sum = ary.inject(:+)
|
11
|
+
while: |-
|
12
|
+
i, sum = 0, 0
|
13
|
+
while i < n
|
14
|
+
sum += ary[i]
|
15
|
+
i += 1
|
16
|
+
end
|
17
|
+
pure_ruby: |-
|
18
|
+
i, f, c = 0, 0.0, 0.0, 0.0, 0.0
|
19
|
+
while i < n
|
20
|
+
x = ary[i]
|
21
|
+
y = x - c
|
22
|
+
t = f + y
|
23
|
+
c = (t - f) - y
|
24
|
+
f = t
|
25
|
+
|
26
|
+
i += 1
|
27
|
+
end
|
28
|
+
sum = f
|
29
|
+
sum: sum = ary.sum
|
data/bench/variance.yml
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
contexts:
|
2
|
+
- name: "master"
|
3
|
+
prelude: |-
|
4
|
+
require 'bundler/setup'
|
5
|
+
require 'enumerable/statistics'
|
6
|
+
prelude: |-
|
7
|
+
n = 1000
|
8
|
+
ary = Array.new(n) { rand }
|
9
|
+
benchmark:
|
10
|
+
inject: |-
|
11
|
+
mean = ary.mean
|
12
|
+
var = ary.inject(0.0) { |sum, x|
|
13
|
+
sum += (x - mean) ** 2
|
14
|
+
} / (n - 1).to_f
|
15
|
+
while: |-
|
16
|
+
mean = ary.mean
|
17
|
+
i, var = 0, 0
|
18
|
+
while i < n
|
19
|
+
var += (ary[i] - mean) ** 2
|
20
|
+
i += 1
|
21
|
+
end
|
22
|
+
var /= n.to_f
|
23
|
+
pure_ruby: |-
|
24
|
+
i, m, m2, f, c = 0, 0.0, 0.0, 0.0, 0.0
|
25
|
+
while i < n
|
26
|
+
x = ary[i]
|
27
|
+
y = x - c
|
28
|
+
t = f + y
|
29
|
+
c = (t - f) - y
|
30
|
+
f = t
|
31
|
+
|
32
|
+
delta = x - m
|
33
|
+
m += delta / i
|
34
|
+
m2 += delta * (x - m)
|
35
|
+
|
36
|
+
i += 1
|
37
|
+
end
|
38
|
+
var = m2 / n
|
39
|
+
c_ext: var = ary.variance
|
data/enumerable-statistics.gemspec
CHANGED
@@ -1,11 +1,17 @@
|
|
1
1
|
# coding: utf-8
|
2
2
|
lib = File.expand_path('../lib', __FILE__)
|
3
3
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
-
|
4
|
+
|
5
|
+
require 'enumerable_statistics/version'
|
5
6
|
|
6
7
|
Gem::Specification.new do |spec|
|
7
8
|
spec.name = "enumerable-statistics"
|
8
|
-
spec.version =
|
9
|
+
spec.version = [
|
10
|
+
EnumerableStatistics::Version::MAJOR,
|
11
|
+
EnumerableStatistics::Version::MINOR,
|
12
|
+
EnumerableStatistics::Version::MICRO,
|
13
|
+
EnumerableStatistics::Version::TAG
|
14
|
+
].compact.join('.')
|
9
15
|
spec.authors = ["Kenta Murata"]
|
10
16
|
spec.email = ["mrkn@mrkn.jp"]
|
11
17
|
|
@@ -19,10 +25,13 @@ Gem::Specification.new do |spec|
|
|
19
25
|
spec.require_paths = ["ext", "lib"]
|
20
26
|
spec.extensions = Dir['ext/**/extconf.rb']
|
21
27
|
|
22
|
-
spec.
|
23
|
-
|
24
|
-
spec.add_development_dependency "
|
25
|
-
spec.add_development_dependency "
|
28
|
+
spec.required_ruby_version = '>= 2.4'
|
29
|
+
|
30
|
+
spec.add_development_dependency "bundler", ">= 1.17.2"
|
31
|
+
spec.add_development_dependency "rake"
|
32
|
+
spec.add_development_dependency "rake-compiler", ">= 0.9.8"
|
33
|
+
spec.add_development_dependency "rspec", ">= 3.4"
|
26
34
|
spec.add_development_dependency "fuubar"
|
27
|
-
spec.add_development_dependency "
|
35
|
+
spec.add_development_dependency "yard"
|
36
|
+
spec.add_development_dependency "benchmark-driver"
|
28
37
|
end
|
data/ext/enumerable/statistics/extension/extconf.rb
CHANGED
@@ -1,5 +1,17 @@
|
|
1
1
|
require 'mkmf'
|
2
2
|
|
3
3
|
have_type('struct RRational')
|
4
|
+
have_func('rb_rational_new')
|
5
|
+
have_func('rb_rational_num')
|
6
|
+
have_func('rb_rational_den')
|
7
|
+
have_func('rb_rational_plus')
|
8
|
+
|
4
9
|
have_type('struct RComplex')
|
10
|
+
have_func('rb_complex_raw')
|
11
|
+
have_func('rb_complex_real')
|
12
|
+
have_func('rb_complex_imag')
|
13
|
+
have_func('rb_complex_plus')
|
14
|
+
have_func('rb_complex_div')
|
15
|
+
have_func('rb_dbl_complex_new')
|
16
|
+
|
5
17
|
create_makefile('enumerable/statistics/extension')
|
data/ext/enumerable/statistics/extension/statistics.c
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
#include <ruby/ruby.h>
|
2
|
+
#include <ruby/util.h>
|
2
3
|
#include <ruby/version.h>
|
3
4
|
#include <assert.h>
|
5
|
+
#include <math.h>
|
4
6
|
|
5
7
|
#if RUBY_API_VERSION_CODE >= 20400
|
6
8
|
/* for 2.4.0 or higher */
|
@@ -16,6 +18,12 @@
|
|
16
18
|
# undef HAVE_RB_RATIONAL_PLUS
|
17
19
|
#endif
|
18
20
|
|
21
|
+
#ifdef HAVE_RB_ARITHMETIC_SEQUENCE_EXTRACT
|
22
|
+
# define HAVE_ARITHMETIC_SEQUENCE
|
23
|
+
#else
|
24
|
+
# undef HAVE_ARITHMETIC_SEQUENCE
|
25
|
+
#endif
|
26
|
+
|
19
27
|
#ifndef RB_INTEGER_TYPE_P
|
20
28
|
# define RB_INTEGER_TYPE_P(obj) enum_stat_integer_type_p(obj)
|
21
29
|
static inline int
|
@@ -86,8 +94,12 @@ struct RComplex {
|
|
86
94
|
static VALUE half_in_rational;
|
87
95
|
|
88
96
|
static ID idPow, idPLUS, idMINUS, idSTAR, idDIV, idGE;
|
89
|
-
static ID id_eqeq_p, id_idiv, id_negate, id_to_f, id_cmp;
|
90
|
-
static ID id_each, id_real_p, id_sum, id_population;
|
97
|
+
static ID id_eqeq_p, id_idiv, id_negate, id_to_f, id_cmp, id_nan_p;
|
98
|
+
static ID id_each, id_real_p, id_sum, id_population, id_closed, id_edge;
|
99
|
+
|
100
|
+
static VALUE sym_left, sym_right;
|
101
|
+
|
102
|
+
static VALUE cHistogram;
|
91
103
|
|
92
104
|
inline static VALUE
|
93
105
|
f_add(VALUE x, VALUE y)
|
@@ -131,28 +143,6 @@ complex_new(VALUE klass, VALUE real, VALUE imag)
|
|
131
143
|
return (VALUE)obj;
|
132
144
|
}
|
133
145
|
|
134
|
-
static VALUE
|
135
|
-
complex_caonicalize_new(VALUE klass, VALUE real, VALUE imag)
|
136
|
-
{
|
137
|
-
if (f_real_p(real) && f_real_p(imag))
|
138
|
-
return complex_new(klass, real, imag);
|
139
|
-
else if (f_real_p(imag)) {
|
140
|
-
VALUE new_imag;
|
141
|
-
|
142
|
-
new_imag = f_add(RCOMPLEX(real)->imag, imag);
|
143
|
-
|
144
|
-
return complex_new(klass, RCOMPLEX(real)->real, new_imag);
|
145
|
-
}
|
146
|
-
else {
|
147
|
-
VALUE new_real, new_imag;
|
148
|
-
|
149
|
-
new_real = f_sub(RCOMPLEX(real)->real, RCOMPLEX(imag)->imag);
|
150
|
-
new_imag = f_add(RCOMPLEX(real)->imag, RCOMPLEX(imag)->real);
|
151
|
-
|
152
|
-
return complex_new(klass, new_real, new_imag);
|
153
|
-
}
|
154
|
-
}
|
155
|
-
|
156
146
|
static VALUE
|
157
147
|
complex_add(VALUE self, VALUE other)
|
158
148
|
{
|
@@ -623,7 +613,7 @@ rb_rational_plus(VALUE self, VALUE other)
|
|
623
613
|
VALUE num = RRATIONAL(self)->num;
|
624
614
|
VALUE den = RRATIONAL(self)->den;
|
625
615
|
|
626
|
-
return f_addsub(self, num, den, other, ONE,
|
616
|
+
return f_addsub(self, num, den, other, ONE, '+');
|
627
617
|
}
|
628
618
|
else if (RB_TYPE_P(other, T_FLOAT)) {
|
629
619
|
return f_add(f_to_f(self), other);
|
@@ -852,11 +842,11 @@ ary_mean_variance(VALUE ary, VALUE *mean_ptr, VALUE *variance_ptr, size_t ddof)
|
|
852
842
|
static int
|
853
843
|
opt_population_p(VALUE opts)
|
854
844
|
{
|
855
|
-
ID kwargs = id_population;
|
856
845
|
VALUE population = Qfalse;
|
857
846
|
|
858
847
|
if (!NIL_P(opts)) {
|
859
848
|
#ifdef HAVE_RB_GET_KWARGS
|
849
|
+
ID kwargs = id_population;
|
860
850
|
rb_get_kwargs(opts, &kwargs, 0, 1, &population);
|
861
851
|
#else
|
862
852
|
VALUE val = rb_hash_aref(opts, ID2SYM(id_population));
|
@@ -868,7 +858,7 @@ opt_population_p(VALUE opts)
|
|
868
858
|
}
|
869
859
|
|
870
860
|
/* call-seq:
|
871
|
-
*
|
861
|
+
* ary.mean_variance(population: false)
|
872
862
|
*
|
873
863
|
* Calculate a mean and a variance of the values in `ary`.
|
874
864
|
* The first element of the result array is the mean, and the second is the variance.
|
@@ -1148,6 +1138,7 @@ enum_sum_count(VALUE obj, VALUE init, VALUE *sum_ptr, long *count_ptr)
|
|
1148
1138
|
*count_ptr = memo.count;
|
1149
1139
|
}
|
1150
1140
|
|
1141
|
+
#ifndef HAVE_ENUM_SUM
|
1151
1142
|
/* call-seq:
|
1152
1143
|
* enum.sum
|
1153
1144
|
*
|
@@ -1172,10 +1163,11 @@ enum_sum(int argc, VALUE* argv, VALUE obj)
|
|
1172
1163
|
|
1173
1164
|
return sum;
|
1174
1165
|
}
|
1166
|
+
#endif
|
1175
1167
|
|
1176
1168
|
struct enum_mean_variance_memo {
|
1177
1169
|
int block_given;
|
1178
|
-
|
1170
|
+
size_t n;
|
1179
1171
|
double m, m2, f, c;
|
1180
1172
|
};
|
1181
1173
|
|
@@ -1229,7 +1221,7 @@ enum_mean_variance_iter_i(RB_BLOCK_CALL_FUNC_ARGLIST(e, args))
|
|
1229
1221
|
{
|
1230
1222
|
struct enum_mean_variance_memo *memo = (struct enum_mean_variance_memo *)args;
|
1231
1223
|
ENUM_WANT_SVALUE();
|
1232
|
-
mean_variance_iter(e,
|
1224
|
+
mean_variance_iter(e, memo);
|
1233
1225
|
return Qnil;
|
1234
1226
|
}
|
1235
1227
|
|
@@ -1487,9 +1479,836 @@ ary_stdev(int argc, VALUE* argv, VALUE ary)
|
|
1487
1479
|
return stdev;
|
1488
1480
|
}
|
1489
1481
|
|
1482
|
+
static inline int
|
1483
|
+
is_na(VALUE v)
|
1484
|
+
{
|
1485
|
+
if (NIL_P(v))
|
1486
|
+
return 1;
|
1487
|
+
|
1488
|
+
if (RB_FLOAT_TYPE_P(v) && isnan(RFLOAT_VALUE(v)))
|
1489
|
+
return 1;
|
1490
|
+
|
1491
|
+
if (rb_respond_to(v, id_nan_p) && RTEST(rb_funcall(v, id_nan_p, 0)))
|
1492
|
+
return 1;
|
1493
|
+
|
1494
|
+
return 0;
|
1495
|
+
}
|
1496
|
+
|
1497
|
+
static int
|
1498
|
+
ary_percentile_sort_cmp(const void *ap, const void *bp, void *dummy)
|
1499
|
+
{
|
1500
|
+
VALUE a = *(const VALUE *)ap, b = *(const VALUE *)bp;
|
1501
|
+
VALUE cmp;
|
1502
|
+
|
1503
|
+
if (is_na(a)) {
|
1504
|
+
return -1;
|
1505
|
+
}
|
1506
|
+
else if (is_na(b)) {
|
1507
|
+
return 1;
|
1508
|
+
}
|
1509
|
+
|
1510
|
+
/* TODO: optimize */
|
1511
|
+
cmp = rb_funcall(a, id_cmp, 1, b);
|
1512
|
+
return rb_cmpint(cmp, a, b);
|
1513
|
+
}
|
1514
|
+
|
1515
|
+
static VALUE
|
1516
|
+
ary_percentile_make_sorted(VALUE ary)
|
1517
|
+
{
|
1518
|
+
long n, i;
|
1519
|
+
VALUE sorted;
|
1520
|
+
|
1521
|
+
n = RARRAY_LEN(ary);
|
1522
|
+
sorted = rb_ary_tmp_new(n);
|
1523
|
+
for (i = 0; i < n; ++i) {
|
1524
|
+
rb_ary_push(sorted, RARRAY_AREF(ary, i));
|
1525
|
+
}
|
1526
|
+
RARRAY_PTR_USE(sorted, ptr, {
|
1527
|
+
ruby_qsort(ptr, n, sizeof(VALUE),
|
1528
|
+
ary_percentile_sort_cmp, NULL);
|
1529
|
+
});
|
1530
|
+
return sorted;
|
1531
|
+
}
|
1532
|
+
|
1533
|
+
static inline VALUE
|
1534
|
+
ary_percentile_single_sorted(VALUE sorted, long n, double d)
|
1535
|
+
{
|
1536
|
+
VALUE x0, x1;
|
1537
|
+
double i, f;
|
1538
|
+
long l;
|
1539
|
+
|
1540
|
+
assert(RB_TYPE_P(sorted, T_ARRAY));
|
1541
|
+
assert(n == RARRAY_LEN(sorted));
|
1542
|
+
assert(n > 0);
|
1543
|
+
|
1544
|
+
if (d < 0 || 100 < d) {
|
1545
|
+
rb_raise(rb_eArgError, "percentile out of bounds");
|
1546
|
+
}
|
1547
|
+
|
1548
|
+
if (is_na(RARRAY_AREF(sorted, 0))) {
|
1549
|
+
return DBL2NUM(nan(""));
|
1550
|
+
}
|
1551
|
+
|
1552
|
+
n = RARRAY_LEN(sorted);
|
1553
|
+
if (n == 1) {
|
1554
|
+
return RARRAY_AREF(sorted, 0);
|
1555
|
+
}
|
1556
|
+
|
1557
|
+
d = (n - 1) * d / 100.0;
|
1558
|
+
f = modf(d, &i);
|
1559
|
+
l = (long)i;
|
1560
|
+
|
1561
|
+
x0 = RARRAY_AREF(sorted, l);
|
1562
|
+
if (f == 0 || l == n - 1) {
|
1563
|
+
return x0;
|
1564
|
+
}
|
1565
|
+
|
1566
|
+
x0 = rb_funcall(x0, idSTAR, 1, DBL2NUM(1 - f));
|
1567
|
+
x1 = RARRAY_AREF(sorted, l + 1);
|
1568
|
+
x1 = rb_funcall(x1, idSTAR, 1, DBL2NUM(f));
|
1569
|
+
|
1570
|
+
return rb_funcall(x0, idPLUS, 1, x1);
|
1571
|
+
}
|
1572
|
+
|
1573
|
+
static VALUE
|
1574
|
+
ary_percentile_single(VALUE ary, VALUE q)
|
1575
|
+
{
|
1576
|
+
long n;
|
1577
|
+
double d;
|
1578
|
+
VALUE qf, sorted;
|
1579
|
+
|
1580
|
+
assert(RB_TYPE_P(ary, T_ARRAY));
|
1581
|
+
|
1582
|
+
n = RARRAY_LEN(ary);
|
1583
|
+
assert(n > 0);
|
1584
|
+
|
1585
|
+
switch (TYPE(q)) {
|
1586
|
+
case T_FIXNUM:
|
1587
|
+
d = (double)FIX2LONG(q);
|
1588
|
+
break;
|
1589
|
+
case T_BIGNUM:
|
1590
|
+
d = rb_big2dbl(q);
|
1591
|
+
break;
|
1592
|
+
|
1593
|
+
case T_RATIONAL:
|
1594
|
+
/* fall through */
|
1595
|
+
default:
|
1596
|
+
qf = NUM2DBL(q);
|
1597
|
+
goto float_percentile;
|
1598
|
+
|
1599
|
+
case T_FLOAT:
|
1600
|
+
qf = q;
|
1601
|
+
float_percentile:
|
1602
|
+
d = RFLOAT_VALUE(qf);
|
1603
|
+
break;
|
1604
|
+
}
|
1605
|
+
|
1606
|
+
if (n == 1) {
|
1607
|
+
return RARRAY_AREF(ary, 0);
|
1608
|
+
}
|
1609
|
+
|
1610
|
+
sorted = ary_percentile_make_sorted(ary);
|
1611
|
+
|
1612
|
+
return ary_percentile_single_sorted(sorted, n, d);
|
1613
|
+
}
|
1614
|
+
|
1615
|
+
/* call-seq:
|
1616
|
+
* ary.percentile(q) -> float
|
1617
|
+
*
|
1618
|
+
* Calculate specified percentiles of the values in `ary`.
|
1619
|
+
*
|
1620
|
+
* @param [Number, Array] percentile or array of percentiles to compute,
|
1621
|
+
* which must be between 0 and 100 inclusive.
|
1622
|
+
*
|
1623
|
+
* @return [Float, Array] A percentile value(s)
|
1624
|
+
*/
|
1625
|
+
static VALUE
|
1626
|
+
ary_percentile(VALUE ary, VALUE q)
|
1627
|
+
{
|
1628
|
+
long n, m, i;
|
1629
|
+
double d;
|
1630
|
+
VALUE qf, qs, sorted, res;
|
1631
|
+
|
1632
|
+
n = RARRAY_LEN(ary);
|
1633
|
+
if (n == 0) {
|
1634
|
+
rb_raise(rb_eArgError, "unable to compute percentile(s) for an empty array");
|
1635
|
+
}
|
1636
|
+
|
1637
|
+
qs = rb_check_convert_type(q, T_ARRAY, "Array", "to_ary");
|
1638
|
+
if (NIL_P(qs)) {
|
1639
|
+
return ary_percentile_single(ary, q);
|
1640
|
+
}
|
1641
|
+
|
1642
|
+
m = RARRAY_LEN(qs);
|
1643
|
+
res = rb_ary_new_capa(m);
|
1644
|
+
|
1645
|
+
if (m == 1) {
|
1646
|
+
q = RARRAY_AREF(qs, 0);
|
1647
|
+
rb_ary_push(res, ary_percentile_single(ary, q));
|
1648
|
+
}
|
1649
|
+
else {
|
1650
|
+
sorted = ary_percentile_make_sorted(ary);
|
1651
|
+
|
1652
|
+
for (i = 0; i < m; ++i) {
|
1653
|
+
VALUE x;
|
1654
|
+
|
1655
|
+
q = RARRAY_AREF(qs, i);
|
1656
|
+
switch (TYPE(q)) {
|
1657
|
+
case T_FIXNUM:
|
1658
|
+
d = (double)FIX2LONG(q);
|
1659
|
+
break;
|
1660
|
+
case T_BIGNUM:
|
1661
|
+
d = rb_big2dbl(q);
|
1662
|
+
break;
|
1663
|
+
|
1664
|
+
case T_RATIONAL:
|
1665
|
+
/* fall through */
|
1666
|
+
default:
|
1667
|
+
qf = NUM2DBL(q);
|
1668
|
+
goto float_percentile;
|
1669
|
+
|
1670
|
+
case T_FLOAT:
|
1671
|
+
qf = q;
|
1672
|
+
float_percentile:
|
1673
|
+
d = RFLOAT_VALUE(qf);
|
1674
|
+
break;
|
1675
|
+
}
|
1676
|
+
|
1677
|
+
x = ary_percentile_single_sorted(sorted, n, d);
|
1678
|
+
rb_ary_push(res, x);
|
1679
|
+
}
|
1680
|
+
}
|
1681
|
+
|
1682
|
+
return res;
|
1683
|
+
}
|
1684
|
+
|
1685
|
+
/* call-seq:
|
1686
|
+
* ary.median -> float
|
1687
|
+
*
|
1688
|
+
* Calculate a median of the values in `ary`.
|
1689
|
+
*
|
1690
|
+
* @return [Float] A median value
|
1691
|
+
*/
|
1692
|
+
static VALUE
|
1693
|
+
ary_median(VALUE ary)
|
1694
|
+
{
|
1695
|
+
long n;
|
1696
|
+
VALUE sorted, a0, a1;
|
1697
|
+
|
1698
|
+
n = RARRAY_LEN(ary);
|
1699
|
+
switch (n) {
|
1700
|
+
case 0:
|
1701
|
+
goto return_nan;
|
1702
|
+
case 1:
|
1703
|
+
return RARRAY_AREF(ary, 0);
|
1704
|
+
case 2:
|
1705
|
+
a0 = RARRAY_AREF(ary, 0);
|
1706
|
+
a1 = RARRAY_AREF(ary, 1);
|
1707
|
+
goto mean_two;
|
1708
|
+
default:
|
1709
|
+
break;
|
1710
|
+
}
|
1711
|
+
|
1712
|
+
sorted = ary_percentile_make_sorted(ary);
|
1713
|
+
|
1714
|
+
a0 = RARRAY_AREF(sorted, 0);
|
1715
|
+
if (is_na(a0)) {
|
1716
|
+
return_nan:
|
1717
|
+
return DBL2NUM(nan(""));
|
1718
|
+
}
|
1719
|
+
|
1720
|
+
a1 = RARRAY_AREF(sorted, n / 2);
|
1721
|
+
if (n % 2 == 1) {
|
1722
|
+
return a1;
|
1723
|
+
}
|
1724
|
+
else {
|
1725
|
+
a0 = RARRAY_AREF(sorted, n / 2 - 1);
|
1726
|
+
|
1727
|
+
mean_two:
|
1728
|
+
a0 = rb_funcall(a0, idPLUS, 1, a1); /* TODO: optimize */
|
1729
|
+
if (RB_INTEGER_TYPE_P(a0) || RB_FLOAT_TYPE_P(a0) || RB_TYPE_P(a0, T_RATIONAL)) {
|
1730
|
+
double d = NUM2DBL(a0);
|
1731
|
+
return DBL2NUM(d / 2.0);
|
1732
|
+
}
|
1733
|
+
|
1734
|
+
return rb_funcall(a0, idDIV, 1, DBL2NUM(2.0));
|
1735
|
+
}
|
1736
|
+
}
|
1737
|
+
|
1738
|
+
struct value_counts_opts {
|
1739
|
+
int normalize_p;
|
1740
|
+
int sort_p;
|
1741
|
+
int ascending_p;
|
1742
|
+
int dropna_p;
|
1743
|
+
};
|
1744
|
+
|
1745
|
+
static inline void
|
1746
|
+
value_counts_extract_opts(VALUE kwargs, struct value_counts_opts *opts)
|
1747
|
+
{
|
1748
|
+
assert(opts != NULL);
|
1749
|
+
|
1750
|
+
/* default values */
|
1751
|
+
opts->normalize_p = 0;
|
1752
|
+
opts->sort_p = 1;
|
1753
|
+
opts->ascending_p = 0;
|
1754
|
+
opts->dropna_p = 1;
|
1755
|
+
|
1756
|
+
if (!NIL_P(kwargs)) {
|
1757
|
+
enum { kw_normalize, kw_sort, kw_ascending, kw_dropna };
|
1758
|
+
static ID kwarg_keys[4];
|
1759
|
+
VALUE kwarg_vals[4];
|
1760
|
+
|
1761
|
+
if (!kwarg_keys[0]) {
|
1762
|
+
kwarg_keys[kw_normalize] = rb_intern("normalize");
|
1763
|
+
kwarg_keys[kw_sort] = rb_intern("sort");
|
1764
|
+
kwarg_keys[kw_ascending] = rb_intern("ascending");
|
1765
|
+
kwarg_keys[kw_dropna] = rb_intern("dropna");
|
1766
|
+
}
|
1767
|
+
|
1768
|
+
rb_get_kwargs(kwargs, kwarg_keys, 0, 4, kwarg_vals);
|
1769
|
+
opts->normalize_p = (kwarg_vals[kw_normalize] != Qundef) && RTEST(kwarg_vals[kw_normalize]);
|
1770
|
+
opts->sort_p = (kwarg_vals[kw_sort] != Qundef) && RTEST(kwarg_vals[kw_sort]);
|
1771
|
+
opts->ascending_p = (kwarg_vals[kw_ascending] != Qundef) && RTEST(kwarg_vals[kw_ascending]);
|
1772
|
+
opts->dropna_p = (kwarg_vals[kw_dropna] != Qundef) && RTEST(kwarg_vals[kw_dropna]);
|
1773
|
+
}
|
1774
|
+
}
|
1775
|
+
|
1776
|
+
static int
|
1777
|
+
value_counts_result_to_assoc_array_i(VALUE key, VALUE val, VALUE ary)
|
1778
|
+
{
|
1779
|
+
VALUE assoc = rb_ary_tmp_new(2);
|
1780
|
+
rb_ary_push(assoc, key);
|
1781
|
+
rb_ary_push(assoc, val);
|
1782
|
+
rb_ary_push(ary, assoc);
|
1783
|
+
return ST_CONTINUE;
|
1784
|
+
}
|
1785
|
+
|
1786
|
+
static int
|
1787
|
+
value_counts_sort_cmp_asc(const void *ap, const void *bp, void *dummy)
|
1788
|
+
{
|
1789
|
+
VALUE a = *(const VALUE *)ap, b = *(const VALUE *)bp;
|
1790
|
+
VALUE av, bv, cmp;
|
1791
|
+
|
1792
|
+
av = RARRAY_AREF(a, 1);
|
1793
|
+
bv = RARRAY_AREF(b, 1);
|
1794
|
+
|
1795
|
+
/* TODO: optimize */
|
1796
|
+
cmp = rb_funcall(av, id_cmp, 1, bv);
|
1797
|
+
return rb_cmpint(cmp, av, bv);
|
1798
|
+
}
|
1799
|
+
|
1800
|
+
static int
|
1801
|
+
value_counts_sort_cmp_desc(const void *ap, const void *bp, void *dummy)
|
1802
|
+
{
|
1803
|
+
VALUE a = *(const VALUE *)ap, b = *(const VALUE *)bp;
|
1804
|
+
VALUE av, bv, cmp;
|
1805
|
+
|
1806
|
+
av = RARRAY_AREF(a, 1);
|
1807
|
+
bv = RARRAY_AREF(b, 1);
|
1808
|
+
|
1809
|
+
/* TODO: optimize */
|
1810
|
+
cmp = rb_funcall(bv, id_cmp, 1, av);
|
1811
|
+
return rb_cmpint(cmp, bv, av);
|
1812
|
+
}
|
1813
|
+
|
1814
|
+
static VALUE
|
1815
|
+
value_counts_sort_result(VALUE result, const int dropna_p, const int ascending_p)
|
1816
|
+
{
|
1817
|
+
VALUE na_count = Qundef, ary, sorted;
|
1818
|
+
long i;
|
1819
|
+
|
1820
|
+
if (RHASH_SIZE(result) < 1) {
|
1821
|
+
return result;
|
1822
|
+
}
|
1823
|
+
|
1824
|
+
if (!dropna_p) {
|
1825
|
+
na_count = rb_hash_lookup2(result, Qnil, Qundef);
|
1826
|
+
if (na_count != Qundef) {
|
1827
|
+
rb_hash_delete(result, Qnil);
|
1828
|
+
}
|
1829
|
+
}
|
1830
|
+
|
1831
|
+
const long len = (long)RHASH_SIZE(result);
|
1832
|
+
ary = rb_ary_tmp_new(len);
|
1833
|
+
rb_hash_foreach(result, value_counts_result_to_assoc_array_i, ary);
|
1834
|
+
if (ascending_p) {
|
1835
|
+
RARRAY_PTR_USE(ary, ptr, {
|
1836
|
+
ruby_qsort(ptr, RARRAY_LEN(ary), sizeof(VALUE),
|
1837
|
+
value_counts_sort_cmp_asc, NULL);
|
1838
|
+
});
|
1839
|
+
}
|
1840
|
+
else {
|
1841
|
+
RARRAY_PTR_USE(ary, ptr, {
|
1842
|
+
ruby_qsort(ptr, RARRAY_LEN(ary), sizeof(VALUE),
|
1843
|
+
value_counts_sort_cmp_desc, NULL);
|
1844
|
+
});
|
1845
|
+
}
|
1846
|
+
|
1847
|
+
#ifdef HAVE_RB_HASH_NEW_WITH_SIZE
|
1848
|
+
sorted = rb_hash_new_with_size(len);
|
1849
|
+
#else
|
1850
|
+
sorted = rb_hash_new();
|
1851
|
+
#endif
|
1852
|
+
|
1853
|
+
if (na_count != Qundef && ascending_p) {
|
1854
|
+
rb_hash_aset(sorted, Qnil, na_count);
|
1855
|
+
}
|
1856
|
+
|
1857
|
+
for (i = 0; i < len; ++i) {
|
1858
|
+
VALUE a = RARRAY_AREF(ary, i);
|
1859
|
+
VALUE k = RARRAY_AREF(a, 0);
|
1860
|
+
VALUE v = RARRAY_AREF(a, 1);
|
1861
|
+
rb_hash_aset(sorted, k, v);
|
1862
|
+
}
|
1863
|
+
|
1864
|
+
if (na_count != Qundef && !ascending_p) {
|
1865
|
+
rb_hash_aset(sorted, Qnil, na_count);
|
1866
|
+
}
|
1867
|
+
|
1868
|
+
return sorted;
|
1869
|
+
}
|
1870
|
+
|
1871
|
+
struct value_counts_normalize_params {
|
1872
|
+
VALUE result;
|
1873
|
+
long total;
|
1874
|
+
};
|
1875
|
+
|
1876
|
+
static int
|
1877
|
+
value_counts_normalize_i(VALUE key, VALUE val, VALUE arg)
|
1878
|
+
{
|
1879
|
+
struct value_counts_normalize_params *params = (struct value_counts_normalize_params *)arg;
|
1880
|
+
double new_val;
|
1881
|
+
|
1882
|
+
new_val = NUM2DBL(val) / params->total;
|
1883
|
+
rb_hash_aset(params->result, key, DBL2NUM(new_val));
|
1884
|
+
|
1885
|
+
return ST_CONTINUE;
|
1886
|
+
}
|
1887
|
+
|
1888
|
+
struct value_counts_memo {
|
1889
|
+
int dropna_p;
|
1890
|
+
long total;
|
1891
|
+
long na_count;
|
1892
|
+
VALUE result;
|
1893
|
+
};
|
1894
|
+
|
1895
|
+
static VALUE
|
1896
|
+
any_value_counts(int argc, VALUE *argv, VALUE obj,
|
1897
|
+
void (* counter)(VALUE, struct value_counts_memo *))
|
1898
|
+
{
|
1899
|
+
VALUE kwargs;
|
1900
|
+
struct value_counts_opts opts;
|
1901
|
+
struct value_counts_memo memo;
|
1902
|
+
|
1903
|
+
rb_scan_args(argc, argv, ":", &kwargs);
|
1904
|
+
value_counts_extract_opts(kwargs, &opts);
|
1905
|
+
|
1906
|
+
memo.result = rb_hash_new();
|
1907
|
+
memo.total = 0;
|
1908
|
+
memo.na_count = 0;
|
1909
|
+
memo.dropna_p = opts.dropna_p;
|
1910
|
+
|
1911
|
+
if (!opts.dropna_p) {
|
1912
|
+
rb_hash_aset(memo.result, Qnil, INT2FIX(0)); // reserve the room for NA
|
1913
|
+
}
|
1914
|
+
|
1915
|
+
counter(obj, &memo);
|
1916
|
+
|
1917
|
+
if (!opts.dropna_p) {
|
1918
|
+
if (memo.na_count == 0)
|
1919
|
+
rb_hash_delete(memo.result, Qnil);
|
1920
|
+
else
|
1921
|
+
rb_hash_aset(memo.result, Qnil, LONG2NUM(memo.na_count));
|
1922
|
+
}
|
1923
|
+
|
1924
|
+
if (opts.sort_p) {
|
1925
|
+
memo.result = value_counts_sort_result(memo.result, opts.dropna_p, opts.ascending_p);
|
1926
|
+
}
|
1927
|
+
|
1928
|
+
if (opts.normalize_p) {
|
1929
|
+
struct value_counts_normalize_params params;
|
1930
|
+
params.result = memo.result;
|
1931
|
+
params.total = memo.total - (opts.dropna_p ? memo.na_count : 0);
|
1932
|
+
rb_hash_foreach(memo.result, value_counts_normalize_i, (VALUE)¶ms);
|
1933
|
+
}
|
1934
|
+
|
1935
|
+
return memo.result;
|
1936
|
+
}
|
1937
|
+
|
1938
|
+
static VALUE
|
1939
|
+
enum_value_counts_without_sort_i(RB_BLOCK_CALL_FUNC_ARGLIST(e, args))
|
1940
|
+
{
|
1941
|
+
struct value_counts_memo *memo = (struct value_counts_memo *)args;
|
1942
|
+
|
1943
|
+
ENUM_WANT_SVALUE();
|
1944
|
+
|
1945
|
+
if (is_na(e)) {
|
1946
|
+
++memo->na_count;
|
1947
|
+
}
|
1948
|
+
else {
|
1949
|
+
VALUE cnt = rb_hash_lookup2(memo->result, e, INT2FIX(0));
|
1950
|
+
rb_hash_aset(memo->result, e, rb_int_plus(cnt, INT2FIX(1)));
|
1951
|
+
}
|
1952
|
+
|
1953
|
+
++memo->total;
|
1954
|
+
|
1955
|
+
return Qnil;
|
1956
|
+
}
|
1957
|
+
|
1958
|
+
/* Counter for generic enumerables: iterates `obj` through its `each` method
 * and tallies every yielded element into `memo`.
 */
static void
enum_value_counts_without_sort(VALUE obj, struct value_counts_memo *memo)
{
  rb_block_call(obj, id_each, 0, 0, enum_value_counts_without_sort_i, (VALUE)memo);
}
|
1963
|
+
|
1964
|
+
/* Implementation of the `value_counts` method registered on Enumerable.
 *
 * Delegates to any_value_counts with the `each`-based counter, so it goes
 * through the same option handling as the Array and Hash variants.
 *
 * @return [Hash] The hash produced by any_value_counts.
 */
static VALUE
enum_value_counts(int argc, VALUE* argv, VALUE obj)
{
  return any_value_counts(argc, argv, obj, enum_value_counts_without_sort);
}
|
1969
|
+
|
1970
|
+
static void
|
1971
|
+
ary_value_counts_without_sort(VALUE ary, struct value_counts_memo *memo)
|
1972
|
+
{
|
1973
|
+
const VALUE zero = INT2FIX(0);
|
1974
|
+
const VALUE one = INT2FIX(1);
|
1975
|
+
long i, na_count = 0;
|
1976
|
+
long const n = RARRAY_LEN(ary);
|
1977
|
+
|
1978
|
+
for (i = 0; i < n; ++i) {
|
1979
|
+
VALUE val = RARRAY_AREF(ary, i);
|
1980
|
+
|
1981
|
+
if (is_na(val)) {
|
1982
|
+
++na_count;
|
1983
|
+
}
|
1984
|
+
else {
|
1985
|
+
VALUE cnt = rb_hash_lookup2(memo->result, val, zero);
|
1986
|
+
rb_hash_aset(memo->result, val, rb_int_plus(cnt, one));
|
1987
|
+
}
|
1988
|
+
}
|
1989
|
+
|
1990
|
+
memo->total = n;
|
1991
|
+
memo->na_count = na_count;
|
1992
|
+
}
|
1993
|
+
|
1994
|
+
/* call-seq:
 *   ary.value_counts(normalize: false, sort: true, ascending: false, dropna: true) -> hash
 *
 * Returns a hash that contains the counts of the values in `ary`.
 *
 * This method treats `nil` and NaN (any object that answers `true` to
 * `nan?`) as the same thing, and stores their combined count as the value
 * for the `nil` key.
 *
 * @param [false,true] normalize  If `true`, the result contains the relative
 *   frequencies of the unique values instead of their counts.
 * @param [true,false] sort  Sort by values.
 * @param [false,true] ascending  Sort in ascending order.
 * @param [true,false] dropna  Don't include the count of NAs.
 *
 * @return [Hash] A hash consisting of the counts of the values
 */
static VALUE
ary_value_counts(int argc, VALUE* argv, VALUE ary)
{
  return any_value_counts(argc, argv, ary, ary_value_counts_without_sort);
}
|
2015
|
+
|
2016
|
+
static int
|
2017
|
+
hash_value_counts_without_sort_i(VALUE key, VALUE val, VALUE arg)
|
2018
|
+
{
|
2019
|
+
struct value_counts_memo *memo = (struct value_counts_memo *)arg;
|
2020
|
+
|
2021
|
+
if (is_na(val)) {
|
2022
|
+
++memo->na_count;
|
2023
|
+
|
2024
|
+
if (memo->dropna_p) {
|
2025
|
+
return ST_CONTINUE;
|
2026
|
+
}
|
2027
|
+
}
|
2028
|
+
else {
|
2029
|
+
VALUE cnt = rb_hash_lookup2(memo->result, val, INT2FIX(0));
|
2030
|
+
rb_hash_aset(memo->result, val, rb_int_plus(cnt, INT2FIX(1)));
|
2031
|
+
}
|
2032
|
+
|
2033
|
+
return ST_CONTINUE;
|
2034
|
+
}
|
2035
|
+
|
2036
|
+
/* Counter for hashes: tallies the values (not the keys) of `hash` into
 * `memo`, then records the number of entries as the total.
 */
static void
hash_value_counts_without_sort(VALUE hash, struct value_counts_memo *memo)
{
  rb_hash_foreach(hash, hash_value_counts_without_sort_i, (VALUE)memo);
  memo->total = RHASH_SIZE(hash);
}
|
2042
|
+
|
2043
|
+
/* call-seq:
 *   hash.value_counts(normalize: false, sort: true, ascending: false, dropna: true) -> hash
 *
 * Returns a hash that contains the counts of the values in `hash`.
 *
 * This method treats `nil` and NaN (any object that answers `true` to
 * `nan?`) as the same thing, and stores their combined count as the value
 * for the `nil` key.
 *
 * @param [false,true] normalize  If `true`, the result contains the relative
 *   frequencies of the unique values instead of their counts.
 * @param [true,false] sort  Sort by values.
 * @param [false,true] ascending  Sort in ascending order.
 * @param [true,false] dropna  Don't include the count of NAs.
 *
 * @return [Hash] A hash consisting of the counts of the values
 */
static VALUE
hash_value_counts(int argc, VALUE* argv, VALUE hash)
{
  return any_value_counts(argc, argv, hash, hash_value_counts_without_sort);
}
|
2064
|
+
|
2065
|
+
static long
|
2066
|
+
histogram_edge_bin_index(VALUE edge, VALUE rb_x, int left_p)
|
2067
|
+
{
|
2068
|
+
double x, y;
|
2069
|
+
long lo, hi, mid;
|
2070
|
+
|
2071
|
+
x = NUM2DBL(rb_x);
|
2072
|
+
|
2073
|
+
lo = -1;
|
2074
|
+
hi = RARRAY_LEN(edge);
|
2075
|
+
|
2076
|
+
if (left_p) {
|
2077
|
+
while (hi - lo > 1) {
|
2078
|
+
mid = lo + (hi - lo)/2;
|
2079
|
+
y = NUM2DBL(RARRAY_AREF(edge, mid));
|
2080
|
+
if (y <= x) {
|
2081
|
+
lo = mid;
|
2082
|
+
}
|
2083
|
+
else {
|
2084
|
+
hi = mid;
|
2085
|
+
}
|
2086
|
+
}
|
2087
|
+
return lo;
|
2088
|
+
}
|
2089
|
+
else {
|
2090
|
+
while (hi - lo > 1) {
|
2091
|
+
mid = lo + (hi - lo)/2;
|
2092
|
+
y = NUM2DBL(RARRAY_AREF(edge, mid));
|
2093
|
+
if (y < x) {
|
2094
|
+
lo = mid;
|
2095
|
+
}
|
2096
|
+
else {
|
2097
|
+
hi = mid;
|
2098
|
+
}
|
2099
|
+
}
|
2100
|
+
return hi - 1;
|
2101
|
+
}
|
2102
|
+
}
|
2103
|
+
|
2104
|
+
static void
|
2105
|
+
histogram_weights_push_values(VALUE weights, VALUE edge, VALUE values, int left_p)
|
2106
|
+
{
|
2107
|
+
VALUE x, cur;
|
2108
|
+
long i, n, bi;
|
2109
|
+
|
2110
|
+
n = RARRAY_LEN(values);
|
2111
|
+
for (i = 0; i < n; ++i) {
|
2112
|
+
x = RARRAY_AREF(values, i);
|
2113
|
+
|
2114
|
+
bi = histogram_edge_bin_index(edge, x, left_p);
|
2115
|
+
|
2116
|
+
cur = rb_ary_entry(weights, bi);
|
2117
|
+
if (NIL_P(cur)) {
|
2118
|
+
cur = INT2FIX(1);
|
2119
|
+
}
|
2120
|
+
else {
|
2121
|
+
cur = rb_funcall(cur, idPLUS, 1, INT2FIX(1));
|
2122
|
+
}
|
2123
|
+
|
2124
|
+
rb_ary_store(weights, bi, cur);
|
2125
|
+
}
|
2126
|
+
}
|
2127
|
+
|
2128
|
+
static int
|
2129
|
+
opt_closed_left_p(VALUE opts)
|
2130
|
+
{
|
2131
|
+
int left_p = 1;
|
2132
|
+
|
2133
|
+
if (!NIL_P(opts)) {
|
2134
|
+
VALUE closed;
|
2135
|
+
#ifdef HAVE_RB_GET_KWARGS
|
2136
|
+
ID kwargs = id_closed;
|
2137
|
+
rb_get_kwargs(opts, &kwargs, 0, 1, &closed);
|
2138
|
+
#else
|
2139
|
+
closed = rb_hash_lookup2(opts, ID2SYM(id_closed), sym_left);
|
2140
|
+
#endif
|
2141
|
+
left_p = (closed != sym_right);
|
2142
|
+
if (left_p && closed != sym_left) {
|
2143
|
+
rb_raise(rb_eArgError, "invalid value for :closed keyword "
|
2144
|
+
"(%"PRIsVALUE" for :left or :right)", closed);
|
2145
|
+
}
|
2146
|
+
}
|
2147
|
+
|
2148
|
+
return left_p;
|
2149
|
+
}
|
2150
|
+
|
2151
|
+
static inline long
|
2152
|
+
sturges(long n)
|
2153
|
+
{
|
2154
|
+
if (n == 0) return 1L;
|
2155
|
+
return (long)(ceil(log2(n)) + 1);
|
2156
|
+
}
|
2157
|
+
|
2158
|
+
static VALUE
|
2159
|
+
ary_histogram_calculate_edge_lo_hi(const double lo, const double hi, const long nbins, const int left_p)
|
2160
|
+
{
|
2161
|
+
VALUE edge;
|
2162
|
+
double bw, lbw, start, step, divisor, r;
|
2163
|
+
long i, len;
|
2164
|
+
|
2165
|
+
if (hi == lo) {
|
2166
|
+
start = hi;
|
2167
|
+
step = 1;
|
2168
|
+
divisor = 1;
|
2169
|
+
len = 1;
|
2170
|
+
}
|
2171
|
+
else {
|
2172
|
+
bw = (hi - lo) / nbins;
|
2173
|
+
lbw = log10(bw);
|
2174
|
+
if (lbw >= 0) {
|
2175
|
+
step = pow(10, floor(lbw));
|
2176
|
+
r = bw / step;
|
2177
|
+
if (r <= 1.1) {
|
2178
|
+
/* do nothing */
|
2179
|
+
}
|
2180
|
+
else if (r <= 2.2) {
|
2181
|
+
step *= 2;
|
2182
|
+
}
|
2183
|
+
else if (r <= 5.5) {
|
2184
|
+
step *= 5;
|
2185
|
+
}
|
2186
|
+
else {
|
2187
|
+
step *= 10;
|
2188
|
+
}
|
2189
|
+
divisor = 1.0;
|
2190
|
+
start = step * floor(lo / step);
|
2191
|
+
len = (long)ceil((hi - start) / step);
|
2192
|
+
}
|
2193
|
+
else {
|
2194
|
+
divisor = pow(10, -floor(lbw));
|
2195
|
+
r = bw * divisor;
|
2196
|
+
if (r <= 1.1) {
|
2197
|
+
/* do nothing */
|
2198
|
+
}
|
2199
|
+
else if (r <= 2.2) {
|
2200
|
+
divisor /= 2;
|
2201
|
+
}
|
2202
|
+
else if (r <= 5.5) {
|
2203
|
+
divisor /= 5;
|
2204
|
+
}
|
2205
|
+
else {
|
2206
|
+
divisor /= 10;
|
2207
|
+
}
|
2208
|
+
step = 1.0;
|
2209
|
+
start = floor(lo * divisor);
|
2210
|
+
len = (long)ceil(hi * divisor - start);
|
2211
|
+
}
|
2212
|
+
}
|
2213
|
+
|
2214
|
+
if (left_p) {
|
2215
|
+
while (lo < start/divisor) {
|
2216
|
+
start -= step;
|
2217
|
+
}
|
2218
|
+
while ((start + (len - 1)*step)/divisor <= hi) {
|
2219
|
+
++len;
|
2220
|
+
}
|
2221
|
+
}
|
2222
|
+
else {
|
2223
|
+
while (lo <= start/divisor) {
|
2224
|
+
start -= step;
|
2225
|
+
}
|
2226
|
+
while ((start + (len - 1)*step)/divisor < hi) {
|
2227
|
+
++len;
|
2228
|
+
}
|
2229
|
+
}
|
2230
|
+
|
2231
|
+
edge = rb_ary_new_capa(len);
|
2232
|
+
for (i = 0; i < len; ++i) {
|
2233
|
+
rb_ary_push(edge, DBL2NUM(start/divisor));
|
2234
|
+
start += step;
|
2235
|
+
}
|
2236
|
+
|
2237
|
+
return edge;
|
2238
|
+
}
|
2239
|
+
|
2240
|
+
static VALUE
|
2241
|
+
ary_histogram_calculate_edge(VALUE ary, const long nbins, const int left_p)
|
2242
|
+
{
|
2243
|
+
long n;
|
2244
|
+
VALUE minmax;
|
2245
|
+
VALUE edge = Qnil;
|
2246
|
+
double lo, hi;
|
2247
|
+
|
2248
|
+
Check_Type(ary, T_ARRAY);
|
2249
|
+
n = RARRAY_LEN(ary);
|
2250
|
+
|
2251
|
+
if (n == 0 && nbins < 0) {
|
2252
|
+
rb_raise(rb_eArgError, "nbins must be >= 0 for an empty array, got %ld", nbins);
|
2253
|
+
}
|
2254
|
+
else if (n > 0 && nbins < 1) {
|
2255
|
+
rb_raise(rb_eArgError, "nbins must be >= 1 for a non-empty array, got %ld", nbins);
|
2256
|
+
}
|
2257
|
+
else if (n == 0) {
|
2258
|
+
edge = rb_ary_new_capa(1);
|
2259
|
+
rb_ary_push(edge, DBL2NUM(0.0));
|
2260
|
+
return edge;
|
2261
|
+
}
|
2262
|
+
|
2263
|
+
minmax = rb_funcall(ary, rb_intern("minmax"), 0);
|
2264
|
+
lo = NUM2DBL(RARRAY_AREF(minmax, 0));
|
2265
|
+
hi = NUM2DBL(RARRAY_AREF(minmax, 1));
|
2266
|
+
|
2267
|
+
edge = ary_histogram_calculate_edge_lo_hi(lo, hi, nbins, left_p);
|
2268
|
+
|
2269
|
+
return edge;
|
2270
|
+
}
|
2271
|
+
|
2272
|
+
/* call-seq:
|
2273
|
+
* ary.histogram(nbins=:auto, closed: :left)
|
2274
|
+
*
|
2275
|
+
* @param [Integer] nbins The approximate number of bins
|
2276
|
+
* @param [:left, :right] closed
|
2277
|
+
* If :left (the default), the bin interval are left-closed.
|
2278
|
+
* If :right, the bin interval are right-closed.
|
2279
|
+
*
|
2280
|
+
* @return [EnumerableStatistics::Histogram] The histogram struct.
|
2281
|
+
*/
|
2282
|
+
static VALUE
|
2283
|
+
ary_histogram(int argc, VALUE *argv, VALUE ary)
|
2284
|
+
{
|
2285
|
+
VALUE arg0, opts, edge, weights;
|
2286
|
+
int left_p;
|
2287
|
+
long nbins;
|
2288
|
+
|
2289
|
+
rb_scan_args(argc, argv, "01:", &arg0, &opts);
|
2290
|
+
if (NIL_P(arg0)) {
|
2291
|
+
nbins = sturges(RARRAY_LEN(ary));
|
2292
|
+
}
|
2293
|
+
else {
|
2294
|
+
nbins = NUM2LONG(arg0);
|
2295
|
+
}
|
2296
|
+
left_p = opt_closed_left_p(opts);
|
2297
|
+
|
2298
|
+
edge = ary_histogram_calculate_edge(ary, nbins, left_p);
|
2299
|
+
weights = rb_ary_new_capa(RARRAY_LEN(edge) - 1);
|
2300
|
+
histogram_weights_push_values(weights, edge, ary, left_p);
|
2301
|
+
|
2302
|
+
return rb_struct_new(cHistogram, edge, weights,
|
2303
|
+
left_p ? sym_left : sym_right,
|
2304
|
+
Qfalse);
|
2305
|
+
}
|
2306
|
+
|
1490
2307
|
void
|
1491
2308
|
Init_extension(void)
|
1492
2309
|
{
|
2310
|
+
VALUE mEnumerableStatistics;
|
2311
|
+
|
1493
2312
|
#ifndef HAVE_ENUM_SUM
|
1494
2313
|
rb_define_method(rb_mEnumerable, "sum", enum_sum, -1);
|
1495
2314
|
#endif
|
@@ -1499,6 +2318,7 @@ Init_extension(void)
|
|
1499
2318
|
rb_define_method(rb_mEnumerable, "variance", enum_variance, -1);
|
1500
2319
|
rb_define_method(rb_mEnumerable, "mean_stdev", enum_mean_stdev, -1);
|
1501
2320
|
rb_define_method(rb_mEnumerable, "stdev", enum_stdev, -1);
|
2321
|
+
rb_define_method(rb_mEnumerable, "value_counts", enum_value_counts, -1);
|
1502
2322
|
|
1503
2323
|
#ifndef HAVE_ARRAY_SUM
|
1504
2324
|
rb_define_method(rb_cArray, "sum", ary_sum, -1);
|
@@ -1508,10 +2328,20 @@ Init_extension(void)
|
|
1508
2328
|
rb_define_method(rb_cArray, "variance", ary_variance, -1);
|
1509
2329
|
rb_define_method(rb_cArray, "mean_stdev", ary_mean_stdev, -1);
|
1510
2330
|
rb_define_method(rb_cArray, "stdev", ary_stdev, -1);
|
2331
|
+
rb_define_method(rb_cArray, "percentile", ary_percentile, 1);
|
2332
|
+
rb_define_method(rb_cArray, "median", ary_median, 0);
|
2333
|
+
rb_define_method(rb_cArray, "value_counts", ary_value_counts, -1);
|
2334
|
+
|
2335
|
+
rb_define_method(rb_cHash, "value_counts", hash_value_counts, -1);
|
1511
2336
|
|
1512
2337
|
half_in_rational = nurat_s_new_internal(rb_cRational, INT2FIX(1), INT2FIX(2));
|
1513
2338
|
rb_gc_register_mark_object(half_in_rational);
|
1514
2339
|
|
2340
|
+
mEnumerableStatistics = rb_const_get_at(rb_cObject, rb_intern("EnumerableStatistics"));
|
2341
|
+
cHistogram = rb_const_get_at(mEnumerableStatistics, rb_intern("Histogram"));
|
2342
|
+
|
2343
|
+
rb_define_method(rb_cArray, "histogram", ary_histogram, -1);
|
2344
|
+
|
1515
2345
|
idPLUS = '+';
|
1516
2346
|
idMINUS = '-';
|
1517
2347
|
idSTAR = '*';
|
@@ -1523,8 +2353,14 @@ Init_extension(void)
|
|
1523
2353
|
id_negate = rb_intern("-@");
|
1524
2354
|
id_to_f = rb_intern("to_f");
|
1525
2355
|
id_cmp = rb_intern("<=>");
|
2356
|
+
id_nan_p = rb_intern("nan?");
|
1526
2357
|
id_each = rb_intern("each");
|
1527
2358
|
id_real_p = rb_intern("real?");
|
1528
2359
|
id_sum = rb_intern("sum");
|
1529
2360
|
id_population = rb_intern("population");
|
2361
|
+
id_closed = rb_intern("closed");
|
2362
|
+
id_edge = rb_intern("edge");
|
2363
|
+
|
2364
|
+
sym_left = ID2SYM(rb_intern("left"));
|
2365
|
+
sym_right = ID2SYM(rb_intern("right"));
|
1530
2366
|
}
|