distribution 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data.tar.gz.sig +0 -0
- data/History.txt +9 -0
- data/Manifest.txt +12 -0
- data/README.txt +24 -7
- data/Rakefile +2 -0
- data/benchmark/binomial_coefficient.rb +55 -0
- data/benchmark/factorial_method.rb +4 -1
- data/benchmark/odd.rb +22 -0
- data/lib/distribution.rb +11 -7
- data/lib/distribution/binomial.rb +22 -20
- data/lib/distribution/binomial/gsl.rb +14 -0
- data/lib/distribution/binomial/java.rb +9 -0
- data/lib/distribution/binomial/ruby.rb +26 -0
- data/lib/distribution/bivariatenormal.rb +7 -5
- data/lib/distribution/bivariatenormal/java.rb +9 -0
- data/lib/distribution/chisquare.rb +1 -0
- data/lib/distribution/chisquare/gsl.rb +3 -3
- data/lib/distribution/chisquare/java.rb +9 -0
- data/lib/distribution/f.rb +1 -0
- data/lib/distribution/f/gsl.rb +3 -3
- data/lib/distribution/f/java.rb +9 -0
- data/lib/distribution/hypergeometric.rb +2 -0
- data/lib/distribution/hypergeometric/gsl.rb +2 -2
- data/lib/distribution/hypergeometric/java.rb +9 -0
- data/lib/distribution/hypergeometric/ruby.rb +24 -27
- data/lib/distribution/math_extension.rb +90 -8
- data/lib/distribution/normal.rb +1 -0
- data/lib/distribution/normal/java.rb +9 -0
- data/lib/distribution/t.rb +2 -0
- data/lib/distribution/t/java.rb +9 -0
- data/lib/distribution/t/statistics2.rb +1 -1
- data/spec/binomial_spec.rb +118 -0
- data/spec/distribution_spec.rb +1 -1
- data/spec/f_spec.rb +1 -1
- data/spec/hypergeometric_spec.rb +49 -13
- data/spec/math_extension_spec.rb +57 -9
- data/spec/normal_spec.rb +7 -3
- data/spec/shorthand_spec.rb +16 -2
- data/spec/t_spec.rb +1 -1
- metadata +44 -5
- metadata.gz.sig +0 -0
data.tar.gz.sig
CHANGED
Binary file
|
data/History.txt
CHANGED
@@ -1,3 +1,12 @@
|
|
1
|
+
=== 0.3.0 / 2011-01-28
|
2
|
+
|
3
|
+
* Included support for binomial distribution. p_value is not accurate
|
4
|
+
* Included alias for ruby exact methods on discrete distributions, when they are available
|
5
|
+
* Works on Jruby and Ruby 1.8.7
|
6
|
+
* Binomial coefficient optimized. Falling factorial method with Swing Prime on factorial provides a 10x improvement over naive version.
|
7
|
+
* Working on binomial coefficient using gamma. The same problem as using Stirling: faster than the exact version, but when it requires BigDecimal, it is 2-3x slower
|
8
|
+
* Deleted Fixnum extension on Hypergeometric
|
9
|
+
|
1
10
|
=== 0.2.0 / 2011-01-27
|
2
11
|
* Shorthands for F and T are 'fdist' and 'tdist' now, to avoid confusion
|
3
12
|
* Added Hypergeometric distribution with pdf, cdf, p_value. The Ruby version uses a very slow factorial process, but also has pdf_with_stirling if you don't care about accuracy at all. This latter method needs to be improved, actually, in terms of which choose function it calls for the numerator and denominator. Once that's done, it can replace the slower pdf function. [John Woods]
|
data/Manifest.txt
CHANGED
@@ -3,35 +3,47 @@ History.txt
|
|
3
3
|
Manifest.txt
|
4
4
|
README.txt
|
5
5
|
Rakefile
|
6
|
+
benchmark/binomial_coefficient.rb
|
6
7
|
benchmark/factorial_method.rb
|
8
|
+
benchmark/odd.rb
|
7
9
|
bin/distribution
|
8
10
|
lib/distribution.rb
|
9
11
|
lib/distribution/binomial.rb
|
12
|
+
lib/distribution/binomial/gsl.rb
|
13
|
+
lib/distribution/binomial/java.rb
|
14
|
+
lib/distribution/binomial/ruby.rb
|
10
15
|
lib/distribution/bivariatenormal.rb
|
11
16
|
lib/distribution/bivariatenormal/gsl.rb
|
17
|
+
lib/distribution/bivariatenormal/java.rb
|
12
18
|
lib/distribution/bivariatenormal/ruby.rb
|
13
19
|
lib/distribution/bivariatenormal/statistics2.rb
|
14
20
|
lib/distribution/chisquare.rb
|
15
21
|
lib/distribution/chisquare/gsl.rb
|
22
|
+
lib/distribution/chisquare/java.rb
|
16
23
|
lib/distribution/chisquare/ruby.rb
|
17
24
|
lib/distribution/chisquare/statistics2.rb
|
18
25
|
lib/distribution/f.rb
|
19
26
|
lib/distribution/f/gsl.rb
|
27
|
+
lib/distribution/f/java.rb
|
20
28
|
lib/distribution/f/ruby.rb
|
21
29
|
lib/distribution/f/statistics2.rb
|
22
30
|
lib/distribution/hypergeometric.rb
|
23
31
|
lib/distribution/hypergeometric/gsl.rb
|
32
|
+
lib/distribution/hypergeometric/java.rb
|
24
33
|
lib/distribution/hypergeometric/ruby.rb
|
25
34
|
lib/distribution/math_extension.rb
|
26
35
|
lib/distribution/normal.rb
|
27
36
|
lib/distribution/normal/gsl.rb
|
37
|
+
lib/distribution/normal/java.rb
|
28
38
|
lib/distribution/normal/ruby.rb
|
29
39
|
lib/distribution/normal/statistics2.rb
|
30
40
|
lib/distribution/normalmultivariate.rb
|
31
41
|
lib/distribution/t.rb
|
32
42
|
lib/distribution/t/gsl.rb
|
43
|
+
lib/distribution/t/java.rb
|
33
44
|
lib/distribution/t/ruby.rb
|
34
45
|
lib/distribution/t/statistics2.rb
|
46
|
+
spec/binomial_spec.rb
|
35
47
|
spec/bivariatenormal_spec.rb
|
36
48
|
spec/chisquare_spec.rb
|
37
49
|
spec/distribution_spec.rb
|
data/README.txt
CHANGED
@@ -11,17 +11,26 @@ Includes code from statistics2
|
|
11
11
|
|
12
12
|
== FEATURES/PROBLEMS:
|
13
13
|
|
14
|
-
*
|
15
|
-
*
|
14
|
+
* Very fast ruby 1.8.7/1.9.+ implementation, with improved methods to calculate factorials and other common functions
|
15
|
+
* All methods tested on several ranges. See spec/
|
16
|
+
* On Jruby, BivariateNormal returns incorrect pdf
|
16
17
|
|
17
18
|
== API structure
|
18
19
|
|
19
20
|
Distribution::<name>.(cdf|pdf|p_value|rng)
|
20
21
|
|
22
|
+
On discrete distributions, exact Ruby implementations of pdf, cdf and p_value could be provided, using
|
23
|
+
|
24
|
+
Distribution::<name>.exact_(cdf|pdf|p_value)
|
25
|
+
|
21
26
|
module Distribution::Shorthand provides (you guessed it) shorthand methods to call all methods
|
22
27
|
|
23
28
|
<Distribution shortname>_(cdf|pdf|p|r)
|
24
29
|
|
30
|
+
On discrete distributions, exact cdf, pdf and p_value are
|
31
|
+
|
32
|
+
<Distribution shortname>_(ecdf|epdf|ep)
|
33
|
+
|
25
34
|
Shortnames are:
|
26
35
|
|
27
36
|
* Normal: norm
|
@@ -29,14 +38,17 @@ Shortnames are:
|
|
29
38
|
* T: tdist
|
30
39
|
* F: fdist
|
31
40
|
* Chi Square: chisq
|
41
|
+
* Binomial: bino
|
32
42
|
* Hypergeometric: hypg
|
43
|
+
|
33
44
|
For example
|
34
45
|
|
35
|
-
Distribution::T.
|
46
|
+
Distribution::T.cdf
|
36
47
|
|
37
48
|
could be called after including Distribution::Shorthand
|
38
|
-
|
39
|
-
|
49
|
+
|
50
|
+
tdist_cdf
|
51
|
+
|
40
52
|
|
41
53
|
== SYNOPSIS:
|
42
54
|
# Returns Gaussian PDF for x
|
@@ -48,15 +60,20 @@ could be called after including Distribution::Shorthand
|
|
48
60
|
|
49
61
|
== REQUIREMENTS:
|
50
62
|
|
51
|
-
I try to provide a Ruby version for each method.
|
63
|
+
I try to provide a Ruby version for each method. To increase (notably!) the speed, please install
|
52
64
|
|
53
65
|
* Ruby 1.8-1.9: gsl or statistics2
|
54
|
-
* Java: Apache Math
|
66
|
+
* Java: Apache Math (not yet implemented)
|
55
67
|
|
56
68
|
== INSTALL:
|
57
69
|
|
58
70
|
gem install distribution
|
59
71
|
|
72
|
+
To speed up
|
73
|
+
|
74
|
+
gem install gsl
|
75
|
+
gem install statistics2
|
76
|
+
|
60
77
|
== DEVELOPERS:
|
61
78
|
|
62
79
|
After checking out the source, run:
|
data/Rakefile
CHANGED
@@ -0,0 +1,55 @@
|
|
1
|
+
$:.unshift(File.dirname(__FILE__)+"/../lib")
|
2
|
+
require 'distribution'
|
3
|
+
require 'bench_press'
|
4
|
+
|
5
|
+
extend BenchPress
|
6
|
+
|
7
|
+
name 'binomial coefficient: multiplicative, factorial and optimized factorial methods'
|
8
|
+
author 'Claudio Bustos'
|
9
|
+
date '2011-01-27'
|
10
|
+
summary "Exact calculation of Binomial Coefficient could be obtained using multiplicative, pure factorial or optimized factorial algorithm.
|
11
|
+
Which one is faster?
|
12
|
+
|
13
|
+
Lower k is the best for all
|
14
|
+
k=n/2 is the worst case for optimized algorithm
|
15
|
+
k near n is the worst for multiplicative
|
16
|
+
|
17
|
+
The factorial method uses the fastest Swing Prime Algorithm."
|
18
|
+
|
19
|
+
reps 10 #number of repetitions
|
20
|
+
|
21
|
+
x=100
|
22
|
+
|
23
|
+
n=100
|
24
|
+
k=50
|
25
|
+
|
26
|
+
samples=10.times.map {|i| 2**(i+1)}
|
27
|
+
|
28
|
+
|
29
|
+
|
30
|
+
measure "Multiplicative" do
|
31
|
+
samples.each do |n|
|
32
|
+
[5,n/2,n-1].each do |k|
|
33
|
+
k=[k,n-k].min
|
34
|
+
(1..k).inject(1) {|ac, i| (ac*(n-k+i).quo(i))}
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
measure "Factorial" do
|
40
|
+
samples.each do |n|
|
41
|
+
[5,n/2,n-1].each do |k|
|
42
|
+
k=[k,n-k].min
|
43
|
+
Math.factorial(n).quo(Math.factorial(k) * Math.factorial(n - k))
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
measure "Optimized Factorial" do
|
49
|
+
samples.each do |n|
|
50
|
+
[5,n/2,n-1].each do |k|
|
51
|
+
k=[k,n-k].min
|
52
|
+
(((n-k+1)..n).inject(1) {|ac,v| ac * v}).quo(Math.factorial(k))
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
@@ -8,7 +8,9 @@ name 'aprox vs exact factorization method'
|
|
8
8
|
author 'Claudio Bustos'
|
9
9
|
date '2011-01-27'
|
10
10
|
summary "
|
11
|
-
Factorization requires a lot of processing, so approximation method
|
11
|
+
Factorization requires a lot of processing, so approximation method could be required. But for greats value, bigdecimal are required and things start to get harder.
|
12
|
+
* Approximation (fast_factorial): Luschny f.3
|
13
|
+
* Exact (factorial): Luschny Swing Prime
|
12
14
|
"
|
13
15
|
|
14
16
|
reps 10 #number of repetitions
|
@@ -20,6 +22,7 @@ measure "Math.factorial(#{x})" do
|
|
20
22
|
end
|
21
23
|
|
22
24
|
measure "Math.fast_factorial(#{x})" do
|
25
|
+
|
23
26
|
Math.fast_factorial(x)
|
24
27
|
end
|
25
28
|
|
data/benchmark/odd.rb
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
|
2
|
+
require 'bench_press'
|
3
|
+
|
4
|
+
extend BenchPress
|
5
|
+
|
6
|
+
name 'n&1==1 vs n%2==1 to detect odd numbers'
|
7
|
+
author 'Claudio Bustos'
|
8
|
+
date '2011-01-28'
|
9
|
+
summary "
|
10
|
+
Which is faster, n%1==1 or n%2==1
|
11
|
+
"
|
12
|
+
|
13
|
+
reps 10_000 #number of repetitions
|
14
|
+
n=100000
|
15
|
+
measure "Using &" do
|
16
|
+
n%1==1
|
17
|
+
end
|
18
|
+
|
19
|
+
measure "Using %" do
|
20
|
+
n%2==1
|
21
|
+
end
|
22
|
+
|
data/lib/distribution.rb
CHANGED
@@ -39,6 +39,7 @@ end
|
|
39
39
|
end
|
40
40
|
require 'distribution/math_extension'
|
41
41
|
|
42
|
+
|
42
43
|
# Several distributions modules to calculate pdf, cdf, inverse cdf and generate
|
43
44
|
# pseudo-random numbers for several statistical distributions
|
44
45
|
#
|
@@ -48,15 +49,15 @@ require 'distribution/math_extension'
|
|
48
49
|
# Distribution::Normal.p_value(0.95)
|
49
50
|
# => 1.64485364660836
|
50
51
|
module Distribution
|
51
|
-
VERSION="0.
|
52
|
+
VERSION="0.3.0"
|
52
53
|
|
53
54
|
module Shorthand
|
54
|
-
EQUIVALENCES={:p_value=>:p, :cdf=>:cdf, :pdf=>:pdf, :rng=>:r}
|
55
|
-
def self.add_shortcut(sh,m
|
55
|
+
EQUIVALENCES={:p_value=>:p, :cdf=>:cdf, :pdf=>:pdf, :rng=>:r, :exact_pdf=>:epdf, :exact_cdf=>:ecdf, :exact_p_value=>:ep}
|
56
|
+
def self.add_shortcut(sh,m, &block)
|
56
57
|
if EQUIVALENCES.include? m.to_sym
|
57
58
|
sh_name=sh+"_#{m}"
|
58
59
|
define_method(sh_name,&block)
|
59
|
-
sh_name=sh+"_#{EQUIVALENCES[m]}"
|
60
|
+
sh_name=sh+"_#{EQUIVALENCES[m.to_sym]}"
|
60
61
|
define_method(sh_name,&block)
|
61
62
|
|
62
63
|
end
|
@@ -107,9 +108,10 @@ module Distribution
|
|
107
108
|
#
|
108
109
|
# Kids: Metaprogramming trickery! Don't do at work.
|
109
110
|
# This section was created between a very long reunion
|
110
|
-
# and a
|
111
|
-
def create_distribution_methods()
|
112
|
-
|
111
|
+
# and a 456 Km. travel
|
112
|
+
def create_distribution_methods()
|
113
|
+
|
114
|
+
Distribution.libraries_order.each do |l_name|
|
113
115
|
if const_defined? l_name
|
114
116
|
l =const_get(l_name)
|
115
117
|
# Add methods from engine to base, if not yet included
|
@@ -141,6 +143,8 @@ module Distribution
|
|
141
143
|
autoload(:T, 'distribution/t')
|
142
144
|
autoload(:F, 'distribution/f')
|
143
145
|
autoload(:BivariateNormal, 'distribution/bivariatenormal')
|
146
|
+
autoload(:Binomial, 'distribution/binomial')
|
147
|
+
|
144
148
|
autoload(:Hypergeometric, 'distribution/hypergeometric')
|
145
149
|
end
|
146
150
|
|
@@ -1,26 +1,28 @@
|
|
1
1
|
require 'distribution/binomial/ruby'
|
2
2
|
require 'distribution/binomial/gsl'
|
3
|
+
require 'distribution/binomial/java'
|
3
4
|
module Distribution
|
4
5
|
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
##
|
12
|
-
# :singleton-method: pdf(x,k)
|
13
|
-
# Returns the integral of T distribution
|
14
|
-
# with +k+ degrees of freedom over [0, +x+]
|
15
|
-
|
16
|
-
##
|
17
|
-
# :singleton-method: p_value(qn, k)
|
18
|
-
# Return the P-value of the corresponding integral +qn+ with
|
19
|
-
# +k+ degrees of freedom
|
6
|
+
# Calculate statistics for Binomial Distribution.
|
7
|
+
module Binomial
|
8
|
+
SHORTHAND = 'bino'
|
9
|
+
|
10
|
+
extend Distributable
|
11
|
+
create_distribution_methods
|
20
12
|
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
13
|
+
##
|
14
|
+
# :singleton-method: pdf(x,k)
|
15
|
+
# Returns the integral of T distribution
|
16
|
+
# with +k+ degrees of freedom over [0, +x+]
|
17
|
+
|
18
|
+
##
|
19
|
+
# :singleton-method: p_value(qn, k)
|
20
|
+
# Return the P-value of the corresponding integral +qn+ with
|
21
|
+
# +k+ degrees of freedom
|
22
|
+
|
23
|
+
##
|
24
|
+
# :singleton-method: cdf(x,k)
|
25
|
+
# Returns the integral of T distribution
|
26
|
+
# with +k+ degrees of freedom over [0, +x+]
|
27
|
+
end
|
26
28
|
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module Distribution
|
2
|
+
module Binomial
|
3
|
+
module Ruby_
|
4
|
+
class << self
|
5
|
+
def pdf(k,n,pr)
|
6
|
+
Math.binomial_coefficient(n,k)*(pr**k)*(1-pr)**(n-k)
|
7
|
+
end
|
8
|
+
def cdf(k,n,pr)
|
9
|
+
#(0..x.floor).inject(0) {|ac,i| ac+pdf(i,n,pr)}
|
10
|
+
Math.regularized_beta_function(1-pr,n - k,k+1)
|
11
|
+
end
|
12
|
+
def p_value(prob,n,pr)
|
13
|
+
ac=0
|
14
|
+
(0..n).each do |i|
|
15
|
+
ac+=pdf(i,n,pr)
|
16
|
+
return i if ac>=prob
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
alias :exact_pdf :pdf
|
21
|
+
|
22
|
+
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
@@ -1,5 +1,6 @@
|
|
1
1
|
require 'distribution/bivariatenormal/ruby'
|
2
2
|
require 'distribution/bivariatenormal/gsl'
|
3
|
+
require 'distribution/bivariatenormal/java'
|
3
4
|
module Distribution
|
4
5
|
# Calculate pdf and cdf for bivariate normal distribution.
|
5
6
|
#
|
@@ -12,14 +13,15 @@ module Distribution
|
|
12
13
|
create_distribution_methods
|
13
14
|
|
14
15
|
##
|
15
|
-
# :singleton-method: pdf(
|
16
|
-
# Probability density function for
|
16
|
+
# :singleton-method: pdf(k,n,prob)
|
17
|
+
# Probability density function for exactly +k+ successes in +n+ trials
|
18
|
+
# with success probability +prob+
|
17
19
|
#
|
18
20
|
|
19
21
|
##
|
20
|
-
# :singleton-method: cdf(
|
21
|
-
#
|
22
|
-
#
|
22
|
+
# :singleton-method: cdf(k,n,prob)
|
23
|
+
# Cumulative distribution function for +k+ or fewer successes in +n+ trials
|
24
|
+
# with success probability +prob+
|
23
25
|
|
24
26
|
end
|
25
27
|
end
|
@@ -6,12 +6,12 @@ module Distribution
|
|
6
6
|
|
7
7
|
end
|
8
8
|
def pdf(x,k)
|
9
|
-
GSL::Ran::chisq_pdf(x,k)
|
9
|
+
GSL::Ran::chisq_pdf(x.to_f,k.to_i)
|
10
10
|
end
|
11
11
|
# Return the P-value of the corresponding integral with
|
12
12
|
# k degrees of freedom
|
13
13
|
def p_value(pr,k)
|
14
|
-
GSL::Cdf::chisq_Pinv(pr,k)
|
14
|
+
GSL::Cdf::chisq_Pinv(pr.to_f,k.to_i)
|
15
15
|
end
|
16
16
|
# Chi-square cumulative distribution function (cdf).
|
17
17
|
#
|
@@ -19,7 +19,7 @@ module Distribution
|
|
19
19
|
# with k degrees of freedom over [0, x]
|
20
20
|
#
|
21
21
|
def cdf(x, k)
|
22
|
-
GSL::Cdf::chisq_P(x,k)
|
22
|
+
GSL::Cdf::chisq_P(x.to_f,k.to_i)
|
23
23
|
end
|
24
24
|
end
|
25
25
|
end
|