distribution 0.6.0 → 0.8.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (117) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +7 -0
  3. data/.travis.yml +13 -0
  4. data/.yardopts +5 -0
  5. data/Gemfile +5 -0
  6. data/History.txt +24 -8
  7. data/LICENCE.md +26 -0
  8. data/README.md +155 -0
  9. data/Rakefile +15 -19
  10. data/benchmark/binomial_coefficient.rb +19 -23
  11. data/benchmark/binomial_coefficient/experiment.rb +33 -36
  12. data/benchmark/factorial_hash.rb +7 -8
  13. data/benchmark/factorial_method.rb +4 -6
  14. data/benchmark/odd.rb +6 -7
  15. data/benchmark/power.rb +11 -11
  16. data/bin/distribution +26 -26
  17. data/data/template/spec.erb +7 -6
  18. data/distribution.gemspec +25 -0
  19. data/lib/distribution.rb +79 -124
  20. data/lib/distribution/beta.rb +6 -8
  21. data/lib/distribution/beta/gsl.rb +14 -9
  22. data/lib/distribution/beta/java.rb +1 -1
  23. data/lib/distribution/beta/ruby.rb +41 -7
  24. data/lib/distribution/binomial.rb +10 -11
  25. data/lib/distribution/binomial/gsl.rb +6 -5
  26. data/lib/distribution/binomial/java.rb +1 -1
  27. data/lib/distribution/binomial/ruby.rb +22 -15
  28. data/lib/distribution/bivariatenormal.rb +4 -5
  29. data/lib/distribution/bivariatenormal/gsl.rb +2 -2
  30. data/lib/distribution/bivariatenormal/java.rb +1 -1
  31. data/lib/distribution/bivariatenormal/ruby.rb +245 -254
  32. data/lib/distribution/chisquare.rb +8 -10
  33. data/lib/distribution/chisquare/gsl.rb +24 -19
  34. data/lib/distribution/chisquare/java.rb +1 -1
  35. data/lib/distribution/chisquare/ruby.rb +60 -55
  36. data/lib/distribution/chisquare/statistics2.rb +16 -13
  37. data/lib/distribution/distributable.rb +40 -0
  38. data/lib/distribution/exponential.rb +4 -5
  39. data/lib/distribution/exponential/gsl.rb +13 -9
  40. data/lib/distribution/exponential/ruby.rb +17 -11
  41. data/lib/distribution/f.rb +10 -11
  42. data/lib/distribution/f/gsl.rb +26 -22
  43. data/lib/distribution/f/java.rb +1 -1
  44. data/lib/distribution/f/ruby.rb +104 -105
  45. data/lib/distribution/f/statistics2.rb +22 -19
  46. data/lib/distribution/gamma.rb +5 -7
  47. data/lib/distribution/gamma/gsl.rb +13 -9
  48. data/lib/distribution/gamma/java.rb +1 -1
  49. data/lib/distribution/gamma/ruby.rb +5 -11
  50. data/lib/distribution/hypergeometric.rb +5 -8
  51. data/lib/distribution/hypergeometric/gsl.rb +5 -6
  52. data/lib/distribution/hypergeometric/java.rb +1 -1
  53. data/lib/distribution/hypergeometric/ruby.rb +34 -35
  54. data/lib/distribution/logistic.rb +6 -9
  55. data/lib/distribution/logistic/ruby.rb +14 -9
  56. data/lib/distribution/lognormal.rb +37 -0
  57. data/lib/distribution/lognormal/gsl.rb +21 -0
  58. data/lib/distribution/lognormal/ruby.rb +16 -0
  59. data/lib/distribution/math_extension.rb +187 -231
  60. data/lib/distribution/math_extension/chebyshev_series.rb +281 -272
  61. data/lib/distribution/math_extension/erfc.rb +28 -31
  62. data/lib/distribution/math_extension/exponential_integral.rb +17 -17
  63. data/lib/distribution/math_extension/gammastar.rb +19 -20
  64. data/lib/distribution/math_extension/gsl_utilities.rb +12 -12
  65. data/lib/distribution/math_extension/incomplete_beta.rb +52 -61
  66. data/lib/distribution/math_extension/incomplete_gamma.rb +166 -168
  67. data/lib/distribution/math_extension/log_utilities.rb +20 -22
  68. data/lib/distribution/normal.rb +11 -13
  69. data/lib/distribution/normal/gsl.rb +13 -10
  70. data/lib/distribution/normal/java.rb +30 -1
  71. data/lib/distribution/normal/ruby.rb +69 -59
  72. data/lib/distribution/normal/statistics2.rb +5 -2
  73. data/lib/distribution/normalmultivariate.rb +64 -64
  74. data/lib/distribution/poisson.rb +12 -14
  75. data/lib/distribution/poisson/gsl.rb +7 -7
  76. data/lib/distribution/poisson/java.rb +26 -0
  77. data/lib/distribution/poisson/ruby.rb +38 -9
  78. data/lib/distribution/shorthand.rb +17 -0
  79. data/lib/distribution/t.rb +16 -16
  80. data/lib/distribution/t/gsl.rb +27 -24
  81. data/lib/distribution/t/java.rb +1 -1
  82. data/lib/distribution/t/ruby.rb +99 -100
  83. data/lib/distribution/t/statistics2.rb +19 -19
  84. data/lib/distribution/uniform.rb +26 -0
  85. data/lib/distribution/uniform/gsl.rb +36 -0
  86. data/lib/distribution/uniform/ruby.rb +91 -0
  87. data/lib/distribution/version.rb +3 -0
  88. data/lib/distribution/weibull.rb +10 -0
  89. data/lib/distribution/weibull/gsl.rb +21 -0
  90. data/lib/distribution/weibull/ruby.rb +34 -0
  91. data/spec/beta_spec.rb +48 -50
  92. data/spec/binomial_spec.rb +80 -84
  93. data/spec/bivariatenormal_spec.rb +28 -35
  94. data/spec/chisquare_spec.rb +49 -52
  95. data/spec/distribution_spec.rb +11 -11
  96. data/spec/exponential_spec.rb +48 -39
  97. data/spec/f_spec.rb +73 -71
  98. data/spec/gamma_spec.rb +50 -53
  99. data/spec/hypergeometric_spec.rb +63 -69
  100. data/spec/logistic_spec.rb +31 -37
  101. data/spec/lognormal_spec.rb +54 -0
  102. data/spec/math_extension_spec.rb +192 -209
  103. data/spec/normal_spec.rb +80 -73
  104. data/spec/poisson_spec.rb +78 -36
  105. data/spec/shorthand_spec.rb +19 -22
  106. data/spec/spec_helper.rb +31 -6
  107. data/spec/t_spec.rb +63 -77
  108. data/spec/uniform_spec.rb +154 -0
  109. data/spec/weibull_spec.rb +17 -0
  110. data/vendor/java/commons-math-2.2.jar +0 -0
  111. metadata +91 -111
  112. data.tar.gz.sig +0 -0
  113. data/.autotest +0 -23
  114. data/.gemtest +0 -0
  115. data/Manifest.txt +0 -95
  116. data/README.txt +0 -100
  117. metadata.gz.sig +0 -0
@@ -1,27 +1,31 @@
1
1
  module Distribution
2
2
  module F
3
- module GSL_
4
- class << self
5
- def pdf(x,k1,k2)
6
- GSL::Ran.fdist_pdf(x.to_f,k1,k2)
7
- end
8
- # Return the P-value of the corresponding integral with
9
- # k degrees of freedom
10
- #
11
- # Distribution::F.p_value(0.95,1,2)
12
- def p_value(pr,k1,k2)
13
- GSL::Cdf.fdist_Pinv(pr.to_f,k1,k2)
14
- end
15
- # F cumulative distribution function (cdf).
16
- #
17
- # Returns the integral of F-distribution
18
- # with k1 and k2 degrees of freedom
19
- # over [0, x].
20
- # Distribution::F.cdf(20,3,2)
21
- #
22
- def cdf(x, k1, k2)
23
- GSL::Cdf.fdist_P(x.to_f.to_f,k1,k2)
24
- end
3
+ module GSL_
4
+ class << self
5
+ def pdf(x, k1, k2)
6
+ GSL::Ran.fdist_pdf(x.to_f, k1, k2)
7
+ end
8
+
9
+ # F cumulative distribution function (cdf).
10
+ #
11
+ # Returns the integral of F-distribution
12
+ # with k1 and k2 degrees of freedom
13
+ # over [0, x].
14
+ # Distribution::F.cdf(20,3,2)
15
+ #
16
+ def cdf(x, k1, k2)
17
+ GSL::Cdf.fdist_P(x.to_f.to_f, k1, k2)
18
+ end
19
+
20
+ # Return the P-value of the corresponding integral with
21
+ # k degrees of freedom
22
+ #
23
+ # Distribution::F.p_value(0.95,1,2)
24
+ def quantile(pr, k1, k2)
25
+ GSL::Cdf.fdist_Pinv(pr.to_f, k1, k2)
26
+ end
27
+
28
+ alias_method :p_value, :quantile
25
29
  end
26
30
  end
27
31
  end
@@ -6,4 +6,4 @@ module Distribution
6
6
  end
7
7
  end
8
8
  end
9
- end
9
+ end
@@ -1,116 +1,115 @@
1
1
  module Distribution
2
2
  module F
3
+ # Continuous random number distributions are defined by a probability density function, p(x), such that the probability of x occurring in the infinitesimal range x to x+dx is p dx.
4
+
5
+ # The cumulative distribution function for the lower tail P(x) is defined by the integral,
6
+
7
+ # P(x) = \int_{-\infty}^{x} dx' p(x')
8
+ # and gives the probability of a variate taking a value less than x.
9
+
10
+ # The cumulative distribution function for the upper tail Q(x) is defined by the integral,
11
+
12
+ # Q(x) = \int_{x}^{+\infty} dx' p(x')
13
+ # and gives the probability of a variate taking a value greater than x.
14
+
15
+ # The upper and lower cumulative distribution functions are related by P(x) + Q(x) = 1 and satisfy 0 <= P(x) <= 1, 0 <= Q(x) <= 1.
3
16
  module Ruby_
4
- class << self
17
+ extend Distribution::MathExtension
5
18
 
6
- def c_pdf(f,df)
7
- Distribution::ChiSquare.pdf(f,df)
8
- end
9
- def pdf(x,d1,d2)
10
- Math.sqrt(((d1*x)**d1*(d2**d2)).quo((d1*x+d2)**(d1+d2))).quo( x*Math.beta(d1/2.0, d2/2.0))
11
- end
12
- # F-distribution ([1])
13
- # Integral over [x, \infty)
14
- def q_f(df1, df2, f)
15
- if (f <= 0.0) then return 1.0; end
16
- if (df1 % 2 != 0 && df2 % 2 == 0)
17
- return 1.0 - q_f(df2, df1, 1.0 / f)
18
- end
19
- cos2 = 1.0 / (1.0 + df1.to_f * f / df2.to_f)
20
- sin2 = 1.0 - cos2
21
-
22
- if (df1 % 2 == 0)
23
- prob = cos2 ** (df2.to_f / 2.0)
24
- temp = prob
25
- i = 2
26
- while i < df1
27
- temp *= (df2.to_f + i - 2) * sin2 / i
28
- prob += temp
29
- i += 2
30
- end
31
- return prob
32
- end
33
- prob = Math.atan(Math.sqrt(df2.to_f / (df1.to_f * f)))
34
- temp = Math.sqrt(sin2 * cos2)
35
- i = 3
36
- while i <= df1
37
- prob += temp
38
- temp *= (i - 1).to_f * sin2 / i.to_f;
39
- i += 2.0
40
- end
41
- temp *= df1.to_f
42
- i = 3
43
- while i <= df2
44
- prob -= temp
45
- temp *= (df1.to_f + i - 2) * cos2 / i.to_f
46
- i += 2
47
- end
48
- prob * 2.0 / Math::PI
49
- end
19
+ # functions needed:
20
+ # - pdf
21
+ # - cdf (lower cumulative function, P(x))
22
+ # - Q(x), upper cumulative function
23
+ # - mean
24
+ # - mode
25
+ # - kurtosis
26
+ # - skewness
27
+ # - entropy
28
+ # - "fit" (maximum likelihood?)
29
+ # - expected value (given a function)
30
+ # - lower-tail quantile -> P
31
+ # - upper tail quantile -> Q
50
32
 
51
- # inverse of F-distribution ([2])
52
- def pfsub(x, y, z)
53
- (Math.sqrt(z) - y) / x / 2.0
54
- end
33
+ class << self
34
+ # F Distribution (Ruby) -- Probability Density Function
35
+ def pdf(x, n, m)
36
+ x = x.to_f
37
+ numerator = ((n * x)**n * (m**m)) / (n * x + m)**(n + m)
38
+ denominator = x * Math.beta(n / 2.0, m / 2.0)
55
39
 
56
- # Inverse CDF
57
- # [x, \infty)
58
- def pf(q, n1, n2)
59
- if(q < 0.0 || q > 1.0 || n1 < 1 || n2 < 1)
60
- $stderr.printf("Error : Illegal parameter in pf()!\n")
61
- return 0.0
62
- end
40
+ Math.sqrt(numerator) / denominator
41
+ end
63
42
 
64
- if n1 <= 240 || n2 <= 240
65
- eps = 1.0e-5
66
- if(n2 == 1) then eps = 1.0e-4 end
67
- fw = 0.0
68
- s = 1000.0
69
- loop do
70
- fw += s
71
- if s <= eps then return fw end
72
- if (qe = q_f(n1, n2, fw) - q) == 0.0 then return fw end
73
- if qe < 0.0
74
- fw -= s
75
- s /= 10.0 #/
76
- end
77
- end
78
- end
43
+ # Cumulative Distribution Function.
44
+ def cdf(x, n, m)
45
+ x = x.to_f
46
+ xx = (x * n).to_f / (x * n + m).to_f
47
+ regularized_beta(xx, n / 2.0, m / 2.0)
48
+ end
79
49
 
80
- eps = 1.0e-6
81
- qn = q
82
- if q < 0.5 then qn = 1.0 - q
83
- u = pnorm(qn)
84
- w1 = 2.0 / n1 / 9.0
85
- w2 = 2.0 / n2 / 9.0
86
- w3 = 1.0 - w1
87
- w4 = 1.0 - w2
88
- u2 = u * u
89
- a = w4 * w4 - u2 * w2
90
- b = -2. * w3 * w4
91
- c = w3 * w3 - u2 * w1
92
- d = b * b - 4 * a * c
93
- if(d < 0.0)
94
- fw = pfsub(a, b, 0.0)
95
- else
96
- if(a.abs > eps)
97
- fw = pfsub(a, b, d)
98
- else
99
- if(b.abs > eps) then return -c / b end
100
- fw = pfsub(a, b, 0.0)
101
- end
102
- end
103
- fw * fw * fw
104
- end
105
- end
106
- # F-distribution interface
107
- def cdf(f,n1, n2)
108
- 1.0 - q_f(n1, n2, f)
109
- end
110
- def p_value(y, n1, n2)
111
- pf(1.0 - y, n1, n2)
112
- end
113
-
50
+ # Upper cumulative function.
51
+ #
52
+ # If cdf(x, n, m) = p, then q(x, n, m) = 1 - p
53
+ def q(x, n, m)
54
+ 1.0 - cdf(x, n, m)
55
+ end
56
+
57
+ # Return the F value corresponding to `probability` with degrees of
58
+ # freedom `n` and `m`.
59
+ #
60
+ # If x = quantile(p, n, m), then cdf(x, n, m) = p.
61
+ #
62
+ # Taken from:
63
+ # https://github.com/JuliaLang/Rmath-julia/blob/master/src/qf.c
64
+ def quantile(probability, n, m)
65
+ return Float::NAN if n <= 0.0 || m <= 0.0
66
+
67
+ if n == Float::INFINITY || n == -Float::INFINITY || m == Float::INFINITY || m == -Float::INFINITY
68
+ return 1.0
69
+ end
70
+
71
+ if n <= m && m > 4e5
72
+ return Distribution::ChiSquare.p_value(probability, n) / n.to_f
73
+ elsif n > 4e5 # thus n > m
74
+ return m.to_f / Distribution::ChiSquare.p_value(1.0 - probability, m)
75
+ else
76
+ # The problem is here.
77
+ tmp = Distribution::Beta.p_value(1.0 - probability, m.to_f / 2, n.to_f / 2)
78
+ value = (1.0 / tmp - 1.0) * (m.to_f / n.to_f)
79
+ return value.nan? ? Float::NAN : value
80
+ end
81
+ end
82
+
83
+ alias_method :p_value, :quantile
84
+
85
+ # Complementary quantile function.
86
+ #
87
+ # def cquantile(prob, n, m)
88
+ # quantile(1.0 - probability, n, m)
89
+ # end
90
+
91
+ # Return the corresponding F value for a p-value `y` with `n` and `m`
92
+ # degrees of freedom.
93
+ #
94
+ # @param y [Float] Value corresponding to the desired p-value. Between 0 and 1.
95
+ # @param n [Float] Degree of freedom of the first random variable.
96
+ # @param m [Float] Degree of freedom of the second random variable.
97
+ # @return [Float] Value of the F distribution that gives a p-value of `y`.
98
+
99
+ def mean
100
+ end
101
+
102
+ def mode
103
+ end
104
+
105
+ def skewness
106
+ end
107
+
108
+ def kurtosis
109
+ end
110
+
111
+ def entropy
112
+ end
114
113
  end
115
114
  end
116
115
  end
@@ -1,26 +1,29 @@
1
1
  module Distribution
2
2
  module F
3
3
  module Statistics2_
4
- class << self
5
- # Return the P-value of the corresponding integral with
6
- # k degrees of freedom
7
- #
8
- # Distribution::F.p_value(0.95,1,2)
9
- # Statistics2 have some problem with extreme values
10
- def p_value(pr,k1,k2)
11
- Statistics2.pfdist(k1,k2, pr)
4
+ class << self
5
+ # F cumulative distribution function (cdf).
6
+ #
7
+ # Returns the integral of F-distribution
8
+ # with k1 and k2 degrees of freedom
9
+ # over [0, x].
10
+ # Distribution::F.cdf(20,3,2)
11
+ #
12
+ def cdf(x, k1, k2)
13
+ Statistics2.fdist(k1, k2, x)
14
+ end
15
+
16
+ # Return the P-value of the corresponding integral with
17
+ # k degrees of freedom
18
+ #
19
+ # Distribution::F.p_value(0.95,1,2)
20
+ # Statistics2 have some problem with extreme values
21
+ def quantile(pr, k1, k2)
22
+ Statistics2.pfdist(k1, k2, pr)
23
+ end
24
+
25
+ alias_method :p_value, :quantile
12
26
  end
13
- # F cumulative distribution function (cdf).
14
- #
15
- # Returns the integral of F-distribution
16
- # with k1 and k2 degrees of freedom
17
- # over [0, x].
18
- # Distribution::F.cdf(20,3,2)
19
- #
20
- def cdf(x, k1, k2)
21
- Statistics2.fdist(k1, k2,x)
22
- end
23
- end
24
27
  end
25
28
  end
26
29
  end
@@ -5,15 +5,15 @@ require 'distribution/gamma/java'
5
5
 
6
6
  module Distribution
7
7
  # From Wikipedia:
8
- # The gamma distribution is a two-parameter family of
8
+ # The gamma distribution is a two-parameter family of
9
9
  # continuous probability distributions. It has a shape parameter a
10
10
  # and a scale parameter b.
11
- #
11
+ #
12
12
  # Calculate pdf, cdf and inverse cdf for Gamma Distribution.
13
13
  #
14
14
  module Gamma
15
15
  extend Distributable
16
- SHORTHAND='gamma'
16
+ SHORTHAND = 'gamma'
17
17
  create_distribution_methods
18
18
 
19
19
  ##
@@ -21,7 +21,6 @@ module Distribution
21
21
  # Returns PDF of Gamma distribution with +a+ as shape
22
22
  # parameter and +b+ as scale parameter
23
23
 
24
-
25
24
  ##
26
25
  # :singleton-method: cdf(x,a,b)
27
26
  # Returns the integral of Gamma distribution with +a+ as scale
@@ -29,9 +28,8 @@ module Distribution
29
28
 
30
29
  ##
31
30
  # :singleton-method: p_value(qn,a,b)
32
- # Return the upper limit for the integral of a
33
- # gamma distribution which returns +qn+
31
+ # Return the upper limit for the integral of a
32
+ # gamma distribution which returns +qn+
34
33
  # with scale +a+ and shape +b+
35
-
36
34
  end
37
35
  end
@@ -2,22 +2,26 @@ module Distribution
2
2
  module Gamma
3
3
  module GSL_
4
4
  class << self
5
- def pdf(x,a,b)
6
- GSL::Ran::gamma_pdf(x.to_f, a.to_f, b.to_f)
7
- end
8
- # Return the P-value of the corresponding integral with
9
- # k degrees of freedom
10
- def p_value(pr,a,b)
11
- GSL::Cdf::gamma_Pinv(pr.to_f, a.to_f, b.to_f)
5
+ def pdf(x, a, b)
6
+ GSL::Ran.gamma_pdf(x.to_f, a.to_f, b.to_f)
12
7
  end
8
+
13
9
  # Gamma cumulative distribution function (cdf).
14
10
  #
15
11
  # Returns the integral of the Gamma distribution
16
12
  # with parameters a and b over [0, x]
17
13
  #
18
- def cdf(x,a,b)
19
- GSL::Cdf::gamma_P(x.to_f, a.to_f, b.to_f)
14
+ def cdf(x, a, b)
15
+ GSL::Cdf.gamma_P(x.to_f, a.to_f, b.to_f)
16
+ end
17
+
18
+ # Return the P-value of the corresponding integral with
19
+ # k degrees of freedom
20
+ def quantile(pr, a, b)
21
+ GSL::Cdf.gamma_Pinv(pr.to_f, a.to_f, b.to_f)
20
22
  end
23
+
24
+ alias_method :p_value, :quantile
21
25
  end
22
26
  end
23
27
  end
@@ -6,4 +6,4 @@ module Distribution
6
6
  end
7
7
  end
8
8
  end
9
- end
9
+ end
@@ -3,7 +3,6 @@ module Distribution
3
3
  module Gamma
4
4
  module Ruby_
5
5
  class << self
6
-
7
6
  include Math
8
7
  # Gamma distribution probability density function
9
8
  #
@@ -22,7 +21,7 @@ module Distribution
22
21
  # ==References
23
22
  # * http://www.gnu.org/software/gsl/manual/html_node/The-Gamma-Distribution.html
24
23
  # * http://en.wikipedia.org/wiki/Gamma_distribution
25
- def pdf(x,a,b)
24
+ def pdf(x, a, b)
26
25
  return 0 if x < 0
27
26
  if x == 0
28
27
  return 1.quo(b) if a == 1
@@ -30,23 +29,18 @@ module Distribution
30
29
  elsif a == 1
31
30
  Math.exp(-x.quo(b)).quo(b)
32
31
  else
33
- Math.exp((a-1)*Math.log(x.quo(b)) - x.quo(b) - Math.lgamma(a).first).quo(b)
32
+ Math.exp((a - 1) * Math.log(x.quo(b)) - x.quo(b) - Math.lgamma(a).first).quo(b)
34
33
  end
35
34
  end
36
35
 
37
36
  # Gamma cumulative distribution function
38
- def cdf(x,a,b)
37
+ def cdf(x, a, b)
39
38
  return 0.0 if x <= 0.0
40
39
 
41
40
  y = x.quo(b)
42
- return (1-Math::IncompleteGamma.q(a, y)) if y > a
43
- return (Math::IncompleteGamma.p(a, y))
41
+ return (1 - Math::IncompleteGamma.q(a, y)) if y > a
42
+ (Math::IncompleteGamma.p(a, y))
44
43
  end
45
-
46
- #def p_value(pr,a,b)
47
- # cdf(1.0-pr,a,b)
48
- #end
49
-
50
44
  end
51
45
  end
52
46
  end