distribution 0.6.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +7 -0
  3. data/.travis.yml +13 -0
  4. data/.yardopts +5 -0
  5. data/Gemfile +5 -0
  6. data/History.txt +24 -8
  7. data/LICENCE.md +26 -0
  8. data/README.md +155 -0
  9. data/Rakefile +15 -19
  10. data/benchmark/binomial_coefficient.rb +19 -23
  11. data/benchmark/binomial_coefficient/experiment.rb +33 -36
  12. data/benchmark/factorial_hash.rb +7 -8
  13. data/benchmark/factorial_method.rb +4 -6
  14. data/benchmark/odd.rb +6 -7
  15. data/benchmark/power.rb +11 -11
  16. data/bin/distribution +26 -26
  17. data/data/template/spec.erb +7 -6
  18. data/distribution.gemspec +25 -0
  19. data/lib/distribution.rb +79 -124
  20. data/lib/distribution/beta.rb +6 -8
  21. data/lib/distribution/beta/gsl.rb +14 -9
  22. data/lib/distribution/beta/java.rb +1 -1
  23. data/lib/distribution/beta/ruby.rb +41 -7
  24. data/lib/distribution/binomial.rb +10 -11
  25. data/lib/distribution/binomial/gsl.rb +6 -5
  26. data/lib/distribution/binomial/java.rb +1 -1
  27. data/lib/distribution/binomial/ruby.rb +22 -15
  28. data/lib/distribution/bivariatenormal.rb +4 -5
  29. data/lib/distribution/bivariatenormal/gsl.rb +2 -2
  30. data/lib/distribution/bivariatenormal/java.rb +1 -1
  31. data/lib/distribution/bivariatenormal/ruby.rb +245 -254
  32. data/lib/distribution/chisquare.rb +8 -10
  33. data/lib/distribution/chisquare/gsl.rb +24 -19
  34. data/lib/distribution/chisquare/java.rb +1 -1
  35. data/lib/distribution/chisquare/ruby.rb +60 -55
  36. data/lib/distribution/chisquare/statistics2.rb +16 -13
  37. data/lib/distribution/distributable.rb +40 -0
  38. data/lib/distribution/exponential.rb +4 -5
  39. data/lib/distribution/exponential/gsl.rb +13 -9
  40. data/lib/distribution/exponential/ruby.rb +17 -11
  41. data/lib/distribution/f.rb +10 -11
  42. data/lib/distribution/f/gsl.rb +26 -22
  43. data/lib/distribution/f/java.rb +1 -1
  44. data/lib/distribution/f/ruby.rb +104 -105
  45. data/lib/distribution/f/statistics2.rb +22 -19
  46. data/lib/distribution/gamma.rb +5 -7
  47. data/lib/distribution/gamma/gsl.rb +13 -9
  48. data/lib/distribution/gamma/java.rb +1 -1
  49. data/lib/distribution/gamma/ruby.rb +5 -11
  50. data/lib/distribution/hypergeometric.rb +5 -8
  51. data/lib/distribution/hypergeometric/gsl.rb +5 -6
  52. data/lib/distribution/hypergeometric/java.rb +1 -1
  53. data/lib/distribution/hypergeometric/ruby.rb +34 -35
  54. data/lib/distribution/logistic.rb +6 -9
  55. data/lib/distribution/logistic/ruby.rb +14 -9
  56. data/lib/distribution/lognormal.rb +37 -0
  57. data/lib/distribution/lognormal/gsl.rb +21 -0
  58. data/lib/distribution/lognormal/ruby.rb +16 -0
  59. data/lib/distribution/math_extension.rb +187 -231
  60. data/lib/distribution/math_extension/chebyshev_series.rb +281 -272
  61. data/lib/distribution/math_extension/erfc.rb +28 -31
  62. data/lib/distribution/math_extension/exponential_integral.rb +17 -17
  63. data/lib/distribution/math_extension/gammastar.rb +19 -20
  64. data/lib/distribution/math_extension/gsl_utilities.rb +12 -12
  65. data/lib/distribution/math_extension/incomplete_beta.rb +52 -61
  66. data/lib/distribution/math_extension/incomplete_gamma.rb +166 -168
  67. data/lib/distribution/math_extension/log_utilities.rb +20 -22
  68. data/lib/distribution/normal.rb +11 -13
  69. data/lib/distribution/normal/gsl.rb +13 -10
  70. data/lib/distribution/normal/java.rb +30 -1
  71. data/lib/distribution/normal/ruby.rb +69 -59
  72. data/lib/distribution/normal/statistics2.rb +5 -2
  73. data/lib/distribution/normalmultivariate.rb +64 -64
  74. data/lib/distribution/poisson.rb +12 -14
  75. data/lib/distribution/poisson/gsl.rb +7 -7
  76. data/lib/distribution/poisson/java.rb +26 -0
  77. data/lib/distribution/poisson/ruby.rb +38 -9
  78. data/lib/distribution/shorthand.rb +17 -0
  79. data/lib/distribution/t.rb +16 -16
  80. data/lib/distribution/t/gsl.rb +27 -24
  81. data/lib/distribution/t/java.rb +1 -1
  82. data/lib/distribution/t/ruby.rb +99 -100
  83. data/lib/distribution/t/statistics2.rb +19 -19
  84. data/lib/distribution/uniform.rb +26 -0
  85. data/lib/distribution/uniform/gsl.rb +36 -0
  86. data/lib/distribution/uniform/ruby.rb +91 -0
  87. data/lib/distribution/version.rb +3 -0
  88. data/lib/distribution/weibull.rb +10 -0
  89. data/lib/distribution/weibull/gsl.rb +21 -0
  90. data/lib/distribution/weibull/ruby.rb +34 -0
  91. data/spec/beta_spec.rb +48 -50
  92. data/spec/binomial_spec.rb +80 -84
  93. data/spec/bivariatenormal_spec.rb +28 -35
  94. data/spec/chisquare_spec.rb +49 -52
  95. data/spec/distribution_spec.rb +11 -11
  96. data/spec/exponential_spec.rb +48 -39
  97. data/spec/f_spec.rb +73 -71
  98. data/spec/gamma_spec.rb +50 -53
  99. data/spec/hypergeometric_spec.rb +63 -69
  100. data/spec/logistic_spec.rb +31 -37
  101. data/spec/lognormal_spec.rb +54 -0
  102. data/spec/math_extension_spec.rb +192 -209
  103. data/spec/normal_spec.rb +80 -73
  104. data/spec/poisson_spec.rb +78 -36
  105. data/spec/shorthand_spec.rb +19 -22
  106. data/spec/spec_helper.rb +31 -6
  107. data/spec/t_spec.rb +63 -77
  108. data/spec/uniform_spec.rb +154 -0
  109. data/spec/weibull_spec.rb +17 -0
  110. data/vendor/java/commons-math-2.2.jar +0 -0
  111. metadata +91 -111
  112. data.tar.gz.sig +0 -0
  113. data/.autotest +0 -23
  114. data/.gemtest +0 -0
  115. data/Manifest.txt +0 -95
  116. data/README.txt +0 -100
  117. metadata.gz.sig +0 -0
@@ -1,27 +1,31 @@
1
1
  module Distribution
2
2
  module F
3
- module GSL_
4
- class << self
5
- def pdf(x,k1,k2)
6
- GSL::Ran.fdist_pdf(x.to_f,k1,k2)
7
- end
8
- # Return the P-value of the corresponding integral with
9
- # k degrees of freedom
10
- #
11
- # Distribution::F.p_value(0.95,1,2)
12
- def p_value(pr,k1,k2)
13
- GSL::Cdf.fdist_Pinv(pr.to_f,k1,k2)
14
- end
15
- # F cumulative distribution function (cdf).
16
- #
17
- # Returns the integral of F-distribution
18
- # with k1 and k2 degrees of freedom
19
- # over [0, x].
20
- # Distribution::F.cdf(20,3,2)
21
- #
22
- def cdf(x, k1, k2)
23
- GSL::Cdf.fdist_P(x.to_f.to_f,k1,k2)
24
- end
3
+ module GSL_
4
+ class << self
5
+ def pdf(x, k1, k2)
6
+ GSL::Ran.fdist_pdf(x.to_f, k1, k2)
7
+ end
8
+
9
+ # F cumulative distribution function (cdf).
10
+ #
11
+ # Returns the integral of F-distribution
12
+ # with k1 and k2 degrees of freedom
13
+ # over [0, x].
14
+ # Distribution::F.cdf(20,3,2)
15
+ #
16
+ def cdf(x, k1, k2)
17
+ GSL::Cdf.fdist_P(x.to_f.to_f, k1, k2)
18
+ end
19
+
20
+ # Return the P-value of the corresponding integral with
21
+ # k degrees of freedom
22
+ #
23
+ # Distribution::F.p_value(0.95,1,2)
24
+ def quantile(pr, k1, k2)
25
+ GSL::Cdf.fdist_Pinv(pr.to_f, k1, k2)
26
+ end
27
+
28
+ alias_method :p_value, :quantile
25
29
  end
26
30
  end
27
31
  end
@@ -6,4 +6,4 @@ module Distribution
6
6
  end
7
7
  end
8
8
  end
9
- end
9
+ end
@@ -1,116 +1,115 @@
1
1
  module Distribution
2
2
  module F
3
+ # Continuous random number distributions are defined by a probability density function, p(x), such that the probability of x occurring in the infinitesimal range x to x+dx is p dx.
4
+
5
+ # The cumulative distribution function for the lower tail P(x) is defined by the integral,
6
+
7
+ # P(x) = \int_{-\infty}^{x} dx' p(x')
8
+ # and gives the probability of a variate taking a value less than x.
9
+
10
+ # The cumulative distribution function for the upper tail Q(x) is defined by the integral,
11
+
12
+ # Q(x) = \int_{x}^{+\infty} dx' p(x')
13
+ # and gives the probability of a variate taking a value greater than x.
14
+
15
+ # The upper and lower cumulative distribution functions are related by P(x) + Q(x) = 1 and satisfy 0 <= P(x) <= 1, 0 <= Q(x) <= 1.
3
16
  module Ruby_
4
- class << self
17
+ extend Distribution::MathExtension
5
18
 
6
- def c_pdf(f,df)
7
- Distribution::ChiSquare.pdf(f,df)
8
- end
9
- def pdf(x,d1,d2)
10
- Math.sqrt(((d1*x)**d1*(d2**d2)).quo((d1*x+d2)**(d1+d2))).quo( x*Math.beta(d1/2.0, d2/2.0))
11
- end
12
- # F-distribution ([1])
13
- # Integral over [x, \infty)
14
- def q_f(df1, df2, f)
15
- if (f <= 0.0) then return 1.0; end
16
- if (df1 % 2 != 0 && df2 % 2 == 0)
17
- return 1.0 - q_f(df2, df1, 1.0 / f)
18
- end
19
- cos2 = 1.0 / (1.0 + df1.to_f * f / df2.to_f)
20
- sin2 = 1.0 - cos2
21
-
22
- if (df1 % 2 == 0)
23
- prob = cos2 ** (df2.to_f / 2.0)
24
- temp = prob
25
- i = 2
26
- while i < df1
27
- temp *= (df2.to_f + i - 2) * sin2 / i
28
- prob += temp
29
- i += 2
30
- end
31
- return prob
32
- end
33
- prob = Math.atan(Math.sqrt(df2.to_f / (df1.to_f * f)))
34
- temp = Math.sqrt(sin2 * cos2)
35
- i = 3
36
- while i <= df1
37
- prob += temp
38
- temp *= (i - 1).to_f * sin2 / i.to_f;
39
- i += 2.0
40
- end
41
- temp *= df1.to_f
42
- i = 3
43
- while i <= df2
44
- prob -= temp
45
- temp *= (df1.to_f + i - 2) * cos2 / i.to_f
46
- i += 2
47
- end
48
- prob * 2.0 / Math::PI
49
- end
19
+ # functions needed:
20
+ # - pdf
21
+ # - cdf (lower cumulative function, P(x))
22
+ # - Q(x), upper cumulative function
23
+ # - mean
24
+ # - mode
25
+ # - kurtosis
26
+ # - skewness
27
+ # - entropy
28
+ # - "fit" (maximum likelihood?)
29
+ # - expected value (given a function)
30
+ # - lower-tail quantile -> P
31
+ # - upper tail quantile -> Q
50
32
 
51
- # inverse of F-distribution ([2])
52
- def pfsub(x, y, z)
53
- (Math.sqrt(z) - y) / x / 2.0
54
- end
33
+ class << self
34
+ # F Distribution (Ruby) -- Probability Density Function
35
+ def pdf(x, n, m)
36
+ x = x.to_f
37
+ numerator = ((n * x)**n * (m**m)) / (n * x + m)**(n + m)
38
+ denominator = x * Math.beta(n / 2.0, m / 2.0)
55
39
 
56
- # Inverse CDF
57
- # [x, \infty)
58
- def pf(q, n1, n2)
59
- if(q < 0.0 || q > 1.0 || n1 < 1 || n2 < 1)
60
- $stderr.printf("Error : Illegal parameter in pf()!\n")
61
- return 0.0
62
- end
40
+ Math.sqrt(numerator) / denominator
41
+ end
63
42
 
64
- if n1 <= 240 || n2 <= 240
65
- eps = 1.0e-5
66
- if(n2 == 1) then eps = 1.0e-4 end
67
- fw = 0.0
68
- s = 1000.0
69
- loop do
70
- fw += s
71
- if s <= eps then return fw end
72
- if (qe = q_f(n1, n2, fw) - q) == 0.0 then return fw end
73
- if qe < 0.0
74
- fw -= s
75
- s /= 10.0 #/
76
- end
77
- end
78
- end
43
+ # Cumulative Distribution Function.
44
+ def cdf(x, n, m)
45
+ x = x.to_f
46
+ xx = (x * n).to_f / (x * n + m).to_f
47
+ regularized_beta(xx, n / 2.0, m / 2.0)
48
+ end
79
49
 
80
- eps = 1.0e-6
81
- qn = q
82
- if q < 0.5 then qn = 1.0 - q
83
- u = pnorm(qn)
84
- w1 = 2.0 / n1 / 9.0
85
- w2 = 2.0 / n2 / 9.0
86
- w3 = 1.0 - w1
87
- w4 = 1.0 - w2
88
- u2 = u * u
89
- a = w4 * w4 - u2 * w2
90
- b = -2. * w3 * w4
91
- c = w3 * w3 - u2 * w1
92
- d = b * b - 4 * a * c
93
- if(d < 0.0)
94
- fw = pfsub(a, b, 0.0)
95
- else
96
- if(a.abs > eps)
97
- fw = pfsub(a, b, d)
98
- else
99
- if(b.abs > eps) then return -c / b end
100
- fw = pfsub(a, b, 0.0)
101
- end
102
- end
103
- fw * fw * fw
104
- end
105
- end
106
- # F-distribution interface
107
- def cdf(f,n1, n2)
108
- 1.0 - q_f(n1, n2, f)
109
- end
110
- def p_value(y, n1, n2)
111
- pf(1.0 - y, n1, n2)
112
- end
113
-
50
+ # Upper cumulative function.
51
+ #
52
+ # If cdf(x, n, m) = p, then q(x, n, m) = 1 - p
53
+ def q(x, n, m)
54
+ 1.0 - cdf(x, n, m)
55
+ end
56
+
57
+ # Return the F value corresponding to `probability` with degrees of
58
+ # freedom `n` and `m`.
59
+ #
60
+ # If x = quantile(p, n, m), then cdf(x, n, m) = p.
61
+ #
62
+ # Taken from:
63
+ # https://github.com/JuliaLang/Rmath-julia/blob/master/src/qf.c
64
+ def quantile(probability, n, m)
65
+ return Float::NAN if n <= 0.0 || m <= 0.0
66
+
67
+ if n == Float::INFINITY || n == -Float::INFINITY || m == Float::INFINITY || m == -Float::INFINITY
68
+ return 1.0
69
+ end
70
+
71
+ if n <= m && m > 4e5
72
+ return Distribution::ChiSquare.p_value(probability, n) / n.to_f
73
+ elsif n > 4e5 # thus n > m
74
+ return m.to_f / Distribution::ChiSquare.p_value(1.0 - probability, m)
75
+ else
76
+ # The problem is here.
77
+ tmp = Distribution::Beta.p_value(1.0 - probability, m.to_f / 2, n.to_f / 2)
78
+ value = (1.0 / tmp - 1.0) * (m.to_f / n.to_f)
79
+ return value.nan? ? Float::NAN : value
80
+ end
81
+ end
82
+
83
+ alias_method :p_value, :quantile
84
+
85
+ # Complementary quantile function.
86
+ #
87
+ # def cquantile(probability, n, m)
88
+ # quantile(1.0 - probability, n, m)
89
+ # end
90
+
91
+ # Return the corresponding F value for a p-value `y` with `n` and `m`
92
+ # degrees of freedom.
93
+ #
94
+ # @param y [Float] Value corresponding to the desired p-value. Between 0 and 1.
95
+ # @param n [Float] Degree of freedom of the first random variable.
96
+ # @param m [Float] Degree of freedom of the second random variable.
97
+ # @return [Float] Value of the F distribution that gives a p-value of `y`.
98
+
99
+ def mean
100
+ end
101
+
102
+ def mode
103
+ end
104
+
105
+ def skewness
106
+ end
107
+
108
+ def kurtosis
109
+ end
110
+
111
+ def entropy
112
+ end
114
113
  end
115
114
  end
116
115
  end
@@ -1,26 +1,29 @@
1
1
  module Distribution
2
2
  module F
3
3
  module Statistics2_
4
- class << self
5
- # Return the P-value of the corresponding integral with
6
- # k degrees of freedom
7
- #
8
- # Distribution::F.p_value(0.95,1,2)
9
- # Statistics2 have some problem with extreme values
10
- def p_value(pr,k1,k2)
11
- Statistics2.pfdist(k1,k2, pr)
4
+ class << self
5
+ # F cumulative distribution function (cdf).
6
+ #
7
+ # Returns the integral of F-distribution
8
+ # with k1 and k2 degrees of freedom
9
+ # over [0, x].
10
+ # Distribution::F.cdf(20,3,2)
11
+ #
12
+ def cdf(x, k1, k2)
13
+ Statistics2.fdist(k1, k2, x)
14
+ end
15
+
16
+ # Return the P-value of the corresponding integral with
17
+ # k degrees of freedom
18
+ #
19
+ # Distribution::F.p_value(0.95,1,2)
20
+ # Statistics2 have some problem with extreme values
21
+ def quantile(pr, k1, k2)
22
+ Statistics2.pfdist(k1, k2, pr)
23
+ end
24
+
25
+ alias_method :p_value, :quantile
12
26
  end
13
- # F cumulative distribution function (cdf).
14
- #
15
- # Returns the integral of F-distribution
16
- # with k1 and k2 degrees of freedom
17
- # over [0, x].
18
- # Distribution::F.cdf(20,3,2)
19
- #
20
- def cdf(x, k1, k2)
21
- Statistics2.fdist(k1, k2,x)
22
- end
23
- end
24
27
  end
25
28
  end
26
29
  end
@@ -5,15 +5,15 @@ require 'distribution/gamma/java'
5
5
 
6
6
  module Distribution
7
7
  # From Wikipedia:
8
- # The gamma distribution is a two-parameter family of
8
+ # The gamma distribution is a two-parameter family of
9
9
  # continuous probability distributions. It has a shape parameter a
10
10
  # and a scale parameter b.
11
- #
11
+ #
12
12
  # Calculate pdf, cdf and inverse cdf for Gamma Distribution.
13
13
  #
14
14
  module Gamma
15
15
  extend Distributable
16
- SHORTHAND='gamma'
16
+ SHORTHAND = 'gamma'
17
17
  create_distribution_methods
18
18
 
19
19
  ##
@@ -21,7 +21,6 @@ module Distribution
21
21
  # Returns PDF of Gamma distribution with +a+ as shape
22
22
  # parameter and +b+ as scale parameter
23
23
 
24
-
25
24
  ##
26
25
  # :singleton-method: cdf(x,a,b)
27
26
  # Returns the integral of Gamma distribution with +a+ as scale
@@ -29,9 +28,8 @@ module Distribution
29
28
 
30
29
  ##
31
30
  # :singleton-method: p_value(qn,a,b)
32
- # Return the upper limit for the integral of a
33
- # gamma distribution which returns +qn+
31
+ # Return the upper limit for the integral of a
32
+ # gamma distribution which returns +qn+
34
33
  # with shape +a+ and scale +b+
35
-
36
34
  end
37
35
  end
@@ -2,22 +2,26 @@ module Distribution
2
2
  module Gamma
3
3
  module GSL_
4
4
  class << self
5
- def pdf(x,a,b)
6
- GSL::Ran::gamma_pdf(x.to_f, a.to_f, b.to_f)
7
- end
8
- # Return the P-value of the corresponding integral with
9
- # k degrees of freedom
10
- def p_value(pr,a,b)
11
- GSL::Cdf::gamma_Pinv(pr.to_f, a.to_f, b.to_f)
5
+ def pdf(x, a, b)
6
+ GSL::Ran.gamma_pdf(x.to_f, a.to_f, b.to_f)
12
7
  end
8
+
13
9
  # Gamma cumulative distribution function (cdf).
14
10
  #
15
11
  # Returns the integral of the Gamma distribution
16
12
  # with parameters a and b over [0, x]
17
13
  #
18
- def cdf(x,a,b)
19
- GSL::Cdf::gamma_P(x.to_f, a.to_f, b.to_f)
14
+ def cdf(x, a, b)
15
+ GSL::Cdf.gamma_P(x.to_f, a.to_f, b.to_f)
16
+ end
17
+
18
+ # Return the P-value of the corresponding integral with
19
+ # k degrees of freedom
20
+ def quantile(pr, a, b)
21
+ GSL::Cdf.gamma_Pinv(pr.to_f, a.to_f, b.to_f)
20
22
  end
23
+
24
+ alias_method :p_value, :quantile
21
25
  end
22
26
  end
23
27
  end
@@ -6,4 +6,4 @@ module Distribution
6
6
  end
7
7
  end
8
8
  end
9
- end
9
+ end
@@ -3,7 +3,6 @@ module Distribution
3
3
  module Gamma
4
4
  module Ruby_
5
5
  class << self
6
-
7
6
  include Math
8
7
  # Gamma distribution probability density function
9
8
  #
@@ -22,7 +21,7 @@ module Distribution
22
21
  # ==References
23
22
  # * http://www.gnu.org/software/gsl/manual/html_node/The-Gamma-Distribution.html
24
23
  # * http://en.wikipedia.org/wiki/Gamma_distribution
25
- def pdf(x,a,b)
24
+ def pdf(x, a, b)
26
25
  return 0 if x < 0
27
26
  if x == 0
28
27
  return 1.quo(b) if a == 1
@@ -30,23 +29,18 @@ module Distribution
30
29
  elsif a == 1
31
30
  Math.exp(-x.quo(b)).quo(b)
32
31
  else
33
- Math.exp((a-1)*Math.log(x.quo(b)) - x.quo(b) - Math.lgamma(a).first).quo(b)
32
+ Math.exp((a - 1) * Math.log(x.quo(b)) - x.quo(b) - Math.lgamma(a).first).quo(b)
34
33
  end
35
34
  end
36
35
 
37
36
  # Gamma cumulative distribution function
38
- def cdf(x,a,b)
37
+ def cdf(x, a, b)
39
38
  return 0.0 if x <= 0.0
40
39
 
41
40
  y = x.quo(b)
42
- return (1-Math::IncompleteGamma.q(a, y)) if y > a
43
- return (Math::IncompleteGamma.p(a, y))
41
+ return (1 - Math::IncompleteGamma.q(a, y)) if y > a
42
+ (Math::IncompleteGamma.p(a, y))
44
43
  end
45
-
46
- #def p_value(pr,a,b)
47
- # cdf(1.0-pr,a,b)
48
- #end
49
-
50
44
  end
51
45
  end
52
46
  end