distribution 0.7.3 → 0.8.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (100) hide show
  1. checksums.yaml +5 -5
  2. data/.travis.yml +4 -6
  3. data/.yardopts +5 -0
  4. data/History.txt +3 -0
  5. data/README.md +87 -44
  6. data/benchmark/binomial_coefficient.rb +19 -23
  7. data/benchmark/binomial_coefficient/experiment.rb +33 -36
  8. data/benchmark/factorial_hash.rb +7 -8
  9. data/benchmark/factorial_method.rb +4 -6
  10. data/benchmark/odd.rb +6 -7
  11. data/benchmark/power.rb +11 -11
  12. data/bin/distribution +26 -26
  13. data/distribution.gemspec +3 -4
  14. data/lib/distribution.rb +55 -96
  15. data/lib/distribution/beta/gsl.rb +10 -5
  16. data/lib/distribution/beta/ruby.rb +3 -1
  17. data/lib/distribution/binomial/ruby.rb +5 -2
  18. data/lib/distribution/bivariatenormal.rb +4 -5
  19. data/lib/distribution/bivariatenormal/gsl.rb +2 -2
  20. data/lib/distribution/bivariatenormal/java.rb +1 -1
  21. data/lib/distribution/bivariatenormal/ruby.rb +245 -254
  22. data/lib/distribution/chisquare.rb +8 -10
  23. data/lib/distribution/chisquare/gsl.rb +24 -19
  24. data/lib/distribution/chisquare/java.rb +1 -1
  25. data/lib/distribution/chisquare/ruby.rb +25 -25
  26. data/lib/distribution/chisquare/statistics2.rb +16 -13
  27. data/lib/distribution/distributable.rb +40 -0
  28. data/lib/distribution/exponential.rb +4 -5
  29. data/lib/distribution/exponential/gsl.rb +13 -9
  30. data/lib/distribution/exponential/ruby.rb +14 -9
  31. data/lib/distribution/f.rb +1 -1
  32. data/lib/distribution/f/gsl.rb +26 -22
  33. data/lib/distribution/f/java.rb +1 -1
  34. data/lib/distribution/f/ruby.rb +16 -19
  35. data/lib/distribution/f/statistics2.rb +22 -19
  36. data/lib/distribution/gamma.rb +5 -7
  37. data/lib/distribution/gamma/gsl.rb +13 -9
  38. data/lib/distribution/gamma/java.rb +1 -1
  39. data/lib/distribution/gamma/ruby.rb +5 -11
  40. data/lib/distribution/hypergeometric.rb +5 -8
  41. data/lib/distribution/hypergeometric/gsl.rb +4 -5
  42. data/lib/distribution/hypergeometric/java.rb +1 -1
  43. data/lib/distribution/hypergeometric/ruby.rb +34 -35
  44. data/lib/distribution/logistic.rb +5 -8
  45. data/lib/distribution/logistic/ruby.rb +13 -8
  46. data/lib/distribution/lognormal.rb +5 -7
  47. data/lib/distribution/lognormal/gsl.rb +8 -6
  48. data/lib/distribution/lognormal/ruby.rb +5 -9
  49. data/lib/distribution/math_extension.rb +6 -15
  50. data/lib/distribution/math_extension/chebyshev_series.rb +281 -272
  51. data/lib/distribution/math_extension/erfc.rb +26 -29
  52. data/lib/distribution/math_extension/exponential_integral.rb +17 -17
  53. data/lib/distribution/math_extension/gammastar.rb +19 -20
  54. data/lib/distribution/math_extension/gsl_utilities.rb +12 -12
  55. data/lib/distribution/math_extension/incomplete_beta.rb +52 -61
  56. data/lib/distribution/math_extension/incomplete_gamma.rb +166 -168
  57. data/lib/distribution/math_extension/log_utilities.rb +20 -22
  58. data/lib/distribution/normal.rb +11 -13
  59. data/lib/distribution/normal/gsl.rb +13 -10
  60. data/lib/distribution/normal/java.rb +14 -13
  61. data/lib/distribution/normal/ruby.rb +68 -58
  62. data/lib/distribution/normal/statistics2.rb +5 -2
  63. data/lib/distribution/normalmultivariate.rb +64 -64
  64. data/lib/distribution/poisson.rb +11 -13
  65. data/lib/distribution/poisson/gsl.rb +7 -7
  66. data/lib/distribution/poisson/java.rb +19 -24
  67. data/lib/distribution/poisson/ruby.rb +38 -9
  68. data/lib/distribution/shorthand.rb +17 -0
  69. data/lib/distribution/t.rb +13 -15
  70. data/lib/distribution/t/gsl.rb +27 -24
  71. data/lib/distribution/t/java.rb +1 -1
  72. data/lib/distribution/t/ruby.rb +99 -100
  73. data/lib/distribution/t/statistics2.rb +19 -19
  74. data/lib/distribution/uniform.rb +26 -0
  75. data/lib/distribution/uniform/gsl.rb +36 -0
  76. data/lib/distribution/uniform/ruby.rb +91 -0
  77. data/lib/distribution/version.rb +1 -1
  78. data/lib/distribution/weibull.rb +6 -7
  79. data/lib/distribution/weibull/gsl.rb +16 -16
  80. data/lib/distribution/weibull/ruby.rb +30 -23
  81. data/spec/beta_spec.rb +45 -47
  82. data/spec/binomial_spec.rb +77 -85
  83. data/spec/bivariatenormal_spec.rb +28 -35
  84. data/spec/chisquare_spec.rb +48 -52
  85. data/spec/distribution_spec.rb +10 -10
  86. data/spec/exponential_spec.rb +44 -49
  87. data/spec/f_spec.rb +4 -4
  88. data/spec/gamma_spec.rb +50 -53
  89. data/spec/hypergeometric_spec.rb +63 -69
  90. data/spec/logistic_spec.rb +32 -37
  91. data/spec/lognormal_spec.rb +25 -31
  92. data/spec/math_extension_spec.rb +192 -210
  93. data/spec/normal_spec.rb +80 -73
  94. data/spec/poisson_spec.rb +63 -41
  95. data/spec/shorthand_spec.rb +19 -22
  96. data/spec/spec_helper.rb +8 -9
  97. data/spec/t_spec.rb +63 -77
  98. data/spec/uniform_spec.rb +154 -0
  99. data/spec/weibull_spec.rb +13 -14
  100. metadata +17 -8
@@ -1,26 +1,29 @@
1
1
  module Distribution
2
2
  module F
3
3
  module Statistics2_
4
- class << self
5
- # Return the P-value of the corresponding integral with
6
- # k degrees of freedom
7
- #
8
- # Distribution::F.p_value(0.95,1,2)
9
- # Statistics2 have some problem with extreme values
10
- def p_value(pr,k1,k2)
11
- Statistics2.pfdist(k1,k2, pr)
4
+ class << self
5
+ # F cumulative distribution function (cdf).
6
+ #
7
+ # Returns the integral of F-distribution
8
+ # with k1 and k2 degrees of freedom
9
+ # over [0, x].
10
+ # Distribution::F.cdf(20,3,2)
11
+ #
12
+ def cdf(x, k1, k2)
13
+ Statistics2.fdist(k1, k2, x)
14
+ end
15
+
16
+ # Return the P-value of the corresponding integral with
17
+ # k degrees of freedom
18
+ #
19
+ # Distribution::F.p_value(0.95,1,2)
20
+ # Statistics2 have some problem with extreme values
21
+ def quantile(pr, k1, k2)
22
+ Statistics2.pfdist(k1, k2, pr)
23
+ end
24
+
25
+ alias_method :p_value, :quantile
12
26
  end
13
- # F cumulative distribution function (cdf).
14
- #
15
- # Returns the integral of F-distribution
16
- # with k1 and k2 degrees of freedom
17
- # over [0, x].
18
- # Distribution::F.cdf(20,3,2)
19
- #
20
- def cdf(x, k1, k2)
21
- Statistics2.fdist(k1, k2,x)
22
- end
23
- end
24
27
  end
25
28
  end
26
29
  end
@@ -5,15 +5,15 @@ require 'distribution/gamma/java'
5
5
 
6
6
  module Distribution
7
7
  # From Wikipedia:
8
- # The gamma distribution is a two-parameter family of
8
+ # The gamma distribution is a two-parameter family of
9
9
  # continuous probability distributions. It has a scale parameter a
10
10
  # and a shape parameter b.
11
- #
11
+ #
12
12
  # Calculate pdf, cdf and inverse cdf for Gamma Distribution.
13
13
  #
14
14
  module Gamma
15
15
  extend Distributable
16
- SHORTHAND='gamma'
16
+ SHORTHAND = 'gamma'
17
17
  create_distribution_methods
18
18
 
19
19
  ##
@@ -21,7 +21,6 @@ module Distribution
21
21
  # Returns PDF of Gamma distribution with +a+ as scale
22
22
  # parameter and +b+ as shape parameter
23
23
 
24
-
25
24
  ##
26
25
  # :singleton-method: cdf(x,a,b)
27
26
  # Returns the integral of Gamma distribution with +a+ as scale
@@ -29,9 +28,8 @@ module Distribution
29
28
 
30
29
  ##
31
30
  # :singleton-method: p_value(qn,a,b)
32
- # Return the upper limit for the integral of a
33
- # gamma distribution which returns +qn+
31
+ # Return the upper limit for the integral of a
32
+ # gamma distribution which returns +qn+
34
33
  # with scale +a+ and shape +b+
35
-
36
34
  end
37
35
  end
@@ -2,22 +2,26 @@ module Distribution
2
2
  module Gamma
3
3
  module GSL_
4
4
  class << self
5
- def pdf(x,a,b)
6
- GSL::Ran::gamma_pdf(x.to_f, a.to_f, b.to_f)
7
- end
8
- # Return the P-value of the corresponding integral with
9
- # k degrees of freedom
10
- def p_value(pr,a,b)
11
- GSL::Cdf::gamma_Pinv(pr.to_f, a.to_f, b.to_f)
5
+ def pdf(x, a, b)
6
+ GSL::Ran.gamma_pdf(x.to_f, a.to_f, b.to_f)
12
7
  end
8
+
13
9
  # Gamma cumulative distribution function (cdf).
14
10
  #
15
11
  # Returns the integral of the Gamma distribution
16
12
  # with parameters a and b over [0, x]
17
13
  #
18
- def cdf(x,a,b)
19
- GSL::Cdf::gamma_P(x.to_f, a.to_f, b.to_f)
14
+ def cdf(x, a, b)
15
+ GSL::Cdf.gamma_P(x.to_f, a.to_f, b.to_f)
16
+ end
17
+
18
+ # Return the P-value of the corresponding integral with
19
+ # k degrees of freedom
20
+ def quantile(pr, a, b)
21
+ GSL::Cdf.gamma_Pinv(pr.to_f, a.to_f, b.to_f)
20
22
  end
23
+
24
+ alias_method :p_value, :quantile
21
25
  end
22
26
  end
23
27
  end
@@ -6,4 +6,4 @@ module Distribution
6
6
  end
7
7
  end
8
8
  end
9
- end
9
+ end
@@ -3,7 +3,6 @@ module Distribution
3
3
  module Gamma
4
4
  module Ruby_
5
5
  class << self
6
-
7
6
  include Math
8
7
  # Gamma distribution probability density function
9
8
  #
@@ -22,7 +21,7 @@ module Distribution
22
21
  # ==References
23
22
  # * http://www.gnu.org/software/gsl/manual/html_node/The-Gamma-Distribution.html
24
23
  # * http://en.wikipedia.org/wiki/Gamma_distribution
25
- def pdf(x,a,b)
24
+ def pdf(x, a, b)
26
25
  return 0 if x < 0
27
26
  if x == 0
28
27
  return 1.quo(b) if a == 1
@@ -30,23 +29,18 @@ module Distribution
30
29
  elsif a == 1
31
30
  Math.exp(-x.quo(b)).quo(b)
32
31
  else
33
- Math.exp((a-1)*Math.log(x.quo(b)) - x.quo(b) - Math.lgamma(a).first).quo(b)
32
+ Math.exp((a - 1) * Math.log(x.quo(b)) - x.quo(b) - Math.lgamma(a).first).quo(b)
34
33
  end
35
34
  end
36
35
 
37
36
  # Gamma cumulative distribution function
38
- def cdf(x,a,b)
37
+ def cdf(x, a, b)
39
38
  return 0.0 if x <= 0.0
40
39
 
41
40
  y = x.quo(b)
42
- return (1-Math::IncompleteGamma.q(a, y)) if y > a
43
- return (Math::IncompleteGamma.p(a, y))
41
+ return (1 - Math::IncompleteGamma.q(a, y)) if y > a
42
+ (Math::IncompleteGamma.p(a, y))
44
43
  end
45
-
46
- #def p_value(pr,a,b)
47
- # cdf(1.0-pr,a,b)
48
- #end
49
-
50
44
  end
51
45
  end
52
46
  end
@@ -2,38 +2,35 @@ require 'distribution/hypergeometric/ruby'
2
2
  require 'distribution/hypergeometric/gsl'
3
3
  require 'distribution/hypergeometric/java'
4
4
 
5
-
6
5
  module Distribution
7
6
  # From Wikipedia:
8
7
  # In probability theory and statistics, the hypergeometric distribution is a discrete probability distribution that
9
8
  # describes the number of successes in a sequence of n draws from a finite population without replacement, just as
10
9
  # the binomial distribution describes the number of successes for draws with replacement.
11
10
  module Hypergeometric
12
- SHORTHAND='hypg'
11
+ SHORTHAND = 'hypg'
13
12
  extend Distributable
14
13
 
15
14
  create_distribution_methods
16
15
 
17
16
  ##
18
17
  # :singleton-method: pdf(k,m,n,total)
19
- # This function computes the probability p(k) of obtaining k
20
- # from a hypergeometric distribution with parameters
18
+ # This function computes the probability p(k) of obtaining k
19
+ # from a hypergeometric distribution with parameters
21
20
  # m, n and t.
22
21
  # * m: number of elements with desired attribute on population
23
- # * n: sample size
22
+ # * n: sample size
24
23
  # * t: population size
25
24
 
26
25
  ##
27
26
  # :singleton-method: cdf(k,m,n,total)
28
27
  # These functions compute the cumulative distribution function P(k)
29
- # for the hypergeometric distribution with parameters m, n and t.
28
+ # for the hypergeometric distribution with parameters m, n and t.
30
29
  # * m: number of elements with desired attribute on population
31
30
  # * n: sample size
32
31
  # * t: population size
33
-
34
32
 
35
33
  ##
36
34
  # :singleton-method: p_value(k,m,n,total)
37
-
38
35
  end
39
36
  end
@@ -3,14 +3,13 @@ module Distribution
3
3
  module GSL_
4
4
  class << self
5
5
  def pdf(k, m, n, total) # :nodoc:
6
- GSL::Ran::hypergeometric_pdf(k, m, total-m, n)
6
+ GSL::Ran.hypergeometric_pdf(k, m, total - m, n)
7
7
  end
8
- # The GSL::Cdf function for hypergeometric
9
- #
8
+ # The GSL::Cdf function for hypergeometric
9
+ #
10
10
  def cdf(k, m, n, total) # :nodoc:
11
- GSL::Cdf::hypergeometric_P(k, m, total-m, n)
11
+ GSL::Cdf.hypergeometric_P(k, m, total - m, n)
12
12
  end
13
-
14
13
  end
15
14
  end
16
15
  end
@@ -6,4 +6,4 @@ module Distribution
6
6
  end
7
7
  end
8
8
  end
9
- end
9
+ end
@@ -1,13 +1,14 @@
1
- # Added by John O. Woods, SciRuby project.
1
+ # Added by John O. Woods, SciRuby project.
2
2
  # Optimized by Claudio Bustos
3
3
 
4
4
  module Distribution
5
5
  module Hypergeometric
6
6
  module Ruby_
7
7
  class << self
8
- def bc(n,k)
9
- Math.binomial_coefficient(n,k)
8
+ def bc(n, k)
9
+ Math.binomial_coefficient(n, k)
10
10
  end
11
+
11
12
  # Hypergeometric probability density function
12
13
  #
13
14
  # Probability p(+k+, +m+, +n+, +total+) of drawing sets of size +m+ and +n+ with an intersection of size +k+
@@ -17,50 +18,48 @@ module Distribution
17
18
  # * http://www.gnu.org/software/gsl/manual/html_node/The-Hypergeometric-Distribution.html
18
19
  # * http://en.wikipedia.org/wiki/Hypergeometric_distribution
19
20
  def pdf(k, m, n, total)
20
- min_m_n=m<n ? m : n
21
- max_t=[0,m+n-total].max
22
- return 0 if k>min_m_n or k<max_t
23
- (bc(m,k) * bc(total-m,n-k)).quo(bc(total,n))
24
- end
25
-
26
-
27
-
28
- def pdf_with_den(k,m,n,total,den)
29
- (bc(m,k) * bc(total-m,n-k)).quo(den)
21
+ min_m_n = m < n ? m : n
22
+ max_t = [0, m + n - total].max
23
+ return 0 if k > min_m_n || k < max_t
24
+ (bc(m, k) * bc(total - m, n - k)).quo(bc(total, n))
30
25
  end
31
26
 
32
- # p-value:
33
-
34
- def p_value(pr, m, n, total)
35
- ac=0
36
- den=bc(total,n)
37
-
38
- (0..total).each do |i|
39
- ac+=pdf_with_den(i,m,n,total,den)
40
- return i if ac>=pr
41
- end
27
+ alias_method :exact_pdf, :pdf
28
+
29
+ def pdf_with_den(k, m, n, total, den)
30
+ (bc(m, k) * bc(total - m, n - k)).quo(den)
42
31
  end
32
+
43
33
  # Cumulative distribution function.
44
34
  # The probability of obtaining, from a sample of
45
35
  # size +n+, +k+ or less elements
46
36
  # in a population size +total+ with +m+ interesting elements.
47
- #
37
+ #
48
38
  # Slow, but secure
49
39
  def cdf(k, m, n, total)
50
- raise(ArgumentError, "k>m") if k>m
51
- raise(ArgumentError, "k>n") if k>n
40
+ fail(ArgumentError, 'k>m') if k > m
41
+ fail(ArgumentError, 'k>n') if k > n
52
42
  # Store the den
53
- den=bc(total,n)
54
- (0..k).collect { |ki| pdf_with_den(ki,m,n,total,den) }.inject { |sum,v| sum+v}
43
+ den = bc(total, n)
44
+ (0..k).collect { |ki| pdf_with_den(ki, m, n, total, den) }.inject { |sum, v| sum + v }
45
+ end
46
+
47
+ alias_method :exact_cdf, :cdf
48
+
49
+ # p-value:
50
+ def quantile(pr, m, n, total)
51
+ ac = 0
52
+ den = bc(total, n)
53
+
54
+ (0..total).each do |i|
55
+ ac += pdf_with_den(i, m, n, total, den)
56
+ return i if ac >= pr
57
+ end
55
58
  end
56
-
57
-
58
- alias :exact_pdf :pdf
59
- alias :exact_p_value :p_value
60
- alias :exact_cdf :cdf
61
59
 
62
-
60
+ alias_method :p_value, :quantile
61
+ alias_method :exact_p_value, :p_value
63
62
  end
64
63
  end
65
64
  end
66
- end
65
+ end
@@ -1,13 +1,12 @@
1
1
  require 'distribution/logistic/ruby'
2
- #require 'distribution/logistic/gsl'
3
- #require 'distribution/logistic/java'
4
-
2
+ # require 'distribution/logistic/gsl'
3
+ # require 'distribution/logistic/java'
5
4
 
6
5
  module Distribution
7
6
  # From Wikipedia:
8
7
  # In probability theory and statistics, the logistic distribution is a continuous probability distribution. Its cumulative distribution function is the logistic function, which appears in logistic regression and feedforward neural networks. It resembles the normal distribution in shape but has heavier tails (higher kurtosis).
9
8
  module Logistic
10
- SHORTHAND='logis'
9
+ SHORTHAND = 'logis'
11
10
  extend Distributable
12
11
  create_distribution_methods
13
12
  ##
@@ -21,7 +20,7 @@ module Distribution
21
20
  # Returns the pdf for logistic distribution (f(x,u,s))
22
21
  # * u: mean
23
22
  # * s: variance related parameter
24
-
23
+
25
24
  ##
26
25
  # :singleton-method: cdf(x,u,s)
27
26
  # Returns the cdf for logistic distribution (F(x,u,s))
@@ -30,11 +29,9 @@ module Distribution
30
29
 
31
30
  ##
32
31
  # :singleton-method: p_value(pr , u,s)
33
- # Returns the inverse cdf for logistic distribution
32
+ # Returns the inverse cdf for logistic distribution
34
33
  # (F^-1(pr,u,s))
35
34
  # * u: mean
36
35
  # * s: variance related parameter
37
-
38
-
39
36
  end
40
37
  end
@@ -2,18 +2,23 @@ module Distribution
2
2
  module Logistic
3
3
  module Ruby_
4
4
  class << self
5
- def rng(u,s)
6
- lambda {p_value(rand(),u,s)}
5
+ def rng(u, s)
6
+ -> { p_value(rand, u, s) }
7
7
  end
8
- def pdf(x,u,s )
9
- (Math.exp(-(x-u) / s)) / (s*(1+Math.exp(-(x-u) / s)**2))
8
+
9
+ def pdf(x, u, s)
10
+ (Math.exp(-(x - u) / s)) / (s * (1 + Math.exp(-(x - u) / s)**2))
10
11
  end
11
- def cdf(x,u,s )
12
- 1/(1+Math.exp(-(x-u) / s))
12
+
13
+ def cdf(x, u, s)
14
+ 1 / (1 + Math.exp(-(x - u) / s))
13
15
  end
14
- def p_value(pr,u,s )
15
- u+s*Math.log(pr/(1-pr))
16
+
17
+ def quantile(pr, u, s)
18
+ u + s * Math.log(pr / (1 - pr))
16
19
  end
20
+
21
+ alias_method :p_value, :quantile
17
22
  end
18
23
  end
19
24
  end
@@ -12,28 +12,26 @@ module Distribution
12
12
  # normally distributed, then so is logb(Y), for any two positive numbers
13
13
  # a, b ≠ 1.)
14
14
  #
15
- # This module calculate the pdf, cdf and inverse cdf for Beta Distribution.
15
+ # This module calculates the pdf, cdf and inverse cdf for the Lognormal distribution.
16
16
  #
17
17
  module LogNormal
18
18
  extend Distributable
19
- SHORTHAND='lognormal'
19
+ SHORTHAND = 'lognormal'
20
20
  create_distribution_methods
21
21
 
22
22
  ##
23
23
  # :singleton-method: pdf(x,u,s)
24
- # Returns PDF of of Lognormal distribution with parameters u (position) and
24
+ # Returns the PDF of the Lognormal distribution of x with parameters u (position) and
25
25
  # s (deviation)
26
26
 
27
-
28
27
  ##
29
28
  # :singleton-method: cdf(x,u,s)
30
- # Returns the CDF of Lognormal distribution of x with parameters
31
- # u (position) and s(deviation)
29
+ # Returns the CDF of the Lognormal distribution of x with parameters u (position) and
30
+ # s(deviation)
32
31
 
33
32
  ##
34
33
  # :singleton-method: p_value(pr,u,s)
35
34
  # Return the quantile of the corresponding integral +pr+
36
35
  # on a lognormal distribution's cdf with parameters u and s
37
-
38
36
  end
39
37
  end