distribution 0.7.3 → 0.8.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (100) hide show
  1. checksums.yaml +5 -5
  2. data/.travis.yml +4 -6
  3. data/.yardopts +5 -0
  4. data/History.txt +3 -0
  5. data/README.md +87 -44
  6. data/benchmark/binomial_coefficient.rb +19 -23
  7. data/benchmark/binomial_coefficient/experiment.rb +33 -36
  8. data/benchmark/factorial_hash.rb +7 -8
  9. data/benchmark/factorial_method.rb +4 -6
  10. data/benchmark/odd.rb +6 -7
  11. data/benchmark/power.rb +11 -11
  12. data/bin/distribution +26 -26
  13. data/distribution.gemspec +3 -4
  14. data/lib/distribution.rb +55 -96
  15. data/lib/distribution/beta/gsl.rb +10 -5
  16. data/lib/distribution/beta/ruby.rb +3 -1
  17. data/lib/distribution/binomial/ruby.rb +5 -2
  18. data/lib/distribution/bivariatenormal.rb +4 -5
  19. data/lib/distribution/bivariatenormal/gsl.rb +2 -2
  20. data/lib/distribution/bivariatenormal/java.rb +1 -1
  21. data/lib/distribution/bivariatenormal/ruby.rb +245 -254
  22. data/lib/distribution/chisquare.rb +8 -10
  23. data/lib/distribution/chisquare/gsl.rb +24 -19
  24. data/lib/distribution/chisquare/java.rb +1 -1
  25. data/lib/distribution/chisquare/ruby.rb +25 -25
  26. data/lib/distribution/chisquare/statistics2.rb +16 -13
  27. data/lib/distribution/distributable.rb +40 -0
  28. data/lib/distribution/exponential.rb +4 -5
  29. data/lib/distribution/exponential/gsl.rb +13 -9
  30. data/lib/distribution/exponential/ruby.rb +14 -9
  31. data/lib/distribution/f.rb +1 -1
  32. data/lib/distribution/f/gsl.rb +26 -22
  33. data/lib/distribution/f/java.rb +1 -1
  34. data/lib/distribution/f/ruby.rb +16 -19
  35. data/lib/distribution/f/statistics2.rb +22 -19
  36. data/lib/distribution/gamma.rb +5 -7
  37. data/lib/distribution/gamma/gsl.rb +13 -9
  38. data/lib/distribution/gamma/java.rb +1 -1
  39. data/lib/distribution/gamma/ruby.rb +5 -11
  40. data/lib/distribution/hypergeometric.rb +5 -8
  41. data/lib/distribution/hypergeometric/gsl.rb +4 -5
  42. data/lib/distribution/hypergeometric/java.rb +1 -1
  43. data/lib/distribution/hypergeometric/ruby.rb +34 -35
  44. data/lib/distribution/logistic.rb +5 -8
  45. data/lib/distribution/logistic/ruby.rb +13 -8
  46. data/lib/distribution/lognormal.rb +5 -7
  47. data/lib/distribution/lognormal/gsl.rb +8 -6
  48. data/lib/distribution/lognormal/ruby.rb +5 -9
  49. data/lib/distribution/math_extension.rb +6 -15
  50. data/lib/distribution/math_extension/chebyshev_series.rb +281 -272
  51. data/lib/distribution/math_extension/erfc.rb +26 -29
  52. data/lib/distribution/math_extension/exponential_integral.rb +17 -17
  53. data/lib/distribution/math_extension/gammastar.rb +19 -20
  54. data/lib/distribution/math_extension/gsl_utilities.rb +12 -12
  55. data/lib/distribution/math_extension/incomplete_beta.rb +52 -61
  56. data/lib/distribution/math_extension/incomplete_gamma.rb +166 -168
  57. data/lib/distribution/math_extension/log_utilities.rb +20 -22
  58. data/lib/distribution/normal.rb +11 -13
  59. data/lib/distribution/normal/gsl.rb +13 -10
  60. data/lib/distribution/normal/java.rb +14 -13
  61. data/lib/distribution/normal/ruby.rb +68 -58
  62. data/lib/distribution/normal/statistics2.rb +5 -2
  63. data/lib/distribution/normalmultivariate.rb +64 -64
  64. data/lib/distribution/poisson.rb +11 -13
  65. data/lib/distribution/poisson/gsl.rb +7 -7
  66. data/lib/distribution/poisson/java.rb +19 -24
  67. data/lib/distribution/poisson/ruby.rb +38 -9
  68. data/lib/distribution/shorthand.rb +17 -0
  69. data/lib/distribution/t.rb +13 -15
  70. data/lib/distribution/t/gsl.rb +27 -24
  71. data/lib/distribution/t/java.rb +1 -1
  72. data/lib/distribution/t/ruby.rb +99 -100
  73. data/lib/distribution/t/statistics2.rb +19 -19
  74. data/lib/distribution/uniform.rb +26 -0
  75. data/lib/distribution/uniform/gsl.rb +36 -0
  76. data/lib/distribution/uniform/ruby.rb +91 -0
  77. data/lib/distribution/version.rb +1 -1
  78. data/lib/distribution/weibull.rb +6 -7
  79. data/lib/distribution/weibull/gsl.rb +16 -16
  80. data/lib/distribution/weibull/ruby.rb +30 -23
  81. data/spec/beta_spec.rb +45 -47
  82. data/spec/binomial_spec.rb +77 -85
  83. data/spec/bivariatenormal_spec.rb +28 -35
  84. data/spec/chisquare_spec.rb +48 -52
  85. data/spec/distribution_spec.rb +10 -10
  86. data/spec/exponential_spec.rb +44 -49
  87. data/spec/f_spec.rb +4 -4
  88. data/spec/gamma_spec.rb +50 -53
  89. data/spec/hypergeometric_spec.rb +63 -69
  90. data/spec/logistic_spec.rb +32 -37
  91. data/spec/lognormal_spec.rb +25 -31
  92. data/spec/math_extension_spec.rb +192 -210
  93. data/spec/normal_spec.rb +80 -73
  94. data/spec/poisson_spec.rb +63 -41
  95. data/spec/shorthand_spec.rb +19 -22
  96. data/spec/spec_helper.rb +8 -9
  97. data/spec/t_spec.rb +63 -77
  98. data/spec/uniform_spec.rb +154 -0
  99. data/spec/weibull_spec.rb +13 -14
  100. metadata +17 -8
@@ -2,33 +2,31 @@ require 'distribution/poisson/ruby'
2
2
  require 'distribution/poisson/gsl'
3
3
  require 'distribution/poisson/java'
4
4
 
5
-
6
5
  module Distribution
7
6
  # From Wikipedia
8
- # In probability theory and statistics, the Poisson distribution is
9
- # a discrete probability distribution that expresses the probability of
10
- # a number of events occurring in a fixed period of time if these
11
- # events occur with a known average rate and independently of the time
7
+ # In probability theory and statistics, the Poisson distribution is
8
+ # a discrete probability distribution that expresses the probability of
9
+ # a number of events occurring in a fixed period of time if these
10
+ # events occur with a known average rate and independently of the time
12
11
  # since the last event.
13
12
  module Poisson
14
- SHORTHAND='pois'
13
+ SHORTHAND = 'pois'
15
14
  extend Distributable
16
15
  create_distribution_methods
17
16
 
18
17
  ##
19
18
  # :singleton-method: pdf(k , l)
20
- # PDF for Poisson distribution,
21
- # [+k+] is the number of occurrences of an event
19
+ # PDF for Poisson distribution,
20
+ # [+k+] is the number of occurrences of an event
22
21
  # [+l+] is a positive real number, equal to the expected number of occurrences that occur during the given interval.
23
-
22
+
24
23
  ##
25
24
  # :singleton-method: cdf(k , l)
26
25
  # CDF for Poisson distribution
27
- # [+k+] is the number of occurrences of an event
28
- # [+l+] is a positive real number, equal to the expected number of occurrences that occur during the given interval.
29
-
26
+ # [+k+] is the number of occurrences of an event
27
+ # [+l+] is a positive real number, equal to the expected number of occurrences that occur during the given interval.
28
+
30
29
  # TODO: Not implemented yet
31
30
  # :singleton-method: p_value(pr , l)
32
-
33
31
  end
34
32
  end
@@ -2,16 +2,16 @@ module Distribution
2
2
  module Poisson
3
3
  module GSL_
4
4
  class << self
5
- def pdf(k,l)
6
- return 0 if k<0
7
- GSL::Ran.poisson_pdf(k,l.to_f)
5
+ def pdf(k, l)
6
+ return 0 if k < 0
7
+ GSL::Ran.poisson_pdf(k, l.to_f)
8
8
  end
9
- def cdf(k,l)
10
- return 0 if k<0
9
+
10
+ def cdf(k, l)
11
+ return 0 if k < 0
11
12
  GSL::Cdf.poisson_P(k, l.to_f)
12
13
  end
13
-
14
14
  end
15
15
  end
16
16
  end
17
- end
17
+ end
@@ -2,30 +2,25 @@ module Distribution
2
2
  module Poisson
3
3
  module Java_
4
4
  class << self
5
- #==
6
- # Create the PoissonDistributionImpl object for use in calculations
7
- # with mean of l
8
- def create_distribution(l)
9
- PoissonDistributionImpl.new(l)
10
- end
11
-
12
- #==
13
- #
14
- def pdf(k,l)
15
- dist = create_distribution(l)
16
- dist.probability(k)
17
- end
18
-
19
- def cdf(k,l)
20
- dist = create_distribution(l)
21
- dist.cumulativeProbability(k)
22
- end
23
-
24
- # def p_value(pr,l)
25
- # dist = create_distribution(l)
26
- # dist.inverseCumulativeProbability(pr)
27
- # end
5
+ #==
6
+ # Create the PoissonDistributionImpl object for use in calculations
7
+ # with mean of l
8
+ def create_distribution(l)
9
+ PoissonDistributionImpl.new(l)
10
+ end
11
+
12
+ #==
13
+ #
14
+ def pdf(k, l)
15
+ dist = create_distribution(l)
16
+ dist.probability(k)
17
+ end
18
+
19
+ def cdf(k, l)
20
+ dist = create_distribution(l)
21
+ dist.cumulativeProbability(k)
22
+ end
28
23
  end
29
24
  end
30
25
  end
31
- end
26
+ end
@@ -2,20 +2,49 @@ module Distribution
2
2
  module Poisson
3
3
  module Ruby_
4
4
  class << self
5
- def pdf(k,l )
6
- (l**k*Math.exp(-l)).quo(Math.factorial(k))
5
+
6
+ # Return a random number drawn
7
+ # from the Poisson distribution with mean lambda_val.
8
+ #
9
+ # == Arguments
10
+ # * +lambda_val+ - mean of the poisson distribution
11
+ # * +seed+ - seed, an integer value to set the initial state
12
+ #
13
+ # == Algorithm
14
+ # * Donald Knuth
15
+ #
16
+ def rng(lambda_val = 1, seed = nil)
17
+ seed = Random.new_seed if seed.nil?
18
+ r = Random.new(seed).rand
19
+ x = 0
20
+ l = Math.exp(-lambda_val)
21
+ s = l
22
+ while r > s
23
+ x += 1
24
+ l *= lambda_val / x.to_f
25
+ s += l
26
+ end
27
+ x
28
+ end
29
+
30
+ def pdf(k, l)
31
+ (l**k * Math.exp(-l)).quo(Math.factorial(k))
7
32
  end
8
- def cdf(k,l)
9
- Math.exp(-l)*(0..k).inject(0) {|ac,i| ac+ (l**i).quo(Math.factorial(i))}
33
+
34
+ def cdf(k, l)
35
+ Math.exp(-l) * (0..k).inject(0) { |ac, i| ac + (l**i).quo(Math.factorial(i)) }
10
36
  end
11
- def p_value(prob,l)
12
- ac=0
37
+
38
+ def quantile(prob, l)
39
+ ac = 0
13
40
  (0..100).each do |i|
14
- ac+=pdf(i,l)
15
- return i if prob<=ac
41
+ ac += pdf(i, l)
42
+ return i if prob <= ac
16
43
  end
17
44
  end
45
+
46
+ alias_method :p_value, :quantile
18
47
  end
19
48
  end
20
49
  end
21
- end
50
+ end
@@ -0,0 +1,17 @@
1
+ module Distribution
2
+ module Shorthand
3
+ EQUIVALENCES = { p_value: :p, cdf: :cdf, pdf: :pdf, rng: :r,
4
+ exact_pdf: :epdf, exact_cdf: :ecdf, exact_p_value: :ep }
5
+
6
+ def self.add_shortcut(shortcut, method, &block)
7
+ if EQUIVALENCES.include? method.to_sym
8
+ name = shortcut + "_#{method}"
9
+ define_method(name, &block)
10
+
11
+ name = shortcut + "_#{EQUIVALENCES[method.to_sym]}"
12
+ define_method(name, &block)
13
+
14
+ end
15
+ end
16
+ end
17
+ end
@@ -6,26 +6,24 @@ require 'distribution/t/gsl'
6
6
  require 'distribution/t/java'
7
7
 
8
8
  module Distribution
9
+ # Calculate statistics for the T distribution.
10
+ module T
11
+ SHORTHAND = 'tdist'
12
+ extend Distributable
13
+ create_distribution_methods
9
14
 
10
- # Calculate statisticals for T Distribution.
11
- module T
12
- SHORTHAND='tdist'
13
- extend Distributable
14
- create_distribution_methods
15
-
16
15
  ##
17
16
  # :singleton-method: pdf(x,k)
18
- # Returns the integral of T distribution
19
- # with +k+ degrees of freedom over [0, +x+]
20
-
17
+ # Returns the density of the T distribution
18
+ # with +k+ degrees of freedom at +x+
21
19
  ##
22
20
  # :singleton-method: p_value(qn, k)
23
- # Return the P-value of the corresponding integral +qn+ with
24
- # +k+ degrees of freedom
25
-
21
+ # Return the P-value of the corresponding integral +qn+ with
22
+ # +k+ degrees of freedom
23
+
26
24
  ##
27
25
  # :singleton-method: cdf(x,k)
28
- # Returns the integral of T distribution
29
- # with +k+ degrees of freedom over [0, +x+]
30
- end
26
+ # Returns the integral of T distribution
27
+ # with +k+ degrees of freedom over (-Infty, +x+]
28
+ end
31
29
  end
@@ -1,29 +1,32 @@
1
1
  module Distribution
2
2
  module T
3
- module GSL_
4
- class << self
5
-
6
- def pdf(x,k)
7
- GSL::Ran.tdist_pdf(x,k)
8
- end
9
- # Return the P-value of the corresponding integral with
10
- # k degrees of freedom
11
- #
12
- # Distribution::F.p_value(0.95,1,2)
13
- def p_value(pr,k)
14
- GSL::Cdf.tdist_Pinv(pr,k)
15
- end
16
- # F cumulative distribution function (cdf).
17
- #
18
- # Returns the integral of F-distribution
19
- # with k1 and k2 degrees of freedom
20
- # over [0, x].
21
- # Distribution::F.cdf(20,3,2)
22
- #
23
- def cdf(x, k)
24
- GSL::Cdf.tdist_P(x.to_f, k)
3
+ module GSL_
4
+ class << self
5
+ def pdf(x, k)
6
+ GSL::Ran.tdist_pdf(x, k)
7
+ end
8
+
9
+ # T cumulative distribution function (cdf).
10
+ #
11
+ # Returns the integral of the t-distribution
12
+ # with k degrees of freedom
13
+ # over (-Infty, x].
14
+ # Distribution::T.cdf(2, 3)
15
+ #
16
+ def cdf(x, k)
17
+ GSL::Cdf.tdist_P(x.to_f, k)
18
+ end
19
+
20
+ # Return the P-value of the corresponding integral with
21
+ # k degrees of freedom
22
+ #
23
+ # Distribution::T.p_value(0.95, 2)
24
+ def quantile(pr, k)
25
+ GSL::Cdf.tdist_Pinv(pr, k)
26
+ end
27
+
28
+ alias_method :p_value, :quantile
25
29
  end
26
30
  end
27
31
  end
28
- end
29
- end
32
+ end
@@ -6,4 +6,4 @@ module Distribution
6
6
  end
7
7
  end
8
8
  end
9
- end
9
+ end
@@ -1,111 +1,110 @@
1
1
  module Distribution
2
2
  module T
3
- module Ruby_
4
- class << self
5
- def pdf(t,v)
6
- ((Math.gamma((v+1) / 2.0)) / (Math.sqrt(v*Math::PI)*Math.gamma(v/2.0))) * ((1+(t**2 / v.to_f))**(-(v+1) / 2.0))
7
-
8
- end
9
- # Returns the integral of t-distribution with n degrees of freedom over (-Infty, x].
10
- def cdf(t, n)
11
- p_t(n, t)
12
- end
13
-
14
- # t-distribution ([1])
15
- # (-\infty, x]
16
- def p_t(df, t)
17
- if df.to_i!=df
18
- x=(t+Math.sqrt(t**2+df)) / (2*Math.sqrt(t**2+df))
19
- return Math.regularized_beta(x,df/2.0,df/2.0)
3
+ module Ruby_
4
+ class << self
5
+ def pdf(t, v)
6
+ ((Math.gamma((v + 1) / 2.0)) / (Math.sqrt(v * Math::PI) * Math.gamma(v / 2.0))) * ((1 + (t**2 / v.to_f))**(-(v + 1) / 2.0))
20
7
  end
21
- df=df.to_i
22
- c2 = df.to_f / (df + t * t);
23
- s = Math.sqrt(1.0 - c2)
24
- s = -s if t < 0.0
25
- p = 0.0;
26
- i = df % 2 + 2
27
- while i <= df
28
- p += s
29
- s *= (i - 1) * c2 / i
30
- i += 2
8
+
9
+ # Returns the integral of t-distribution with n degrees of freedom over (-Infty, x].
10
+ def cdf(t, n)
11
+ p_t(n, t)
31
12
  end
32
-
33
- if df.is_a? Float or df & 1 != 0
34
- 0.5+(p*Math.sqrt(c2)+Math.atan(t/Math.sqrt(df))) / Math::PI
35
- else
36
- (1.0 + p) / 2.0
13
+
14
+ # t-distribution ([1])
15
+ # (-\infty, x]
16
+ def p_t(df, t)
17
+ if df.to_i != df
18
+ x = (t + Math.sqrt(t**2 + df)) / (2 * Math.sqrt(t**2 + df))
19
+ return Math.regularized_beta(x, df / 2.0, df / 2.0)
20
+ end
21
+ df = df.to_i
22
+ c2 = df.to_f / (df + t * t)
23
+ s = Math.sqrt(1.0 - c2)
24
+ s = -s if t < 0.0
25
+ p = 0.0
26
+ i = df % 2 + 2
27
+ while i <= df
28
+ p += s
29
+ s *= (i - 1) * c2 / i
30
+ i += 2
31
+ end
32
+
33
+ if df.is_a?(Float) || df & 1 != 0
34
+ 0.5 + (p * Math.sqrt(c2) + Math.atan(t / Math.sqrt(df))) / Math::PI
35
+ else
36
+ (1.0 + p) / 2.0
37
+ end
37
38
  end
38
- end
39
-
40
-
41
- # inverse of t-distribution ([2])
42
- # (-\infty, -q/2] + [q/2, \infty)
43
- def ptsub(q, n)
44
- q = q.to_f
45
- if(n == 1 && 0.001 < q && q < 0.01)
46
- eps = 1.0e-4
47
- elsif (n == 2 && q < 0.0001)
48
- eps = 1.0e-4
49
- elsif (n == 1 && q < 0.001)
50
- eps = 1.0e-2
51
- else
52
- eps = 1.0e-5
53
- end
54
- s = 10000.0
55
- w = 0.0
56
- loop do
57
- w += s
58
- if(s <= eps) then return w end
59
- if((qe = 2.0 - p_t(n, w)*2.0 - q) == 0.0) then return w end
60
- if(qe < 0.0)
61
- w -= s
62
- s /= 10.0 #/
63
- end
64
- end
65
- end
66
-
67
- def pt(q, n)
68
- q = q.to_f
69
- if(q < 1.0e-5 || q > 1.0 || n < 1)
70
- $stderr.printf("Error : Illegal parameter in pt()!\n")
71
- return 0.0
39
+
40
+ # inverse of t-distribution ([2])
41
+ # (-\infty, -q/2] + [q/2, \infty)
42
+ def ptsub(q, n)
43
+ q = q.to_f
44
+ if n == 1 && 0.001 < q && q < 0.01
45
+ eps = 1.0e-4
46
+ elsif n == 2 && q < 0.0001
47
+ eps = 1.0e-4
48
+ elsif n == 1 && q < 0.001
49
+ eps = 1.0e-2
50
+ else
51
+ eps = 1.0e-5
52
+ end
53
+ s = 10_000.0
54
+ w = 0.0
55
+ loop do
56
+ w += s
57
+ return w if (s <= eps)
58
+ if ((qe = 2.0 - p_t(n, w) * 2.0 - q) == 0.0) then return w end
59
+ if qe < 0.0
60
+ w -= s
61
+ s /= 10.0 # /
62
+ end
63
+ end
72
64
  end
73
-
74
- if(n <= 5) then return ptsub(q, n) end
75
- if(q <= 5.0e-3 && n <= 13) then return ptsub(q, n) end
76
-
77
- f1 = 4.0 * (f = n.to_f)
78
- f5 = (f4 = (f3 = (f2 = f * f) * f) * f) * f
79
- f2 *= 96.0
80
- f3 *= 384.0
81
- f4 *= 92160.0
82
- f5 *= 368640.0
83
- u = Normal.p_value(1.0 - q / 2.0)
84
-
85
- w0 = (u2 = u * u) * u
86
- w1 = w0 * u2
87
- w2 = w1 * u2
88
- w3 = w2 * u2
89
- w4 = w3 * u2
90
- w = (w0 + u) / f1
91
- w += (5.0 * w1 + 16.0 * w0 + 3.0 * u) / f2
92
- w += (3.0 * w2 + 19.0 * w1 + 17.0 * w0 - 15.0 * u) / f3
93
- w += (79.0 * w3 + 776.0 * w2 + 1482.0 * w1 - 1920.0 * w0 - 9450.0 * u) / f4
94
- w += (27.0 * w4 + 339.0 * w3 + 930.0 * w2 - 1782.0 * w1 - 765.0 * w0 + 17955.0 * u) / f5
95
- u + w
96
- end
97
-
98
- # Returns the P-value of tdist().
99
- def p_value(y,n)
100
- if y > 0.5
101
- pt(2.0 - y*2.0, n)
102
- else
103
- - pt(y*2.0, n)
65
+
66
+ def pt(q, n)
67
+ q = q.to_f
68
+ if q < 1.0e-5 || q > 1.0 || n < 1
69
+ $stderr.printf("Error : Illegal parameter in pt()!\n")
70
+ return 0.0
71
+ end
72
+
73
+ return ptsub(q, n) if (n <= 5)
74
+ return ptsub(q, n) if q <= 5.0e-3 && n <= 13
75
+
76
+ f1 = 4.0 * (f = n.to_f)
77
+ f5 = (f4 = (f3 = (f2 = f * f) * f) * f) * f
78
+ f2 *= 96.0
79
+ f3 *= 384.0
80
+ f4 *= 92_160.0
81
+ f5 *= 368_640.0
82
+ u = Normal.p_value(1.0 - q / 2.0)
83
+
84
+ w0 = (u2 = u * u) * u
85
+ w1 = w0 * u2
86
+ w2 = w1 * u2
87
+ w3 = w2 * u2
88
+ w4 = w3 * u2
89
+ w = (w0 + u) / f1
90
+ w += (5.0 * w1 + 16.0 * w0 + 3.0 * u) / f2
91
+ w += (3.0 * w2 + 19.0 * w1 + 17.0 * w0 - 15.0 * u) / f3
92
+ w += (79.0 * w3 + 776.0 * w2 + 1482.0 * w1 - 1920.0 * w0 - 9450.0 * u) / f4
93
+ w += (27.0 * w4 + 339.0 * w3 + 930.0 * w2 - 1782.0 * w1 - 765.0 * w0 + 17_955.0 * u) / f5
94
+ u + w
104
95
  end
96
+
97
+ # Returns the P-value of tdist().
98
+ def quantile(y, n)
99
+ if y > 0.5
100
+ pt(2.0 - y * 2.0, n)
101
+ else
102
+ - pt(y * 2.0, n)
103
+ end
104
+ end
105
+
106
+ alias_method :p_value, :quantile
105
107
  end
106
-
107
-
108
108
  end
109
109
  end
110
110
  end
111
- end