distribution 0.7.3 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100) hide show
  1. checksums.yaml +5 -5
  2. data/.travis.yml +4 -6
  3. data/.yardopts +5 -0
  4. data/History.txt +3 -0
  5. data/README.md +87 -44
  6. data/benchmark/binomial_coefficient.rb +19 -23
  7. data/benchmark/binomial_coefficient/experiment.rb +33 -36
  8. data/benchmark/factorial_hash.rb +7 -8
  9. data/benchmark/factorial_method.rb +4 -6
  10. data/benchmark/odd.rb +6 -7
  11. data/benchmark/power.rb +11 -11
  12. data/bin/distribution +26 -26
  13. data/distribution.gemspec +3 -4
  14. data/lib/distribution.rb +55 -96
  15. data/lib/distribution/beta/gsl.rb +10 -5
  16. data/lib/distribution/beta/ruby.rb +3 -1
  17. data/lib/distribution/binomial/ruby.rb +5 -2
  18. data/lib/distribution/bivariatenormal.rb +4 -5
  19. data/lib/distribution/bivariatenormal/gsl.rb +2 -2
  20. data/lib/distribution/bivariatenormal/java.rb +1 -1
  21. data/lib/distribution/bivariatenormal/ruby.rb +245 -254
  22. data/lib/distribution/chisquare.rb +8 -10
  23. data/lib/distribution/chisquare/gsl.rb +24 -19
  24. data/lib/distribution/chisquare/java.rb +1 -1
  25. data/lib/distribution/chisquare/ruby.rb +25 -25
  26. data/lib/distribution/chisquare/statistics2.rb +16 -13
  27. data/lib/distribution/distributable.rb +40 -0
  28. data/lib/distribution/exponential.rb +4 -5
  29. data/lib/distribution/exponential/gsl.rb +13 -9
  30. data/lib/distribution/exponential/ruby.rb +14 -9
  31. data/lib/distribution/f.rb +1 -1
  32. data/lib/distribution/f/gsl.rb +26 -22
  33. data/lib/distribution/f/java.rb +1 -1
  34. data/lib/distribution/f/ruby.rb +16 -19
  35. data/lib/distribution/f/statistics2.rb +22 -19
  36. data/lib/distribution/gamma.rb +5 -7
  37. data/lib/distribution/gamma/gsl.rb +13 -9
  38. data/lib/distribution/gamma/java.rb +1 -1
  39. data/lib/distribution/gamma/ruby.rb +5 -11
  40. data/lib/distribution/hypergeometric.rb +5 -8
  41. data/lib/distribution/hypergeometric/gsl.rb +4 -5
  42. data/lib/distribution/hypergeometric/java.rb +1 -1
  43. data/lib/distribution/hypergeometric/ruby.rb +34 -35
  44. data/lib/distribution/logistic.rb +5 -8
  45. data/lib/distribution/logistic/ruby.rb +13 -8
  46. data/lib/distribution/lognormal.rb +5 -7
  47. data/lib/distribution/lognormal/gsl.rb +8 -6
  48. data/lib/distribution/lognormal/ruby.rb +5 -9
  49. data/lib/distribution/math_extension.rb +6 -15
  50. data/lib/distribution/math_extension/chebyshev_series.rb +281 -272
  51. data/lib/distribution/math_extension/erfc.rb +26 -29
  52. data/lib/distribution/math_extension/exponential_integral.rb +17 -17
  53. data/lib/distribution/math_extension/gammastar.rb +19 -20
  54. data/lib/distribution/math_extension/gsl_utilities.rb +12 -12
  55. data/lib/distribution/math_extension/incomplete_beta.rb +52 -61
  56. data/lib/distribution/math_extension/incomplete_gamma.rb +166 -168
  57. data/lib/distribution/math_extension/log_utilities.rb +20 -22
  58. data/lib/distribution/normal.rb +11 -13
  59. data/lib/distribution/normal/gsl.rb +13 -10
  60. data/lib/distribution/normal/java.rb +14 -13
  61. data/lib/distribution/normal/ruby.rb +68 -58
  62. data/lib/distribution/normal/statistics2.rb +5 -2
  63. data/lib/distribution/normalmultivariate.rb +64 -64
  64. data/lib/distribution/poisson.rb +11 -13
  65. data/lib/distribution/poisson/gsl.rb +7 -7
  66. data/lib/distribution/poisson/java.rb +19 -24
  67. data/lib/distribution/poisson/ruby.rb +38 -9
  68. data/lib/distribution/shorthand.rb +17 -0
  69. data/lib/distribution/t.rb +13 -15
  70. data/lib/distribution/t/gsl.rb +27 -24
  71. data/lib/distribution/t/java.rb +1 -1
  72. data/lib/distribution/t/ruby.rb +99 -100
  73. data/lib/distribution/t/statistics2.rb +19 -19
  74. data/lib/distribution/uniform.rb +26 -0
  75. data/lib/distribution/uniform/gsl.rb +36 -0
  76. data/lib/distribution/uniform/ruby.rb +91 -0
  77. data/lib/distribution/version.rb +1 -1
  78. data/lib/distribution/weibull.rb +6 -7
  79. data/lib/distribution/weibull/gsl.rb +16 -16
  80. data/lib/distribution/weibull/ruby.rb +30 -23
  81. data/spec/beta_spec.rb +45 -47
  82. data/spec/binomial_spec.rb +77 -85
  83. data/spec/bivariatenormal_spec.rb +28 -35
  84. data/spec/chisquare_spec.rb +48 -52
  85. data/spec/distribution_spec.rb +10 -10
  86. data/spec/exponential_spec.rb +44 -49
  87. data/spec/f_spec.rb +4 -4
  88. data/spec/gamma_spec.rb +50 -53
  89. data/spec/hypergeometric_spec.rb +63 -69
  90. data/spec/logistic_spec.rb +32 -37
  91. data/spec/lognormal_spec.rb +25 -31
  92. data/spec/math_extension_spec.rb +192 -210
  93. data/spec/normal_spec.rb +80 -73
  94. data/spec/poisson_spec.rb +63 -41
  95. data/spec/shorthand_spec.rb +19 -22
  96. data/spec/spec_helper.rb +8 -9
  97. data/spec/t_spec.rb +63 -77
  98. data/spec/uniform_spec.rb +154 -0
  99. data/spec/weibull_spec.rb +13 -14
  100. metadata +17 -8
@@ -2,33 +2,31 @@ require 'distribution/poisson/ruby'
2
2
  require 'distribution/poisson/gsl'
3
3
  require 'distribution/poisson/java'
4
4
 
5
-
6
5
  module Distribution
7
6
  # From Wikipedia
8
- # In probability theory and statistics, the Poisson distribution is
9
- # a discrete probability distribution that expresses the probability of
10
- # a number of events occurring in a fixed period of time if these
11
- # events occur with a known average rate and independently of the time
7
+ # In probability theory and statistics, the Poisson distribution is
8
+ # a discrete probability distribution that expresses the probability of
9
+ # a number of events occurring in a fixed period of time if these
10
+ # events occur with a known average rate and independently of the time
12
11
  # since the last event.
13
12
  module Poisson
14
- SHORTHAND='pois'
13
+ SHORTHAND = 'pois'
15
14
  extend Distributable
16
15
  create_distribution_methods
17
16
 
18
17
  ##
19
18
  # :singleton-method: pdf(k , l)
20
- # PDF for Poisson distribution,
21
- # [+k+] is the number of occurrences of an event
19
+ # PDF for Poisson distribution,
20
+ # [+k+] is the number of occurrences of an event
22
21
  # [+l+] is a positive real number, equal to the expected number of occurrences that occur during the given interval.
23
-
22
+
24
23
  ##
25
24
  # :singleton-method: cdf(k , l)
26
25
  # CDF for Poisson distribution
27
- # [+k+] is the number of occurrences of an event
28
- # [+l+] is a positive real number, equal to the expected number of occurrences that occur during the given interval.
29
-
26
+ # [+k+] is the number of occurrences of an event
27
+ # [+l+] is a positive real number, equal to the expected number of occurrences that occur during the given interval.
28
+
30
29
  # TODO: Not implemented yet
31
30
  # :singleton-method: p_value(pr , l)
32
-
33
31
  end
34
32
  end
@@ -2,16 +2,16 @@ module Distribution
2
2
  module Poisson
3
3
  module GSL_
4
4
  class << self
5
- def pdf(k,l)
6
- return 0 if k<0
7
- GSL::Ran.poisson_pdf(k,l.to_f)
5
+ def pdf(k, l)
6
+ return 0 if k < 0
7
+ GSL::Ran.poisson_pdf(k, l.to_f)
8
8
  end
9
- def cdf(k,l)
10
- return 0 if k<0
9
+
10
+ def cdf(k, l)
11
+ return 0 if k < 0
11
12
  GSL::Cdf.poisson_P(k, l.to_f)
12
13
  end
13
-
14
14
  end
15
15
  end
16
16
  end
17
- end
17
+ end
@@ -2,30 +2,25 @@ module Distribution
2
2
  module Poisson
3
3
  module Java_
4
4
  class << self
5
- #==
6
- # Create the PoissonDistributionImpl object for use in calculations
7
- # with mean of l
8
- def create_distribution(l)
9
- PoissonDistributionImpl.new(l)
10
- end
11
-
12
- #==
13
- #
14
- def pdf(k,l)
15
- dist = create_distribution(l)
16
- dist.probability(k)
17
- end
18
-
19
- def cdf(k,l)
20
- dist = create_distribution(l)
21
- dist.cumulativeProbability(k)
22
- end
23
-
24
- # def p_value(pr,l)
25
- # dist = create_distribution(l)
26
- # dist.inverseCumulativeProbability(pr)
27
- # end
5
+ #==
6
+ # Create the PoissonDistributionImpl object for use in calculations
7
+ # with mean of l
8
+ def create_distribution(l)
9
+ PoissonDistributionImpl.new(l)
10
+ end
11
+
12
+ #==
13
+ #
14
+ def pdf(k, l)
15
+ dist = create_distribution(l)
16
+ dist.probability(k)
17
+ end
18
+
19
+ def cdf(k, l)
20
+ dist = create_distribution(l)
21
+ dist.cumulativeProbability(k)
22
+ end
28
23
  end
29
24
  end
30
25
  end
31
- end
26
+ end
@@ -2,20 +2,49 @@ module Distribution
2
2
  module Poisson
3
3
  module Ruby_
4
4
  class << self
5
- def pdf(k,l )
6
- (l**k*Math.exp(-l)).quo(Math.factorial(k))
5
+
6
+ # Return a Proc object which returns a random number drawn
7
+ # from the poisson distribution with lambda.
8
+ #
9
+ # == Arguments
10
+ # * +lambda_val+ - mean of the poisson distribution
11
+ # * +seed+ - seed, an integer value to set the initial state
12
+ #
13
+ # == Algorithm
14
+ # * Donald Knuth
15
+ #
16
+ def rng(lambda_val = 1, seed = nil)
17
+ seed = Random.new_seed if seed.nil?
18
+ r = Random.new(seed).rand
19
+ x = 0
20
+ l = Math.exp(-lambda_val)
21
+ s = l
22
+ while r > s
23
+ x += 1
24
+ l *= lambda_val / x.to_f
25
+ s += l
26
+ end
27
+ x
28
+ end
29
+
30
+ def pdf(k, l)
31
+ (l**k * Math.exp(-l)).quo(Math.factorial(k))
7
32
  end
8
- def cdf(k,l)
9
- Math.exp(-l)*(0..k).inject(0) {|ac,i| ac+ (l**i).quo(Math.factorial(i))}
33
+
34
+ def cdf(k, l)
35
+ Math.exp(-l) * (0..k).inject(0) { |ac, i| ac + (l**i).quo(Math.factorial(i)) }
10
36
  end
11
- def p_value(prob,l)
12
- ac=0
37
+
38
+ def quantile(prob, l)
39
+ ac = 0
13
40
  (0..100).each do |i|
14
- ac+=pdf(i,l)
15
- return i if prob<=ac
41
+ ac += pdf(i, l)
42
+ return i if prob <= ac
16
43
  end
17
44
  end
45
+
46
+ alias_method :p_value, :quantile
18
47
  end
19
48
  end
20
49
  end
21
- end
50
+ end
@@ -0,0 +1,17 @@
1
+ module Distribution
2
+ module Shorthand
3
+ EQUIVALENCES = { p_value: :p, cdf: :cdf, pdf: :pdf, rng: :r,
4
+ exact_pdf: :epdf, exact_cdf: :ecdf, exact_p_value: :ep }
5
+
6
+ def self.add_shortcut(shortcut, method, &block)
7
+ if EQUIVALENCES.include? method.to_sym
8
+ name = shortcut + "_#{method}"
9
+ define_method(name, &block)
10
+
11
+ name = shortcut + "_#{EQUIVALENCES[method.to_sym]}"
12
+ define_method(name, &block)
13
+
14
+ end
15
+ end
16
+ end
17
+ end
@@ -6,26 +6,24 @@ require 'distribution/t/gsl'
6
6
  require 'distribution/t/java'
7
7
 
8
8
  module Distribution
9
+ # Calculate statisticals for T Distribution.
10
+ module T
11
+ SHORTHAND = 'tdist'
12
+ extend Distributable
13
+ create_distribution_methods
9
14
 
10
- # Calculate statisticals for T Distribution.
11
- module T
12
- SHORTHAND='tdist'
13
- extend Distributable
14
- create_distribution_methods
15
-
16
15
  ##
17
16
  # :singleton-method: pdf(x,k)
18
- # Returns the integral of T distribution
19
- # with +k+ degrees of freedom over [0, +x+]
20
-
17
+ # Returns the integral of T distribution
18
+ # with +k+ degrees of freedom over [0, +x+]
21
19
  ##
22
20
  # :singleton-method: p_value(qn, k)
23
- # Return the P-value of the corresponding integral +qn+ with
24
- # +k+ degrees of freedom
25
-
21
+ # Return the P-value of the corresponding integral +qn+ with
22
+ # +k+ degrees of freedom
23
+
26
24
  ##
27
25
  # :singleton-method: cdf(x,k)
28
- # Returns the integral of T distribution
29
- # with +k+ degrees of freedom over [0, +x+]
30
- end
26
+ # Returns the integral of T distribution
27
+ # with +k+ degrees of freedom over [0, +x+]
28
+ end
31
29
  end
@@ -1,29 +1,32 @@
1
1
  module Distribution
2
2
  module T
3
- module GSL_
4
- class << self
5
-
6
- def pdf(x,k)
7
- GSL::Ran.tdist_pdf(x,k)
8
- end
9
- # Return the P-value of the corresponding integral with
10
- # k degrees of freedom
11
- #
12
- # Distribution::F.p_value(0.95,1,2)
13
- def p_value(pr,k)
14
- GSL::Cdf.tdist_Pinv(pr,k)
15
- end
16
- # F cumulative distribution function (cdf).
17
- #
18
- # Returns the integral of F-distribution
19
- # with k1 and k2 degrees of freedom
20
- # over [0, x].
21
- # Distribution::F.cdf(20,3,2)
22
- #
23
- def cdf(x, k)
24
- GSL::Cdf.tdist_P(x.to_f, k)
3
+ module GSL_
4
+ class << self
5
+ def pdf(x, k)
6
+ GSL::Ran.tdist_pdf(x, k)
7
+ end
8
+
9
+ # F cumulative distribution function (cdf).
10
+ #
11
+ # Returns the integral of F-distribution
12
+ # with k1 and k2 degrees of freedom
13
+ # over [0, x].
14
+ # Distribution::F.cdf(20,3,2)
15
+ #
16
+ def cdf(x, k)
17
+ GSL::Cdf.tdist_P(x.to_f, k)
18
+ end
19
+
20
+ # Return the P-value of the corresponding integral with
21
+ # k degrees of freedom
22
+ #
23
+ # Distribution::F.p_value(0.95,1,2)
24
+ def quantile(pr, k)
25
+ GSL::Cdf.tdist_Pinv(pr, k)
26
+ end
27
+
28
+ alias_method :p_value, :quantile
25
29
  end
26
30
  end
27
31
  end
28
- end
29
- end
32
+ end
@@ -6,4 +6,4 @@ module Distribution
6
6
  end
7
7
  end
8
8
  end
9
- end
9
+ end
@@ -1,111 +1,110 @@
1
1
  module Distribution
2
2
  module T
3
- module Ruby_
4
- class << self
5
- def pdf(t,v)
6
- ((Math.gamma((v+1) / 2.0)) / (Math.sqrt(v*Math::PI)*Math.gamma(v/2.0))) * ((1+(t**2 / v.to_f))**(-(v+1) / 2.0))
7
-
8
- end
9
- # Returns the integral of t-distribution with n degrees of freedom over (-Infty, x].
10
- def cdf(t, n)
11
- p_t(n, t)
12
- end
13
-
14
- # t-distribution ([1])
15
- # (-\infty, x]
16
- def p_t(df, t)
17
- if df.to_i!=df
18
- x=(t+Math.sqrt(t**2+df)) / (2*Math.sqrt(t**2+df))
19
- return Math.regularized_beta(x,df/2.0,df/2.0)
3
+ module Ruby_
4
+ class << self
5
+ def pdf(t, v)
6
+ ((Math.gamma((v + 1) / 2.0)) / (Math.sqrt(v * Math::PI) * Math.gamma(v / 2.0))) * ((1 + (t**2 / v.to_f))**(-(v + 1) / 2.0))
20
7
  end
21
- df=df.to_i
22
- c2 = df.to_f / (df + t * t);
23
- s = Math.sqrt(1.0 - c2)
24
- s = -s if t < 0.0
25
- p = 0.0;
26
- i = df % 2 + 2
27
- while i <= df
28
- p += s
29
- s *= (i - 1) * c2 / i
30
- i += 2
8
+
9
+ # Returns the integral of t-distribution with n degrees of freedom over (-Infty, x].
10
+ def cdf(t, n)
11
+ p_t(n, t)
31
12
  end
32
-
33
- if df.is_a? Float or df & 1 != 0
34
- 0.5+(p*Math.sqrt(c2)+Math.atan(t/Math.sqrt(df))) / Math::PI
35
- else
36
- (1.0 + p) / 2.0
13
+
14
+ # t-distribution ([1])
15
+ # (-\infty, x]
16
+ def p_t(df, t)
17
+ if df.to_i != df
18
+ x = (t + Math.sqrt(t**2 + df)) / (2 * Math.sqrt(t**2 + df))
19
+ return Math.regularized_beta(x, df / 2.0, df / 2.0)
20
+ end
21
+ df = df.to_i
22
+ c2 = df.to_f / (df + t * t)
23
+ s = Math.sqrt(1.0 - c2)
24
+ s = -s if t < 0.0
25
+ p = 0.0
26
+ i = df % 2 + 2
27
+ while i <= df
28
+ p += s
29
+ s *= (i - 1) * c2 / i
30
+ i += 2
31
+ end
32
+
33
+ if df.is_a?(Float) || df & 1 != 0
34
+ 0.5 + (p * Math.sqrt(c2) + Math.atan(t / Math.sqrt(df))) / Math::PI
35
+ else
36
+ (1.0 + p) / 2.0
37
+ end
37
38
  end
38
- end
39
-
40
-
41
- # inverse of t-distribution ([2])
42
- # (-\infty, -q/2] + [q/2, \infty)
43
- def ptsub(q, n)
44
- q = q.to_f
45
- if(n == 1 && 0.001 < q && q < 0.01)
46
- eps = 1.0e-4
47
- elsif (n == 2 && q < 0.0001)
48
- eps = 1.0e-4
49
- elsif (n == 1 && q < 0.001)
50
- eps = 1.0e-2
51
- else
52
- eps = 1.0e-5
53
- end
54
- s = 10000.0
55
- w = 0.0
56
- loop do
57
- w += s
58
- if(s <= eps) then return w end
59
- if((qe = 2.0 - p_t(n, w)*2.0 - q) == 0.0) then return w end
60
- if(qe < 0.0)
61
- w -= s
62
- s /= 10.0 #/
63
- end
64
- end
65
- end
66
-
67
- def pt(q, n)
68
- q = q.to_f
69
- if(q < 1.0e-5 || q > 1.0 || n < 1)
70
- $stderr.printf("Error : Illegal parameter in pt()!\n")
71
- return 0.0
39
+
40
+ # inverse of t-distribution ([2])
41
+ # (-\infty, -q/2] + [q/2, \infty)
42
+ def ptsub(q, n)
43
+ q = q.to_f
44
+ if n == 1 && 0.001 < q && q < 0.01
45
+ eps = 1.0e-4
46
+ elsif n == 2 && q < 0.0001
47
+ eps = 1.0e-4
48
+ elsif n == 1 && q < 0.001
49
+ eps = 1.0e-2
50
+ else
51
+ eps = 1.0e-5
52
+ end
53
+ s = 10_000.0
54
+ w = 0.0
55
+ loop do
56
+ w += s
57
+ return w if (s <= eps)
58
+ if ((qe = 2.0 - p_t(n, w) * 2.0 - q) == 0.0) then return w end
59
+ if qe < 0.0
60
+ w -= s
61
+ s /= 10.0 # /
62
+ end
63
+ end
72
64
  end
73
-
74
- if(n <= 5) then return ptsub(q, n) end
75
- if(q <= 5.0e-3 && n <= 13) then return ptsub(q, n) end
76
-
77
- f1 = 4.0 * (f = n.to_f)
78
- f5 = (f4 = (f3 = (f2 = f * f) * f) * f) * f
79
- f2 *= 96.0
80
- f3 *= 384.0
81
- f4 *= 92160.0
82
- f5 *= 368640.0
83
- u = Normal.p_value(1.0 - q / 2.0)
84
-
85
- w0 = (u2 = u * u) * u
86
- w1 = w0 * u2
87
- w2 = w1 * u2
88
- w3 = w2 * u2
89
- w4 = w3 * u2
90
- w = (w0 + u) / f1
91
- w += (5.0 * w1 + 16.0 * w0 + 3.0 * u) / f2
92
- w += (3.0 * w2 + 19.0 * w1 + 17.0 * w0 - 15.0 * u) / f3
93
- w += (79.0 * w3 + 776.0 * w2 + 1482.0 * w1 - 1920.0 * w0 - 9450.0 * u) / f4
94
- w += (27.0 * w4 + 339.0 * w3 + 930.0 * w2 - 1782.0 * w1 - 765.0 * w0 + 17955.0 * u) / f5
95
- u + w
96
- end
97
-
98
- # Returns the P-value of tdist().
99
- def p_value(y,n)
100
- if y > 0.5
101
- pt(2.0 - y*2.0, n)
102
- else
103
- - pt(y*2.0, n)
65
+
66
+ def pt(q, n)
67
+ q = q.to_f
68
+ if q < 1.0e-5 || q > 1.0 || n < 1
69
+ $stderr.printf("Error : Illegal parameter in pt()!\n")
70
+ return 0.0
71
+ end
72
+
73
+ return ptsub(q, n) if (n <= 5)
74
+ return ptsub(q, n) if q <= 5.0e-3 && n <= 13
75
+
76
+ f1 = 4.0 * (f = n.to_f)
77
+ f5 = (f4 = (f3 = (f2 = f * f) * f) * f) * f
78
+ f2 *= 96.0
79
+ f3 *= 384.0
80
+ f4 *= 92_160.0
81
+ f5 *= 368_640.0
82
+ u = Normal.p_value(1.0 - q / 2.0)
83
+
84
+ w0 = (u2 = u * u) * u
85
+ w1 = w0 * u2
86
+ w2 = w1 * u2
87
+ w3 = w2 * u2
88
+ w4 = w3 * u2
89
+ w = (w0 + u) / f1
90
+ w += (5.0 * w1 + 16.0 * w0 + 3.0 * u) / f2
91
+ w += (3.0 * w2 + 19.0 * w1 + 17.0 * w0 - 15.0 * u) / f3
92
+ w += (79.0 * w3 + 776.0 * w2 + 1482.0 * w1 - 1920.0 * w0 - 9450.0 * u) / f4
93
+ w += (27.0 * w4 + 339.0 * w3 + 930.0 * w2 - 1782.0 * w1 - 765.0 * w0 + 17_955.0 * u) / f5
94
+ u + w
104
95
  end
96
+
97
+ # Returns the P-value of tdist().
98
+ def quantile(y, n)
99
+ if y > 0.5
100
+ pt(2.0 - y * 2.0, n)
101
+ else
102
+ - pt(y * 2.0, n)
103
+ end
104
+ end
105
+
106
+ alias_method :p_value, :quantile
105
107
  end
106
-
107
-
108
108
  end
109
109
  end
110
110
  end
111
- end