statsample 0.18.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data.tar.gz.sig +0 -0
- data/History.txt +23 -0
- data/Manifest.txt +28 -17
- data/Rakefile +3 -2
- data/benchmarks/correlation_matrix_15_variables.rb +31 -0
- data/benchmarks/correlation_matrix_5_variables.rb +32 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.ds +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.html +93 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +75 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.xls +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix_gsl_ruby.ods +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix_with_graphics.ods +0 -0
- data/benchmarks/correlation_matrix_methods/results.ds +0 -0
- data/benchmarks/factor_map.rb +37 -0
- data/benchmarks/helpers_benchmark.rb +5 -0
- data/examples/boxplot.rb +13 -14
- data/examples/correlation_matrix.rb +16 -8
- data/examples/dataset.rb +13 -4
- data/examples/dominance_analysis.rb +23 -17
- data/examples/dominance_analysis_bootstrap.rb +28 -22
- data/examples/histogram.rb +8 -9
- data/examples/icc.rb +20 -21
- data/examples/levene.rb +10 -4
- data/examples/multiple_regression.rb +9 -28
- data/examples/multivariate_correlation.rb +9 -3
- data/examples/parallel_analysis.rb +20 -16
- data/examples/polychoric.rb +15 -9
- data/examples/principal_axis.rb +18 -6
- data/examples/reliability.rb +26 -13
- data/examples/scatterplot.rb +10 -6
- data/examples/t_test.rb +15 -6
- data/examples/tetrachoric.rb +9 -2
- data/examples/u_test.rb +12 -4
- data/examples/vector.rb +13 -2
- data/examples/velicer_map_test.rb +33 -26
- data/lib/statsample.rb +32 -12
- data/lib/statsample/analysis.rb +79 -0
- data/lib/statsample/analysis/suite.rb +72 -0
- data/lib/statsample/analysis/suitereportbuilder.rb +38 -0
- data/lib/statsample/bivariate.rb +70 -16
- data/lib/statsample/dataset.rb +25 -19
- data/lib/statsample/dominanceanalysis.rb +2 -2
- data/lib/statsample/factor.rb +2 -0
- data/lib/statsample/factor/map.rb +16 -10
- data/lib/statsample/factor/parallelanalysis.rb +9 -3
- data/lib/statsample/factor/pca.rb +28 -32
- data/lib/statsample/factor/rotation.rb +15 -8
- data/lib/statsample/graph/boxplot.rb +3 -4
- data/lib/statsample/graph/histogram.rb +2 -1
- data/lib/statsample/graph/scatterplot.rb +1 -0
- data/lib/statsample/matrix.rb +106 -16
- data/lib/statsample/regression.rb +4 -1
- data/lib/statsample/regression/binomial.rb +1 -1
- data/lib/statsample/regression/multiple/baseengine.rb +19 -9
- data/lib/statsample/regression/multiple/gslengine.rb +127 -126
- data/lib/statsample/regression/multiple/matrixengine.rb +8 -5
- data/lib/statsample/regression/multiple/rubyengine.rb +1 -1
- data/lib/statsample/regression/simple.rb +31 -6
- data/lib/statsample/reliability.rb +11 -3
- data/lib/statsample/reliability/scaleanalysis.rb +4 -4
- data/lib/statsample/shorthand.rb +81 -0
- data/lib/statsample/test/chisquare.rb +1 -1
- data/lib/statsample/vector.rb +163 -163
- data/lib/statsample/vector/gsl.rb +106 -0
- data/references.txt +2 -2
- data/{data → test/fixtures}/crime.txt +0 -0
- data/{data → test/fixtures}/hartman_23.matrix +0 -0
- data/{data → test/fixtures}/repeated_fields.csv +0 -0
- data/{data → test/fixtures}/test_binomial.csv +0 -0
- data/test/{test_csv.csv → fixtures/test_csv.csv} +0 -0
- data/test/{test_xls.xls → fixtures/test_xls.xls} +0 -0
- data/{data → test/fixtures}/tetmat_matrix.txt +0 -0
- data/{data → test/fixtures}/tetmat_test.txt +0 -0
- data/test/helpers_tests.rb +18 -2
- data/test/test_analysis.rb +118 -0
- data/test/test_anovatwoway.rb +1 -1
- data/test/test_anovatwowaywithdataset.rb +1 -1
- data/test/test_anovawithvectors.rb +1 -2
- data/test/test_bartlettsphericity.rb +1 -2
- data/test/test_bivariate.rb +64 -22
- data/test/test_codification.rb +1 -2
- data/test/test_crosstab.rb +1 -2
- data/test/test_csv.rb +3 -4
- data/test/test_dataset.rb +24 -3
- data/test/test_dominance_analysis.rb +1 -2
- data/test/test_factor.rb +8 -69
- data/test/test_factor_map.rb +43 -0
- data/test/test_factor_pa.rb +54 -0
- data/test/test_ggobi.rb +1 -1
- data/test/test_gsl.rb +12 -18
- data/test/test_histogram.rb +1 -2
- data/test/test_logit.rb +62 -18
- data/test/test_matrix.rb +4 -5
- data/test/test_mle.rb +3 -4
- data/test/test_regression.rb +21 -2
- data/test/test_reliability.rb +3 -3
- data/test/test_reliability_icc.rb +1 -1
- data/test/test_reliability_skillscale.rb +20 -4
- data/test/test_resample.rb +1 -2
- data/test/test_rserve_extension.rb +1 -2
- data/test/test_srs.rb +1 -2
- data/test/test_statistics.rb +1 -2
- data/test/test_stest.rb +1 -2
- data/test/test_stratified.rb +1 -2
- data/test/test_test_f.rb +1 -2
- data/test/test_test_t.rb +1 -2
- data/test/test_umannwhitney.rb +1 -2
- data/test/test_vector.rb +117 -18
- data/test/test_xls.rb +2 -3
- data/web/Rakefile +39 -0
- metadata +109 -29
- metadata.gz.sig +0 -0
- data/examples/parallel_analysis_tetrachoric.rb +0 -31
- data/lib/distribution.rb +0 -25
- data/lib/distribution/chisquare.rb +0 -23
- data/lib/distribution/f.rb +0 -35
- data/lib/distribution/normal.rb +0 -60
- data/lib/distribution/normalbivariate.rb +0 -284
- data/lib/distribution/normalmultivariate.rb +0 -73
- data/lib/distribution/t.rb +0 -55
- data/test/test_distribution.rb +0 -73
|
@@ -1,284 +0,0 @@
|
|
|
1
|
-
module Distribution
  # Calculate pdf and cdf for bivariate normal distribution.
  #
  # Pdf is easy to calculate, but CDF is not trivial. Several papers
  # describe methods to calculate the integral.
  #
  # Three methods are implemented on this module:
  # * Genz:: Used by default, with special handling for |rho| close to 1
  # * Hull:: Port from a C++ code
  # * Jantaravareerat:: Iterative (and slow)
  #
  module NormalBivariate
    class << self
      SIDE = 0.1 # :nodoc: edge length of the integration grid cells
      LIMIT = 5 # :nodoc: the plane is integrated over [-LIMIT, x] x [-LIMIT, y]
      TWO_PI = 6.283185307179586 # :nodoc:

      # Gauss-Legendre [weight, abscissa] pairs used by cdf_genz, keyed by
      # the rule selector (1 => 3 pairs, 2 => 6 pairs, 3 => 10 pairs).
      # Values are the ones from Genz's original Fortran tables.
      GENZ_NODES = {
        1 => [
          [0.1713244923791705E+00, -0.9324695142031522E+00],
          [0.3607615730481384E+00, -0.6612093864662647E+00],
          [0.4679139345726904E+00, -0.2386191860831970E+00]
        ],
        2 => [
          [0.4717533638651177E-01, -0.9815606342467191E+00],
          [0.1069393259953183E+00, -0.9041172563704750E+00],
          [0.1600783285433464E+00, -0.7699026741943050E+00],
          [0.2031674267230659E+00, -0.5873179542866171E+00],
          [0.2334925365383547E+00, -0.3678314989981802E+00],
          [0.2491470458134029E+00, -0.1252334085114692E+00]
        ],
        3 => [
          [0.1761400713915212E-01, -0.9931285991850949E+00],
          [0.4060142980038694E-01, -0.9639719272779138E+00],
          [0.6267204833410906E-01, -0.9122344282513259E+00],
          [0.8327674157670475E-01, -0.8391169718222188E+00],
          [0.1019301198172404E+00, -0.7463319064601508E+00],
          [0.1181945319615184E+00, -0.6360536807265150E+00],
          [0.1316886384491766E+00, -0.5108670019508271E+00],
          [0.1420961093183821E+00, -0.3737060887154196E+00],
          [0.1491729864726037E+00, -0.2277858511416451E+00],
          [0.1527533871307259E+00, -0.7652652113349733E-01]
        ]
      } # :nodoc:

      # Return the partial derivative of cdf over x, with y and rho constant
      # Reference:
      # * Tallis, 1962, p.346, cited by Olsson, 1979
      def partial_derivative_cdf_x(x, y, rho)
        Distribution::Normal.pdf(x) *
          Distribution::Normal.cdf((y - rho * x).quo(Math.sqrt(1 - rho**2)))
      end
      alias :pd_cdf_x :partial_derivative_cdf_x

      # Probability density function for a given x, y and rho value.
      # s1 and s2 are the standard deviations of x and y (both 1.0 by
      # default, i.e. the standard bivariate normal).
      #
      # Source: http://en.wikipedia.org/wiki/Multivariate_normal_distribution
      def pdf(x, y, rho, s1 = 1.0, s2 = 1.0)
        one_minus_rho2 = 1 - rho**2
        # The squares are parenthesised on purpose: `x**2.quo(s1)` would be
        # parsed as `x ** (2.quo(s1))`. Each square is divided by the
        # corresponding variance (s**2).
        quadratic = (x**2).quo(s1**2) + (y**2).quo(s2**2) - (2 * rho * x * y).quo(s1 * s2)
        1.quo(2 * Math::PI * s1 * s2 * Math.sqrt(one_minus_rho2)) *
          Math.exp(-(1.quo(2 * one_minus_rho2)) * quadratic)
      end

      # Integrand used by the quadrature of cdf_hull.
      def f(x, y, aprime, bprime, rho)
        r = aprime * (2 * x - aprime) + bprime * (2 * y - bprime) +
            2 * rho * (x - aprime) * (y - bprime)
        Math.exp(r)
      end

      # CDF for a given x, y and rho value.
      # Uses Genz algorithm (cdf_genz method).
      #
      def cdf(a, b, rho)
        cdf_genz(a, b, rho)
      end

      # Sign of x: 1 for x >= 0, -1 otherwise.
      def sgn(x)
        x >= 0 ? 1 : -1
      end

      # Normal cumulative distribution function (cdf) for a given x, y and rho.
      # Based on Hull (1993, cited by Arne, 2003)
      #
      # Only the case a <= 0, b <= 0, rho <= 0 is integrated directly; every
      # other case is reduced to calls to +cdf+ (i.e. cdf_genz) with
      # transformed arguments.
      #
      # References:
      # * Arne, B.(2003). Financial Numerical Recipes in C ++. Available on http://finance.bi.no/~bernt/gcc_prog/recipes/recipes/node23.html
      def cdf_hull(a, b, rho)
        if a <= 0 && b <= 0 && rho <= 0
          # Route 1: direct 4x4 quadrature.
          aprime = a.quo(Math.sqrt(2.0 * (1.0 - rho**2)))
          bprime = b.quo(Math.sqrt(2.0 * (1.0 - rho**2)))
          aa = [0.3253030, 0.4211071, 0.1334425, 0.006374323]
          bb = [0.1337764, 0.6243247, 1.3425378, 2.2626645]
          sum = 0
          4.times do |i|
            4.times do |j|
              sum += aa[i] * aa[j] * f(bb[i], bb[j], aprime, bprime, rho)
            end
          end
          return sum * (Math.sqrt(1.0 - rho**2).quo(Math::PI))
        elsif a * b * rho <= 0.0
          # Route 2: flip signs so that the remaining integral has all three
          # arguments non-positive.
          if a <= 0 && b >= 0 && rho >= 0
            return Distribution::Normal.cdf(a) - cdf(a, -b, -rho)
          elsif a >= 0.0 && b <= 0.0 && rho >= 0
            return Distribution::Normal.cdf(b) - cdf(-a, b, -rho)
          elsif a >= 0.0 && b >= 0.0 && rho <= 0
            return Distribution::Normal.cdf(a) + Distribution::Normal.cdf(b) - 1.0 + cdf(-a, -b, rho)
          end
        elsif a * b * rho >= 0.0
          # Route 3: split into two integrals with one limit at zero.
          denum = Math.sqrt(a**2 - 2 * rho * a * b + b**2)
          rho1 = ((rho * a - b) * sgn(a)).quo(denum)
          rho2 = ((rho * b - a) * sgn(b)).quo(denum)
          delta = (1.0 - sgn(a) * sgn(b)).quo(4)
          return cdf(a, 0.0, rho1) + cdf(b, 0.0, rho2) - delta
        end
        raise "Should'nt be here! #{a} - #{b} #{rho}"
      end

      # CDF. Iterative method by Jantaravareerat (n/d)
      #
      # Sums pdf over a grid of SIDE x SIDE cells covering
      # [-LIMIT, x] x [-LIMIT, y]; each cell contributes its area times the
      # mean of the pdf at its four corners.
      #
      # Reference:
      # * Jantaravareerat, M. & Thomopoulos, N. (n/d). Tables for standard bivariate normal distribution
      def cdf_jantaravareerat(x, y, rho, s1 = 1, s2 = 1)
        # Special cases: outside +/-LIMIT the integral collapses to a
        # constant or to a univariate cdf.
        return 1 if x > LIMIT && y > LIMIT
        return 0 if x < -LIMIT || y < -LIMIT
        return Distribution::Normal.cdf(y) if x > LIMIT
        return Distribution::Normal.cdf(x) if y > LIMIT
        # From here on both x and y are within [-LIMIT, LIMIT], so no
        # clamping is needed.

        x_squares = ((LIMIT + x) / SIDE).to_i
        y_squares = ((LIMIT + y) / SIDE).to_i
        sum = 0
        x_squares.times do |i|
          y_squares.times do |j|
            z1 = -LIMIT + (i + 1) * SIDE
            z2 = -LIMIT + (j + 1) * SIDE
            h = (pdf(z1, z2, rho, s1, s2) + pdf(z1 - SIDE, z2, rho, s1, s2) +
                 pdf(z1, z2 - SIDE, rho, s1, s2) + pdf(z1 - SIDE, z2 - SIDE, rho, s1, s2)).quo(4)
            sum += (SIDE**2) * h # area
          end
        end
        sum
      end

      # Normal cumulative distribution function (cdf) for a given x, y and rho.
      # Ported from Fortran code by Alan Genz
      #
      # Original documentation
      #    DOUBLE PRECISION FUNCTION BVND( DH, DK, R )
      #    A function for computing bivariate normal probabilities.
      #
      #       Alan Genz
      #       Department of Mathematics
      #       Washington State University
      #       Pullman, WA 99164-3113
      #       Email : alangenz_AT_wsu.edu
      #
      #    This function is based on the method described by
      #        Drezner, Z and G.O. Wesolowsky, (1989),
      #        On the computation of the bivariate normal integral,
      #        Journal of Statist. Comput. Simul. 35, pp. 101-107,
      #    with major modifications for double precision, and for |R| close to 1.
      #
      # Original location:
      # * http://www.math.wsu.edu/faculty/genz/software/fort77/tvpack.f
      def cdf_genz(x, y, rho)
        # The Fortran routine integrates the upper tail, hence the sign flip.
        h = -x
        k = -y
        r = rho

        # More quadrature points as |r| grows.
        nodes = if r.abs < 0.3
                  GENZ_NODES[1]
                elsif r.abs < 0.75
                  GENZ_NODES[2]
                else
                  GENZ_NODES[3]
                end

        hk = h * k
        bvn = 0
        if r.abs < 0.925
          if r.abs > 0
            hs = (h * h + k * k).quo(2)
            asr = Math.asin(r)
            nodes.each do |weight, abscissa|
              [-1, 1].each do |is|
                sn = Math.sin(asr * (is * abscissa + 1).quo(2))
                bvn = bvn + weight * Math.exp((sn * hk - hs).quo(1 - sn * sn))
              end
            end
            bvn = bvn * asr.quo(2 * TWO_PI)
          end
          bvn = bvn + Distribution::Normal.cdf(-h) * Distribution::Normal.cdf(-k)
        else
          # |r| >= 0.925
          if r < 0
            k = -k
            hk = -hk
          end

          if r.abs < 1
            a_sq = (1 - r) * (1 + r)
            a = Math.sqrt(a_sq)
            b_sq = (h - k)**2
            c = (4 - hk).quo(8)
            d = (12 - hk).quo(16)
            asr = -(b_sq.quo(a_sq) + hk).quo(2)
            if asr > -100
              bvn = a * Math.exp(asr) *
                    (1 - c * (b_sq - a_sq) * (1 - d * b_sq.quo(5)).quo(3) + c * d * a_sq * a_sq.quo(5))
            end
            if -hk < 100
              b = Math.sqrt(b_sq)
              bvn = bvn - Math.exp(-hk.quo(2)) * Math.sqrt(TWO_PI) *
                          Distribution::Normal.cdf(-b.quo(a)) * b *
                          (1 - c * b_sq * (1 - d * b_sq.quo(5)).quo(3))
            end

            a = a.quo(2)
            nodes.each do |weight, abscissa|
              [-1, 1].each do |is|
                xs = (a * (is * abscissa + 1))**2
                rs = Math.sqrt(1 - xs)
                asr = -(b_sq / xs + hk).quo(2)
                if asr > -100
                  bvn = bvn + a * weight * Math.exp(asr) *
                              (Math.exp(-hk * (1 - rs).quo(2 * (1 + rs))).quo(rs) - (1 + c * xs * (1 + d * xs)))
                end
              end
            end
            bvn = -bvn / TWO_PI
          end

          if r > 0
            bvn = bvn + Distribution::Normal.cdf(-[h, k].max)
          else
            bvn = -bvn
            if k > h
              bvn = bvn + Distribution::Normal.cdf(k) - Distribution::Normal.cdf(h)
            end
          end
        end
        bvn
      end
      private :f, :sgn
    end
  end
end
|
|
@@ -1,73 +0,0 @@
|
|
|
1
|
-
module Distribution
  # Calculate cdf and inverse cdf for Multivariate Distribution.
  module NormalMultivariate
    class << self
      # Returns multivariate cdf distribution
      # * aa is the array of lower values
      # * bb is the array of higher values
      # * sigma is a symmetric positive definite covariance matrix
      #
      # Not functional: the method raises immediately. Everything after the
      # +raise+ is an unfinished, unreachable draft kept for reference.
      def cdf(aa, bb, sigma, epsilon = 0.0001, alpha = 2.5, max_iterations = 100) # :nodoc:
        raise "Doesn't work yet"
        # --- unreachable draft below ---
        # 1-based copies of the limits, to follow the indexing of the
        # algorithm being ported.
        a = [nil] + aa
        b = [nil] + bb
        m = aa.size
        sigma = sigma.to_gsl if sigma.respond_to? :to_gsl

        cc = GSL::Linalg::Cholesky.decomp(sigma)
        c = cc.lower
        intsum = 0
        varsum = 0
        n = 0
        d = Array.new(m + 1, nil)
        e = Array.new(m + 1, nil)
        f = Array.new(m + 1, nil)
        # A nil limit means "unbounded" on that side.
        (1..m).each do |i|
          d[i] = 0.0 if a[i].nil?
          e[i] = 1.0 if b[i].nil?
        end
        d[1] = uPhi(a[1].quo(c[0, 0])) unless d[1] == 0
        e[1] = uPhi(b[1].quo(c[0, 0])) unless e[1] == 1
        f[1] = e[1] - d[1]

        error = 1000
        loop do
          # NOTE(review): the random draws are scaled by epsilon here; this
          # looks suspicious for a Monte Carlo sampler -- confirm against the
          # reference algorithm before enabling this code.
          w = (m + 1).times.collect { |_i| rand * epsilon }
          y = []
          (2..m).each do |i|
            y[i - 1] = iPhi(d[i - 1] + w[i - 1] * (e[i - 1] - d[i - 1]))
            sumc = 0
            (1..(i - 1)).each do |j|
              sumc += c[i - 1, j - 1] * y[j]
            end

            d[i] = uPhi((a[i] - sumc).quo(c[i - 1, i - 1])) if a[i] != nil
            e[i] = uPhi((b[i] - sumc).quo(c[i - 1, i - 1])) if b[i] != nil
            f[i] = (e[i] - d[i]) * f[i - 1]
          end
          # Plain running sum (the draft used `intsum+=intsum+f[m]`, which
          # doubled the accumulator on every pass).
          intsum += f[m]
          varsum = varsum + f[m]**2
          n += 1
          error = alpha * Math.sqrt((varsum.quo(n) - (intsum.quo(n))**2).quo(n))
          break unless error > epsilon && n < max_iterations
        end

        intsum.quo(n)
      end

      # Inverse of the standard normal cdf (delegates to Distribution::Normal.p_value).
      def iPhi(pr)
        Distribution::Normal.p_value(pr)
      end

      # Standard normal cdf (delegates to Distribution::Normal.cdf).
      def uPhi(x)
        Distribution::Normal.cdf(x)
      end
    end
  end
end
|
data/lib/distribution/t.rb
DELETED
|
@@ -1,55 +0,0 @@
|
|
|
1
|
-
require 'rbconfig'
|
|
2
|
-
module Distribution
  # Calculate cdf and inverse cdf for T Distribution.
  # Uses Statistics2 Module.
  module T
    class << self
      # Return the P-value of the corresponding integral with
      # k degrees of freedom
      def p_value(pr, k)
        Statistics2.ptdist(k, pr)
      end

      # T cumulative distribution function (cdf).
      #
      # Returns the integral of t-distribution
      # with k degrees of freedom over (-Infty, x].
      #
      # On i686 builds the pure-Ruby implementation in this module is used;
      # everywhere else the call is delegated to Statistics2.tdist.
      def cdf(x, k)
        if RbConfig::CONFIG['arch'] =~ /i686/
          tdist(k, x)
        else
          Statistics2.tdist(k, x)
        end
      end

      # Returns the integral of t-distribution with n degrees of freedom over (-Infty, x].
      def tdist(n, t)
        p_t(n, t)
      end

      # t-distribution ([1])
      # (-\infty, x]
      #
      # Sums a finite series whose closed form differs for even and odd
      # degrees of freedom.
      def p_t(df, t)
        c2 = df.to_f / (df + t * t)
        s = Math.sqrt(1.0 - c2)
        s = -s if t < 0.0
        p = 0.0
        # Start at 2 for even df and at 3 for odd df, stepping by 2.
        i = df % 2 + 2
        while i <= df
          p += s
          s *= (i - 1) * c2 / i
          i += 2
        end
        # Pick the closed form by the parity of df itself, so that an even
        # df given as a Float (e.g. 2.0) takes the even branch, just like
        # the Integer 2. Odd and non-integral values use the odd-df form.
        if df % 2 == 0
          (1.0 + p) / 2.0
        else
          0.5 + (p * Math.sqrt(c2) + Math.atan(t / Math.sqrt(df))) / Math::PI
        end
      end
    end
  end
end
|
data/test/test_distribution.rb
DELETED
|
@@ -1,73 +0,0 @@
|
|
|
1
|
-
require(File.dirname(__FILE__)+'/helpers_tests.rb')
|
|
2
|
-
|
|
3
|
-
require 'distribution'
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
# Checks the Distribution module against GSL (when available) and checks
# the bivariate normal cdf implementations against each other and against
# fixed expected values.
class DistributionTestCase < MiniTest::Unit::TestCase
  # Chi-square cdf matches GSL, and p_value inverts cdf.
  def test_chi
    return unless Distribution.has_gsl?
    [2, 3, 4, 5].each do |k|
      chis = rand() * 10
      area = Distribution::ChiSquare.cdf(chis, k)
      assert_in_delta(area, GSL::Cdf.chisq_P(chis, k), 0.0001)
      assert_in_delta(chis, Distribution::ChiSquare.p_value(area, k), 0.0001, "Error on prob #{area} and k #{k}")
    end
  end

  # T cdf and its inverse match GSL.
  def test_t
    return unless Distribution.has_gsl?
    [-2, 0.1, 0.5, 1, 2].each do |t|
      [2, 5, 10].each do |n|
        area = Distribution::T.cdf(t, n)
        assert_in_delta(area, GSL::Cdf.tdist_P(t, n), 0.0001)
        assert_in_delta(Distribution::T.p_value(area, n), GSL::Cdf.tdist_Pinv(area, n), 0.0001)
      end
    end
  end

  # Normal cdf, inverse cdf and pdf match GSL.
  def test_normal
    return unless Distribution.has_gsl?
    [-2, 0.1, 0.5, 1, 2].each do |x|
      area = Distribution::Normal.cdf(x)
      assert_in_delta(area, GSL::Cdf.ugaussian_P(x), 0.0001)
      assert_in_delta(Distribution::Normal.p_value(area), GSL::Cdf.ugaussian_Pinv(area), 0.0001)
      assert_in_delta(Distribution::Normal.pdf(x), GSL::Ran::ugaussian_pdf(x), 0.0001)
    end
  end

  # Bivariate normal: pdf against GSL (when available); the cdf checks
  # below run regardless of GSL.
  def test_normal_bivariate
    if Distribution.has_gsl?
      [0.2, 0.4, 0.6, 0.8, 0.9, 0.99, 0.999, 0.999999].each do |rho|
        assert_in_delta(GSL::Ran::bivariate_gaussian_pdf(0, 0, 1, 1, rho), Distribution::NormalBivariate.pdf(0, 0, rho, 1, 1), 1e-8)
      end
    end

    [-3, -2, -1, 0, 1, 1.5].each do |x|
      assert_in_delta(Distribution::NormalBivariate.cdf_hull(x, x, 0.5), Distribution::NormalBivariate.cdf_genz(x, x, 0.5), 0.001)
      #assert_in_delta(Distribution::NormalBivariate.cdf_genz(x,x,0.5), Distribution::NormalBivariate.cdf_jantaravareerat(x,x,0.5), 0.001)
    end

    assert_in_delta(0.686, Distribution::NormalBivariate.cdf(2, 0.5, 0.5), 0.001)
    assert_in_delta(0.498, Distribution::NormalBivariate.cdf(2, 0.0, 0.5), 0.001)
    assert_in_delta(0.671, Distribution::NormalBivariate.cdf(1.5, 0.5, 0.5), 0.001)

    assert_in_delta(Distribution::Normal.cdf(0), Distribution::NormalBivariate.cdf(10, 0, 0.9), 0.001)
  end

  # F cdf and its inverse match GSL.
  def test_f
    return unless Distribution.has_gsl?
    [0.1, 0.5, 1, 2, 10, 20, 30].each do |f|
      [2, 5, 10].each do |n2|
        [2, 5, 10].each do |n1|
          area = Distribution::F.cdf(f, n1, n2)
          assert_in_delta(area, GSL::Cdf.fdist_P(f, n1, n2), 0.0001)
          assert_in_delta(Distribution::F.p_value(area, n1, n2), GSL::Cdf.fdist_Pinv(area, n1, n2), 0.0001)
        end
      end
    end
  end
end
|