statsample 0.18.0 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data.tar.gz.sig +0 -0
- data/History.txt +23 -0
- data/Manifest.txt +28 -17
- data/Rakefile +3 -2
- data/benchmarks/correlation_matrix_15_variables.rb +31 -0
- data/benchmarks/correlation_matrix_5_variables.rb +32 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.ds +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.html +93 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +75 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.xls +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix_gsl_ruby.ods +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix_with_graphics.ods +0 -0
- data/benchmarks/correlation_matrix_methods/results.ds +0 -0
- data/benchmarks/factor_map.rb +37 -0
- data/benchmarks/helpers_benchmark.rb +5 -0
- data/examples/boxplot.rb +13 -14
- data/examples/correlation_matrix.rb +16 -8
- data/examples/dataset.rb +13 -4
- data/examples/dominance_analysis.rb +23 -17
- data/examples/dominance_analysis_bootstrap.rb +28 -22
- data/examples/histogram.rb +8 -9
- data/examples/icc.rb +20 -21
- data/examples/levene.rb +10 -4
- data/examples/multiple_regression.rb +9 -28
- data/examples/multivariate_correlation.rb +9 -3
- data/examples/parallel_analysis.rb +20 -16
- data/examples/polychoric.rb +15 -9
- data/examples/principal_axis.rb +18 -6
- data/examples/reliability.rb +26 -13
- data/examples/scatterplot.rb +10 -6
- data/examples/t_test.rb +15 -6
- data/examples/tetrachoric.rb +9 -2
- data/examples/u_test.rb +12 -4
- data/examples/vector.rb +13 -2
- data/examples/velicer_map_test.rb +33 -26
- data/lib/statsample.rb +32 -12
- data/lib/statsample/analysis.rb +79 -0
- data/lib/statsample/analysis/suite.rb +72 -0
- data/lib/statsample/analysis/suitereportbuilder.rb +38 -0
- data/lib/statsample/bivariate.rb +70 -16
- data/lib/statsample/dataset.rb +25 -19
- data/lib/statsample/dominanceanalysis.rb +2 -2
- data/lib/statsample/factor.rb +2 -0
- data/lib/statsample/factor/map.rb +16 -10
- data/lib/statsample/factor/parallelanalysis.rb +9 -3
- data/lib/statsample/factor/pca.rb +28 -32
- data/lib/statsample/factor/rotation.rb +15 -8
- data/lib/statsample/graph/boxplot.rb +3 -4
- data/lib/statsample/graph/histogram.rb +2 -1
- data/lib/statsample/graph/scatterplot.rb +1 -0
- data/lib/statsample/matrix.rb +106 -16
- data/lib/statsample/regression.rb +4 -1
- data/lib/statsample/regression/binomial.rb +1 -1
- data/lib/statsample/regression/multiple/baseengine.rb +19 -9
- data/lib/statsample/regression/multiple/gslengine.rb +127 -126
- data/lib/statsample/regression/multiple/matrixengine.rb +8 -5
- data/lib/statsample/regression/multiple/rubyengine.rb +1 -1
- data/lib/statsample/regression/simple.rb +31 -6
- data/lib/statsample/reliability.rb +11 -3
- data/lib/statsample/reliability/scaleanalysis.rb +4 -4
- data/lib/statsample/shorthand.rb +81 -0
- data/lib/statsample/test/chisquare.rb +1 -1
- data/lib/statsample/vector.rb +163 -163
- data/lib/statsample/vector/gsl.rb +106 -0
- data/references.txt +2 -2
- data/{data → test/fixtures}/crime.txt +0 -0
- data/{data → test/fixtures}/hartman_23.matrix +0 -0
- data/{data → test/fixtures}/repeated_fields.csv +0 -0
- data/{data → test/fixtures}/test_binomial.csv +0 -0
- data/test/{test_csv.csv → fixtures/test_csv.csv} +0 -0
- data/test/{test_xls.xls → fixtures/test_xls.xls} +0 -0
- data/{data → test/fixtures}/tetmat_matrix.txt +0 -0
- data/{data → test/fixtures}/tetmat_test.txt +0 -0
- data/test/helpers_tests.rb +18 -2
- data/test/test_analysis.rb +118 -0
- data/test/test_anovatwoway.rb +1 -1
- data/test/test_anovatwowaywithdataset.rb +1 -1
- data/test/test_anovawithvectors.rb +1 -2
- data/test/test_bartlettsphericity.rb +1 -2
- data/test/test_bivariate.rb +64 -22
- data/test/test_codification.rb +1 -2
- data/test/test_crosstab.rb +1 -2
- data/test/test_csv.rb +3 -4
- data/test/test_dataset.rb +24 -3
- data/test/test_dominance_analysis.rb +1 -2
- data/test/test_factor.rb +8 -69
- data/test/test_factor_map.rb +43 -0
- data/test/test_factor_pa.rb +54 -0
- data/test/test_ggobi.rb +1 -1
- data/test/test_gsl.rb +12 -18
- data/test/test_histogram.rb +1 -2
- data/test/test_logit.rb +62 -18
- data/test/test_matrix.rb +4 -5
- data/test/test_mle.rb +3 -4
- data/test/test_regression.rb +21 -2
- data/test/test_reliability.rb +3 -3
- data/test/test_reliability_icc.rb +1 -1
- data/test/test_reliability_skillscale.rb +20 -4
- data/test/test_resample.rb +1 -2
- data/test/test_rserve_extension.rb +1 -2
- data/test/test_srs.rb +1 -2
- data/test/test_statistics.rb +1 -2
- data/test/test_stest.rb +1 -2
- data/test/test_stratified.rb +1 -2
- data/test/test_test_f.rb +1 -2
- data/test/test_test_t.rb +1 -2
- data/test/test_umannwhitney.rb +1 -2
- data/test/test_vector.rb +117 -18
- data/test/test_xls.rb +2 -3
- data/web/Rakefile +39 -0
- metadata +109 -29
- metadata.gz.sig +0 -0
- data/examples/parallel_analysis_tetrachoric.rb +0 -31
- data/lib/distribution.rb +0 -25
- data/lib/distribution/chisquare.rb +0 -23
- data/lib/distribution/f.rb +0 -35
- data/lib/distribution/normal.rb +0 -60
- data/lib/distribution/normalbivariate.rb +0 -284
- data/lib/distribution/normalmultivariate.rb +0 -73
- data/lib/distribution/t.rb +0 -55
- data/test/test_distribution.rb +0 -73
@@ -1,284 +0,0 @@
|
|
1
|
-
module Distribution
|
2
|
-
# Calculate pdf and cdf for bivariate normal distribution.
|
3
|
-
#
|
4
|
-
# Pdf is easy to calculate, but CDF is not trivial. Several papers
|
5
|
-
# describe methods to calculate the integral.
|
6
|
-
#
|
7
|
-
# Three methods are implemented on this module:
|
8
|
-
# * Genz:: Used by default, with improvement to calculate p on rho > 0.95
|
9
|
-
# * Hull:: Port from a C++ code
|
10
|
-
# * Jantaravareerat:: Iterative (and slow)
|
11
|
-
#
|
12
|
-
|
13
|
-
module NormalBivariate
|
14
|
-
|
15
|
-
class << self
|
16
|
-
SIDE=0.1 # :nodoc:
|
17
|
-
LIMIT=5 # :nodoc:
|
18
|
-
# Return the partial derivative of cdf over x, with y and rho constant
|
19
|
-
# Reference:
|
20
|
-
# * Tallis, 1962, p.346, cited by Olsson, 1979
|
21
|
-
# Partial derivative of the bivariate normal CDF with respect to x,
# holding y and rho constant.
#
# Computed as the univariate normal density at x multiplied by the
# univariate normal CDF evaluated at (y - rho*x) / sqrt(1 - rho**2).
def partial_derivative_cdf_x(x,y,rho)
  density_x = Distribution::Normal.pdf(x)
  shifted_y = (y - rho*x).quo(Math::sqrt(1 - rho**2))
  density_x * Distribution::Normal.cdf(shifted_y)
end
|
24
|
-
alias :pd_cdf_x :partial_derivative_cdf_x
|
25
|
-
# Probability density function for a given x, y and rho value.
|
26
|
-
#
|
27
|
-
# Source: http://en.wikipedia.org/wiki/Multivariate_normal_distribution
|
28
|
-
# Probability density function of the bivariate normal distribution with
# zero means, evaluated at (x, y).
#
# * x, y:: point at which the density is evaluated
# * rho:: correlation between both variables (|rho| < 1)
# * s1, s2:: standard deviations of x and y; they enter the normalising
#   constant as 2*PI*s1*s2*sqrt(1 - rho**2)
#
# The squared terms are divided by the squared standard deviations. The
# previous form, <tt>x**2.quo(s1)</tt>, was parsed as
# <tt>x ** (2.quo(s1))</tt> and was only correct for s1 == s2 == 1.
#
# Source: http://en.wikipedia.org/wiki/Multivariate_normal_distribution
def pdf(x,y, rho, s1=1.0, s2=1.0)
  one_minus_rho2 = 1 - rho**2
  normalizer = 1.quo(2 * Math::PI * s1 * s2 * Math::sqrt(one_minus_rho2))
  quadratic_form = (x**2).quo(s1**2) + (y**2).quo(s2**2) - (2*rho*x*y).quo(s1*s2)
  normalizer * Math::exp(-(1.quo(2*one_minus_rho2)) * quadratic_form)
end
|
32
|
-
|
33
|
-
# Exponential kernel used by cdf_hull: returns exp of
#   aprime*(2x - aprime) + bprime*(2y - bprime) + 2*rho*(x - aprime)*(y - bprime)
def f(x,y,aprime,bprime,rho)
  a_term = aprime*(2*x - aprime)
  b_term = bprime*(2*y - bprime)
  cross_term = 2*rho*(x - aprime)*(y - bprime)
  Math::exp(a_term + b_term + cross_term)
end
|
37
|
-
|
38
|
-
# CDF for a given x, y and rho value.
|
39
|
-
# Uses Genz algorithm (cdf_genz method).
|
40
|
-
#
|
41
|
-
# Default bivariate normal CDF entry point for the point (a, b) with
# correlation rho. Simply forwards its arguments to cdf_genz.
def cdf(a,b,rho)
  cdf_genz(a, b, rho)
end
|
44
|
-
|
45
|
-
# Sign helper that treats zero as positive:
# returns 1 when x >= 0 and -1 otherwise.
def sgn(x)
  x >= 0 ? 1 : -1
end
|
52
|
-
|
53
|
-
# Normal cumulative distribution function (cdf) for a given x, y and rho.
|
54
|
-
# Based on Hull (1993, cited by Arne, 2003)
|
55
|
-
#
|
56
|
-
# References:
|
57
|
-
# * Arne, B.(2003). Financial Numerical Recipes in C ++. Available on http://finance.bi.no/~bernt/gcc_prog/recipes/recipes/node23.html
|
58
|
-
# Bivariate normal CDF for (a, b) with correlation rho, following the
# case analysis of Hull (1993, cited by Arne, 2003).
#
# * a <= 0, b <= 0, rho <= 0: evaluated directly with a 4x4 weighted sum
#   of the kernel +f+.
# * a*b*rho <= 0: reduced by symmetry to a call with flipped signs.
# * a*b*rho >= 0: split into two calls with one argument fixed at 0.
#
# NOTE(review): the reduced cases call +cdf+, not +cdf_hull+, so only the
# first case is evaluated with Hull's formula here - confirm this is the
# intended behaviour.
#
# Raises RuntimeError when no case applies (e.g. NaN input).
def cdf_hull(a,b,rho)
  if a <= 0 && b <= 0 && rho <= 0
    scale = Math::sqrt(2.0*(1.0 - rho**2))
    aprime = a.quo(scale)
    bprime = b.quo(scale)
    weights = [0.3253030, 0.4211071, 0.1334425, 0.006374323]
    nodes = [0.1337764, 0.6243247, 1.3425378, 2.2626645]
    total = 0
    weights.each_with_index do |weight_i, i|
      weights.each_with_index do |weight_j, j|
        total += weight_i*weight_j * f(nodes[i], nodes[j], aprime, bprime, rho)
      end
    end
    return total*(Math::sqrt(1.0 - rho**2).quo(Math::PI))
  end

  sign_product = a*b*rho
  if sign_product <= 0.0
    if a <= 0 && b >= 0 && rho >= 0
      return Distribution::Normal.cdf(a) - cdf(a, -b, -rho)
    elsif a >= 0.0 && b <= 0.0 && rho >= 0
      return Distribution::Normal.cdf(b) - cdf(-a, b, -rho)
    elsif a >= 0.0 && b >= 0.0 && rho <= 0
      return Distribution::Normal.cdf(a) + Distribution::Normal.cdf(b) - 1.0 + cdf(-a, -b, rho)
    end
  elsif sign_product >= 0.0
    denum = Math::sqrt(a**2 - 2*rho*a*b + b**2)
    rho1 = ((rho*a - b)*sgn(a)).quo(denum)
    rho2 = ((rho*b - a)*sgn(b)).quo(denum)
    delta = (1.0 - sgn(a)*sgn(b)).quo(4)
    return cdf(a, 0.0, rho1) + cdf(b, 0.0, rho2) - delta
  end

  raise "Should'nt be here! #{a} - #{b} #{rho}"
end
|
95
|
-
|
96
|
-
# CDF. Iterative method by Jantaravareerat (n/d)
|
97
|
-
#
|
98
|
-
# Reference:
|
99
|
-
# * Jantaravareerat, M. & Thomopoulos, N. (n/d). Tables for standard bivariate normal distribution
|
100
|
-
|
101
|
-
# Bivariate normal CDF computed by brute-force numerical integration of
# +pdf+ over a grid of SIDE x SIDE cells that starts at (-LIMIT, -LIMIT).
#
# * x, y:: upper integration bounds
# * rho:: correlation passed on to +pdf+
# * s1, s2:: scale arguments passed on to +pdf+
#
# Special cases are resolved before integrating:
# * both bounds above LIMIT -> 1
# * any bound below -LIMIT  -> 0
# * only x above LIMIT      -> univariate normal CDF of y
# * only y above LIMIT      -> univariate normal CDF of x
#
# After those early returns x and y can no longer lie outside
# [-LIMIT, LIMIT], so no further clamping is needed.
#
# Each cell contributes its area times the mean of the density at its
# four corners. Slow: the number of +pdf+ evaluations grows with the
# number of cells between -LIMIT and the bounds.
def cdf_jantaravareerat(x,y,rho,s1=1,s2=1)
  return 1 if x > LIMIT && y > LIMIT
  return 0 if x < -LIMIT || y < -LIMIT
  return Distribution::Normal.cdf(y) if x > LIMIT
  return Distribution::Normal.cdf(x) if y > LIMIT

  # Number of whole cells between -LIMIT and each bound.
  x_squares = ((LIMIT + x) / SIDE).to_i
  y_squares = ((LIMIT + y) / SIDE).to_i
  sum = 0
  x_squares.times do |i|
    z1 = -LIMIT + (i + 1)*SIDE
    y_squares.times do |j|
      z2 = -LIMIT + (j + 1)*SIDE
      # Mean density over the four corners of the current cell.
      h = (pdf(z1, z2, rho, s1, s2) + pdf(z1 - SIDE, z2, rho, s1, s2) +
           pdf(z1, z2 - SIDE, rho, s1, s2) + pdf(z1 - SIDE, z2 - SIDE, rho, s1, s2)).quo(4)
      sum += (SIDE**2)*h # area
    end
  end
  sum
end
|
128
|
-
# Normal cumulative distribution function (cdf) for a given x, y and rho.
|
129
|
-
# Ported from Fortran code by Alan Genz
|
130
|
-
#
|
131
|
-
# Original documentation
|
132
|
-
# DOUBLE PRECISION FUNCTION BVND( DH, DK, R )
|
133
|
-
# A function for computing bivariate normal probabilities.
|
134
|
-
#
|
135
|
-
# Alan Genz
|
136
|
-
# Department of Mathematics
|
137
|
-
# Washington State University
|
138
|
-
# Pullman, WA 99164-3113
|
139
|
-
# Email : alangenz_AT_wsu.edu
|
140
|
-
#
|
141
|
-
# This function is based on the method described by
|
142
|
-
# Drezner, Z and G.O. Wesolowsky, (1989),
|
143
|
-
# On the computation of the bivariate normal integral,
|
144
|
-
# Journal of Statist. Comput. Simul. 35, pp. 101-107,
|
145
|
-
# with major modifications for double precision, and for |R| close to 1.
|
146
|
-
#
|
147
|
-
# Original location:
|
148
|
-
# * http://www.math.wsu.edu/faculty/genz/software/fort77/tvpack.f
|
149
|
-
# Bivariate normal CDF for (x, y) with correlation rho; Ruby port of Alan
# Genz's Fortran function BVND (called with dh = -x, dk = -y, r = rho).
#
# The integral is evaluated with Gauss-Legendre quadrature. The number of
# [weight, abscissa] pairs grows with |rho| (3, 6 or 10 pairs), and a
# separate formulation is used once |rho| >= 0.925.
def cdf_genz(x,y,rho)
  two_pi = 6.283185307179586
  abs_rho = rho.abs

  # Gauss-Legendre [weight, abscissa] pairs for the selected order.
  nodes =
    if abs_rho < 0.3
      [[0.1713244923791705E+00, -0.9324695142031522E+00],
       [0.3607615730481384E+00, -0.6612093864662647E+00],
       [0.4679139345726904E+00, -0.2386191860831970E+00]]
    elsif abs_rho < 0.75
      [[0.4717533638651177E-01, -0.9815606342467191E+00],
       [0.1069393259953183E+00, -0.9041172563704750E+00],
       [0.1600783285433464E+00, -0.7699026741943050E+00],
       [0.2031674267230659E+00, -0.5873179542866171E+00],
       [0.2334925365383547E+00, -0.3678314989981802E+00],
       [0.2491470458134029E+00, -0.1252334085114692E+00]]
    else
      [[0.1761400713915212E-01, -0.9931285991850949E+00],
       [0.4060142980038694E-01, -0.9639719272779138E+00],
       [0.6267204833410906E-01, -0.9122344282513259E+00],
       [0.8327674157670475E-01, -0.8391169718222188E+00],
       [0.1019301198172404E+00, -0.7463319064601508E+00],
       [0.1181945319615184E+00, -0.6360536807265150E+00],
       [0.1316886384491766E+00, -0.5108670019508271E+00],
       [0.1420961093183821E+00, -0.3737060887154196E+00],
       [0.1491729864726037E+00, -0.2277858511416451E+00],
       [0.1527533871307259E+00, -0.7652652113349733E-01]]
    end

  h = -x
  k = -y
  hk = h*k
  bvn = 0

  if abs_rho < 0.925
    if abs_rho > 0
      hs = (h*h + k*k).quo(2)
      asr = Math::asin(rho)
      nodes.each do |weight, node|
        [-1, 1].each do |sign|
          sn = Math::sin(asr*(sign*node + 1).quo(2))
          bvn = bvn + weight * Math::exp((sn*hk - hs).quo(1 - sn*sn))
        end
      end
      bvn = bvn*asr.quo(2*two_pi)
    end
    bvn = bvn + Distribution::Normal.cdf(-h) * Distribution::Normal.cdf(-k)
  else
    # Work with a positive correlation; the sign is restored at the end.
    if rho < 0
      k = -k
      hk = -hk
    end

    if abs_rho < 1
      one_minus_r2 = (1 - rho)*(1 + rho)
      a = Math::sqrt(one_minus_r2)
      bs = (h - k)**2
      c = (4 - hk).quo(8)
      d = (12 - hk).quo(16)
      asr = -(bs.quo(one_minus_r2) + hk).quo(2)
      if asr > -100
        bvn = a*Math::exp(asr) *
              (1 - c*(bs - one_minus_r2)*(1 - d*bs.quo(5)).quo(3) + c*d*one_minus_r2*one_minus_r2.quo(5))
      end
      if -hk < 100
        b = Math::sqrt(bs)
        bvn = bvn - Math::exp(-hk.quo(2)) * Math::sqrt(two_pi)*Distribution::Normal.cdf(-b.quo(a))*b *
                    (1 - c*bs*(1 - d*bs.quo(5)).quo(3))
      end

      a = a.quo(2)
      nodes.each do |weight, node|
        [-1, 1].each do |sign|
          xs = (a*(sign*node + 1))**2
          rs = Math::sqrt(1 - xs)
          asr = -(bs/xs + hk).quo(2)
          if asr > -100
            bvn = bvn + a*weight * Math::exp(asr) *
                        (Math::exp(-hk*(1 - rs).quo(2*(1 + rs))).quo(rs) - (1 + c*xs*(1 + d*xs)))
          end
        end
      end
      bvn = -bvn/two_pi
    end

    if rho > 0
      bvn = bvn + Distribution::Normal.cdf(-[h, k].max)
    else
      bvn = -bvn
      if k > h
        bvn = bvn + Distribution::Normal.cdf(k) - Distribution::Normal.cdf(h)
      end
    end
  end
  bvn
end
|
281
|
-
private :f, :sgn
|
282
|
-
end
|
283
|
-
end
|
284
|
-
end
|
@@ -1,73 +0,0 @@
|
|
1
|
-
module Distribution
  # Calculate cdf and inverse cdf for Multivariate Distribution.
  module NormalMultivariate
    class << self
      # Multivariate normal cdf by Monte Carlo integration.
      # Currently disabled: it always raises RuntimeError ("Doesn't work yet")
      # before doing any work; the code below the raise is work in progress.
      #
      # * aa is the array of lower values (nil entries mean "no lower bound")
      # * bb is the array of higher values (nil entries mean "no upper bound")
      # * sigma is a symmetric positive definite covariance matrix
      #   (converted with +to_gsl+ when it responds to it)
      # * epsilon is the error level at which iteration stops
      # * alpha is the multiplier applied to the standard error estimate
      # * max_iterations caps the number of Monte Carlo samples
      #
      # NOTE(review): the sampling weights are drawn as <tt>rand*epsilon</tt>;
      # Genz's algorithm appears to use uniform draws on [0, 1] - confirm
      # against the reference before enabling this method.
      def cdf(aa,bb,sigma, epsilon=0.0001, alpha=2.5, max_iterations=100) # :nodoc:
        raise "Doesn't work yet"
        # 1-based copies of the bounds, matching the indexing used below.
        a = [nil] + aa
        b = [nil] + bb
        m = aa.size
        sigma = sigma.to_gsl if sigma.respond_to? :to_gsl

        # Lower triangular Cholesky factor of the covariance matrix.
        c = GSL::Linalg::Cholesky.decomp(sigma).lower
        intsum = 0
        varsum = 0
        n = 0
        d = Array.new(m+1, nil)
        e = Array.new(m+1, nil)
        f = Array.new(m+1, nil)
        (1..m).each do |i|
          d[i] = 0.0 if a[i].nil?
          e[i] = 1.0 if b[i].nil?
        end
        d[1] = uPhi(a[1].quo(c[0,0])) unless d[1]==0
        e[1] = uPhi(b[1].quo(c[0,0])) unless e[1]==1
        f[1] = e[1] - d[1]

        loop do
          w = (m+1).times.collect { rand*epsilon }
          y = []
          (2..m).each do |i|
            y[i-1] = iPhi(d[i-1] + w[i-1] * (e[i-1] - d[i-1]))
            sumc = 0
            (1..(i-1)).each { |j| sumc += c[i-1, j-1]*y[j] }

            d[i] = uPhi((a[i]-sumc).quo(c[i-1,i-1])) unless a[i].nil?
            e[i] = uPhi((b[i]-sumc).quo(c[i-1,i-1])) unless b[i].nil?
            f[i] = (e[i]-d[i])*f[i-1]
          end
          # Accumulate the sample once (the old code added intsum to itself).
          intsum += f[m]
          varsum += f[m]**2
          n += 1
          error = alpha*Math::sqrt((varsum.quo(n) - (intsum.quo(n))**2).quo(n))
          break unless error > epsilon && n < max_iterations
        end

        # Mean of the sampled integrand values.
        intsum.quo(n)
      end

      # Inverse of the standard normal cdf: delegates to
      # Distribution::Normal.p_value.
      def iPhi(pr)
        Distribution::Normal.p_value(pr)
      end

      # Standard normal cdf: delegates to Distribution::Normal.cdf.
      def uPhi(x)
        Distribution::Normal.cdf(x)
      end
    end
  end
end
|
data/lib/distribution/t.rb
DELETED
@@ -1,55 +0,0 @@
|
|
1
|
-
require 'rbconfig'
|
2
|
-
module Distribution

  # Calculate cdf and inverse cdf for T Distribution.
  # Uses Statistics2 Module.
  module T
    class << self
      # Inverse cdf: forwards to Statistics2.ptdist with k degrees of
      # freedom and probability pr.
      def p_value(pr,k)
        Statistics2.ptdist(k, pr)
      end

      # T cumulative distribution function (cdf) with k degrees of freedom
      # evaluated at x.
      #
      # When the RbConfig architecture string matches i686 the pure Ruby
      # implementation (+tdist+) is used; otherwise Statistics2.tdist.
      def cdf(x,k)
        return tdist(k, x) if RbConfig::CONFIG['arch']=~/i686/

        Statistics2.tdist(k,x)
      end

      # Pure Ruby cdf of the t-distribution with n degrees of freedom at t.
      # Thin wrapper around +p_t+.
      def tdist(n, t)
        p_t(n, t)
      end

      # t-distribution ([1]), integral over (-\infty, t] with df degrees
      # of freedom, evaluated with a finite series.
      def p_t(df, t)
        cos2 = df.to_f / (df + t * t)
        term = Math.sqrt(1.0 - cos2)
        term = -term if t < 0.0
        series = 0.0
        # The series starts at 2 for even df and at 3 for odd df.
        step = df % 2 + 2
        while step <= df
          series += term
          term *= (step - 1) * cos2 / step
          step += 2
        end
        # Float or odd df use the arctangent form; even df the closed form.
        if df.is_a?(Float) || (df & 1) != 0
          0.5 + (series*Math.sqrt(cos2) + Math.atan(t/Math.sqrt(df)))/Math::PI
        else
          (1.0 + series) / 2.0
        end
      end
    end
  end
end
|
data/test/test_distribution.rb
DELETED
@@ -1,73 +0,0 @@
|
|
1
|
-
require(File.dirname(__FILE__)+'/helpers_tests.rb')
|
2
|
-
|
3
|
-
require 'distribution'
|
4
|
-
|
5
|
-
|
6
|
-
# Checks the Distribution module against GSL reference values when GSL is
# available, plus internal consistency of the bivariate normal cdf methods.
class DistributionTestCase < MiniTest::Unit::TestCase
  # Chi-square cdf and its inverse against GSL, at a random point.
  def test_chi
    return unless Distribution.has_gsl?

    [2,3,4,5].each do |k|
      chis = rand()*10
      area = Distribution::ChiSquare.cdf(chis, k)
      assert_in_delta(area, GSL::Cdf.chisq_P(chis,k), 0.0001)
      assert_in_delta(chis, Distribution::ChiSquare.p_value(area,k), 0.0001, "Error on prob #{area} and k #{k}")
    end
  end

  # T cdf and its inverse against GSL.
  def test_t
    return unless Distribution.has_gsl?

    [-2,0.1,0.5,1,2].each do |t|
      [2,5,10].each do |n|
        area = Distribution::T.cdf(t,n)
        assert_in_delta(area, GSL::Cdf.tdist_P(t,n), 0.0001)
        assert_in_delta(Distribution::T.p_value(area,n), GSL::Cdf.tdist_Pinv(area,n), 0.0001)
      end
    end
  end

  # Normal cdf, inverse cdf and pdf against GSL.
  def test_normal
    return unless Distribution.has_gsl?

    [-2,0.1,0.5,1,2].each do |x|
      area = Distribution::Normal.cdf(x)
      assert_in_delta(area, GSL::Cdf.ugaussian_P(x), 0.0001)
      assert_in_delta(Distribution::Normal.p_value(area), GSL::Cdf.ugaussian_Pinv(area), 0.0001)
      assert_in_delta(Distribution::Normal.pdf(x), GSL::Ran::ugaussian_pdf(x), 0.0001)
    end
  end

  # Bivariate normal: pdf against GSL (when present), Hull vs Genz cdf,
  # and a few fixed cdf values.
  def test_normal_bivariate
    if Distribution.has_gsl?
      [0.2,0.4,0.6,0.8,0.9, 0.99,0.999,0.999999].each do |rho|
        assert_in_delta(GSL::Ran::bivariate_gaussian_pdf(0, 0, 1,1,rho), Distribution::NormalBivariate.pdf(0,0, rho , 1,1), 1e-8)
      end
    end

    [-3,-2,-1,0,1,1.5].each do |x|
      hull = Distribution::NormalBivariate.cdf_hull(x,x,0.5)
      genz = Distribution::NormalBivariate.cdf_genz(x,x,0.5)
      assert_in_delta(hull, genz, 0.001)
      # Comparison with the iterative method is disabled:
      #assert_in_delta(Distribution::NormalBivariate.cdf_genz(x,x,0.5), Distribution::NormalBivariate.cdf_jantaravareerat(x,x,0.5), 0.001)
    end

    assert_in_delta(0.686, Distribution::NormalBivariate.cdf(2,0.5,0.5), 0.001)
    assert_in_delta(0.498, Distribution::NormalBivariate.cdf(2,0.0,0.5), 0.001)
    assert_in_delta(0.671, Distribution::NormalBivariate.cdf(1.5,0.5,0.5), 0.001)

    assert_in_delta(Distribution::Normal.cdf(0), Distribution::NormalBivariate.cdf(10,0,0.9), 0.001)
  end

  # F cdf and its inverse against GSL.
  def test_f
    return unless Distribution.has_gsl?

    [0.1,0.5,1,2,10,20,30].each do |f|
      [2,5,10].each do |n2|
        [2,5,10].each do |n1|
          area = Distribution::F.cdf(f,n1,n2)
          assert_in_delta(area, GSL::Cdf.fdist_P(f,n1,n2), 0.0001)
          assert_in_delta(Distribution::F.p_value(area,n1,n2), GSL::Cdf.fdist_Pinv(area,n1,n2), 0.0001)
        end
      end
    end
  end

end
|