statsample 0.6.5 → 0.6.7
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +15 -0
- data/Manifest.txt +6 -0
- data/README.txt +30 -12
- data/Rakefile +91 -0
- data/demo/levene.rb +9 -0
- data/demo/multiple_regression.rb +1 -7
- data/demo/polychoric.rb +1 -0
- data/demo/principal_axis.rb +8 -0
- data/lib/distribution/f.rb +22 -22
- data/lib/spss.rb +99 -99
- data/lib/statsample/bivariate/polychoric.rb +32 -22
- data/lib/statsample/bivariate/tetrachoric.rb +212 -207
- data/lib/statsample/bivariate.rb +6 -6
- data/lib/statsample/codification.rb +65 -65
- data/lib/statsample/combination.rb +60 -59
- data/lib/statsample/converter/csv19.rb +12 -12
- data/lib/statsample/converters.rb +1 -1
- data/lib/statsample/dataset.rb +93 -36
- data/lib/statsample/dominanceanalysis/bootstrap.rb +66 -3
- data/lib/statsample/dominanceanalysis.rb +5 -6
- data/lib/statsample/factor/pca.rb +41 -11
- data/lib/statsample/factor/principalaxis.rb +105 -29
- data/lib/statsample/factor/rotation.rb +20 -3
- data/lib/statsample/factor.rb +1 -1
- data/lib/statsample/graph/gdchart.rb +13 -13
- data/lib/statsample/graph/svggraph.rb +166 -167
- data/lib/statsample/matrix.rb +22 -12
- data/lib/statsample/mle/logit.rb +3 -2
- data/lib/statsample/mle/probit.rb +7 -5
- data/lib/statsample/mle.rb +4 -2
- data/lib/statsample/multiset.rb +125 -124
- data/lib/statsample/permutation.rb +2 -1
- data/lib/statsample/regression/binomial/logit.rb +4 -3
- data/lib/statsample/regression/binomial/probit.rb +2 -1
- data/lib/statsample/regression/binomial.rb +62 -81
- data/lib/statsample/regression/multiple/baseengine.rb +1 -1
- data/lib/statsample/regression/multiple/gslengine.rb +1 -1
- data/lib/statsample/regression/multiple/matrixengine.rb +12 -6
- data/lib/statsample/regression/multiple.rb +15 -42
- data/lib/statsample/regression/simple.rb +93 -78
- data/lib/statsample/regression.rb +74 -2
- data/lib/statsample/reliability.rb +117 -120
- data/lib/statsample/srs.rb +156 -153
- data/lib/statsample/test/levene.rb +90 -0
- data/lib/statsample/test/umannwhitney.rb +25 -9
- data/lib/statsample/test.rb +2 -0
- data/lib/statsample/vector.rb +388 -413
- data/lib/statsample.rb +74 -30
- data/po/es/statsample.mo +0 -0
- data/test/test_bivariate.rb +5 -4
- data/test/test_combination.rb +1 -1
- data/test/test_dataset.rb +2 -2
- data/test/test_factor.rb +53 -6
- data/test/test_gsl.rb +1 -1
- data/test/test_mle.rb +1 -1
- data/test/test_regression.rb +18 -33
- data/test/test_statistics.rb +15 -33
- data/test/test_stest.rb +35 -0
- data/test/test_svg_graph.rb +2 -2
- data/test/test_vector.rb +331 -333
- metadata +38 -11
@@ -5,7 +5,7 @@ module Statsample
|
|
5
5
|
tc=Tetrachoric.new_with_vectors(v1,v2)
|
6
6
|
tc.r
|
7
7
|
end
|
8
|
-
|
8
|
+
|
9
9
|
# Tetrachoric correlation matrix.
|
10
10
|
# Order of rows and columns depends on Dataset#fields order
|
11
11
|
def self.tetrachoric_correlation_matrix(ds)
|
@@ -21,20 +21,20 @@ module Statsample
|
|
21
21
|
end
|
22
22
|
end
|
23
23
|
end
|
24
|
-
#
|
25
24
|
# Compute tetrachoric correlation.
|
26
|
-
#
|
27
|
-
#
|
28
|
-
#
|
25
|
+
#
|
26
|
+
# The <em>tetrachoric</em> correlation is a measure of
|
27
|
+
# bivariate association arising when both observed variates
|
28
|
+
# are categorical variables that result from dichotomizing
|
29
|
+
# the two underlying continuous variables (Drasgow, 2006).
|
30
|
+
# The tetrachoric correlation is a good way to measure rater agreement (Uebersax, 2006)
|
29
31
|
#
|
30
32
|
# This class uses Brown (1977) algorithm. You can see FORTRAN code on http://lib.stat.cmu.edu/apstat/116
|
31
|
-
#
|
32
|
-
# == References:
|
33
|
-
# * Brown, MB. (1977) Algorithm AS 116: the tetrachoric correlation and its standard error. _Applied Statistics, 26_, 343-351.
|
34
33
|
#
|
35
|
-
#
|
34
|
+
#
|
35
|
+
# == Usage
|
36
36
|
# With two variables x and y on a crosstab like this:
|
37
|
-
#
|
37
|
+
#
|
38
38
|
# -------------
|
39
39
|
# | y=0 | y=1 |
|
40
40
|
# -------------
|
@@ -43,20 +43,25 @@ module Statsample
|
|
43
43
|
# x = 1 | c | d |
|
44
44
|
# -------------
|
45
45
|
#
|
46
|
-
#
|
46
|
+
# The code will be
|
47
47
|
# tc=Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
|
48
48
|
# tc.r # correlation
|
49
49
|
# tc.se # standard error
|
50
50
|
# tc.threshold_y # threshold for y variable
|
51
51
|
# tc.threshold_x # threshold for x variable
|
52
|
-
|
53
|
-
|
52
|
+
#
|
53
|
+
# == References:
|
54
|
+
#
|
55
|
+
# * Brown, MB. (1977) Algorithm AS 116: the tetrachoric correlation and its standard error. <em>Applied Statistics, 26</em>, 343-351.
|
56
|
+
# * Drasgow F. (2006). Polychoric and polyserial correlations. In Kotz L, Johnson NL (Eds.), Encyclopedia of statistical sciences. Vol. 7 (pp. 69-74). New York: Wiley.
|
57
|
+
# * Uebersax, J.S. (2006). The tetrachoric and polychoric correlation coefficients. Statistical Methods for Rater Agreement web site. 2006. Available at: http://john-uebersax.com/stat/tetra.htm . Accessed February, 11, 2010
|
58
|
+
|
54
59
|
class Tetrachoric
|
55
60
|
include GetText
|
56
61
|
bindtextdomain("statsample")
|
57
62
|
attr_reader :r
|
58
63
|
attr_accessor :name
|
59
|
-
|
64
|
+
|
60
65
|
TWOPI=Math::PI*2
|
61
66
|
SQT2PI= 2.50662827
|
62
67
|
RLIMIT = 0.9999
|
@@ -113,7 +118,7 @@ module Statsample
|
|
113
118
|
rp.add(self)
|
114
119
|
rp.to_text
|
115
120
|
end
|
116
|
-
|
121
|
+
|
117
122
|
def to_reportbuilder(generator) # :nodoc:
|
118
123
|
section=ReportBuilder::Section.new(:name=>@name)
|
119
124
|
t=ReportBuilder::Table.new(:name=>_("Contingence Table"),:header=>["","Y=0","Y=1", "T"])
|
@@ -129,211 +134,211 @@ module Statsample
|
|
129
134
|
section.add(_("Threshold Y: %0.3f ") % [threshold_y] )
|
130
135
|
generator.parse_element(section)
|
131
136
|
end
|
132
|
-
|
133
|
-
# Creates a new tetrachoric object for analysis
|
137
|
+
|
138
|
+
# Creates a new tetrachoric object for analysis
|
134
139
|
def initialize(a,b,c,d)
|
135
140
|
@a,@b,@c,@d=a,b,c,d
|
136
141
|
@name=_("Tetrachoric correlation")
|
137
142
|
#
|
138
143
|
# CHECK IF ANY CELL FREQUENCY IS NEGATIVE
|
139
144
|
#
|
140
|
-
raise "All frequencies should be positive" if (@a < 0 or @b < 0 or @c < 0 or @d < 0)
|
145
|
+
raise "All frequencies should be positive" if (@a < 0 or @b < 0 or @c < 0 or @d < 0)
|
141
146
|
compute
|
142
147
|
end
|
143
148
|
# Compute the tetrachoric correlation.
|
144
149
|
# Called on object creation.
|
145
150
|
#
|
146
151
|
def compute
|
147
|
-
|
148
|
-
#
|
149
|
-
# INITIALIZATION
|
150
|
-
#
|
151
|
-
@r = 0
|
152
|
-
sdzero = 0
|
153
|
-
@sdr = 0
|
154
|
-
@itype = 0
|
155
|
-
@ifault = 0
|
156
|
-
|
157
|
-
#
|
158
|
-
# CHECK IF ANY FREQUENCY IS 0.0 AND SET kdelta
|
159
|
-
#
|
160
|
-
@kdelta = 1
|
161
|
-
delta = 0
|
162
|
-
@kdelta = 2 if (@a == 0 or @d == 0)
|
163
|
-
@kdelta += 2 if (@b == 0 or @c == 0)
|
164
|
-
#
|
165
|
-
# kdelta=4 MEANS TABLE HAS 0.0 ROW OR COLUMN, RUN IS TERMINATED
|
166
|
-
#
|
167
152
|
|
168
|
-
|
153
|
+
#
|
154
|
+
# INITIALIZATION
|
155
|
+
#
|
156
|
+
@r = 0
|
157
|
+
sdzero = 0
|
158
|
+
@sdr = 0
|
159
|
+
@itype = 0
|
160
|
+
@ifault = 0
|
169
161
|
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
# 2
|
181
|
-
delta=-0.5
|
182
|
-
@r=1 if (@b==0 and @c==0)
|
183
|
-
end
|
184
|
-
# 4
|
185
|
-
if @r!=0
|
186
|
-
@itype=3
|
187
|
-
end
|
162
|
+
#
|
163
|
+
# CHECK IF ANY FREQUENCY IS 0.0 AND SET kdelta
|
164
|
+
#
|
165
|
+
@kdelta = 1
|
166
|
+
delta = 0
|
167
|
+
@kdelta = 2 if (@a == 0 or @d == 0)
|
168
|
+
@kdelta += 2 if (@b == 0 or @c == 0)
|
169
|
+
#
|
170
|
+
# kdelta=4 MEANS TABLE HAS 0.0 ROW OR COLUMN, RUN IS TERMINATED
|
171
|
+
#
|
188
172
|
|
189
|
-
|
190
|
-
# STORE FREQUENCIES IN AA, BB, CC AND DD
|
191
|
-
#
|
192
|
-
@aa = @a + delta
|
193
|
-
@bb = @b - delta
|
194
|
-
@cc = @c - delta
|
195
|
-
@dd = @d + delta
|
196
|
-
@tot = @aa+@bb+@cc+@dd
|
197
|
-
#
|
198
|
-
# CHECK IF CORRELATION IS NEGATIVE, 0.0, POSITIVE
|
199
|
-
# IF (AA * DD - BB * CC) 7, 5, 6
|
173
|
+
raise "Rows and columns should have more than 0 items" if @kdelta==4
|
200
174
|
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
@
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
175
|
+
# GOTO (4, 1, 2 , 92), kdelta
|
176
|
+
#
|
177
|
+
# delta IS 0.0, 0.5 OR -0.5 ACCORDING TO WHICH CELL IS 0.0
|
178
|
+
#
|
179
|
+
|
180
|
+
if(@kdelta==2)
|
181
|
+
# 1
|
182
|
+
delta=0.5
|
183
|
+
@r=-1 if (@a==0 and @d==0)
|
184
|
+
elsif(@kdelta==3)
|
185
|
+
# 2
|
186
|
+
delta=-0.5
|
187
|
+
@r=1 if (@b==0 and @c==0)
|
188
|
+
end
|
189
|
+
# 4
|
190
|
+
if @r!=0
|
191
|
+
@itype=3
|
212
192
|
end
|
213
|
-
|
193
|
+
|
214
194
|
#
|
215
|
-
#
|
216
|
-
# PROBAA AND PROBAC CHOSEN SO THAT CORRELATION IS POSITIVE.
|
217
|
-
# KSIGN INDICATES WHETHER QUADRANTS HAVE BEEN SWITCHED
|
195
|
+
# STORE FREQUENCIES IN AA, BB, CC AND DD
|
218
196
|
#
|
219
|
-
|
220
|
-
@
|
221
|
-
@
|
222
|
-
@
|
223
|
-
|
224
|
-
# 8
|
225
|
-
|
226
|
-
@probab = (@aa+@bb).quo(@tot)
|
227
|
-
|
228
|
-
#
|
229
|
-
# COMPUTE NORMAL DEVIATES FOR THE MARGINAL FREQUENCIES
|
230
|
-
# SINCE NO MARGINAL CAN BE 0.0, IE IS NOT CHECKED
|
231
|
-
#
|
232
|
-
@zac = Distribution::Normal.p_value(@probac)
|
233
|
-
@zab = Distribution::Normal.p_value(@probab)
|
234
|
-
@ss = Math::exp(-0.5 * (@zac ** 2 + @zab ** 2)).quo(TWOPI)
|
235
|
-
#
|
236
|
-
# WHEN R IS 0.0, 1.0 OR -1.0, TRANSFER TO COMPUTE SDZERO
|
237
|
-
#
|
238
|
-
if (@r != 0 or @itype > 0)
|
239
|
-
compute_sdzero
|
240
|
-
return true
|
241
|
-
end
|
242
|
-
#
|
243
|
-
# WHEN MARGINALS ARE EQUAL, COSINE EVALUATION IS USED
|
244
|
-
#
|
245
|
-
if (@a == @b and @b == @c)
|
246
|
-
calculate_cosine
|
247
|
-
return true
|
248
|
-
end
|
249
|
-
#
|
250
|
-
# INITIAL ESTIMATE OF CORRELATION IS YULES Y
|
251
|
-
#
|
252
|
-
@rr = ((Math::sqrt(@aa * @dd) - Math::sqrt(@bb * @cc)) ** 2) / (@aa * @dd - @bb * @cc).abs
|
253
|
-
@iter = 0
|
254
|
-
begin
|
197
|
+
@aa = @a + delta
|
198
|
+
@bb = @b - delta
|
199
|
+
@cc = @c - delta
|
200
|
+
@dd = @d + delta
|
201
|
+
@tot = @aa+@bb+@cc+@dd
|
255
202
|
#
|
256
|
-
# IF
|
203
|
+
# CHECK IF CORRELATION IS NEGATIVE, 0.0, POSITIVE
|
204
|
+
# IF (AA * DD - BB * CC) 7, 5, 6
|
205
|
+
|
206
|
+
corr_dir=@aa * @dd - @bb * @cc
|
207
|
+
if(corr_dir < 0)
|
208
|
+
# 7
|
209
|
+
@probaa = @bb.quo(@tot)
|
210
|
+
@probac = (@bb + @dd).quo(@tot)
|
211
|
+
@ksign = 2
|
212
|
+
# -> 8
|
213
|
+
else
|
214
|
+
if (corr_dir==0)
|
215
|
+
# 5
|
216
|
+
@itype=4
|
217
|
+
end
|
218
|
+
# 6
|
219
|
+
#
|
220
|
+
# COMPUTE PROBABILITIES OF QUADRANT AND OF MARGINALS
|
221
|
+
# PROBAA AND PROBAC CHOSEN SO THAT CORRELATION IS POSITIVE.
|
222
|
+
# KSIGN INDICATES WHETHER QUADRANTS HAVE BEEN SWITCHED
|
223
|
+
#
|
224
|
+
|
225
|
+
@probaa = @aa.quo(@tot)
|
226
|
+
@probac = (@aa+@cc).quo(@tot)
|
227
|
+
@ksign=1
|
228
|
+
end
|
229
|
+
# 8
|
230
|
+
|
231
|
+
@probab = (@aa+@bb).quo(@tot)
|
232
|
+
|
233
|
+
#
|
234
|
+
# COMPUTE NORMAL DEVIATES FOR THE MARGINAL FREQUENCIES
|
235
|
+
# SINCE NO MARGINAL CAN BE 0.0, IE IS NOT CHECKED
|
257
236
|
#
|
258
|
-
|
259
|
-
|
260
|
-
|
237
|
+
@zac = Distribution::Normal.p_value(@probac)
|
238
|
+
@zab = Distribution::Normal.p_value(@probab)
|
239
|
+
@ss = Math::exp(-0.5 * (@zac ** 2 + @zab ** 2)).quo(TWOPI)
|
240
|
+
#
|
241
|
+
# WHEN R IS 0.0, 1.0 OR -1.0, TRANSFER TO COMPUTE SDZERO
|
242
|
+
#
|
243
|
+
if (@r != 0 or @itype > 0)
|
244
|
+
compute_sdzero
|
261
245
|
return true
|
262
246
|
end
|
263
247
|
#
|
264
|
-
#
|
248
|
+
# WHEN MARGINALS ARE EQUAL, COSINE EVALUATION IS USED
|
249
|
+
#
|
250
|
+
if (@a == @b and @b == @c)
|
251
|
+
calculate_cosine
|
252
|
+
return true
|
253
|
+
end
|
265
254
|
#
|
266
|
-
#
|
255
|
+
# INITIAL ESTIMATE OF CORRELATION IS YULES Y
|
267
256
|
#
|
268
|
-
|
269
|
-
|
270
|
-
wa=1.0
|
271
|
-
wb=@zab.to_f
|
272
|
-
term = 1.0
|
273
|
-
iterm = 0.0
|
274
|
-
@sum = @probab * @probac
|
275
|
-
deriv = 0.0
|
276
|
-
sr = @ss
|
277
|
-
#15
|
257
|
+
@rr = ((Math::sqrt(@aa * @dd) - Math::sqrt(@bb * @cc)) ** 2) / (@aa * @dd - @bb * @cc).abs
|
258
|
+
@iter = 0
|
278
259
|
begin
|
279
|
-
|
260
|
+
#
|
261
|
+
# IF RR EXCEEDS RCUT, GAUSSIAN QUADRATURE IS USED
|
262
|
+
#
|
263
|
+
#10
|
264
|
+
if @rr>RCUT
|
265
|
+
gaussian_quadrature
|
266
|
+
return true
|
267
|
+
end
|
268
|
+
#
|
269
|
+
# TETRACHORIC SERIES IS COMPUTED
|
270
|
+
#
|
271
|
+
# INITIALIZATION
|
272
|
+
#
|
273
|
+
va=1.0
|
274
|
+
vb=@zac.to_f
|
275
|
+
wa=1.0
|
276
|
+
wb=@zab.to_f
|
277
|
+
term = 1.0
|
278
|
+
iterm = 0.0
|
279
|
+
@sum = @probab * @probac
|
280
|
+
deriv = 0.0
|
281
|
+
sr = @ss
|
282
|
+
#15
|
283
|
+
begin
|
284
|
+
if(sr.abs<=CONST)
|
285
|
+
#
|
286
|
+
# RESCALE TERMS TO AVOID OVERFLOWS AND UNDERFLOWS
|
287
|
+
#
|
288
|
+
sr = sr / CONST
|
289
|
+
va = va * CHALF
|
290
|
+
vb = vb * CHALF
|
291
|
+
wa = wa * CHALF
|
292
|
+
wb = wb * CHALF
|
293
|
+
end
|
294
|
+
#
|
295
|
+
# FORM SUM AND DERIVATIVE OF SERIES
|
296
|
+
#
|
297
|
+
# 20
|
298
|
+
dr = sr * va * wa
|
299
|
+
sr = sr * @rr / term
|
300
|
+
cof = sr * va * wa
|
280
301
|
#
|
281
|
-
#
|
302
|
+
# ITERM COUNTS NO. OF CONSECUTIVE TERMS < CONV
|
282
303
|
#
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
304
|
+
iterm+= 1
|
305
|
+
iterm=0 if (cof.abs > CONV)
|
306
|
+
@sum = @sum + cof
|
307
|
+
deriv += dr
|
308
|
+
vaa = va
|
309
|
+
waa = wa
|
310
|
+
va = vb
|
311
|
+
wa = wb
|
312
|
+
vb = @zac * va - term * vaa
|
313
|
+
wb = @zab * wa - term * waa
|
314
|
+
term += 1
|
315
|
+
end while (iterm < 2 or term < 6)
|
316
|
+
#
|
317
|
+
# CHECK IF ITERATION CONVERGED
|
318
|
+
#
|
319
|
+
if((@sum-@probaa).abs <= CITER)
|
320
|
+
@itype=term
|
321
|
+
calculate_sdr
|
322
|
+
return true
|
288
323
|
end
|
289
324
|
#
|
290
|
-
#
|
325
|
+
# CALCULATE NEXT ESTIMATE OF CORRELATION
|
291
326
|
#
|
292
|
-
#
|
293
|
-
|
294
|
-
sr = sr * @rr / term
|
295
|
-
cof = sr * va * wa
|
327
|
+
#25
|
328
|
+
@iter += 1
|
296
329
|
#
|
297
|
-
#
|
330
|
+
# IF TOO MANY ITERATIONS, RUN IS TERMINATED
|
298
331
|
#
|
299
|
-
|
300
|
-
|
301
|
-
@
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
term += 1
|
310
|
-
end while (iterm < 2 or term < 6)
|
311
|
-
#
|
312
|
-
# CHECK IF ITERATION CONVERGED
|
313
|
-
#
|
314
|
-
if((@sum-@probaa).abs <= CITER)
|
315
|
-
@itype=term
|
316
|
-
calculate_sdr
|
317
|
-
return true
|
318
|
-
end
|
319
|
-
#
|
320
|
-
# CALCULATE NEXT ESTIMATE OF CORRELATION
|
321
|
-
#
|
322
|
-
#25
|
323
|
-
@iter += 1
|
324
|
-
#
|
325
|
-
# IF TOO MANY ITERATlONS, RUN IS TERMINATED
|
326
|
-
#
|
327
|
-
delta = (@sum - @probaa) / deriv
|
328
|
-
@rrprev = @rr
|
329
|
-
@rr = @rr - delta
|
330
|
-
@rr += 0.5 * delta if(@iter == 1)
|
331
|
-
@rr= RLIMIT if (@rr > RLIMIT)
|
332
|
-
@rr =0 if (@rr < 0.0)
|
333
|
-
end while @iter < NITER
|
334
|
-
raise "Too many iteration"
|
335
|
-
# GOTO 10
|
336
|
-
end
|
332
|
+
delta = (@sum - @probaa) / deriv
|
333
|
+
@rrprev = @rr
|
334
|
+
@rr = @rr - delta
|
335
|
+
@rr += 0.5 * delta if(@iter == 1)
|
336
|
+
@rr= RLIMIT if (@rr > RLIMIT)
|
337
|
+
@rr =0 if (@rr < 0.0)
|
338
|
+
end while @iter < NITER
|
339
|
+
raise "Too many iteration"
|
340
|
+
# GOTO 10
|
341
|
+
end
|
337
342
|
# GAUSSIAN QUADRATURE
|
338
343
|
# 40
|
339
344
|
def gaussian_quadrature
|
@@ -342,8 +347,8 @@ module Statsample
|
|
342
347
|
@sum=@probab*@probac
|
343
348
|
@rrprev=0
|
344
349
|
end
|
345
|
-
|
346
|
-
# 41
|
350
|
+
|
351
|
+
# 41
|
347
352
|
sumprv = @probab - @sum
|
348
353
|
@prob = @bb.quo(@tot)
|
349
354
|
@prob = @aa.quo(@tot) if (@ksign == 2)
|
@@ -353,7 +358,7 @@ module Statsample
|
|
353
358
|
# COMPUTATION OF INTEGRAL (SUM) BY QUADRATURE
|
354
359
|
#
|
355
360
|
# 42
|
356
|
-
|
361
|
+
|
357
362
|
begin
|
358
363
|
rrsq = Math::sqrt(1 - @rr ** 2)
|
359
364
|
amid = 0.5 * (UPLIM + @zac)
|
@@ -362,8 +367,8 @@ module Statsample
|
|
362
367
|
(1..16).each do |iquad|
|
363
368
|
xla = amid + X[iquad] * xlen
|
364
369
|
xlb = amid - X[iquad] * xlen
|
365
|
-
|
366
|
-
|
370
|
+
|
371
|
+
|
367
372
|
#
|
368
373
|
# TO AVOID UNDERFLOWS, TEMPA AND TEMPB ARE USED
|
369
374
|
#
|
@@ -372,7 +377,7 @@ module Statsample
|
|
372
377
|
@sum = @sum + W[iquad] * Math::exp(-0.5 * xla ** 2) * Distribution::Normal.cdf(tempa)
|
373
378
|
end
|
374
379
|
tempb = (@zab - @rr * xlb) / rrsq
|
375
|
-
|
380
|
+
|
376
381
|
if (tempb >= -6.0)
|
377
382
|
@sum = @sum + W[iquad] * Math::exp(-0.5 * xlb ** 2) * Distribution::Normal.cdf(tempb)
|
378
383
|
end
|
@@ -380,29 +385,29 @@ module Statsample
|
|
380
385
|
@sum=@sum*xlen / SQT2PI
|
381
386
|
#
|
382
387
|
# CHECK IF ITERATION HAS CONVERGED
|
383
|
-
#
|
384
|
-
if ((@prob - @sum).abs <= CITER)
|
388
|
+
#
|
389
|
+
if ((@prob - @sum).abs <= CITER)
|
385
390
|
calculate_sdr
|
386
391
|
return true
|
387
392
|
end
|
388
393
|
# ESTIMATE CORRELATION FOR NEXT ITERATION BY LINEAR INTERPOLATION
|
389
|
-
|
394
|
+
|
390
395
|
rrest = ((@prob - @sum) * @rrprev - (@prob - sumprv) * @rr) / (sumprv - @sum)
|
391
|
-
rrest = RLIMIT if (rrest > RLIMIT)
|
392
|
-
rrest = 0 if (rrest < 0)
|
396
|
+
rrest = RLIMIT if (rrest > RLIMIT)
|
397
|
+
rrest = 0 if (rrest < 0)
|
393
398
|
@rrprev = @rr
|
394
399
|
@rr = rrest
|
395
400
|
sumprv = @sum
|
396
401
|
#
|
397
402
|
# if estimate has same value on two iterations, stop iteration
|
398
403
|
#
|
399
|
-
if @rr == @rrprev
|
404
|
+
if @rr == @rrprev
|
400
405
|
calculate_sdr
|
401
406
|
return true
|
402
407
|
end
|
403
|
-
|
404
|
-
|
405
|
-
end while @iter < NITER
|
408
|
+
|
409
|
+
|
410
|
+
end while @iter < NITER
|
406
411
|
raise "Too many iterations"
|
407
412
|
# GOTO 42
|
408
413
|
end
|
@@ -414,16 +419,16 @@ module Statsample
|
|
414
419
|
@itype = 2
|
415
420
|
calculate_sdr
|
416
421
|
end
|
417
|
-
|
418
|
-
|
422
|
+
|
423
|
+
|
419
424
|
def calculate_sdr # :nodoc:
|
420
425
|
#
|
421
426
|
# COMPUTE SDR
|
422
427
|
#
|
423
428
|
@r = @rr
|
424
|
-
rrsq = Math::sqrt(1.0 - @r ** 2)
|
429
|
+
rrsq = Math::sqrt(1.0 - @r ** 2)
|
425
430
|
@itype = -@itype if (@kdelta > 1)
|
426
|
-
if (@ksign != 1)
|
431
|
+
if (@ksign != 1)
|
427
432
|
@r = -@r
|
428
433
|
@zac = -@zac
|
429
434
|
end
|
@@ -431,13 +436,13 @@ module Statsample
|
|
431
436
|
pdf = Math::exp(-0.5 * (@zac ** 2 - 2 * @r * @zac * @zab + @zab ** 2) / rrsq ** 2) / (TWOPI * rrsq)
|
432
437
|
@pac = Distribution::Normal.cdf((@zac - @r * @zab) / rrsq) - 0.5
|
433
438
|
@pab = Distribution::Normal.cdf((@zab - @r * @zac) / rrsq) - 0.5
|
434
|
-
|
439
|
+
|
435
440
|
@sdr = ((@aa+@dd) * (@bb + @cc)).quo(4) + @pab ** 2 * (@aa + @cc) * (@bb + @dd) + @pac ** 2 * (@aa + @bb) * (@cc + @dd) + 2.0 * @pab * @pac * (@aa * @dd - @bb * @cc) - @pab * (@aa * @bb - @cc * @dd) - @pac * (@aa * @cc - @bb * @dd)
|
436
441
|
@sdr=0 if (@sdr<0)
|
437
442
|
@sdr= Math::sqrt(@sdr) / (@tot * pdf * Math::sqrt(@tot))
|
438
443
|
compute_sdzero
|
439
444
|
end
|
440
|
-
|
445
|
+
|
441
446
|
# 85
|
442
447
|
#
|
443
448
|
# COMPUTE SDZERO
|
data/lib/statsample/bivariate.rb
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
require 'statsample/bivariate/tetrachoric'
|
2
2
|
require 'statsample/bivariate/polychoric'
|
3
3
|
module Statsample
|
4
|
-
# Diverse
|
4
|
+
# Diverse bivariate methods, including #covariance, #pearson correlation (r), #spearman ranked correlation (rho), #tetrachoric correlation and #polychoric correlation.
|
5
5
|
module Bivariate
|
6
6
|
class << self
|
7
7
|
# Covariance between two vectors
|
8
8
|
def covariance(v1,v2)
|
9
9
|
v1a,v2a=Statsample.only_valid(v1,v2)
|
10
10
|
return nil if v1a.size==0
|
11
|
-
if
|
11
|
+
if Statsample.has_gsl?
|
12
12
|
GSL::Stats::covariance(v1a.gsl, v2a.gsl)
|
13
13
|
else
|
14
14
|
covariance_slow(v1a,v2a)
|
@@ -31,11 +31,12 @@ module Statsample
|
|
31
31
|
(0...v1a.size).each {|i| t+=((v1a[i]-m1)*(v2a[i]-m2)) }
|
32
32
|
t.to_f / (v1a.size-1)
|
33
33
|
end
|
34
|
-
|
34
|
+
|
35
|
+
# Calculate Pearson correlation coefficient (r) between 2 vectors
|
35
36
|
def pearson(v1,v2)
|
36
37
|
v1a,v2a=Statsample.only_valid(v1,v2)
|
37
38
|
return nil if v1a.size ==0
|
38
|
-
if
|
39
|
+
if Statsample.has_gsl?
|
39
40
|
GSL::Stats::correlation(v1a.gsl, v2a.gsl)
|
40
41
|
else
|
41
42
|
pearson_slow(v1a,v2a)
|
@@ -177,7 +178,7 @@ module Statsample
|
|
177
178
|
Matrix.rows(rows)
|
178
179
|
end
|
179
180
|
|
180
|
-
# Spearman ranked correlation coefficient between 2 vectors
|
181
|
+
# Spearman ranked correlation coefficient (rho) between 2 vectors
|
181
182
|
def spearman(v1,v2)
|
182
183
|
v1a,v2a=Statsample.only_valid(v1,v2)
|
183
184
|
v1r,v2r=v1a.ranked(:scale),v2a.ranked(:scale)
|
@@ -195,7 +196,6 @@ module Statsample
|
|
195
196
|
((m1.mean-m0.mean).to_f / ds['c'].sdp) * Math::sqrt(m0.size*m1.size.to_f / ds.cases**2)
|
196
197
|
end
|
197
198
|
# Kendall Rank Correlation Coefficient.
|
198
|
-
#
|
199
199
|
# Based on Hervé Abdi's article
|
200
200
|
def tau_a(v1,v2)
|
201
201
|
v1a,v2a=Statsample.only_valid(v1,v2)
|