statsample 0.6.5 → 0.6.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +15 -0
- data/Manifest.txt +6 -0
- data/README.txt +30 -12
- data/Rakefile +91 -0
- data/demo/levene.rb +9 -0
- data/demo/multiple_regression.rb +1 -7
- data/demo/polychoric.rb +1 -0
- data/demo/principal_axis.rb +8 -0
- data/lib/distribution/f.rb +22 -22
- data/lib/spss.rb +99 -99
- data/lib/statsample/bivariate/polychoric.rb +32 -22
- data/lib/statsample/bivariate/tetrachoric.rb +212 -207
- data/lib/statsample/bivariate.rb +6 -6
- data/lib/statsample/codification.rb +65 -65
- data/lib/statsample/combination.rb +60 -59
- data/lib/statsample/converter/csv19.rb +12 -12
- data/lib/statsample/converters.rb +1 -1
- data/lib/statsample/dataset.rb +93 -36
- data/lib/statsample/dominanceanalysis/bootstrap.rb +66 -3
- data/lib/statsample/dominanceanalysis.rb +5 -6
- data/lib/statsample/factor/pca.rb +41 -11
- data/lib/statsample/factor/principalaxis.rb +105 -29
- data/lib/statsample/factor/rotation.rb +20 -3
- data/lib/statsample/factor.rb +1 -1
- data/lib/statsample/graph/gdchart.rb +13 -13
- data/lib/statsample/graph/svggraph.rb +166 -167
- data/lib/statsample/matrix.rb +22 -12
- data/lib/statsample/mle/logit.rb +3 -2
- data/lib/statsample/mle/probit.rb +7 -5
- data/lib/statsample/mle.rb +4 -2
- data/lib/statsample/multiset.rb +125 -124
- data/lib/statsample/permutation.rb +2 -1
- data/lib/statsample/regression/binomial/logit.rb +4 -3
- data/lib/statsample/regression/binomial/probit.rb +2 -1
- data/lib/statsample/regression/binomial.rb +62 -81
- data/lib/statsample/regression/multiple/baseengine.rb +1 -1
- data/lib/statsample/regression/multiple/gslengine.rb +1 -1
- data/lib/statsample/regression/multiple/matrixengine.rb +12 -6
- data/lib/statsample/regression/multiple.rb +15 -42
- data/lib/statsample/regression/simple.rb +93 -78
- data/lib/statsample/regression.rb +74 -2
- data/lib/statsample/reliability.rb +117 -120
- data/lib/statsample/srs.rb +156 -153
- data/lib/statsample/test/levene.rb +90 -0
- data/lib/statsample/test/umannwhitney.rb +25 -9
- data/lib/statsample/test.rb +2 -0
- data/lib/statsample/vector.rb +388 -413
- data/lib/statsample.rb +74 -30
- data/po/es/statsample.mo +0 -0
- data/test/test_bivariate.rb +5 -4
- data/test/test_combination.rb +1 -1
- data/test/test_dataset.rb +2 -2
- data/test/test_factor.rb +53 -6
- data/test/test_gsl.rb +1 -1
- data/test/test_mle.rb +1 -1
- data/test/test_regression.rb +18 -33
- data/test/test_statistics.rb +15 -33
- data/test/test_stest.rb +35 -0
- data/test/test_svg_graph.rb +2 -2
- data/test/test_vector.rb +331 -333
- metadata +38 -11
@@ -5,7 +5,7 @@ module Statsample
|
|
5
5
|
tc=Tetrachoric.new_with_vectors(v1,v2)
|
6
6
|
tc.r
|
7
7
|
end
|
8
|
-
|
8
|
+
|
9
9
|
# Tetrachoric correlation matrix.
|
10
10
|
# Order of rows and columns depends on Dataset#fields order
|
11
11
|
def self.tetrachoric_correlation_matrix(ds)
|
@@ -21,20 +21,20 @@ module Statsample
|
|
21
21
|
end
|
22
22
|
end
|
23
23
|
end
|
24
|
-
#
|
25
24
|
# Compute tetrachoric correlation.
|
26
|
-
#
|
27
|
-
#
|
28
|
-
#
|
25
|
+
#
|
26
|
+
# The <em>tetrachoric</em> correlation is a measure of
|
27
|
+
# bivariate association arising when both observed variates
|
28
|
+
# are categorical variables that result from dichotomizing
|
29
|
+
# the two underlying continuous variables (Drasgow, 2006).
|
30
|
+
# The tetrachoric correlation is a good way to measure rater agreement (Uebersax, 2006)
|
29
31
|
#
|
30
32
|
# This class uses Brown (1977) algorithm. You can see FORTRAN code on http://lib.stat.cmu.edu/apstat/116
|
31
|
-
#
|
32
|
-
# == References:
|
33
|
-
# * Brown, MB. (1977) Algorithm AS 116: the tetrachoric correlation and its standard error. _Applied Statistics, 26_, 343-351.
|
34
33
|
#
|
35
|
-
#
|
34
|
+
#
|
35
|
+
# == Usage
|
36
36
|
# With two variables x and y on a crosstab like this:
|
37
|
-
#
|
37
|
+
#
|
38
38
|
# -------------
|
39
39
|
# | y=0 | y=1 |
|
40
40
|
# -------------
|
@@ -43,20 +43,25 @@ module Statsample
|
|
43
43
|
# x = 1 | c | d |
|
44
44
|
# -------------
|
45
45
|
#
|
46
|
-
#
|
46
|
+
# The code will be
|
47
47
|
# tc=Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
|
48
48
|
# tc.r # correlation
|
49
49
|
# tc.se # standard error
|
50
50
|
# tc.threshold_y # threshold for y variable
|
51
51
|
# tc.threshold_x # threshold for x variable
|
52
|
-
|
53
|
-
|
52
|
+
#
|
53
|
+
# == References:
|
54
|
+
#
|
55
|
+
# * Brown, MB. (1977) Algorithm AS 116: the tetrachoric correlation and its standard error. <em>Applied Statistics, 26</em>, 343-351.
|
56
|
+
# * Drasgow F. (2006). Polychoric and polyserial correlations. In Kotz L, Johnson NL (Eds.), Encyclopedia of statistical sciences. Vol. 7 (pp. 69-74). New York: Wiley.
|
57
|
+
# * Uebersax, J.S. (2006). The tetrachoric and polychoric correlation coefficients. Statistical Methods for Rater Agreement web site. 2006. Available at: http://john-uebersax.com/stat/tetra.htm . Accessed February, 11, 2010
|
58
|
+
|
54
59
|
class Tetrachoric
|
55
60
|
include GetText
|
56
61
|
bindtextdomain("statsample")
|
57
62
|
attr_reader :r
|
58
63
|
attr_accessor :name
|
59
|
-
|
64
|
+
|
60
65
|
TWOPI=Math::PI*2
|
61
66
|
SQT2PI= 2.50662827
|
62
67
|
RLIMIT = 0.9999
|
@@ -113,7 +118,7 @@ module Statsample
|
|
113
118
|
rp.add(self)
|
114
119
|
rp.to_text
|
115
120
|
end
|
116
|
-
|
121
|
+
|
117
122
|
def to_reportbuilder(generator) # :nodoc:
|
118
123
|
section=ReportBuilder::Section.new(:name=>@name)
|
119
124
|
t=ReportBuilder::Table.new(:name=>_("Contingence Table"),:header=>["","Y=0","Y=1", "T"])
|
@@ -129,211 +134,211 @@ module Statsample
|
|
129
134
|
section.add(_("Threshold Y: %0.3f ") % [threshold_y] )
|
130
135
|
generator.parse_element(section)
|
131
136
|
end
|
132
|
-
|
133
|
-
# Creates a new tetrachoric object for analysis
|
137
|
+
|
138
|
+
# Creates a new tetrachoric object for analysis
|
134
139
|
def initialize(a,b,c,d)
|
135
140
|
@a,@b,@c,@d=a,b,c,d
|
136
141
|
@name=_("Tetrachoric correlation")
|
137
142
|
#
|
138
143
|
# CHECK IF ANY CELL FREQUENCY IS NEGATIVE
|
139
144
|
#
|
140
|
-
raise "All frequencies should be positive" if (@a < 0 or @b < 0 or @c < 0 or @d < 0)
|
145
|
+
raise "All frequencies should be positive" if (@a < 0 or @b < 0 or @c < 0 or @d < 0)
|
141
146
|
compute
|
142
147
|
end
|
143
148
|
# Compute the tetrachoric correlation.
|
144
149
|
# Called on object creation.
|
145
150
|
#
|
146
151
|
def compute
|
147
|
-
|
148
|
-
#
|
149
|
-
# INITIALIZATION
|
150
|
-
#
|
151
|
-
@r = 0
|
152
|
-
sdzero = 0
|
153
|
-
@sdr = 0
|
154
|
-
@itype = 0
|
155
|
-
@ifault = 0
|
156
|
-
|
157
|
-
#
|
158
|
-
# CHECK IF ANY FREQUENCY IS 0.0 AND SET kdelta
|
159
|
-
#
|
160
|
-
@kdelta = 1
|
161
|
-
delta = 0
|
162
|
-
@kdelta = 2 if (@a == 0 or @d == 0)
|
163
|
-
@kdelta += 2 if (@b == 0 or @c == 0)
|
164
|
-
#
|
165
|
-
# kdelta=4 MEANS TABLE HAS 0.0 ROW OR COLUMN, RUN IS TERMINATED
|
166
|
-
#
|
167
152
|
|
168
|
-
|
153
|
+
#
|
154
|
+
# INITIALIZATION
|
155
|
+
#
|
156
|
+
@r = 0
|
157
|
+
sdzero = 0
|
158
|
+
@sdr = 0
|
159
|
+
@itype = 0
|
160
|
+
@ifault = 0
|
169
161
|
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
# 2
|
181
|
-
delta=-0.5
|
182
|
-
@r=1 if (@b==0 and @c==0)
|
183
|
-
end
|
184
|
-
# 4
|
185
|
-
if @r!=0
|
186
|
-
@itype=3
|
187
|
-
end
|
162
|
+
#
|
163
|
+
# CHECK IF ANY FREQUENCY IS 0.0 AND SET kdelta
|
164
|
+
#
|
165
|
+
@kdelta = 1
|
166
|
+
delta = 0
|
167
|
+
@kdelta = 2 if (@a == 0 or @d == 0)
|
168
|
+
@kdelta += 2 if (@b == 0 or @c == 0)
|
169
|
+
#
|
170
|
+
# kdelta=4 MEANS TABLE HAS 0.0 ROW OR COLUMN, RUN IS TERMINATED
|
171
|
+
#
|
188
172
|
|
189
|
-
|
190
|
-
# STORE FREQUENCIES IN AA, BB, CC AND DD
|
191
|
-
#
|
192
|
-
@aa = @a + delta
|
193
|
-
@bb = @b - delta
|
194
|
-
@cc = @c - delta
|
195
|
-
@dd = @d + delta
|
196
|
-
@tot = @aa+@bb+@cc+@dd
|
197
|
-
#
|
198
|
-
# CHECK IF CORRELATION IS NEGATIVE, 0.0, POSITIVE
|
199
|
-
# IF (AA * DD - BB * CC) 7, 5, 6
|
173
|
+
raise "Rows and columns should have more than 0 items" if @kdelta==4
|
200
174
|
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
@
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
175
|
+
# GOTO (4, 1, 2 , 92), kdelta
|
176
|
+
#
|
177
|
+
# delta IS 0.0, 0.5 OR -0.5 ACCORDING TO WHICH CELL IS 0.0
|
178
|
+
#
|
179
|
+
|
180
|
+
if(@kdelta==2)
|
181
|
+
# 1
|
182
|
+
delta=0.5
|
183
|
+
@r=-1 if (@a==0 and @d==0)
|
184
|
+
elsif(@kdelta==3)
|
185
|
+
# 2
|
186
|
+
delta=-0.5
|
187
|
+
@r=1 if (@b==0 and @c==0)
|
188
|
+
end
|
189
|
+
# 4
|
190
|
+
if @r!=0
|
191
|
+
@itype=3
|
212
192
|
end
|
213
|
-
|
193
|
+
|
214
194
|
#
|
215
|
-
#
|
216
|
-
# PROBAA AND PROBAC CHOSEN SO THAT CORRELATION IS POSITIVE.
|
217
|
-
# KSIGN INDICATES WHETHER QUADRANTS HAVE BEEN SWITCHED
|
195
|
+
# STORE FREQUENCIES IN AA, BB, CC AND DD
|
218
196
|
#
|
219
|
-
|
220
|
-
@
|
221
|
-
@
|
222
|
-
@
|
223
|
-
|
224
|
-
# 8
|
225
|
-
|
226
|
-
@probab = (@aa+@bb).quo(@tot)
|
227
|
-
|
228
|
-
#
|
229
|
-
# COMPUTE NORMAL DEVIATES FOR THE MARGINAL FREQUENCIES
|
230
|
-
# SINCE NO MARGINAL CAN BE 0.0, IE IS NOT CHECKED
|
231
|
-
#
|
232
|
-
@zac = Distribution::Normal.p_value(@probac)
|
233
|
-
@zab = Distribution::Normal.p_value(@probab)
|
234
|
-
@ss = Math::exp(-0.5 * (@zac ** 2 + @zab ** 2)).quo(TWOPI)
|
235
|
-
#
|
236
|
-
# WHEN R IS 0.0, 1.0 OR -1.0, TRANSFER TO COMPUTE SDZERO
|
237
|
-
#
|
238
|
-
if (@r != 0 or @itype > 0)
|
239
|
-
compute_sdzero
|
240
|
-
return true
|
241
|
-
end
|
242
|
-
#
|
243
|
-
# WHEN MARGINALS ARE EQUAL, COSINE EVALUATION IS USED
|
244
|
-
#
|
245
|
-
if (@a == @b and @b == @c)
|
246
|
-
calculate_cosine
|
247
|
-
return true
|
248
|
-
end
|
249
|
-
#
|
250
|
-
# INITIAL ESTIMATE OF CORRELATION IS YULES Y
|
251
|
-
#
|
252
|
-
@rr = ((Math::sqrt(@aa * @dd) - Math::sqrt(@bb * @cc)) ** 2) / (@aa * @dd - @bb * @cc).abs
|
253
|
-
@iter = 0
|
254
|
-
begin
|
197
|
+
@aa = @a + delta
|
198
|
+
@bb = @b - delta
|
199
|
+
@cc = @c - delta
|
200
|
+
@dd = @d + delta
|
201
|
+
@tot = @aa+@bb+@cc+@dd
|
255
202
|
#
|
256
|
-
# IF
|
203
|
+
# CHECK IF CORRELATION IS NEGATIVE, 0.0, POSITIVE
|
204
|
+
# IF (AA * DD - BB * CC) 7, 5, 6
|
205
|
+
|
206
|
+
corr_dir=@aa * @dd - @bb * @cc
|
207
|
+
if(corr_dir < 0)
|
208
|
+
# 7
|
209
|
+
@probaa = @bb.quo(@tot)
|
210
|
+
@probac = (@bb + @dd).quo(@tot)
|
211
|
+
@ksign = 2
|
212
|
+
# -> 8
|
213
|
+
else
|
214
|
+
if (corr_dir==0)
|
215
|
+
# 5
|
216
|
+
@itype=4
|
217
|
+
end
|
218
|
+
# 6
|
219
|
+
#
|
220
|
+
# COMPUTE PROBABILITIES OF QUADRANT AND OF MARGINALS
|
221
|
+
# PROBAA AND PROBAC CHOSEN SO THAT CORRELATION IS POSITIVE.
|
222
|
+
# KSIGN INDICATES WHETHER QUADRANTS HAVE BEEN SWITCHED
|
223
|
+
#
|
224
|
+
|
225
|
+
@probaa = @aa.quo(@tot)
|
226
|
+
@probac = (@aa+@cc).quo(@tot)
|
227
|
+
@ksign=1
|
228
|
+
end
|
229
|
+
# 8
|
230
|
+
|
231
|
+
@probab = (@aa+@bb).quo(@tot)
|
232
|
+
|
233
|
+
#
|
234
|
+
# COMPUTE NORMAL DEVIATES FOR THE MARGINAL FREQUENCIES
|
235
|
+
# SINCE NO MARGINAL CAN BE 0.0, IE IS NOT CHECKED
|
257
236
|
#
|
258
|
-
|
259
|
-
|
260
|
-
|
237
|
+
@zac = Distribution::Normal.p_value(@probac)
|
238
|
+
@zab = Distribution::Normal.p_value(@probab)
|
239
|
+
@ss = Math::exp(-0.5 * (@zac ** 2 + @zab ** 2)).quo(TWOPI)
|
240
|
+
#
|
241
|
+
# WHEN R IS 0.0, 1.0 OR -1.0, TRANSFER TO COMPUTE SDZERO
|
242
|
+
#
|
243
|
+
if (@r != 0 or @itype > 0)
|
244
|
+
compute_sdzero
|
261
245
|
return true
|
262
246
|
end
|
263
247
|
#
|
264
|
-
#
|
248
|
+
# WHEN MARGINALS ARE EQUAL, COSINE EVALUATION IS USED
|
249
|
+
#
|
250
|
+
if (@a == @b and @b == @c)
|
251
|
+
calculate_cosine
|
252
|
+
return true
|
253
|
+
end
|
265
254
|
#
|
266
|
-
#
|
255
|
+
# INITIAL ESTIMATE OF CORRELATION IS YULES Y
|
267
256
|
#
|
268
|
-
|
269
|
-
|
270
|
-
wa=1.0
|
271
|
-
wb=@zab.to_f
|
272
|
-
term = 1.0
|
273
|
-
iterm = 0.0
|
274
|
-
@sum = @probab * @probac
|
275
|
-
deriv = 0.0
|
276
|
-
sr = @ss
|
277
|
-
#15
|
257
|
+
@rr = ((Math::sqrt(@aa * @dd) - Math::sqrt(@bb * @cc)) ** 2) / (@aa * @dd - @bb * @cc).abs
|
258
|
+
@iter = 0
|
278
259
|
begin
|
279
|
-
|
260
|
+
#
|
261
|
+
# IF RR EXCEEDS RCUT, GAUSSIAN QUADRATURE IS USED
|
262
|
+
#
|
263
|
+
#10
|
264
|
+
if @rr>RCUT
|
265
|
+
gaussian_quadrature
|
266
|
+
return true
|
267
|
+
end
|
268
|
+
#
|
269
|
+
# TETRACHORIC SERIES IS COMPUTED
|
270
|
+
#
|
271
|
+
# INITIALIZATION
|
272
|
+
#
|
273
|
+
va=1.0
|
274
|
+
vb=@zac.to_f
|
275
|
+
wa=1.0
|
276
|
+
wb=@zab.to_f
|
277
|
+
term = 1.0
|
278
|
+
iterm = 0.0
|
279
|
+
@sum = @probab * @probac
|
280
|
+
deriv = 0.0
|
281
|
+
sr = @ss
|
282
|
+
#15
|
283
|
+
begin
|
284
|
+
if(sr.abs<=CONST)
|
285
|
+
#
|
286
|
+
# RESCALE TERMS TO AVOID OVERFLOWS AND UNDERFLOWS
|
287
|
+
#
|
288
|
+
sr = sr / CONST
|
289
|
+
va = va * CHALF
|
290
|
+
vb = vb * CHALF
|
291
|
+
wa = wa * CHALF
|
292
|
+
wb = wb * CHALF
|
293
|
+
end
|
294
|
+
#
|
295
|
+
# FORM SUM AND DERIVATIVE OF SERIES
|
296
|
+
#
|
297
|
+
# 20
|
298
|
+
dr = sr * va * wa
|
299
|
+
sr = sr * @rr / term
|
300
|
+
cof = sr * va * wa
|
280
301
|
#
|
281
|
-
#
|
302
|
+
# ITERM COUNTS NO. OF CONSECUTIVE TERMS < CONV
|
282
303
|
#
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
304
|
+
iterm+= 1
|
305
|
+
iterm=0 if (cof.abs > CONV)
|
306
|
+
@sum = @sum + cof
|
307
|
+
deriv += dr
|
308
|
+
vaa = va
|
309
|
+
waa = wa
|
310
|
+
va = vb
|
311
|
+
wa = wb
|
312
|
+
vb = @zac * va - term * vaa
|
313
|
+
wb = @zab * wa - term * waa
|
314
|
+
term += 1
|
315
|
+
end while (iterm < 2 or term < 6)
|
316
|
+
#
|
317
|
+
# CHECK IF ITERATION CONVERGED
|
318
|
+
#
|
319
|
+
if((@sum-@probaa).abs <= CITER)
|
320
|
+
@itype=term
|
321
|
+
calculate_sdr
|
322
|
+
return true
|
288
323
|
end
|
289
324
|
#
|
290
|
-
#
|
325
|
+
# CALCULATE NEXT ESTIMATE OF CORRELATION
|
291
326
|
#
|
292
|
-
#
|
293
|
-
|
294
|
-
sr = sr * @rr / term
|
295
|
-
cof = sr * va * wa
|
327
|
+
#25
|
328
|
+
@iter += 1
|
296
329
|
#
|
297
|
-
#
|
330
|
+
# IF TOO MANY ITERATIONS, RUN IS TERMINATED
|
298
331
|
#
|
299
|
-
|
300
|
-
|
301
|
-
@
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
term += 1
|
310
|
-
end while (iterm < 2 or term < 6)
|
311
|
-
#
|
312
|
-
# CHECK IF ITERATION CONVERGED
|
313
|
-
#
|
314
|
-
if((@sum-@probaa).abs <= CITER)
|
315
|
-
@itype=term
|
316
|
-
calculate_sdr
|
317
|
-
return true
|
318
|
-
end
|
319
|
-
#
|
320
|
-
# CALCULATE NEXT ESTIMATE OF CORRELATION
|
321
|
-
#
|
322
|
-
#25
|
323
|
-
@iter += 1
|
324
|
-
#
|
325
|
-
# IF TOO MANY ITERATIONS, RUN IS TERMINATED
|
326
|
-
#
|
327
|
-
delta = (@sum - @probaa) / deriv
|
328
|
-
@rrprev = @rr
|
329
|
-
@rr = @rr - delta
|
330
|
-
@rr += 0.5 * delta if(@iter == 1)
|
331
|
-
@rr= RLIMIT if (@rr > RLIMIT)
|
332
|
-
@rr =0 if (@rr < 0.0)
|
333
|
-
end while @iter < NITER
|
334
|
-
raise "Too many iteration"
|
335
|
-
# GOTO 10
|
336
|
-
end
|
332
|
+
delta = (@sum - @probaa) / deriv
|
333
|
+
@rrprev = @rr
|
334
|
+
@rr = @rr - delta
|
335
|
+
@rr += 0.5 * delta if(@iter == 1)
|
336
|
+
@rr= RLIMIT if (@rr > RLIMIT)
|
337
|
+
@rr =0 if (@rr < 0.0)
|
338
|
+
end while @iter < NITER
|
339
|
+
raise "Too many iteration"
|
340
|
+
# GOTO 10
|
341
|
+
end
|
337
342
|
# GAUSSIAN QUADRATURE
|
338
343
|
# 40
|
339
344
|
def gaussian_quadrature
|
@@ -342,8 +347,8 @@ module Statsample
|
|
342
347
|
@sum=@probab*@probac
|
343
348
|
@rrprev=0
|
344
349
|
end
|
345
|
-
|
346
|
-
# 41
|
350
|
+
|
351
|
+
# 41
|
347
352
|
sumprv = @probab - @sum
|
348
353
|
@prob = @bb.quo(@tot)
|
349
354
|
@prob = @aa.quo(@tot) if (@ksign == 2)
|
@@ -353,7 +358,7 @@ module Statsample
|
|
353
358
|
# COMPUTATION OF INTEGRAL (SUM) BY QUADRATURE
|
354
359
|
#
|
355
360
|
# 42
|
356
|
-
|
361
|
+
|
357
362
|
begin
|
358
363
|
rrsq = Math::sqrt(1 - @rr ** 2)
|
359
364
|
amid = 0.5 * (UPLIM + @zac)
|
@@ -362,8 +367,8 @@ module Statsample
|
|
362
367
|
(1..16).each do |iquad|
|
363
368
|
xla = amid + X[iquad] * xlen
|
364
369
|
xlb = amid - X[iquad] * xlen
|
365
|
-
|
366
|
-
|
370
|
+
|
371
|
+
|
367
372
|
#
|
368
373
|
# TO AVOID UNDERFLOWS, TEMPA AND TEMPB ARE USED
|
369
374
|
#
|
@@ -372,7 +377,7 @@ module Statsample
|
|
372
377
|
@sum = @sum + W[iquad] * Math::exp(-0.5 * xla ** 2) * Distribution::Normal.cdf(tempa)
|
373
378
|
end
|
374
379
|
tempb = (@zab - @rr * xlb) / rrsq
|
375
|
-
|
380
|
+
|
376
381
|
if (tempb >= -6.0)
|
377
382
|
@sum = @sum + W[iquad] * Math::exp(-0.5 * xlb ** 2) * Distribution::Normal.cdf(tempb)
|
378
383
|
end
|
@@ -380,29 +385,29 @@ module Statsample
|
|
380
385
|
@sum=@sum*xlen / SQT2PI
|
381
386
|
#
|
382
387
|
# CHECK IF ITERATION HAS CONVERGED
|
383
|
-
#
|
384
|
-
if ((@prob - @sum).abs <= CITER)
|
388
|
+
#
|
389
|
+
if ((@prob - @sum).abs <= CITER)
|
385
390
|
calculate_sdr
|
386
391
|
return true
|
387
392
|
end
|
388
393
|
# ESTIMATE CORRELATION FOR NEXT ITERATION BY LINEAR INTERPOLATION
|
389
|
-
|
394
|
+
|
390
395
|
rrest = ((@prob - @sum) * @rrprev - (@prob - sumprv) * @rr) / (sumprv - @sum)
|
391
|
-
rrest = RLIMIT if (rrest > RLIMIT)
|
392
|
-
rrest = 0 if (rrest < 0)
|
396
|
+
rrest = RLIMIT if (rrest > RLIMIT)
|
397
|
+
rrest = 0 if (rrest < 0)
|
393
398
|
@rrprev = @rr
|
394
399
|
@rr = rrest
|
395
400
|
sumprv = @sum
|
396
401
|
#
|
397
402
|
# if estimate has same value on two iterations, stop iteration
|
398
403
|
#
|
399
|
-
if @rr == @rrprev
|
404
|
+
if @rr == @rrprev
|
400
405
|
calculate_sdr
|
401
406
|
return true
|
402
407
|
end
|
403
|
-
|
404
|
-
|
405
|
-
end while @iter < NITER
|
408
|
+
|
409
|
+
|
410
|
+
end while @iter < NITER
|
406
411
|
raise "Too many iterations"
|
407
412
|
# GOTO 42
|
408
413
|
end
|
@@ -414,16 +419,16 @@ module Statsample
|
|
414
419
|
@itype = 2
|
415
420
|
calculate_sdr
|
416
421
|
end
|
417
|
-
|
418
|
-
|
422
|
+
|
423
|
+
|
419
424
|
def calculate_sdr # :nodoc:
|
420
425
|
#
|
421
426
|
# COMPUTE SDR
|
422
427
|
#
|
423
428
|
@r = @rr
|
424
|
-
rrsq = Math::sqrt(1.0 - @r ** 2)
|
429
|
+
rrsq = Math::sqrt(1.0 - @r ** 2)
|
425
430
|
@itype = -@itype if (@kdelta > 1)
|
426
|
-
if (@ksign != 1)
|
431
|
+
if (@ksign != 1)
|
427
432
|
@r = -@r
|
428
433
|
@zac = -@zac
|
429
434
|
end
|
@@ -431,13 +436,13 @@ module Statsample
|
|
431
436
|
pdf = Math::exp(-0.5 * (@zac ** 2 - 2 * @r * @zac * @zab + @zab ** 2) / rrsq ** 2) / (TWOPI * rrsq)
|
432
437
|
@pac = Distribution::Normal.cdf((@zac - @r * @zab) / rrsq) - 0.5
|
433
438
|
@pab = Distribution::Normal.cdf((@zab - @r * @zac) / rrsq) - 0.5
|
434
|
-
|
439
|
+
|
435
440
|
@sdr = ((@aa+@dd) * (@bb + @cc)).quo(4) + @pab ** 2 * (@aa + @cc) * (@bb + @dd) + @pac ** 2 * (@aa + @bb) * (@cc + @dd) + 2.0 * @pab * @pac * (@aa * @dd - @bb * @cc) - @pab * (@aa * @bb - @cc * @dd) - @pac * (@aa * @cc - @bb * @dd)
|
436
441
|
@sdr=0 if (@sdr<0)
|
437
442
|
@sdr= Math::sqrt(@sdr) / (@tot * pdf * Math::sqrt(@tot))
|
438
443
|
compute_sdzero
|
439
444
|
end
|
440
|
-
|
445
|
+
|
441
446
|
# 85
|
442
447
|
#
|
443
448
|
# COMPUTE SDZERO
|
data/lib/statsample/bivariate.rb
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
require 'statsample/bivariate/tetrachoric'
|
2
2
|
require 'statsample/bivariate/polychoric'
|
3
3
|
module Statsample
|
4
|
-
# Diverse
|
4
|
+
# Diverse bivariate methods, including #covariance, #pearson correlation (r), #spearman ranked correlation (rho), #tetrachoric correlation and #polychoric correlation.
|
5
5
|
module Bivariate
|
6
6
|
class << self
|
7
7
|
# Covariance between two vectors
|
8
8
|
def covariance(v1,v2)
|
9
9
|
v1a,v2a=Statsample.only_valid(v1,v2)
|
10
10
|
return nil if v1a.size==0
|
11
|
-
if
|
11
|
+
if Statsample.has_gsl?
|
12
12
|
GSL::Stats::covariance(v1a.gsl, v2a.gsl)
|
13
13
|
else
|
14
14
|
covariance_slow(v1a,v2a)
|
@@ -31,11 +31,12 @@ module Statsample
|
|
31
31
|
(0...v1a.size).each {|i| t+=((v1a[i]-m1)*(v2a[i]-m2)) }
|
32
32
|
t.to_f / (v1a.size-1)
|
33
33
|
end
|
34
|
-
|
34
|
+
|
35
|
+
# Calculate Pearson correlation coefficient (r) between 2 vectors
|
35
36
|
def pearson(v1,v2)
|
36
37
|
v1a,v2a=Statsample.only_valid(v1,v2)
|
37
38
|
return nil if v1a.size ==0
|
38
|
-
if
|
39
|
+
if Statsample.has_gsl?
|
39
40
|
GSL::Stats::correlation(v1a.gsl, v2a.gsl)
|
40
41
|
else
|
41
42
|
pearson_slow(v1a,v2a)
|
@@ -177,7 +178,7 @@ module Statsample
|
|
177
178
|
Matrix.rows(rows)
|
178
179
|
end
|
179
180
|
|
180
|
-
# Spearman ranked correlation coefficient between 2 vectors
|
181
|
+
# Spearman ranked correlation coefficient (rho) between 2 vectors
|
181
182
|
def spearman(v1,v2)
|
182
183
|
v1a,v2a=Statsample.only_valid(v1,v2)
|
183
184
|
v1r,v2r=v1a.ranked(:scale),v2a.ranked(:scale)
|
@@ -195,7 +196,6 @@ module Statsample
|
|
195
196
|
((m1.mean-m0.mean).to_f / ds['c'].sdp) * Math::sqrt(m0.size*m1.size.to_f / ds.cases**2)
|
196
197
|
end
|
197
198
|
# Kendall Rank Correlation Coefficient.
|
198
|
-
#
|
199
199
|
# Based on Hervé Abdi's article
|
200
200
|
def tau_a(v1,v2)
|
201
201
|
v1a,v2a=Statsample.only_valid(v1,v2)
|