statsample 0.6.5 → 0.6.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (61) hide show
  1. data/History.txt +15 -0
  2. data/Manifest.txt +6 -0
  3. data/README.txt +30 -12
  4. data/Rakefile +91 -0
  5. data/demo/levene.rb +9 -0
  6. data/demo/multiple_regression.rb +1 -7
  7. data/demo/polychoric.rb +1 -0
  8. data/demo/principal_axis.rb +8 -0
  9. data/lib/distribution/f.rb +22 -22
  10. data/lib/spss.rb +99 -99
  11. data/lib/statsample/bivariate/polychoric.rb +32 -22
  12. data/lib/statsample/bivariate/tetrachoric.rb +212 -207
  13. data/lib/statsample/bivariate.rb +6 -6
  14. data/lib/statsample/codification.rb +65 -65
  15. data/lib/statsample/combination.rb +60 -59
  16. data/lib/statsample/converter/csv19.rb +12 -12
  17. data/lib/statsample/converters.rb +1 -1
  18. data/lib/statsample/dataset.rb +93 -36
  19. data/lib/statsample/dominanceanalysis/bootstrap.rb +66 -3
  20. data/lib/statsample/dominanceanalysis.rb +5 -6
  21. data/lib/statsample/factor/pca.rb +41 -11
  22. data/lib/statsample/factor/principalaxis.rb +105 -29
  23. data/lib/statsample/factor/rotation.rb +20 -3
  24. data/lib/statsample/factor.rb +1 -1
  25. data/lib/statsample/graph/gdchart.rb +13 -13
  26. data/lib/statsample/graph/svggraph.rb +166 -167
  27. data/lib/statsample/matrix.rb +22 -12
  28. data/lib/statsample/mle/logit.rb +3 -2
  29. data/lib/statsample/mle/probit.rb +7 -5
  30. data/lib/statsample/mle.rb +4 -2
  31. data/lib/statsample/multiset.rb +125 -124
  32. data/lib/statsample/permutation.rb +2 -1
  33. data/lib/statsample/regression/binomial/logit.rb +4 -3
  34. data/lib/statsample/regression/binomial/probit.rb +2 -1
  35. data/lib/statsample/regression/binomial.rb +62 -81
  36. data/lib/statsample/regression/multiple/baseengine.rb +1 -1
  37. data/lib/statsample/regression/multiple/gslengine.rb +1 -1
  38. data/lib/statsample/regression/multiple/matrixengine.rb +12 -6
  39. data/lib/statsample/regression/multiple.rb +15 -42
  40. data/lib/statsample/regression/simple.rb +93 -78
  41. data/lib/statsample/regression.rb +74 -2
  42. data/lib/statsample/reliability.rb +117 -120
  43. data/lib/statsample/srs.rb +156 -153
  44. data/lib/statsample/test/levene.rb +90 -0
  45. data/lib/statsample/test/umannwhitney.rb +25 -9
  46. data/lib/statsample/test.rb +2 -0
  47. data/lib/statsample/vector.rb +388 -413
  48. data/lib/statsample.rb +74 -30
  49. data/po/es/statsample.mo +0 -0
  50. data/test/test_bivariate.rb +5 -4
  51. data/test/test_combination.rb +1 -1
  52. data/test/test_dataset.rb +2 -2
  53. data/test/test_factor.rb +53 -6
  54. data/test/test_gsl.rb +1 -1
  55. data/test/test_mle.rb +1 -1
  56. data/test/test_regression.rb +18 -33
  57. data/test/test_statistics.rb +15 -33
  58. data/test/test_stest.rb +35 -0
  59. data/test/test_svg_graph.rb +2 -2
  60. data/test/test_vector.rb +331 -333
  61. metadata +38 -11
@@ -5,7 +5,7 @@ module Statsample
5
5
  tc=Tetrachoric.new_with_vectors(v1,v2)
6
6
  tc.r
7
7
  end
8
-
8
+
9
9
  # Tetrachoric correlation matrix.
10
10
  # Order of rows and columns depends on Dataset#fields order
11
11
  def self.tetrachoric_correlation_matrix(ds)
@@ -21,20 +21,20 @@ module Statsample
21
21
  end
22
22
  end
23
23
  end
24
- #
25
24
  # Compute tetrachoric correlation.
26
- #
27
- # See http://www.john-uebersax.com/stat/tetra.htm for extensive
28
- # documentation about tetrachoric correlation.
25
+ #
26
+ # The <em>tetrachoric</em> correlation is a measure of
27
+ # bivariate association arising when both observed variates
28
+ # are categorical variables that result from dichotomizing
29
+ # the two underlying continuous variables (Drasgow, 2006).
30
+ # The tetrachoric correlation is a good way to measure rater agreement (Uebersax, 2006)
29
31
  #
30
32
  # This class uses the Brown (1977) algorithm. The original FORTRAN code is available at http://lib.stat.cmu.edu/apstat/116
31
- #
32
- # == References:
33
- # * Brown, MB. (1977) Algorithm AS 116: the tetrachoric correlation and its standard error. _Applied Statistics, 26_, 343-351.
34
33
  #
35
- # <b>Usage</b>.
34
+ #
35
+ # == Usage
36
36
  # With two variables x and y on a crosstab like this:
37
- #
37
+ #
38
38
  # -------------
39
39
  # | y=0 | y=1 |
40
40
  # -------------
@@ -43,20 +43,25 @@ module Statsample
43
43
  # x = 1 | c | d |
44
44
  # -------------
45
45
  #
46
- # Use:
46
+ # The code will be
47
47
  # tc=Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
48
48
  # tc.r # correlation
49
49
  # tc.se # standard error
50
50
  # tc.threshold_y # threshold for y variable
51
51
  # tc.threshold_x # threshold for x variable
52
-
53
-
52
+ #
53
+ # == References:
54
+ #
55
+ # * Brown, MB. (1977) Algorithm AS 116: the tetrachoric correlation and its standard error. <em>Applied Statistics, 26</em>, 343-351.
56
+ # * Drasgow F. (2006). Polychoric and polyserial correlations. In Kotz L, Johnson NL (Eds.), Encyclopedia of statistical sciences. Vol. 7 (pp. 69-74). New York: Wiley.
57
+ # * Uebersax, J.S. (2006). The tetrachoric and polychoric correlation coefficients. Statistical Methods for Rater Agreement web site. 2006. Available at: http://john-uebersax.com/stat/tetra.htm . Accessed February 11, 2010.
58
+
54
59
  class Tetrachoric
55
60
  include GetText
56
61
  bindtextdomain("statsample")
57
62
  attr_reader :r
58
63
  attr_accessor :name
59
-
64
+
60
65
  TWOPI=Math::PI*2
61
66
  SQT2PI= 2.50662827
62
67
  RLIMIT = 0.9999
@@ -113,7 +118,7 @@ module Statsample
113
118
  rp.add(self)
114
119
  rp.to_text
115
120
  end
116
-
121
+
117
122
  def to_reportbuilder(generator) # :nodoc:
118
123
  section=ReportBuilder::Section.new(:name=>@name)
119
124
  t=ReportBuilder::Table.new(:name=>_("Contingence Table"),:header=>["","Y=0","Y=1", "T"])
@@ -129,211 +134,211 @@ module Statsample
129
134
  section.add(_("Threshold Y: %0.3f ") % [threshold_y] )
130
135
  generator.parse_element(section)
131
136
  end
132
-
133
- # Creates a new tetrachoric object for analysis
137
+
138
+ # Creates a new tetrachoric object for analysis
134
139
  def initialize(a,b,c,d)
135
140
  @a,@b,@c,@d=a,b,c,d
136
141
  @name=_("Tetrachoric correlation")
137
142
  #
138
143
  # CHECK IF ANY CELL FREQUENCY IS NEGATIVE
139
144
  #
140
- raise "All frequencies should be positive" if (@a < 0 or @b < 0 or @c < 0 or @d < 0)
145
+ raise "All frequencies should be positive" if (@a < 0 or @b < 0 or @c < 0 or @d < 0)
141
146
  compute
142
147
  end
143
148
  # Compute the tetrachoric correlation.
144
149
  # Called on object creation.
145
150
  #
146
151
  def compute
147
-
148
- #
149
- # INITIALIZATION
150
- #
151
- @r = 0
152
- sdzero = 0
153
- @sdr = 0
154
- @itype = 0
155
- @ifault = 0
156
-
157
- #
158
- # CHECK IF ANY FREQUENCY IS 0.0 AND SET kdelta
159
- #
160
- @kdelta = 1
161
- delta = 0
162
- @kdelta = 2 if (@a == 0 or @d == 0)
163
- @kdelta += 2 if (@b == 0 or @c == 0)
164
- #
165
- # kdelta=4 MEANS TABLE HAS 0.0 ROW OR COLUMN, RUN IS TERMINATED
166
- #
167
152
 
168
- raise "Rows and columns should have more than 0 items" if @kdelta==4
153
+ #
154
+ # INITIALIZATION
155
+ #
156
+ @r = 0
157
+ sdzero = 0
158
+ @sdr = 0
159
+ @itype = 0
160
+ @ifault = 0
169
161
 
170
- # GOTO (4, 1, 2 , 92), kdelta
171
- #
172
- # delta IS 0.0, 0.5 OR -0.5 ACCORDING TO WHICH CELL IS 0.0
173
- #
174
-
175
- if(@kdelta==2)
176
- # 1
177
- delta=0.5
178
- @r=-1 if (@a==0 and @d==0)
179
- elsif(@kdelta==3)
180
- # 2
181
- delta=-0.5
182
- @r=1 if (@b==0 and @c==0)
183
- end
184
- # 4
185
- if @r!=0
186
- @itype=3
187
- end
162
+ #
163
+ # CHECK IF ANY FREQUENCY IS 0.0 AND SET kdelta
164
+ #
165
+ @kdelta = 1
166
+ delta = 0
167
+ @kdelta = 2 if (@a == 0 or @d == 0)
168
+ @kdelta += 2 if (@b == 0 or @c == 0)
169
+ #
170
+ # kdelta=4 MEANS TABLE HAS 0.0 ROW OR COLUMN, RUN IS TERMINATED
171
+ #
188
172
 
189
- #
190
- # STORE FREQUENCIES IN AA, BB, CC AND DD
191
- #
192
- @aa = @a + delta
193
- @bb = @b - delta
194
- @cc = @c - delta
195
- @dd = @d + delta
196
- @tot = @aa+@bb+@cc+@dd
197
- #
198
- # CHECK IF CORRELATION IS NEGATIVE, 0.0, POSITIVE
199
- # IF (AA * DD - BB * CC) 7, 5, 6
173
+ raise "Rows and columns should have more than 0 items" if @kdelta==4
200
174
 
201
- corr_dir=@aa * @dd - @bb * @cc
202
- if(corr_dir < 0)
203
- # 7
204
- @probaa = @bb.quo(@tot)
205
- @probac = (@bb + @dd).quo(@tot)
206
- @ksign = 2
207
- # -> 8
208
- else
209
- if (corr_dir==0)
210
- # 5
211
- @itype=4
175
+ # GOTO (4, 1, 2 , 92), kdelta
176
+ #
177
+ # delta IS 0.0, 0.5 OR -0.5 ACCORDING TO WHICH CELL IS 0.0
178
+ #
179
+
180
+ if(@kdelta==2)
181
+ # 1
182
+ delta=0.5
183
+ @r=-1 if (@a==0 and @d==0)
184
+ elsif(@kdelta==3)
185
+ # 2
186
+ delta=-0.5
187
+ @r=1 if (@b==0 and @c==0)
188
+ end
189
+ # 4
190
+ if @r!=0
191
+ @itype=3
212
192
  end
213
- # 6
193
+
214
194
  #
215
- # COMPUTE PROBABILITIES OF QUADRANT AND OF MARGINALS
216
- # PROBAA AND PROBAC CHOSEN SO THAT CORRELATION IS POSITIVE.
217
- # KSIGN INDICATES WHETHER QUADRANTS HAVE BEEN SWITCHED
195
+ # STORE FREQUENCIES IN AA, BB, CC AND DD
218
196
  #
219
-
220
- @probaa = @aa.quo(@tot)
221
- @probac = (@aa+@cc).quo(@tot)
222
- @ksign=1
223
- end
224
- # 8
225
-
226
- @probab = (@aa+@bb).quo(@tot)
227
-
228
- #
229
- # COMPUTE NORMAL DEVIATES FOR THE MARGINAL FREQUENCIES
230
- # SINCE NO MARGINAL CAN BE 0.0, IE IS NOT CHECKED
231
- #
232
- @zac = Distribution::Normal.p_value(@probac)
233
- @zab = Distribution::Normal.p_value(@probab)
234
- @ss = Math::exp(-0.5 * (@zac ** 2 + @zab ** 2)).quo(TWOPI)
235
- #
236
- # WHEN R IS 0.0, 1.0 OR -1.0, TRANSFER TO COMPUTE SDZERO
237
- #
238
- if (@r != 0 or @itype > 0)
239
- compute_sdzero
240
- return true
241
- end
242
- #
243
- # WHEN MARGINALS ARE EQUAL, COSINE EVALUATION IS USED
244
- #
245
- if (@a == @b and @b == @c)
246
- calculate_cosine
247
- return true
248
- end
249
- #
250
- # INITIAL ESTIMATE OF CORRELATION IS YULES Y
251
- #
252
- @rr = ((Math::sqrt(@aa * @dd) - Math::sqrt(@bb * @cc)) ** 2) / (@aa * @dd - @bb * @cc).abs
253
- @iter = 0
254
- begin
197
+ @aa = @a + delta
198
+ @bb = @b - delta
199
+ @cc = @c - delta
200
+ @dd = @d + delta
201
+ @tot = @aa+@bb+@cc+@dd
255
202
  #
256
- # IF RR EXCEEDS RCUT, GAUSSIAN QUADRATURE IS USED
203
+ # CHECK IF CORRELATION IS NEGATIVE, 0.0, POSITIVE
204
+ # IF (AA * DD - BB * CC) 7, 5, 6
205
+
206
+ corr_dir=@aa * @dd - @bb * @cc
207
+ if(corr_dir < 0)
208
+ # 7
209
+ @probaa = @bb.quo(@tot)
210
+ @probac = (@bb + @dd).quo(@tot)
211
+ @ksign = 2
212
+ # -> 8
213
+ else
214
+ if (corr_dir==0)
215
+ # 5
216
+ @itype=4
217
+ end
218
+ # 6
219
+ #
220
+ # COMPUTE PROBABILITIES OF QUADRANT AND OF MARGINALS
221
+ # PROBAA AND PROBAC CHOSEN SO THAT CORRELATION IS POSITIVE.
222
+ # KSIGN INDICATES WHETHER QUADRANTS HAVE BEEN SWITCHED
223
+ #
224
+
225
+ @probaa = @aa.quo(@tot)
226
+ @probac = (@aa+@cc).quo(@tot)
227
+ @ksign=1
228
+ end
229
+ # 8
230
+
231
+ @probab = (@aa+@bb).quo(@tot)
232
+
233
+ #
234
+ # COMPUTE NORMAL DEVIATES FOR THE MARGINAL FREQUENCIES
235
+ # SINCE NO MARGINAL CAN BE 0.0, IE IS NOT CHECKED
257
236
  #
258
- #10
259
- if @rr>RCUT
260
- gaussian_quadrature
237
+ @zac = Distribution::Normal.p_value(@probac)
238
+ @zab = Distribution::Normal.p_value(@probab)
239
+ @ss = Math::exp(-0.5 * (@zac ** 2 + @zab ** 2)).quo(TWOPI)
240
+ #
241
+ # WHEN R IS 0.0, 1.0 OR -1.0, TRANSFER TO COMPUTE SDZERO
242
+ #
243
+ if (@r != 0 or @itype > 0)
244
+ compute_sdzero
261
245
  return true
262
246
  end
263
247
  #
264
- # TETRACHORIC SERIES IS COMPUTED
248
+ # WHEN MARGINALS ARE EQUAL, COSINE EVALUATION IS USED
249
+ #
250
+ if (@a == @b and @b == @c)
251
+ calculate_cosine
252
+ return true
253
+ end
265
254
  #
266
- # INITIALIZATION
255
+ # INITIAL ESTIMATE OF CORRELATION IS YULES Y
267
256
  #
268
- va=1.0
269
- vb=@zac.to_f
270
- wa=1.0
271
- wb=@zab.to_f
272
- term = 1.0
273
- iterm = 0.0
274
- @sum = @probab * @probac
275
- deriv = 0.0
276
- sr = @ss
277
- #15
257
+ @rr = ((Math::sqrt(@aa * @dd) - Math::sqrt(@bb * @cc)) ** 2) / (@aa * @dd - @bb * @cc).abs
258
+ @iter = 0
278
259
  begin
279
- if(sr.abs<=CONST)
260
+ #
261
+ # IF RR EXCEEDS RCUT, GAUSSIAN QUADRATURE IS USED
262
+ #
263
+ #10
264
+ if @rr>RCUT
265
+ gaussian_quadrature
266
+ return true
267
+ end
268
+ #
269
+ # TETRACHORIC SERIES IS COMPUTED
270
+ #
271
+ # INITIALIZATION
272
+ #
273
+ va=1.0
274
+ vb=@zac.to_f
275
+ wa=1.0
276
+ wb=@zab.to_f
277
+ term = 1.0
278
+ iterm = 0.0
279
+ @sum = @probab * @probac
280
+ deriv = 0.0
281
+ sr = @ss
282
+ #15
283
+ begin
284
+ if(sr.abs<=CONST)
285
+ #
286
+ # RESCALE TERMS TO AVOID OVERFLOWS AND UNDERFLOWS
287
+ #
288
+ sr = sr / CONST
289
+ va = va * CHALF
290
+ vb = vb * CHALF
291
+ wa = wa * CHALF
292
+ wb = wb * CHALF
293
+ end
294
+ #
295
+ # FORM SUM AND DERIVATIVE OF SERIES
296
+ #
297
+ # 20
298
+ dr = sr * va * wa
299
+ sr = sr * @rr / term
300
+ cof = sr * va * wa
280
301
  #
281
- # RESCALE TERMS TO AVOID OVERFLOWS AND UNDERFLOWS
302
+ # ITERM COUNTS NO. OF CONSECUTIVE TERMS < CONV
282
303
  #
283
- sr = sr / CONST
284
- va = va * CHALF
285
- vb = vb * CHALF
286
- wa = wa * CHALF
287
- wb = wb * CHALF
304
+ iterm+= 1
305
+ iterm=0 if (cof.abs > CONV)
306
+ @sum = @sum + cof
307
+ deriv += dr
308
+ vaa = va
309
+ waa = wa
310
+ va = vb
311
+ wa = wb
312
+ vb = @zac * va - term * vaa
313
+ wb = @zab * wa - term * waa
314
+ term += 1
315
+ end while (iterm < 2 or term < 6)
316
+ #
317
+ # CHECK IF ITERATION CONVERGED
318
+ #
319
+ if((@sum-@probaa).abs <= CITER)
320
+ @itype=term
321
+ calculate_sdr
322
+ return true
288
323
  end
289
324
  #
290
- # FORM SUM AND DERIVATIVE OF SERIES
325
+ # CALCULATE NEXT ESTIMATE OF CORRELATION
291
326
  #
292
- # 20
293
- dr = sr * va * wa
294
- sr = sr * @rr / term
295
- cof = sr * va * wa
327
+ #25
328
+ @iter += 1
296
329
  #
297
- # ITERM COUNTS NO. OF CONSECUTIVE TERMS < CONV
330
+ # IF TOO MANY ITERATIONS, RUN IS TERMINATED
298
331
  #
299
- iterm+= 1
300
- iterm=0 if (cof.abs > CONV)
301
- @sum = @sum + cof
302
- deriv += dr
303
- vaa = va
304
- waa = wa
305
- va = vb
306
- wa = wb
307
- vb = @zac * va - term * vaa
308
- wb = @zab * wa - term * waa
309
- term += 1
310
- end while (iterm < 2 or term < 6)
311
- #
312
- # CHECK IF ITERATION CONVERGED
313
- #
314
- if((@sum-@probaa).abs <= CITER)
315
- @itype=term
316
- calculate_sdr
317
- return true
318
- end
319
- #
320
- # CALCULATE NEXT ESTIMATE OF CORRELATION
321
- #
322
- #25
323
- @iter += 1
324
- #
325
- # IF TOO MANY ITERATlONS, RUN IS TERMINATED
326
- #
327
- delta = (@sum - @probaa) / deriv
328
- @rrprev = @rr
329
- @rr = @rr - delta
330
- @rr += 0.5 * delta if(@iter == 1)
331
- @rr= RLIMIT if (@rr > RLIMIT)
332
- @rr =0 if (@rr < 0.0)
333
- end while @iter < NITER
334
- raise "Too many iteration"
335
- # GOTO 10
336
- end
332
+ delta = (@sum - @probaa) / deriv
333
+ @rrprev = @rr
334
+ @rr = @rr - delta
335
+ @rr += 0.5 * delta if(@iter == 1)
336
+ @rr= RLIMIT if (@rr > RLIMIT)
337
+ @rr =0 if (@rr < 0.0)
338
+ end while @iter < NITER
339
+ raise "Too many iteration"
340
+ # GOTO 10
341
+ end
337
342
  # GAUSSIAN QUADRATURE
338
343
  # 40
339
344
  def gaussian_quadrature
@@ -342,8 +347,8 @@ module Statsample
342
347
  @sum=@probab*@probac
343
348
  @rrprev=0
344
349
  end
345
-
346
- # 41
350
+
351
+ # 41
347
352
  sumprv = @probab - @sum
348
353
  @prob = @bb.quo(@tot)
349
354
  @prob = @aa.quo(@tot) if (@ksign == 2)
@@ -353,7 +358,7 @@ module Statsample
353
358
  # COMPUTATION OF INTEGRAL (SUM) BY QUADRATURE
354
359
  #
355
360
  # 42
356
-
361
+
357
362
  begin
358
363
  rrsq = Math::sqrt(1 - @rr ** 2)
359
364
  amid = 0.5 * (UPLIM + @zac)
@@ -362,8 +367,8 @@ module Statsample
362
367
  (1..16).each do |iquad|
363
368
  xla = amid + X[iquad] * xlen
364
369
  xlb = amid - X[iquad] * xlen
365
-
366
-
370
+
371
+
367
372
  #
368
373
  # TO AVOID UNDERFLOWS, TEMPA AND TEMPB ARE USED
369
374
  #
@@ -372,7 +377,7 @@ module Statsample
372
377
  @sum = @sum + W[iquad] * Math::exp(-0.5 * xla ** 2) * Distribution::Normal.cdf(tempa)
373
378
  end
374
379
  tempb = (@zab - @rr * xlb) / rrsq
375
-
380
+
376
381
  if (tempb >= -6.0)
377
382
  @sum = @sum + W[iquad] * Math::exp(-0.5 * xlb ** 2) * Distribution::Normal.cdf(tempb)
378
383
  end
@@ -380,29 +385,29 @@ module Statsample
380
385
  @sum=@sum*xlen / SQT2PI
381
386
  #
382
387
  # CHECK IF ITERATION HAS CONVERGED
383
- #
384
- if ((@prob - @sum).abs <= CITER)
388
+ #
389
+ if ((@prob - @sum).abs <= CITER)
385
390
  calculate_sdr
386
391
  return true
387
392
  end
388
393
  # ESTIMATE CORRELATION FOR NEXT ITERATION BY LINEAR INTERPOLATION
389
-
394
+
390
395
  rrest = ((@prob - @sum) * @rrprev - (@prob - sumprv) * @rr) / (sumprv - @sum)
391
- rrest = RLIMIT if (rrest > RLIMIT)
392
- rrest = 0 if (rrest < 0)
396
+ rrest = RLIMIT if (rrest > RLIMIT)
397
+ rrest = 0 if (rrest < 0)
393
398
  @rrprev = @rr
394
399
  @rr = rrest
395
400
  sumprv = @sum
396
401
  #
397
402
  # if estimate has same value on two iterations, stop iteration
398
403
  #
399
- if @rr == @rrprev
404
+ if @rr == @rrprev
400
405
  calculate_sdr
401
406
  return true
402
407
  end
403
-
404
-
405
- end while @iter < NITER
408
+
409
+
410
+ end while @iter < NITER
406
411
  raise "Too many iterations"
407
412
  # go to 42
408
413
  end
@@ -414,16 +419,16 @@ module Statsample
414
419
  @itype = 2
415
420
  calculate_sdr
416
421
  end
417
-
418
-
422
+
423
+
419
424
  def calculate_sdr # :nodoc:
420
425
  #
421
426
  # COMPUTE SDR
422
427
  #
423
428
  @r = @rr
424
- rrsq = Math::sqrt(1.0 - @r ** 2)
429
+ rrsq = Math::sqrt(1.0 - @r ** 2)
425
430
  @itype = -@itype if (@kdelta > 1)
426
- if (@ksign != 1)
431
+ if (@ksign != 1)
427
432
  @r = -@r
428
433
  @zac = -@zac
429
434
  end
@@ -431,13 +436,13 @@ module Statsample
431
436
  pdf = Math::exp(-0.5 * (@zac ** 2 - 2 * @r * @zac * @zab + @zab ** 2) / rrsq ** 2) / (TWOPI * rrsq)
432
437
  @pac = Distribution::Normal.cdf((@zac - @r * @zab) / rrsq) - 0.5
433
438
  @pab = Distribution::Normal.cdf((@zab - @r * @zac) / rrsq) - 0.5
434
-
439
+
435
440
  @sdr = ((@aa+@dd) * (@bb + @cc)).quo(4) + @pab ** 2 * (@aa + @cc) * (@bb + @dd) + @pac ** 2 * (@aa + @bb) * (@cc + @dd) + 2.0 * @pab * @pac * (@aa * @dd - @bb * @cc) - @pab * (@aa * @bb - @cc * @dd) - @pac * (@aa * @cc - @bb * @dd)
436
441
  @sdr=0 if (@sdr<0)
437
442
  @sdr= Math::sqrt(@sdr) / (@tot * pdf * Math::sqrt(@tot))
438
443
  compute_sdzero
439
444
  end
440
-
445
+
441
446
  # 85
442
447
  #
443
448
  # COMPUTE SDZERO
@@ -1,14 +1,14 @@
1
1
  require 'statsample/bivariate/tetrachoric'
2
2
  require 'statsample/bivariate/polychoric'
3
3
  module Statsample
4
- # Diverse correlation methods
4
+ # Diverse bivariate methods, including #covariance, #pearson correlation (r), #spearman ranked correlation (rho), #tetrachoric correlation and #polychoric correlation.
5
5
  module Bivariate
6
6
  class << self
7
7
  # Covariance between two vectors
8
8
  def covariance(v1,v2)
9
9
  v1a,v2a=Statsample.only_valid(v1,v2)
10
10
  return nil if v1a.size==0
11
- if HAS_GSL
11
+ if Statsample.has_gsl?
12
12
  GSL::Stats::covariance(v1a.gsl, v2a.gsl)
13
13
  else
14
14
  covariance_slow(v1a,v2a)
@@ -31,11 +31,12 @@ module Statsample
31
31
  (0...v1a.size).each {|i| t+=((v1a[i]-m1)*(v2a[i]-m2)) }
32
32
  t.to_f / (v1a.size-1)
33
33
  end
34
- # Calculate Pearson correlation coefficient between 2 vectors
34
+
35
+ # Calculate Pearson correlation coefficient (r) between 2 vectors
35
36
  def pearson(v1,v2)
36
37
  v1a,v2a=Statsample.only_valid(v1,v2)
37
38
  return nil if v1a.size ==0
38
- if HAS_GSL
39
+ if Statsample.has_gsl?
39
40
  GSL::Stats::correlation(v1a.gsl, v2a.gsl)
40
41
  else
41
42
  pearson_slow(v1a,v2a)
@@ -177,7 +178,7 @@ module Statsample
177
178
  Matrix.rows(rows)
178
179
  end
179
180
 
180
- # Spearman ranked correlation coefficient between 2 vectors
181
+ # Spearman ranked correlation coefficient (rho) between 2 vectors
181
182
  def spearman(v1,v2)
182
183
  v1a,v2a=Statsample.only_valid(v1,v2)
183
184
  v1r,v2r=v1a.ranked(:scale),v2a.ranked(:scale)
@@ -195,7 +196,6 @@ module Statsample
195
196
  ((m1.mean-m0.mean).to_f / ds['c'].sdp) * Math::sqrt(m0.size*m1.size.to_f / ds.cases**2)
196
197
  end
197
198
  # Kendall Rank Correlation Coefficient.
198
- #
199
199
  # Based on Hervé Abdi's article
200
200
  def tau_a(v1,v2)
201
201
  v1a,v2a=Statsample.only_valid(v1,v2)