statsample 0.6.5 → 0.6.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. data/History.txt +15 -0
  2. data/Manifest.txt +6 -0
  3. data/README.txt +30 -12
  4. data/Rakefile +91 -0
  5. data/demo/levene.rb +9 -0
  6. data/demo/multiple_regression.rb +1 -7
  7. data/demo/polychoric.rb +1 -0
  8. data/demo/principal_axis.rb +8 -0
  9. data/lib/distribution/f.rb +22 -22
  10. data/lib/spss.rb +99 -99
  11. data/lib/statsample/bivariate/polychoric.rb +32 -22
  12. data/lib/statsample/bivariate/tetrachoric.rb +212 -207
  13. data/lib/statsample/bivariate.rb +6 -6
  14. data/lib/statsample/codification.rb +65 -65
  15. data/lib/statsample/combination.rb +60 -59
  16. data/lib/statsample/converter/csv19.rb +12 -12
  17. data/lib/statsample/converters.rb +1 -1
  18. data/lib/statsample/dataset.rb +93 -36
  19. data/lib/statsample/dominanceanalysis/bootstrap.rb +66 -3
  20. data/lib/statsample/dominanceanalysis.rb +5 -6
  21. data/lib/statsample/factor/pca.rb +41 -11
  22. data/lib/statsample/factor/principalaxis.rb +105 -29
  23. data/lib/statsample/factor/rotation.rb +20 -3
  24. data/lib/statsample/factor.rb +1 -1
  25. data/lib/statsample/graph/gdchart.rb +13 -13
  26. data/lib/statsample/graph/svggraph.rb +166 -167
  27. data/lib/statsample/matrix.rb +22 -12
  28. data/lib/statsample/mle/logit.rb +3 -2
  29. data/lib/statsample/mle/probit.rb +7 -5
  30. data/lib/statsample/mle.rb +4 -2
  31. data/lib/statsample/multiset.rb +125 -124
  32. data/lib/statsample/permutation.rb +2 -1
  33. data/lib/statsample/regression/binomial/logit.rb +4 -3
  34. data/lib/statsample/regression/binomial/probit.rb +2 -1
  35. data/lib/statsample/regression/binomial.rb +62 -81
  36. data/lib/statsample/regression/multiple/baseengine.rb +1 -1
  37. data/lib/statsample/regression/multiple/gslengine.rb +1 -1
  38. data/lib/statsample/regression/multiple/matrixengine.rb +12 -6
  39. data/lib/statsample/regression/multiple.rb +15 -42
  40. data/lib/statsample/regression/simple.rb +93 -78
  41. data/lib/statsample/regression.rb +74 -2
  42. data/lib/statsample/reliability.rb +117 -120
  43. data/lib/statsample/srs.rb +156 -153
  44. data/lib/statsample/test/levene.rb +90 -0
  45. data/lib/statsample/test/umannwhitney.rb +25 -9
  46. data/lib/statsample/test.rb +2 -0
  47. data/lib/statsample/vector.rb +388 -413
  48. data/lib/statsample.rb +74 -30
  49. data/po/es/statsample.mo +0 -0
  50. data/test/test_bivariate.rb +5 -4
  51. data/test/test_combination.rb +1 -1
  52. data/test/test_dataset.rb +2 -2
  53. data/test/test_factor.rb +53 -6
  54. data/test/test_gsl.rb +1 -1
  55. data/test/test_mle.rb +1 -1
  56. data/test/test_regression.rb +18 -33
  57. data/test/test_statistics.rb +15 -33
  58. data/test/test_stest.rb +35 -0
  59. data/test/test_svg_graph.rb +2 -2
  60. data/test/test_vector.rb +331 -333
  61. metadata +38 -11
@@ -5,7 +5,7 @@ module Statsample
5
5
  tc=Tetrachoric.new_with_vectors(v1,v2)
6
6
  tc.r
7
7
  end
8
-
8
+
9
9
  # Tetrachoric correlation matrix.
10
10
  # Order of rows and columns depends on Dataset#fields order
11
11
  def self.tetrachoric_correlation_matrix(ds)
@@ -21,20 +21,20 @@ module Statsample
21
21
  end
22
22
  end
23
23
  end
24
- #
25
24
  # Compute tetrachoric correlation.
26
- #
27
- # See http://www.john-uebersax.com/stat/tetra.htm for extensive
28
- # documentation about tetrachoric correlation.
25
+ #
26
+ # The <em>tetrachoric</em> correlation is a measure of
27
+ # bivariate association arising when both observed variates
28
+ # are categorical variables that result from dichotomizing
29
+ # the two underlying continuous variables (Drasgow, 2006).
30
+ # The tetrachoric correlation is a good way to measure rater agreement (Uebersax, 2006)
29
31
  #
30
32
  # This class uses Brown (1977) algorithm. You can see FORTRAN code on http://lib.stat.cmu.edu/apstat/116
31
- #
32
- # == References:
33
- # * Brown, MB. (1977) Algorithm AS 116: the tetrachoric correlation and its standard error. _Applied Statistics, 26_, 343-351.
34
33
  #
35
- # <b>Usage</b>.
34
+ #
35
+ # == Usage
36
36
  # With two variables x and y on a crosstab like this:
37
- #
37
+ #
38
38
  # -------------
39
39
  # | y=0 | y=1 |
40
40
  # -------------
@@ -43,20 +43,25 @@ module Statsample
43
43
  # x = 1 | c | d |
44
44
  # -------------
45
45
  #
46
- # Use:
46
+ # The code will be
47
47
  # tc=Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
48
48
  # tc.r # correlation
49
49
  # tc.se # standard error
50
50
  # tc.threshold_y # threshold for y variable
51
51
  # tc.threshold_x # threshold for x variable
52
-
53
-
52
+ #
53
+ # == References:
54
+ #
55
+ # * Brown, MB. (1977) Algorithm AS 116: the tetrachoric correlation and its standard error. <em>Applied Statistics, 26</em>, 343-351.
56
+ # * Drasgow F. (2006). Polychoric and polyserial correlations. In Kotz L, Johnson NL (Eds.), Encyclopedia of statistical sciences. Vol. 7 (pp. 69-74). New York: Wiley.
57
+ # * Uebersax, J.S. (2006). The tetrachoric and polychoric correlation coefficients. Statistical Methods for Rater Agreement web site. 2006. Available at: http://john-uebersax.com/stat/tetra.htm . Accessed February, 11, 2010
58
+
54
59
  class Tetrachoric
55
60
  include GetText
56
61
  bindtextdomain("statsample")
57
62
  attr_reader :r
58
63
  attr_accessor :name
59
-
64
+
60
65
  TWOPI=Math::PI*2
61
66
  SQT2PI= 2.50662827
62
67
  RLIMIT = 0.9999
@@ -113,7 +118,7 @@ module Statsample
113
118
  rp.add(self)
114
119
  rp.to_text
115
120
  end
116
-
121
+
117
122
  def to_reportbuilder(generator) # :nodoc:
118
123
  section=ReportBuilder::Section.new(:name=>@name)
119
124
  t=ReportBuilder::Table.new(:name=>_("Contingence Table"),:header=>["","Y=0","Y=1", "T"])
@@ -129,211 +134,211 @@ module Statsample
129
134
  section.add(_("Threshold Y: %0.3f ") % [threshold_y] )
130
135
  generator.parse_element(section)
131
136
  end
132
-
133
- # Creates a new tetrachoric object for analysis
137
+
138
+ # Creates a new tetrachoric object for analysis
134
139
  def initialize(a,b,c,d)
135
140
  @a,@b,@c,@d=a,b,c,d
136
141
  @name=_("Tetrachoric correlation")
137
142
  #
138
143
  # CHECK IF ANY CELL FREQUENCY IS NEGATIVE
139
144
  #
140
- raise "All frequencies should be positive" if (@a < 0 or @b < 0 or @c < 0 or @d < 0)
145
+ raise "All frequencies should be positive" if (@a < 0 or @b < 0 or @c < 0 or @d < 0)
141
146
  compute
142
147
  end
143
148
  # Compute the tetrachoric correlation.
144
149
  # Called on object creation.
145
150
  #
146
151
  def compute
147
-
148
- #
149
- # INITIALIZATION
150
- #
151
- @r = 0
152
- sdzero = 0
153
- @sdr = 0
154
- @itype = 0
155
- @ifault = 0
156
-
157
- #
158
- # CHECK IF ANY FREQUENCY IS 0.0 AND SET kdelta
159
- #
160
- @kdelta = 1
161
- delta = 0
162
- @kdelta = 2 if (@a == 0 or @d == 0)
163
- @kdelta += 2 if (@b == 0 or @c == 0)
164
- #
165
- # kdelta=4 MEANS TABLE HAS 0.0 ROW OR COLUMN, RUN IS TERMINATED
166
- #
167
152
 
168
- raise "Rows and columns should have more than 0 items" if @kdelta==4
153
+ #
154
+ # INITIALIZATION
155
+ #
156
+ @r = 0
157
+ sdzero = 0
158
+ @sdr = 0
159
+ @itype = 0
160
+ @ifault = 0
169
161
 
170
- # GOTO (4, 1, 2 , 92), kdelta
171
- #
172
- # delta IS 0.0, 0.5 OR -0.5 ACCORDING TO WHICH CELL IS 0.0
173
- #
174
-
175
- if(@kdelta==2)
176
- # 1
177
- delta=0.5
178
- @r=-1 if (@a==0 and @d==0)
179
- elsif(@kdelta==3)
180
- # 2
181
- delta=-0.5
182
- @r=1 if (@b==0 and @c==0)
183
- end
184
- # 4
185
- if @r!=0
186
- @itype=3
187
- end
162
+ #
163
+ # CHECK IF ANY FREQUENCY IS 0.0 AND SET kdelta
164
+ #
165
+ @kdelta = 1
166
+ delta = 0
167
+ @kdelta = 2 if (@a == 0 or @d == 0)
168
+ @kdelta += 2 if (@b == 0 or @c == 0)
169
+ #
170
+ # kdelta=4 MEANS TABLE HAS 0.0 ROW OR COLUMN, RUN IS TERMINATED
171
+ #
188
172
 
189
- #
190
- # STORE FREQUENCIES IN AA, BB, CC AND DD
191
- #
192
- @aa = @a + delta
193
- @bb = @b - delta
194
- @cc = @c - delta
195
- @dd = @d + delta
196
- @tot = @aa+@bb+@cc+@dd
197
- #
198
- # CHECK IF CORRELATION IS NEGATIVE, 0.0, POSITIVE
199
- # IF (AA * DD - BB * CC) 7, 5, 6
173
+ raise "Rows and columns should have more than 0 items" if @kdelta==4
200
174
 
201
- corr_dir=@aa * @dd - @bb * @cc
202
- if(corr_dir < 0)
203
- # 7
204
- @probaa = @bb.quo(@tot)
205
- @probac = (@bb + @dd).quo(@tot)
206
- @ksign = 2
207
- # -> 8
208
- else
209
- if (corr_dir==0)
210
- # 5
211
- @itype=4
175
+ # GOTO (4, 1, 2 , 92), kdelta
176
+ #
177
+ # delta IS 0.0, 0.5 OR -0.5 ACCORDING TO WHICH CELL IS 0.0
178
+ #
179
+
180
+ if(@kdelta==2)
181
+ # 1
182
+ delta=0.5
183
+ @r=-1 if (@a==0 and @d==0)
184
+ elsif(@kdelta==3)
185
+ # 2
186
+ delta=-0.5
187
+ @r=1 if (@b==0 and @c==0)
188
+ end
189
+ # 4
190
+ if @r!=0
191
+ @itype=3
212
192
  end
213
- # 6
193
+
214
194
  #
215
- # COMPUTE PROBABILITIES OF QUADRANT AND OF MARGINALS
216
- # PROBAA AND PROBAC CHOSEN SO THAT CORRELATION IS POSITIVE.
217
- # KSIGN INDICATES WHETHER QUADRANTS HAVE BEEN SWITCHED
195
+ # STORE FREQUENCIES IN AA, BB, CC AND DD
218
196
  #
219
-
220
- @probaa = @aa.quo(@tot)
221
- @probac = (@aa+@cc).quo(@tot)
222
- @ksign=1
223
- end
224
- # 8
225
-
226
- @probab = (@aa+@bb).quo(@tot)
227
-
228
- #
229
- # COMPUTE NORMAL DEVIATES FOR THE MARGINAL FREQUENCIES
230
- # SINCE NO MARGINAL CAN BE 0.0, IE IS NOT CHECKED
231
- #
232
- @zac = Distribution::Normal.p_value(@probac)
233
- @zab = Distribution::Normal.p_value(@probab)
234
- @ss = Math::exp(-0.5 * (@zac ** 2 + @zab ** 2)).quo(TWOPI)
235
- #
236
- # WHEN R IS 0.0, 1.0 OR -1.0, TRANSFER TO COMPUTE SDZERO
237
- #
238
- if (@r != 0 or @itype > 0)
239
- compute_sdzero
240
- return true
241
- end
242
- #
243
- # WHEN MARGINALS ARE EQUAL, COSINE EVALUATION IS USED
244
- #
245
- if (@a == @b and @b == @c)
246
- calculate_cosine
247
- return true
248
- end
249
- #
250
- # INITIAL ESTIMATE OF CORRELATION IS YULES Y
251
- #
252
- @rr = ((Math::sqrt(@aa * @dd) - Math::sqrt(@bb * @cc)) ** 2) / (@aa * @dd - @bb * @cc).abs
253
- @iter = 0
254
- begin
197
+ @aa = @a + delta
198
+ @bb = @b - delta
199
+ @cc = @c - delta
200
+ @dd = @d + delta
201
+ @tot = @aa+@bb+@cc+@dd
255
202
  #
256
- # IF RR EXCEEDS RCUT, GAUSSIAN QUADRATURE IS USED
203
+ # CHECK IF CORRELATION IS NEGATIVE, 0.0, POSITIVE
204
+ # IF (AA * DD - BB * CC) 7, 5, 6
205
+
206
+ corr_dir=@aa * @dd - @bb * @cc
207
+ if(corr_dir < 0)
208
+ # 7
209
+ @probaa = @bb.quo(@tot)
210
+ @probac = (@bb + @dd).quo(@tot)
211
+ @ksign = 2
212
+ # -> 8
213
+ else
214
+ if (corr_dir==0)
215
+ # 5
216
+ @itype=4
217
+ end
218
+ # 6
219
+ #
220
+ # COMPUTE PROBABILITIES OF QUADRANT AND OF MARGINALS
221
+ # PROBAA AND PROBAC CHOSEN SO THAT CORRELATION IS POSITIVE.
222
+ # KSIGN INDICATES WHETHER QUADRANTS HAVE BEEN SWITCHED
223
+ #
224
+
225
+ @probaa = @aa.quo(@tot)
226
+ @probac = (@aa+@cc).quo(@tot)
227
+ @ksign=1
228
+ end
229
+ # 8
230
+
231
+ @probab = (@aa+@bb).quo(@tot)
232
+
233
+ #
234
+ # COMPUTE NORMAL DEVIATES FOR THE MARGINAL FREQUENCIES
235
+ # SINCE NO MARGINAL CAN BE 0.0, IE IS NOT CHECKED
257
236
  #
258
- #10
259
- if @rr>RCUT
260
- gaussian_quadrature
237
+ @zac = Distribution::Normal.p_value(@probac)
238
+ @zab = Distribution::Normal.p_value(@probab)
239
+ @ss = Math::exp(-0.5 * (@zac ** 2 + @zab ** 2)).quo(TWOPI)
240
+ #
241
+ # WHEN R IS 0.0, 1.0 OR -1.0, TRANSFER TO COMPUTE SDZERO
242
+ #
243
+ if (@r != 0 or @itype > 0)
244
+ compute_sdzero
261
245
  return true
262
246
  end
263
247
  #
264
- # TETRACHORIC SERIES IS COMPUTED
248
+ # WHEN MARGINALS ARE EQUAL, COSINE EVALUATION IS USED
249
+ #
250
+ if (@a == @b and @b == @c)
251
+ calculate_cosine
252
+ return true
253
+ end
265
254
  #
266
- # INITIALIZATION
255
+ # INITIAL ESTIMATE OF CORRELATION IS YULES Y
267
256
  #
268
- va=1.0
269
- vb=@zac.to_f
270
- wa=1.0
271
- wb=@zab.to_f
272
- term = 1.0
273
- iterm = 0.0
274
- @sum = @probab * @probac
275
- deriv = 0.0
276
- sr = @ss
277
- #15
257
+ @rr = ((Math::sqrt(@aa * @dd) - Math::sqrt(@bb * @cc)) ** 2) / (@aa * @dd - @bb * @cc).abs
258
+ @iter = 0
278
259
  begin
279
- if(sr.abs<=CONST)
260
+ #
261
+ # IF RR EXCEEDS RCUT, GAUSSIAN QUADRATURE IS USED
262
+ #
263
+ #10
264
+ if @rr>RCUT
265
+ gaussian_quadrature
266
+ return true
267
+ end
268
+ #
269
+ # TETRACHORIC SERIES IS COMPUTED
270
+ #
271
+ # INITIALIZATION
272
+ #
273
+ va=1.0
274
+ vb=@zac.to_f
275
+ wa=1.0
276
+ wb=@zab.to_f
277
+ term = 1.0
278
+ iterm = 0.0
279
+ @sum = @probab * @probac
280
+ deriv = 0.0
281
+ sr = @ss
282
+ #15
283
+ begin
284
+ if(sr.abs<=CONST)
285
+ #
286
+ # RESCALE TERMS TO AVOID OVERFLOWS AND UNDERFLOWS
287
+ #
288
+ sr = sr / CONST
289
+ va = va * CHALF
290
+ vb = vb * CHALF
291
+ wa = wa * CHALF
292
+ wb = wb * CHALF
293
+ end
294
+ #
295
+ # FORM SUM AND DERIVATIVE OF SERIES
296
+ #
297
+ # 20
298
+ dr = sr * va * wa
299
+ sr = sr * @rr / term
300
+ cof = sr * va * wa
280
301
  #
281
- # RESCALE TERMS TO AVOID OVERFLOWS AND UNDERFLOWS
302
+ # ITERM COUNTS NO. OF CONSECUTIVE TERMS < CONV
282
303
  #
283
- sr = sr / CONST
284
- va = va * CHALF
285
- vb = vb * CHALF
286
- wa = wa * CHALF
287
- wb = wb * CHALF
304
+ iterm+= 1
305
+ iterm=0 if (cof.abs > CONV)
306
+ @sum = @sum + cof
307
+ deriv += dr
308
+ vaa = va
309
+ waa = wa
310
+ va = vb
311
+ wa = wb
312
+ vb = @zac * va - term * vaa
313
+ wb = @zab * wa - term * waa
314
+ term += 1
315
+ end while (iterm < 2 or term < 6)
316
+ #
317
+ # CHECK IF ITERATION CONVERGED
318
+ #
319
+ if((@sum-@probaa).abs <= CITER)
320
+ @itype=term
321
+ calculate_sdr
322
+ return true
288
323
  end
289
324
  #
290
- # FORM SUM AND DERIVATIVE OF SERIES
325
+ # CALCULATE NEXT ESTIMATE OF CORRELATION
291
326
  #
292
- # 20
293
- dr = sr * va * wa
294
- sr = sr * @rr / term
295
- cof = sr * va * wa
327
+ #25
328
+ @iter += 1
296
329
  #
297
- # ITERM COUNTS NO. OF CONSECUTIVE TERMS < CONV
330
+ # IF TOO MANY ITERATIONS, RUN IS TERMINATED
298
331
  #
299
- iterm+= 1
300
- iterm=0 if (cof.abs > CONV)
301
- @sum = @sum + cof
302
- deriv += dr
303
- vaa = va
304
- waa = wa
305
- va = vb
306
- wa = wb
307
- vb = @zac * va - term * vaa
308
- wb = @zab * wa - term * waa
309
- term += 1
310
- end while (iterm < 2 or term < 6)
311
- #
312
- # CHECK IF ITERATION CONVERGED
313
- #
314
- if((@sum-@probaa).abs <= CITER)
315
- @itype=term
316
- calculate_sdr
317
- return true
318
- end
319
- #
320
- # CALCULATE NEXT ESTIMATE OF CORRELATION
321
- #
322
- #25
323
- @iter += 1
324
- #
325
- # IF TOO MANY ITERATIONS, RUN IS TERMINATED
326
- #
327
- delta = (@sum - @probaa) / deriv
328
- @rrprev = @rr
329
- @rr = @rr - delta
330
- @rr += 0.5 * delta if(@iter == 1)
331
- @rr= RLIMIT if (@rr > RLIMIT)
332
- @rr =0 if (@rr < 0.0)
333
- end while @iter < NITER
334
- raise "Too many iteration"
335
- # GOTO 10
336
- end
332
+ delta = (@sum - @probaa) / deriv
333
+ @rrprev = @rr
334
+ @rr = @rr - delta
335
+ @rr += 0.5 * delta if(@iter == 1)
336
+ @rr= RLIMIT if (@rr > RLIMIT)
337
+ @rr =0 if (@rr < 0.0)
338
+ end while @iter < NITER
339
+ raise "Too many iteration"
340
+ # GOTO 10
341
+ end
337
342
  # GAUSSIAN QUADRATURE
338
343
  # 40
339
344
  def gaussian_quadrature
@@ -342,8 +347,8 @@ module Statsample
342
347
  @sum=@probab*@probac
343
348
  @rrprev=0
344
349
  end
345
-
346
- # 41
350
+
351
+ # 41
347
352
  sumprv = @probab - @sum
348
353
  @prob = @bb.quo(@tot)
349
354
  @prob = @aa.quo(@tot) if (@ksign == 2)
@@ -353,7 +358,7 @@ module Statsample
353
358
  # COMPUTATION OF INTEGRAL (SUM) BY QUADRATURE
354
359
  #
355
360
  # 42
356
-
361
+
357
362
  begin
358
363
  rrsq = Math::sqrt(1 - @rr ** 2)
359
364
  amid = 0.5 * (UPLIM + @zac)
@@ -362,8 +367,8 @@ module Statsample
362
367
  (1..16).each do |iquad|
363
368
  xla = amid + X[iquad] * xlen
364
369
  xlb = amid - X[iquad] * xlen
365
-
366
-
370
+
371
+
367
372
  #
368
373
  # TO AVOID UNDERFLOWS, TEMPA AND TEMPB ARE USED
369
374
  #
@@ -372,7 +377,7 @@ module Statsample
372
377
  @sum = @sum + W[iquad] * Math::exp(-0.5 * xla ** 2) * Distribution::Normal.cdf(tempa)
373
378
  end
374
379
  tempb = (@zab - @rr * xlb) / rrsq
375
-
380
+
376
381
  if (tempb >= -6.0)
377
382
  @sum = @sum + W[iquad] * Math::exp(-0.5 * xlb ** 2) * Distribution::Normal.cdf(tempb)
378
383
  end
@@ -380,29 +385,29 @@ module Statsample
380
385
  @sum=@sum*xlen / SQT2PI
381
386
  #
382
387
  # CHECK IF ITERATION HAS CONVERGED
383
- #
384
- if ((@prob - @sum).abs <= CITER)
388
+ #
389
+ if ((@prob - @sum).abs <= CITER)
385
390
  calculate_sdr
386
391
  return true
387
392
  end
388
393
  # ESTIMATE CORRELATION FOR NEXT ITERATION BY LINEAR INTERPOLATION
389
-
394
+
390
395
  rrest = ((@prob - @sum) * @rrprev - (@prob - sumprv) * @rr) / (sumprv - @sum)
391
- rrest = RLIMIT if (rrest > RLIMIT)
392
- rrest = 0 if (rrest < 0)
396
+ rrest = RLIMIT if (rrest > RLIMIT)
397
+ rrest = 0 if (rrest < 0)
393
398
  @rrprev = @rr
394
399
  @rr = rrest
395
400
  sumprv = @sum
396
401
  #
397
402
  # if estimate has same value on two iterations, stop iteration
398
403
  #
399
- if @rr == @rrprev
404
+ if @rr == @rrprev
400
405
  calculate_sdr
401
406
  return true
402
407
  end
403
-
404
-
405
- end while @iter < NITER
408
+
409
+
410
+ end while @iter < NITER
406
411
  raise "Too many iterations"
407
412
  # ir a 42
408
413
  end
@@ -414,16 +419,16 @@ module Statsample
414
419
  @itype = 2
415
420
  calculate_sdr
416
421
  end
417
-
418
-
422
+
423
+
419
424
  def calculate_sdr # :nodoc:
420
425
  #
421
426
  # COMPUTE SDR
422
427
  #
423
428
  @r = @rr
424
- rrsq = Math::sqrt(1.0 - @r ** 2)
429
+ rrsq = Math::sqrt(1.0 - @r ** 2)
425
430
  @itype = -@itype if (@kdelta > 1)
426
- if (@ksign != 1)
431
+ if (@ksign != 1)
427
432
  @r = -@r
428
433
  @zac = -@zac
429
434
  end
@@ -431,13 +436,13 @@ module Statsample
431
436
  pdf = Math::exp(-0.5 * (@zac ** 2 - 2 * @r * @zac * @zab + @zab ** 2) / rrsq ** 2) / (TWOPI * rrsq)
432
437
  @pac = Distribution::Normal.cdf((@zac - @r * @zab) / rrsq) - 0.5
433
438
  @pab = Distribution::Normal.cdf((@zab - @r * @zac) / rrsq) - 0.5
434
-
439
+
435
440
  @sdr = ((@aa+@dd) * (@bb + @cc)).quo(4) + @pab ** 2 * (@aa + @cc) * (@bb + @dd) + @pac ** 2 * (@aa + @bb) * (@cc + @dd) + 2.0 * @pab * @pac * (@aa * @dd - @bb * @cc) - @pab * (@aa * @bb - @cc * @dd) - @pac * (@aa * @cc - @bb * @dd)
436
441
  @sdr=0 if (@sdr<0)
437
442
  @sdr= Math::sqrt(@sdr) / (@tot * pdf * Math::sqrt(@tot))
438
443
  compute_sdzero
439
444
  end
440
-
445
+
441
446
  # 85
442
447
  #
443
448
  # COMPUTE SDZERO
@@ -1,14 +1,14 @@
1
1
  require 'statsample/bivariate/tetrachoric'
2
2
  require 'statsample/bivariate/polychoric'
3
3
  module Statsample
4
- # Diverse correlation methods
4
+ # Diverse bivariate methods, including #covariance, #pearson correlation (r), #spearman ranked correlation (rho), #tetrachoric correlation and #polychoric correlation.
5
5
  module Bivariate
6
6
  class << self
7
7
  # Covariance between two vectors
8
8
  def covariance(v1,v2)
9
9
  v1a,v2a=Statsample.only_valid(v1,v2)
10
10
  return nil if v1a.size==0
11
- if HAS_GSL
11
+ if Statsample.has_gsl?
12
12
  GSL::Stats::covariance(v1a.gsl, v2a.gsl)
13
13
  else
14
14
  covariance_slow(v1a,v2a)
@@ -31,11 +31,12 @@ module Statsample
31
31
  (0...v1a.size).each {|i| t+=((v1a[i]-m1)*(v2a[i]-m2)) }
32
32
  t.to_f / (v1a.size-1)
33
33
  end
34
- # Calculate Pearson correlation coefficient between 2 vectors
34
+
35
+ # Calculate Pearson correlation coefficient (r) between 2 vectors
35
36
  def pearson(v1,v2)
36
37
  v1a,v2a=Statsample.only_valid(v1,v2)
37
38
  return nil if v1a.size ==0
38
- if HAS_GSL
39
+ if Statsample.has_gsl?
39
40
  GSL::Stats::correlation(v1a.gsl, v2a.gsl)
40
41
  else
41
42
  pearson_slow(v1a,v2a)
@@ -177,7 +178,7 @@ module Statsample
177
178
  Matrix.rows(rows)
178
179
  end
179
180
 
180
- # Spearman ranked correlation coefficient between 2 vectors
181
+ # Spearman ranked correlation coefficient (rho) between 2 vectors
181
182
  def spearman(v1,v2)
182
183
  v1a,v2a=Statsample.only_valid(v1,v2)
183
184
  v1r,v2r=v1a.ranked(:scale),v2a.ranked(:scale)
@@ -195,7 +196,6 @@ module Statsample
195
196
  ((m1.mean-m0.mean).to_f / ds['c'].sdp) * Math::sqrt(m0.size*m1.size.to_f / ds.cases**2)
196
197
  end
197
198
  # Kendall Rank Correlation Coefficient.
198
- #
199
199
  # Based on Hervé Abdi article
200
200
  def tau_a(v1,v2)
201
201
  v1a,v2a=Statsample.only_valid(v1,v2)