statsample 0.12.0 → 0.13.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (40) hide show
  1. data.tar.gz.sig +2 -1
  2. data/History.txt +11 -0
  3. data/Manifest.txt +2 -3
  4. data/README.txt +0 -17
  5. data/Rakefile +10 -9
  6. data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
  7. data/examples/principal_axis.rb +2 -0
  8. data/examples/u_test.rb +8 -0
  9. data/lib/distribution.rb +1 -1
  10. data/lib/statsample.rb +12 -12
  11. data/lib/statsample/anova/oneway.rb +4 -4
  12. data/lib/statsample/bivariate.rb +10 -3
  13. data/lib/statsample/bivariate/pearson.rb +55 -0
  14. data/lib/statsample/dataset.rb +57 -49
  15. data/lib/statsample/dominanceanalysis.rb +1 -2
  16. data/lib/statsample/dominanceanalysis/bootstrap.rb +46 -54
  17. data/lib/statsample/factor.rb +0 -1
  18. data/lib/statsample/factor/parallelanalysis.rb +9 -13
  19. data/lib/statsample/factor/pca.rb +5 -10
  20. data/lib/statsample/factor/principalaxis.rb +27 -33
  21. data/lib/statsample/matrix.rb +11 -11
  22. data/lib/statsample/mle.rb +0 -1
  23. data/lib/statsample/regression.rb +0 -1
  24. data/lib/statsample/reliability.rb +2 -2
  25. data/lib/statsample/reliability/multiscaleanalysis.rb +62 -15
  26. data/lib/statsample/reliability/scaleanalysis.rb +5 -6
  27. data/lib/statsample/test/f.rb +2 -5
  28. data/lib/statsample/test/levene.rb +2 -5
  29. data/lib/statsample/test/t.rb +4 -13
  30. data/lib/statsample/test/umannwhitney.rb +19 -19
  31. data/po/es/statsample.mo +0 -0
  32. data/po/es/statsample.po +304 -111
  33. data/po/statsample.pot +224 -90
  34. data/test/test_bivariate.rb +8 -69
  35. data/test/test_reliability.rb +3 -4
  36. metadata +30 -18
  37. metadata.gz.sig +0 -0
  38. data/lib/statsample/bivariate/polychoric.rb +0 -893
  39. data/lib/statsample/bivariate/tetrachoric.rb +0 -457
  40. data/test/test_bivariate_polychoric.rb +0 -70
@@ -1,457 +0,0 @@
1
- module Statsample
2
- module Bivariate
3
- # Calculate Tetrachoric correlation for two vectors.
4
- def self.tetrachoric(v1,v2)
5
- tc=Tetrachoric.new_with_vectors(v1,v2)
6
- tc.r
7
- end
8
-
9
- # Tetrachoric correlation matrix.
10
- # Order of rows and columns depends on Dataset#fields order
11
- def self.tetrachoric_correlation_matrix(ds)
12
- ds.collect_matrix do |row,col|
13
- if row==col
14
- 1.0
15
- else
16
- begin
17
- tetrachoric(ds[row],ds[col])
18
- rescue RuntimeError
19
- nil
20
- end
21
- end
22
- end
23
- end
24
- # Compute tetrachoric correlation.
25
- #
26
- # The <em>tetrachoric</em> correlation is a measure of
27
- # bivariate association arising when both observed variates
28
- # are categorical variables that result from dichotomizing
29
- # the two underlying continuous variables (Drasgow, 2006).
30
- # The tetrachoric correlation is a good way to measure rater agreement (Uebersax, 2006)
31
- #
32
- # This class uses Brown's (1977) algorithm. The FORTRAN code is available at http://lib.stat.cmu.edu/apstat/116
33
- #
34
- #
35
- # == Usage
36
- # With two variables x and y on a crosstab like this:
37
- #
38
- # -------------
39
- # | y=0 | y=1 |
40
- # -------------
41
- # x = 0 | a | b |
42
- # -------------
43
- # x = 1 | c | d |
44
- # -------------
45
- #
46
- # The code will be
47
- # tc=Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
48
- # tc.r # correlation
49
- # tc.se # standard error
50
- # tc.threshold_y # threshold for y variable
51
- # tc.threshold_x # threshold for x variable
52
- #
53
- # == References:
54
- #
55
- # * Brown, MB. (1977) Algorithm AS 116: the tetrachoric correlation and its standard error. <em>Applied Statistics, 26</em>, 343-351.
56
- # * Drasgow F. (2006). Polychoric and polyserial correlations. In Kotz L, Johnson NL (Eds.), Encyclopedia of statistical sciences. Vol. 7 (pp. 69-74). New York: Wiley.
57
- # * Uebersax, J.S. (2006). The tetrachoric and polychoric correlation coefficients. Statistical Methods for Rater Agreement web site. 2006. Available at: http://john-uebersax.com/stat/tetra.htm . Accessed February 11, 2010.
58
-
59
- class Tetrachoric
60
- include GetText
61
- bindtextdomain("statsample")
62
- attr_reader :r
63
- attr_accessor :name
64
-
65
- TWOPI=Math::PI*2
66
- SQT2PI= 2.50662827
67
- RLIMIT = 0.9999
68
- RCUT= 0.95
69
- UPLIM= 5.0
70
- CONST= 1E-36
71
- CHALF= 1E-18
72
- CONV =1E-8
73
- CITER = 1E-6
74
- NITER = 25
75
- X=[0,0.9972638618, 0.9856115115, 0.9647622556, 0.9349060759, 0.8963211558, 0.8493676137, 0.7944837960, 0.7321821187, 0.6630442669, 0.5877157572, 0.5068999089, 0.4213512761, 0.3318686023, 0.2392873623, 0.1444719616, 0.0483076657]
76
- W=[0, 0.0070186100, 0.0162743947, 0.0253920653, 0.0342738629, 0.0428358980, 0.0509980593, 0.0586840935, 0.0658222228, 0.0723457941, 0.0781938958, 0.0833119242, 0.0876520930, 0.0911738787, 0.0938443991, 0.0956387201, 0.0965400885]
77
- # Creates a Tetrachoric object based on a 2x2 Matrix.
78
- def self.new_with_matrix(m)
79
- Tetrachoric.new(m[0,0], m[0,1], m[1,0],m[1,1])
80
- end
81
- # Creates a Tetrachoric object based on two vectors.
82
- # The vectors are dichotomized before the 2x2 table is built.
83
- def self.new_with_vectors(v1,v2)
84
- v1a, v2a=Statsample.only_valid(v1,v2)
85
- v1a=v1a.dichotomize
86
- v2a=v2a.dichotomize
87
- raise "v1 have only 0" if v1a.factors==[0]
88
- raise "v2 have only 0" if v2a.factors==[0]
89
- a,b,c,d = 0,0,0,0
90
- v1a.each_index{|i|
91
- x,y=v1a[i],v2a[i]
92
- a+=1 if x==0 and y==0
93
- b+=1 if x==0 and y==1
94
- c+=1 if x==1 and y==0
95
- d+=1 if x==1 and y==1
96
- }
97
- Tetrachoric.new(a,b,c,d)
98
- end
99
- # Standard error
100
- def se
101
- @sdr
102
- end
103
- # Threshold for variable x (rows)
104
- # Point on the Gaussian curve under which rater X selects cases
105
- def threshold_x
106
- @zab
107
- end
108
-
109
- # Threshold for variable y (columns)
110
- # Point on the Gaussian curve under which rater Y selects cases
111
-
112
- def threshold_y
113
- @zac
114
- end
115
- # Summary of the analysis
116
- def summary
117
- ReportBuilder.new(:name=>@name).add(self).to_text
118
- end
119
-
120
- def report_building(generator) # :nodoc:
121
- section=ReportBuilder::Section.new(:name=>@name)
122
- t=ReportBuilder::Table.new(:name=>_("Contingence Table"),:header=>["","Y=0","Y=1", "T"])
123
- t.row(["X=0", @a,@b,@a+@b])
124
- t.row(["X=1", @c,@d,@c+@d])
125
- t.hr
126
- t.row(["T", @a+@c,@b+@d,@a+@b+@c+@d])
127
- section.add(t)
128
- #generator.parse_element(t)
129
- section.add(sprintf("r: %0.3f",r))
130
- section.add(_("SE: %0.3f") % se)
131
- section.add(_("Threshold X: %0.3f ") % [threshold_x] )
132
- section.add(_("Threshold Y: %0.3f ") % [threshold_y] )
133
- generator.parse_element(section)
134
- end
135
-
136
- # Creates a new tetrachoric object for analysis
137
- def initialize(a,b,c,d)
138
- @a,@b,@c,@d=a,b,c,d
139
- @name=_("Tetrachoric correlation")
140
- #
141
- # CHECK IF ANY CELL FREQUENCY IS NEGATIVE
142
- #
143
- raise "All frequencies should be positive" if (@a < 0 or @b < 0 or @c < 0 or @d < 0)
144
- compute
145
- end
146
- # Compute the tetrachoric correlation.
147
- # Called on object creation.
148
- #
149
- def compute
150
-
151
- #
152
- # INITIALIZATION
153
- #
154
- @r = 0
155
- sdzero = 0
156
- @sdr = 0
157
- @itype = 0
158
- @ifault = 0
159
-
160
- #
161
- # CHECK IF ANY FREQUENCY IS 0.0 AND SET kdelta
162
- #
163
- @kdelta = 1
164
- delta = 0
165
- @kdelta = 2 if (@a == 0 or @d == 0)
166
- @kdelta += 2 if (@b == 0 or @c == 0)
167
- #
168
- # kdelta=4 MEANS TABLE HAS 0.0 ROW OR COLUMN, RUN IS TERMINATED
169
- #
170
-
171
- raise "Rows and columns should have more than 0 items" if @kdelta==4
172
-
173
- # GOTO (4, 1, 2 , 92), kdelta
174
- #
175
- # delta IS 0.0, 0.5 OR -0.5 ACCORDING TO WHICH CELL IS 0.0
176
- #
177
-
178
- if(@kdelta==2)
179
- # 1
180
- delta=0.5
181
- @r=-1 if (@a==0 and @d==0)
182
- elsif(@kdelta==3)
183
- # 2
184
- delta=-0.5
185
- @r=1 if (@b==0 and @c==0)
186
- end
187
- # 4
188
- if @r!=0
189
- @itype=3
190
- end
191
-
192
- #
193
- # STORE FREQUENCIES IN AA, BB, CC AND DD
194
- #
195
- @aa = @a + delta
196
- @bb = @b - delta
197
- @cc = @c - delta
198
- @dd = @d + delta
199
- @tot = @aa+@bb+@cc+@dd
200
- #
201
- # CHECK IF CORRELATION IS NEGATIVE, 0.0, POSITIVE
202
- # IF (AA * DD - BB * CC) 7, 5, 6
203
-
204
- corr_dir=@aa * @dd - @bb * @cc
205
- if(corr_dir < 0)
206
- # 7
207
- @probaa = @bb.quo(@tot)
208
- @probac = (@bb + @dd).quo(@tot)
209
- @ksign = 2
210
- # -> 8
211
- else
212
- if (corr_dir==0)
213
- # 5
214
- @itype=4
215
- end
216
- # 6
217
- #
218
- # COMPUTE PROBABILITIES OF QUADRANT AND OF MARGINALS
219
- # PROBAA AND PROBAC CHOSEN SO THAT CORRELATION IS POSITIVE.
220
- # KSIGN INDICATES WHETHER QUADRANTS HAVE BEEN SWITCHED
221
- #
222
-
223
- @probaa = @aa.quo(@tot)
224
- @probac = (@aa+@cc).quo(@tot)
225
- @ksign=1
226
- end
227
- # 8
228
-
229
- @probab = (@aa+@bb).quo(@tot)
230
-
231
- #
232
- # COMPUTE NORMAL DEVIATES FOR THE MARGINAL FREQUENCIES
233
- # SINCE NO MARGINAL CAN BE 0.0, IE IS NOT CHECKED
234
- #
235
- @zac = Distribution::Normal.p_value(@probac)
236
- @zab = Distribution::Normal.p_value(@probab)
237
- @ss = Math::exp(-0.5 * (@zac ** 2 + @zab ** 2)).quo(TWOPI)
238
- #
239
- # WHEN R IS 0.0, 1.0 OR -1.0, TRANSFER TO COMPUTE SDZERO
240
- #
241
- if (@r != 0 or @itype > 0)
242
- compute_sdzero
243
- return true
244
- end
245
- #
246
- # WHEN MARGINALS ARE EQUAL, COSINE EVALUATION IS USED
247
- #
248
- if (@a == @b and @b == @c)
249
- calculate_cosine
250
- return true
251
- end
252
- #
253
- # INITIAL ESTIMATE OF CORRELATION IS YULE'S Y
254
- #
255
- @rr = ((Math::sqrt(@aa * @dd) - Math::sqrt(@bb * @cc)) ** 2) / (@aa * @dd - @bb * @cc).abs
256
- @iter = 0
257
- begin
258
- #
259
- # IF RR EXCEEDS RCUT, GAUSSIAN QUADRATURE IS USED
260
- #
261
- #10
262
- if @rr>RCUT
263
- gaussian_quadrature
264
- return true
265
- end
266
- #
267
- # TETRACHORIC SERIES IS COMPUTED
268
- #
269
- # INITIALIZATION
270
- #
271
- va=1.0
272
- vb=@zac.to_f
273
- wa=1.0
274
- wb=@zab.to_f
275
- term = 1.0
276
- iterm = 0.0
277
- @sum = @probab * @probac
278
- deriv = 0.0
279
- sr = @ss
280
- #15
281
- begin
282
- if(sr.abs<=CONST)
283
- #
284
- # RESCALE TERMS TO AVOID OVERFLOWS AND UNDERFLOWS
285
- #
286
- sr = sr / CONST
287
- va = va * CHALF
288
- vb = vb * CHALF
289
- wa = wa * CHALF
290
- wb = wb * CHALF
291
- end
292
- #
293
- # FORM SUM AND DERIVATIVE OF SERIES
294
- #
295
- # 20
296
- dr = sr * va * wa
297
- sr = sr * @rr / term
298
- cof = sr * va * wa
299
- #
300
- # ITERM COUNTS NO. OF CONSECUTIVE TERMS < CONV
301
- #
302
- iterm+= 1
303
- iterm=0 if (cof.abs > CONV)
304
- @sum = @sum + cof
305
- deriv += dr
306
- vaa = va
307
- waa = wa
308
- va = vb
309
- wa = wb
310
- vb = @zac * va - term * vaa
311
- wb = @zab * wa - term * waa
312
- term += 1
313
- end while (iterm < 2 or term < 6)
314
- #
315
- # CHECK IF ITERATION CONVERGED
316
- #
317
- if((@sum-@probaa).abs <= CITER)
318
- @itype=term
319
- calculate_sdr
320
- return true
321
- end
322
- #
323
- # CALCULATE NEXT ESTIMATE OF CORRELATION
324
- #
325
- #25
326
- @iter += 1
327
- #
328
- # IF TOO MANY ITERATIONS, RUN IS TERMINATED
329
- #
330
- delta = (@sum - @probaa) / deriv
331
- @rrprev = @rr
332
- @rr = @rr - delta
333
- @rr += 0.5 * delta if(@iter == 1)
334
- @rr= RLIMIT if (@rr > RLIMIT)
335
- @rr =0 if (@rr < 0.0)
336
- end while @iter < NITER
337
- raise "Too many iteration"
338
- # GOTO 10
339
- end
340
- # GAUSSIAN QUADRATURE
341
- # 40
342
- def gaussian_quadrature
343
- if(@iter==0)
344
- # INITIALIZATION, IF THIS IS FIRST ITERATION
345
- @sum=@probab*@probac
346
- @rrprev=0
347
- end
348
-
349
- # 41
350
- sumprv = @probab - @sum
351
- @prob = @bb.quo(@tot)
352
- @prob = @aa.quo(@tot) if (@ksign == 2)
353
- @itype = 1
354
- #
355
- # LOOP TO FIND ESTIMATE OF CORRELATION
356
- # COMPUTATION OF INTEGRAL (SUM) BY QUADRATURE
357
- #
358
- # 42
359
-
360
- begin
361
- rrsq = Math::sqrt(1 - @rr ** 2)
362
- amid = 0.5 * (UPLIM + @zac)
363
- xlen = UPLIM - amid
364
- @sum = 0
365
- (1..16).each do |iquad|
366
- xla = amid + X[iquad] * xlen
367
- xlb = amid - X[iquad] * xlen
368
-
369
-
370
- #
371
- # TO AVOID UNDERFLOWS, TEMPA AND TEMPB ARE USED
372
- #
373
- tempa = (@zab - @rr * xla) / rrsq
374
- if (tempa >= -6.0)
375
- @sum = @sum + W[iquad] * Math::exp(-0.5 * xla ** 2) * Distribution::Normal.cdf(tempa)
376
- end
377
- tempb = (@zab - @rr * xlb) / rrsq
378
-
379
- if (tempb >= -6.0)
380
- @sum = @sum + W[iquad] * Math::exp(-0.5 * xlb ** 2) * Distribution::Normal.cdf(tempb)
381
- end
382
- end # 44 ~ iquad
383
- @sum=@sum*xlen / SQT2PI
384
- #
385
- # CHECK IF ITERATION HAS CONVERGED
386
- #
387
- if ((@prob - @sum).abs <= CITER)
388
- calculate_sdr
389
- return true
390
- end
391
- # ESTIMATE CORRELATION FOR NEXT ITERATION BY LINEAR INTERPOLATION
392
-
393
- rrest = ((@prob - @sum) * @rrprev - (@prob - sumprv) * @rr) / (sumprv - @sum)
394
- rrest = RLIMIT if (rrest > RLIMIT)
395
- rrest = 0 if (rrest < 0)
396
- @rrprev = @rr
397
- @rr = rrest
398
- sumprv = @sum
399
- #
400
- # if estimate has same value on two iterations, stop iteration
401
- #
402
- if @rr == @rrprev
403
- calculate_sdr
404
- return true
405
- end
406
-
407
-
408
- end while @iter < NITER
409
- raise "Too many iterations"
410
- # GOTO 42
411
- end
412
- def calculate_cosine
413
- #
414
- # WHEN ALL MARGINALS ARE EQUAL THE COSINE FUNCTION IS USED
415
- #
416
- @rr = -Math::cos(TWOPI * @probaa)
417
- @itype = 2
418
- calculate_sdr
419
- end
420
-
421
-
422
- def calculate_sdr # :nodoc:
423
- #
424
- # COMPUTE SDR
425
- #
426
- @r = @rr
427
- rrsq = Math::sqrt(1.0 - @r ** 2)
428
- @itype = -@itype if (@kdelta > 1)
429
- if (@ksign != 1)
430
- @r = -@r
431
- @zac = -@zac
432
- end
433
- # 71
434
- pdf = Math::exp(-0.5 * (@zac ** 2 - 2 * @r * @zac * @zab + @zab ** 2) / rrsq ** 2) / (TWOPI * rrsq)
435
- @pac = Distribution::Normal.cdf((@zac - @r * @zab) / rrsq) - 0.5
436
- @pab = Distribution::Normal.cdf((@zab - @r * @zac) / rrsq) - 0.5
437
-
438
- @sdr = ((@aa+@dd) * (@bb + @cc)).quo(4) + @pab ** 2 * (@aa + @cc) * (@bb + @dd) + @pac ** 2 * (@aa + @bb) * (@cc + @dd) + 2.0 * @pab * @pac * (@aa * @dd - @bb * @cc) - @pab * (@aa * @bb - @cc * @dd) - @pac * (@aa * @cc - @bb * @dd)
439
- @sdr=0 if (@sdr<0)
440
- @sdr= Math::sqrt(@sdr) / (@tot * pdf * Math::sqrt(@tot))
441
- compute_sdzero
442
- end
443
-
444
- # 85
445
- #
446
- # COMPUTE SDZERO
447
- #
448
- def compute_sdzero
449
- @sdzero = Math::sqrt(((@aa + @bb) * (@aa + @cc) * (@bb + @dd) * (@cc + @dd)).quo(@tot)).quo(@tot ** 2 * @ss)
450
- @sdr = @sdzero if (@r == 0)
451
- end
452
- private :calculate_cosine, :calculate_sdr, :compute_sdzero, :compute, :gaussian_quadrature
453
- end
454
- end
455
- end
456
-
457
-