statsample 0.12.0 → 0.13.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (40) hide show
  1. data.tar.gz.sig +2 -1
  2. data/History.txt +11 -0
  3. data/Manifest.txt +2 -3
  4. data/README.txt +0 -17
  5. data/Rakefile +10 -9
  6. data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
  7. data/examples/principal_axis.rb +2 -0
  8. data/examples/u_test.rb +8 -0
  9. data/lib/distribution.rb +1 -1
  10. data/lib/statsample.rb +12 -12
  11. data/lib/statsample/anova/oneway.rb +4 -4
  12. data/lib/statsample/bivariate.rb +10 -3
  13. data/lib/statsample/bivariate/pearson.rb +55 -0
  14. data/lib/statsample/dataset.rb +57 -49
  15. data/lib/statsample/dominanceanalysis.rb +1 -2
  16. data/lib/statsample/dominanceanalysis/bootstrap.rb +46 -54
  17. data/lib/statsample/factor.rb +0 -1
  18. data/lib/statsample/factor/parallelanalysis.rb +9 -13
  19. data/lib/statsample/factor/pca.rb +5 -10
  20. data/lib/statsample/factor/principalaxis.rb +27 -33
  21. data/lib/statsample/matrix.rb +11 -11
  22. data/lib/statsample/mle.rb +0 -1
  23. data/lib/statsample/regression.rb +0 -1
  24. data/lib/statsample/reliability.rb +2 -2
  25. data/lib/statsample/reliability/multiscaleanalysis.rb +62 -15
  26. data/lib/statsample/reliability/scaleanalysis.rb +5 -6
  27. data/lib/statsample/test/f.rb +2 -5
  28. data/lib/statsample/test/levene.rb +2 -5
  29. data/lib/statsample/test/t.rb +4 -13
  30. data/lib/statsample/test/umannwhitney.rb +19 -19
  31. data/po/es/statsample.mo +0 -0
  32. data/po/es/statsample.po +304 -111
  33. data/po/statsample.pot +224 -90
  34. data/test/test_bivariate.rb +8 -69
  35. data/test/test_reliability.rb +3 -4
  36. metadata +30 -18
  37. metadata.gz.sig +0 -0
  38. data/lib/statsample/bivariate/polychoric.rb +0 -893
  39. data/lib/statsample/bivariate/tetrachoric.rb +0 -457
  40. data/test/test_bivariate_polychoric.rb +0 -70
@@ -39,76 +39,15 @@ class StatsampleBivariateTestCase < MiniTest::Unit::TestCase
39
39
  # Test ruby method
40
40
  v3a,v4a=Statsample.only_valid v3, v4
41
41
  assert_in_delta(0.525, Statsample::Bivariate.pearson_slow(v3a,v4a),0.001)
42
-
43
- end
44
- def test_tetrachoric_matrix
45
- ds=Statsample::PlainText.read(File.dirname(__FILE__)+"/../data/tetmat_test.txt", %w{a b c d e})
46
- tcm_obs=Statsample::Bivariate.tetrachoric_correlation_matrix(ds)
47
- tcm_exp=Statsample::PlainText.read(File.dirname(__FILE__)+"/../data/tetmat_matrix.txt", %w{a b c d e}).to_matrix
48
- tcm_obs.row_size.times do |i|
49
- tcm_obs.column_size do |j|
50
- assert_in_delta(tcm_obs[i,j], tcm_exp[i,k], 0.00001)
51
- end
52
- end
53
42
  end
54
- def test_poly_vs_tetra
55
- 2.times {
56
- # Should be the same results as Tetrachoric for 2x2 matrix
57
- matrix=Matrix[[150+rand(10),1000+rand(20)],[1000+rand(20),200+rand(20)]]
58
- tetra = Statsample::Bivariate::Tetrachoric.new_with_matrix(matrix)
59
- poly = Statsample::Bivariate::Polychoric.new(matrix)
60
- poly.compute_two_step_mle_drasgow_ruby
61
- assert_in_delta(tetra.r,poly.r,0.0001)
62
- if Statsample.has_gsl?
63
- poly.compute_two_step_mle_drasgow_gsl
64
- assert_in_delta(tetra.r,poly.r,0.0001)
65
- else
66
- skip "compute_two_step_mle_drasgow_gsl not tested (requires GSL)"
67
- end
68
- }
69
- end
70
-
71
- def test_tetrachoric
72
- a,b,c,d=0,0,0,0
73
- assert_raises RuntimeError do
74
- tc = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
75
- end
76
- a,b,c,d=10,10,0,0
77
- assert_raises RuntimeError do
78
- tc = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
79
- end
80
- a,b,c,d=10,0,10,0
81
- assert_raises RuntimeError do
82
- tc = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
83
- end
84
- a,b,c,d=10,0,0,10
85
- tc = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
86
- assert_equal(1,tc.r)
87
- assert_equal(0,tc.se)
88
- a,b,c,d=0,10,10,0
89
- tc = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
90
- assert_equal(-1,tc.r)
91
- assert_equal(0,tc.se)
92
-
93
- a,b,c,d = 30,40,70,20
94
- tc = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
95
- assert_in_delta(-0.53980,tc.r,0.0001)
96
- assert_in_delta(0.09940,tc.se,0.0001)
97
- assert_in_delta(-0.15731,tc.threshold_x, 0.0001)
98
- assert_in_delta(0.31864,tc.threshold_y, 0.0001)
99
-
100
- x=%w{a a a a b b b a b b a a b b}.to_vector
101
- y=%w{0 0 1 1 0 0 1 1 1 1 0 0 1 1}.to_vector
102
- # crosstab
103
- # 0 1
104
- # a 4 3
105
- # b 2 5
106
- a,b,c,d=4,3,2,5
107
- tc1 = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
108
- tc2 = Statsample::Bivariate::Tetrachoric.new_with_vectors(x,y)
109
- assert_equal(tc1.r,tc2.r)
110
- assert_equal(tc1.se,tc2.se)
111
- assert(tc.summary)
43
+ def test_bivariate_pearson
44
+ v1=[6,5,4,7,8,4,3,2].to_vector(:scale)
45
+ v2=[2,3,7,8,6,4,3,2].to_vector(:scale)
46
+ r=Statsample::Bivariate::Pearson.new(v1,v2)
47
+ assert_in_delta(0.525,r.r, 0.001)
48
+ assert_in_delta(Statsample::Bivariate.t_pearson(v1,v2), r.t, 0.001)
49
+ assert_in_delta(Statsample::Bivariate.prop_pearson(r.t,8,:both), r.probability, 0.001)
50
+ assert(r.summary.size>0)
112
51
  end
113
52
  def test_matrix_correlation
114
53
  v1=[6,5,4,7,8,4,3,2].to_vector(:scale)
@@ -31,8 +31,7 @@ class StatsampleReliabilityTestCase < MiniTest::Unit::TestCase
31
31
  should "method cronbach_alpha_from_n_s2_cov return correct values" do
32
32
  sa=Statsample::Reliability::ScaleAnalysis.new(@ds)
33
33
  vm, cm = sa.variances_mean, sa.covariances_mean
34
- assert_in_delta(sa.alpha, Statsample::Reliability.cronbach_alpha_from_n_s2_cov(@n_variables, vm,cm), 1e-10 )
35
-
34
+ assert_in_delta(sa.alpha, Statsample::Reliability.cronbach_alpha_from_n_s2_cov(@n_variables, vm,cm), 1e-10)
36
35
  end
37
36
  should "return correct n for desired alpha, covariance and variance" do
38
37
  sa=Statsample::Reliability::ScaleAnalysis.new(@ds)
@@ -102,7 +101,6 @@ class StatsampleReliabilityTestCase < MiniTest::Unit::TestCase
102
101
  context Statsample::Reliability::MultiScaleAnalysis do
103
102
 
104
103
  setup do
105
-
106
104
  size=100
107
105
  @scales=4
108
106
  @items_per_scale=10
@@ -132,7 +130,6 @@ class StatsampleReliabilityTestCase < MiniTest::Unit::TestCase
132
130
  end
133
131
  should "retrieve correct correlation matrix for each scale" do
134
132
  vectors={'complete'=>@ds.vector_sum}
135
-
136
133
  @scales.times {|s|
137
134
  vectors["scale_#{s}"]=@ds.dup(@items_per_scale.times.map {|i| "#{s}_#{i}"}).vector_sum
138
135
  }
@@ -163,6 +160,8 @@ class StatsampleReliabilityTestCase < MiniTest::Unit::TestCase
163
160
 
164
161
  #@msa.summary_correlation_matrix=true
165
162
  #@msa.summary_pca=true
163
+
164
+
166
165
  assert(@msa.summary.size>0)
167
166
  end
168
167
  end
metadata CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
4
4
  prerelease: false
5
5
  segments:
6
6
  - 0
7
- - 12
7
+ - 13
8
8
  - 0
9
- version: 0.12.0
9
+ version: 0.13.0
10
10
  platform: ruby
11
11
  authors:
12
12
  - Claudio Bustos
@@ -35,7 +35,7 @@ cert_chain:
35
35
  rpP0jjs0
36
36
  -----END CERTIFICATE-----
37
37
 
38
- date: 2010-06-14 00:00:00 -04:00
38
+ date: 2010-06-21 00:00:00 -04:00
39
39
  default_executable:
40
40
  dependencies:
41
41
  - !ruby/object:Gem::Dependency
@@ -132,9 +132,23 @@ dependencies:
132
132
  type: :runtime
133
133
  version_requirements: *id007
134
134
  - !ruby/object:Gem::Dependency
135
- name: rubyforge
135
+ name: statsample-bivariate-extension
136
136
  prerelease: false
137
137
  requirement: &id008 !ruby/object:Gem::Requirement
138
+ requirements:
139
+ - - ~>
140
+ - !ruby/object:Gem::Version
141
+ segments:
142
+ - 0
143
+ - 13
144
+ - 0
145
+ version: 0.13.0
146
+ type: :runtime
147
+ version_requirements: *id008
148
+ - !ruby/object:Gem::Dependency
149
+ name: rubyforge
150
+ prerelease: false
151
+ requirement: &id009 !ruby/object:Gem::Requirement
138
152
  requirements:
139
153
  - - ">="
140
154
  - !ruby/object:Gem::Version
@@ -144,11 +158,11 @@ dependencies:
144
158
  - 4
145
159
  version: 2.0.4
146
160
  type: :development
147
- version_requirements: *id008
161
+ version_requirements: *id009
148
162
  - !ruby/object:Gem::Dependency
149
163
  name: shoulda
150
164
  prerelease: false
151
- requirement: &id009 !ruby/object:Gem::Requirement
165
+ requirement: &id010 !ruby/object:Gem::Requirement
152
166
  requirements:
153
167
  - - ">="
154
168
  - !ruby/object:Gem::Version
@@ -156,11 +170,11 @@ dependencies:
156
170
  - 0
157
171
  version: "0"
158
172
  type: :development
159
- version_requirements: *id009
173
+ version_requirements: *id010
160
174
  - !ruby/object:Gem::Dependency
161
175
  name: hoe
162
176
  prerelease: false
163
- requirement: &id010 !ruby/object:Gem::Requirement
177
+ requirement: &id011 !ruby/object:Gem::Requirement
164
178
  requirements:
165
179
  - - ">="
166
180
  - !ruby/object:Gem::Version
@@ -170,7 +184,7 @@ dependencies:
170
184
  - 1
171
185
  version: 2.6.1
172
186
  type: :development
173
- version_requirements: *id010
187
+ version_requirements: *id011
174
188
  description: |-
175
189
  A suite for basic and advanced statistics on Ruby. Tested on Ruby 1.8.7, 1.9.1, 1.9.2 (April, 2010) and JRuby 1.4 (Ruby 1.8.7 compatible).
176
190
 
@@ -225,6 +239,7 @@ files:
225
239
  - examples/reliability.rb
226
240
  - examples/t_test.rb
227
241
  - examples/tetrachoric.rb
242
+ - examples/u_test.rb
228
243
  - examples/vector.rb
229
244
  - lib/distribution.rb
230
245
  - lib/distribution/chisquare.rb
@@ -239,8 +254,7 @@ files:
239
254
  - lib/statsample/anova/oneway.rb
240
255
  - lib/statsample/anova/twoway.rb
241
256
  - lib/statsample/bivariate.rb
242
- - lib/statsample/bivariate/polychoric.rb
243
- - lib/statsample/bivariate/tetrachoric.rb
257
+ - lib/statsample/bivariate/pearson.rb
244
258
  - lib/statsample/codification.rb
245
259
  - lib/statsample/combination.rb
246
260
  - lib/statsample/converter/csv.rb
@@ -301,7 +315,6 @@ files:
301
315
  - test/test_anovatwowaywithdataset.rb
302
316
  - test/test_anovawithvectors.rb
303
317
  - test/test_bivariate.rb
304
- - test/test_bivariate_polychoric.rb
305
318
  - test/test_codification.rb
306
319
  - test/test_combination.rb
307
320
  - test/test_crosstab.rb
@@ -345,13 +358,13 @@ post_install_message: |
345
358
  to retrieve gems gsl, statistics2 and a C extension
346
359
  to speed some methods.
347
360
 
348
- $sudo gem install statsample-optimization
361
+ $ sudo gem install statsample-optimization
349
362
 
350
- To use it, on Ubuntu I recommend install
351
- build-essential and libgsl0-dev using apt-get and
352
- compile ruby 1.8 or 1.9 from source code first.
363
+ On Ubuntu, install build-essential and libgsl0-dev
364
+ using apt-get and compile ruby 1.8 or 1.9 from
365
+ source code first.
353
366
 
354
- $sudo apt-get install build-essential libgsl0-dev
367
+ $ sudo apt-get install build-essential libgsl0-dev
355
368
 
356
369
 
357
370
  *****************************************************
@@ -400,7 +413,6 @@ test_files:
400
413
  - test/test_combination.rb
401
414
  - test/test_mle.rb
402
415
  - test/test_resample.rb
403
- - test/test_bivariate_polychoric.rb
404
416
  - test/test_stratified.rb
405
417
  - test/test_vector.rb
406
418
  - test/test_srs.rb
metadata.gz.sig CHANGED
Binary file
@@ -1,893 +0,0 @@
1
- require 'minimization'
2
- module Statsample
3
- module Bivariate
4
- # Calculate Polychoric correlation for two vectors.
5
- def self.polychoric(v1,v2)
6
- pc=Polychoric.new_with_vectors(v1,v2)
7
- pc.r
8
- end
9
-
10
- # Polychoric correlation matrix.
11
- # Order of rows and columns depends on Dataset#fields order
12
- def self.polychoric_correlation_matrix(ds)
13
- cache={}
14
- matrix=ds.collect_matrix do |row,col|
15
- if row==col
16
- 1.0
17
- else
18
- begin
19
- if cache[[col,row]].nil?
20
- poly=polychoric(ds[row],ds[col])
21
- cache[[row,col]]=poly
22
- poly
23
- else
24
- cache[[col,row]]
25
- end
26
- rescue RuntimeError
27
- nil
28
- end
29
- end
30
- end
31
- matrix.extend CovariateMatrix
32
- matrix.fields=ds.fields
33
- matrix
34
- end
35
-
36
- # = Polychoric correlation.
37
- #
38
- # The <em>polychoric</em> correlation is a measure of
39
- # bivariate association arising when both observed variates
40
- # are ordered, categorical variables that result from polychotomizing
41
- # the two underlying continuous variables (Drasgow, 2006)
42
- #
43
- # According to Drasgow(2006), there are three methods to estimate
44
- # the polychoric correlation:
45
- #
46
- # 1. Maximum Likelihood Estimator
47
- # 2. Two-step estimator and
48
- # 3. Polychoric series estimate.
49
- #
50
- # By default, two-step estimation is used. You can select
51
- # the estimation method with method attribute. Joint estimate and polychoric series require gsl library and rb-gsl.
52
- #
53
- # == Use
54
- #
55
- # You should enter a Matrix with ordered data. For example:
56
- # -------------------
57
- # | y=0 | y=1 | y=2 |
58
- # -------------------
59
- # x = 0 | 1 | 10 | 20 |
60
- # -------------------
61
- # x = 1 | 20 | 20 | 50 |
62
- # -------------------
63
- #
64
- # The code will be
65
- #
66
- # matrix=Matrix[[1,10,20],[20,20,50]]
67
- # poly=Statsample::Bivariate::Polychoric.new(matrix, :method=>:joint)
68
- # puts poly.r
69
- #
70
- # See extensive documentation on Uebersax(2002) and Drasgow(2006)
71
- #
72
- # == References
73
- #
74
- # * Uebersax, J.S. (2006). The tetrachoric and polychoric correlation coefficients. Statistical Methods for Rater Agreement web site. 2006. Available at: http://john-uebersax.com/stat/tetra.htm . Accessed February, 11, 2010
75
- # * Drasgow F. (2006). Polychoric and polyserial correlations. In Kotz L, Johnson NL (Eds.), Encyclopedia of statistical sciences. Vol. 7 (pp. 69-74). New York: Wiley.
76
-
77
- class Polychoric
78
-
79
- class Processor
80
- attr_reader :alpha, :beta, :rho
81
- def initialize(alpha,beta,rho)
82
- @alpha=alpha
83
- @beta=beta
84
- @nr=@alpha.size+1
85
- @nc=@beta.size+1
86
- @rho=rho
87
- @pd=nil
88
- end
89
- def bipdf(i,j)
90
- Distribution::NormalBivariate.pdf(a(i), b(j), rho)
91
- end
92
- def a(i)
93
- i < 0 ? -100 : (i==@nr-1 ? 100 : alpha[i])
94
- end
95
- def b(j)
96
- j < 0 ? -100 : (j==@nc-1 ? 100 : beta[j])
97
- end
98
- # Equation(10) from Olsson(1979)
99
- def fd_loglike_cell_a(i,j,k)
100
- if k==i
101
- Distribution::NormalBivariate.pd_cdf_x(a(k),b(j), rho) - Distribution::NormalBivariate.pd_cdf_x(a(k),b(j-1),rho)
102
- elsif k==(i-1)
103
- -Distribution::NormalBivariate.pd_cdf_x(a(k),b(j),rho) + Distribution::NormalBivariate.pd_cdf_x(a(k),b(j-1),rho)
104
- else
105
- 0
106
- end
107
-
108
- end
109
- # phi_ij for each i and j
110
- # Uses equation(4) from Olsson(1979)
111
- def pd
112
- if @pd.nil?
113
- @pd=@nr.times.collect{ [0] * @nc}
114
- pc=@nr.times.collect{ [0] * @nc}
115
- @nr.times do |i|
116
- @nc.times do |j|
117
-
118
- if i==@nr-1 and j==@nc-1
119
- @pd[i][j]=1.0
120
- else
121
- a=(i==@nr-1) ? 100: alpha[i]
122
- b=(j==@nc-1) ? 100: beta[j]
123
- #puts "a:#{a} b:#{b}"
124
- @pd[i][j]=Distribution::NormalBivariate.cdf(a, b, rho)
125
- end
126
- pc[i][j] = @pd[i][j]
127
- @pd[i][j] = @pd[i][j] - pc[i-1][j] if i>0
128
- @pd[i][j] = @pd[i][j] - pc[i][j-1] if j>0
129
- @pd[i][j] = @pd[i][j] + pc[i-1][j-1] if (i>0 and j>0)
130
- end
131
- end
132
- end
133
- @pd
134
- end
135
- end
136
-
137
- include GetText
138
- include DirtyMemoize
139
- bindtextdomain("statsample")
140
- # Name of the analysis
141
- attr_accessor :name
142
- # Max number of iterations used on iterative methods. Default to MAX_ITERATIONS
143
- attr_accessor :max_iterations
144
- # Debug algorithm (See iterations, for example)
145
- attr_accessor :debug
146
- # Minimizer type for two step. Default "brent"
147
- # See http://rb-gsl.rubyforge.org/min.html for reference.
148
- attr_accessor :minimizer_type_two_step
149
-
150
- # Minimizer type for joint estimate. Default "nmsimplex"
151
- # See http://rb-gsl.rubyforge.org/min.html for reference.
152
- attr_accessor :minimizer_type_joint
153
-
154
-
155
- # Method of calculation of polychoric series.
156
- # <tt>:two_step</tt> used by default.
157
- #
158
- # :two_step:: two-step ML, based on code by Gegenfurtner(1992).
159
- # :polychoric_series:: polychoric series estimate, using
160
- # algorithm AS87 by Martinson and Hamdan (1975).
161
- # :joint:: one-step ML, based on R package 'polycor'
162
- # by J.Fox.
163
- attr_accessor :method
164
- # Absolute error for iteration.
165
- attr_accessor :epsilon
166
-
167
- # Number of iterations
168
- attr_reader :iteration
169
-
170
- # Log of algorithm
171
- attr_reader :log
172
-
173
-
174
- attr_reader :loglike_model
175
-
176
- METHOD=:two_step
177
- MAX_ITERATIONS=300
178
- EPSILON=1e-6
179
- MINIMIZER_TYPE_TWO_STEP="brent"
180
- MINIMIZER_TYPE_JOINT="nmsimplex"
181
- def self.new_with_vectors(v1,v2)
182
- Polychoric.new(Crosstab.new(v1,v2).to_matrix)
183
- end
184
- # Params:
185
- # * matrix: Contingence table
186
- # * opts: Any attribute
187
-
188
- def initialize(matrix, opts=Hash.new)
189
- @matrix=matrix
190
- @n=matrix.column_size
191
- @m=matrix.row_size
192
- raise "row size <1" if @m<=1
193
- raise "column size <1" if @n<=1
194
-
195
- @method=METHOD
196
- @name="Polychoric correlation"
197
- @max_iterations=MAX_ITERATIONS
198
- @epsilon=EPSILON
199
- @minimizer_type_two_step=MINIMIZER_TYPE_TWO_STEP
200
- @minimizer_type_joint=MINIMIZER_TYPE_JOINT
201
- @debug=false
202
- @iteration=nil
203
- opts.each{|k,v|
204
- self.send("#{k}=",v) if self.respond_to? k
205
- }
206
- @r=nil
207
- @pd=nil
208
- compute_basic_parameters
209
- end
210
- # Returns the polychoric correlation
211
- attr_reader :r
212
- # Returns the rows thresholds
213
- attr_reader :alpha
214
- # Returns the columns thresholds
215
- attr_reader :beta
216
-
217
- dirty_writer :max_iterations, :epsilon, :minimizer_type_two_step, :minimizer_type_joint, :method
218
- dirty_memoize :r, :alpha, :beta
219
-
220
- alias :threshold_x :alpha
221
- alias :threshold_y :beta
222
-
223
-
224
- # Start the computation of polychoric correlation
225
- # based on attribute method
226
- def compute
227
- if @method==:two_step
228
- compute_two_step_mle_drasgow
229
- elsif @method==:joint
230
- compute_one_step_mle
231
- elsif @method==:polychoric_series
232
- compute_polychoric_series
233
- else
234
- raise "Not implemented"
235
- end
236
- end
237
- # Retrieve log likelihood for actual data.
238
- def loglike_data
239
- loglike=0
240
- @nr.times do |i|
241
- @nc.times do |j|
242
- res=@matrix[i,j].quo(@total)
243
- if (res==0)
244
- res=1e-16
245
- end
246
- loglike+= @matrix[i,j] * Math::log(res )
247
- end
248
- end
249
- loglike
250
- end
251
-
252
- # Chi Square of model
253
- def chi_square
254
- if @loglike_model.nil?
255
- compute
256
- end
257
- -2*(@loglike_model-loglike_data)
258
- end
259
-
260
- def chi_square_df
261
- (@nr*@nc)-@nc-@nr
262
- end
263
-
264
-
265
-
266
-
267
- # Retrieve all cell probabilities for given alpha, beta and rho
268
- def cell_probabilities(alpha,beta,rho)
269
- pd=@nr.times.collect{ [0] * @nc}
270
- pc=@nr.times.collect{ [0] * @nc}
271
- @nr.times do |i|
272
- @nc.times do |j|
273
-
274
- if i==@nr-1 and j==@nc-1
275
- pd[i][j]=1.0
276
- else
277
- a=(i==@nr-1) ? 100: alpha[i]
278
- b=(j==@nc-1) ? 100: beta[j]
279
- #puts "a:#{a} b:#{b}"
280
- pd[i][j]=Distribution::NormalBivariate.cdf(a, b, rho)
281
- end
282
- pc[i][j] = pd[i][j]
283
- pd[i][j] = pd[i][j] - pc[i-1][j] if i>0
284
- pd[i][j] = pd[i][j] - pc[i][j-1] if j>0
285
- pd[i][j] = pd[i][j] + pc[i-1][j-1] if (i>0 and j>0)
286
- end
287
- end
288
- @pd=pd
289
- pd
290
- end
291
- def loglike(alpha,beta,rho)
292
- if rho.abs>0.9999
293
- rho= (rho>0) ? 0.9999 : -0.9999
294
- end
295
- pr=Processor.new(alpha,beta,rho)
296
- loglike=0
297
-
298
-
299
- @nr.times do |i|
300
- @nc.times do |j|
301
- res=pr.pd[i][j]+EPSILON
302
- loglike+= @matrix[i,j] * Math::log( res )
303
- end
304
- end
305
- -loglike
306
- end
307
- # First derivative for rho
308
- # Uses equation (9) from Olsson(1979)
309
- def fd_loglike_rho(alpha,beta,rho)
310
- if rho.abs>0.9999
311
- rho= (rho>0) ? 0.9999 : -0.9999
312
- end
313
- total=0
314
- pr=Processor.new(alpha,beta,rho)
315
- @nr.times do |i|
316
- @nc.times do |j|
317
- pi=pr.pd[i][j] + EPSILON
318
- total+= (@matrix[i,j] / pi) * (pr.bipdf(i,j)-pr.bipdf(i-1,j)-pr.bipdf(i,j-1)+pr.bipdf(i-1,j-1))
319
- end
320
- end
321
- total
322
- end
323
-
324
- # First derivative for alpha_k
325
- def fd_loglike_a(alpha,beta,rho,k)
326
- fd_loglike_a_eq6(alpha,beta,rho,k)
327
- end
328
- # Uses equation (6) from Olsson(1979)
329
- def fd_loglike_a_eq6(alpha,beta,rho,k)
330
- if rho.abs>0.9999
331
- rho= (rho>0) ? 0.9999 : -0.9999
332
- end
333
- pr=Processor.new(alpha,beta,rho)
334
- total=0
335
- pd=pr.pd
336
- @nr.times do |i|
337
- @nc.times do |j|
338
- total+=@matrix[i,j].quo(pd[i][j]+EPSILON) * pr.fd_loglike_cell_a(i,j,k)
339
- end
340
- end
341
- total
342
- end
343
- # Uses equation(13) from Olsson(1979)
344
- def fd_loglike_a_eq13(alpha,beta,rho,k)
345
- if rho.abs>0.9999
346
- rho= (rho>0) ? 0.9999 : -0.9999
347
- end
348
- pr=Processor.new(alpha,beta,rho)
349
- total=0
350
- a_k=pr.a(k)
351
- pd=pr.pd
352
- @nc.times do |j|
353
- #puts "j: #{j}"
354
- #puts "b #{j} : #{b.call(j)}"
355
- #puts "b #{j-1} : #{b.call(j-1)}"
356
-
357
- e_1=@matrix[k,j].quo(pd[k][j]+EPSILON) - @matrix[k+1,j].quo(pd[k+1][j]+EPSILON)
358
- e_2=Distribution::Normal.pdf(a_k)
359
- e_3=Distribution::Normal.cdf((pr.b(j)-rho*a_k).quo(Math::sqrt(1-rho**2))) - Distribution::Normal.cdf((pr.b(j-1)-rho*a_k).quo(Math::sqrt(1-rho**2)))
360
- #puts "val #{j}: #{e_1} | #{e_2} | #{e_3}"
361
-
362
- total+= e_1*e_2*e_3
363
- end
364
- total
365
- end
366
- # First derivative for beta_m
367
- # Uses equation(14) from Olsson(1979)
368
- def fd_loglike_b(alpha,beta,rho,m)
369
- if rho.abs>0.9999
370
- rho= (rho>0) ? 0.9999 : -0.9999
371
- end
372
- pr=Processor.new(alpha,beta,rho)
373
- total=0
374
- b_m=pr.b m
375
- pd=pr.pd
376
- @nr.times do |i|
377
- #puts "j: #{j}"
378
- #puts "b #{j} : #{b.call(j)}"
379
- #puts "b #{j-1} : #{b.call(j-1)}"
380
-
381
- e_1=@matrix[i,m].quo(pd[i][m]+EPSILON) - @matrix[i,m+1].quo(pd[i][m+1]+EPSILON)
382
- e_2=Distribution::Normal.pdf(b_m)
383
- e_3=Distribution::Normal.cdf((pr.a(i)-rho*b_m).quo(Math::sqrt(1-rho**2))) - Distribution::Normal.cdf((pr.a(i-1)-rho*b_m).quo(Math::sqrt(1-rho**2)))
384
- #puts "val #{j}: #{e_1} | #{e_2} | #{e_3}"
385
-
386
- total+= e_1*e_2*e_3
387
- end
388
- total
389
- end
390
-
391
-
392
- def compute_basic_parameters
393
- @nr=@matrix.row_size
394
- @nc=@matrix.column_size
395
- @sumr=[0]*@matrix.row_size
396
- @sumrac=[0]*@matrix.row_size
397
- @sumc=[0]*@matrix.column_size
398
- @sumcac=[0]*@matrix.column_size
399
- @alpha=[0]*(@nr-1)
400
- @beta=[0]*(@nc-1)
401
- @total=0
402
- @nr.times do |i|
403
- @nc.times do |j|
404
- @sumr[i]+=@matrix[i,j]
405
- @sumc[j]+=@matrix[i,j]
406
- @total+=@matrix[i,j]
407
- end
408
- end
409
- ac=0
410
- (@nr-1).times do |i|
411
- @sumrac[i]=@sumr[i]+ac
412
- @alpha[i]=Distribution::Normal.p_value(@sumrac[i] / @total.to_f)
413
- ac=@sumrac[i]
414
- end
415
- ac=0
416
- (@nc-1).times do |i|
417
- @sumcac[i]=@sumc[i]+ac
418
- @beta[i]=Distribution::Normal.p_value(@sumcac[i] / @total.to_f)
419
- ac=@sumcac[i]
420
- end
421
- end
422
-
423
-
424
- # Computation of polychoric correlation using two-step ML estimation.
425
- #
426
- # Two-step ML estimation "first estimates the thresholds from the one-way marginal frequencies, then estimates rho, conditional on these thresholds, via maximum likelihood" (Uebersax, 2006).
427
- #
428
- # The algorithm is based on code by Gegenfurtner(1992).
429
- #
430
- # <b>References</b>:
431
- # * Gegenfurtner, K. (1992). PRAXIS: Brent's algorithm for function minimization. Behavior Research Methods, Instruments & Computers, 24(4), 560-564. Available on http://www.allpsych.uni-giessen.de/karl/pdf/03.praxis.pdf
432
- # * Uebersax, J.S. (2006). The tetrachoric and polychoric correlation coefficients. Statistical Methods for Rater Agreement web site. 2006. Available at: http://john-uebersax.com/stat/tetra.htm . Accessed February, 11, 2010
433
- #
434
- def compute_two_step_mle_drasgow
435
- if Statsample.has_gsl?
436
- compute_two_step_mle_drasgow_gsl
437
- else
438
- compute_two_step_mle_drasgow_ruby
439
- end
440
- end
441
-
442
- # Depends on minimization algorithm.
443
-
444
- def compute_two_step_mle_drasgow_ruby #:nodoc:
445
-
446
- f=proc {|rho|
447
- loglike(@alpha,@beta, rho)
448
- }
449
- @log="Minimizing using GSL Brent method\n"
450
- min=Minimization::Brent.new(-0.9999,0.9999,f)
451
- min.epsilon=@epsilon
452
- min.expected=0
453
- min.iterate
454
- @log+=min.log.to_table.to_s
455
- @r=min.x_minimum
456
- @loglike_model=-min.f_minimum
457
- puts @log if @debug
458
-
459
- end
460
-
461
-
462
- def compute_two_step_mle_drasgow_gsl #:nodoc:
463
-
464
- fn1=GSL::Function.alloc {|rho|
465
- loglike(@alpha,@beta, rho)
466
- }
467
- @iteration = 0
468
- max_iter = @max_iterations
469
- m = 0 # initial guess
470
- m_expected = 0
471
- a=-0.9999
472
- b=+0.9999
473
- gmf = GSL::Min::FMinimizer.alloc(@minimizer_type_two_step)
474
- gmf.set(fn1, m, a, b)
475
- header=sprintf("Two step minimization using %s method\n", gmf.name)
476
- header+=sprintf("%5s [%9s, %9s] %9s %10s %9s\n", "iter", "lower", "upper", "min",
477
- "err", "err(est)")
478
-
479
- header+=sprintf("%5d [%.7f, %.7f] %.7f %+.7f %.7f\n", @iteration, a, b, m, m - m_expected, b - a)
480
- @log=header
481
- puts header if @debug
482
- begin
483
- @iteration += 1
484
- status = gmf.iterate
485
- status = gmf.test_interval(@epsilon, 0.0)
486
-
487
- if status == GSL::SUCCESS
488
- @log+="converged:"
489
- puts "converged:" if @debug
490
- end
491
- a = gmf.x_lower
492
- b = gmf.x_upper
493
- m = gmf.x_minimum
494
- message=sprintf("%5d [%.7f, %.7f] %.7f %+.7f %.7f\n",
495
- @iteration, a, b, m, m - m_expected, b - a);
496
- @log+=message
497
- puts message if @debug
498
- end while status == GSL::CONTINUE and @iteration < @max_iterations
499
- @r=gmf.x_minimum
500
- @loglike_model=-gmf.f_minimum
501
- end
502
-
503
- # Compute Polychoric correlation with joint estimate.
504
- # Rho and thresholds are estimated at same time.
505
- # Code based on R package "polycor", by J.Fox.
506
- #
507
-
508
- def compute_one_step_mle
509
- # Get initial values with two-step approach
510
- compute_two_step_mle_drasgow
511
- # Start iteration with past values
512
- rho=@r
513
- cut_alpha=@alpha
514
- cut_beta=@beta
515
- parameters=[rho]+cut_alpha+cut_beta
516
- minimization = Proc.new { |v, params|
517
- rho=v[0]
518
- alpha=v[1, @nr-1]
519
- beta=v[@nr, @nc-1]
520
-
521
- #puts "f'rho=#{fd_loglike_rho(alpha,beta,rho)}"
522
- #(@nr-1).times {|k|
523
- # puts "f'a(#{k}) = #{fd_loglike_a(alpha,beta,rho,k)}"
524
- # puts "f'a(#{k}) v2 = #{fd_loglike_a2(alpha,beta,rho,k)}"
525
- #
526
- #}
527
- #(@nc-1).times {|k|
528
- # puts "f'b(#{k}) = #{fd_loglike_b(alpha,beta,rho,k)}"
529
- #}
530
-
531
- loglike(alpha,beta,rho)
532
- }
533
- np=@nc-1+@nr
534
- my_func = GSL::MultiMin::Function.alloc(minimization, np)
535
- my_func.set_params(parameters) # parameters
536
-
537
- x = GSL::Vector.alloc(parameters.dup)
538
-
539
- ss = GSL::Vector.alloc(np)
540
- ss.set_all(1.0)
541
-
542
- minimizer = GSL::MultiMin::FMinimizer.alloc(minimizer_type_joint,np)
543
- minimizer.set(my_func, x, ss)
544
-
545
- iter = 0
546
- message=""
547
- begin
548
- iter += 1
549
- status = minimizer.iterate()
550
- status = minimizer.test_size(@epsilon)
551
- if status == GSL::SUCCESS
552
- message="Joint MLE converged to minimum at\n"
553
- end
554
- x = minimizer.x
555
- message+= sprintf("%5d iterations", iter)+"\n";
556
- for i in 0...np do
557
- message+=sprintf("%10.3e ", x[i])
558
- end
559
- message+=sprintf("f() = %7.3f size = %.3f\n", minimizer.fval, minimizer.size)+"\n";
560
- end while status == GSL::CONTINUE and iter < @max_iterations
561
- @iteration=iter
562
- @log+=message
563
- @r=minimizer.x[0]
564
- @alpha=minimizer.x[1,@nr-1].to_a
565
- @beta=minimizer.x[@nr,@nc-1].to_a
566
- @loglike_model= -minimizer.minimum
567
- end
568
-
569
- def matrix_for_rho(rho) # :nodoc:
570
- pd=@nr.times.collect{ [0]*@nc}
571
- pc=@nr.times.collect{ [0]*@nc}
572
- @nr.times { |i|
573
- @nc.times { |j|
574
- pd[i][j]=Distribution::NormalBivariate.cdf(@alpha[i], @beta[j], rho)
575
- pc[i][j] = pd[i][j]
576
- pd[i][j] = pd[i][j] - pc[i-1][j] if i>0
577
- pd[i][j] = pd[i][j] - pc[i][j-1] if j>0
578
- pd[i][j] = pd[i][j] + pc[i-1][j-1] if (i>0 and j>0)
579
- res= pd[i][j]
580
- }
581
- }
582
- Matrix.rows(pc)
583
- end
584
-
585
- def expected # :nodoc:
586
- rt=[]
587
- ct=[]
588
- t=0
589
- @matrix.row_size.times {|i|
590
- @matrix.column_size.times {|j|
591
- rt[i]=0 if rt[i].nil?
592
- ct[j]=0 if ct[j].nil?
593
- rt[i]+=@matrix[i,j]
594
- ct[j]+=@matrix[i,j]
595
- t+=@matrix[i,j]
596
- }
597
- }
598
- m=[]
599
- @matrix.row_size.times {|i|
600
- row=[]
601
- @matrix.column_size.times {|j|
602
- row[j]=(rt[i]*ct[j]).quo(t)
603
- }
604
- m.push(row)
605
- }
606
-
607
- Matrix.rows(m)
608
- end
609
-
610
- # Compute polychoric correlation using polychoric series.
611
- # Algorithm: AS87, by Martinson and Hamdan(1975).
612
- #
613
- # <b>Warning</b>: According to Drasgow(2006), this
614
- # computation diverges greatly from joint and two-step methods.
615
- #
616
- def compute_polychoric_series
617
- @nn=@n-1
618
- @mm=@m-1
619
- @nn7=7*@nn
620
- @mm7=7*@mm
621
- @mn=@n*@m
622
- @cont=[nil]
623
- @n.times {|j|
624
- @m.times {|i|
625
- @cont.push(@matrix[i,j])
626
- }
627
- }
628
-
629
- pcorl=0
630
- cont=@cont
631
- xmean=0.0
632
- sum=0.0
633
- row=[]
634
- colmn=[]
635
- (1..@m).each do |i|
636
- row[i]=0.0
637
- l=i
638
- (1..@n).each do |j|
639
- row[i]=row[i]+cont[l]
640
- l+=@m
641
- end
642
- raise "Should not be empty rows" if(row[i]==0.0)
643
- xmean=xmean+row[i]*i.to_f
644
- sum+=row[i]
645
- end
646
- xmean=xmean/sum.to_f
647
- ymean=0.0
648
- (1..@n).each do |j|
649
- colmn[j]=0.0
650
- l=(j-1)*@m
651
- (1..@m).each do |i|
652
- l=l+1
653
- colmn[j]=colmn[j]+cont[l] #12
654
- end
655
- raise "Should not be empty cols" if colmn[j]==0
656
- ymean=ymean+colmn[j]*j.to_f
657
- end
658
- ymean=ymean/sum.to_f
659
- covxy=0.0
660
- (1..@m).each do |i|
661
- l=i
662
- (1..@n).each do |j|
663
- conxy=covxy+cont[l]*(i.to_f-xmean)*(j.to_f-ymean)
664
- l=l+@m
665
- end
666
- end
667
-
668
- chisq=0.0
669
- (1..@m).each do |i|
670
- l=i
671
- (1..@n).each do |j|
672
- chisq=chisq+((cont[l]**2).quo(row[i]*colmn[j]))
673
- l=l+@m
674
- end
675
- end
676
-
677
- phisq=chisq-1.0-(@mm*@nn).to_f / sum.to_f
678
- phisq=0 if(phisq<0.0)
679
- # Compute cumulative sum of columns and rows
680
- sumc=[]
681
- sumr=[]
682
- sumc[1]=colmn[1]
683
- sumr[1]=row[1]
684
- cum=0
685
- (1..@nn).each do |i| # goto 17 r20
686
- cum=cum+colmn[i]
687
- sumc[i]=cum
688
- end
689
- cum=0
690
- (1..@mm).each do |i|
691
- cum=cum+row[i]
692
- sumr[i]=cum
693
- end
694
- alpha=[]
695
- beta=[]
696
- # Compute points of polytomy
697
- (1..@mm).each do |i| #do 21
698
- alpha[i]=Distribution::Normal.p_value(sumr[i] / sum.to_f)
699
- end # 21
700
- (1..@nn).each do |i| #do 22
701
- beta[i]=Distribution::Normal.p_value(sumc[i] / sum.to_f)
702
- end # 21
703
- @alpha=alpha[1,alpha.size]
704
- @beta=beta[1,beta.size]
705
- @sumr=row[1,row.size]
706
- @sumc=colmn[1,colmn.size]
707
- @total=sum
708
-
709
- # Compute Fourier coefficients a and b. Verified
710
- h=hermit(alpha,@mm)
711
- hh=hermit(beta,@nn)
712
- a=[]
713
- b=[]
714
- if @m!=2 # goto 24
715
- mmm=@m-2
716
- (1..mmm).each do |i| #do 23
717
- a1=sum.quo(row[i+1] * sumr[i] * sumr[i+1])
718
- a2=sumr[i] * xnorm(alpha[i+1])
719
- a3=sumr[i+1] * xnorm(alpha[i])
720
- l=i
721
- (1..7).each do |j| #do 23
722
- a[l]=Math::sqrt(a1.quo(j))*(h[l+1] * a2 - h[l] * a3)
723
- l=l+@mm
724
- end
725
- end #23
726
- end
727
- # 24
728
-
729
-
730
- if @n!=2 # goto 26
731
- nnn=@n-2
732
- (1..nnn).each do |i| #do 25
733
- a1=sum.quo(colmn[i+1] * sumc[i] * sumc[i+1])
734
- a2=sumc[i] * xnorm(beta[i+1])
735
- a3=sumc[i+1] * xnorm(beta[i])
736
- l=i
737
- (1..7).each do |j| #do 25
738
- b[l]=Math::sqrt(a1.quo(j))*(a2 * hh[l+1] - a3*hh[l])
739
- l=l+@nn
740
- end # 25
741
- end # 25
742
- end
743
- #26 r20
744
- l = @mm
745
- a1 = -sum * xnorm(alpha[@mm])
746
- a2 = row[@m] * sumr[@mm]
747
- (1..7).each do |j| # do 27
748
- a[l]=a1 * h[l].quo(Math::sqrt(j*a2))
749
- l=l+@mm
750
- end # 27
751
-
752
- l = @nn
753
- a1 = -sum * xnorm(beta[@nn])
754
- a2 = colmn[@n] * sumc[@nn]
755
-
756
- (1..7).each do |j| # do 28
757
- b[l]=a1 * hh[l].quo(Math::sqrt(j*a2))
758
- l = l + @nn
759
- end # 28
760
- rcof=[]
761
- # compute coefficients rcof of polynomial of order 8
762
- rcof[1]=-phisq
763
- (2..9).each do |i| # do 30
764
- rcof[i]=0.0
765
- end #30
766
- m1=@mm
767
- (1..@mm).each do |i| # do 31
768
- m1=m1+1
769
- m2=m1+@mm
770
- m3=m2+@mm
771
- m4=m3+@mm
772
- m5=m4+@mm
773
- m6=m5+@mm
774
- n1=@nn
775
- (1..@nn).each do |j| # do 31
776
- n1=n1+1
777
- n2=n1+@nn
778
- n3=n2+@nn
779
- n4=n3+@nn
780
- n5=n4+@nn
781
- n6=n5+@nn
782
-
783
- rcof[3] = rcof[3] + a[i]**2 * b[j]**2
784
-
785
- rcof[4] = rcof[4] + 2.0 * a[i] * a[m1] * b[j] * b[n1]
786
-
787
- rcof[5] = rcof[5] + a[m1]**2 * b[n1]**2 +
788
- 2.0 * a[i] * a[m2] * b[j] * b[n2]
789
-
790
- rcof[6] = rcof[6] + 2.0 * (a[i] * a[m3] * b[j] *
791
- b[n3] + a[m1] * a[m2] * b[n1] * b[n2])
792
-
793
- rcof[7] = rcof[7] + a[m2]**2 * b[n2]**2 +
794
- 2.0 * (a[i] * a[m4] * b[j] * b[n4] + a[m1] * a[m3] *
795
- b[n1] * b[n3])
796
-
797
- rcof[8] = rcof[8] + 2.0 * (a[i] * a[m5] * b[j] * b[n5] +
798
- a[m1] * a[m4] * b[n1] * b[n4] + a[m2] * a[m3] * b[n2] * b[n3])
799
-
800
- rcof[9] = rcof[9] + a[m3]**2 * b[n3]**2 +
801
- 2.0 * (a[i] * a[m6] * b[j] * b[n6] + a[m1] * a[m5] * b[n1] *
802
- b[n5] + (a[m2] * a[m4] * b[n2] * b[n4]))
803
- end # 31
804
- end # 31
805
-
806
- rcof=rcof[1,rcof.size]
807
- poly = GSL::Poly.alloc(rcof)
808
- roots=poly.solve
809
- rootr=[nil]
810
- rooti=[nil]
811
- roots.each {|c|
812
- rootr.push(c.real)
813
- rooti.push(c.im)
814
- }
815
- @rootr=rootr
816
- @rooti=rooti
817
-
818
- norts=0
819
- (1..7).each do |i| # do 43
820
-
821
- next if rooti[i]!=0.0
822
- if (covxy>=0.0)
823
- next if(rootr[i]<0.0 or rootr[i]>1.0)
824
- pcorl=rootr[i]
825
- norts=norts+1
826
- else
827
- if (rootr[i]>=-1.0 and rootr[i]<0.0)
828
- pcorl=rootr[i]
829
- norts=norts+1
830
- end
831
- end
832
- end # 43
833
- raise "Error" if norts==0
834
- @r=pcorl
835
-
836
- @loglike_model=-loglike(@alpha, @beta, @r)
837
-
838
- end
839
- #Computes vector h(mm7) of orthogonal hermite...
840
# Builds a flat, 1-based table of normalised Hermite polynomial values.
#
# For each point s[i] (i in 1..k) seven orders (0..6) are stored; the value
# of order p at point i lives at index i + p*k. Index 0 is never written.
#
# Recurrence used for orders 2..6 (order 0 is 1.0, order 1 is s[i]):
#   h_p = (s[i] * h_(p-1) - sqrt(p-1) * h_(p-2)) / sqrt(p)
#
# s:: array of evaluation points, read at indices 1..k
# k:: number of points
#
# Returns the table as an Array.
def hermit(s, k) # :nodoc:
  table = []
  1.upto(k) do |idx|
    point = s[idx]
    table[idx] = 1.0
    table[idx + k] = point
    prev_norm = 1.0 # sqrt(1), the normaliser of the previous order
    2.upto(6) do |order|
      norm = Math.sqrt(order)
      two_back = idx + (order - 2) * k
      one_back = two_back + k
      table[one_back + k] = (point * table[one_back] - prev_norm * table[two_back]).quo(norm)
      prev_norm = norm
    end
  end
  table
end
860
# Evaluates exp(-t^2 / 2) / sqrt(2 * pi), i.e. the standard normal
# density at +t+.
#
# t:: numeric abscissa
#
# Returns a Float.
def xnorm(t) # :nodoc:
  inv_sqrt_two_pi = 1.0 / Math.sqrt(2 * Math::PI)
  kernel = Math.exp(-0.5 * t**2)
  kernel * inv_sqrt_two_pi
end
863
-
864
# Renders this object as a plain-text report.
#
# Creates a ReportBuilder with the :no_title option set, adds +self+ to it
# and returns whatever +to_text+ produces.
def summary
  ReportBuilder.new(:no_title => true).add(self).to_text
end
867
-
868
-
869
# Assembles the report section for this analysis and hands it to +generator+.
#
# Steps, in order:
# 1. Calls +compute+ when +dirty?+ is true.
# 2. Adds a contingency table: one row per X index (0...@m) with the cells
#    of @matrix and the matching entry of @sumr, then a rule and a totals
#    row built from @sumc and @total.
# 3. Adds the correlation +r+ formatted to four decimals.
# 4. Adds a table listing every value of +threshold_x+ and +threshold_y+.
# 5. Adds the chi-square line, with p computed as
#    1 - Distribution::ChiSquare.cdf(chi_square, chi_square_df).
#
# generator:: object responding to +parse_element+; it receives the section.
def report_building(generator) # :nodoc:
  compute if dirty?
  section = ReportBuilder::Section.new(:name => @name)

  header = [""] + (@n.times.collect { |j| "Y=#{j}" }) + ["Total"]
  contingency = ReportBuilder::Table.new(:name => _("Contingence Table"), :header => header)
  @m.times do |i|
    contingency.row(["X = #{i}"] + (@n.times.collect { |j| @matrix[i, j] }) + [@sumr[i]])
  end
  contingency.hr
  contingency.row(["T"] + (@n.times.collect { |j| @sumc[j] }) + [@total])
  section.add(contingency)

  section.add(sprintf("r: %0.4f", r))

  thresholds = ReportBuilder::Table.new(:name => _("Thresholds"), :header => ["", "Value"])
  threshold_x.each_with_index do |val, i|
    thresholds.row(["Threshold X #{i}", sprintf("%0.4f", val)])
  end
  threshold_y.each_with_index do |val, i|
    thresholds.row(["Threshold Y #{i}", sprintf("%0.4f", val)])
  end
  section.add(thresholds)

  # Translate the template first and interpolate afterwards: looking up the
  # already-formatted string would never match a catalogue entry.
  section.add(_("Test of bivariate normality: X2 = %0.3f, df = %d, p= %0.5f") %
              [chi_square, chi_square_df, 1 - Distribution::ChiSquare.cdf(chi_square, chi_square_df)])
  generator.parse_element(section)
end
891
- end
892
- end
893
- end