statsample-bivariate-extension 0.13.0

Sign up to get free protection for your applications and to get access to all the features.
data.tar.gz.sig ADDED
Binary file
data/History.txt ADDED
@@ -0,0 +1,4 @@
1
+ === 0.13.0 / 2010-06-21
2
+
3
+ * First independent version of statsample-bivariate-extension
4
+
data/Manifest.txt ADDED
@@ -0,0 +1,7 @@
1
+ History.txt
2
+ Manifest.txt
3
+ README.txt
4
+ Rakefile
5
+ lib/statsample/bivariate/polychoric.rb
6
+ lib/statsample/bivariate/tetrachoric.rb
7
+ spec/spec_helper.rb
data/README.txt ADDED
@@ -0,0 +1,50 @@
1
+ = statsample-bivariate-extension
2
+
3
+ * http://ruby-statsample.rubyforge.org/
4
+
5
+ == DESCRIPTION:
6
+
7
+ Provides advanced bivariate statistics:
8
+ * Tetrachoric correlation
9
+ * Polychoric correlation
10
+
11
+ == FEATURES/PROBLEMS:
12
+
13
+ * Statsample::Bivariate::Polychoric class provides polychoric correlation
14
+ * Statsample::Bivariate::Tetrachoric class provides tetrachoric correlation
15
+
16
+
17
+ == SYNOPSIS:
18
+
19
+ === Tetrachoric correlation
20
+
21
+ require 'statsample'
22
+ a=40
23
+ b=10
24
+ c=20
25
+ d=30
26
+ tetra=Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
27
+ puts tetra.summary
28
+
29
+ === Polychoric correlation
30
+
31
+ require 'statsample'
32
+ ct=Matrix[[58,52,1],[26,58,3],[8,12,9]]
33
+
34
+ poly=Statsample::Bivariate::Polychoric.new(ct)
35
+ puts poly.summary
36
+
37
+
38
+ == REQUIREMENTS:
39
+
40
+ * Statsample
41
+
42
+ == INSTALL:
43
+
44
+ This gem is a statsample dependency. If you want to install it separately:
45
+
46
+ * sudo gem install statsample-bivariate-extension
47
+
48
+ == LICENSE:
49
+
50
+ GPL-2
data/Rakefile ADDED
@@ -0,0 +1,18 @@
1
+ #!/usr/bin/ruby
2
+ # -*- ruby -*-
3
+
4
+ require 'rubygems'
5
+ require 'hoe'
6
+
7
+ $:.unshift(File.dirname(__FILE__)+"/lib")
8
+
9
+ require 'statsample/bivariate/extension_version.rb'
10
+ Hoe.spec 'statsample-bivariate-extension' do
11
+ self.rubyforge_name = 'ruby-statsample'
12
+ self.version=Statsample::Bivariate::EXTENSION_VERSION
13
+ self.developer('Claudio Bustos', 'clbustos_at_gmail.com')
14
+ self.extra_deps << ["statsample","~>0.13.0"]
15
+
16
+ end
17
+
18
+ # vim: syntax=ruby
@@ -0,0 +1,886 @@
1
+ require 'minimization'
2
+ module Statsample
3
+ module Bivariate
4
+ # Calculate Polychoric correlation for two vectors.
5
+ def self.polychoric(v1,v2)
6
+ pc=Polychoric.new_with_vectors(v1,v2)
7
+ pc.r
8
+ end
9
+
10
+ # Polychoric correlation matrix.
11
+ # Order of rows and columns depends on Dataset#fields order
12
+ def self.polychoric_correlation_matrix(ds)
13
+ cache={}
14
+ matrix=ds.collect_matrix do |row,col|
15
+ if row==col
16
+ 1.0
17
+ else
18
+ begin
19
+ if cache[[col,row]].nil?
20
+ poly=polychoric(ds[row],ds[col])
21
+ cache[[row,col]]=poly
22
+ poly
23
+ else
24
+ cache[[col,row]]
25
+ end
26
+ rescue RuntimeError
27
+ nil
28
+ end
29
+ end
30
+ end
31
+ matrix.extend CovariateMatrix
32
+ matrix.fields=ds.fields
33
+ matrix
34
+ end
35
+
36
+ # = Polychoric correlation.
37
+ #
38
+ # The <em>polychoric</em> correlation is a measure of
39
+ # bivariate association arising when both observed variates
40
+ # are ordered, categorical variables that result from polychotomizing
41
+ # the two underlying continuous variables (Drasgow, 2006)
42
+ #
43
+ # According to Drasgow(2006), there are three methods to estimate
44
+ # the polychoric correlation:
45
+ #
46
+ # 1. Maximum Likelihood Estimator
47
+ # 2. Two-step estimator and
48
+ # 3. Polychoric series estimate.
49
+ #
50
+ # By default, two-step estimation is used. You can select
51
+ # the estimation method with method attribute. Joint estimate and polychoric series requires gsl library and rb-gsl.
52
+ #
53
+ # == Use
54
+ #
55
+ # You should enter a Matrix with ordered data. For example:
56
+ # -------------------
57
+ # | y=0 | y=1 | y=2 |
58
+ # -------------------
59
+ # x = 0 | 1 | 10 | 20 |
60
+ # -------------------
61
+ # x = 1 | 20 | 20 | 50 |
62
+ # -------------------
63
+ #
64
+ # The code will be
65
+ #
66
+ # matrix=Matrix[[1,10,20],[20,20,50]]
67
+ # poly=Statsample::Bivariate::Polychoric.new(matrix, :method=>:joint)
68
+ # puts poly.r
69
+ #
70
+ # See extensive documentation on Uebersax(2002) and Drasgow(2006)
71
+ #
72
+ # == References
73
+ #
74
+ # * Uebersax, J.S. (2006). The tetrachoric and polychoric correlation coefficients. Statistical Methods for Rater Agreement web site. 2006. Available at: http://john-uebersax.com/stat/tetra.htm . Accessed February, 11, 2010
75
+ # * Drasgow F. (2006). Polychoric and polyserial correlations. In Kotz L, Johnson NL (Eds.), Encyclopedia of statistical sciences. Vol. 7 (pp. 69-74). New York: Wiley.
76
+
77
+ class Polychoric
78
+ include Summarizable
79
+ class Processor
80
+ attr_reader :alpha, :beta, :rho
81
+ def initialize(alpha,beta,rho)
82
+ @alpha=alpha
83
+ @beta=beta
84
+ @nr=@alpha.size+1
85
+ @nc=@beta.size+1
86
+ @rho=rho
87
+ @pd=nil
88
+ end
89
+ def bipdf(i,j)
90
+ Distribution::NormalBivariate.pdf(a(i), b(j), rho)
91
+ end
92
+ def a(i)
93
+ i < 0 ? -100 : (i==@nr-1 ? 100 : alpha[i])
94
+ end
95
+ def b(j)
96
+ j < 0 ? -100 : (j==@nc-1 ? 100 : beta[j])
97
+ end
98
+ # Equation(10) from Olsson(1979)
99
+ def fd_loglike_cell_a(i,j,k)
100
+ if k==i
101
+ Distribution::NormalBivariate.pd_cdf_x(a(k),b(j), rho) - Distribution::NormalBivariate.pd_cdf_x(a(k),b(j-1),rho)
102
+ elsif k==(i-1)
103
+ -Distribution::NormalBivariate.pd_cdf_x(a(k),b(j),rho) + Distribution::NormalBivariate.pd_cdf_x(a(k),b(j-1),rho)
104
+ else
105
+ 0
106
+ end
107
+
108
+ end
109
+ # phi_ij for each i and j
110
+ # Uses equation(4) from Olsson(1979)
111
+ def pd
112
+ if @pd.nil?
113
+ @pd=@nr.times.collect{ [0] * @nc}
114
+ pc=@nr.times.collect{ [0] * @nc}
115
+ @nr.times do |i|
116
+ @nc.times do |j|
117
+
118
+ if i==@nr-1 and j==@nc-1
119
+ @pd[i][j]=1.0
120
+ else
121
+ a=(i==@nr-1) ? 100: alpha[i]
122
+ b=(j==@nc-1) ? 100: beta[j]
123
+ #puts "a:#{a} b:#{b}"
124
+ @pd[i][j]=Distribution::NormalBivariate.cdf(a, b, rho)
125
+ end
126
+ pc[i][j] = @pd[i][j]
127
+ @pd[i][j] = @pd[i][j] - pc[i-1][j] if i>0
128
+ @pd[i][j] = @pd[i][j] - pc[i][j-1] if j>0
129
+ @pd[i][j] = @pd[i][j] + pc[i-1][j-1] if (i>0 and j>0)
130
+ end
131
+ end
132
+ end
133
+ @pd
134
+ end
135
+ end
136
+
137
+ include DirtyMemoize
138
+ # Name of the analysis
139
+ attr_accessor :name
140
+ # Max number of iterations used on iterative methods. Default to MAX_ITERATIONS
141
+ attr_accessor :max_iterations
142
+ # Debug algorithm (See iterations, for example)
143
+ attr_accessor :debug
144
+ # Minimizer type for two step. Default "brent"
145
+ # See http://rb-gsl.rubyforge.org/min.html for reference.
146
+ attr_accessor :minimizer_type_two_step
147
+
148
+ # Minimizer type for joint estimate. Default "nmsimplex"
149
+ # See http://rb-gsl.rubyforge.org/min.html for reference.
150
+ attr_accessor :minimizer_type_joint
151
+
152
+
153
+ # Method of calculation of polychoric series.
154
+ # <tt>:two_step</tt> used by default.
155
+ #
156
+ # :two_step:: two-step ML, based on code by Gegenfurtner(1992).
157
+ # :polychoric_series:: polychoric series estimate, using
158
+ # algorithm AS87 by Martinson and Hamdan (1975).
159
+ # :joint:: one-step ML, based on R package 'polycor'
160
+ # by J.Fox.
161
+ attr_accessor :method
162
+ # Absolute error for iteration.
163
+ attr_accessor :epsilon
164
+
165
+ # Number of iterations
166
+ attr_reader :iteration
167
+
168
+ # Log of algorithm
169
+ attr_reader :log
170
+
171
+
172
+ attr_reader :loglike_model
173
+
174
+ METHOD=:two_step
175
+ MAX_ITERATIONS=300
176
+ EPSILON=1e-6
177
+ MINIMIZER_TYPE_TWO_STEP="brent"
178
+ MINIMIZER_TYPE_JOINT="nmsimplex"
179
+ def self.new_with_vectors(v1,v2)
180
+ Polychoric.new(Crosstab.new(v1,v2).to_matrix)
181
+ end
182
+ # Params:
183
+ # * matrix: Contingence table
184
+ # * opts: Any attribute
185
+
186
+ def initialize(matrix, opts=Hash.new)
187
+ @matrix=matrix
188
+ @n=matrix.column_size
189
+ @m=matrix.row_size
190
+ raise "row size <1" if @m<=1
191
+ raise "column size <1" if @n<=1
192
+
193
+ @method=METHOD
194
+ @name=_("Polychoric correlation")
195
+ @max_iterations=MAX_ITERATIONS
196
+ @epsilon=EPSILON
197
+ @minimizer_type_two_step=MINIMIZER_TYPE_TWO_STEP
198
+ @minimizer_type_joint=MINIMIZER_TYPE_JOINT
199
+ @debug=false
200
+ @iteration=nil
201
+ opts.each{|k,v|
202
+ self.send("#{k}=",v) if self.respond_to? k
203
+ }
204
+ @r=nil
205
+ @pd=nil
206
+ compute_basic_parameters
207
+ end
208
+ # Returns the polychoric correlation
209
+ attr_reader :r
210
+ # Returns the rows thresholds
211
+ attr_reader :alpha
212
+ # Returns the columns thresholds
213
+ attr_reader :beta
214
+
215
+ dirty_writer :max_iterations, :epsilon, :minimizer_type_two_step, :minimizer_type_joint, :method
216
+ dirty_memoize :r, :alpha, :beta
217
+
218
+ alias :threshold_x :alpha
219
+ alias :threshold_y :beta
220
+
221
+
222
+ # Start the computation of polychoric correlation
223
+ # based on attribute method
224
+ def compute
225
+ if @method==:two_step
226
+ compute_two_step_mle_drasgow
227
+ elsif @method==:joint
228
+ compute_one_step_mle
229
+ elsif @method==:polychoric_series
230
+ compute_polychoric_series
231
+ else
232
+ raise "Not implemented"
233
+ end
234
+ end
235
+ # Retrieve log likehood for actual data.
236
+ def loglike_data
237
+ loglike=0
238
+ @nr.times do |i|
239
+ @nc.times do |j|
240
+ res=@matrix[i,j].quo(@total)
241
+ if (res==0)
242
+ res=1e-16
243
+ end
244
+ loglike+= @matrix[i,j] * Math::log(res )
245
+ end
246
+ end
247
+ loglike
248
+ end
249
+
250
+ # Chi Square of model
251
+ def chi_square
252
+ if @loglike_model.nil?
253
+ compute
254
+ end
255
+ -2*(@loglike_model-loglike_data)
256
+ end
257
+
258
+ def chi_square_df
259
+ (@nr*@nc)-@nc-@nr
260
+ end
261
+
262
+
263
+
264
+
265
+ # Retrieve all cell probabilities for givens alpha, beta and rho
266
+ def cell_probabilities(alpha,beta,rho)
267
+ pd=@nr.times.collect{ [0] * @nc}
268
+ pc=@nr.times.collect{ [0] * @nc}
269
+ @nr.times do |i|
270
+ @nc.times do |j|
271
+
272
+ if i==@nr-1 and j==@nc-1
273
+ pd[i][j]=1.0
274
+ else
275
+ a=(i==@nr-1) ? 100: alpha[i]
276
+ b=(j==@nc-1) ? 100: beta[j]
277
+ #puts "a:#{a} b:#{b}"
278
+ pd[i][j]=Distribution::NormalBivariate.cdf(a, b, rho)
279
+ end
280
+ pc[i][j] = pd[i][j]
281
+ pd[i][j] = pd[i][j] - pc[i-1][j] if i>0
282
+ pd[i][j] = pd[i][j] - pc[i][j-1] if j>0
283
+ pd[i][j] = pd[i][j] + pc[i-1][j-1] if (i>0 and j>0)
284
+ end
285
+ end
286
+ @pd=pd
287
+ pd
288
+ end
289
+ def loglike(alpha,beta,rho)
290
+ if rho.abs>0.9999
291
+ rho= (rho>0) ? 0.9999 : -0.9999
292
+ end
293
+ pr=Processor.new(alpha,beta,rho)
294
+ loglike=0
295
+
296
+
297
+ @nr.times do |i|
298
+ @nc.times do |j|
299
+ res=pr.pd[i][j]+EPSILON
300
+ loglike+= @matrix[i,j] * Math::log( res )
301
+ end
302
+ end
303
+ -loglike
304
+ end
305
+ # First derivate for rho
306
+ # Uses equation (9) from Olsson(1979)
307
+ def fd_loglike_rho(alpha,beta,rho)
308
+ if rho.abs>0.9999
309
+ rho= (rho>0) ? 0.9999 : -0.9999
310
+ end
311
+ total=0
312
+ pr=Processor.new(alpha,beta,rho)
313
+ @nr.times do |i|
314
+ @nc.times do |j|
315
+ pi=pr.pd[i][j] + EPSILON
316
+ total+= (@matrix[i,j] / pi) * (pr.bipdf(i,j)-pr.bipdf(i-1,j)-pr.bipdf(i,j-1)+pr.bipdf(i-1,j-1))
317
+ end
318
+ end
319
+ total
320
+ end
321
+
322
+ # First derivative for alpha_k
323
+ def fd_loglike_a(alpha,beta,rho,k)
324
+ fd_loglike_a_eq6(alpha,beta,rho,k)
325
+ end
326
+ # Uses equation (6) from Olsson(1979)
327
+ def fd_loglike_a_eq6(alpha,beta,rho,k)
328
+ if rho.abs>0.9999
329
+ rho= (rho>0) ? 0.9999 : -0.9999
330
+ end
331
+ pr=Processor.new(alpha,beta,rho)
332
+ total=0
333
+ pd=pr.pd
334
+ @nr.times do |i|
335
+ @nc.times do |j|
336
+ total+=@matrix[i,j].quo(pd[i][j]+EPSILON) * pr.fd_loglike_cell_a(i,j,k)
337
+ end
338
+ end
339
+ total
340
+ end
341
+ # Uses equation(13) from Olsson(1979)
342
+ def fd_loglike_a_eq13(alpha,beta,rho,k)
343
+ if rho.abs>0.9999
344
+ rho= (rho>0) ? 0.9999 : -0.9999
345
+ end
346
+ pr=Processor.new(alpha,beta,rho)
347
+ total=0
348
+ a_k=pr.a(k)
349
+ pd=pr.pd
350
+ @nc.times do |j|
351
+ #puts "j: #{j}"
352
+ #puts "b #{j} : #{b.call(j)}"
353
+ #puts "b #{j-1} : #{b.call(j-1)}"
354
+
355
+ e_1=@matrix[k,j].quo(pd[k][j]+EPSILON) - @matrix[k+1,j].quo(pd[k+1][j]+EPSILON)
356
+ e_2=Distribution::Normal.pdf(a_k)
357
+ e_3=Distribution::Normal.cdf((pr.b(j)-rho*a_k).quo(Math::sqrt(1-rho**2))) - Distribution::Normal.cdf((pr.b(j-1)-rho*a_k).quo(Math::sqrt(1-rho**2)))
358
+ #puts "val #{j}: #{e_1} | #{e_2} | #{e_3}"
359
+
360
+ total+= e_1*e_2*e_3
361
+ end
362
+ total
363
+ end
364
+ # First derivative for beta_m
365
+ # Uses equation(14) from Olsson(1979)
366
+ def fd_loglike_b(alpha,beta,rho,m)
367
+ if rho.abs>0.9999
368
+ rho= (rho>0) ? 0.9999 : -0.9999
369
+ end
370
+ pr=Processor.new(alpha,beta,rho)
371
+ total=0
372
+ b_m=pr.b m
373
+ pd=pr.pd
374
+ @nr.times do |i|
375
+ #puts "j: #{j}"
376
+ #puts "b #{j} : #{b.call(j)}"
377
+ #puts "b #{j-1} : #{b.call(j-1)}"
378
+
379
+ e_1=@matrix[i,m].quo(pd[i][m]+EPSILON) - @matrix[i,m+1].quo(pd[i][m+1]+EPSILON)
380
+ e_2=Distribution::Normal.pdf(b_m)
381
+ e_3=Distribution::Normal.cdf((pr.a(i)-rho*b_m).quo(Math::sqrt(1-rho**2))) - Distribution::Normal.cdf((pr.a(i-1)-rho*b_m).quo(Math::sqrt(1-rho**2)))
382
+ #puts "val #{j}: #{e_1} | #{e_2} | #{e_3}"
383
+
384
+ total+= e_1*e_2*e_3
385
+ end
386
+ total
387
+ end
388
+
389
+
390
+ def compute_basic_parameters
391
+ @nr=@matrix.row_size
392
+ @nc=@matrix.column_size
393
+ @sumr=[0]*@matrix.row_size
394
+ @sumrac=[0]*@matrix.row_size
395
+ @sumc=[0]*@matrix.column_size
396
+ @sumcac=[0]*@matrix.column_size
397
+ @alpha=[0]*(@nr-1)
398
+ @beta=[0]*(@nc-1)
399
+ @total=0
400
+ @nr.times do |i|
401
+ @nc.times do |j|
402
+ @sumr[i]+=@matrix[i,j]
403
+ @sumc[j]+=@matrix[i,j]
404
+ @total+=@matrix[i,j]
405
+ end
406
+ end
407
+ ac=0
408
+ (@nr-1).times do |i|
409
+ @sumrac[i]=@sumr[i]+ac
410
+ @alpha[i]=Distribution::Normal.p_value(@sumrac[i] / @total.to_f)
411
+ ac=@sumrac[i]
412
+ end
413
+ ac=0
414
+ (@nc-1).times do |i|
415
+ @sumcac[i]=@sumc[i]+ac
416
+ @beta[i]=Distribution::Normal.p_value(@sumcac[i] / @total.to_f)
417
+ ac=@sumcac[i]
418
+ end
419
+ end
420
+
421
+
422
+ # Computation of polychoric correlation usign two-step ML estimation.
423
+ #
424
+ # Two-step ML estimation "first estimates the thresholds from the one-way marginal frequencies, then estimates rho, conditional on these thresholds, via maximum likelihood" (Uebersax, 2006).
425
+ #
426
+ # The algorithm is based on code by Gegenfurtner(1992).
427
+ #
428
+ # <b>References</b>:
429
+ # * Gegenfurtner, K. (1992). PRAXIS: Brent's algorithm for function minimization. Behavior Research Methods, Instruments & Computers, 24(4), 560-564. Available on http://www.allpsych.uni-giessen.de/karl/pdf/03.praxis.pdf
430
+ # * Uebersax, J.S. (2006). The tetrachoric and polychoric correlation coefficients. Statistical Methods for Rater Agreement web site. 2006. Available at: http://john-uebersax.com/stat/tetra.htm . Accessed February, 11, 2010
431
+ #
432
+ def compute_two_step_mle_drasgow
433
+ if Statsample.has_gsl?
434
+ compute_two_step_mle_drasgow_gsl
435
+ else
436
+ compute_two_step_mle_drasgow_ruby
437
+ end
438
+ end
439
+
440
+ # Depends on minimization algorithm.
441
+
442
+ def compute_two_step_mle_drasgow_ruby #:nodoc:
443
+
444
+ f=proc {|rho|
445
+ loglike(@alpha,@beta, rho)
446
+ }
447
+ @log=_("Minimizing using GSL Brent method\n")
448
+ min=Minimization::Brent.new(-0.9999,0.9999,f)
449
+ min.epsilon=@epsilon
450
+ min.expected=0
451
+ min.iterate
452
+ @log+=min.log.to_table.to_s
453
+ @r=min.x_minimum
454
+ @loglike_model=-min.f_minimum
455
+ puts @log if @debug
456
+
457
+ end
458
+
459
+
460
+ def compute_two_step_mle_drasgow_gsl #:nodoc:
461
+
462
+ fn1=GSL::Function.alloc {|rho|
463
+ loglike(@alpha,@beta, rho)
464
+ }
465
+ @iteration = 0
466
+ max_iter = @max_iterations
467
+ m = 0 # initial guess
468
+ m_expected = 0
469
+ a=-0.9999
470
+ b=+0.9999
471
+ gmf = GSL::Min::FMinimizer.alloc(@minimizer_type_two_step)
472
+ gmf.set(fn1, m, a, b)
473
+ header=_("Two step minimization using %s method\n") % gmf.name
474
+ header+=sprintf("%5s [%9s, %9s] %9s %10s %9s\n", "iter", "lower", "upper", "min",
475
+ "err", "err(est)")
476
+
477
+ header+=sprintf("%5d [%.7f, %.7f] %.7f %+.7f %.7f\n", @iteration, a, b, m, m - m_expected, b - a)
478
+ @log=header
479
+ puts header if @debug
480
+ begin
481
+ @iteration += 1
482
+ status = gmf.iterate
483
+ status = gmf.test_interval(@epsilon, 0.0)
484
+
485
+ if status == GSL::SUCCESS
486
+ @log+="converged:"
487
+ puts "converged:" if @debug
488
+ end
489
+ a = gmf.x_lower
490
+ b = gmf.x_upper
491
+ m = gmf.x_minimum
492
+ message=sprintf("%5d [%.7f, %.7f] %.7f %+.7f %.7f\n",
493
+ @iteration, a, b, m, m - m_expected, b - a);
494
+ @log+=message
495
+ puts message if @debug
496
+ end while status == GSL::CONTINUE and @iteration < @max_iterations
497
+ @r=gmf.x_minimum
498
+ @loglike_model=-gmf.f_minimum
499
+ end
500
+
501
+ # Compute Polychoric correlation with joint estimate.
502
+ # Rho and thresholds are estimated at same time.
503
+ # Code based on R package "polycor", by J.Fox.
504
+ #
505
+
506
+ def compute_one_step_mle
507
+ # Get initial values with two-step aproach
508
+ compute_two_step_mle_drasgow
509
+ # Start iteration with past values
510
+ rho=@r
511
+ cut_alpha=@alpha
512
+ cut_beta=@beta
513
+ parameters=[rho]+cut_alpha+cut_beta
514
+ minimization = Proc.new { |v, params|
515
+ rho=v[0]
516
+ alpha=v[1, @nr-1]
517
+ beta=v[@nr, @nc-1]
518
+
519
+ #puts "f'rho=#{fd_loglike_rho(alpha,beta,rho)}"
520
+ #(@nr-1).times {|k|
521
+ # puts "f'a(#{k}) = #{fd_loglike_a(alpha,beta,rho,k)}"
522
+ # puts "f'a(#{k}) v2 = #{fd_loglike_a2(alpha,beta,rho,k)}"
523
+ #
524
+ #}
525
+ #(@nc-1).times {|k|
526
+ # puts "f'b(#{k}) = #{fd_loglike_b(alpha,beta,rho,k)}"
527
+ #}
528
+
529
+ loglike(alpha,beta,rho)
530
+ }
531
+ np=@nc-1+@nr
532
+ my_func = GSL::MultiMin::Function.alloc(minimization, np)
533
+ my_func.set_params(parameters) # parameters
534
+
535
+ x = GSL::Vector.alloc(parameters.dup)
536
+
537
+ ss = GSL::Vector.alloc(np)
538
+ ss.set_all(1.0)
539
+
540
+ minimizer = GSL::MultiMin::FMinimizer.alloc(minimizer_type_joint,np)
541
+ minimizer.set(my_func, x, ss)
542
+
543
+ iter = 0
544
+ message=""
545
+ begin
546
+ iter += 1
547
+ status = minimizer.iterate()
548
+ status = minimizer.test_size(@epsilon)
549
+ if status == GSL::SUCCESS
550
+ message="Joint MLE converged to minimum at\n"
551
+ end
552
+ x = minimizer.x
553
+ message+= sprintf("%5d iterations", iter)+"\n";
554
+ for i in 0...np do
555
+ message+=sprintf("%10.3e ", x[i])
556
+ end
557
+ message+=sprintf("f() = %7.3f size = %.3f\n", minimizer.fval, minimizer.size)+"\n";
558
+ end while status == GSL::CONTINUE and iter < @max_iterations
559
+ @iteration=iter
560
+ @log+=message
561
+ @r=minimizer.x[0]
562
+ @alpha=minimizer.x[1,@nr-1].to_a
563
+ @beta=minimizer.x[@nr,@nc-1].to_a
564
+ @loglike_model= -minimizer.minimum
565
+ end
566
+
567
+ def matrix_for_rho(rho) # :nodoc:
568
+ pd=@nr.times.collect{ [0]*@nc}
569
+ pc=@nr.times.collect{ [0]*@nc}
570
+ @nr.times { |i|
571
+ @nc.times { |j|
572
+ pd[i][j]=Distribution::NormalBivariate.cdf(@alpha[i], @beta[j], rho)
573
+ pc[i][j] = pd[i][j]
574
+ pd[i][j] = pd[i][j] - pc[i-1][j] if i>0
575
+ pd[i][j] = pd[i][j] - pc[i][j-1] if j>0
576
+ pd[i][j] = pd[i][j] + pc[i-1][j-1] if (i>0 and j>0)
577
+ res= pd[i][j]
578
+ }
579
+ }
580
+ Matrix.rows(pc)
581
+ end
582
+
583
+ def expected # :nodoc:
584
+ rt=[]
585
+ ct=[]
586
+ t=0
587
+ @matrix.row_size.times {|i|
588
+ @matrix.column_size.times {|j|
589
+ rt[i]=0 if rt[i].nil?
590
+ ct[j]=0 if ct[j].nil?
591
+ rt[i]+=@matrix[i,j]
592
+ ct[j]+=@matrix[i,j]
593
+ t+=@matrix[i,j]
594
+ }
595
+ }
596
+ m=[]
597
+ @matrix.row_size.times {|i|
598
+ row=[]
599
+ @matrix.column_size.times {|j|
600
+ row[j]=(rt[i]*ct[j]).quo(t)
601
+ }
602
+ m.push(row)
603
+ }
604
+
605
+ Matrix.rows(m)
606
+ end
607
+
608
+ # Compute polychoric correlation using polychoric series.
609
+ # Algorithm: AS87, by Martinson and Hamdam(1975).
610
+ #
611
+ # <b>Warning</b>: According to Drasgow(2006), this
612
+ # computation diverges greatly of joint and two-step methods.
613
+ #
614
+ def compute_polychoric_series
615
+ @nn=@n-1
616
+ @mm=@m-1
617
+ @nn7=7*@nn
618
+ @mm7=7*@mm
619
+ @mn=@n*@m
620
+ @cont=[nil]
621
+ @n.times {|j|
622
+ @m.times {|i|
623
+ @cont.push(@matrix[i,j])
624
+ }
625
+ }
626
+
627
+ pcorl=0
628
+ cont=@cont
629
+ xmean=0.0
630
+ sum=0.0
631
+ row=[]
632
+ colmn=[]
633
+ (1..@m).each do |i|
634
+ row[i]=0.0
635
+ l=i
636
+ (1..@n).each do |j|
637
+ row[i]=row[i]+cont[l]
638
+ l+=@m
639
+ end
640
+ raise "Should not be empty rows" if(row[i]==0.0)
641
+ xmean=xmean+row[i]*i.to_f
642
+ sum+=row[i]
643
+ end
644
+ xmean=xmean/sum.to_f
645
+ ymean=0.0
646
+ (1..@n).each do |j|
647
+ colmn[j]=0.0
648
+ l=(j-1)*@m
649
+ (1..@m).each do |i|
650
+ l=l+1
651
+ colmn[j]=colmn[j]+cont[l] #12
652
+ end
653
+ raise "Should not be empty cols" if colmn[j]==0
654
+ ymean=ymean+colmn[j]*j.to_f
655
+ end
656
+ ymean=ymean/sum.to_f
657
+ covxy=0.0
658
+ (1..@m).each do |i|
659
+ l=i
660
+ (1..@n).each do |j|
661
+ conxy=covxy+cont[l]*(i.to_f-xmean)*(j.to_f-ymean)
662
+ l=l+@m
663
+ end
664
+ end
665
+
666
+ chisq=0.0
667
+ (1..@m).each do |i|
668
+ l=i
669
+ (1..@n).each do |j|
670
+ chisq=chisq+((cont[l]**2).quo(row[i]*colmn[j]))
671
+ l=l+@m
672
+ end
673
+ end
674
+
675
+ phisq=chisq-1.0-(@mm*@nn).to_f / sum.to_f
676
+ phisq=0 if(phisq<0.0)
677
+ # Compute cumulative sum of columns and rows
678
+ sumc=[]
679
+ sumr=[]
680
+ sumc[1]=colmn[1]
681
+ sumr[1]=row[1]
682
+ cum=0
683
+ (1..@nn).each do |i| # goto 17 r20
684
+ cum=cum+colmn[i]
685
+ sumc[i]=cum
686
+ end
687
+ cum=0
688
+ (1..@mm).each do |i|
689
+ cum=cum+row[i]
690
+ sumr[i]=cum
691
+ end
692
+ alpha=[]
693
+ beta=[]
694
+ # Compute points of polytomy
695
+ (1..@mm).each do |i| #do 21
696
+ alpha[i]=Distribution::Normal.p_value(sumr[i] / sum.to_f)
697
+ end # 21
698
+ (1..@nn).each do |i| #do 22
699
+ beta[i]=Distribution::Normal.p_value(sumc[i] / sum.to_f)
700
+ end # 21
701
+ @alpha=alpha[1,alpha.size]
702
+ @beta=beta[1,beta.size]
703
+ @sumr=row[1,row.size]
704
+ @sumc=colmn[1,colmn.size]
705
+ @total=sum
706
+
707
+ # Compute Fourier coefficients a and b. Verified
708
+ h=hermit(alpha,@mm)
709
+ hh=hermit(beta,@nn)
710
+ a=[]
711
+ b=[]
712
+ if @m!=2 # goto 24
713
+ mmm=@m-2
714
+ (1..mmm).each do |i| #do 23
715
+ a1=sum.quo(row[i+1] * sumr[i] * sumr[i+1])
716
+ a2=sumr[i] * xnorm(alpha[i+1])
717
+ a3=sumr[i+1] * xnorm(alpha[i])
718
+ l=i
719
+ (1..7).each do |j| #do 23
720
+ a[l]=Math::sqrt(a1.quo(j))*(h[l+1] * a2 - h[l] * a3)
721
+ l=l+@mm
722
+ end
723
+ end #23
724
+ end
725
+ # 24
726
+
727
+
728
+ if @n!=2 # goto 26
729
+ nnn=@n-2
730
+ (1..nnn).each do |i| #do 25
731
+ a1=sum.quo(colmn[i+1] * sumc[i] * sumc[i+1])
732
+ a2=sumc[i] * xnorm(beta[i+1])
733
+ a3=sumc[i+1] * xnorm(beta[i])
734
+ l=i
735
+ (1..7).each do |j| #do 25
736
+ b[l]=Math::sqrt(a1.quo(j))*(a2 * hh[l+1] - a3*hh[l])
737
+ l=l+@nn
738
+ end # 25
739
+ end # 25
740
+ end
741
+ #26 r20
742
+ l = @mm
743
+ a1 = -sum * xnorm(alpha[@mm])
744
+ a2 = row[@m] * sumr[@mm]
745
+ (1..7).each do |j| # do 27
746
+ a[l]=a1 * h[l].quo(Math::sqrt(j*a2))
747
+ l=l+@mm
748
+ end # 27
749
+
750
+ l = @nn
751
+ a1 = -sum * xnorm(beta[@nn])
752
+ a2 = colmn[@n] * sumc[@nn]
753
+
754
+ (1..7).each do |j| # do 28
755
+ b[l]=a1 * hh[l].quo(Math::sqrt(j*a2))
756
+ l = l + @nn
757
+ end # 28
758
+ rcof=[]
759
+ # compute coefficients rcof of polynomial of order 8
760
+ rcof[1]=-phisq
761
+ (2..9).each do |i| # do 30
762
+ rcof[i]=0.0
763
+ end #30
764
+ m1=@mm
765
+ (1..@mm).each do |i| # do 31
766
+ m1=m1+1
767
+ m2=m1+@mm
768
+ m3=m2+@mm
769
+ m4=m3+@mm
770
+ m5=m4+@mm
771
+ m6=m5+@mm
772
+ n1=@nn
773
+ (1..@nn).each do |j| # do 31
774
+ n1=n1+1
775
+ n2=n1+@nn
776
+ n3=n2+@nn
777
+ n4=n3+@nn
778
+ n5=n4+@nn
779
+ n6=n5+@nn
780
+
781
+ rcof[3] = rcof[3] + a[i]**2 * b[j]**2
782
+
783
+ rcof[4] = rcof[4] + 2.0 * a[i] * a[m1] * b[j] * b[n1]
784
+
785
+ rcof[5] = rcof[5] + a[m1]**2 * b[n1]**2 +
786
+ 2.0 * a[i] * a[m2] * b[j] * b[n2]
787
+
788
+ rcof[6] = rcof[6] + 2.0 * (a[i] * a[m3] * b[j] *
789
+ b[n3] + a[m1] * a[m2] * b[n1] * b[n2])
790
+
791
+ rcof[7] = rcof[7] + a[m2]**2 * b[n2]**2 +
792
+ 2.0 * (a[i] * a[m4] * b[j] * b[n4] + a[m1] * a[m3] *
793
+ b[n1] * b[n3])
794
+
795
+ rcof[8] = rcof[8] + 2.0 * (a[i] * a[m5] * b[j] * b[n5] +
796
+ a[m1] * a[m4] * b[n1] * b[n4] + a[m2] * a[m3] * b[n2] * b[n3])
797
+
798
+ rcof[9] = rcof[9] + a[m3]**2 * b[n3]**2 +
799
+ 2.0 * (a[i] * a[m6] * b[j] * b[n6] + a[m1] * a[m5] * b[n1] *
800
+ b[n5] + (a[m2] * a[m4] * b[n2] * b[n4]))
801
+ end # 31
802
+ end # 31
803
+
804
+ rcof=rcof[1,rcof.size]
805
+ poly = GSL::Poly.alloc(rcof)
806
+ roots=poly.solve
807
+ rootr=[nil]
808
+ rooti=[nil]
809
+ roots.each {|c|
810
+ rootr.push(c.real)
811
+ rooti.push(c.im)
812
+ }
813
+ @rootr=rootr
814
+ @rooti=rooti
815
+
816
+ norts=0
817
+ (1..7).each do |i| # do 43
818
+
819
+ next if rooti[i]!=0.0
820
+ if (covxy>=0.0)
821
+ next if(rootr[i]<0.0 or rootr[i]>1.0)
822
+ pcorl=rootr[i]
823
+ norts=norts+1
824
+ else
825
+ if (rootr[i]>=-1.0 and rootr[i]<0.0)
826
+ pcorl=rootr[i]
827
+ norts=norts+1
828
+ end
829
+ end
830
+ end # 43
831
+ raise "Error" if norts==0
832
+ @r=pcorl
833
+
834
+ @loglike_model=-loglike(@alpha, @beta, @r)
835
+
836
+ end
837
+ #Computes vector h(mm7) of orthogonal hermite...
838
+ def hermit(s,k) # :nodoc:
839
+ h=[]
840
+ (1..k).each do |i| # do 14
841
+ l=i
842
+ ll=i+k
843
+ lll=ll+k
844
+ h[i]=1.0
845
+ h[ll]=s[i]
846
+ v=1.0
847
+ (2..6).each do |j| #do 14
848
+ w=Math::sqrt(j)
849
+ h[lll]=(s[i]*h[ll] - v*h[l]).quo(w)
850
+ v=w
851
+ l=l+k
852
+ ll=ll+k
853
+ lll=lll+k
854
+ end
855
+ end
856
+ h
857
+ end
858
+ def xnorm(t) # :nodoc:
859
+ Math::exp(-0.5 * t **2) * (1.0/Math::sqrt(2*Math::PI))
860
+ end
861
+
862
+ def report_building(generator) # :nodoc:
863
+ compute if dirty?
864
+ section=ReportBuilder::Section.new(:name=>@name)
865
+ t=ReportBuilder::Table.new(:name=>_("Contingence Table"), :header=>[""]+(@n.times.collect {|i| "Y=#{i}"})+["Total"])
866
+ @m.times do |i|
867
+ t.row(["X = #{i}"]+(@n.times.collect {|j| @matrix[i,j]}) + [@sumr[i]])
868
+ end
869
+ t.hr
870
+ t.row(["T"]+(@n.times.collect {|j| @sumc[j]})+[@total])
871
+ section.add(t)
872
+ section.add(sprintf("r: %0.4f",r))
873
+ t=ReportBuilder::Table.new(:name=>_("Thresholds"), :header=>["","Value"])
874
+ threshold_x.each_with_index {|val,i|
875
+ t.row([_("Threshold X %d") % i, sprintf("%0.4f", val)])
876
+ }
877
+ threshold_y.each_with_index {|val,i|
878
+ t.row([_("Threshold Y %d") % i, sprintf("%0.4f", val)])
879
+ }
880
+ section.add(t)
881
+ section.add(_("Test of bivariate normality: X2 = %0.3f, df = %d, p= %0.5f" % [ chi_square, chi_square_df, 1-Distribution::ChiSquare.cdf(chi_square, chi_square_df)]))
882
+ generator.parse_element(section)
883
+ end
884
+ end
885
+ end
886
+ end