statsample-bivariate-extension 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data.tar.gz.sig ADDED
Binary file
data/History.txt ADDED
@@ -0,0 +1,4 @@
1
+ === 0.13.0 / 2010-06-21
2
+
3
+ * First independent version of statsample-bivariate-extension
4
+
data/Manifest.txt ADDED
@@ -0,0 +1,7 @@
1
+ History.txt
2
+ Manifest.txt
3
+ README.txt
4
+ Rakefile
5
+ lib/statsample/bivariate/polychoric.rb
6
+ lib/statsample/bivariate/tetrachoric.rb
7
+ spec/spec_helper.rb
data/README.txt ADDED
@@ -0,0 +1,50 @@
1
+ = statsample-bivariate-extension
2
+
3
+ * http://ruby-statsample.rubyforge.org/
4
+
5
+ == DESCRIPTION:
6
+
7
+ Provides advanced bivariate statistics:
8
+ * Tetrachoric correlation
9
+ * Polychoric correlation
10
+
11
+ == FEATURES/PROBLEMS:
12
+
13
+ * Statsample::Bivariate::Polychoric class provides polychoric correlation
14
+ * Statsample::Bivariate::Tetrachoric class provides tetrachoric correlation
15
+
16
+
17
+ == SYNOPSIS:
18
+
19
+ === Tetrachoric correlation
20
+
21
+ require 'statsample'
22
+ a=40
23
+ b=10
24
+ c=20
25
+ d=30
26
+ tetra=Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
27
+ puts tetra.summary
28
+
29
+ === Polychoric correlation
30
+
31
+ require 'statsample'
32
+ ct=Matrix[[58,52,1],[26,58,3],[8,12,9]]
33
+
34
+ poly=Statsample::Bivariate::Polychoric.new(ct)
35
+ puts poly.summary
36
+
37
+
38
+ == REQUIREMENTS:
39
+
40
+ * Statsample
41
+
42
+ == INSTALL:
43
+
44
+ This gem is a statsample dependency. If you want to install it separately:
45
+
46
+ * sudo gem install statsample-bivariate-extension
47
+
48
+ == LICENSE:
49
+
50
+ GPL-2
data/Rakefile ADDED
@@ -0,0 +1,18 @@
1
+ #!/usr/bin/ruby
2
+ # -*- ruby -*-
3
+
4
+ require 'rubygems'
5
+ require 'hoe'
6
+
7
+ $:.unshift(File.dirname(__FILE__)+"/lib")
8
+
9
+ require 'statsample/bivariate/extension_version.rb'
10
+ Hoe.spec 'statsample-bivariate-extension' do
11
+ self.rubyforge_name = 'ruby-statsample'
12
+ self.version=Statsample::Bivariate::EXTENSION_VERSION
13
+ self.developer('Claudio Bustos', 'clbustos_at_gmail.com')
14
+ self.extra_deps << ["statsample","~>0.13.0"]
15
+
16
+ end
17
+
18
+ # vim: syntax=ruby
@@ -0,0 +1,886 @@
1
+ require 'minimization'
2
+ module Statsample
3
+ module Bivariate
4
+ # Calculate Polychoric correlation for two vectors.
5
+ def self.polychoric(v1,v2)
6
+ pc=Polychoric.new_with_vectors(v1,v2)
7
+ pc.r
8
+ end
9
+
10
+ # Polychoric correlation matrix.
11
+ # Order of rows and columns depends on Dataset#fields order
12
+ def self.polychoric_correlation_matrix(ds)
13
+ cache={}
14
+ matrix=ds.collect_matrix do |row,col|
15
+ if row==col
16
+ 1.0
17
+ else
18
+ begin
19
+ if cache[[col,row]].nil?
20
+ poly=polychoric(ds[row],ds[col])
21
+ cache[[row,col]]=poly
22
+ poly
23
+ else
24
+ cache[[col,row]]
25
+ end
26
+ rescue RuntimeError
27
+ nil
28
+ end
29
+ end
30
+ end
31
+ matrix.extend CovariateMatrix
32
+ matrix.fields=ds.fields
33
+ matrix
34
+ end
35
+
36
+ # = Polychoric correlation.
37
+ #
38
+ # The <em>polychoric</em> correlation is a measure of
39
+ # bivariate association arising when both observed variates
40
+ # are ordered, categorical variables that result from polychotomizing
41
+ # the two undelying continuous variables (Drasgow, 2006)
42
+ #
43
+ # According to Drasgow(2006), there are three methods to estimate
44
+ # the polychoric correlation:
45
+ #
46
+ # 1. Maximum Likelihood Estimator
47
+ # 2. Two-step estimator and
48
+ # 3. Polychoric series estimate.
49
+ #
50
+ # By default, two-step estimation is used. You can select
51
+ # the estimation method with method attribute. Joint estimate and polychoric series requires gsl library and rb-gsl.
52
+ #
53
+ # == Use
54
+ #
55
+ # You should enter a Matrix with ordered data. For example:
56
+ # -------------------
57
+ # | y=0 | y=1 | y=2 |
58
+ # -------------------
59
+ # x = 0 | 1 | 10 | 20 |
60
+ # -------------------
61
+ # x = 1 | 20 | 20 | 50 |
62
+ # -------------------
63
+ #
64
+ # The code will be
65
+ #
66
+ # matrix=Matrix[[1,10,20],[20,20,50]]
67
+ # poly=Statsample::Bivariate::Polychoric.new(matrix, :method=>:joint)
68
+ # puts poly.r
69
+ #
70
+ # See extensive documentation on Uebersax(2002) and Drasgow(2006)
71
+ #
72
+ # == References
73
+ #
74
+ # * Uebersax, J.S. (2006). The tetrachoric and polychoric correlation coefficients. Statistical Methods for Rater Agreement web site. 2006. Available at: http://john-uebersax.com/stat/tetra.htm . Accessed February, 11, 2010
75
+ # * Drasgow F. (2006). Polychoric and polyserial correlations. In Kotz L, Johnson NL (Eds.), Encyclopedia of statistical sciences. Vol. 7 (pp. 69-74). New York: Wiley.
76
+
77
+ class Polychoric
78
+ include Summarizable
79
+ class Processor
80
+ attr_reader :alpha, :beta, :rho
81
+ def initialize(alpha,beta,rho)
82
+ @alpha=alpha
83
+ @beta=beta
84
+ @nr=@alpha.size+1
85
+ @nc=@beta.size+1
86
+ @rho=rho
87
+ @pd=nil
88
+ end
89
+ def bipdf(i,j)
90
+ Distribution::NormalBivariate.pdf(a(i), b(j), rho)
91
+ end
92
+ def a(i)
93
+ i < 0 ? -100 : (i==@nr-1 ? 100 : alpha[i])
94
+ end
95
+ def b(j)
96
+ j < 0 ? -100 : (j==@nc-1 ? 100 : beta[j])
97
+ end
98
+ # Equation(10) from Olsson(1979)
99
+ def fd_loglike_cell_a(i,j,k)
100
+ if k==i
101
+ Distribution::NormalBivariate.pd_cdf_x(a(k),b(j), rho) - Distribution::NormalBivariate.pd_cdf_x(a(k),b(j-1),rho)
102
+ elsif k==(i-1)
103
+ -Distribution::NormalBivariate.pd_cdf_x(a(k),b(j),rho) + Distribution::NormalBivariate.pd_cdf_x(a(k),b(j-1),rho)
104
+ else
105
+ 0
106
+ end
107
+
108
+ end
109
+ # phi_ij for each i and j
110
+ # Uses equation(4) from Olsson(1979)
111
+ def pd
112
+ if @pd.nil?
113
+ @pd=@nr.times.collect{ [0] * @nc}
114
+ pc=@nr.times.collect{ [0] * @nc}
115
+ @nr.times do |i|
116
+ @nc.times do |j|
117
+
118
+ if i==@nr-1 and j==@nc-1
119
+ @pd[i][j]=1.0
120
+ else
121
+ a=(i==@nr-1) ? 100: alpha[i]
122
+ b=(j==@nc-1) ? 100: beta[j]
123
+ #puts "a:#{a} b:#{b}"
124
+ @pd[i][j]=Distribution::NormalBivariate.cdf(a, b, rho)
125
+ end
126
+ pc[i][j] = @pd[i][j]
127
+ @pd[i][j] = @pd[i][j] - pc[i-1][j] if i>0
128
+ @pd[i][j] = @pd[i][j] - pc[i][j-1] if j>0
129
+ @pd[i][j] = @pd[i][j] + pc[i-1][j-1] if (i>0 and j>0)
130
+ end
131
+ end
132
+ end
133
+ @pd
134
+ end
135
+ end
136
+
137
+ include DirtyMemoize
138
+ # Name of the analysis
139
+ attr_accessor :name
140
+ # Max number of iterations used on iterative methods. Default to MAX_ITERATIONS
141
+ attr_accessor :max_iterations
142
+ # Debug algorithm (See iterations, for example)
143
+ attr_accessor :debug
144
+ # Minimizer type for two step. Default "brent"
145
+ # See http://rb-gsl.rubyforge.org/min.html for reference.
146
+ attr_accessor :minimizer_type_two_step
147
+
148
+ # Minimizer type for joint estimate. Default "nmsimplex"
149
+ # See http://rb-gsl.rubyforge.org/min.html for reference.
150
+ attr_accessor :minimizer_type_joint
151
+
152
+
153
+ # Method of calculation of polychoric series.
154
+ # <tt>:two_step</tt> used by default.
155
+ #
156
+ # :two_step:: two-step ML, based on code by Gegenfurtner(1992).
157
+ # :polychoric_series:: polychoric series estimate, using
158
+ # algorithm AS87 by Martinson and Hamdan (1975).
159
+ # :joint:: one-step ML, based on R package 'polycor'
160
+ # by J.Fox.
161
+ attr_accessor :method
162
+ # Absolute error for iteration.
163
+ attr_accessor :epsilon
164
+
165
+ # Number of iterations
166
+ attr_reader :iteration
167
+
168
+ # Log of algorithm
169
+ attr_reader :log
170
+
171
+
172
+ attr_reader :loglike_model
173
+
174
+ METHOD=:two_step
175
+ MAX_ITERATIONS=300
176
+ EPSILON=1e-6
177
+ MINIMIZER_TYPE_TWO_STEP="brent"
178
+ MINIMIZER_TYPE_JOINT="nmsimplex"
179
+ def self.new_with_vectors(v1,v2)
180
+ Polychoric.new(Crosstab.new(v1,v2).to_matrix)
181
+ end
182
+ # Params:
183
+ # * matrix: Contingence table
184
+ # * opts: Any attribute
185
+
186
+ def initialize(matrix, opts=Hash.new)
187
+ @matrix=matrix
188
+ @n=matrix.column_size
189
+ @m=matrix.row_size
190
+ raise "row size <1" if @m<=1
191
+ raise "column size <1" if @n<=1
192
+
193
+ @method=METHOD
194
+ @name=_("Polychoric correlation")
195
+ @max_iterations=MAX_ITERATIONS
196
+ @epsilon=EPSILON
197
+ @minimizer_type_two_step=MINIMIZER_TYPE_TWO_STEP
198
+ @minimizer_type_joint=MINIMIZER_TYPE_JOINT
199
+ @debug=false
200
+ @iteration=nil
201
+ opts.each{|k,v|
202
+ self.send("#{k}=",v) if self.respond_to? k
203
+ }
204
+ @r=nil
205
+ @pd=nil
206
+ compute_basic_parameters
207
+ end
208
+ # Returns the polychoric correlation
209
+ attr_reader :r
210
+ # Returns the rows thresholds
211
+ attr_reader :alpha
212
+ # Returns the columns thresholds
213
+ attr_reader :beta
214
+
215
+ dirty_writer :max_iterations, :epsilon, :minimizer_type_two_step, :minimizer_type_joint, :method
216
+ dirty_memoize :r, :alpha, :beta
217
+
218
+ alias :threshold_x :alpha
219
+ alias :threshold_y :beta
220
+
221
+
222
+ # Start the computation of polychoric correlation
223
+ # based on attribute method
224
+ def compute
225
+ if @method==:two_step
226
+ compute_two_step_mle_drasgow
227
+ elsif @method==:joint
228
+ compute_one_step_mle
229
+ elsif @method==:polychoric_series
230
+ compute_polychoric_series
231
+ else
232
+ raise "Not implemented"
233
+ end
234
+ end
235
+ # Retrieve log likehood for actual data.
236
+ def loglike_data
237
+ loglike=0
238
+ @nr.times do |i|
239
+ @nc.times do |j|
240
+ res=@matrix[i,j].quo(@total)
241
+ if (res==0)
242
+ res=1e-16
243
+ end
244
+ loglike+= @matrix[i,j] * Math::log(res )
245
+ end
246
+ end
247
+ loglike
248
+ end
249
+
250
+ # Chi Square of model
251
+ def chi_square
252
+ if @loglike_model.nil?
253
+ compute
254
+ end
255
+ -2*(@loglike_model-loglike_data)
256
+ end
257
+
258
+ def chi_square_df
259
+ (@nr*@nc)-@nc-@nr
260
+ end
261
+
262
+
263
+
264
+
265
+ # Retrieve all cell probabilities for givens alpha, beta and rho
266
+ def cell_probabilities(alpha,beta,rho)
267
+ pd=@nr.times.collect{ [0] * @nc}
268
+ pc=@nr.times.collect{ [0] * @nc}
269
+ @nr.times do |i|
270
+ @nc.times do |j|
271
+
272
+ if i==@nr-1 and j==@nc-1
273
+ pd[i][j]=1.0
274
+ else
275
+ a=(i==@nr-1) ? 100: alpha[i]
276
+ b=(j==@nc-1) ? 100: beta[j]
277
+ #puts "a:#{a} b:#{b}"
278
+ pd[i][j]=Distribution::NormalBivariate.cdf(a, b, rho)
279
+ end
280
+ pc[i][j] = pd[i][j]
281
+ pd[i][j] = pd[i][j] - pc[i-1][j] if i>0
282
+ pd[i][j] = pd[i][j] - pc[i][j-1] if j>0
283
+ pd[i][j] = pd[i][j] + pc[i-1][j-1] if (i>0 and j>0)
284
+ end
285
+ end
286
+ @pd=pd
287
+ pd
288
+ end
289
+ def loglike(alpha,beta,rho)
290
+ if rho.abs>0.9999
291
+ rho= (rho>0) ? 0.9999 : -0.9999
292
+ end
293
+ pr=Processor.new(alpha,beta,rho)
294
+ loglike=0
295
+
296
+
297
+ @nr.times do |i|
298
+ @nc.times do |j|
299
+ res=pr.pd[i][j]+EPSILON
300
+ loglike+= @matrix[i,j] * Math::log( res )
301
+ end
302
+ end
303
+ -loglike
304
+ end
305
+ # First derivate for rho
306
+ # Uses equation (9) from Olsson(1979)
307
+ def fd_loglike_rho(alpha,beta,rho)
308
+ if rho.abs>0.9999
309
+ rho= (rho>0) ? 0.9999 : -0.9999
310
+ end
311
+ total=0
312
+ pr=Processor.new(alpha,beta,rho)
313
+ @nr.times do |i|
314
+ @nc.times do |j|
315
+ pi=pr.pd[i][j] + EPSILON
316
+ total+= (@matrix[i,j] / pi) * (pr.bipdf(i,j)-pr.bipdf(i-1,j)-pr.bipdf(i,j-1)+pr.bipdf(i-1,j-1))
317
+ end
318
+ end
319
+ total
320
+ end
321
+
322
+ # First derivative for alpha_k
323
+ def fd_loglike_a(alpha,beta,rho,k)
324
+ fd_loglike_a_eq6(alpha,beta,rho,k)
325
+ end
326
+ # Uses equation (6) from Olsson(1979)
327
+ def fd_loglike_a_eq6(alpha,beta,rho,k)
328
+ if rho.abs>0.9999
329
+ rho= (rho>0) ? 0.9999 : -0.9999
330
+ end
331
+ pr=Processor.new(alpha,beta,rho)
332
+ total=0
333
+ pd=pr.pd
334
+ @nr.times do |i|
335
+ @nc.times do |j|
336
+ total+=@matrix[i,j].quo(pd[i][j]+EPSILON) * pr.fd_loglike_cell_a(i,j,k)
337
+ end
338
+ end
339
+ total
340
+ end
341
+ # Uses equation(13) from Olsson(1979)
342
+ def fd_loglike_a_eq13(alpha,beta,rho,k)
343
+ if rho.abs>0.9999
344
+ rho= (rho>0) ? 0.9999 : -0.9999
345
+ end
346
+ pr=Processor.new(alpha,beta,rho)
347
+ total=0
348
+ a_k=pr.a(k)
349
+ pd=pr.pd
350
+ @nc.times do |j|
351
+ #puts "j: #{j}"
352
+ #puts "b #{j} : #{b.call(j)}"
353
+ #puts "b #{j-1} : #{b.call(j-1)}"
354
+
355
+ e_1=@matrix[k,j].quo(pd[k][j]+EPSILON) - @matrix[k+1,j].quo(pd[k+1][j]+EPSILON)
356
+ e_2=Distribution::Normal.pdf(a_k)
357
+ e_3=Distribution::Normal.cdf((pr.b(j)-rho*a_k).quo(Math::sqrt(1-rho**2))) - Distribution::Normal.cdf((pr.b(j-1)-rho*a_k).quo(Math::sqrt(1-rho**2)))
358
+ #puts "val #{j}: #{e_1} | #{e_2} | #{e_3}"
359
+
360
+ total+= e_1*e_2*e_3
361
+ end
362
+ total
363
+ end
364
+ # First derivative for beta_m
365
+ # Uses equation(14) from Olsson(1979)
366
+ def fd_loglike_b(alpha,beta,rho,m)
367
+ if rho.abs>0.9999
368
+ rho= (rho>0) ? 0.9999 : -0.9999
369
+ end
370
+ pr=Processor.new(alpha,beta,rho)
371
+ total=0
372
+ b_m=pr.b m
373
+ pd=pr.pd
374
+ @nr.times do |i|
375
+ #puts "j: #{j}"
376
+ #puts "b #{j} : #{b.call(j)}"
377
+ #puts "b #{j-1} : #{b.call(j-1)}"
378
+
379
+ e_1=@matrix[i,m].quo(pd[i][m]+EPSILON) - @matrix[i,m+1].quo(pd[i][m+1]+EPSILON)
380
+ e_2=Distribution::Normal.pdf(b_m)
381
+ e_3=Distribution::Normal.cdf((pr.a(i)-rho*b_m).quo(Math::sqrt(1-rho**2))) - Distribution::Normal.cdf((pr.a(i-1)-rho*b_m).quo(Math::sqrt(1-rho**2)))
382
+ #puts "val #{j}: #{e_1} | #{e_2} | #{e_3}"
383
+
384
+ total+= e_1*e_2*e_3
385
+ end
386
+ total
387
+ end
388
+
389
+
390
+ def compute_basic_parameters
391
+ @nr=@matrix.row_size
392
+ @nc=@matrix.column_size
393
+ @sumr=[0]*@matrix.row_size
394
+ @sumrac=[0]*@matrix.row_size
395
+ @sumc=[0]*@matrix.column_size
396
+ @sumcac=[0]*@matrix.column_size
397
+ @alpha=[0]*(@nr-1)
398
+ @beta=[0]*(@nc-1)
399
+ @total=0
400
+ @nr.times do |i|
401
+ @nc.times do |j|
402
+ @sumr[i]+=@matrix[i,j]
403
+ @sumc[j]+=@matrix[i,j]
404
+ @total+=@matrix[i,j]
405
+ end
406
+ end
407
+ ac=0
408
+ (@nr-1).times do |i|
409
+ @sumrac[i]=@sumr[i]+ac
410
+ @alpha[i]=Distribution::Normal.p_value(@sumrac[i] / @total.to_f)
411
+ ac=@sumrac[i]
412
+ end
413
+ ac=0
414
+ (@nc-1).times do |i|
415
+ @sumcac[i]=@sumc[i]+ac
416
+ @beta[i]=Distribution::Normal.p_value(@sumcac[i] / @total.to_f)
417
+ ac=@sumcac[i]
418
+ end
419
+ end
420
+
421
+
422
+ # Computation of polychoric correlation using two-step ML estimation.
423
+ #
424
+ # Two-step ML estimation "first estimates the thresholds from the one-way marginal frequencies, then estimates rho, conditional on these thresholds, via maximum likelihood" (Uebersax, 2006).
425
+ #
426
+ # The algorithm is based on code by Gegenfurtner(1992).
427
+ #
428
+ # <b>References</b>:
429
+ # * Gegenfurtner, K. (1992). PRAXIS: Brent's algorithm for function minimization. Behavior Research Methods, Instruments & Computers, 24(4), 560-564. Available on http://www.allpsych.uni-giessen.de/karl/pdf/03.praxis.pdf
430
+ # * Uebersax, J.S. (2006). The tetrachoric and polychoric correlation coefficients. Statistical Methods for Rater Agreement web site. 2006. Available at: http://john-uebersax.com/stat/tetra.htm . Accessed February, 11, 2010
431
+ #
432
+ def compute_two_step_mle_drasgow
433
+ if Statsample.has_gsl?
434
+ compute_two_step_mle_drasgow_gsl
435
+ else
436
+ compute_two_step_mle_drasgow_ruby
437
+ end
438
+ end
439
+
440
+ # Depends on minimization algorithm.
441
+
442
+ def compute_two_step_mle_drasgow_ruby #:nodoc:
443
+
444
+ f=proc {|rho|
445
+ loglike(@alpha,@beta, rho)
446
+ }
447
+ @log=_("Minimizing using GSL Brent method\n")
448
+ min=Minimization::Brent.new(-0.9999,0.9999,f)
449
+ min.epsilon=@epsilon
450
+ min.expected=0
451
+ min.iterate
452
+ @log+=min.log.to_table.to_s
453
+ @r=min.x_minimum
454
+ @loglike_model=-min.f_minimum
455
+ puts @log if @debug
456
+
457
+ end
458
+
459
+
460
+ def compute_two_step_mle_drasgow_gsl #:nodoc:
461
+
462
+ fn1=GSL::Function.alloc {|rho|
463
+ loglike(@alpha,@beta, rho)
464
+ }
465
+ @iteration = 0
466
+ max_iter = @max_iterations
467
+ m = 0 # initial guess
468
+ m_expected = 0
469
+ a=-0.9999
470
+ b=+0.9999
471
+ gmf = GSL::Min::FMinimizer.alloc(@minimizer_type_two_step)
472
+ gmf.set(fn1, m, a, b)
473
+ header=_("Two step minimization using %s method\n") % gmf.name
474
+ header+=sprintf("%5s [%9s, %9s] %9s %10s %9s\n", "iter", "lower", "upper", "min",
475
+ "err", "err(est)")
476
+
477
+ header+=sprintf("%5d [%.7f, %.7f] %.7f %+.7f %.7f\n", @iteration, a, b, m, m - m_expected, b - a)
478
+ @log=header
479
+ puts header if @debug
480
+ begin
481
+ @iteration += 1
482
+ status = gmf.iterate
483
+ status = gmf.test_interval(@epsilon, 0.0)
484
+
485
+ if status == GSL::SUCCESS
486
+ @log+="converged:"
487
+ puts "converged:" if @debug
488
+ end
489
+ a = gmf.x_lower
490
+ b = gmf.x_upper
491
+ m = gmf.x_minimum
492
+ message=sprintf("%5d [%.7f, %.7f] %.7f %+.7f %.7f\n",
493
+ @iteration, a, b, m, m - m_expected, b - a);
494
+ @log+=message
495
+ puts message if @debug
496
+ end while status == GSL::CONTINUE and @iteration < @max_iterations
497
+ @r=gmf.x_minimum
498
+ @loglike_model=-gmf.f_minimum
499
+ end
500
+
501
+ # Compute Polychoric correlation with joint estimate.
502
+ # Rho and thresholds are estimated at same time.
503
+ # Code based on R package "polycor", by J.Fox.
504
+ #
505
+
506
+ def compute_one_step_mle
507
+ # Get initial values with two-step approach
508
+ compute_two_step_mle_drasgow
509
+ # Start iteration with past values
510
+ rho=@r
511
+ cut_alpha=@alpha
512
+ cut_beta=@beta
513
+ parameters=[rho]+cut_alpha+cut_beta
514
+ minimization = Proc.new { |v, params|
515
+ rho=v[0]
516
+ alpha=v[1, @nr-1]
517
+ beta=v[@nr, @nc-1]
518
+
519
+ #puts "f'rho=#{fd_loglike_rho(alpha,beta,rho)}"
520
+ #(@nr-1).times {|k|
521
+ # puts "f'a(#{k}) = #{fd_loglike_a(alpha,beta,rho,k)}"
522
+ # puts "f'a(#{k}) v2 = #{fd_loglike_a2(alpha,beta,rho,k)}"
523
+ #
524
+ #}
525
+ #(@nc-1).times {|k|
526
+ # puts "f'b(#{k}) = #{fd_loglike_b(alpha,beta,rho,k)}"
527
+ #}
528
+
529
+ loglike(alpha,beta,rho)
530
+ }
531
+ np=@nc-1+@nr
532
+ my_func = GSL::MultiMin::Function.alloc(minimization, np)
533
+ my_func.set_params(parameters) # parameters
534
+
535
+ x = GSL::Vector.alloc(parameters.dup)
536
+
537
+ ss = GSL::Vector.alloc(np)
538
+ ss.set_all(1.0)
539
+
540
+ minimizer = GSL::MultiMin::FMinimizer.alloc(minimizer_type_joint,np)
541
+ minimizer.set(my_func, x, ss)
542
+
543
+ iter = 0
544
+ message=""
545
+ begin
546
+ iter += 1
547
+ status = minimizer.iterate()
548
+ status = minimizer.test_size(@epsilon)
549
+ if status == GSL::SUCCESS
550
+ message="Joint MLE converged to minimum at\n"
551
+ end
552
+ x = minimizer.x
553
+ message+= sprintf("%5d iterations", iter)+"\n";
554
+ for i in 0...np do
555
+ message+=sprintf("%10.3e ", x[i])
556
+ end
557
+ message+=sprintf("f() = %7.3f size = %.3f\n", minimizer.fval, minimizer.size)+"\n";
558
+ end while status == GSL::CONTINUE and iter < @max_iterations
559
+ @iteration=iter
560
+ @log+=message
561
+ @r=minimizer.x[0]
562
+ @alpha=minimizer.x[1,@nr-1].to_a
563
+ @beta=minimizer.x[@nr,@nc-1].to_a
564
+ @loglike_model= -minimizer.minimum
565
+ end
566
+
567
+ def matrix_for_rho(rho) # :nodoc:
568
+ pd=@nr.times.collect{ [0]*@nc}
569
+ pc=@nr.times.collect{ [0]*@nc}
570
+ @nr.times { |i|
571
+ @nc.times { |j|
572
+ pd[i][j]=Distribution::NormalBivariate.cdf(@alpha[i], @beta[j], rho)
573
+ pc[i][j] = pd[i][j]
574
+ pd[i][j] = pd[i][j] - pc[i-1][j] if i>0
575
+ pd[i][j] = pd[i][j] - pc[i][j-1] if j>0
576
+ pd[i][j] = pd[i][j] + pc[i-1][j-1] if (i>0 and j>0)
577
+ res= pd[i][j]
578
+ }
579
+ }
580
+ Matrix.rows(pc)
581
+ end
582
+
583
+ def expected # :nodoc:
584
+ rt=[]
585
+ ct=[]
586
+ t=0
587
+ @matrix.row_size.times {|i|
588
+ @matrix.column_size.times {|j|
589
+ rt[i]=0 if rt[i].nil?
590
+ ct[j]=0 if ct[j].nil?
591
+ rt[i]+=@matrix[i,j]
592
+ ct[j]+=@matrix[i,j]
593
+ t+=@matrix[i,j]
594
+ }
595
+ }
596
+ m=[]
597
+ @matrix.row_size.times {|i|
598
+ row=[]
599
+ @matrix.column_size.times {|j|
600
+ row[j]=(rt[i]*ct[j]).quo(t)
601
+ }
602
+ m.push(row)
603
+ }
604
+
605
+ Matrix.rows(m)
606
+ end
607
+
608
+ # Compute polychoric correlation using polychoric series.
609
+ # Algorithm: AS87, by Martinson and Hamdan(1975).
610
+ #
611
+ # <b>Warning</b>: According to Drasgow(2006), this
612
+ # computation diverges greatly of joint and two-step methods.
613
+ #
614
+ def compute_polychoric_series
615
+ @nn=@n-1
616
+ @mm=@m-1
617
+ @nn7=7*@nn
618
+ @mm7=7*@mm
619
+ @mn=@n*@m
620
+ @cont=[nil]
621
+ @n.times {|j|
622
+ @m.times {|i|
623
+ @cont.push(@matrix[i,j])
624
+ }
625
+ }
626
+
627
+ pcorl=0
628
+ cont=@cont
629
+ xmean=0.0
630
+ sum=0.0
631
+ row=[]
632
+ colmn=[]
633
+ (1..@m).each do |i|
634
+ row[i]=0.0
635
+ l=i
636
+ (1..@n).each do |j|
637
+ row[i]=row[i]+cont[l]
638
+ l+=@m
639
+ end
640
+ raise "Should not be empty rows" if(row[i]==0.0)
641
+ xmean=xmean+row[i]*i.to_f
642
+ sum+=row[i]
643
+ end
644
+ xmean=xmean/sum.to_f
645
+ ymean=0.0
646
+ (1..@n).each do |j|
647
+ colmn[j]=0.0
648
+ l=(j-1)*@m
649
+ (1..@m).each do |i|
650
+ l=l+1
651
+ colmn[j]=colmn[j]+cont[l] #12
652
+ end
653
+ raise "Should not be empty cols" if colmn[j]==0
654
+ ymean=ymean+colmn[j]*j.to_f
655
+ end
656
+ ymean=ymean/sum.to_f
657
+ covxy=0.0
658
+ (1..@m).each do |i|
659
+ l=i
660
+ (1..@n).each do |j|
661
+ conxy=covxy+cont[l]*(i.to_f-xmean)*(j.to_f-ymean)
662
+ l=l+@m
663
+ end
664
+ end
665
+
666
+ chisq=0.0
667
+ (1..@m).each do |i|
668
+ l=i
669
+ (1..@n).each do |j|
670
+ chisq=chisq+((cont[l]**2).quo(row[i]*colmn[j]))
671
+ l=l+@m
672
+ end
673
+ end
674
+
675
+ phisq=chisq-1.0-(@mm*@nn).to_f / sum.to_f
676
+ phisq=0 if(phisq<0.0)
677
+ # Compute cumulative sum of columns and rows
678
+ sumc=[]
679
+ sumr=[]
680
+ sumc[1]=colmn[1]
681
+ sumr[1]=row[1]
682
+ cum=0
683
+ (1..@nn).each do |i| # goto 17 r20
684
+ cum=cum+colmn[i]
685
+ sumc[i]=cum
686
+ end
687
+ cum=0
688
+ (1..@mm).each do |i|
689
+ cum=cum+row[i]
690
+ sumr[i]=cum
691
+ end
692
+ alpha=[]
693
+ beta=[]
694
+ # Compute points of polytomy
695
+ (1..@mm).each do |i| #do 21
696
+ alpha[i]=Distribution::Normal.p_value(sumr[i] / sum.to_f)
697
+ end # 21
698
+ (1..@nn).each do |i| #do 22
699
+ beta[i]=Distribution::Normal.p_value(sumc[i] / sum.to_f)
700
+ end # 21
701
+ @alpha=alpha[1,alpha.size]
702
+ @beta=beta[1,beta.size]
703
+ @sumr=row[1,row.size]
704
+ @sumc=colmn[1,colmn.size]
705
+ @total=sum
706
+
707
+ # Compute Fourier coefficients a and b. Verified
708
+ h=hermit(alpha,@mm)
709
+ hh=hermit(beta,@nn)
710
+ a=[]
711
+ b=[]
712
+ if @m!=2 # goto 24
713
+ mmm=@m-2
714
+ (1..mmm).each do |i| #do 23
715
+ a1=sum.quo(row[i+1] * sumr[i] * sumr[i+1])
716
+ a2=sumr[i] * xnorm(alpha[i+1])
717
+ a3=sumr[i+1] * xnorm(alpha[i])
718
+ l=i
719
+ (1..7).each do |j| #do 23
720
+ a[l]=Math::sqrt(a1.quo(j))*(h[l+1] * a2 - h[l] * a3)
721
+ l=l+@mm
722
+ end
723
+ end #23
724
+ end
725
+ # 24
726
+
727
+
728
+ if @n!=2 # goto 26
729
+ nnn=@n-2
730
+ (1..nnn).each do |i| #do 25
731
+ a1=sum.quo(colmn[i+1] * sumc[i] * sumc[i+1])
732
+ a2=sumc[i] * xnorm(beta[i+1])
733
+ a3=sumc[i+1] * xnorm(beta[i])
734
+ l=i
735
+ (1..7).each do |j| #do 25
736
+ b[l]=Math::sqrt(a1.quo(j))*(a2 * hh[l+1] - a3*hh[l])
737
+ l=l+@nn
738
+ end # 25
739
+ end # 25
740
+ end
741
+ #26 r20
742
+ l = @mm
743
+ a1 = -sum * xnorm(alpha[@mm])
744
+ a2 = row[@m] * sumr[@mm]
745
+ (1..7).each do |j| # do 27
746
+ a[l]=a1 * h[l].quo(Math::sqrt(j*a2))
747
+ l=l+@mm
748
+ end # 27
749
+
750
+ l = @nn
751
+ a1 = -sum * xnorm(beta[@nn])
752
+ a2 = colmn[@n] * sumc[@nn]
753
+
754
+ (1..7).each do |j| # do 28
755
+ b[l]=a1 * hh[l].quo(Math::sqrt(j*a2))
756
+ l = l + @nn
757
+ end # 28
758
+ rcof=[]
759
+ # compute coefficients rcof of polynomial of order 8
760
+ rcof[1]=-phisq
761
+ (2..9).each do |i| # do 30
762
+ rcof[i]=0.0
763
+ end #30
764
+ m1=@mm
765
+ (1..@mm).each do |i| # do 31
766
+ m1=m1+1
767
+ m2=m1+@mm
768
+ m3=m2+@mm
769
+ m4=m3+@mm
770
+ m5=m4+@mm
771
+ m6=m5+@mm
772
+ n1=@nn
773
+ (1..@nn).each do |j| # do 31
774
+ n1=n1+1
775
+ n2=n1+@nn
776
+ n3=n2+@nn
777
+ n4=n3+@nn
778
+ n5=n4+@nn
779
+ n6=n5+@nn
780
+
781
+ rcof[3] = rcof[3] + a[i]**2 * b[j]**2
782
+
783
+ rcof[4] = rcof[4] + 2.0 * a[i] * a[m1] * b[j] * b[n1]
784
+
785
+ rcof[5] = rcof[5] + a[m1]**2 * b[n1]**2 +
786
+ 2.0 * a[i] * a[m2] * b[j] * b[n2]
787
+
788
+ rcof[6] = rcof[6] + 2.0 * (a[i] * a[m3] * b[j] *
789
+ b[n3] + a[m1] * a[m2] * b[n1] * b[n2])
790
+
791
+ rcof[7] = rcof[7] + a[m2]**2 * b[n2]**2 +
792
+ 2.0 * (a[i] * a[m4] * b[j] * b[n4] + a[m1] * a[m3] *
793
+ b[n1] * b[n3])
794
+
795
+ rcof[8] = rcof[8] + 2.0 * (a[i] * a[m5] * b[j] * b[n5] +
796
+ a[m1] * a[m4] * b[n1] * b[n4] + a[m2] * a[m3] * b[n2] * b[n3])
797
+
798
+ rcof[9] = rcof[9] + a[m3]**2 * b[n3]**2 +
799
+ 2.0 * (a[i] * a[m6] * b[j] * b[n6] + a[m1] * a[m5] * b[n1] *
800
+ b[n5] + (a[m2] * a[m4] * b[n2] * b[n4]))
801
+ end # 31
802
+ end # 31
803
+
804
+ rcof=rcof[1,rcof.size]
805
+ poly = GSL::Poly.alloc(rcof)
806
+ roots=poly.solve
807
+ rootr=[nil]
808
+ rooti=[nil]
809
+ roots.each {|c|
810
+ rootr.push(c.real)
811
+ rooti.push(c.im)
812
+ }
813
+ @rootr=rootr
814
+ @rooti=rooti
815
+
816
+ norts=0
817
+ (1..7).each do |i| # do 43
818
+
819
+ next if rooti[i]!=0.0
820
+ if (covxy>=0.0)
821
+ next if(rootr[i]<0.0 or rootr[i]>1.0)
822
+ pcorl=rootr[i]
823
+ norts=norts+1
824
+ else
825
+ if (rootr[i]>=-1.0 and rootr[i]<0.0)
826
+ pcorl=rootr[i]
827
+ norts=norts+1
828
+ end
829
+ end
830
+ end # 43
831
+ raise "Error" if norts==0
832
+ @r=pcorl
833
+
834
+ @loglike_model=-loglike(@alpha, @beta, @r)
835
+
836
+ end
837
+ #Computes vector h(mm7) of orthogonal hermite...
838
+ def hermit(s,k) # :nodoc:
839
+ h=[]
840
+ (1..k).each do |i| # do 14
841
+ l=i
842
+ ll=i+k
843
+ lll=ll+k
844
+ h[i]=1.0
845
+ h[ll]=s[i]
846
+ v=1.0
847
+ (2..6).each do |j| #do 14
848
+ w=Math::sqrt(j)
849
+ h[lll]=(s[i]*h[ll] - v*h[l]).quo(w)
850
+ v=w
851
+ l=l+k
852
+ ll=ll+k
853
+ lll=lll+k
854
+ end
855
+ end
856
+ h
857
+ end
858
+ def xnorm(t) # :nodoc:
859
+ Math::exp(-0.5 * t **2) * (1.0/Math::sqrt(2*Math::PI))
860
+ end
861
+
862
+ def report_building(generator) # :nodoc:
863
+ compute if dirty?
864
+ section=ReportBuilder::Section.new(:name=>@name)
865
+ t=ReportBuilder::Table.new(:name=>_("Contingence Table"), :header=>[""]+(@n.times.collect {|i| "Y=#{i}"})+["Total"])
866
+ @m.times do |i|
867
+ t.row(["X = #{i}"]+(@n.times.collect {|j| @matrix[i,j]}) + [@sumr[i]])
868
+ end
869
+ t.hr
870
+ t.row(["T"]+(@n.times.collect {|j| @sumc[j]})+[@total])
871
+ section.add(t)
872
+ section.add(sprintf("r: %0.4f",r))
873
+ t=ReportBuilder::Table.new(:name=>_("Thresholds"), :header=>["","Value"])
874
+ threshold_x.each_with_index {|val,i|
875
+ t.row([_("Threshold X %d") % i, sprintf("%0.4f", val)])
876
+ }
877
+ threshold_y.each_with_index {|val,i|
878
+ t.row([_("Threshold Y %d") % i, sprintf("%0.4f", val)])
879
+ }
880
+ section.add(t)
881
+ section.add(_("Test of bivariate normality: X2 = %0.3f, df = %d, p= %0.5f" % [ chi_square, chi_square_df, 1-Distribution::ChiSquare.cdf(chi_square, chi_square_df)]))
882
+ generator.parse_element(section)
883
+ end
884
+ end
885
+ end
886
+ end