statsample-bivariate-extension 0.13.2 → 0.13.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data.tar.gz.sig +0 -0
- data/History.txt +7 -0
- data/Manifest.txt +3 -0
- data/Rakefile +10 -0
- data/lib/statsample/bivariate/extension_version.rb +1 -1
- data/lib/statsample/bivariate/polychoric.rb +118 -200
- data/lib/statsample/bivariate/polychoric/processor.rb +261 -0
- data/spec/spec.opts +3 -0
- data/spec/statsample/bivariate/polychoric_processor_spec.rb +47 -0
- data/spec/statsample/bivariate/polychoric_spec.rb +38 -30
- metadata +6 -3
- metadata.gz.sig +0 -0
data.tar.gz.sig
CHANGED
Binary file
|
data/History.txt
CHANGED
@@ -1,3 +1,10 @@
|
|
1
|
+
=== 0.13.3 / 2010-06-22
|
2
|
+
|
3
|
+
* Bug fix on Processor.fd_loglike_cell_rho.
|
4
|
+
* Chi square added on spec.
|
5
|
+
* Olsson (1979) added on references
|
6
|
+
* Derivative version of joint estimate operational
|
7
|
+
|
1
8
|
=== 0.13.1 / 2010-06-21
|
2
9
|
* Better specs. Bug fix on Manifest.txt
|
3
10
|
|
data/Manifest.txt
CHANGED
@@ -6,7 +6,10 @@ data/tetmat_matrix.txt
|
|
6
6
|
data/tetmat_test.txt
|
7
7
|
lib/statsample/bivariate/extension_version.rb
|
8
8
|
lib/statsample/bivariate/polychoric.rb
|
9
|
+
lib/statsample/bivariate/polychoric/processor.rb
|
9
10
|
lib/statsample/bivariate/tetrachoric.rb
|
11
|
+
spec/spec.opts
|
10
12
|
spec/spec_helper.rb
|
13
|
+
spec/statsample/bivariate/polychoric_processor_spec.rb
|
11
14
|
spec/statsample/bivariate/polychoric_spec.rb
|
12
15
|
spec/statsample/bivariate/tetrachoric_spec.rb
|
data/Rakefile
CHANGED
@@ -2,6 +2,7 @@
|
|
2
2
|
# -*- ruby -*-
|
3
3
|
|
4
4
|
require 'rubygems'
|
5
|
+
require 'spec/rake/spectask'
|
5
6
|
require 'hoe'
|
6
7
|
|
7
8
|
Hoe.plugin :git
|
@@ -15,4 +16,13 @@ Hoe.spec 'statsample-bivariate-extension' do
|
|
15
16
|
self.developer('Claudio Bustos', 'clbustos_at_gmail.com')
|
16
17
|
end
|
17
18
|
|
19
|
+
|
20
|
+
|
21
|
+
desc "Run all spec with RCov"
|
22
|
+
Spec::Rake::SpecTask.new('test_with_rcov') do |t|
|
23
|
+
t.spec_files = FileList['spec/**/*.rb']
|
24
|
+
t.rcov = true
|
25
|
+
t.rcov_opts = ['--exclude', 'spec']
|
26
|
+
end
|
27
|
+
|
18
28
|
# vim: syntax=ruby
|
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'minimization'
|
2
|
+
require 'statsample/bivariate/polychoric/processor'
|
2
3
|
module Statsample
|
3
4
|
module Bivariate
|
4
5
|
# Calculate Polychoric correlation for two vectors.
|
@@ -44,11 +45,11 @@ module Statsample
|
|
44
45
|
# the polychoric correlation:
|
45
46
|
#
|
46
47
|
# 1. Maximum Likehood Estimator
|
47
|
-
# 2. Two-step estimator
|
48
|
+
# 2. Two-step estimator
|
48
49
|
# 3. Polychoric series estimate.
|
49
50
|
#
|
50
51
|
# By default, two-step estimation are used. You can select
|
51
|
-
# the estimation method with method attribute. Joint estimate and polychoric series requires gsl library and rb-gsl.
|
52
|
+
# the estimation method with method attribute. Joint estimate and polychoric series requires gsl library and rb-gsl. Joint estimate uses Olsson(1979) derivatives and two-step uses a derivative free method.
|
52
53
|
#
|
53
54
|
# == Use
|
54
55
|
#
|
@@ -71,69 +72,13 @@ module Statsample
|
|
71
72
|
#
|
72
73
|
# == References
|
73
74
|
#
|
74
|
-
# * Uebersax, J.S. (2006). The tetrachoric and polychoric correlation coefficients. Statistical Methods for Rater Agreement web site. 2006. Available at: http://john-uebersax.com/stat/tetra.htm . Accessed February, 11, 2010
|
75
75
|
# * Drasgow F. (2006). Polychoric and polyserial correlations. In Kotz L, Johnson NL (Eds.), Encyclopedia of statistical sciences. Vol. 7 (pp. 69-74). New York: Wiley.
|
76
|
+
# * Olsson, U. (1979) Maximum likelihood estimation of the polychoric correlation coefficient. Psychometrika 44, 443-460.
|
77
|
+
# * Uebersax, J.S. (2006). The tetrachoric and polychoric correlation coefficients. Statistical Methods for Rater Agreement web site. 2006. Available at: http://john-uebersax.com/stat/tetra.htm . Accessed February, 11, 2010
|
78
|
+
|
76
79
|
|
77
80
|
class Polychoric
|
78
81
|
include Summarizable
|
79
|
-
class Processor
|
80
|
-
attr_reader :alpha, :beta, :rho
|
81
|
-
def initialize(alpha,beta,rho)
|
82
|
-
@alpha=alpha
|
83
|
-
@beta=beta
|
84
|
-
@nr=@alpha.size+1
|
85
|
-
@nc=@beta.size+1
|
86
|
-
@rho=rho
|
87
|
-
@pd=nil
|
88
|
-
end
|
89
|
-
def bipdf(i,j)
|
90
|
-
Distribution::NormalBivariate.pdf(a(i), b(j), rho)
|
91
|
-
end
|
92
|
-
def a(i)
|
93
|
-
i < 0 ? -100 : (i==@nr-1 ? 100 : alpha[i])
|
94
|
-
end
|
95
|
-
def b(j)
|
96
|
-
j < 0 ? -100 : (j==@nc-1 ? 100 : beta[j])
|
97
|
-
end
|
98
|
-
# Equation(10) from Olsson(1979)
|
99
|
-
def fd_loglike_cell_a(i,j,k)
|
100
|
-
if k==i
|
101
|
-
Distribution::NormalBivariate.pd_cdf_x(a(k),b(j), rho) - Distribution::NormalBivariate.pd_cdf_x(a(k),b(j-1),rho)
|
102
|
-
elsif k==(i-1)
|
103
|
-
-Distribution::NormalBivariate.pd_cdf_x(a(k),b(j),rho) + Distribution::NormalBivariate.pd_cdf_x(a(k),b(j-1),rho)
|
104
|
-
else
|
105
|
-
0
|
106
|
-
end
|
107
|
-
|
108
|
-
end
|
109
|
-
# phi_ij for each i and j
|
110
|
-
# Uses equation(4) from Olsson(1979)
|
111
|
-
def pd
|
112
|
-
if @pd.nil?
|
113
|
-
@pd=@nr.times.collect{ [0] * @nc}
|
114
|
-
pc=@nr.times.collect{ [0] * @nc}
|
115
|
-
@nr.times do |i|
|
116
|
-
@nc.times do |j|
|
117
|
-
|
118
|
-
if i==@nr-1 and j==@nc-1
|
119
|
-
@pd[i][j]=1.0
|
120
|
-
else
|
121
|
-
a=(i==@nr-1) ? 100: alpha[i]
|
122
|
-
b=(j==@nc-1) ? 100: beta[j]
|
123
|
-
#puts "a:#{a} b:#{b}"
|
124
|
-
@pd[i][j]=Distribution::NormalBivariate.cdf(a, b, rho)
|
125
|
-
end
|
126
|
-
pc[i][j] = @pd[i][j]
|
127
|
-
@pd[i][j] = @pd[i][j] - pc[i-1][j] if i>0
|
128
|
-
@pd[i][j] = @pd[i][j] - pc[i][j-1] if j>0
|
129
|
-
@pd[i][j] = @pd[i][j] + pc[i-1][j-1] if (i>0 and j>0)
|
130
|
-
end
|
131
|
-
end
|
132
|
-
end
|
133
|
-
@pd
|
134
|
-
end
|
135
|
-
end
|
136
|
-
|
137
82
|
include DirtyMemoize
|
138
83
|
# Name of the analysis
|
139
84
|
attr_accessor :name
|
@@ -258,133 +203,8 @@ module Statsample
|
|
258
203
|
def chi_square_df
|
259
204
|
(@nr*@nc)-@nc-@nr
|
260
205
|
end
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
# Retrieve all cell probabilities for givens alpha, beta and rho
|
266
|
-
def cell_probabilities(alpha,beta,rho)
|
267
|
-
pd=@nr.times.collect{ [0] * @nc}
|
268
|
-
pc=@nr.times.collect{ [0] * @nc}
|
269
|
-
@nr.times do |i|
|
270
|
-
@nc.times do |j|
|
271
|
-
|
272
|
-
if i==@nr-1 and j==@nc-1
|
273
|
-
pd[i][j]=1.0
|
274
|
-
else
|
275
|
-
a=(i==@nr-1) ? 100: alpha[i]
|
276
|
-
b=(j==@nc-1) ? 100: beta[j]
|
277
|
-
#puts "a:#{a} b:#{b}"
|
278
|
-
pd[i][j]=Distribution::NormalBivariate.cdf(a, b, rho)
|
279
|
-
end
|
280
|
-
pc[i][j] = pd[i][j]
|
281
|
-
pd[i][j] = pd[i][j] - pc[i-1][j] if i>0
|
282
|
-
pd[i][j] = pd[i][j] - pc[i][j-1] if j>0
|
283
|
-
pd[i][j] = pd[i][j] + pc[i-1][j-1] if (i>0 and j>0)
|
284
|
-
end
|
285
|
-
end
|
286
|
-
@pd=pd
|
287
|
-
pd
|
288
|
-
end
|
289
|
-
def loglike(alpha,beta,rho)
|
290
|
-
if rho.abs>0.9999
|
291
|
-
rho= (rho>0) ? 0.9999 : -0.9999
|
292
|
-
end
|
293
|
-
pr=Processor.new(alpha,beta,rho)
|
294
|
-
loglike=0
|
295
|
-
|
296
|
-
|
297
|
-
@nr.times do |i|
|
298
|
-
@nc.times do |j|
|
299
|
-
res=pr.pd[i][j]+EPSILON
|
300
|
-
loglike+= @matrix[i,j] * Math::log( res )
|
301
|
-
end
|
302
|
-
end
|
303
|
-
-loglike
|
304
|
-
end
|
305
|
-
# First derivate for rho
|
306
|
-
# Uses equation (9) from Olsson(1979)
|
307
|
-
def fd_loglike_rho(alpha,beta,rho)
|
308
|
-
if rho.abs>0.9999
|
309
|
-
rho= (rho>0) ? 0.9999 : -0.9999
|
310
|
-
end
|
311
|
-
total=0
|
312
|
-
pr=Processor.new(alpha,beta,rho)
|
313
|
-
@nr.times do |i|
|
314
|
-
@nc.times do |j|
|
315
|
-
pi=pr.pd[i][j] + EPSILON
|
316
|
-
total+= (@matrix[i,j] / pi) * (pr.bipdf(i,j)-pr.bipdf(i-1,j)-pr.bipdf(i,j-1)+pr.bipdf(i-1,j-1))
|
317
|
-
end
|
318
|
-
end
|
319
|
-
total
|
320
|
-
end
|
321
206
|
|
322
|
-
|
323
|
-
def fd_loglike_a(alpha,beta,rho,k)
|
324
|
-
fd_loglike_a_eq6(alpha,beta,rho,k)
|
325
|
-
end
|
326
|
-
# Uses equation (6) from Olsson(1979)
|
327
|
-
def fd_loglike_a_eq6(alpha,beta,rho,k)
|
328
|
-
if rho.abs>0.9999
|
329
|
-
rho= (rho>0) ? 0.9999 : -0.9999
|
330
|
-
end
|
331
|
-
pr=Processor.new(alpha,beta,rho)
|
332
|
-
total=0
|
333
|
-
pd=pr.pd
|
334
|
-
@nr.times do |i|
|
335
|
-
@nc.times do |j|
|
336
|
-
total+=@matrix[i,j].quo(pd[i][j]+EPSILON) * pr.fd_loglike_cell_a(i,j,k)
|
337
|
-
end
|
338
|
-
end
|
339
|
-
total
|
340
|
-
end
|
341
|
-
# Uses equation(13) from Olsson(1979)
|
342
|
-
def fd_loglike_a_eq13(alpha,beta,rho,k)
|
343
|
-
if rho.abs>0.9999
|
344
|
-
rho= (rho>0) ? 0.9999 : -0.9999
|
345
|
-
end
|
346
|
-
pr=Processor.new(alpha,beta,rho)
|
347
|
-
total=0
|
348
|
-
a_k=pr.a(k)
|
349
|
-
pd=pr.pd
|
350
|
-
@nc.times do |j|
|
351
|
-
#puts "j: #{j}"
|
352
|
-
#puts "b #{j} : #{b.call(j)}"
|
353
|
-
#puts "b #{j-1} : #{b.call(j-1)}"
|
354
|
-
|
355
|
-
e_1=@matrix[k,j].quo(pd[k][j]+EPSILON) - @matrix[k+1,j].quo(pd[k+1][j]+EPSILON)
|
356
|
-
e_2=Distribution::Normal.pdf(a_k)
|
357
|
-
e_3=Distribution::Normal.cdf((pr.b(j)-rho*a_k).quo(Math::sqrt(1-rho**2))) - Distribution::Normal.cdf((pr.b(j-1)-rho*a_k).quo(Math::sqrt(1-rho**2)))
|
358
|
-
#puts "val #{j}: #{e_1} | #{e_2} | #{e_3}"
|
359
|
-
|
360
|
-
total+= e_1*e_2*e_3
|
361
|
-
end
|
362
|
-
total
|
363
|
-
end
|
364
|
-
# First derivative for beta_m
|
365
|
-
# Uses equation(14) from Olsson(1979)
|
366
|
-
def fd_loglike_b(alpha,beta,rho,m)
|
367
|
-
if rho.abs>0.9999
|
368
|
-
rho= (rho>0) ? 0.9999 : -0.9999
|
369
|
-
end
|
370
|
-
pr=Processor.new(alpha,beta,rho)
|
371
|
-
total=0
|
372
|
-
b_m=pr.b m
|
373
|
-
pd=pr.pd
|
374
|
-
@nr.times do |i|
|
375
|
-
#puts "j: #{j}"
|
376
|
-
#puts "b #{j} : #{b.call(j)}"
|
377
|
-
#puts "b #{j-1} : #{b.call(j-1)}"
|
378
|
-
|
379
|
-
e_1=@matrix[i,m].quo(pd[i][m]+EPSILON) - @matrix[i,m+1].quo(pd[i][m+1]+EPSILON)
|
380
|
-
e_2=Distribution::Normal.pdf(b_m)
|
381
|
-
e_3=Distribution::Normal.cdf((pr.a(i)-rho*b_m).quo(Math::sqrt(1-rho**2))) - Distribution::Normal.cdf((pr.a(i-1)-rho*b_m).quo(Math::sqrt(1-rho**2)))
|
382
|
-
#puts "val #{j}: #{e_1} | #{e_2} | #{e_3}"
|
383
|
-
|
384
|
-
total+= e_1*e_2*e_3
|
385
|
-
end
|
386
|
-
total
|
387
|
-
end
|
207
|
+
|
388
208
|
|
389
209
|
|
390
210
|
def compute_basic_parameters
|
@@ -442,7 +262,8 @@ module Statsample
|
|
442
262
|
def compute_two_step_mle_drasgow_ruby #:nodoc:
|
443
263
|
|
444
264
|
f=proc {|rho|
|
445
|
-
|
265
|
+
pr=Processor.new(@alpha,@beta, rho, @matrix)
|
266
|
+
pr.loglike
|
446
267
|
}
|
447
268
|
@log=_("Minimizing using GSL Brent method\n")
|
448
269
|
min=Minimization::Brent.new(-0.9999,0.9999,f)
|
@@ -459,8 +280,9 @@ module Statsample
|
|
459
280
|
|
460
281
|
def compute_two_step_mle_drasgow_gsl #:nodoc:
|
461
282
|
|
462
|
-
fn1=GSL::Function.alloc {|rho|
|
463
|
-
|
283
|
+
fn1=GSL::Function.alloc {|rho|
|
284
|
+
pr=Processor.new(@alpha,@beta, rho, @matrix)
|
285
|
+
pr.loglike
|
464
286
|
}
|
465
287
|
@iteration = 0
|
466
288
|
max_iter = @max_iterations
|
@@ -498,12 +320,101 @@ module Statsample
|
|
498
320
|
@loglike_model=-gmf.f_minimum
|
499
321
|
end
|
500
322
|
|
323
|
+
|
324
|
+
def compute_derivatives_vector(v,df)
|
325
|
+
new_rho=v[0]
|
326
|
+
new_alpha=v[1, @nr-1]
|
327
|
+
new_beta=v[@nr, @nc-1]
|
328
|
+
if new_rho.abs>0.9999
|
329
|
+
new_rho= (new_rho>0) ? 0.9999 : -0.9999
|
330
|
+
end
|
331
|
+
pr=Processor.new(new_alpha,new_beta,new_rho,@matrix)
|
332
|
+
|
333
|
+
df[0]=-pr.fd_loglike_rho
|
334
|
+
new_alpha.to_a.each_with_index {|v,i|
|
335
|
+
df[i+1]=-pr.fd_loglike_a(i)
|
336
|
+
}
|
337
|
+
offset=new_alpha.size+1
|
338
|
+
new_beta.to_a.each_with_index {|v,i|
|
339
|
+
df[offset+i]=-pr.fd_loglike_b(i)
|
340
|
+
}
|
341
|
+
end
|
342
|
+
|
343
|
+
def compute_one_step_mle
|
344
|
+
compute_one_step_mle_with_derivatives
|
345
|
+
end
|
346
|
+
|
347
|
+
|
348
|
+
def compute_one_step_mle_with_derivatives
|
349
|
+
# Get initial values with two-step aproach
|
350
|
+
compute_two_step_mle_drasgow
|
351
|
+
# Start iteration with past values
|
352
|
+
rho=@r
|
353
|
+
cut_alpha=@alpha
|
354
|
+
cut_beta=@beta
|
355
|
+
parameters=[rho]+cut_alpha+cut_beta
|
356
|
+
np=@nc-1+@nr
|
357
|
+
|
358
|
+
|
359
|
+
loglike_f = Proc.new { |v, params|
|
360
|
+
new_rho=v[0]
|
361
|
+
new_alpha=v[1, @nr-1]
|
362
|
+
new_beta=v[@nr, @nc-1]
|
363
|
+
pr=Processor.new(new_alpha,new_beta,new_rho,@matrix)
|
364
|
+
pr.loglike
|
365
|
+
}
|
366
|
+
|
367
|
+
loglike_df = Proc.new {|v, params, df |
|
368
|
+
compute_derivatives_vector(v,df)
|
369
|
+
}
|
370
|
+
|
371
|
+
|
372
|
+
my_func = GSL::MultiMin::Function_fdf.alloc(loglike_f,loglike_df, np)
|
373
|
+
my_func.set_params(parameters) # parameters
|
374
|
+
|
375
|
+
x = GSL::Vector.alloc(parameters.dup)
|
376
|
+
minimizer = GSL::MultiMin::FdfMinimizer.alloc('conjugate_pr',np)
|
377
|
+
minimizer.set(my_func, x, 1, 1e-3)
|
378
|
+
|
379
|
+
iter = 0
|
380
|
+
message=""
|
381
|
+
begin_time=Time.new
|
382
|
+
begin
|
383
|
+
iter += 1
|
384
|
+
status = minimizer.iterate()
|
385
|
+
#p minimizer.f
|
386
|
+
#p minimizer.gradient
|
387
|
+
status = minimizer.test_gradient(1e-3)
|
388
|
+
if status == GSL::SUCCESS
|
389
|
+
total_time=Time.new-begin_time
|
390
|
+
message+="Joint MLE converged to minimum on %0.3f seconds at\n" % total_time
|
391
|
+
end
|
392
|
+
x = minimizer.x
|
393
|
+
message+= sprintf("%5d iterations", iter)+"\n";
|
394
|
+
message+= "args="
|
395
|
+
for i in 0...np do
|
396
|
+
message+=sprintf("%10.3e ", x[i])
|
397
|
+
end
|
398
|
+
message+=sprintf("f() = %7.3f\n" , minimizer.f)+"\n";
|
399
|
+
end while status == GSL::CONTINUE and iter < @max_iterations
|
400
|
+
|
401
|
+
@iteration=iter
|
402
|
+
@log+=message
|
403
|
+
@r=minimizer.x[0]
|
404
|
+
@alpha=minimizer.x[1,@nr-1].to_a
|
405
|
+
@beta=minimizer.x[@nr,@nc-1].to_a
|
406
|
+
@loglike_model= -minimizer.minimum
|
407
|
+
|
408
|
+
pr=Processor.new(@alpha,@beta,@r,@matrix)
|
409
|
+
|
410
|
+
end
|
411
|
+
|
501
412
|
# Compute Polychoric correlation with joint estimate.
|
502
413
|
# Rho and thresholds are estimated at same time.
|
503
414
|
# Code based on R package "polycor", by J.Fox.
|
504
415
|
#
|
505
416
|
|
506
|
-
def
|
417
|
+
def compute_one_step_mle_without_derivatives
|
507
418
|
# Get initial values with two-step aproach
|
508
419
|
compute_two_step_mle_drasgow
|
509
420
|
# Start iteration with past values
|
@@ -511,10 +422,12 @@ module Statsample
|
|
511
422
|
cut_alpha=@alpha
|
512
423
|
cut_beta=@beta
|
513
424
|
parameters=[rho]+cut_alpha+cut_beta
|
425
|
+
np=@nc-1+@nr
|
426
|
+
|
514
427
|
minimization = Proc.new { |v, params|
|
515
|
-
|
516
|
-
|
517
|
-
|
428
|
+
new_rho=v[0]
|
429
|
+
new_alpha=v[1, @nr-1]
|
430
|
+
new_beta=v[@nr, @nc-1]
|
518
431
|
|
519
432
|
#puts "f'rho=#{fd_loglike_rho(alpha,beta,rho)}"
|
520
433
|
#(@nr-1).times {|k|
|
@@ -525,10 +438,12 @@ module Statsample
|
|
525
438
|
#(@nc-1).times {|k|
|
526
439
|
# puts "f'b(#{k}) = #{fd_loglike_b(alpha,beta,rho,k)}"
|
527
440
|
#}
|
441
|
+
pr=Processor.new(new_alpha,new_beta,new_rho,@matrix)
|
528
442
|
|
529
|
-
|
443
|
+
df=Array.new(np)
|
444
|
+
#compute_derivatives_vector(v,df)
|
445
|
+
pr.loglike
|
530
446
|
}
|
531
|
-
np=@nc-1+@nr
|
532
447
|
my_func = GSL::MultiMin::Function.alloc(minimization, np)
|
533
448
|
my_func.set_params(parameters) # parameters
|
534
449
|
|
@@ -542,12 +457,14 @@ module Statsample
|
|
542
457
|
|
543
458
|
iter = 0
|
544
459
|
message=""
|
460
|
+
begin_time=Time.new
|
545
461
|
begin
|
546
462
|
iter += 1
|
547
463
|
status = minimizer.iterate()
|
548
464
|
status = minimizer.test_size(@epsilon)
|
549
465
|
if status == GSL::SUCCESS
|
550
|
-
|
466
|
+
total_time=Time.new-begin_time
|
467
|
+
message="Joint MLE converged to minimum on %0.3f seconds at\n" % total_time
|
551
468
|
end
|
552
469
|
x = minimizer.x
|
553
470
|
message+= sprintf("%5d iterations", iter)+"\n";
|
@@ -830,8 +747,8 @@ module Statsample
|
|
830
747
|
end # 43
|
831
748
|
raise "Error" if norts==0
|
832
749
|
@r=pcorl
|
833
|
-
|
834
|
-
@loglike_model=-loglike
|
750
|
+
pr=Processor.new(@alpha,@beta,@r,@matrix)
|
751
|
+
@loglike_model=-pr.loglike
|
835
752
|
|
836
753
|
end
|
837
754
|
#Computes vector h(mm7) of orthogonal hermite...
|
@@ -878,7 +795,8 @@ module Statsample
|
|
878
795
|
t.row([_("Threshold Y %d") % i, sprintf("%0.4f", val)])
|
879
796
|
}
|
880
797
|
section.add(t)
|
881
|
-
section.add(_("
|
798
|
+
section.add(_("Iterations: %d") % @iteration)
|
799
|
+
section.add(_("Test of bivariate normality: X^2 = %0.3f, df = %d, p= %0.5f" % [ chi_square, chi_square_df, 1-Distribution::ChiSquare.cdf(chi_square, chi_square_df)]))
|
882
800
|
generator.parse_element(section)
|
883
801
|
end
|
884
802
|
end
|
@@ -0,0 +1,261 @@
|
|
1
|
+
module Statsample
|
2
|
+
module Bivariate
|
3
|
+
class Polychoric
|
4
|
+
# Provides statistics for a given combination of rho, alpha and beta and contingence table.
|
5
|
+
class Processor
|
6
|
+
attr_reader :alpha, :beta, :rho, :matrix
|
7
|
+
EPSILON=1e-10
|
8
|
+
def initialize(alpha,beta,rho,matrix=nil)
|
9
|
+
@alpha=alpha
|
10
|
+
@beta=beta
|
11
|
+
@matrix=matrix
|
12
|
+
@nr=@alpha.size+1
|
13
|
+
@nc=@beta.size+1
|
14
|
+
@rho=rho
|
15
|
+
@pd=nil
|
16
|
+
end
|
17
|
+
|
18
|
+
def bipdf(i,j)
|
19
|
+
Distribution::NormalBivariate.pdf(a(i), b(j), rho)
|
20
|
+
end
|
21
|
+
|
22
|
+
def loglike
|
23
|
+
rho=@rho
|
24
|
+
if rho.abs>0.9999
|
25
|
+
rho= (rho>0) ? 0.9999 : -0.9999
|
26
|
+
end
|
27
|
+
loglike=0
|
28
|
+
@nr.times do |i|
|
29
|
+
@nc.times do |j|
|
30
|
+
res=pd[i][j]+EPSILON
|
31
|
+
loglike+= @matrix[i,j] * Math::log( res )
|
32
|
+
end
|
33
|
+
end
|
34
|
+
-loglike
|
35
|
+
end
|
36
|
+
|
37
|
+
def a(i)
|
38
|
+
raise "Index #{i} should be <= #{@nr-1}" if i>@nr-1
|
39
|
+
i < 0 ? -100 : (i==@nr-1 ? 100 : alpha[i])
|
40
|
+
end
|
41
|
+
def b(j)
|
42
|
+
raise "Index #{j} should be <= #{@nc-1}" if j>@nc-1
|
43
|
+
j < 0 ? -100 : (j==@nc-1 ? 100 : beta[j])
|
44
|
+
end
|
45
|
+
|
46
|
+
def eq12(u,v)
|
47
|
+
Distribution::Normal.pdf(u)*Distribution::Normal.cdf((v-rho*u).quo( Math::sqrt(1-rho**2)))
|
48
|
+
end
|
49
|
+
|
50
|
+
def eq12b(u,v)
|
51
|
+
Distribution::Normal.pdf(v) * Distribution::Normal.cdf((u-rho*v).quo( Math::sqrt(1-rho**2)))
|
52
|
+
|
53
|
+
end
|
54
|
+
# Equation(8) from Olsson(1979)
|
55
|
+
def fd_loglike_cell_rho(i, j)
|
56
|
+
bipdf(i,j) - bipdf(i-1,j) - bipdf(i, j-1) + bipdf(i-1, j-1)
|
57
|
+
end
|
58
|
+
# Equation(10) from Olsson(1979)
|
59
|
+
def fd_loglike_cell_a(i, j, k)
|
60
|
+
=begin
|
61
|
+
if k==i
|
62
|
+
Distribution::NormalBivariate.pd_cdf_x(a(k),b(j), rho) - Distribution::NormalBivariate.pd_cdf_x(a(k),b(j-1),rho)
|
63
|
+
elsif k==(i-1)
|
64
|
+
-Distribution::NormalBivariate.pd_cdf_x(a(k),b(j),rho) + Distribution::NormalBivariate.pd_cdf_x(a(k),b(j-1),rho)
|
65
|
+
else
|
66
|
+
0
|
67
|
+
end
|
68
|
+
=end
|
69
|
+
if k==i
|
70
|
+
eq12(a(k),b(j))-eq12(a(k), b(j-1))
|
71
|
+
elsif k==(i-1)
|
72
|
+
-eq12(a(k),b(j))+eq12(a(k), b(j-1))
|
73
|
+
else
|
74
|
+
0
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
def fd_loglike_cell_b(i, j, m)
|
79
|
+
if m==j
|
80
|
+
eq12b(a(i),b(m))-eq12b(a(i-1),b(m))
|
81
|
+
elsif m==(j-1)
|
82
|
+
-eq12b(a(i),b(m))+eq12b(a(i-1),b(m))
|
83
|
+
else
|
84
|
+
0
|
85
|
+
end
|
86
|
+
=begin
|
87
|
+
if m==j
|
88
|
+
Distribution::NormalBivariate.pd_cdf_x(a(i),b(m), rho) - Distribution::NormalBivariate.pd_cdf_x(a(i-1),b(m),rho)
|
89
|
+
elsif m==(j-1)
|
90
|
+
-Distribution::NormalBivariate.pd_cdf_x(a(i),b(m),rho) + Distribution::NormalBivariate.pd_cdf_x(a(i-1),b(m),rho)
|
91
|
+
else
|
92
|
+
0
|
93
|
+
end
|
94
|
+
=end
|
95
|
+
|
96
|
+
|
97
|
+
end
|
98
|
+
|
99
|
+
# phi_ij for each i and j
|
100
|
+
# Uses equation(4) from Olsson(1979)
|
101
|
+
def pd
|
102
|
+
if @pd.nil?
|
103
|
+
@pd=@nr.times.collect{ [0] * @nc}
|
104
|
+
pc=@nr.times.collect{ [0] * @nc}
|
105
|
+
@nr.times do |i|
|
106
|
+
@nc.times do |j|
|
107
|
+
|
108
|
+
if i==@nr-1 and j==@nc-1
|
109
|
+
@pd[i][j]=1.0
|
110
|
+
else
|
111
|
+
a=(i==@nr-1) ? 100: alpha[i]
|
112
|
+
b=(j==@nc-1) ? 100: beta[j]
|
113
|
+
#puts "a:#{a} b:#{b}"
|
114
|
+
@pd[i][j]=Distribution::NormalBivariate.cdf(a, b, rho)
|
115
|
+
end
|
116
|
+
pc[i][j] = @pd[i][j]
|
117
|
+
@pd[i][j] = @pd[i][j] - pc[i-1][j] if i>0
|
118
|
+
@pd[i][j] = @pd[i][j] - pc[i][j-1] if j>0
|
119
|
+
@pd[i][j] = @pd[i][j] + pc[i-1][j-1] if (i>0 and j>0)
|
120
|
+
end
|
121
|
+
end
|
122
|
+
end
|
123
|
+
@pd
|
124
|
+
end
|
125
|
+
|
126
|
+
# First derivate for rho
|
127
|
+
# Uses equation (9) from Olsson(1979)
|
128
|
+
|
129
|
+
def fd_loglike_rho
|
130
|
+
rho=@rho
|
131
|
+
if rho.abs>0.9999
|
132
|
+
rho= (rho>0) ? 0.9999 : -0.9999
|
133
|
+
end
|
134
|
+
total=0
|
135
|
+
@nr.times do |i|
|
136
|
+
@nc.times do |j|
|
137
|
+
pi=pd[i][j] + EPSILON
|
138
|
+
total+= (@matrix[i,j].quo(pi)) * (bipdf(i,j)-bipdf(i-1,j)-bipdf(i,j-1)+bipdf(i-1,j-1))
|
139
|
+
end
|
140
|
+
end
|
141
|
+
total
|
142
|
+
end
|
143
|
+
|
144
|
+
# First derivative for alpha_k
|
145
|
+
# Uses equation (6)
|
146
|
+
def fd_loglike_a(k)
|
147
|
+
fd_loglike_a_eq6(k)
|
148
|
+
end
|
149
|
+
|
150
|
+
|
151
|
+
|
152
|
+
# Uses equation (6) from Olsson(1979)
|
153
|
+
def fd_loglike_a_eq6(k)
|
154
|
+
rho=@rho
|
155
|
+
if rho.abs>0.9999
|
156
|
+
rho= (rho>0) ? 0.9999 : -0.9999
|
157
|
+
end
|
158
|
+
total=0
|
159
|
+
@nr.times do |i|
|
160
|
+
@nc.times do |j|
|
161
|
+
total+=@matrix[i,j].quo(pd[i][j]+EPSILON) * fd_loglike_cell_a(i,j,k)
|
162
|
+
end
|
163
|
+
end
|
164
|
+
total
|
165
|
+
end
|
166
|
+
|
167
|
+
|
168
|
+
# Uses equation(13) from Olsson(1979)
|
169
|
+
def fd_loglike_a_eq13(k)
|
170
|
+
rho=@rho
|
171
|
+
if rho.abs>0.9999
|
172
|
+
rho= (rho>0) ? 0.9999 : -0.9999
|
173
|
+
end
|
174
|
+
total=0
|
175
|
+
a_k=a(k)
|
176
|
+
@nc.times do |j|
|
177
|
+
#puts "j: #{j}"
|
178
|
+
#puts "b #{j} : #{b.call(j)}"
|
179
|
+
#puts "b #{j-1} : #{b.call(j-1)}"
|
180
|
+
|
181
|
+
e_1=@matrix[k,j].quo(pd[k][j]+EPSILON) - @matrix[k+1,j].quo(pd[k+1][j]+EPSILON)
|
182
|
+
e_2=Distribution::Normal.pdf(a_k)
|
183
|
+
e_3=Distribution::Normal.cdf((b(j)-rho*a_k).quo(Math::sqrt(1-rho**2))) - Distribution::Normal.cdf((b(j-1)-rho*a_k).quo(Math::sqrt(1-rho**2)))
|
184
|
+
#puts "val #{j}: #{e_1} | #{e_2} | #{e_3}"
|
185
|
+
total+= e_1*e_2*e_3
|
186
|
+
end
|
187
|
+
total
|
188
|
+
end
|
189
|
+
# First derivative for b
|
190
|
+
# Uses equation 6 (Olsson, 1979)
|
191
|
+
def fd_loglike_b_eq6(m)
|
192
|
+
rho=@rho
|
193
|
+
if rho.abs>0.9999
|
194
|
+
rho= (rho>0) ? 0.9999 : -0.9999
|
195
|
+
end
|
196
|
+
total=0
|
197
|
+
@nr.times do |i|
|
198
|
+
@nc.times do |j|
|
199
|
+
total+=@matrix[i,j].quo(pd[i][j]+EPSILON) * fd_loglike_cell_b(i,j,m)
|
200
|
+
end
|
201
|
+
end
|
202
|
+
total
|
203
|
+
end
|
204
|
+
# First derivative for beta_m.
|
205
|
+
# Uses equation 6 (Olsson,1979)
|
206
|
+
def fd_loglike_b(m)
|
207
|
+
fd_loglike_b_eq14(m)
|
208
|
+
end
|
209
|
+
# First derivative for beta_m
|
210
|
+
# Uses equation(14) from Olsson(1979)
|
211
|
+
def fd_loglike_b_eq14(m)
|
212
|
+
rho=@rho
|
213
|
+
if rho.abs>0.9999
|
214
|
+
rho= (rho>0) ? 0.9999 : -0.9999
|
215
|
+
end
|
216
|
+
total=0
|
217
|
+
b_m=b(m)
|
218
|
+
@nr.times do |i|
|
219
|
+
e_1=@matrix[i,m].quo(pd[i][m]+EPSILON) - @matrix[i,m+1].quo(pd[i][m+1]+EPSILON)
|
220
|
+
e_2=Distribution::Normal.pdf(b_m)
|
221
|
+
e_3=Distribution::Normal.cdf((a(i)-rho*b_m).quo(Math::sqrt(1-rho**2))) - Distribution::Normal.cdf((a(i-1)-rho*b_m).quo(Math::sqrt(1-rho**2)))
|
222
|
+
#puts "val #{j}: #{e_1} | #{e_2} | #{e_3}"
|
223
|
+
|
224
|
+
total+= e_1*e_2*e_3
|
225
|
+
end
|
226
|
+
total
|
227
|
+
end
|
228
|
+
# Returns the derivative correct according to order
|
229
|
+
def im_function(t,i,j)
|
230
|
+
if t==0
|
231
|
+
fd_loglike_cell_rho(i,j)
|
232
|
+
elsif t>=1 and t<=@alpha.size
|
233
|
+
fd_loglike_cell_a(i,j,t-1)
|
234
|
+
elsif t>=@alpha.size+1 and t<=(@alpha.size+@beta.size)
|
235
|
+
fd_loglike_cell_b(i,j,t-@alpha.size-1)
|
236
|
+
else
|
237
|
+
raise "incorrect #{t}"
|
238
|
+
end
|
239
|
+
end
|
240
|
+
def information_matrix
|
241
|
+
total_n=@matrix.total_sum
|
242
|
+
vars=@alpha.size+@beta.size+1
|
243
|
+
matrix=vars.times.map { vars.times.map {0}}
|
244
|
+
vars.times do |m|
|
245
|
+
vars.times do |n|
|
246
|
+
total=0
|
247
|
+
(@nr-1).times do |i|
|
248
|
+
(@nc-1).times do |j|
|
249
|
+
total+=(1.quo(pd[i][j]+EPSILON)) * im_function(m,i,j) * im_function(n,i,j)
|
250
|
+
end
|
251
|
+
end
|
252
|
+
matrix[m][n]=total_n*total
|
253
|
+
end
|
254
|
+
end
|
255
|
+
m=::Matrix.rows(matrix)
|
256
|
+
|
257
|
+
end
|
258
|
+
end # Processor
|
259
|
+
end # Polychoric
|
260
|
+
end # Bivariate
|
261
|
+
end # Statsample
|
data/spec/spec.opts
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
$:.unshift(File.dirname(__FILE__)+"/../../")
|
2
|
+
require 'spec_helper'
|
3
|
+
describe Statsample::Bivariate::Polychoric::Processor do
|
4
|
+
before do
|
5
|
+
@matrix=Matrix[[58,52,1],[26,58,3],[8,12,9]]
|
6
|
+
@alpha=[-0.027, 1.137]
|
7
|
+
@beta=[-0.240, 1.1578]
|
8
|
+
@rho=0.420
|
9
|
+
@processor=Statsample::Bivariate::Polychoric::Processor.new(@alpha,@beta,@rho,@matrix)
|
10
|
+
end
|
11
|
+
it "im_function method should return correct values according to index" do
|
12
|
+
@processor.im_function(0,0,0).should==@processor.fd_loglike_cell_rho(0,0)
|
13
|
+
@processor.im_function(1,0,0).should==@processor.fd_loglike_cell_a(0,0,0)
|
14
|
+
@processor.im_function(2,0,0).should==@processor.fd_loglike_cell_a(0,0,1)
|
15
|
+
@processor.im_function(3,1,0).should==@processor.fd_loglike_cell_b(1,0,0)
|
16
|
+
@processor.im_function(4,0,1).should==@processor.fd_loglike_cell_b(0,1,1)
|
17
|
+
lambda {@processor.im_function(5)}.should raise_error
|
18
|
+
|
19
|
+
end
|
20
|
+
it "should return informacion matrix" do
|
21
|
+
p @processor.information_matrix.inverse
|
22
|
+
end
|
23
|
+
it "fd a loglike should be equal usign eq.6 and eq.13" do
|
24
|
+
2.times {|k|
|
25
|
+
@processor.fd_loglike_a_eq6(k).should be_close @processor.fd_loglike_a_eq13(k), 1e-10
|
26
|
+
}
|
27
|
+
end
|
28
|
+
it "fd b loglike should be equal usign eq.6 and eq.14" do
|
29
|
+
2.times {|m|
|
30
|
+
@processor.fd_loglike_b_eq6(m).should be_close @processor.fd_loglike_b_eq14(m), 1e-10
|
31
|
+
}
|
32
|
+
end
|
33
|
+
it "fd rho should be equal usign fd_loglike_cell_rho and fd_loglike_rho" do
|
34
|
+
total=0
|
35
|
+
|
36
|
+
nr=@alpha.size+1
|
37
|
+
nc=@beta.size+1
|
38
|
+
nr.times do |i|
|
39
|
+
nc.times do |j|
|
40
|
+
pi=@processor.pd[i][j] + 1e-10
|
41
|
+
total+= (@matrix[i,j].quo(pi)) * @processor.fd_loglike_cell_rho(i,j)
|
42
|
+
end
|
43
|
+
end
|
44
|
+
total.should==@processor.fd_loglike_rho
|
45
|
+
end
|
46
|
+
|
47
|
+
end
|
@@ -14,13 +14,34 @@ describe "Statsample::Bivariate polychoric extension" do
|
|
14
14
|
end
|
15
15
|
|
16
16
|
describe Statsample::Bivariate::Polychoric do
|
17
|
+
|
17
18
|
before do
|
18
19
|
matrix=Matrix[[58,52,1],[26,58,3],[8,12,9]]
|
19
20
|
@poly=Statsample::Bivariate::Polychoric.new(matrix)
|
20
21
|
end
|
21
22
|
it "should have summary.size > 0" do
|
23
|
+
@poly.method=:two_step
|
22
24
|
@poly.summary.size.should>0
|
23
25
|
end
|
26
|
+
def check_joint
|
27
|
+
@poly.r.should be_close(0.4192, 0.0001)
|
28
|
+
@poly.threshold_x[0].should be_close(-0.0297, 0.0001)
|
29
|
+
@poly.threshold_x[1].should be_close(1.1331, 0.0001)
|
30
|
+
|
31
|
+
@poly.threshold_y[0].should be_close(-0.2421, 0.0001)
|
32
|
+
@poly.threshold_y[1].should be_close(1.5938 ,0.0001)
|
33
|
+
@poly.chi_square.should be_close(11.54,0.01)
|
34
|
+
end
|
35
|
+
it "compute joint estimation (without derivative) using gsl" do
|
36
|
+
@poly.compute_one_step_mle_without_derivatives
|
37
|
+
check_joint
|
38
|
+
|
39
|
+
end
|
40
|
+
it "compute joint estimation (with derivative) using gsl" do
|
41
|
+
@poly.compute_one_step_mle_with_derivatives
|
42
|
+
check_joint
|
43
|
+
end
|
44
|
+
|
24
45
|
def check_two_step
|
25
46
|
@poly.r.should be_close(0.420, 0.001)
|
26
47
|
@poly.threshold_y[0].should be_close(-0.240 ,0.001)
|
@@ -32,37 +53,24 @@ describe Statsample::Bivariate::Polychoric do
|
|
32
53
|
@poly.compute_two_step_mle_drasgow_ruby
|
33
54
|
check_two_step
|
34
55
|
end
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
it "compute polychoric series using gsl" do
|
41
|
-
@poly.method=:polychoric_series
|
42
|
-
@poly.compute
|
43
|
-
|
44
|
-
@poly.r.should be_close(0.556, 0.001)
|
45
|
-
@poly.threshold_y[0].should be_close(-0.240 ,0.001)
|
46
|
-
@poly.threshold_x[0].should be_close(-0.027 ,0.001)
|
47
|
-
@poly.threshold_y[1].should be_close(1.578 ,0.001)
|
48
|
-
@poly.threshold_x[1].should be_close(1.137 ,0.001)
|
49
|
-
end
|
50
|
-
it "compute joint estimation (without derivative) using gsl" do
|
51
|
-
@poly.method=:joint
|
52
|
-
@poly.compute
|
53
|
-
@poly.method.should==:joint
|
54
|
-
@poly.r.should be_close(0.4192, 0.0001)
|
55
|
-
@poly.threshold_y[0].should be_close(-0.2421, 0.0001)
|
56
|
-
@poly.threshold_x[0].should be_close(-0.0297, 0.0001)
|
57
|
-
@poly.threshold_y[1].should be_close(1.5938 ,0.0001)
|
58
|
-
@poly.threshold_x[1].should be_close(1.1331, 0.0001)
|
59
|
-
end
|
60
|
-
else
|
61
|
-
it "compute two-step with gsl requires rb-gsl"
|
62
|
-
it "compute polychoric series requires rb-gsl"
|
63
|
-
it "compute joint estimation requires rb-gsl"
|
56
|
+
|
57
|
+
it "compute two-step with gsl" do
|
58
|
+
pending("requires rb-gsl") unless Statsample.has_gsl?
|
59
|
+
@poly.compute_two_step_mle_drasgow_gsl
|
60
|
+
check_two_step
|
64
61
|
end
|
62
|
+
|
63
|
+
it "compute polychoric series using gsl" do
|
64
|
+
pending("requires rb-gsl") unless Statsample.has_gsl?
|
65
65
|
|
66
|
-
|
66
|
+
@poly.method=:polychoric_series
|
67
|
+
@poly.compute
|
68
|
+
|
69
|
+
@poly.r.should be_close(0.556, 0.001)
|
70
|
+
@poly.threshold_y[0].should be_close(-0.240 ,0.001)
|
71
|
+
@poly.threshold_x[0].should be_close(-0.027 ,0.001)
|
72
|
+
@poly.threshold_y[1].should be_close(1.578 ,0.001)
|
73
|
+
@poly.threshold_x[1].should be_close(1.137 ,0.001)
|
74
|
+
end
|
67
75
|
|
68
76
|
end
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 13
|
8
|
-
-
|
9
|
-
version: 0.13.
|
8
|
+
- 3
|
9
|
+
version: 0.13.3
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Claudio Bustos
|
@@ -35,7 +35,7 @@ cert_chain:
|
|
35
35
|
rpP0jjs0
|
36
36
|
-----END CERTIFICATE-----
|
37
37
|
|
38
|
-
date: 2010-06-
|
38
|
+
date: 2010-06-22 00:00:00 -04:00
|
39
39
|
default_executable:
|
40
40
|
dependencies:
|
41
41
|
- !ruby/object:Gem::Dependency
|
@@ -89,8 +89,11 @@ files:
|
|
89
89
|
- data/tetmat_test.txt
|
90
90
|
- lib/statsample/bivariate/extension_version.rb
|
91
91
|
- lib/statsample/bivariate/polychoric.rb
|
92
|
+
- lib/statsample/bivariate/polychoric/processor.rb
|
92
93
|
- lib/statsample/bivariate/tetrachoric.rb
|
94
|
+
- spec/spec.opts
|
93
95
|
- spec/spec_helper.rb
|
96
|
+
- spec/statsample/bivariate/polychoric_processor_spec.rb
|
94
97
|
- spec/statsample/bivariate/polychoric_spec.rb
|
95
98
|
- spec/statsample/bivariate/tetrachoric_spec.rb
|
96
99
|
has_rdoc: true
|
metadata.gz.sig
CHANGED
Binary file
|