statsample-bivariate-extension 0.13.2 → 0.13.3
- data.tar.gz.sig +0 -0
- data/History.txt +7 -0
- data/Manifest.txt +3 -0
- data/Rakefile +10 -0
- data/lib/statsample/bivariate/extension_version.rb +1 -1
- data/lib/statsample/bivariate/polychoric.rb +118 -200
- data/lib/statsample/bivariate/polychoric/processor.rb +261 -0
- data/spec/spec.opts +3 -0
- data/spec/statsample/bivariate/polychoric_processor_spec.rb +47 -0
- data/spec/statsample/bivariate/polychoric_spec.rb +38 -30
- metadata +6 -3
- metadata.gz.sig +0 -0
data.tar.gz.sig CHANGED
Binary file
data/History.txt CHANGED
@@ -1,3 +1,10 @@
+=== 0.13.3 / 2010-06-22
+
+* Bug fix on Processor.fd_loglike_cell_rho.
+* Chi square added on spec.
+* Olsson (1979) added on references
+* Derivative version of joint estimate operational
+
 === 0.13.1 / 2010-06-21
 * Better specs. Bug fix on Manifest.txt
 
data/Manifest.txt CHANGED
@@ -6,7 +6,10 @@ data/tetmat_matrix.txt
 data/tetmat_test.txt
 lib/statsample/bivariate/extension_version.rb
 lib/statsample/bivariate/polychoric.rb
+lib/statsample/bivariate/polychoric/processor.rb
 lib/statsample/bivariate/tetrachoric.rb
+spec/spec.opts
 spec/spec_helper.rb
+spec/statsample/bivariate/polychoric_processor_spec.rb
 spec/statsample/bivariate/polychoric_spec.rb
 spec/statsample/bivariate/tetrachoric_spec.rb
data/Rakefile CHANGED
@@ -2,6 +2,7 @@
 # -*- ruby -*-
 
 require 'rubygems'
+require 'spec/rake/spectask'
 require 'hoe'
 
 Hoe.plugin :git
@@ -15,4 +16,13 @@ Hoe.spec 'statsample-bivariate-extension' do
   self.developer('Claudio Bustos', 'clbustos_at_gmail.com')
 end
 
+
+
+desc "Run all spec with RCov"
+Spec::Rake::SpecTask.new('test_with_rcov') do |t|
+  t.spec_files = FileList['spec/**/*.rb']
+  t.rcov = true
+  t.rcov_opts = ['--exclude', 'spec']
+end
+
 # vim: syntax=ruby
data/lib/statsample/bivariate/polychoric.rb CHANGED
@@ -1,4 +1,5 @@
 require 'minimization'
+require 'statsample/bivariate/polychoric/processor'
 module Statsample
   module Bivariate
     # Calculate Polychoric correlation for two vectors.
@@ -44,11 +45,11 @@ module Statsample
     # the polychoric correlation:
     #
     # 1. Maximum Likehood Estimator
-    # 2. Two-step estimator
+    # 2. Two-step estimator
     # 3. Polychoric series estimate.
     #
     # By default, two-step estimation are used. You can select
-    # the estimation method with method attribute. Joint estimate and polychoric series requires gsl library and rb-gsl.
+    # the estimation method with method attribute. Joint estimate and polychoric series requires gsl library and rb-gsl. Joint estimate uses Olsson(1979) derivatives and two-step uses a derivative free method.
     #
     # == Use
     #
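As a quick orientation for the rdoc hunk above: the estimation method is selected through the method attribute before compute is called. A minimal sketch of that workflow, reusing the 3x3 contingency table from the specs later in this diff; it assumes statsample is installed, plus rb-gsl for :joint and :polychoric_series.

require 'statsample'   # assumed setup; rb-gsl is also needed for :joint and :polychoric_series
require 'matrix'

# Contingency table taken from polychoric_spec.rb in this release
matrix = Matrix[[58, 52, 1], [26, 58, 3], [8, 12, 9]]

poly = Statsample::Bivariate::Polychoric.new(matrix)
poly.method = :joint   # default is :two_step, per the rdoc above
poly.compute
poly.r                 # estimated polychoric correlation (the updated spec expects ~0.4192 for :joint)
poly.threshold_x       # row thresholds (alpha)
poly.threshold_y       # column thresholds (beta)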
@@ -71,69 +72,13 @@ module Statsample
     #
     # == References
     #
-    # * Uebersax, J.S. (2006). The tetrachoric and polychoric correlation coefficients. Statistical Methods for Rater Agreement web site. 2006. Available at: http://john-uebersax.com/stat/tetra.htm . Accessed February, 11, 2010
     # * Drasgow F. (2006). Polychoric and polyserial correlations. In Kotz L, Johnson NL (Eds.), Encyclopedia of statistical sciences. Vol. 7 (pp. 69-74). New York: Wiley.
+    # * Olsson, U. (1979) Maximum likelihood estimation of the polychoric correlation coefficient. Psychometrika 44, 443-460.
+    # * Uebersax, J.S. (2006). The tetrachoric and polychoric correlation coefficients. Statistical Methods for Rater Agreement web site. 2006. Available at: http://john-uebersax.com/stat/tetra.htm . Accessed February, 11, 2010
+
 
     class Polychoric
       include Summarizable
-      class Processor
-        attr_reader :alpha, :beta, :rho
-        def initialize(alpha,beta,rho)
-          @alpha=alpha
-          @beta=beta
-          @nr=@alpha.size+1
-          @nc=@beta.size+1
-          @rho=rho
-          @pd=nil
-        end
-        def bipdf(i,j)
-          Distribution::NormalBivariate.pdf(a(i), b(j), rho)
-        end
-        def a(i)
-          i < 0 ? -100 : (i==@nr-1 ? 100 : alpha[i])
-        end
-        def b(j)
-          j < 0 ? -100 : (j==@nc-1 ? 100 : beta[j])
-        end
-        # Equation(10) from Olsson(1979)
-        def fd_loglike_cell_a(i,j,k)
-          if k==i
-            Distribution::NormalBivariate.pd_cdf_x(a(k),b(j), rho) - Distribution::NormalBivariate.pd_cdf_x(a(k),b(j-1),rho)
-          elsif k==(i-1)
-            -Distribution::NormalBivariate.pd_cdf_x(a(k),b(j),rho) + Distribution::NormalBivariate.pd_cdf_x(a(k),b(j-1),rho)
-          else
-            0
-          end
-
-        end
-        # phi_ij for each i and j
-        # Uses equation(4) from Olsson(1979)
-        def pd
-          if @pd.nil?
-            @pd=@nr.times.collect{ [0] * @nc}
-            pc=@nr.times.collect{ [0] * @nc}
-            @nr.times do |i|
-              @nc.times do |j|
-
-                if i==@nr-1 and j==@nc-1
-                  @pd[i][j]=1.0
-                else
-                  a=(i==@nr-1) ? 100: alpha[i]
-                  b=(j==@nc-1) ? 100: beta[j]
-                  #puts "a:#{a} b:#{b}"
-                  @pd[i][j]=Distribution::NormalBivariate.cdf(a, b, rho)
-                end
-                pc[i][j] = @pd[i][j]
-                @pd[i][j] = @pd[i][j] - pc[i-1][j] if i>0
-                @pd[i][j] = @pd[i][j] - pc[i][j-1] if j>0
-                @pd[i][j] = @pd[i][j] + pc[i-1][j-1] if (i>0 and j>0)
-              end
-            end
-          end
-          @pd
-        end
-      end
-
       include DirtyMemoize
       # Name of the analysis
       attr_accessor :name
@@ -258,133 +203,8 @@ module Statsample
       def chi_square_df
         (@nr*@nc)-@nc-@nr
       end
-
-
-
-
-      # Retrieve all cell probabilities for givens alpha, beta and rho
-      def cell_probabilities(alpha,beta,rho)
-        pd=@nr.times.collect{ [0] * @nc}
-        pc=@nr.times.collect{ [0] * @nc}
-        @nr.times do |i|
-          @nc.times do |j|
-
-            if i==@nr-1 and j==@nc-1
-              pd[i][j]=1.0
-            else
-              a=(i==@nr-1) ? 100: alpha[i]
-              b=(j==@nc-1) ? 100: beta[j]
-              #puts "a:#{a} b:#{b}"
-              pd[i][j]=Distribution::NormalBivariate.cdf(a, b, rho)
-            end
-            pc[i][j] = pd[i][j]
-            pd[i][j] = pd[i][j] - pc[i-1][j] if i>0
-            pd[i][j] = pd[i][j] - pc[i][j-1] if j>0
-            pd[i][j] = pd[i][j] + pc[i-1][j-1] if (i>0 and j>0)
-          end
-        end
-        @pd=pd
-        pd
-      end
-      def loglike(alpha,beta,rho)
-        if rho.abs>0.9999
-          rho= (rho>0) ? 0.9999 : -0.9999
-        end
-        pr=Processor.new(alpha,beta,rho)
-        loglike=0
-
-
-        @nr.times do |i|
-          @nc.times do |j|
-            res=pr.pd[i][j]+EPSILON
-            loglike+= @matrix[i,j] * Math::log( res )
-          end
-        end
-        -loglike
-      end
-      # First derivate for rho
-      # Uses equation (9) from Olsson(1979)
-      def fd_loglike_rho(alpha,beta,rho)
-        if rho.abs>0.9999
-          rho= (rho>0) ? 0.9999 : -0.9999
-        end
-        total=0
-        pr=Processor.new(alpha,beta,rho)
-        @nr.times do |i|
-          @nc.times do |j|
-            pi=pr.pd[i][j] + EPSILON
-            total+= (@matrix[i,j] / pi) * (pr.bipdf(i,j)-pr.bipdf(i-1,j)-pr.bipdf(i,j-1)+pr.bipdf(i-1,j-1))
-          end
-        end
-        total
-      end
 
-
-      def fd_loglike_a(alpha,beta,rho,k)
-        fd_loglike_a_eq6(alpha,beta,rho,k)
-      end
-      # Uses equation (6) from Olsson(1979)
-      def fd_loglike_a_eq6(alpha,beta,rho,k)
-        if rho.abs>0.9999
-          rho= (rho>0) ? 0.9999 : -0.9999
-        end
-        pr=Processor.new(alpha,beta,rho)
-        total=0
-        pd=pr.pd
-        @nr.times do |i|
-          @nc.times do |j|
-            total+=@matrix[i,j].quo(pd[i][j]+EPSILON) * pr.fd_loglike_cell_a(i,j,k)
-          end
-        end
-        total
-      end
-      # Uses equation(13) from Olsson(1979)
-      def fd_loglike_a_eq13(alpha,beta,rho,k)
-        if rho.abs>0.9999
-          rho= (rho>0) ? 0.9999 : -0.9999
-        end
-        pr=Processor.new(alpha,beta,rho)
-        total=0
-        a_k=pr.a(k)
-        pd=pr.pd
-        @nc.times do |j|
-          #puts "j: #{j}"
-          #puts "b #{j} : #{b.call(j)}"
-          #puts "b #{j-1} : #{b.call(j-1)}"
-
-          e_1=@matrix[k,j].quo(pd[k][j]+EPSILON) - @matrix[k+1,j].quo(pd[k+1][j]+EPSILON)
-          e_2=Distribution::Normal.pdf(a_k)
-          e_3=Distribution::Normal.cdf((pr.b(j)-rho*a_k).quo(Math::sqrt(1-rho**2))) - Distribution::Normal.cdf((pr.b(j-1)-rho*a_k).quo(Math::sqrt(1-rho**2)))
-          #puts "val #{j}: #{e_1} | #{e_2} | #{e_3}"
-
-          total+= e_1*e_2*e_3
-        end
-        total
-      end
-      # First derivative for beta_m
-      # Uses equation(14) from Olsson(1979)
-      def fd_loglike_b(alpha,beta,rho,m)
-        if rho.abs>0.9999
-          rho= (rho>0) ? 0.9999 : -0.9999
-        end
-        pr=Processor.new(alpha,beta,rho)
-        total=0
-        b_m=pr.b m
-        pd=pr.pd
-        @nr.times do |i|
-          #puts "j: #{j}"
-          #puts "b #{j} : #{b.call(j)}"
-          #puts "b #{j-1} : #{b.call(j-1)}"
-
-          e_1=@matrix[i,m].quo(pd[i][m]+EPSILON) - @matrix[i,m+1].quo(pd[i][m+1]+EPSILON)
-          e_2=Distribution::Normal.pdf(b_m)
-          e_3=Distribution::Normal.cdf((pr.a(i)-rho*b_m).quo(Math::sqrt(1-rho**2))) - Distribution::Normal.cdf((pr.a(i-1)-rho*b_m).quo(Math::sqrt(1-rho**2)))
-          #puts "val #{j}: #{e_1} | #{e_2} | #{e_3}"
-
-          total+= e_1*e_2*e_3
-        end
-        total
-      end
+
 
 
       def compute_basic_parameters
@@ -442,7 +262,8 @@ module Statsample
       def compute_two_step_mle_drasgow_ruby #:nodoc:
 
         f=proc {|rho|
-
+          pr=Processor.new(@alpha,@beta, rho, @matrix)
+          pr.loglike
         }
         @log=_("Minimizing using GSL Brent method\n")
         min=Minimization::Brent.new(-0.9999,0.9999,f)
@@ -459,8 +280,9 @@ module Statsample
 
       def compute_two_step_mle_drasgow_gsl #:nodoc:
 
-        fn1=GSL::Function.alloc {|rho|
-
+        fn1=GSL::Function.alloc {|rho|
+          pr=Processor.new(@alpha,@beta, rho, @matrix)
+          pr.loglike
         }
         @iteration = 0
         max_iter = @max_iterations
@@ -498,12 +320,101 @@ module Statsample
         @loglike_model=-gmf.f_minimum
       end
 
+
+      def compute_derivatives_vector(v,df)
+        new_rho=v[0]
+        new_alpha=v[1, @nr-1]
+        new_beta=v[@nr, @nc-1]
+        if new_rho.abs>0.9999
+          new_rho= (new_rho>0) ? 0.9999 : -0.9999
+        end
+        pr=Processor.new(new_alpha,new_beta,new_rho,@matrix)
+
+        df[0]=-pr.fd_loglike_rho
+        new_alpha.to_a.each_with_index {|v,i|
+          df[i+1]=-pr.fd_loglike_a(i)
+        }
+        offset=new_alpha.size+1
+        new_beta.to_a.each_with_index {|v,i|
+          df[offset+i]=-pr.fd_loglike_b(i)
+        }
+      end
+
+      def compute_one_step_mle
+        compute_one_step_mle_with_derivatives
+      end
+
+
+      def compute_one_step_mle_with_derivatives
+        # Get initial values with two-step aproach
+        compute_two_step_mle_drasgow
+        # Start iteration with past values
+        rho=@r
+        cut_alpha=@alpha
+        cut_beta=@beta
+        parameters=[rho]+cut_alpha+cut_beta
+        np=@nc-1+@nr
+
+
+        loglike_f = Proc.new { |v, params|
+          new_rho=v[0]
+          new_alpha=v[1, @nr-1]
+          new_beta=v[@nr, @nc-1]
+          pr=Processor.new(new_alpha,new_beta,new_rho,@matrix)
+          pr.loglike
+        }
+
+        loglike_df = Proc.new {|v, params, df |
+          compute_derivatives_vector(v,df)
+        }
+
+
+        my_func = GSL::MultiMin::Function_fdf.alloc(loglike_f,loglike_df, np)
+        my_func.set_params(parameters) # parameters
+
+        x = GSL::Vector.alloc(parameters.dup)
+        minimizer = GSL::MultiMin::FdfMinimizer.alloc('conjugate_pr',np)
+        minimizer.set(my_func, x, 1, 1e-3)
+
+        iter = 0
+        message=""
+        begin_time=Time.new
+        begin
+          iter += 1
+          status = minimizer.iterate()
+          #p minimizer.f
+          #p minimizer.gradient
+          status = minimizer.test_gradient(1e-3)
+          if status == GSL::SUCCESS
+            total_time=Time.new-begin_time
+            message+="Joint MLE converged to minimum on %0.3f seconds at\n" % total_time
+          end
+          x = minimizer.x
+          message+= sprintf("%5d iterations", iter)+"\n";
+          message+= "args="
+          for i in 0...np do
+            message+=sprintf("%10.3e ", x[i])
+          end
+          message+=sprintf("f() = %7.3f\n" , minimizer.f)+"\n";
+        end while status == GSL::CONTINUE and iter < @max_iterations
+
+        @iteration=iter
+        @log+=message
+        @r=minimizer.x[0]
+        @alpha=minimizer.x[1,@nr-1].to_a
+        @beta=minimizer.x[@nr,@nc-1].to_a
+        @loglike_model= -minimizer.minimum
+
+        pr=Processor.new(@alpha,@beta,@r,@matrix)
+
+      end
+
       # Compute Polychoric correlation with joint estimate.
       # Rho and thresholds are estimated at same time.
       # Code based on R package "polycor", by J.Fox.
       #
 
-      def
+      def compute_one_step_mle_without_derivatives
         # Get initial values with two-step aproach
         compute_two_step_mle_drasgow
         # Start iteration with past values
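For readers following the Olsson (1979) citations in the comments: the objective handed to the GSL fdf minimizer above is the negative of the log-likelihood computed by Processor#loglike, and compute_derivatives_vector fills the gradient with the negated analytic first derivatives (fd_loglike_rho, fd_loglike_a, fd_loglike_b). Informally, with n_ij the observed cell counts, pi_ij the cell probabilities from Processor#pd, Phi_2 and phi_2 the bivariate normal CDF and PDF, and a_i, b_j the row and column thresholds (the code uses -100 and +100 as stand-ins for the infinite extreme thresholds), the quantities are:

% Sketch of what Processor#loglike and Processor#fd_loglike_rho implement
% (cf. Olsson 1979, eqs. 4, 8 and 9 as cited in the code).
\pi_{ij} = \Phi_2(a_i, b_j; \rho) - \Phi_2(a_{i-1}, b_j; \rho)
         - \Phi_2(a_i, b_{j-1}; \rho) + \Phi_2(a_{i-1}, b_{j-1}; \rho)

\ell(\rho, a, b) = \sum_{i=1}^{r} \sum_{j=1}^{c} n_{ij} \ln \pi_{ij}

\frac{\partial \ell}{\partial \rho}
  = \sum_{i=1}^{r} \sum_{j=1}^{c} \frac{n_{ij}}{\pi_{ij}}
    \Big[ \phi_2(a_i, b_j; \rho) - \phi_2(a_{i-1}, b_j; \rho)
        - \phi_2(a_i, b_{j-1}; \rho) + \phi_2(a_{i-1}, b_{j-1}; \rho) \Big]

The minimizer therefore receives -l and its negated partial derivatives, which is why df[0] is set to -pr.fd_loglike_rho in compute_derivatives_vector above.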
@@ -511,10 +422,12 @@ module Statsample
         cut_alpha=@alpha
         cut_beta=@beta
         parameters=[rho]+cut_alpha+cut_beta
+        np=@nc-1+@nr
+
         minimization = Proc.new { |v, params|
-
-
-
+          new_rho=v[0]
+          new_alpha=v[1, @nr-1]
+          new_beta=v[@nr, @nc-1]
 
           #puts "f'rho=#{fd_loglike_rho(alpha,beta,rho)}"
           #(@nr-1).times {|k|
@@ -525,10 +438,12 @@ module Statsample
           #(@nc-1).times {|k|
          # puts "f'b(#{k}) = #{fd_loglike_b(alpha,beta,rho,k)}"
           #}
+          pr=Processor.new(new_alpha,new_beta,new_rho,@matrix)
 
-
+          df=Array.new(np)
+          #compute_derivatives_vector(v,df)
+          pr.loglike
         }
-        np=@nc-1+@nr
         my_func = GSL::MultiMin::Function.alloc(minimization, np)
         my_func.set_params(parameters) # parameters
 
@@ -542,12 +457,14 @@ module Statsample
 
         iter = 0
         message=""
+        begin_time=Time.new
         begin
           iter += 1
           status = minimizer.iterate()
           status = minimizer.test_size(@epsilon)
           if status == GSL::SUCCESS
-
+            total_time=Time.new-begin_time
+            message="Joint MLE converged to minimum on %0.3f seconds at\n" % total_time
           end
           x = minimizer.x
           message+= sprintf("%5d iterations", iter)+"\n";
@@ -830,8 +747,8 @@ module Statsample
         end # 43
         raise "Error" if norts==0
         @r=pcorl
-
-        @loglike_model=-loglike
+        pr=Processor.new(@alpha,@beta,@r,@matrix)
+        @loglike_model=-pr.loglike
 
       end
       #Computes vector h(mm7) of orthogonal hermite...
@@ -878,7 +795,8 @@ module Statsample
           t.row([_("Threshold Y %d") % i, sprintf("%0.4f", val)])
         }
         section.add(t)
-        section.add(_("
+        section.add(_("Iterations: %d") % @iteration)
+        section.add(_("Test of bivariate normality: X^2 = %0.3f, df = %d, p= %0.5f" % [ chi_square, chi_square_df, 1-Distribution::ChiSquare.cdf(chi_square, chi_square_df)]))
         generator.parse_element(section)
       end
     end
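The second line added to the summary reports a chi-square test of bivariate normality. Its degrees of freedom come from chi_square_df defined earlier in this file: for an r x c table, df = rc - r - c, so the 3x3 table used in the specs gives 9 - 3 - 3 = 3. A purely illustrative sketch of the p-value expression used by the new report line, with the chi-square value the updated spec expects for that table:

require 'statsample'   # assumed; statsample provides the Distribution module used in this gem

chi_square = 11.54              # value expected in polychoric_spec.rb for the 3x3 table
df = (3 * 3) - 3 - 3            # chi_square_df for nr = nc = 3
p_value = 1 - Distribution::ChiSquare.cdf(chi_square, df)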
data/lib/statsample/bivariate/polychoric/processor.rb ADDED
@@ -0,0 +1,261 @@
+module Statsample
+  module Bivariate
+    class Polychoric
+      # Provides statistics for a given combination of rho, alpha and beta and contingence table.
+      class Processor
+        attr_reader :alpha, :beta, :rho, :matrix
+        EPSILON=1e-10
+        def initialize(alpha,beta,rho,matrix=nil)
+          @alpha=alpha
+          @beta=beta
+          @matrix=matrix
+          @nr=@alpha.size+1
+          @nc=@beta.size+1
+          @rho=rho
+          @pd=nil
+        end
+
+        def bipdf(i,j)
+          Distribution::NormalBivariate.pdf(a(i), b(j), rho)
+        end
+
+        def loglike
+          rho=@rho
+          if rho.abs>0.9999
+            rho= (rho>0) ? 0.9999 : -0.9999
+          end
+          loglike=0
+          @nr.times do |i|
+            @nc.times do |j|
+              res=pd[i][j]+EPSILON
+              loglike+= @matrix[i,j] * Math::log( res )
+            end
+          end
+          -loglike
+        end
+
+        def a(i)
+          raise "Index #{i} should be <= #{@nr-1}" if i>@nr-1
+          i < 0 ? -100 : (i==@nr-1 ? 100 : alpha[i])
+        end
+        def b(j)
+          raise "Index #{j} should be <= #{@nc-1}" if j>@nc-1
+          j < 0 ? -100 : (j==@nc-1 ? 100 : beta[j])
+        end
+
+        def eq12(u,v)
+          Distribution::Normal.pdf(u)*Distribution::Normal.cdf((v-rho*u).quo( Math::sqrt(1-rho**2)))
+        end
+
+        def eq12b(u,v)
+          Distribution::Normal.pdf(v) * Distribution::Normal.cdf((u-rho*v).quo( Math::sqrt(1-rho**2)))
+
+        end
+        # Equation(8) from Olsson(1979)
+        def fd_loglike_cell_rho(i, j)
+          bipdf(i,j) - bipdf(i-1,j) - bipdf(i, j-1) + bipdf(i-1, j-1)
+        end
+        # Equation(10) from Olsson(1979)
+        def fd_loglike_cell_a(i, j, k)
+=begin
+          if k==i
+            Distribution::NormalBivariate.pd_cdf_x(a(k),b(j), rho) - Distribution::NormalBivariate.pd_cdf_x(a(k),b(j-1),rho)
+          elsif k==(i-1)
+            -Distribution::NormalBivariate.pd_cdf_x(a(k),b(j),rho) + Distribution::NormalBivariate.pd_cdf_x(a(k),b(j-1),rho)
+          else
+            0
+          end
+=end
+          if k==i
+            eq12(a(k),b(j))-eq12(a(k), b(j-1))
+          elsif k==(i-1)
+            -eq12(a(k),b(j))+eq12(a(k), b(j-1))
+          else
+            0
+          end
+        end
+
+        def fd_loglike_cell_b(i, j, m)
+          if m==j
+            eq12b(a(i),b(m))-eq12b(a(i-1),b(m))
+          elsif m==(j-1)
+            -eq12b(a(i),b(m))+eq12b(a(i-1),b(m))
+          else
+            0
+          end
+=begin
+          if m==j
+            Distribution::NormalBivariate.pd_cdf_x(a(i),b(m), rho) - Distribution::NormalBivariate.pd_cdf_x(a(i-1),b(m),rho)
+          elsif m==(j-1)
+            -Distribution::NormalBivariate.pd_cdf_x(a(i),b(m),rho) + Distribution::NormalBivariate.pd_cdf_x(a(i-1),b(m),rho)
+          else
+            0
+          end
+=end
+
+
+        end
+
+        # phi_ij for each i and j
+        # Uses equation(4) from Olsson(1979)
+        def pd
+          if @pd.nil?
+            @pd=@nr.times.collect{ [0] * @nc}
+            pc=@nr.times.collect{ [0] * @nc}
+            @nr.times do |i|
+              @nc.times do |j|
+
+                if i==@nr-1 and j==@nc-1
+                  @pd[i][j]=1.0
+                else
+                  a=(i==@nr-1) ? 100: alpha[i]
+                  b=(j==@nc-1) ? 100: beta[j]
+                  #puts "a:#{a} b:#{b}"
+                  @pd[i][j]=Distribution::NormalBivariate.cdf(a, b, rho)
+                end
+                pc[i][j] = @pd[i][j]
+                @pd[i][j] = @pd[i][j] - pc[i-1][j] if i>0
+                @pd[i][j] = @pd[i][j] - pc[i][j-1] if j>0
+                @pd[i][j] = @pd[i][j] + pc[i-1][j-1] if (i>0 and j>0)
+              end
+            end
+          end
+          @pd
+        end
+
+        # First derivate for rho
+        # Uses equation (9) from Olsson(1979)
+
+        def fd_loglike_rho
+          rho=@rho
+          if rho.abs>0.9999
+            rho= (rho>0) ? 0.9999 : -0.9999
+          end
+          total=0
+          @nr.times do |i|
+            @nc.times do |j|
+              pi=pd[i][j] + EPSILON
+              total+= (@matrix[i,j].quo(pi)) * (bipdf(i,j)-bipdf(i-1,j)-bipdf(i,j-1)+bipdf(i-1,j-1))
+            end
+          end
+          total
+        end
+
+        # First derivative for alpha_k
+        # Uses equation (6)
+        def fd_loglike_a(k)
+          fd_loglike_a_eq6(k)
+        end
+
+
+
+        # Uses equation (6) from Olsson(1979)
+        def fd_loglike_a_eq6(k)
+          rho=@rho
+          if rho.abs>0.9999
+            rho= (rho>0) ? 0.9999 : -0.9999
+          end
+          total=0
+          @nr.times do |i|
+            @nc.times do |j|
+              total+=@matrix[i,j].quo(pd[i][j]+EPSILON) * fd_loglike_cell_a(i,j,k)
+            end
+          end
+          total
+        end
+
+
+        # Uses equation(13) from Olsson(1979)
+        def fd_loglike_a_eq13(k)
+          rho=@rho
+          if rho.abs>0.9999
+            rho= (rho>0) ? 0.9999 : -0.9999
+          end
+          total=0
+          a_k=a(k)
+          @nc.times do |j|
+            #puts "j: #{j}"
+            #puts "b #{j} : #{b.call(j)}"
+            #puts "b #{j-1} : #{b.call(j-1)}"
+
+            e_1=@matrix[k,j].quo(pd[k][j]+EPSILON) - @matrix[k+1,j].quo(pd[k+1][j]+EPSILON)
+            e_2=Distribution::Normal.pdf(a_k)
+            e_3=Distribution::Normal.cdf((b(j)-rho*a_k).quo(Math::sqrt(1-rho**2))) - Distribution::Normal.cdf((b(j-1)-rho*a_k).quo(Math::sqrt(1-rho**2)))
+            #puts "val #{j}: #{e_1} | #{e_2} | #{e_3}"
+            total+= e_1*e_2*e_3
+          end
+          total
+        end
+        # First derivative for b
+        # Uses equation 6 (Olsson, 1979)
+        def fd_loglike_b_eq6(m)
+          rho=@rho
+          if rho.abs>0.9999
+            rho= (rho>0) ? 0.9999 : -0.9999
+          end
+          total=0
+          @nr.times do |i|
+            @nc.times do |j|
+              total+=@matrix[i,j].quo(pd[i][j]+EPSILON) * fd_loglike_cell_b(i,j,m)
+            end
+          end
+          total
+        end
+        # First derivative for beta_m.
+        # Uses equation 6 (Olsson,1979)
+        def fd_loglike_b(m)
+          fd_loglike_b_eq14(m)
+        end
+        # First derivative for beta_m
+        # Uses equation(14) from Olsson(1979)
+        def fd_loglike_b_eq14(m)
+          rho=@rho
+          if rho.abs>0.9999
+            rho= (rho>0) ? 0.9999 : -0.9999
+          end
+          total=0
+          b_m=b(m)
+          @nr.times do |i|
+            e_1=@matrix[i,m].quo(pd[i][m]+EPSILON) - @matrix[i,m+1].quo(pd[i][m+1]+EPSILON)
+            e_2=Distribution::Normal.pdf(b_m)
+            e_3=Distribution::Normal.cdf((a(i)-rho*b_m).quo(Math::sqrt(1-rho**2))) - Distribution::Normal.cdf((a(i-1)-rho*b_m).quo(Math::sqrt(1-rho**2)))
+            #puts "val #{j}: #{e_1} | #{e_2} | #{e_3}"
+
+            total+= e_1*e_2*e_3
+          end
+          total
+        end
+        # Returns the derivative correct according to order
+        def im_function(t,i,j)
+          if t==0
+            fd_loglike_cell_rho(i,j)
+          elsif t>=1 and t<=@alpha.size
+            fd_loglike_cell_a(i,j,t-1)
+          elsif t>=@alpha.size+1 and t<=(@alpha.size+@beta.size)
+            fd_loglike_cell_b(i,j,t-@alpha.size-1)
+          else
+            raise "incorrect #{t}"
+          end
+        end
+        def information_matrix
+          total_n=@matrix.total_sum
+          vars=@alpha.size+@beta.size+1
+          matrix=vars.times.map { vars.times.map {0}}
+          vars.times do |m|
+            vars.times do |n|
+              total=0
+              (@nr-1).times do |i|
+                (@nc-1).times do |j|
+                  total+=(1.quo(pd[i][j]+EPSILON)) * im_function(m,i,j) * im_function(n,i,j)
+                end
+              end
+              matrix[m][n]=total_n*total
+            end
+          end
+          m=::Matrix.rows(matrix)
+
+        end
+      end # Processor
+    end # Polychoric
+  end # Bivariate
+end # Statsample
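The spec that follows exercises this class directly. As a short usage sketch (thresholds, correlation and contingency table copied from that spec), a Processor bundles one candidate parameter set and exposes the pieces the estimators need:

alpha  = [-0.027, 1.137]     # row thresholds, from the spec below
beta   = [-0.240, 1.1578]    # column thresholds
rho    = 0.420
matrix = Matrix[[58, 52, 1], [26, 58, 3], [8, 12, 9]]

pr = Statsample::Bivariate::Polychoric::Processor.new(alpha, beta, rho, matrix)
pr.pd                 # cell probabilities (Olsson 1979, eq. 4)
pr.loglike            # negative log-likelihood, the quantity minimized by the estimators
pr.fd_loglike_rho     # first derivative of the log-likelihood w.r.t. rho (eq. 9)
pr.fd_loglike_a(0)    # first derivative w.r.t. the first row threshold (eq. 6)
pr.fd_loglike_b(1)    # first derivative w.r.t. the second column threshold (eq. 14)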
data/spec/spec.opts ADDED

data/spec/statsample/bivariate/polychoric_processor_spec.rb ADDED
@@ -0,0 +1,47 @@
+$:.unshift(File.dirname(__FILE__)+"/../../")
+require 'spec_helper'
+describe Statsample::Bivariate::Polychoric::Processor do
+  before do
+    @matrix=Matrix[[58,52,1],[26,58,3],[8,12,9]]
+    @alpha=[-0.027, 1.137]
+    @beta=[-0.240, 1.1578]
+    @rho=0.420
+    @processor=Statsample::Bivariate::Polychoric::Processor.new(@alpha,@beta,@rho,@matrix)
+  end
+  it "im_function method should return correct values according to index" do
+    @processor.im_function(0,0,0).should==@processor.fd_loglike_cell_rho(0,0)
+    @processor.im_function(1,0,0).should==@processor.fd_loglike_cell_a(0,0,0)
+    @processor.im_function(2,0,0).should==@processor.fd_loglike_cell_a(0,0,1)
+    @processor.im_function(3,1,0).should==@processor.fd_loglike_cell_b(1,0,0)
+    @processor.im_function(4,0,1).should==@processor.fd_loglike_cell_b(0,1,1)
+    lambda {@processor.im_function(5)}.should raise_error
+
+  end
+  it "should return informacion matrix" do
+    p @processor.information_matrix.inverse
+  end
+  it "fd a loglike should be equal usign eq.6 and eq.13" do
+    2.times {|k|
+      @processor.fd_loglike_a_eq6(k).should be_close @processor.fd_loglike_a_eq13(k), 1e-10
+    }
+  end
+  it "fd b loglike should be equal usign eq.6 and eq.14" do
+    2.times {|m|
+      @processor.fd_loglike_b_eq6(m).should be_close @processor.fd_loglike_b_eq14(m), 1e-10
+    }
+  end
+  it "fd rho should be equal usign fd_loglike_cell_rho and fd_loglike_rho" do
+    total=0
+
+    nr=@alpha.size+1
+    nc=@beta.size+1
+    nr.times do |i|
+      nc.times do |j|
+        pi=@processor.pd[i][j] + 1e-10
+        total+= (@matrix[i,j].quo(pi)) * @processor.fd_loglike_cell_rho(i,j)
+      end
+    end
+    total.should==@processor.fd_loglike_rho
+  end
+
+end
data/spec/statsample/bivariate/polychoric_spec.rb CHANGED
@@ -14,13 +14,34 @@ describe "Statsample::Bivariate polychoric extension" do
 end
 
 describe Statsample::Bivariate::Polychoric do
+
   before do
     matrix=Matrix[[58,52,1],[26,58,3],[8,12,9]]
     @poly=Statsample::Bivariate::Polychoric.new(matrix)
   end
   it "should have summary.size > 0" do
+    @poly.method=:two_step
     @poly.summary.size.should>0
   end
+  def check_joint
+    @poly.r.should be_close(0.4192, 0.0001)
+    @poly.threshold_x[0].should be_close(-0.0297, 0.0001)
+    @poly.threshold_x[1].should be_close(1.1331, 0.0001)
+
+    @poly.threshold_y[0].should be_close(-0.2421, 0.0001)
+    @poly.threshold_y[1].should be_close(1.5938 ,0.0001)
+    @poly.chi_square.should be_close(11.54,0.01)
+  end
+  it "compute joint estimation (without derivative) using gsl" do
+    @poly.compute_one_step_mle_without_derivatives
+    check_joint
+
+  end
+  it "compute joint estimation (with derivative) using gsl" do
+    @poly.compute_one_step_mle_with_derivatives
+    check_joint
+  end
+
   def check_two_step
     @poly.r.should be_close(0.420, 0.001)
     @poly.threshold_y[0].should be_close(-0.240 ,0.001)
@@ -32,37 +53,24 @@ describe Statsample::Bivariate::Polychoric do
     @poly.compute_two_step_mle_drasgow_ruby
     check_two_step
   end
-
-
-
-
-
-  it "compute polychoric series using gsl" do
-    @poly.method=:polychoric_series
-    @poly.compute
-
-    @poly.r.should be_close(0.556, 0.001)
-    @poly.threshold_y[0].should be_close(-0.240 ,0.001)
-    @poly.threshold_x[0].should be_close(-0.027 ,0.001)
-    @poly.threshold_y[1].should be_close(1.578 ,0.001)
-    @poly.threshold_x[1].should be_close(1.137 ,0.001)
-  end
-  it "compute joint estimation (without derivative) using gsl" do
-    @poly.method=:joint
-    @poly.compute
-    @poly.method.should==:joint
-    @poly.r.should be_close(0.4192, 0.0001)
-    @poly.threshold_y[0].should be_close(-0.2421, 0.0001)
-    @poly.threshold_x[0].should be_close(-0.0297, 0.0001)
-    @poly.threshold_y[1].should be_close(1.5938 ,0.0001)
-    @poly.threshold_x[1].should be_close(1.1331, 0.0001)
-  end
-  else
-    it "compute two-step with gsl requires rb-gsl"
-    it "compute polychoric series requires rb-gsl"
-    it "compute joint estimation requires rb-gsl"
+
+  it "compute two-step with gsl" do
+    pending("requires rb-gsl") unless Statsample.has_gsl?
+    @poly.compute_two_step_mle_drasgow_gsl
+    check_two_step
   end
+
+  it "compute polychoric series using gsl" do
+    pending("requires rb-gsl") unless Statsample.has_gsl?
 
-
+    @poly.method=:polychoric_series
+    @poly.compute
+
+    @poly.r.should be_close(0.556, 0.001)
+    @poly.threshold_y[0].should be_close(-0.240 ,0.001)
+    @poly.threshold_x[0].should be_close(-0.027 ,0.001)
+    @poly.threshold_y[1].should be_close(1.578 ,0.001)
+    @poly.threshold_x[1].should be_close(1.137 ,0.001)
+  end
 
 end
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
   segments:
   - 0
   - 13
-  -
-  version: 0.13.
+  - 3
+  version: 0.13.3
 platform: ruby
 authors:
 - Claudio Bustos
@@ -35,7 +35,7 @@ cert_chain:
   rpP0jjs0
   -----END CERTIFICATE-----
 
-date: 2010-06-
+date: 2010-06-22 00:00:00 -04:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency
@@ -89,8 +89,11 @@ files:
 - data/tetmat_test.txt
 - lib/statsample/bivariate/extension_version.rb
 - lib/statsample/bivariate/polychoric.rb
+- lib/statsample/bivariate/polychoric/processor.rb
 - lib/statsample/bivariate/tetrachoric.rb
+- spec/spec.opts
 - spec/spec_helper.rb
+- spec/statsample/bivariate/polychoric_processor_spec.rb
 - spec/statsample/bivariate/polychoric_spec.rb
 - spec/statsample/bivariate/tetrachoric_spec.rb
 has_rdoc: true
metadata.gz.sig CHANGED
Binary file