statsample-bivariate-extension 0.13.2 → 0.13.3

Sign up to get free protection for your applications and to get access to all the features.
data.tar.gz.sig CHANGED
Binary file
data/History.txt CHANGED
@@ -1,3 +1,10 @@
1
+ === 0.13.3 / 2010-06-22
2
+
3
+ * Bug fix on Processor.fd_loglike_cell_rho.
4
+ * Chi square added on spec.
5
+ * Olsson (1979) added on references
6
+ * Derivative version of joint estimate operational
7
+
1
8
  === 0.13.1 / 2010-06-21
2
9
  * Better specs. Bug fix on Manifest.txt
3
10
 
data/Manifest.txt CHANGED
@@ -6,7 +6,10 @@ data/tetmat_matrix.txt
6
6
  data/tetmat_test.txt
7
7
  lib/statsample/bivariate/extension_version.rb
8
8
  lib/statsample/bivariate/polychoric.rb
9
+ lib/statsample/bivariate/polychoric/processor.rb
9
10
  lib/statsample/bivariate/tetrachoric.rb
11
+ spec/spec.opts
10
12
  spec/spec_helper.rb
13
+ spec/statsample/bivariate/polychoric_processor_spec.rb
11
14
  spec/statsample/bivariate/polychoric_spec.rb
12
15
  spec/statsample/bivariate/tetrachoric_spec.rb
data/Rakefile CHANGED
@@ -2,6 +2,7 @@
2
2
  # -*- ruby -*-
3
3
 
4
4
  require 'rubygems'
5
+ require 'spec/rake/spectask'
5
6
  require 'hoe'
6
7
 
7
8
  Hoe.plugin :git
@@ -15,4 +16,13 @@ Hoe.spec 'statsample-bivariate-extension' do
15
16
  self.developer('Claudio Bustos', 'clbustos_at_gmail.com')
16
17
  end
17
18
 
19
+
20
+
21
+ desc "Run all spec with RCov"
22
+ Spec::Rake::SpecTask.new('test_with_rcov') do |t|
23
+ t.spec_files = FileList['spec/**/*.rb']
24
+ t.rcov = true
25
+ t.rcov_opts = ['--exclude', 'spec']
26
+ end
27
+
18
28
  # vim: syntax=ruby
@@ -1,6 +1,6 @@
1
1
  module Statsample
2
2
  module Bivariate
3
- EXTENSION_VERSION="0.13.2"
3
+ EXTENSION_VERSION="0.13.3"
4
4
  end
5
5
  end
6
6
 
@@ -1,4 +1,5 @@
1
1
  require 'minimization'
2
+ require 'statsample/bivariate/polychoric/processor'
2
3
  module Statsample
3
4
  module Bivariate
4
5
  # Calculate Polychoric correlation for two vectors.
@@ -44,11 +45,11 @@ module Statsample
44
45
  # the polychoric correlation:
45
46
  #
46
47
  # 1. Maximum Likehood Estimator
47
- # 2. Two-step estimator and
48
+ # 2. Two-step estimator
48
49
  # 3. Polychoric series estimate.
49
50
  #
50
51
  # By default, two-step estimation are used. You can select
51
- # the estimation method with method attribute. Joint estimate and polychoric series requires gsl library and rb-gsl.
52
+ # the estimation method with method attribute. Joint estimate and polychoric series requires gsl library and rb-gsl. Joint estimate uses Olsson(1979) derivatives and two-step uses a derivative free method.
52
53
  #
53
54
  # == Use
54
55
  #
@@ -71,69 +72,13 @@ module Statsample
71
72
  #
72
73
  # == References
73
74
  #
74
- # * Uebersax, J.S. (2006). The tetrachoric and polychoric correlation coefficients. Statistical Methods for Rater Agreement web site. 2006. Available at: http://john-uebersax.com/stat/tetra.htm . Accessed February, 11, 2010
75
75
  # * Drasgow F. (2006). Polychoric and polyserial correlations. In Kotz L, Johnson NL (Eds.), Encyclopedia of statistical sciences. Vol. 7 (pp. 69-74). New York: Wiley.
76
+ # * Olsson, U. (1979) Maximum likelihood estimation of the polychoric correlation coefficient. Psychometrika 44, 443-460.
77
+ # * Uebersax, J.S. (2006). The tetrachoric and polychoric correlation coefficients. Statistical Methods for Rater Agreement web site. 2006. Available at: http://john-uebersax.com/stat/tetra.htm . Accessed February, 11, 2010
78
+
76
79
 
77
80
  class Polychoric
78
81
  include Summarizable
79
- class Processor
80
- attr_reader :alpha, :beta, :rho
81
- def initialize(alpha,beta,rho)
82
- @alpha=alpha
83
- @beta=beta
84
- @nr=@alpha.size+1
85
- @nc=@beta.size+1
86
- @rho=rho
87
- @pd=nil
88
- end
89
- def bipdf(i,j)
90
- Distribution::NormalBivariate.pdf(a(i), b(j), rho)
91
- end
92
- def a(i)
93
- i < 0 ? -100 : (i==@nr-1 ? 100 : alpha[i])
94
- end
95
- def b(j)
96
- j < 0 ? -100 : (j==@nc-1 ? 100 : beta[j])
97
- end
98
- # Equation(10) from Olsson(1979)
99
- def fd_loglike_cell_a(i,j,k)
100
- if k==i
101
- Distribution::NormalBivariate.pd_cdf_x(a(k),b(j), rho) - Distribution::NormalBivariate.pd_cdf_x(a(k),b(j-1),rho)
102
- elsif k==(i-1)
103
- -Distribution::NormalBivariate.pd_cdf_x(a(k),b(j),rho) + Distribution::NormalBivariate.pd_cdf_x(a(k),b(j-1),rho)
104
- else
105
- 0
106
- end
107
-
108
- end
109
- # phi_ij for each i and j
110
- # Uses equation(4) from Olsson(1979)
111
- def pd
112
- if @pd.nil?
113
- @pd=@nr.times.collect{ [0] * @nc}
114
- pc=@nr.times.collect{ [0] * @nc}
115
- @nr.times do |i|
116
- @nc.times do |j|
117
-
118
- if i==@nr-1 and j==@nc-1
119
- @pd[i][j]=1.0
120
- else
121
- a=(i==@nr-1) ? 100: alpha[i]
122
- b=(j==@nc-1) ? 100: beta[j]
123
- #puts "a:#{a} b:#{b}"
124
- @pd[i][j]=Distribution::NormalBivariate.cdf(a, b, rho)
125
- end
126
- pc[i][j] = @pd[i][j]
127
- @pd[i][j] = @pd[i][j] - pc[i-1][j] if i>0
128
- @pd[i][j] = @pd[i][j] - pc[i][j-1] if j>0
129
- @pd[i][j] = @pd[i][j] + pc[i-1][j-1] if (i>0 and j>0)
130
- end
131
- end
132
- end
133
- @pd
134
- end
135
- end
136
-
137
82
  include DirtyMemoize
138
83
  # Name of the analysis
139
84
  attr_accessor :name
@@ -258,133 +203,8 @@ module Statsample
258
203
  def chi_square_df
259
204
  (@nr*@nc)-@nc-@nr
260
205
  end
261
-
262
-
263
-
264
-
265
- # Retrieve all cell probabilities for givens alpha, beta and rho
266
- def cell_probabilities(alpha,beta,rho)
267
- pd=@nr.times.collect{ [0] * @nc}
268
- pc=@nr.times.collect{ [0] * @nc}
269
- @nr.times do |i|
270
- @nc.times do |j|
271
-
272
- if i==@nr-1 and j==@nc-1
273
- pd[i][j]=1.0
274
- else
275
- a=(i==@nr-1) ? 100: alpha[i]
276
- b=(j==@nc-1) ? 100: beta[j]
277
- #puts "a:#{a} b:#{b}"
278
- pd[i][j]=Distribution::NormalBivariate.cdf(a, b, rho)
279
- end
280
- pc[i][j] = pd[i][j]
281
- pd[i][j] = pd[i][j] - pc[i-1][j] if i>0
282
- pd[i][j] = pd[i][j] - pc[i][j-1] if j>0
283
- pd[i][j] = pd[i][j] + pc[i-1][j-1] if (i>0 and j>0)
284
- end
285
- end
286
- @pd=pd
287
- pd
288
- end
289
- def loglike(alpha,beta,rho)
290
- if rho.abs>0.9999
291
- rho= (rho>0) ? 0.9999 : -0.9999
292
- end
293
- pr=Processor.new(alpha,beta,rho)
294
- loglike=0
295
-
296
-
297
- @nr.times do |i|
298
- @nc.times do |j|
299
- res=pr.pd[i][j]+EPSILON
300
- loglike+= @matrix[i,j] * Math::log( res )
301
- end
302
- end
303
- -loglike
304
- end
305
- # First derivate for rho
306
- # Uses equation (9) from Olsson(1979)
307
- def fd_loglike_rho(alpha,beta,rho)
308
- if rho.abs>0.9999
309
- rho= (rho>0) ? 0.9999 : -0.9999
310
- end
311
- total=0
312
- pr=Processor.new(alpha,beta,rho)
313
- @nr.times do |i|
314
- @nc.times do |j|
315
- pi=pr.pd[i][j] + EPSILON
316
- total+= (@matrix[i,j] / pi) * (pr.bipdf(i,j)-pr.bipdf(i-1,j)-pr.bipdf(i,j-1)+pr.bipdf(i-1,j-1))
317
- end
318
- end
319
- total
320
- end
321
206
 
322
- # First derivative for alpha_k
323
- def fd_loglike_a(alpha,beta,rho,k)
324
- fd_loglike_a_eq6(alpha,beta,rho,k)
325
- end
326
- # Uses equation (6) from Olsson(1979)
327
- def fd_loglike_a_eq6(alpha,beta,rho,k)
328
- if rho.abs>0.9999
329
- rho= (rho>0) ? 0.9999 : -0.9999
330
- end
331
- pr=Processor.new(alpha,beta,rho)
332
- total=0
333
- pd=pr.pd
334
- @nr.times do |i|
335
- @nc.times do |j|
336
- total+=@matrix[i,j].quo(pd[i][j]+EPSILON) * pr.fd_loglike_cell_a(i,j,k)
337
- end
338
- end
339
- total
340
- end
341
- # Uses equation(13) from Olsson(1979)
342
- def fd_loglike_a_eq13(alpha,beta,rho,k)
343
- if rho.abs>0.9999
344
- rho= (rho>0) ? 0.9999 : -0.9999
345
- end
346
- pr=Processor.new(alpha,beta,rho)
347
- total=0
348
- a_k=pr.a(k)
349
- pd=pr.pd
350
- @nc.times do |j|
351
- #puts "j: #{j}"
352
- #puts "b #{j} : #{b.call(j)}"
353
- #puts "b #{j-1} : #{b.call(j-1)}"
354
-
355
- e_1=@matrix[k,j].quo(pd[k][j]+EPSILON) - @matrix[k+1,j].quo(pd[k+1][j]+EPSILON)
356
- e_2=Distribution::Normal.pdf(a_k)
357
- e_3=Distribution::Normal.cdf((pr.b(j)-rho*a_k).quo(Math::sqrt(1-rho**2))) - Distribution::Normal.cdf((pr.b(j-1)-rho*a_k).quo(Math::sqrt(1-rho**2)))
358
- #puts "val #{j}: #{e_1} | #{e_2} | #{e_3}"
359
-
360
- total+= e_1*e_2*e_3
361
- end
362
- total
363
- end
364
- # First derivative for beta_m
365
- # Uses equation(14) from Olsson(1979)
366
- def fd_loglike_b(alpha,beta,rho,m)
367
- if rho.abs>0.9999
368
- rho= (rho>0) ? 0.9999 : -0.9999
369
- end
370
- pr=Processor.new(alpha,beta,rho)
371
- total=0
372
- b_m=pr.b m
373
- pd=pr.pd
374
- @nr.times do |i|
375
- #puts "j: #{j}"
376
- #puts "b #{j} : #{b.call(j)}"
377
- #puts "b #{j-1} : #{b.call(j-1)}"
378
-
379
- e_1=@matrix[i,m].quo(pd[i][m]+EPSILON) - @matrix[i,m+1].quo(pd[i][m+1]+EPSILON)
380
- e_2=Distribution::Normal.pdf(b_m)
381
- e_3=Distribution::Normal.cdf((pr.a(i)-rho*b_m).quo(Math::sqrt(1-rho**2))) - Distribution::Normal.cdf((pr.a(i-1)-rho*b_m).quo(Math::sqrt(1-rho**2)))
382
- #puts "val #{j}: #{e_1} | #{e_2} | #{e_3}"
383
-
384
- total+= e_1*e_2*e_3
385
- end
386
- total
387
- end
207
+
388
208
 
389
209
 
390
210
  def compute_basic_parameters
@@ -442,7 +262,8 @@ module Statsample
442
262
  def compute_two_step_mle_drasgow_ruby #:nodoc:
443
263
 
444
264
  f=proc {|rho|
445
- loglike(@alpha,@beta, rho)
265
+ pr=Processor.new(@alpha,@beta, rho, @matrix)
266
+ pr.loglike
446
267
  }
447
268
  @log=_("Minimizing using GSL Brent method\n")
448
269
  min=Minimization::Brent.new(-0.9999,0.9999,f)
@@ -459,8 +280,9 @@ module Statsample
459
280
 
460
281
  def compute_two_step_mle_drasgow_gsl #:nodoc:
461
282
 
462
- fn1=GSL::Function.alloc {|rho|
463
- loglike(@alpha,@beta, rho)
283
+ fn1=GSL::Function.alloc {|rho|
284
+ pr=Processor.new(@alpha,@beta, rho, @matrix)
285
+ pr.loglike
464
286
  }
465
287
  @iteration = 0
466
288
  max_iter = @max_iterations
@@ -498,12 +320,101 @@ module Statsample
498
320
  @loglike_model=-gmf.f_minimum
499
321
  end
500
322
 
323
+
324
+ def compute_derivatives_vector(v,df)
325
+ new_rho=v[0]
326
+ new_alpha=v[1, @nr-1]
327
+ new_beta=v[@nr, @nc-1]
328
+ if new_rho.abs>0.9999
329
+ new_rho= (new_rho>0) ? 0.9999 : -0.9999
330
+ end
331
+ pr=Processor.new(new_alpha,new_beta,new_rho,@matrix)
332
+
333
+ df[0]=-pr.fd_loglike_rho
334
+ new_alpha.to_a.each_with_index {|v,i|
335
+ df[i+1]=-pr.fd_loglike_a(i)
336
+ }
337
+ offset=new_alpha.size+1
338
+ new_beta.to_a.each_with_index {|v,i|
339
+ df[offset+i]=-pr.fd_loglike_b(i)
340
+ }
341
+ end
342
+
343
+ def compute_one_step_mle
344
+ compute_one_step_mle_with_derivatives
345
+ end
346
+
347
+
348
+ def compute_one_step_mle_with_derivatives
349
+ # Get initial values with two-step aproach
350
+ compute_two_step_mle_drasgow
351
+ # Start iteration with past values
352
+ rho=@r
353
+ cut_alpha=@alpha
354
+ cut_beta=@beta
355
+ parameters=[rho]+cut_alpha+cut_beta
356
+ np=@nc-1+@nr
357
+
358
+
359
+ loglike_f = Proc.new { |v, params|
360
+ new_rho=v[0]
361
+ new_alpha=v[1, @nr-1]
362
+ new_beta=v[@nr, @nc-1]
363
+ pr=Processor.new(new_alpha,new_beta,new_rho,@matrix)
364
+ pr.loglike
365
+ }
366
+
367
+ loglike_df = Proc.new {|v, params, df |
368
+ compute_derivatives_vector(v,df)
369
+ }
370
+
371
+
372
+ my_func = GSL::MultiMin::Function_fdf.alloc(loglike_f,loglike_df, np)
373
+ my_func.set_params(parameters) # parameters
374
+
375
+ x = GSL::Vector.alloc(parameters.dup)
376
+ minimizer = GSL::MultiMin::FdfMinimizer.alloc('conjugate_pr',np)
377
+ minimizer.set(my_func, x, 1, 1e-3)
378
+
379
+ iter = 0
380
+ message=""
381
+ begin_time=Time.new
382
+ begin
383
+ iter += 1
384
+ status = minimizer.iterate()
385
+ #p minimizer.f
386
+ #p minimizer.gradient
387
+ status = minimizer.test_gradient(1e-3)
388
+ if status == GSL::SUCCESS
389
+ total_time=Time.new-begin_time
390
+ message+="Joint MLE converged to minimum on %0.3f seconds at\n" % total_time
391
+ end
392
+ x = minimizer.x
393
+ message+= sprintf("%5d iterations", iter)+"\n";
394
+ message+= "args="
395
+ for i in 0...np do
396
+ message+=sprintf("%10.3e ", x[i])
397
+ end
398
+ message+=sprintf("f() = %7.3f\n" , minimizer.f)+"\n";
399
+ end while status == GSL::CONTINUE and iter < @max_iterations
400
+
401
+ @iteration=iter
402
+ @log+=message
403
+ @r=minimizer.x[0]
404
+ @alpha=minimizer.x[1,@nr-1].to_a
405
+ @beta=minimizer.x[@nr,@nc-1].to_a
406
+ @loglike_model= -minimizer.minimum
407
+
408
+ pr=Processor.new(@alpha,@beta,@r,@matrix)
409
+
410
+ end
411
+
501
412
  # Compute Polychoric correlation with joint estimate.
502
413
  # Rho and thresholds are estimated at same time.
503
414
  # Code based on R package "polycor", by J.Fox.
504
415
  #
505
416
 
506
- def compute_one_step_mle
417
+ def compute_one_step_mle_without_derivatives
507
418
  # Get initial values with two-step aproach
508
419
  compute_two_step_mle_drasgow
509
420
  # Start iteration with past values
@@ -511,10 +422,12 @@ module Statsample
511
422
  cut_alpha=@alpha
512
423
  cut_beta=@beta
513
424
  parameters=[rho]+cut_alpha+cut_beta
425
+ np=@nc-1+@nr
426
+
514
427
  minimization = Proc.new { |v, params|
515
- rho=v[0]
516
- alpha=v[1, @nr-1]
517
- beta=v[@nr, @nc-1]
428
+ new_rho=v[0]
429
+ new_alpha=v[1, @nr-1]
430
+ new_beta=v[@nr, @nc-1]
518
431
 
519
432
  #puts "f'rho=#{fd_loglike_rho(alpha,beta,rho)}"
520
433
  #(@nr-1).times {|k|
@@ -525,10 +438,12 @@ module Statsample
525
438
  #(@nc-1).times {|k|
526
439
  # puts "f'b(#{k}) = #{fd_loglike_b(alpha,beta,rho,k)}"
527
440
  #}
441
+ pr=Processor.new(new_alpha,new_beta,new_rho,@matrix)
528
442
 
529
- loglike(alpha,beta,rho)
443
+ df=Array.new(np)
444
+ #compute_derivatives_vector(v,df)
445
+ pr.loglike
530
446
  }
531
- np=@nc-1+@nr
532
447
  my_func = GSL::MultiMin::Function.alloc(minimization, np)
533
448
  my_func.set_params(parameters) # parameters
534
449
 
@@ -542,12 +457,14 @@ module Statsample
542
457
 
543
458
  iter = 0
544
459
  message=""
460
+ begin_time=Time.new
545
461
  begin
546
462
  iter += 1
547
463
  status = minimizer.iterate()
548
464
  status = minimizer.test_size(@epsilon)
549
465
  if status == GSL::SUCCESS
550
- message="Joint MLE converged to minimum at\n"
466
+ total_time=Time.new-begin_time
467
+ message="Joint MLE converged to minimum on %0.3f seconds at\n" % total_time
551
468
  end
552
469
  x = minimizer.x
553
470
  message+= sprintf("%5d iterations", iter)+"\n";
@@ -830,8 +747,8 @@ module Statsample
830
747
  end # 43
831
748
  raise "Error" if norts==0
832
749
  @r=pcorl
833
-
834
- @loglike_model=-loglike(@alpha, @beta, @r)
750
+ pr=Processor.new(@alpha,@beta,@r,@matrix)
751
+ @loglike_model=-pr.loglike
835
752
 
836
753
  end
837
754
  #Computes vector h(mm7) of orthogonal hermite...
@@ -878,7 +795,8 @@ module Statsample
878
795
  t.row([_("Threshold Y %d") % i, sprintf("%0.4f", val)])
879
796
  }
880
797
  section.add(t)
881
- section.add(_("Test of bivariate normality: X2 = %0.3f, df = %d, p= %0.5f" % [ chi_square, chi_square_df, 1-Distribution::ChiSquare.cdf(chi_square, chi_square_df)]))
798
+ section.add(_("Iterations: %d") % @iteration)
799
+ section.add(_("Test of bivariate normality: X^2 = %0.3f, df = %d, p= %0.5f" % [ chi_square, chi_square_df, 1-Distribution::ChiSquare.cdf(chi_square, chi_square_df)]))
882
800
  generator.parse_element(section)
883
801
  end
884
802
  end
@@ -0,0 +1,261 @@
1
+ module Statsample
2
+ module Bivariate
3
+ class Polychoric
4
+ # Provides statistics for a given combination of rho, alpha and beta and contingence table.
5
+ class Processor
6
+ attr_reader :alpha, :beta, :rho, :matrix
7
+ EPSILON=1e-10
8
+ def initialize(alpha,beta,rho,matrix=nil)
9
+ @alpha=alpha
10
+ @beta=beta
11
+ @matrix=matrix
12
+ @nr=@alpha.size+1
13
+ @nc=@beta.size+1
14
+ @rho=rho
15
+ @pd=nil
16
+ end
17
+
18
+ def bipdf(i,j)
19
+ Distribution::NormalBivariate.pdf(a(i), b(j), rho)
20
+ end
21
+
22
+ def loglike
23
+ rho=@rho
24
+ if rho.abs>0.9999
25
+ rho= (rho>0) ? 0.9999 : -0.9999
26
+ end
27
+ loglike=0
28
+ @nr.times do |i|
29
+ @nc.times do |j|
30
+ res=pd[i][j]+EPSILON
31
+ loglike+= @matrix[i,j] * Math::log( res )
32
+ end
33
+ end
34
+ -loglike
35
+ end
36
+
37
+ def a(i)
38
+ raise "Index #{i} should be <= #{@nr-1}" if i>@nr-1
39
+ i < 0 ? -100 : (i==@nr-1 ? 100 : alpha[i])
40
+ end
41
+ def b(j)
42
+ raise "Index #{j} should be <= #{@nc-1}" if j>@nc-1
43
+ j < 0 ? -100 : (j==@nc-1 ? 100 : beta[j])
44
+ end
45
+
46
+ def eq12(u,v)
47
+ Distribution::Normal.pdf(u)*Distribution::Normal.cdf((v-rho*u).quo( Math::sqrt(1-rho**2)))
48
+ end
49
+
50
+ def eq12b(u,v)
51
+ Distribution::Normal.pdf(v) * Distribution::Normal.cdf((u-rho*v).quo( Math::sqrt(1-rho**2)))
52
+
53
+ end
54
+ # Equation(8) from Olsson(1979)
55
+ def fd_loglike_cell_rho(i, j)
56
+ bipdf(i,j) - bipdf(i-1,j) - bipdf(i, j-1) + bipdf(i-1, j-1)
57
+ end
58
+ # Equation(10) from Olsson(1979)
59
+ def fd_loglike_cell_a(i, j, k)
60
+ =begin
61
+ if k==i
62
+ Distribution::NormalBivariate.pd_cdf_x(a(k),b(j), rho) - Distribution::NormalBivariate.pd_cdf_x(a(k),b(j-1),rho)
63
+ elsif k==(i-1)
64
+ -Distribution::NormalBivariate.pd_cdf_x(a(k),b(j),rho) + Distribution::NormalBivariate.pd_cdf_x(a(k),b(j-1),rho)
65
+ else
66
+ 0
67
+ end
68
+ =end
69
+ if k==i
70
+ eq12(a(k),b(j))-eq12(a(k), b(j-1))
71
+ elsif k==(i-1)
72
+ -eq12(a(k),b(j))+eq12(a(k), b(j-1))
73
+ else
74
+ 0
75
+ end
76
+ end
77
+
78
+ def fd_loglike_cell_b(i, j, m)
79
+ if m==j
80
+ eq12b(a(i),b(m))-eq12b(a(i-1),b(m))
81
+ elsif m==(j-1)
82
+ -eq12b(a(i),b(m))+eq12b(a(i-1),b(m))
83
+ else
84
+ 0
85
+ end
86
+ =begin
87
+ if m==j
88
+ Distribution::NormalBivariate.pd_cdf_x(a(i),b(m), rho) - Distribution::NormalBivariate.pd_cdf_x(a(i-1),b(m),rho)
89
+ elsif m==(j-1)
90
+ -Distribution::NormalBivariate.pd_cdf_x(a(i),b(m),rho) + Distribution::NormalBivariate.pd_cdf_x(a(i-1),b(m),rho)
91
+ else
92
+ 0
93
+ end
94
+ =end
95
+
96
+
97
+ end
98
+
99
+ # phi_ij for each i and j
100
+ # Uses equation(4) from Olsson(1979)
101
+ def pd
102
+ if @pd.nil?
103
+ @pd=@nr.times.collect{ [0] * @nc}
104
+ pc=@nr.times.collect{ [0] * @nc}
105
+ @nr.times do |i|
106
+ @nc.times do |j|
107
+
108
+ if i==@nr-1 and j==@nc-1
109
+ @pd[i][j]=1.0
110
+ else
111
+ a=(i==@nr-1) ? 100: alpha[i]
112
+ b=(j==@nc-1) ? 100: beta[j]
113
+ #puts "a:#{a} b:#{b}"
114
+ @pd[i][j]=Distribution::NormalBivariate.cdf(a, b, rho)
115
+ end
116
+ pc[i][j] = @pd[i][j]
117
+ @pd[i][j] = @pd[i][j] - pc[i-1][j] if i>0
118
+ @pd[i][j] = @pd[i][j] - pc[i][j-1] if j>0
119
+ @pd[i][j] = @pd[i][j] + pc[i-1][j-1] if (i>0 and j>0)
120
+ end
121
+ end
122
+ end
123
+ @pd
124
+ end
125
+
126
+ # First derivate for rho
127
+ # Uses equation (9) from Olsson(1979)
128
+
129
+ def fd_loglike_rho
130
+ rho=@rho
131
+ if rho.abs>0.9999
132
+ rho= (rho>0) ? 0.9999 : -0.9999
133
+ end
134
+ total=0
135
+ @nr.times do |i|
136
+ @nc.times do |j|
137
+ pi=pd[i][j] + EPSILON
138
+ total+= (@matrix[i,j].quo(pi)) * (bipdf(i,j)-bipdf(i-1,j)-bipdf(i,j-1)+bipdf(i-1,j-1))
139
+ end
140
+ end
141
+ total
142
+ end
143
+
144
+ # First derivative for alpha_k
145
+ # Uses equation (6)
146
+ def fd_loglike_a(k)
147
+ fd_loglike_a_eq6(k)
148
+ end
149
+
150
+
151
+
152
+ # Uses equation (6) from Olsson(1979)
153
+ def fd_loglike_a_eq6(k)
154
+ rho=@rho
155
+ if rho.abs>0.9999
156
+ rho= (rho>0) ? 0.9999 : -0.9999
157
+ end
158
+ total=0
159
+ @nr.times do |i|
160
+ @nc.times do |j|
161
+ total+=@matrix[i,j].quo(pd[i][j]+EPSILON) * fd_loglike_cell_a(i,j,k)
162
+ end
163
+ end
164
+ total
165
+ end
166
+
167
+
168
+ # Uses equation(13) from Olsson(1979)
169
+ def fd_loglike_a_eq13(k)
170
+ rho=@rho
171
+ if rho.abs>0.9999
172
+ rho= (rho>0) ? 0.9999 : -0.9999
173
+ end
174
+ total=0
175
+ a_k=a(k)
176
+ @nc.times do |j|
177
+ #puts "j: #{j}"
178
+ #puts "b #{j} : #{b.call(j)}"
179
+ #puts "b #{j-1} : #{b.call(j-1)}"
180
+
181
+ e_1=@matrix[k,j].quo(pd[k][j]+EPSILON) - @matrix[k+1,j].quo(pd[k+1][j]+EPSILON)
182
+ e_2=Distribution::Normal.pdf(a_k)
183
+ e_3=Distribution::Normal.cdf((b(j)-rho*a_k).quo(Math::sqrt(1-rho**2))) - Distribution::Normal.cdf((b(j-1)-rho*a_k).quo(Math::sqrt(1-rho**2)))
184
+ #puts "val #{j}: #{e_1} | #{e_2} | #{e_3}"
185
+ total+= e_1*e_2*e_3
186
+ end
187
+ total
188
+ end
189
+ # First derivative for b
190
+ # Uses equation 6 (Olsson, 1979)
191
+ def fd_loglike_b_eq6(m)
192
+ rho=@rho
193
+ if rho.abs>0.9999
194
+ rho= (rho>0) ? 0.9999 : -0.9999
195
+ end
196
+ total=0
197
+ @nr.times do |i|
198
+ @nc.times do |j|
199
+ total+=@matrix[i,j].quo(pd[i][j]+EPSILON) * fd_loglike_cell_b(i,j,m)
200
+ end
201
+ end
202
+ total
203
+ end
204
+ # First derivative for beta_m.
205
+ # Uses equation 6 (Olsson,1979)
206
+ def fd_loglike_b(m)
207
+ fd_loglike_b_eq14(m)
208
+ end
209
+ # First derivative for beta_m
210
+ # Uses equation(14) from Olsson(1979)
211
+ def fd_loglike_b_eq14(m)
212
+ rho=@rho
213
+ if rho.abs>0.9999
214
+ rho= (rho>0) ? 0.9999 : -0.9999
215
+ end
216
+ total=0
217
+ b_m=b(m)
218
+ @nr.times do |i|
219
+ e_1=@matrix[i,m].quo(pd[i][m]+EPSILON) - @matrix[i,m+1].quo(pd[i][m+1]+EPSILON)
220
+ e_2=Distribution::Normal.pdf(b_m)
221
+ e_3=Distribution::Normal.cdf((a(i)-rho*b_m).quo(Math::sqrt(1-rho**2))) - Distribution::Normal.cdf((a(i-1)-rho*b_m).quo(Math::sqrt(1-rho**2)))
222
+ #puts "val #{j}: #{e_1} | #{e_2} | #{e_3}"
223
+
224
+ total+= e_1*e_2*e_3
225
+ end
226
+ total
227
+ end
228
+ # Returns the derivative correct according to order
229
+ def im_function(t,i,j)
230
+ if t==0
231
+ fd_loglike_cell_rho(i,j)
232
+ elsif t>=1 and t<=@alpha.size
233
+ fd_loglike_cell_a(i,j,t-1)
234
+ elsif t>=@alpha.size+1 and t<=(@alpha.size+@beta.size)
235
+ fd_loglike_cell_b(i,j,t-@alpha.size-1)
236
+ else
237
+ raise "incorrect #{t}"
238
+ end
239
+ end
240
+ def information_matrix
241
+ total_n=@matrix.total_sum
242
+ vars=@alpha.size+@beta.size+1
243
+ matrix=vars.times.map { vars.times.map {0}}
244
+ vars.times do |m|
245
+ vars.times do |n|
246
+ total=0
247
+ (@nr-1).times do |i|
248
+ (@nc-1).times do |j|
249
+ total+=(1.quo(pd[i][j]+EPSILON)) * im_function(m,i,j) * im_function(n,i,j)
250
+ end
251
+ end
252
+ matrix[m][n]=total_n*total
253
+ end
254
+ end
255
+ m=::Matrix.rows(matrix)
256
+
257
+ end
258
+ end # Processor
259
+ end # Polychoric
260
+ end # Bivariate
261
+ end # Statsample
data/spec/spec.opts ADDED
@@ -0,0 +1,3 @@
1
+ --color
2
+ -f s
3
+ -b
@@ -0,0 +1,47 @@
1
+ $:.unshift(File.dirname(__FILE__)+"/../../")
2
+ require 'spec_helper'
3
+ describe Statsample::Bivariate::Polychoric::Processor do
4
+ before do
5
+ @matrix=Matrix[[58,52,1],[26,58,3],[8,12,9]]
6
+ @alpha=[-0.027, 1.137]
7
+ @beta=[-0.240, 1.1578]
8
+ @rho=0.420
9
+ @processor=Statsample::Bivariate::Polychoric::Processor.new(@alpha,@beta,@rho,@matrix)
10
+ end
11
+ it "im_function method should return correct values according to index" do
12
+ @processor.im_function(0,0,0).should==@processor.fd_loglike_cell_rho(0,0)
13
+ @processor.im_function(1,0,0).should==@processor.fd_loglike_cell_a(0,0,0)
14
+ @processor.im_function(2,0,0).should==@processor.fd_loglike_cell_a(0,0,1)
15
+ @processor.im_function(3,1,0).should==@processor.fd_loglike_cell_b(1,0,0)
16
+ @processor.im_function(4,0,1).should==@processor.fd_loglike_cell_b(0,1,1)
17
+ lambda {@processor.im_function(5)}.should raise_error
18
+
19
+ end
20
+ it "should return informacion matrix" do
21
+ p @processor.information_matrix.inverse
22
+ end
23
+ it "fd a loglike should be equal usign eq.6 and eq.13" do
24
+ 2.times {|k|
25
+ @processor.fd_loglike_a_eq6(k).should be_close @processor.fd_loglike_a_eq13(k), 1e-10
26
+ }
27
+ end
28
+ it "fd b loglike should be equal usign eq.6 and eq.14" do
29
+ 2.times {|m|
30
+ @processor.fd_loglike_b_eq6(m).should be_close @processor.fd_loglike_b_eq14(m), 1e-10
31
+ }
32
+ end
33
+ it "fd rho should be equal usign fd_loglike_cell_rho and fd_loglike_rho" do
34
+ total=0
35
+
36
+ nr=@alpha.size+1
37
+ nc=@beta.size+1
38
+ nr.times do |i|
39
+ nc.times do |j|
40
+ pi=@processor.pd[i][j] + 1e-10
41
+ total+= (@matrix[i,j].quo(pi)) * @processor.fd_loglike_cell_rho(i,j)
42
+ end
43
+ end
44
+ total.should==@processor.fd_loglike_rho
45
+ end
46
+
47
+ end
@@ -14,13 +14,34 @@ describe "Statsample::Bivariate polychoric extension" do
14
14
  end
15
15
 
16
16
  describe Statsample::Bivariate::Polychoric do
17
+
17
18
  before do
18
19
  matrix=Matrix[[58,52,1],[26,58,3],[8,12,9]]
19
20
  @poly=Statsample::Bivariate::Polychoric.new(matrix)
20
21
  end
21
22
  it "should have summary.size > 0" do
23
+ @poly.method=:two_step
22
24
  @poly.summary.size.should>0
23
25
  end
26
+ def check_joint
27
+ @poly.r.should be_close(0.4192, 0.0001)
28
+ @poly.threshold_x[0].should be_close(-0.0297, 0.0001)
29
+ @poly.threshold_x[1].should be_close(1.1331, 0.0001)
30
+
31
+ @poly.threshold_y[0].should be_close(-0.2421, 0.0001)
32
+ @poly.threshold_y[1].should be_close(1.5938 ,0.0001)
33
+ @poly.chi_square.should be_close(11.54,0.01)
34
+ end
35
+ it "compute joint estimation (without derivative) using gsl" do
36
+ @poly.compute_one_step_mle_without_derivatives
37
+ check_joint
38
+
39
+ end
40
+ it "compute joint estimation (with derivative) using gsl" do
41
+ @poly.compute_one_step_mle_with_derivatives
42
+ check_joint
43
+ end
44
+
24
45
  def check_two_step
25
46
  @poly.r.should be_close(0.420, 0.001)
26
47
  @poly.threshold_y[0].should be_close(-0.240 ,0.001)
@@ -32,37 +53,24 @@ describe Statsample::Bivariate::Polychoric do
32
53
  @poly.compute_two_step_mle_drasgow_ruby
33
54
  check_two_step
34
55
  end
35
- if Statsample.has_gsl?
36
- it "compute two-step with gsl" do
37
- @poly.compute_two_step_mle_drasgow_gsl
38
- check_two_step
39
- end
40
- it "compute polychoric series using gsl" do
41
- @poly.method=:polychoric_series
42
- @poly.compute
43
-
44
- @poly.r.should be_close(0.556, 0.001)
45
- @poly.threshold_y[0].should be_close(-0.240 ,0.001)
46
- @poly.threshold_x[0].should be_close(-0.027 ,0.001)
47
- @poly.threshold_y[1].should be_close(1.578 ,0.001)
48
- @poly.threshold_x[1].should be_close(1.137 ,0.001)
49
- end
50
- it "compute joint estimation (without derivative) using gsl" do
51
- @poly.method=:joint
52
- @poly.compute
53
- @poly.method.should==:joint
54
- @poly.r.should be_close(0.4192, 0.0001)
55
- @poly.threshold_y[0].should be_close(-0.2421, 0.0001)
56
- @poly.threshold_x[0].should be_close(-0.0297, 0.0001)
57
- @poly.threshold_y[1].should be_close(1.5938 ,0.0001)
58
- @poly.threshold_x[1].should be_close(1.1331, 0.0001)
59
- end
60
- else
61
- it "compute two-step with gsl requires rb-gsl"
62
- it "compute polychoric series requires rb-gsl"
63
- it "compute joint estimation requires rb-gsl"
56
+
57
+ it "compute two-step with gsl" do
58
+ pending("requires rb-gsl") unless Statsample.has_gsl?
59
+ @poly.compute_two_step_mle_drasgow_gsl
60
+ check_two_step
64
61
  end
62
+
63
+ it "compute polychoric series using gsl" do
64
+ pending("requires rb-gsl") unless Statsample.has_gsl?
65
65
 
66
-
66
+ @poly.method=:polychoric_series
67
+ @poly.compute
68
+
69
+ @poly.r.should be_close(0.556, 0.001)
70
+ @poly.threshold_y[0].should be_close(-0.240 ,0.001)
71
+ @poly.threshold_x[0].should be_close(-0.027 ,0.001)
72
+ @poly.threshold_y[1].should be_close(1.578 ,0.001)
73
+ @poly.threshold_x[1].should be_close(1.137 ,0.001)
74
+ end
67
75
 
68
76
  end
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 13
8
- - 2
9
- version: 0.13.2
8
+ - 3
9
+ version: 0.13.3
10
10
  platform: ruby
11
11
  authors:
12
12
  - Claudio Bustos
@@ -35,7 +35,7 @@ cert_chain:
35
35
  rpP0jjs0
36
36
  -----END CERTIFICATE-----
37
37
 
38
- date: 2010-06-21 00:00:00 -04:00
38
+ date: 2010-06-22 00:00:00 -04:00
39
39
  default_executable:
40
40
  dependencies:
41
41
  - !ruby/object:Gem::Dependency
@@ -89,8 +89,11 @@ files:
89
89
  - data/tetmat_test.txt
90
90
  - lib/statsample/bivariate/extension_version.rb
91
91
  - lib/statsample/bivariate/polychoric.rb
92
+ - lib/statsample/bivariate/polychoric/processor.rb
92
93
  - lib/statsample/bivariate/tetrachoric.rb
94
+ - spec/spec.opts
93
95
  - spec/spec_helper.rb
96
+ - spec/statsample/bivariate/polychoric_processor_spec.rb
94
97
  - spec/statsample/bivariate/polychoric_spec.rb
95
98
  - spec/statsample/bivariate/tetrachoric_spec.rb
96
99
  has_rdoc: true
metadata.gz.sig CHANGED
Binary file