statsample-bivariate-extension 0.13.2 → 0.13.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data.tar.gz.sig CHANGED
Binary file
data/History.txt CHANGED
@@ -1,3 +1,10 @@
1
+ === 0.13.3 / 2010-06-22
2
+
3
+ * Bug fix on Processor.fd_loglike_cell_rho.
4
+ * Chi square added on spec.
5
+ * Olsson (1979) added on references
6
+ * Derivative version of joint estimate operational
7
+
1
8
  === 0.13.1 / 2010-06-21
2
9
  * Better specs. Bug fix on Manifest.txt
3
10
 
data/Manifest.txt CHANGED
@@ -6,7 +6,10 @@ data/tetmat_matrix.txt
6
6
  data/tetmat_test.txt
7
7
  lib/statsample/bivariate/extension_version.rb
8
8
  lib/statsample/bivariate/polychoric.rb
9
+ lib/statsample/bivariate/polychoric/processor.rb
9
10
  lib/statsample/bivariate/tetrachoric.rb
11
+ spec/spec.opts
10
12
  spec/spec_helper.rb
13
+ spec/statsample/bivariate/polychoric_processor_spec.rb
11
14
  spec/statsample/bivariate/polychoric_spec.rb
12
15
  spec/statsample/bivariate/tetrachoric_spec.rb
data/Rakefile CHANGED
@@ -2,6 +2,7 @@
2
2
  # -*- ruby -*-
3
3
 
4
4
  require 'rubygems'
5
+ require 'spec/rake/spectask'
5
6
  require 'hoe'
6
7
 
7
8
  Hoe.plugin :git
@@ -15,4 +16,13 @@ Hoe.spec 'statsample-bivariate-extension' do
15
16
  self.developer('Claudio Bustos', 'clbustos_at_gmail.com')
16
17
  end
17
18
 
19
+
20
+
21
+ desc "Run all spec with RCov"
22
+ Spec::Rake::SpecTask.new('test_with_rcov') do |t|
23
+ t.spec_files = FileList['spec/**/*.rb']
24
+ t.rcov = true
25
+ t.rcov_opts = ['--exclude', 'spec']
26
+ end
27
+
18
28
  # vim: syntax=ruby
@@ -1,6 +1,6 @@
1
1
  module Statsample
2
2
  module Bivariate
3
- EXTENSION_VERSION="0.13.2"
3
+ EXTENSION_VERSION="0.13.3"
4
4
  end
5
5
  end
6
6
 
@@ -1,4 +1,5 @@
1
1
  require 'minimization'
2
+ require 'statsample/bivariate/polychoric/processor'
2
3
  module Statsample
3
4
  module Bivariate
4
5
  # Calculate Polychoric correlation for two vectors.
@@ -44,11 +45,11 @@ module Statsample
44
45
  # the polychoric correlation:
45
46
  #
46
47
  # 1. Maximum Likelihood Estimator
47
- # 2. Two-step estimator and
48
+ # 2. Two-step estimator
48
49
  # 3. Polychoric series estimate.
49
50
  #
50
51
  # By default, two-step estimation are used. You can select
51
- # the estimation method with method attribute. Joint estimate and polychoric series requires gsl library and rb-gsl.
52
+ # the estimation method with method attribute. Joint estimate and polychoric series require gsl library and rb-gsl. Joint estimate uses Olsson(1979) derivatives and two-step uses a derivative-free method.
52
53
  #
53
54
  # == Use
54
55
  #
@@ -71,69 +72,13 @@ module Statsample
71
72
  #
72
73
  # == References
73
74
  #
74
- # * Uebersax, J.S. (2006). The tetrachoric and polychoric correlation coefficients. Statistical Methods for Rater Agreement web site. 2006. Available at: http://john-uebersax.com/stat/tetra.htm . Accessed February, 11, 2010
75
75
  # * Drasgow F. (2006). Polychoric and polyserial correlations. In Kotz L, Johnson NL (Eds.), Encyclopedia of statistical sciences. Vol. 7 (pp. 69-74). New York: Wiley.
76
+ # * Olsson, U. (1979) Maximum likelihood estimation of the polychoric correlation coefficient. Psychometrika 44, 443-460.
77
+ # * Uebersax, J.S. (2006). The tetrachoric and polychoric correlation coefficients. Statistical Methods for Rater Agreement web site. 2006. Available at: http://john-uebersax.com/stat/tetra.htm . Accessed February, 11, 2010
78
+
76
79
 
77
80
  class Polychoric
78
81
  include Summarizable
79
- class Processor
80
- attr_reader :alpha, :beta, :rho
81
- def initialize(alpha,beta,rho)
82
- @alpha=alpha
83
- @beta=beta
84
- @nr=@alpha.size+1
85
- @nc=@beta.size+1
86
- @rho=rho
87
- @pd=nil
88
- end
89
- def bipdf(i,j)
90
- Distribution::NormalBivariate.pdf(a(i), b(j), rho)
91
- end
92
- def a(i)
93
- i < 0 ? -100 : (i==@nr-1 ? 100 : alpha[i])
94
- end
95
- def b(j)
96
- j < 0 ? -100 : (j==@nc-1 ? 100 : beta[j])
97
- end
98
- # Equation(10) from Olsson(1979)
99
- def fd_loglike_cell_a(i,j,k)
100
- if k==i
101
- Distribution::NormalBivariate.pd_cdf_x(a(k),b(j), rho) - Distribution::NormalBivariate.pd_cdf_x(a(k),b(j-1),rho)
102
- elsif k==(i-1)
103
- -Distribution::NormalBivariate.pd_cdf_x(a(k),b(j),rho) + Distribution::NormalBivariate.pd_cdf_x(a(k),b(j-1),rho)
104
- else
105
- 0
106
- end
107
-
108
- end
109
- # phi_ij for each i and j
110
- # Uses equation(4) from Olsson(1979)
111
- def pd
112
- if @pd.nil?
113
- @pd=@nr.times.collect{ [0] * @nc}
114
- pc=@nr.times.collect{ [0] * @nc}
115
- @nr.times do |i|
116
- @nc.times do |j|
117
-
118
- if i==@nr-1 and j==@nc-1
119
- @pd[i][j]=1.0
120
- else
121
- a=(i==@nr-1) ? 100: alpha[i]
122
- b=(j==@nc-1) ? 100: beta[j]
123
- #puts "a:#{a} b:#{b}"
124
- @pd[i][j]=Distribution::NormalBivariate.cdf(a, b, rho)
125
- end
126
- pc[i][j] = @pd[i][j]
127
- @pd[i][j] = @pd[i][j] - pc[i-1][j] if i>0
128
- @pd[i][j] = @pd[i][j] - pc[i][j-1] if j>0
129
- @pd[i][j] = @pd[i][j] + pc[i-1][j-1] if (i>0 and j>0)
130
- end
131
- end
132
- end
133
- @pd
134
- end
135
- end
136
-
137
82
  include DirtyMemoize
138
83
  # Name of the analysis
139
84
  attr_accessor :name
@@ -258,133 +203,8 @@ module Statsample
258
203
  def chi_square_df
259
204
  (@nr*@nc)-@nc-@nr
260
205
  end
261
-
262
-
263
-
264
-
265
- # Retrieve all cell probabilities for givens alpha, beta and rho
266
- def cell_probabilities(alpha,beta,rho)
267
- pd=@nr.times.collect{ [0] * @nc}
268
- pc=@nr.times.collect{ [0] * @nc}
269
- @nr.times do |i|
270
- @nc.times do |j|
271
-
272
- if i==@nr-1 and j==@nc-1
273
- pd[i][j]=1.0
274
- else
275
- a=(i==@nr-1) ? 100: alpha[i]
276
- b=(j==@nc-1) ? 100: beta[j]
277
- #puts "a:#{a} b:#{b}"
278
- pd[i][j]=Distribution::NormalBivariate.cdf(a, b, rho)
279
- end
280
- pc[i][j] = pd[i][j]
281
- pd[i][j] = pd[i][j] - pc[i-1][j] if i>0
282
- pd[i][j] = pd[i][j] - pc[i][j-1] if j>0
283
- pd[i][j] = pd[i][j] + pc[i-1][j-1] if (i>0 and j>0)
284
- end
285
- end
286
- @pd=pd
287
- pd
288
- end
289
- def loglike(alpha,beta,rho)
290
- if rho.abs>0.9999
291
- rho= (rho>0) ? 0.9999 : -0.9999
292
- end
293
- pr=Processor.new(alpha,beta,rho)
294
- loglike=0
295
-
296
-
297
- @nr.times do |i|
298
- @nc.times do |j|
299
- res=pr.pd[i][j]+EPSILON
300
- loglike+= @matrix[i,j] * Math::log( res )
301
- end
302
- end
303
- -loglike
304
- end
305
- # First derivate for rho
306
- # Uses equation (9) from Olsson(1979)
307
- def fd_loglike_rho(alpha,beta,rho)
308
- if rho.abs>0.9999
309
- rho= (rho>0) ? 0.9999 : -0.9999
310
- end
311
- total=0
312
- pr=Processor.new(alpha,beta,rho)
313
- @nr.times do |i|
314
- @nc.times do |j|
315
- pi=pr.pd[i][j] + EPSILON
316
- total+= (@matrix[i,j] / pi) * (pr.bipdf(i,j)-pr.bipdf(i-1,j)-pr.bipdf(i,j-1)+pr.bipdf(i-1,j-1))
317
- end
318
- end
319
- total
320
- end
321
206
 
322
- # First derivative for alpha_k
323
- def fd_loglike_a(alpha,beta,rho,k)
324
- fd_loglike_a_eq6(alpha,beta,rho,k)
325
- end
326
- # Uses equation (6) from Olsson(1979)
327
- def fd_loglike_a_eq6(alpha,beta,rho,k)
328
- if rho.abs>0.9999
329
- rho= (rho>0) ? 0.9999 : -0.9999
330
- end
331
- pr=Processor.new(alpha,beta,rho)
332
- total=0
333
- pd=pr.pd
334
- @nr.times do |i|
335
- @nc.times do |j|
336
- total+=@matrix[i,j].quo(pd[i][j]+EPSILON) * pr.fd_loglike_cell_a(i,j,k)
337
- end
338
- end
339
- total
340
- end
341
- # Uses equation(13) from Olsson(1979)
342
- def fd_loglike_a_eq13(alpha,beta,rho,k)
343
- if rho.abs>0.9999
344
- rho= (rho>0) ? 0.9999 : -0.9999
345
- end
346
- pr=Processor.new(alpha,beta,rho)
347
- total=0
348
- a_k=pr.a(k)
349
- pd=pr.pd
350
- @nc.times do |j|
351
- #puts "j: #{j}"
352
- #puts "b #{j} : #{b.call(j)}"
353
- #puts "b #{j-1} : #{b.call(j-1)}"
354
-
355
- e_1=@matrix[k,j].quo(pd[k][j]+EPSILON) - @matrix[k+1,j].quo(pd[k+1][j]+EPSILON)
356
- e_2=Distribution::Normal.pdf(a_k)
357
- e_3=Distribution::Normal.cdf((pr.b(j)-rho*a_k).quo(Math::sqrt(1-rho**2))) - Distribution::Normal.cdf((pr.b(j-1)-rho*a_k).quo(Math::sqrt(1-rho**2)))
358
- #puts "val #{j}: #{e_1} | #{e_2} | #{e_3}"
359
-
360
- total+= e_1*e_2*e_3
361
- end
362
- total
363
- end
364
- # First derivative for beta_m
365
- # Uses equation(14) from Olsson(1979)
366
- def fd_loglike_b(alpha,beta,rho,m)
367
- if rho.abs>0.9999
368
- rho= (rho>0) ? 0.9999 : -0.9999
369
- end
370
- pr=Processor.new(alpha,beta,rho)
371
- total=0
372
- b_m=pr.b m
373
- pd=pr.pd
374
- @nr.times do |i|
375
- #puts "j: #{j}"
376
- #puts "b #{j} : #{b.call(j)}"
377
- #puts "b #{j-1} : #{b.call(j-1)}"
378
-
379
- e_1=@matrix[i,m].quo(pd[i][m]+EPSILON) - @matrix[i,m+1].quo(pd[i][m+1]+EPSILON)
380
- e_2=Distribution::Normal.pdf(b_m)
381
- e_3=Distribution::Normal.cdf((pr.a(i)-rho*b_m).quo(Math::sqrt(1-rho**2))) - Distribution::Normal.cdf((pr.a(i-1)-rho*b_m).quo(Math::sqrt(1-rho**2)))
382
- #puts "val #{j}: #{e_1} | #{e_2} | #{e_3}"
383
-
384
- total+= e_1*e_2*e_3
385
- end
386
- total
387
- end
207
+
388
208
 
389
209
 
390
210
  def compute_basic_parameters
@@ -442,7 +262,8 @@ module Statsample
442
262
  def compute_two_step_mle_drasgow_ruby #:nodoc:
443
263
 
444
264
  f=proc {|rho|
445
- loglike(@alpha,@beta, rho)
265
+ pr=Processor.new(@alpha,@beta, rho, @matrix)
266
+ pr.loglike
446
267
  }
447
268
  @log=_("Minimizing using GSL Brent method\n")
448
269
  min=Minimization::Brent.new(-0.9999,0.9999,f)
@@ -459,8 +280,9 @@ module Statsample
459
280
 
460
281
  def compute_two_step_mle_drasgow_gsl #:nodoc:
461
282
 
462
- fn1=GSL::Function.alloc {|rho|
463
- loglike(@alpha,@beta, rho)
283
+ fn1=GSL::Function.alloc {|rho|
284
+ pr=Processor.new(@alpha,@beta, rho, @matrix)
285
+ pr.loglike
464
286
  }
465
287
  @iteration = 0
466
288
  max_iter = @max_iterations
@@ -498,12 +320,101 @@ module Statsample
498
320
  @loglike_model=-gmf.f_minimum
499
321
  end
500
322
 
323
+
324
+ def compute_derivatives_vector(v,df)
325
+ new_rho=v[0]
326
+ new_alpha=v[1, @nr-1]
327
+ new_beta=v[@nr, @nc-1]
328
+ if new_rho.abs>0.9999
329
+ new_rho= (new_rho>0) ? 0.9999 : -0.9999
330
+ end
331
+ pr=Processor.new(new_alpha,new_beta,new_rho,@matrix)
332
+
333
+ df[0]=-pr.fd_loglike_rho
334
+ new_alpha.to_a.each_with_index {|v,i|
335
+ df[i+1]=-pr.fd_loglike_a(i)
336
+ }
337
+ offset=new_alpha.size+1
338
+ new_beta.to_a.each_with_index {|v,i|
339
+ df[offset+i]=-pr.fd_loglike_b(i)
340
+ }
341
+ end
342
+
343
+ def compute_one_step_mle
344
+ compute_one_step_mle_with_derivatives
345
+ end
346
+
347
+
348
+ def compute_one_step_mle_with_derivatives
349
+ # Get initial values with two-step aproach
350
+ compute_two_step_mle_drasgow
351
+ # Start iteration with past values
352
+ rho=@r
353
+ cut_alpha=@alpha
354
+ cut_beta=@beta
355
+ parameters=[rho]+cut_alpha+cut_beta
356
+ np=@nc-1+@nr
357
+
358
+
359
+ loglike_f = Proc.new { |v, params|
360
+ new_rho=v[0]
361
+ new_alpha=v[1, @nr-1]
362
+ new_beta=v[@nr, @nc-1]
363
+ pr=Processor.new(new_alpha,new_beta,new_rho,@matrix)
364
+ pr.loglike
365
+ }
366
+
367
+ loglike_df = Proc.new {|v, params, df |
368
+ compute_derivatives_vector(v,df)
369
+ }
370
+
371
+
372
+ my_func = GSL::MultiMin::Function_fdf.alloc(loglike_f,loglike_df, np)
373
+ my_func.set_params(parameters) # parameters
374
+
375
+ x = GSL::Vector.alloc(parameters.dup)
376
+ minimizer = GSL::MultiMin::FdfMinimizer.alloc('conjugate_pr',np)
377
+ minimizer.set(my_func, x, 1, 1e-3)
378
+
379
+ iter = 0
380
+ message=""
381
+ begin_time=Time.new
382
+ begin
383
+ iter += 1
384
+ status = minimizer.iterate()
385
+ #p minimizer.f
386
+ #p minimizer.gradient
387
+ status = minimizer.test_gradient(1e-3)
388
+ if status == GSL::SUCCESS
389
+ total_time=Time.new-begin_time
390
+ message+="Joint MLE converged to minimum on %0.3f seconds at\n" % total_time
391
+ end
392
+ x = minimizer.x
393
+ message+= sprintf("%5d iterations", iter)+"\n";
394
+ message+= "args="
395
+ for i in 0...np do
396
+ message+=sprintf("%10.3e ", x[i])
397
+ end
398
+ message+=sprintf("f() = %7.3f\n" , minimizer.f)+"\n";
399
+ end while status == GSL::CONTINUE and iter < @max_iterations
400
+
401
+ @iteration=iter
402
+ @log+=message
403
+ @r=minimizer.x[0]
404
+ @alpha=minimizer.x[1,@nr-1].to_a
405
+ @beta=minimizer.x[@nr,@nc-1].to_a
406
+ @loglike_model= -minimizer.minimum
407
+
408
+ pr=Processor.new(@alpha,@beta,@r,@matrix)
409
+
410
+ end
411
+
501
412
  # Compute Polychoric correlation with joint estimate.
502
413
  # Rho and thresholds are estimated at same time.
503
414
  # Code based on R package "polycor", by J.Fox.
504
415
  #
505
416
 
506
- def compute_one_step_mle
417
+ def compute_one_step_mle_without_derivatives
507
418
  # Get initial values with two-step aproach
508
419
  compute_two_step_mle_drasgow
509
420
  # Start iteration with past values
@@ -511,10 +422,12 @@ module Statsample
511
422
  cut_alpha=@alpha
512
423
  cut_beta=@beta
513
424
  parameters=[rho]+cut_alpha+cut_beta
425
+ np=@nc-1+@nr
426
+
514
427
  minimization = Proc.new { |v, params|
515
- rho=v[0]
516
- alpha=v[1, @nr-1]
517
- beta=v[@nr, @nc-1]
428
+ new_rho=v[0]
429
+ new_alpha=v[1, @nr-1]
430
+ new_beta=v[@nr, @nc-1]
518
431
 
519
432
  #puts "f'rho=#{fd_loglike_rho(alpha,beta,rho)}"
520
433
  #(@nr-1).times {|k|
@@ -525,10 +438,12 @@ module Statsample
525
438
  #(@nc-1).times {|k|
526
439
  # puts "f'b(#{k}) = #{fd_loglike_b(alpha,beta,rho,k)}"
527
440
  #}
441
+ pr=Processor.new(new_alpha,new_beta,new_rho,@matrix)
528
442
 
529
- loglike(alpha,beta,rho)
443
+ df=Array.new(np)
444
+ #compute_derivatives_vector(v,df)
445
+ pr.loglike
530
446
  }
531
- np=@nc-1+@nr
532
447
  my_func = GSL::MultiMin::Function.alloc(minimization, np)
533
448
  my_func.set_params(parameters) # parameters
534
449
 
@@ -542,12 +457,14 @@ module Statsample
542
457
 
543
458
  iter = 0
544
459
  message=""
460
+ begin_time=Time.new
545
461
  begin
546
462
  iter += 1
547
463
  status = minimizer.iterate()
548
464
  status = minimizer.test_size(@epsilon)
549
465
  if status == GSL::SUCCESS
550
- message="Joint MLE converged to minimum at\n"
466
+ total_time=Time.new-begin_time
467
+ message="Joint MLE converged to minimum on %0.3f seconds at\n" % total_time
551
468
  end
552
469
  x = minimizer.x
553
470
  message+= sprintf("%5d iterations", iter)+"\n";
@@ -830,8 +747,8 @@ module Statsample
830
747
  end # 43
831
748
  raise "Error" if norts==0
832
749
  @r=pcorl
833
-
834
- @loglike_model=-loglike(@alpha, @beta, @r)
750
+ pr=Processor.new(@alpha,@beta,@r,@matrix)
751
+ @loglike_model=-pr.loglike
835
752
 
836
753
  end
837
754
  #Computes vector h(mm7) of orthogonal hermite...
@@ -878,7 +795,8 @@ module Statsample
878
795
  t.row([_("Threshold Y %d") % i, sprintf("%0.4f", val)])
879
796
  }
880
797
  section.add(t)
881
- section.add(_("Test of bivariate normality: X2 = %0.3f, df = %d, p= %0.5f" % [ chi_square, chi_square_df, 1-Distribution::ChiSquare.cdf(chi_square, chi_square_df)]))
798
+ section.add(_("Iterations: %d") % @iteration)
799
+ section.add(_("Test of bivariate normality: X^2 = %0.3f, df = %d, p= %0.5f" % [ chi_square, chi_square_df, 1-Distribution::ChiSquare.cdf(chi_square, chi_square_df)]))
882
800
  generator.parse_element(section)
883
801
  end
884
802
  end
@@ -0,0 +1,261 @@
1
+ module Statsample
2
+ module Bivariate
3
+ class Polychoric
4
+ # Provides statistics for a given combination of rho, alpha and beta and contingence table.
5
+ class Processor
6
+ attr_reader :alpha, :beta, :rho, :matrix
7
+ EPSILON=1e-10
8
+ def initialize(alpha,beta,rho,matrix=nil)
9
+ @alpha=alpha
10
+ @beta=beta
11
+ @matrix=matrix
12
+ @nr=@alpha.size+1
13
+ @nc=@beta.size+1
14
+ @rho=rho
15
+ @pd=nil
16
+ end
17
+
18
+ def bipdf(i,j)
19
+ Distribution::NormalBivariate.pdf(a(i), b(j), rho)
20
+ end
21
+
22
+ def loglike
23
+ rho=@rho
24
+ if rho.abs>0.9999
25
+ rho= (rho>0) ? 0.9999 : -0.9999
26
+ end
27
+ loglike=0
28
+ @nr.times do |i|
29
+ @nc.times do |j|
30
+ res=pd[i][j]+EPSILON
31
+ loglike+= @matrix[i,j] * Math::log( res )
32
+ end
33
+ end
34
+ -loglike
35
+ end
36
+
37
+ def a(i)
38
+ raise "Index #{i} should be <= #{@nr-1}" if i>@nr-1
39
+ i < 0 ? -100 : (i==@nr-1 ? 100 : alpha[i])
40
+ end
41
+ def b(j)
42
+ raise "Index #{j} should be <= #{@nc-1}" if j>@nc-1
43
+ j < 0 ? -100 : (j==@nc-1 ? 100 : beta[j])
44
+ end
45
+
46
+ def eq12(u,v)
47
+ Distribution::Normal.pdf(u)*Distribution::Normal.cdf((v-rho*u).quo( Math::sqrt(1-rho**2)))
48
+ end
49
+
50
+ def eq12b(u,v)
51
+ Distribution::Normal.pdf(v) * Distribution::Normal.cdf((u-rho*v).quo( Math::sqrt(1-rho**2)))
52
+
53
+ end
54
+ # Equation(8) from Olsson(1979)
55
+ def fd_loglike_cell_rho(i, j)
56
+ bipdf(i,j) - bipdf(i-1,j) - bipdf(i, j-1) + bipdf(i-1, j-1)
57
+ end
58
+ # Equation(10) from Olsson(1979)
59
+ def fd_loglike_cell_a(i, j, k)
60
+ =begin
61
+ if k==i
62
+ Distribution::NormalBivariate.pd_cdf_x(a(k),b(j), rho) - Distribution::NormalBivariate.pd_cdf_x(a(k),b(j-1),rho)
63
+ elsif k==(i-1)
64
+ -Distribution::NormalBivariate.pd_cdf_x(a(k),b(j),rho) + Distribution::NormalBivariate.pd_cdf_x(a(k),b(j-1),rho)
65
+ else
66
+ 0
67
+ end
68
+ =end
69
+ if k==i
70
+ eq12(a(k),b(j))-eq12(a(k), b(j-1))
71
+ elsif k==(i-1)
72
+ -eq12(a(k),b(j))+eq12(a(k), b(j-1))
73
+ else
74
+ 0
75
+ end
76
+ end
77
+
78
+ def fd_loglike_cell_b(i, j, m)
79
+ if m==j
80
+ eq12b(a(i),b(m))-eq12b(a(i-1),b(m))
81
+ elsif m==(j-1)
82
+ -eq12b(a(i),b(m))+eq12b(a(i-1),b(m))
83
+ else
84
+ 0
85
+ end
86
+ =begin
87
+ if m==j
88
+ Distribution::NormalBivariate.pd_cdf_x(a(i),b(m), rho) - Distribution::NormalBivariate.pd_cdf_x(a(i-1),b(m),rho)
89
+ elsif m==(j-1)
90
+ -Distribution::NormalBivariate.pd_cdf_x(a(i),b(m),rho) + Distribution::NormalBivariate.pd_cdf_x(a(i-1),b(m),rho)
91
+ else
92
+ 0
93
+ end
94
+ =end
95
+
96
+
97
+ end
98
+
99
+ # phi_ij for each i and j
100
+ # Uses equation(4) from Olsson(1979)
101
+ def pd
102
+ if @pd.nil?
103
+ @pd=@nr.times.collect{ [0] * @nc}
104
+ pc=@nr.times.collect{ [0] * @nc}
105
+ @nr.times do |i|
106
+ @nc.times do |j|
107
+
108
+ if i==@nr-1 and j==@nc-1
109
+ @pd[i][j]=1.0
110
+ else
111
+ a=(i==@nr-1) ? 100: alpha[i]
112
+ b=(j==@nc-1) ? 100: beta[j]
113
+ #puts "a:#{a} b:#{b}"
114
+ @pd[i][j]=Distribution::NormalBivariate.cdf(a, b, rho)
115
+ end
116
+ pc[i][j] = @pd[i][j]
117
+ @pd[i][j] = @pd[i][j] - pc[i-1][j] if i>0
118
+ @pd[i][j] = @pd[i][j] - pc[i][j-1] if j>0
119
+ @pd[i][j] = @pd[i][j] + pc[i-1][j-1] if (i>0 and j>0)
120
+ end
121
+ end
122
+ end
123
+ @pd
124
+ end
125
+
126
+ # First derivate for rho
127
+ # Uses equation (9) from Olsson(1979)
128
+
129
+ def fd_loglike_rho
130
+ rho=@rho
131
+ if rho.abs>0.9999
132
+ rho= (rho>0) ? 0.9999 : -0.9999
133
+ end
134
+ total=0
135
+ @nr.times do |i|
136
+ @nc.times do |j|
137
+ pi=pd[i][j] + EPSILON
138
+ total+= (@matrix[i,j].quo(pi)) * (bipdf(i,j)-bipdf(i-1,j)-bipdf(i,j-1)+bipdf(i-1,j-1))
139
+ end
140
+ end
141
+ total
142
+ end
143
+
144
+ # First derivative for alpha_k
145
+ # Uses equation (6)
146
+ def fd_loglike_a(k)
147
+ fd_loglike_a_eq6(k)
148
+ end
149
+
150
+
151
+
152
+ # Uses equation (6) from Olsson(1979)
153
+ def fd_loglike_a_eq6(k)
154
+ rho=@rho
155
+ if rho.abs>0.9999
156
+ rho= (rho>0) ? 0.9999 : -0.9999
157
+ end
158
+ total=0
159
+ @nr.times do |i|
160
+ @nc.times do |j|
161
+ total+=@matrix[i,j].quo(pd[i][j]+EPSILON) * fd_loglike_cell_a(i,j,k)
162
+ end
163
+ end
164
+ total
165
+ end
166
+
167
+
168
+ # Uses equation(13) from Olsson(1979)
169
+ def fd_loglike_a_eq13(k)
170
+ rho=@rho
171
+ if rho.abs>0.9999
172
+ rho= (rho>0) ? 0.9999 : -0.9999
173
+ end
174
+ total=0
175
+ a_k=a(k)
176
+ @nc.times do |j|
177
+ #puts "j: #{j}"
178
+ #puts "b #{j} : #{b.call(j)}"
179
+ #puts "b #{j-1} : #{b.call(j-1)}"
180
+
181
+ e_1=@matrix[k,j].quo(pd[k][j]+EPSILON) - @matrix[k+1,j].quo(pd[k+1][j]+EPSILON)
182
+ e_2=Distribution::Normal.pdf(a_k)
183
+ e_3=Distribution::Normal.cdf((b(j)-rho*a_k).quo(Math::sqrt(1-rho**2))) - Distribution::Normal.cdf((b(j-1)-rho*a_k).quo(Math::sqrt(1-rho**2)))
184
+ #puts "val #{j}: #{e_1} | #{e_2} | #{e_3}"
185
+ total+= e_1*e_2*e_3
186
+ end
187
+ total
188
+ end
189
+ # First derivative for b
190
+ # Uses equation 6 (Olsson, 1979)
191
+ def fd_loglike_b_eq6(m)
192
+ rho=@rho
193
+ if rho.abs>0.9999
194
+ rho= (rho>0) ? 0.9999 : -0.9999
195
+ end
196
+ total=0
197
+ @nr.times do |i|
198
+ @nc.times do |j|
199
+ total+=@matrix[i,j].quo(pd[i][j]+EPSILON) * fd_loglike_cell_b(i,j,m)
200
+ end
201
+ end
202
+ total
203
+ end
204
+ # First derivative for beta_m.
205
+ # Uses equation 6 (Olsson,1979)
206
+ def fd_loglike_b(m)
207
+ fd_loglike_b_eq14(m)
208
+ end
209
+ # First derivative for beta_m
210
+ # Uses equation(14) from Olsson(1979)
211
+ def fd_loglike_b_eq14(m)
212
+ rho=@rho
213
+ if rho.abs>0.9999
214
+ rho= (rho>0) ? 0.9999 : -0.9999
215
+ end
216
+ total=0
217
+ b_m=b(m)
218
+ @nr.times do |i|
219
+ e_1=@matrix[i,m].quo(pd[i][m]+EPSILON) - @matrix[i,m+1].quo(pd[i][m+1]+EPSILON)
220
+ e_2=Distribution::Normal.pdf(b_m)
221
+ e_3=Distribution::Normal.cdf((a(i)-rho*b_m).quo(Math::sqrt(1-rho**2))) - Distribution::Normal.cdf((a(i-1)-rho*b_m).quo(Math::sqrt(1-rho**2)))
222
+ #puts "val #{j}: #{e_1} | #{e_2} | #{e_3}"
223
+
224
+ total+= e_1*e_2*e_3
225
+ end
226
+ total
227
+ end
228
+ # Returns the derivative correct according to order
229
+ def im_function(t,i,j)
230
+ if t==0
231
+ fd_loglike_cell_rho(i,j)
232
+ elsif t>=1 and t<=@alpha.size
233
+ fd_loglike_cell_a(i,j,t-1)
234
+ elsif t>=@alpha.size+1 and t<=(@alpha.size+@beta.size)
235
+ fd_loglike_cell_b(i,j,t-@alpha.size-1)
236
+ else
237
+ raise "incorrect #{t}"
238
+ end
239
+ end
240
+ def information_matrix
241
+ total_n=@matrix.total_sum
242
+ vars=@alpha.size+@beta.size+1
243
+ matrix=vars.times.map { vars.times.map {0}}
244
+ vars.times do |m|
245
+ vars.times do |n|
246
+ total=0
247
+ (@nr-1).times do |i|
248
+ (@nc-1).times do |j|
249
+ total+=(1.quo(pd[i][j]+EPSILON)) * im_function(m,i,j) * im_function(n,i,j)
250
+ end
251
+ end
252
+ matrix[m][n]=total_n*total
253
+ end
254
+ end
255
+ m=::Matrix.rows(matrix)
256
+
257
+ end
258
+ end # Processor
259
+ end # Polychoric
260
+ end # Bivariate
261
+ end # Statsample
data/spec/spec.opts ADDED
@@ -0,0 +1,3 @@
1
+ --color
2
+ -f s
3
+ -b
@@ -0,0 +1,47 @@
1
+ $:.unshift(File.dirname(__FILE__)+"/../../")
2
+ require 'spec_helper'
3
+ describe Statsample::Bivariate::Polychoric::Processor do
4
+ before do
5
+ @matrix=Matrix[[58,52,1],[26,58,3],[8,12,9]]
6
+ @alpha=[-0.027, 1.137]
7
+ @beta=[-0.240, 1.1578]
8
+ @rho=0.420
9
+ @processor=Statsample::Bivariate::Polychoric::Processor.new(@alpha,@beta,@rho,@matrix)
10
+ end
11
+ it "im_function method should return correct values according to index" do
12
+ @processor.im_function(0,0,0).should==@processor.fd_loglike_cell_rho(0,0)
13
+ @processor.im_function(1,0,0).should==@processor.fd_loglike_cell_a(0,0,0)
14
+ @processor.im_function(2,0,0).should==@processor.fd_loglike_cell_a(0,0,1)
15
+ @processor.im_function(3,1,0).should==@processor.fd_loglike_cell_b(1,0,0)
16
+ @processor.im_function(4,0,1).should==@processor.fd_loglike_cell_b(0,1,1)
17
+ lambda {@processor.im_function(5)}.should raise_error
18
+
19
+ end
20
+ it "should return information matrix" do
21
+ p @processor.information_matrix.inverse
22
+ end
23
+ it "fd a loglike should be equal using eq.6 and eq.13" do
24
+ 2.times {|k|
25
+ @processor.fd_loglike_a_eq6(k).should be_close @processor.fd_loglike_a_eq13(k), 1e-10
26
+ }
27
+ end
28
+ it "fd b loglike should be equal using eq.6 and eq.14" do
29
+ 2.times {|m|
30
+ @processor.fd_loglike_b_eq6(m).should be_close @processor.fd_loglike_b_eq14(m), 1e-10
31
+ }
32
+ end
33
+ it "fd rho should be equal using fd_loglike_cell_rho and fd_loglike_rho" do
34
+ total=0
35
+
36
+ nr=@alpha.size+1
37
+ nc=@beta.size+1
38
+ nr.times do |i|
39
+ nc.times do |j|
40
+ pi=@processor.pd[i][j] + 1e-10
41
+ total+= (@matrix[i,j].quo(pi)) * @processor.fd_loglike_cell_rho(i,j)
42
+ end
43
+ end
44
+ total.should==@processor.fd_loglike_rho
45
+ end
46
+
47
+ end
@@ -14,13 +14,34 @@ describe "Statsample::Bivariate polychoric extension" do
14
14
  end
15
15
 
16
16
  describe Statsample::Bivariate::Polychoric do
17
+
17
18
  before do
18
19
  matrix=Matrix[[58,52,1],[26,58,3],[8,12,9]]
19
20
  @poly=Statsample::Bivariate::Polychoric.new(matrix)
20
21
  end
21
22
  it "should have summary.size > 0" do
23
+ @poly.method=:two_step
22
24
  @poly.summary.size.should>0
23
25
  end
26
+ def check_joint
27
+ @poly.r.should be_close(0.4192, 0.0001)
28
+ @poly.threshold_x[0].should be_close(-0.0297, 0.0001)
29
+ @poly.threshold_x[1].should be_close(1.1331, 0.0001)
30
+
31
+ @poly.threshold_y[0].should be_close(-0.2421, 0.0001)
32
+ @poly.threshold_y[1].should be_close(1.5938 ,0.0001)
33
+ @poly.chi_square.should be_close(11.54,0.01)
34
+ end
35
+ it "compute joint estimation (without derivative) using gsl" do
36
+ @poly.compute_one_step_mle_without_derivatives
37
+ check_joint
38
+
39
+ end
40
+ it "compute joint estimation (with derivative) using gsl" do
41
+ @poly.compute_one_step_mle_with_derivatives
42
+ check_joint
43
+ end
44
+
24
45
  def check_two_step
25
46
  @poly.r.should be_close(0.420, 0.001)
26
47
  @poly.threshold_y[0].should be_close(-0.240 ,0.001)
@@ -32,37 +53,24 @@ describe Statsample::Bivariate::Polychoric do
32
53
  @poly.compute_two_step_mle_drasgow_ruby
33
54
  check_two_step
34
55
  end
35
- if Statsample.has_gsl?
36
- it "compute two-step with gsl" do
37
- @poly.compute_two_step_mle_drasgow_gsl
38
- check_two_step
39
- end
40
- it "compute polychoric series using gsl" do
41
- @poly.method=:polychoric_series
42
- @poly.compute
43
-
44
- @poly.r.should be_close(0.556, 0.001)
45
- @poly.threshold_y[0].should be_close(-0.240 ,0.001)
46
- @poly.threshold_x[0].should be_close(-0.027 ,0.001)
47
- @poly.threshold_y[1].should be_close(1.578 ,0.001)
48
- @poly.threshold_x[1].should be_close(1.137 ,0.001)
49
- end
50
- it "compute joint estimation (without derivative) using gsl" do
51
- @poly.method=:joint
52
- @poly.compute
53
- @poly.method.should==:joint
54
- @poly.r.should be_close(0.4192, 0.0001)
55
- @poly.threshold_y[0].should be_close(-0.2421, 0.0001)
56
- @poly.threshold_x[0].should be_close(-0.0297, 0.0001)
57
- @poly.threshold_y[1].should be_close(1.5938 ,0.0001)
58
- @poly.threshold_x[1].should be_close(1.1331, 0.0001)
59
- end
60
- else
61
- it "compute two-step with gsl requires rb-gsl"
62
- it "compute polychoric series requires rb-gsl"
63
- it "compute joint estimation requires rb-gsl"
56
+
57
+ it "compute two-step with gsl" do
58
+ pending("requires rb-gsl") unless Statsample.has_gsl?
59
+ @poly.compute_two_step_mle_drasgow_gsl
60
+ check_two_step
64
61
  end
62
+
63
+ it "compute polychoric series using gsl" do
64
+ pending("requires rb-gsl") unless Statsample.has_gsl?
65
65
 
66
-
66
+ @poly.method=:polychoric_series
67
+ @poly.compute
68
+
69
+ @poly.r.should be_close(0.556, 0.001)
70
+ @poly.threshold_y[0].should be_close(-0.240 ,0.001)
71
+ @poly.threshold_x[0].should be_close(-0.027 ,0.001)
72
+ @poly.threshold_y[1].should be_close(1.578 ,0.001)
73
+ @poly.threshold_x[1].should be_close(1.137 ,0.001)
74
+ end
67
75
 
68
76
  end
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 13
8
- - 2
9
- version: 0.13.2
8
+ - 3
9
+ version: 0.13.3
10
10
  platform: ruby
11
11
  authors:
12
12
  - Claudio Bustos
@@ -35,7 +35,7 @@ cert_chain:
35
35
  rpP0jjs0
36
36
  -----END CERTIFICATE-----
37
37
 
38
- date: 2010-06-21 00:00:00 -04:00
38
+ date: 2010-06-22 00:00:00 -04:00
39
39
  default_executable:
40
40
  dependencies:
41
41
  - !ruby/object:Gem::Dependency
@@ -89,8 +89,11 @@ files:
89
89
  - data/tetmat_test.txt
90
90
  - lib/statsample/bivariate/extension_version.rb
91
91
  - lib/statsample/bivariate/polychoric.rb
92
+ - lib/statsample/bivariate/polychoric/processor.rb
92
93
  - lib/statsample/bivariate/tetrachoric.rb
94
+ - spec/spec.opts
93
95
  - spec/spec_helper.rb
96
+ - spec/statsample/bivariate/polychoric_processor_spec.rb
94
97
  - spec/statsample/bivariate/polychoric_spec.rb
95
98
  - spec/statsample/bivariate/tetrachoric_spec.rb
96
99
  has_rdoc: true
metadata.gz.sig CHANGED
Binary file