statsample 0.12.0 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data.tar.gz.sig +2 -1
- data/History.txt +11 -0
- data/Manifest.txt +2 -3
- data/README.txt +0 -17
- data/Rakefile +10 -9
- data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
- data/examples/principal_axis.rb +2 -0
- data/examples/u_test.rb +8 -0
- data/lib/distribution.rb +1 -1
- data/lib/statsample.rb +12 -12
- data/lib/statsample/anova/oneway.rb +4 -4
- data/lib/statsample/bivariate.rb +10 -3
- data/lib/statsample/bivariate/pearson.rb +55 -0
- data/lib/statsample/dataset.rb +57 -49
- data/lib/statsample/dominanceanalysis.rb +1 -2
- data/lib/statsample/dominanceanalysis/bootstrap.rb +46 -54
- data/lib/statsample/factor.rb +0 -1
- data/lib/statsample/factor/parallelanalysis.rb +9 -13
- data/lib/statsample/factor/pca.rb +5 -10
- data/lib/statsample/factor/principalaxis.rb +27 -33
- data/lib/statsample/matrix.rb +11 -11
- data/lib/statsample/mle.rb +0 -1
- data/lib/statsample/regression.rb +0 -1
- data/lib/statsample/reliability.rb +2 -2
- data/lib/statsample/reliability/multiscaleanalysis.rb +62 -15
- data/lib/statsample/reliability/scaleanalysis.rb +5 -6
- data/lib/statsample/test/f.rb +2 -5
- data/lib/statsample/test/levene.rb +2 -5
- data/lib/statsample/test/t.rb +4 -13
- data/lib/statsample/test/umannwhitney.rb +19 -19
- data/po/es/statsample.mo +0 -0
- data/po/es/statsample.po +304 -111
- data/po/statsample.pot +224 -90
- data/test/test_bivariate.rb +8 -69
- data/test/test_reliability.rb +3 -4
- metadata +30 -18
- metadata.gz.sig +0 -0
- data/lib/statsample/bivariate/polychoric.rb +0 -893
- data/lib/statsample/bivariate/tetrachoric.rb +0 -457
- data/test/test_bivariate_polychoric.rb +0 -70
data/test/test_bivariate.rb
CHANGED
@@ -39,76 +39,15 @@ class StatsampleBivariateTestCase < MiniTest::Unit::TestCase
|
|
39
39
|
# Test ruby method
|
40
40
|
v3a,v4a=Statsample.only_valid v3, v4
|
41
41
|
assert_in_delta(0.525, Statsample::Bivariate.pearson_slow(v3a,v4a),0.001)
|
42
|
-
|
43
|
-
end
|
44
|
-
def test_tetrachoric_matrix
|
45
|
-
ds=Statsample::PlainText.read(File.dirname(__FILE__)+"/../data/tetmat_test.txt", %w{a b c d e})
|
46
|
-
tcm_obs=Statsample::Bivariate.tetrachoric_correlation_matrix(ds)
|
47
|
-
tcm_exp=Statsample::PlainText.read(File.dirname(__FILE__)+"/../data/tetmat_matrix.txt", %w{a b c d e}).to_matrix
|
48
|
-
tcm_obs.row_size.times do |i|
|
49
|
-
tcm_obs.column_size do |j|
|
50
|
-
assert_in_delta(tcm_obs[i,j], tcm_exp[i,k], 0.00001)
|
51
|
-
end
|
52
|
-
end
|
53
42
|
end
|
54
|
-
def
|
55
|
-
2.
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
if Statsample.has_gsl?
|
63
|
-
poly.compute_two_step_mle_drasgow_gsl
|
64
|
-
assert_in_delta(tetra.r,poly.r,0.0001)
|
65
|
-
else
|
66
|
-
skip "compute_two_step_mle_drasgow_gsl not tested (requires GSL)"
|
67
|
-
end
|
68
|
-
}
|
69
|
-
end
|
70
|
-
|
71
|
-
def test_tetrachoric
|
72
|
-
a,b,c,d=0,0,0,0
|
73
|
-
assert_raises RuntimeError do
|
74
|
-
tc = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
|
75
|
-
end
|
76
|
-
a,b,c,d=10,10,0,0
|
77
|
-
assert_raises RuntimeError do
|
78
|
-
tc = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
|
79
|
-
end
|
80
|
-
a,b,c,d=10,0,10,0
|
81
|
-
assert_raises RuntimeError do
|
82
|
-
tc = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
|
83
|
-
end
|
84
|
-
a,b,c,d=10,0,0,10
|
85
|
-
tc = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
|
86
|
-
assert_equal(1,tc.r)
|
87
|
-
assert_equal(0,tc.se)
|
88
|
-
a,b,c,d=0,10,10,0
|
89
|
-
tc = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
|
90
|
-
assert_equal(-1,tc.r)
|
91
|
-
assert_equal(0,tc.se)
|
92
|
-
|
93
|
-
a,b,c,d = 30,40,70,20
|
94
|
-
tc = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
|
95
|
-
assert_in_delta(-0.53980,tc.r,0.0001)
|
96
|
-
assert_in_delta(0.09940,tc.se,0.0001)
|
97
|
-
assert_in_delta(-0.15731,tc.threshold_x, 0.0001)
|
98
|
-
assert_in_delta(0.31864,tc.threshold_y, 0.0001)
|
99
|
-
|
100
|
-
x=%w{a a a a b b b a b b a a b b}.to_vector
|
101
|
-
y=%w{0 0 1 1 0 0 1 1 1 1 0 0 1 1}.to_vector
|
102
|
-
# crosstab
|
103
|
-
# 0 1
|
104
|
-
# a 4 3
|
105
|
-
# b 2 5
|
106
|
-
a,b,c,d=4,3,2,5
|
107
|
-
tc1 = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
|
108
|
-
tc2 = Statsample::Bivariate::Tetrachoric.new_with_vectors(x,y)
|
109
|
-
assert_equal(tc1.r,tc2.r)
|
110
|
-
assert_equal(tc1.se,tc2.se)
|
111
|
-
assert(tc.summary)
|
43
|
+
def test_bivariate_pearson
|
44
|
+
v1=[6,5,4,7,8,4,3,2].to_vector(:scale)
|
45
|
+
v2=[2,3,7,8,6,4,3,2].to_vector(:scale)
|
46
|
+
r=Statsample::Bivariate::Pearson.new(v1,v2)
|
47
|
+
assert_in_delta(0.525,r.r, 0.001)
|
48
|
+
assert_in_delta(Statsample::Bivariate.t_pearson(v1,v2), r.t, 0.001)
|
49
|
+
assert_in_delta(Statsample::Bivariate.prop_pearson(r.t,8,:both), r.probability, 0.001)
|
50
|
+
assert(r.summary.size>0)
|
112
51
|
end
|
113
52
|
def test_matrix_correlation
|
114
53
|
v1=[6,5,4,7,8,4,3,2].to_vector(:scale)
|
data/test/test_reliability.rb
CHANGED
@@ -31,8 +31,7 @@ class StatsampleReliabilityTestCase < MiniTest::Unit::TestCase
|
|
31
31
|
should "method cronbach_alpha_from_n_s2_cov return correct values" do
|
32
32
|
sa=Statsample::Reliability::ScaleAnalysis.new(@ds)
|
33
33
|
vm, cm = sa.variances_mean, sa.covariances_mean
|
34
|
-
assert_in_delta(sa.alpha, Statsample::Reliability.cronbach_alpha_from_n_s2_cov(@n_variables, vm,cm), 1e-10
|
35
|
-
|
34
|
+
assert_in_delta(sa.alpha, Statsample::Reliability.cronbach_alpha_from_n_s2_cov(@n_variables, vm,cm), 1e-10)
|
36
35
|
end
|
37
36
|
should "return correct n for desired alpha, covariance and variance" do
|
38
37
|
sa=Statsample::Reliability::ScaleAnalysis.new(@ds)
|
@@ -102,7 +101,6 @@ class StatsampleReliabilityTestCase < MiniTest::Unit::TestCase
|
|
102
101
|
context Statsample::Reliability::MultiScaleAnalysis do
|
103
102
|
|
104
103
|
setup do
|
105
|
-
|
106
104
|
size=100
|
107
105
|
@scales=4
|
108
106
|
@items_per_scale=10
|
@@ -132,7 +130,6 @@ class StatsampleReliabilityTestCase < MiniTest::Unit::TestCase
|
|
132
130
|
end
|
133
131
|
should "retrieve correct correlation matrix for each scale" do
|
134
132
|
vectors={'complete'=>@ds.vector_sum}
|
135
|
-
|
136
133
|
@scales.times {|s|
|
137
134
|
vectors["scale_#{s}"]=@ds.dup(@items_per_scale.times.map {|i| "#{s}_#{i}"}).vector_sum
|
138
135
|
}
|
@@ -163,6 +160,8 @@ class StatsampleReliabilityTestCase < MiniTest::Unit::TestCase
|
|
163
160
|
|
164
161
|
#@msa.summary_correlation_matrix=true
|
165
162
|
#@msa.summary_pca=true
|
163
|
+
|
164
|
+
|
166
165
|
assert(@msa.summary.size>0)
|
167
166
|
end
|
168
167
|
end
|
metadata
CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
|
|
4
4
|
prerelease: false
|
5
5
|
segments:
|
6
6
|
- 0
|
7
|
-
- 12
|
7
|
+
- 13
|
8
8
|
- 0
|
9
|
-
version: 0.12.0
|
9
|
+
version: 0.13.0
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Claudio Bustos
|
@@ -35,7 +35,7 @@ cert_chain:
|
|
35
35
|
rpP0jjs0
|
36
36
|
-----END CERTIFICATE-----
|
37
37
|
|
38
|
-
date: 2010-06-
|
38
|
+
date: 2010-06-21 00:00:00 -04:00
|
39
39
|
default_executable:
|
40
40
|
dependencies:
|
41
41
|
- !ruby/object:Gem::Dependency
|
@@ -132,9 +132,23 @@ dependencies:
|
|
132
132
|
type: :runtime
|
133
133
|
version_requirements: *id007
|
134
134
|
- !ruby/object:Gem::Dependency
|
135
|
-
name: rubyforge
|
135
|
+
name: statsample-bivariate-extension
|
136
136
|
prerelease: false
|
137
137
|
requirement: &id008 !ruby/object:Gem::Requirement
|
138
|
+
requirements:
|
139
|
+
- - ~>
|
140
|
+
- !ruby/object:Gem::Version
|
141
|
+
segments:
|
142
|
+
- 0
|
143
|
+
- 13
|
144
|
+
- 0
|
145
|
+
version: 0.13.0
|
146
|
+
type: :runtime
|
147
|
+
version_requirements: *id008
|
148
|
+
- !ruby/object:Gem::Dependency
|
149
|
+
name: rubyforge
|
150
|
+
prerelease: false
|
151
|
+
requirement: &id009 !ruby/object:Gem::Requirement
|
138
152
|
requirements:
|
139
153
|
- - ">="
|
140
154
|
- !ruby/object:Gem::Version
|
@@ -144,11 +158,11 @@ dependencies:
|
|
144
158
|
- 4
|
145
159
|
version: 2.0.4
|
146
160
|
type: :development
|
147
|
-
version_requirements: *id008
|
161
|
+
version_requirements: *id009
|
148
162
|
- !ruby/object:Gem::Dependency
|
149
163
|
name: shoulda
|
150
164
|
prerelease: false
|
151
|
-
requirement: &id009 !ruby/object:Gem::Requirement
|
165
|
+
requirement: &id010 !ruby/object:Gem::Requirement
|
152
166
|
requirements:
|
153
167
|
- - ">="
|
154
168
|
- !ruby/object:Gem::Version
|
@@ -156,11 +170,11 @@ dependencies:
|
|
156
170
|
- 0
|
157
171
|
version: "0"
|
158
172
|
type: :development
|
159
|
-
version_requirements: *id009
|
173
|
+
version_requirements: *id010
|
160
174
|
- !ruby/object:Gem::Dependency
|
161
175
|
name: hoe
|
162
176
|
prerelease: false
|
163
|
-
requirement: &id010 !ruby/object:Gem::Requirement
|
177
|
+
requirement: &id011 !ruby/object:Gem::Requirement
|
164
178
|
requirements:
|
165
179
|
- - ">="
|
166
180
|
- !ruby/object:Gem::Version
|
@@ -170,7 +184,7 @@ dependencies:
|
|
170
184
|
- 1
|
171
185
|
version: 2.6.1
|
172
186
|
type: :development
|
173
|
-
version_requirements: *id010
|
187
|
+
version_requirements: *id011
|
174
188
|
description: |-
|
175
189
|
A suite for basic and advanced statistics on Ruby. Tested on Ruby 1.8.7, 1.9.1, 1.9.2 (April, 2010) and JRuby 1.4 (Ruby 1.8.7 compatible).
|
176
190
|
|
@@ -225,6 +239,7 @@ files:
|
|
225
239
|
- examples/reliability.rb
|
226
240
|
- examples/t_test.rb
|
227
241
|
- examples/tetrachoric.rb
|
242
|
+
- examples/u_test.rb
|
228
243
|
- examples/vector.rb
|
229
244
|
- lib/distribution.rb
|
230
245
|
- lib/distribution/chisquare.rb
|
@@ -239,8 +254,7 @@ files:
|
|
239
254
|
- lib/statsample/anova/oneway.rb
|
240
255
|
- lib/statsample/anova/twoway.rb
|
241
256
|
- lib/statsample/bivariate.rb
|
242
|
-
- lib/statsample/bivariate/polychoric.rb
|
243
|
-
- lib/statsample/bivariate/tetrachoric.rb
|
257
|
+
- lib/statsample/bivariate/pearson.rb
|
244
258
|
- lib/statsample/codification.rb
|
245
259
|
- lib/statsample/combination.rb
|
246
260
|
- lib/statsample/converter/csv.rb
|
@@ -301,7 +315,6 @@ files:
|
|
301
315
|
- test/test_anovatwowaywithdataset.rb
|
302
316
|
- test/test_anovawithvectors.rb
|
303
317
|
- test/test_bivariate.rb
|
304
|
-
- test/test_bivariate_polychoric.rb
|
305
318
|
- test/test_codification.rb
|
306
319
|
- test/test_combination.rb
|
307
320
|
- test/test_crosstab.rb
|
@@ -345,13 +358,13 @@ post_install_message: |
|
|
345
358
|
to retrieve gems gsl, statistics2 and a C extension
|
346
359
|
to speed some methods.
|
347
360
|
|
348
|
-
$sudo gem install statsample-optimization
|
361
|
+
$ sudo gem install statsample-optimization
|
349
362
|
|
350
|
-
|
351
|
-
|
352
|
-
|
363
|
+
On Ubuntu, install build-essential and libgsl0-dev
|
364
|
+
using apt-get and compile ruby 1.8 or 1.9 from
|
365
|
+
source code first.
|
353
366
|
|
354
|
-
$sudo apt-get install build-essential libgsl0-dev
|
367
|
+
$ sudo apt-get install build-essential libgsl0-dev
|
355
368
|
|
356
369
|
|
357
370
|
*****************************************************
|
@@ -400,7 +413,6 @@ test_files:
|
|
400
413
|
- test/test_combination.rb
|
401
414
|
- test/test_mle.rb
|
402
415
|
- test/test_resample.rb
|
403
|
-
- test/test_bivariate_polychoric.rb
|
404
416
|
- test/test_stratified.rb
|
405
417
|
- test/test_vector.rb
|
406
418
|
- test/test_srs.rb
|
metadata.gz.sig
CHANGED
Binary file
|
@@ -1,893 +0,0 @@
|
|
1
|
-
require 'minimization'
|
2
|
-
module Statsample
|
3
|
-
module Bivariate
|
4
|
-
# Calculate Polychoric correlation for two vectors.
|
5
|
-
def self.polychoric(v1,v2)
|
6
|
-
pc=Polychoric.new_with_vectors(v1,v2)
|
7
|
-
pc.r
|
8
|
-
end
|
9
|
-
|
10
|
-
# Polychoric correlation matrix.
|
11
|
-
# Order of rows and columns depends on Dataset#fields order
|
12
|
-
def self.polychoric_correlation_matrix(ds)
|
13
|
-
cache={}
|
14
|
-
matrix=ds.collect_matrix do |row,col|
|
15
|
-
if row==col
|
16
|
-
1.0
|
17
|
-
else
|
18
|
-
begin
|
19
|
-
if cache[[col,row]].nil?
|
20
|
-
poly=polychoric(ds[row],ds[col])
|
21
|
-
cache[[row,col]]=poly
|
22
|
-
poly
|
23
|
-
else
|
24
|
-
cache[[col,row]]
|
25
|
-
end
|
26
|
-
rescue RuntimeError
|
27
|
-
nil
|
28
|
-
end
|
29
|
-
end
|
30
|
-
end
|
31
|
-
matrix.extend CovariateMatrix
|
32
|
-
matrix.fields=ds.fields
|
33
|
-
matrix
|
34
|
-
end
|
35
|
-
|
36
|
-
# = Polychoric correlation.
|
37
|
-
#
|
38
|
-
# The <em>polychoric</em> correlation is a measure of
|
39
|
-
# bivariate association arising when both observed variates
|
40
|
-
# are ordered, categorical variables that result from polychotomizing
|
41
|
-
# the two undelying continuous variables (Drasgow, 2006)
|
42
|
-
#
|
43
|
-
# According to Drasgow(2006), there are tree methods to estimate
|
44
|
-
# the polychoric correlation:
|
45
|
-
#
|
46
|
-
# 1. Maximum Likehood Estimator
|
47
|
-
# 2. Two-step estimator and
|
48
|
-
# 3. Polychoric series estimate.
|
49
|
-
#
|
50
|
-
# By default, two-step estimation are used. You can select
|
51
|
-
# the estimation method with method attribute. Joint estimate and polychoric series requires gsl library and rb-gsl.
|
52
|
-
#
|
53
|
-
# == Use
|
54
|
-
#
|
55
|
-
# You should enter a Matrix with ordered data. For example:
|
56
|
-
# -------------------
|
57
|
-
# | y=0 | y=1 | y=2 |
|
58
|
-
# -------------------
|
59
|
-
# x = 0 | 1 | 10 | 20 |
|
60
|
-
# -------------------
|
61
|
-
# x = 1 | 20 | 20 | 50 |
|
62
|
-
# -------------------
|
63
|
-
#
|
64
|
-
# The code will be
|
65
|
-
#
|
66
|
-
# matrix=Matrix[[1,10,20],[20,20,50]]
|
67
|
-
# poly=Statsample::Bivariate::Polychoric.new(matrix, :method=>:joint)
|
68
|
-
# puts poly.r
|
69
|
-
#
|
70
|
-
# See extensive documentation on Uebersax(2002) and Drasgow(2006)
|
71
|
-
#
|
72
|
-
# == References
|
73
|
-
#
|
74
|
-
# * Uebersax, J.S. (2006). The tetrachoric and polychoric correlation coefficients. Statistical Methods for Rater Agreement web site. 2006. Available at: http://john-uebersax.com/stat/tetra.htm . Accessed February, 11, 2010
|
75
|
-
# * Drasgow F. (2006). Polychoric and polyserial correlations. In Kotz L, Johnson NL (Eds.), Encyclopedia of statistical sciences. Vol. 7 (pp. 69-74). New York: Wiley.
|
76
|
-
|
77
|
-
class Polychoric
|
78
|
-
|
79
|
-
class Processor
|
80
|
-
attr_reader :alpha, :beta, :rho
|
81
|
-
def initialize(alpha,beta,rho)
|
82
|
-
@alpha=alpha
|
83
|
-
@beta=beta
|
84
|
-
@nr=@alpha.size+1
|
85
|
-
@nc=@beta.size+1
|
86
|
-
@rho=rho
|
87
|
-
@pd=nil
|
88
|
-
end
|
89
|
-
def bipdf(i,j)
|
90
|
-
Distribution::NormalBivariate.pdf(a(i), b(j), rho)
|
91
|
-
end
|
92
|
-
def a(i)
|
93
|
-
i < 0 ? -100 : (i==@nr-1 ? 100 : alpha[i])
|
94
|
-
end
|
95
|
-
def b(j)
|
96
|
-
j < 0 ? -100 : (j==@nc-1 ? 100 : beta[j])
|
97
|
-
end
|
98
|
-
# Equation(10) from Olsson(1979)
|
99
|
-
def fd_loglike_cell_a(i,j,k)
|
100
|
-
if k==i
|
101
|
-
Distribution::NormalBivariate.pd_cdf_x(a(k),b(j), rho) - Distribution::NormalBivariate.pd_cdf_x(a(k),b(j-1),rho)
|
102
|
-
elsif k==(i-1)
|
103
|
-
-Distribution::NormalBivariate.pd_cdf_x(a(k),b(j),rho) + Distribution::NormalBivariate.pd_cdf_x(a(k),b(j-1),rho)
|
104
|
-
else
|
105
|
-
0
|
106
|
-
end
|
107
|
-
|
108
|
-
end
|
109
|
-
# phi_ij for each i and j
|
110
|
-
# Uses equation(4) from Olsson(1979)
|
111
|
-
def pd
|
112
|
-
if @pd.nil?
|
113
|
-
@pd=@nr.times.collect{ [0] * @nc}
|
114
|
-
pc=@nr.times.collect{ [0] * @nc}
|
115
|
-
@nr.times do |i|
|
116
|
-
@nc.times do |j|
|
117
|
-
|
118
|
-
if i==@nr-1 and j==@nc-1
|
119
|
-
@pd[i][j]=1.0
|
120
|
-
else
|
121
|
-
a=(i==@nr-1) ? 100: alpha[i]
|
122
|
-
b=(j==@nc-1) ? 100: beta[j]
|
123
|
-
#puts "a:#{a} b:#{b}"
|
124
|
-
@pd[i][j]=Distribution::NormalBivariate.cdf(a, b, rho)
|
125
|
-
end
|
126
|
-
pc[i][j] = @pd[i][j]
|
127
|
-
@pd[i][j] = @pd[i][j] - pc[i-1][j] if i>0
|
128
|
-
@pd[i][j] = @pd[i][j] - pc[i][j-1] if j>0
|
129
|
-
@pd[i][j] = @pd[i][j] + pc[i-1][j-1] if (i>0 and j>0)
|
130
|
-
end
|
131
|
-
end
|
132
|
-
end
|
133
|
-
@pd
|
134
|
-
end
|
135
|
-
end
|
136
|
-
|
137
|
-
include GetText
|
138
|
-
include DirtyMemoize
|
139
|
-
bindtextdomain("statsample")
|
140
|
-
# Name of the analysis
|
141
|
-
attr_accessor :name
|
142
|
-
# Max number of iterations used on iterative methods. Default to MAX_ITERATIONS
|
143
|
-
attr_accessor :max_iterations
|
144
|
-
# Debug algorithm (See iterations, for example)
|
145
|
-
attr_accessor :debug
|
146
|
-
# Minimizer type for two step. Default "brent"
|
147
|
-
# See http://rb-gsl.rubyforge.org/min.html for reference.
|
148
|
-
attr_accessor :minimizer_type_two_step
|
149
|
-
|
150
|
-
# Minimizer type for joint estimate. Default "nmsimplex"
|
151
|
-
# See http://rb-gsl.rubyforge.org/min.html for reference.
|
152
|
-
attr_accessor :minimizer_type_joint
|
153
|
-
|
154
|
-
|
155
|
-
# Method of calculation of polychoric series.
|
156
|
-
# <tt>:two_step</tt> used by default.
|
157
|
-
#
|
158
|
-
# :two_step:: two-step ML, based on code by Gegenfurtner(1992).
|
159
|
-
# :polychoric_series:: polychoric series estimate, using
|
160
|
-
# algorithm AS87 by Martinson and Hamdan (1975).
|
161
|
-
# :joint:: one-step ML, based on R package 'polycor'
|
162
|
-
# by J.Fox.
|
163
|
-
attr_accessor :method
|
164
|
-
# Absolute error for iteration.
|
165
|
-
attr_accessor :epsilon
|
166
|
-
|
167
|
-
# Number of iterations
|
168
|
-
attr_reader :iteration
|
169
|
-
|
170
|
-
# Log of algorithm
|
171
|
-
attr_reader :log
|
172
|
-
|
173
|
-
|
174
|
-
attr_reader :loglike_model
|
175
|
-
|
176
|
-
METHOD=:two_step
|
177
|
-
MAX_ITERATIONS=300
|
178
|
-
EPSILON=1e-6
|
179
|
-
MINIMIZER_TYPE_TWO_STEP="brent"
|
180
|
-
MINIMIZER_TYPE_JOINT="nmsimplex"
|
181
|
-
def self.new_with_vectors(v1,v2)
|
182
|
-
Polychoric.new(Crosstab.new(v1,v2).to_matrix)
|
183
|
-
end
|
184
|
-
# Params:
|
185
|
-
# * matrix: Contingence table
|
186
|
-
# * opts: Any attribute
|
187
|
-
|
188
|
-
def initialize(matrix, opts=Hash.new)
|
189
|
-
@matrix=matrix
|
190
|
-
@n=matrix.column_size
|
191
|
-
@m=matrix.row_size
|
192
|
-
raise "row size <1" if @m<=1
|
193
|
-
raise "column size <1" if @n<=1
|
194
|
-
|
195
|
-
@method=METHOD
|
196
|
-
@name="Polychoric correlation"
|
197
|
-
@max_iterations=MAX_ITERATIONS
|
198
|
-
@epsilon=EPSILON
|
199
|
-
@minimizer_type_two_step=MINIMIZER_TYPE_TWO_STEP
|
200
|
-
@minimizer_type_joint=MINIMIZER_TYPE_JOINT
|
201
|
-
@debug=false
|
202
|
-
@iteration=nil
|
203
|
-
opts.each{|k,v|
|
204
|
-
self.send("#{k}=",v) if self.respond_to? k
|
205
|
-
}
|
206
|
-
@r=nil
|
207
|
-
@pd=nil
|
208
|
-
compute_basic_parameters
|
209
|
-
end
|
210
|
-
# Returns the polychoric correlation
|
211
|
-
attr_reader :r
|
212
|
-
# Returns the rows thresholds
|
213
|
-
attr_reader :alpha
|
214
|
-
# Returns the columns thresholds
|
215
|
-
attr_reader :beta
|
216
|
-
|
217
|
-
dirty_writer :max_iterations, :epsilon, :minimizer_type_two_step, :minimizer_type_joint, :method
|
218
|
-
dirty_memoize :r, :alpha, :beta
|
219
|
-
|
220
|
-
alias :threshold_x :alpha
|
221
|
-
alias :threshold_y :beta
|
222
|
-
|
223
|
-
|
224
|
-
# Start the computation of polychoric correlation
|
225
|
-
# based on attribute method
|
226
|
-
def compute
|
227
|
-
if @method==:two_step
|
228
|
-
compute_two_step_mle_drasgow
|
229
|
-
elsif @method==:joint
|
230
|
-
compute_one_step_mle
|
231
|
-
elsif @method==:polychoric_series
|
232
|
-
compute_polychoric_series
|
233
|
-
else
|
234
|
-
raise "Not implemented"
|
235
|
-
end
|
236
|
-
end
|
237
|
-
# Retrieve log likehood for actual data.
|
238
|
-
def loglike_data
|
239
|
-
loglike=0
|
240
|
-
@nr.times do |i|
|
241
|
-
@nc.times do |j|
|
242
|
-
res=@matrix[i,j].quo(@total)
|
243
|
-
if (res==0)
|
244
|
-
res=1e-16
|
245
|
-
end
|
246
|
-
loglike+= @matrix[i,j] * Math::log(res )
|
247
|
-
end
|
248
|
-
end
|
249
|
-
loglike
|
250
|
-
end
|
251
|
-
|
252
|
-
# Chi Square of model
|
253
|
-
def chi_square
|
254
|
-
if @loglike_model.nil?
|
255
|
-
compute
|
256
|
-
end
|
257
|
-
-2*(@loglike_model-loglike_data)
|
258
|
-
end
|
259
|
-
|
260
|
-
def chi_square_df
|
261
|
-
(@nr*@nc)-@nc-@nr
|
262
|
-
end
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
# Retrieve all cell probabilities for givens alpha, beta and rho
|
268
|
-
def cell_probabilities(alpha,beta,rho)
|
269
|
-
pd=@nr.times.collect{ [0] * @nc}
|
270
|
-
pc=@nr.times.collect{ [0] * @nc}
|
271
|
-
@nr.times do |i|
|
272
|
-
@nc.times do |j|
|
273
|
-
|
274
|
-
if i==@nr-1 and j==@nc-1
|
275
|
-
pd[i][j]=1.0
|
276
|
-
else
|
277
|
-
a=(i==@nr-1) ? 100: alpha[i]
|
278
|
-
b=(j==@nc-1) ? 100: beta[j]
|
279
|
-
#puts "a:#{a} b:#{b}"
|
280
|
-
pd[i][j]=Distribution::NormalBivariate.cdf(a, b, rho)
|
281
|
-
end
|
282
|
-
pc[i][j] = pd[i][j]
|
283
|
-
pd[i][j] = pd[i][j] - pc[i-1][j] if i>0
|
284
|
-
pd[i][j] = pd[i][j] - pc[i][j-1] if j>0
|
285
|
-
pd[i][j] = pd[i][j] + pc[i-1][j-1] if (i>0 and j>0)
|
286
|
-
end
|
287
|
-
end
|
288
|
-
@pd=pd
|
289
|
-
pd
|
290
|
-
end
|
291
|
-
def loglike(alpha,beta,rho)
|
292
|
-
if rho.abs>0.9999
|
293
|
-
rho= (rho>0) ? 0.9999 : -0.9999
|
294
|
-
end
|
295
|
-
pr=Processor.new(alpha,beta,rho)
|
296
|
-
loglike=0
|
297
|
-
|
298
|
-
|
299
|
-
@nr.times do |i|
|
300
|
-
@nc.times do |j|
|
301
|
-
res=pr.pd[i][j]+EPSILON
|
302
|
-
loglike+= @matrix[i,j] * Math::log( res )
|
303
|
-
end
|
304
|
-
end
|
305
|
-
-loglike
|
306
|
-
end
|
307
|
-
# First derivate for rho
|
308
|
-
# Uses equation (9) from Olsson(1979)
|
309
|
-
def fd_loglike_rho(alpha,beta,rho)
|
310
|
-
if rho.abs>0.9999
|
311
|
-
rho= (rho>0) ? 0.9999 : -0.9999
|
312
|
-
end
|
313
|
-
total=0
|
314
|
-
pr=Processor.new(alpha,beta,rho)
|
315
|
-
@nr.times do |i|
|
316
|
-
@nc.times do |j|
|
317
|
-
pi=pr.pd[i][j] + EPSILON
|
318
|
-
total+= (@matrix[i,j] / pi) * (pr.bipdf(i,j)-pr.bipdf(i-1,j)-pr.bipdf(i,j-1)+pr.bipdf(i-1,j-1))
|
319
|
-
end
|
320
|
-
end
|
321
|
-
total
|
322
|
-
end
|
323
|
-
|
324
|
-
# First derivative for alpha_k
|
325
|
-
def fd_loglike_a(alpha,beta,rho,k)
|
326
|
-
fd_loglike_a_eq6(alpha,beta,rho,k)
|
327
|
-
end
|
328
|
-
# Uses equation (6) from Olsson(1979)
|
329
|
-
def fd_loglike_a_eq6(alpha,beta,rho,k)
|
330
|
-
if rho.abs>0.9999
|
331
|
-
rho= (rho>0) ? 0.9999 : -0.9999
|
332
|
-
end
|
333
|
-
pr=Processor.new(alpha,beta,rho)
|
334
|
-
total=0
|
335
|
-
pd=pr.pd
|
336
|
-
@nr.times do |i|
|
337
|
-
@nc.times do |j|
|
338
|
-
total+=@matrix[i,j].quo(pd[i][j]+EPSILON) * pr.fd_loglike_cell_a(i,j,k)
|
339
|
-
end
|
340
|
-
end
|
341
|
-
total
|
342
|
-
end
|
343
|
-
# Uses equation(13) from Olsson(1979)
|
344
|
-
def fd_loglike_a_eq13(alpha,beta,rho,k)
|
345
|
-
if rho.abs>0.9999
|
346
|
-
rho= (rho>0) ? 0.9999 : -0.9999
|
347
|
-
end
|
348
|
-
pr=Processor.new(alpha,beta,rho)
|
349
|
-
total=0
|
350
|
-
a_k=pr.a(k)
|
351
|
-
pd=pr.pd
|
352
|
-
@nc.times do |j|
|
353
|
-
#puts "j: #{j}"
|
354
|
-
#puts "b #{j} : #{b.call(j)}"
|
355
|
-
#puts "b #{j-1} : #{b.call(j-1)}"
|
356
|
-
|
357
|
-
e_1=@matrix[k,j].quo(pd[k][j]+EPSILON) - @matrix[k+1,j].quo(pd[k+1][j]+EPSILON)
|
358
|
-
e_2=Distribution::Normal.pdf(a_k)
|
359
|
-
e_3=Distribution::Normal.cdf((pr.b(j)-rho*a_k).quo(Math::sqrt(1-rho**2))) - Distribution::Normal.cdf((pr.b(j-1)-rho*a_k).quo(Math::sqrt(1-rho**2)))
|
360
|
-
#puts "val #{j}: #{e_1} | #{e_2} | #{e_3}"
|
361
|
-
|
362
|
-
total+= e_1*e_2*e_3
|
363
|
-
end
|
364
|
-
total
|
365
|
-
end
|
366
|
-
# First derivative for beta_m
|
367
|
-
# Uses equation(14) from Olsson(1979)
|
368
|
-
def fd_loglike_b(alpha,beta,rho,m)
|
369
|
-
if rho.abs>0.9999
|
370
|
-
rho= (rho>0) ? 0.9999 : -0.9999
|
371
|
-
end
|
372
|
-
pr=Processor.new(alpha,beta,rho)
|
373
|
-
total=0
|
374
|
-
b_m=pr.b m
|
375
|
-
pd=pr.pd
|
376
|
-
@nr.times do |i|
|
377
|
-
#puts "j: #{j}"
|
378
|
-
#puts "b #{j} : #{b.call(j)}"
|
379
|
-
#puts "b #{j-1} : #{b.call(j-1)}"
|
380
|
-
|
381
|
-
e_1=@matrix[i,m].quo(pd[i][m]+EPSILON) - @matrix[i,m+1].quo(pd[i][m+1]+EPSILON)
|
382
|
-
e_2=Distribution::Normal.pdf(b_m)
|
383
|
-
e_3=Distribution::Normal.cdf((pr.a(i)-rho*b_m).quo(Math::sqrt(1-rho**2))) - Distribution::Normal.cdf((pr.a(i-1)-rho*b_m).quo(Math::sqrt(1-rho**2)))
|
384
|
-
#puts "val #{j}: #{e_1} | #{e_2} | #{e_3}"
|
385
|
-
|
386
|
-
total+= e_1*e_2*e_3
|
387
|
-
end
|
388
|
-
total
|
389
|
-
end
|
390
|
-
|
391
|
-
|
392
|
-
def compute_basic_parameters
|
393
|
-
@nr=@matrix.row_size
|
394
|
-
@nc=@matrix.column_size
|
395
|
-
@sumr=[0]*@matrix.row_size
|
396
|
-
@sumrac=[0]*@matrix.row_size
|
397
|
-
@sumc=[0]*@matrix.column_size
|
398
|
-
@sumcac=[0]*@matrix.column_size
|
399
|
-
@alpha=[0]*(@nr-1)
|
400
|
-
@beta=[0]*(@nc-1)
|
401
|
-
@total=0
|
402
|
-
@nr.times do |i|
|
403
|
-
@nc.times do |j|
|
404
|
-
@sumr[i]+=@matrix[i,j]
|
405
|
-
@sumc[j]+=@matrix[i,j]
|
406
|
-
@total+=@matrix[i,j]
|
407
|
-
end
|
408
|
-
end
|
409
|
-
ac=0
|
410
|
-
(@nr-1).times do |i|
|
411
|
-
@sumrac[i]=@sumr[i]+ac
|
412
|
-
@alpha[i]=Distribution::Normal.p_value(@sumrac[i] / @total.to_f)
|
413
|
-
ac=@sumrac[i]
|
414
|
-
end
|
415
|
-
ac=0
|
416
|
-
(@nc-1).times do |i|
|
417
|
-
@sumcac[i]=@sumc[i]+ac
|
418
|
-
@beta[i]=Distribution::Normal.p_value(@sumcac[i] / @total.to_f)
|
419
|
-
ac=@sumcac[i]
|
420
|
-
end
|
421
|
-
end
|
422
|
-
|
423
|
-
|
424
|
-
# Computation of polychoric correlation usign two-step ML estimation.
|
425
|
-
#
|
426
|
-
# Two-step ML estimation "first estimates the thresholds from the one-way marginal frequencies, then estimates rho, conditional on these thresholds, via maximum likelihood" (Uebersax, 2006).
|
427
|
-
#
|
428
|
-
# The algorithm is based on code by Gegenfurtner(1992).
|
429
|
-
#
|
430
|
-
# <b>References</b>:
|
431
|
-
# * Gegenfurtner, K. (1992). PRAXIS: Brent's algorithm for function minimization. Behavior Research Methods, Instruments & Computers, 24(4), 560-564. Available on http://www.allpsych.uni-giessen.de/karl/pdf/03.praxis.pdf
|
432
|
-
# * Uebersax, J.S. (2006). The tetrachoric and polychoric correlation coefficients. Statistical Methods for Rater Agreement web site. 2006. Available at: http://john-uebersax.com/stat/tetra.htm . Accessed February, 11, 2010
|
433
|
-
#
|
434
|
-
def compute_two_step_mle_drasgow
|
435
|
-
if Statsample.has_gsl?
|
436
|
-
compute_two_step_mle_drasgow_gsl
|
437
|
-
else
|
438
|
-
compute_two_step_mle_drasgow_ruby
|
439
|
-
end
|
440
|
-
end
|
441
|
-
|
442
|
-
# Depends on minimization algorithm.
|
443
|
-
|
444
|
-
def compute_two_step_mle_drasgow_ruby #:nodoc:
|
445
|
-
|
446
|
-
f=proc {|rho|
|
447
|
-
loglike(@alpha,@beta, rho)
|
448
|
-
}
|
449
|
-
@log="Minimizing using GSL Brent method\n"
|
450
|
-
min=Minimization::Brent.new(-0.9999,0.9999,f)
|
451
|
-
min.epsilon=@epsilon
|
452
|
-
min.expected=0
|
453
|
-
min.iterate
|
454
|
-
@log+=min.log.to_table.to_s
|
455
|
-
@r=min.x_minimum
|
456
|
-
@loglike_model=-min.f_minimum
|
457
|
-
puts @log if @debug
|
458
|
-
|
459
|
-
end
|
460
|
-
|
461
|
-
|
462
|
-
def compute_two_step_mle_drasgow_gsl #:nodoc:
|
463
|
-
|
464
|
-
fn1=GSL::Function.alloc {|rho|
|
465
|
-
loglike(@alpha,@beta, rho)
|
466
|
-
}
|
467
|
-
@iteration = 0
|
468
|
-
max_iter = @max_iterations
|
469
|
-
m = 0 # initial guess
|
470
|
-
m_expected = 0
|
471
|
-
a=-0.9999
|
472
|
-
b=+0.9999
|
473
|
-
gmf = GSL::Min::FMinimizer.alloc(@minimizer_type_two_step)
|
474
|
-
gmf.set(fn1, m, a, b)
|
475
|
-
header=sprintf("Two step minimization using %s method\n", gmf.name)
|
476
|
-
header+=sprintf("%5s [%9s, %9s] %9s %10s %9s\n", "iter", "lower", "upper", "min",
|
477
|
-
"err", "err(est)")
|
478
|
-
|
479
|
-
header+=sprintf("%5d [%.7f, %.7f] %.7f %+.7f %.7f\n", @iteration, a, b, m, m - m_expected, b - a)
|
480
|
-
@log=header
|
481
|
-
puts header if @debug
|
482
|
-
begin
|
483
|
-
@iteration += 1
|
484
|
-
status = gmf.iterate
|
485
|
-
status = gmf.test_interval(@epsilon, 0.0)
|
486
|
-
|
487
|
-
if status == GSL::SUCCESS
|
488
|
-
@log+="converged:"
|
489
|
-
puts "converged:" if @debug
|
490
|
-
end
|
491
|
-
a = gmf.x_lower
|
492
|
-
b = gmf.x_upper
|
493
|
-
m = gmf.x_minimum
|
494
|
-
message=sprintf("%5d [%.7f, %.7f] %.7f %+.7f %.7f\n",
|
495
|
-
@iteration, a, b, m, m - m_expected, b - a);
|
496
|
-
@log+=message
|
497
|
-
puts message if @debug
|
498
|
-
end while status == GSL::CONTINUE and @iteration < @max_iterations
|
499
|
-
@r=gmf.x_minimum
|
500
|
-
@loglike_model=-gmf.f_minimum
|
501
|
-
end
|
502
|
-
|
503
|
-
# Compute Polychoric correlation with joint estimate.
|
504
|
-
# Rho and thresholds are estimated at same time.
|
505
|
-
# Code based on R package "polycor", by J.Fox.
|
506
|
-
#
|
507
|
-
|
508
|
-
def compute_one_step_mle
|
509
|
-
# Get initial values with two-step aproach
|
510
|
-
compute_two_step_mle_drasgow
|
511
|
-
# Start iteration with past values
|
512
|
-
rho=@r
|
513
|
-
cut_alpha=@alpha
|
514
|
-
cut_beta=@beta
|
515
|
-
parameters=[rho]+cut_alpha+cut_beta
|
516
|
-
minimization = Proc.new { |v, params|
|
517
|
-
rho=v[0]
|
518
|
-
alpha=v[1, @nr-1]
|
519
|
-
beta=v[@nr, @nc-1]
|
520
|
-
|
521
|
-
#puts "f'rho=#{fd_loglike_rho(alpha,beta,rho)}"
|
522
|
-
#(@nr-1).times {|k|
|
523
|
-
# puts "f'a(#{k}) = #{fd_loglike_a(alpha,beta,rho,k)}"
|
524
|
-
# puts "f'a(#{k}) v2 = #{fd_loglike_a2(alpha,beta,rho,k)}"
|
525
|
-
#
|
526
|
-
#}
|
527
|
-
#(@nc-1).times {|k|
|
528
|
-
# puts "f'b(#{k}) = #{fd_loglike_b(alpha,beta,rho,k)}"
|
529
|
-
#}
|
530
|
-
|
531
|
-
loglike(alpha,beta,rho)
|
532
|
-
}
|
533
|
-
np=@nc-1+@nr
|
534
|
-
my_func = GSL::MultiMin::Function.alloc(minimization, np)
|
535
|
-
my_func.set_params(parameters) # parameters
|
536
|
-
|
537
|
-
x = GSL::Vector.alloc(parameters.dup)
|
538
|
-
|
539
|
-
ss = GSL::Vector.alloc(np)
|
540
|
-
ss.set_all(1.0)
|
541
|
-
|
542
|
-
minimizer = GSL::MultiMin::FMinimizer.alloc(minimizer_type_joint,np)
|
543
|
-
minimizer.set(my_func, x, ss)
|
544
|
-
|
545
|
-
iter = 0
|
546
|
-
message=""
|
547
|
-
begin
|
548
|
-
iter += 1
|
549
|
-
status = minimizer.iterate()
|
550
|
-
status = minimizer.test_size(@epsilon)
|
551
|
-
if status == GSL::SUCCESS
|
552
|
-
message="Joint MLE converged to minimum at\n"
|
553
|
-
end
|
554
|
-
x = minimizer.x
|
555
|
-
message+= sprintf("%5d iterations", iter)+"\n";
|
556
|
-
for i in 0...np do
|
557
|
-
message+=sprintf("%10.3e ", x[i])
|
558
|
-
end
|
559
|
-
message+=sprintf("f() = %7.3f size = %.3f\n", minimizer.fval, minimizer.size)+"\n";
|
560
|
-
end while status == GSL::CONTINUE and iter < @max_iterations
|
561
|
-
@iteration=iter
|
562
|
-
@log+=message
|
563
|
-
@r=minimizer.x[0]
|
564
|
-
@alpha=minimizer.x[1,@nr-1].to_a
|
565
|
-
@beta=minimizer.x[@nr,@nc-1].to_a
|
566
|
-
@loglike_model= -minimizer.minimum
|
567
|
-
end
|
568
|
-
|
569
|
-
def matrix_for_rho(rho) # :nodoc:
|
570
|
-
pd=@nr.times.collect{ [0]*@nc}
|
571
|
-
pc=@nr.times.collect{ [0]*@nc}
|
572
|
-
@nr.times { |i|
|
573
|
-
@nc.times { |j|
|
574
|
-
pd[i][j]=Distribution::NormalBivariate.cdf(@alpha[i], @beta[j], rho)
|
575
|
-
pc[i][j] = pd[i][j]
|
576
|
-
pd[i][j] = pd[i][j] - pc[i-1][j] if i>0
|
577
|
-
pd[i][j] = pd[i][j] - pc[i][j-1] if j>0
|
578
|
-
pd[i][j] = pd[i][j] + pc[i-1][j-1] if (i>0 and j>0)
|
579
|
-
res= pd[i][j]
|
580
|
-
}
|
581
|
-
}
|
582
|
-
Matrix.rows(pc)
|
583
|
-
end
|
584
|
-
|
585
|
-
def expected # :nodoc:
|
586
|
-
rt=[]
|
587
|
-
ct=[]
|
588
|
-
t=0
|
589
|
-
@matrix.row_size.times {|i|
|
590
|
-
@matrix.column_size.times {|j|
|
591
|
-
rt[i]=0 if rt[i].nil?
|
592
|
-
ct[j]=0 if ct[j].nil?
|
593
|
-
rt[i]+=@matrix[i,j]
|
594
|
-
ct[j]+=@matrix[i,j]
|
595
|
-
t+=@matrix[i,j]
|
596
|
-
}
|
597
|
-
}
|
598
|
-
m=[]
|
599
|
-
@matrix.row_size.times {|i|
|
600
|
-
row=[]
|
601
|
-
@matrix.column_size.times {|j|
|
602
|
-
row[j]=(rt[i]*ct[j]).quo(t)
|
603
|
-
}
|
604
|
-
m.push(row)
|
605
|
-
}
|
606
|
-
|
607
|
-
Matrix.rows(m)
|
608
|
-
end
|
609
|
-
|
610
|
-
# Compute polychoric correlation using polychoric series.
# Algorithm: AS87, by Martinson and Hamdan (1975).
#
# <b>Warning</b>: According to Drasgow(2006), this
# computation diverges greatly from joint and two-step methods.
#
# Reads @matrix (contingency table), @m (rows) and @n (columns).
# Stores the estimate in @r, the thresholds in @alpha / @beta, the marginal
# totals in @sumr / @sumc / @total, the polynomial roots in @rootr / @rooti
# and the negated result of +loglike+ in @loglike_model.
#
# Raises RuntimeError when a row or column of the table sums to zero, or
# when no real root lies in the admissible interval.
#
# Working arrays are 1-based (index 0 unused).
def compute_polychoric_series
  @nn = @n - 1
  @mm = @m - 1
  @nn7 = 7 * @nn
  @mm7 = 7 * @mm
  @mn = @n * @m
  # Flatten the table column by column into a 1-based vector.
  @cont = [nil]
  @n.times do |j|
    @m.times do |i|
      @cont.push(@matrix[i, j])
    end
  end

  pcorl = 0
  cont = @cont
  xmean = 0.0
  sum = 0.0
  row = []
  colmn = []
  # Row totals, grand total and mean row score.
  (1..@m).each do |i|
    row[i] = 0.0
    l = i
    (1..@n).each do |j|
      row[i] = row[i] + cont[l]
      l += @m
    end
    raise "Should not be empty rows" if row[i] == 0.0
    xmean = xmean + row[i] * i.to_f
    sum += row[i]
  end
  xmean = xmean / sum.to_f
  # Column totals and mean column score.
  ymean = 0.0
  (1..@n).each do |j|
    colmn[j] = 0.0
    l = (j - 1) * @m
    (1..@m).each do |i|
      l = l + 1
      colmn[j] = colmn[j] + cont[l] # 12
    end
    raise "Should not be empty cols" if colmn[j] == 0
    ymean = ymean + colmn[j] * j.to_f
  end
  ymean = ymean / sum.to_f
  # Covariance of the row/column scores; only its sign is used, to choose
  # between the positive and the negative root interval below.
  covxy = 0.0
  (1..@m).each do |i|
    l = i
    (1..@n).each do |j|
      covxy = covxy + cont[l] * (i.to_f - xmean) * (j.to_f - ymean)
      l = l + @m
    end
  end

  chisq = 0.0
  (1..@m).each do |i|
    l = i
    (1..@n).each do |j|
      chisq = chisq + ((cont[l]**2).quo(row[i] * colmn[j]))
      l = l + @m
    end
  end

  phisq = chisq - 1.0 - (@mm * @nn).to_f / sum.to_f
  phisq = 0 if phisq < 0.0
  # Compute cumulative sum of columns and rows
  sumc = []
  sumr = []
  sumc[1] = colmn[1]
  sumr[1] = row[1]
  cum = 0
  (1..@nn).each do |i| # goto 17 r20
    cum = cum + colmn[i]
    sumc[i] = cum
  end
  cum = 0
  (1..@mm).each do |i|
    cum = cum + row[i]
    sumr[i] = cum
  end
  alpha = []
  beta = []
  # Compute points of polytomy
  (1..@mm).each do |i| # do 21
    alpha[i] = Distribution::Normal.p_value(sumr[i] / sum.to_f)
  end # 21
  (1..@nn).each do |i| # do 22
    beta[i] = Distribution::Normal.p_value(sumc[i] / sum.to_f)
  end # 22
  # Publish 0-based copies (drop the unused slot 0).
  @alpha = alpha[1, alpha.size]
  @beta = beta[1, beta.size]
  @sumr = row[1, row.size]
  @sumc = colmn[1, colmn.size]
  @total = sum

  # Compute Fourier coefficients a and b. Verified
  h = hermit(alpha, @mm)
  hh = hermit(beta, @nn)
  a = []
  b = []
  if @m != 2 # goto 24
    mmm = @m - 2
    (1..mmm).each do |i| # do 23
      a1 = sum.quo(row[i + 1] * sumr[i] * sumr[i + 1])
      a2 = sumr[i] * xnorm(alpha[i + 1])
      a3 = sumr[i + 1] * xnorm(alpha[i])
      l = i
      (1..7).each do |j| # do 23
        a[l] = Math::sqrt(a1.quo(j)) * (h[l + 1] * a2 - h[l] * a3)
        l = l + @mm
      end
    end # 23
  end
  # 24

  if @n != 2 # goto 26
    nnn = @n - 2
    (1..nnn).each do |i| # do 25
      a1 = sum.quo(colmn[i + 1] * sumc[i] * sumc[i + 1])
      a2 = sumc[i] * xnorm(beta[i + 1])
      a3 = sumc[i + 1] * xnorm(beta[i])
      l = i
      (1..7).each do |j| # do 25
        b[l] = Math::sqrt(a1.quo(j)) * (a2 * hh[l + 1] - a3 * hh[l])
        l = l + @nn
      end # 25
    end # 25
  end
  # 26 r20
  l = @mm
  a1 = -sum * xnorm(alpha[@mm])
  a2 = row[@m] * sumr[@mm]
  (1..7).each do |j| # do 27
    a[l] = a1 * h[l].quo(Math::sqrt(j * a2))
    l = l + @mm
  end # 27

  l = @nn
  a1 = -sum * xnorm(beta[@nn])
  a2 = colmn[@n] * sumc[@nn]

  (1..7).each do |j| # do 28
    b[l] = a1 * hh[l].quo(Math::sqrt(j * a2))
    l = l + @nn
  end # 28
  rcof = []
  # compute coefficients rcof of polynomial of order 8
  rcof[1] = -phisq
  (2..9).each do |i| # do 30
    rcof[i] = 0.0
  end # 30
  m1 = @mm
  (1..@mm).each do |i| # do 31
    m1 = m1 + 1
    m2 = m1 + @mm
    m3 = m2 + @mm
    m4 = m3 + @mm
    m5 = m4 + @mm
    m6 = m5 + @mm
    n1 = @nn
    (1..@nn).each do |j| # do 31
      n1 = n1 + 1
      n2 = n1 + @nn
      n3 = n2 + @nn
      n4 = n3 + @nn
      n5 = n4 + @nn
      n6 = n5 + @nn

      rcof[3] = rcof[3] + a[i]**2 * b[j]**2

      rcof[4] = rcof[4] + 2.0 * a[i] * a[m1] * b[j] * b[n1]

      rcof[5] = rcof[5] + a[m1]**2 * b[n1]**2 +
                2.0 * a[i] * a[m2] * b[j] * b[n2]

      rcof[6] = rcof[6] + 2.0 * (a[i] * a[m3] * b[j] *
                b[n3] + a[m1] * a[m2] * b[n1] * b[n2])

      rcof[7] = rcof[7] + a[m2]**2 * b[n2]**2 +
                2.0 * (a[i] * a[m4] * b[j] * b[n4] + a[m1] * a[m3] *
                b[n1] * b[n3])

      rcof[8] = rcof[8] + 2.0 * (a[i] * a[m5] * b[j] * b[n5] +
                a[m1] * a[m4] * b[n1] * b[n4] + a[m2] * a[m3] * b[n2] * b[n3])

      rcof[9] = rcof[9] + a[m3]**2 * b[n3]**2 +
                2.0 * (a[i] * a[m6] * b[j] * b[n6] + a[m1] * a[m5] * b[n1] *
                b[n5] + (a[m2] * a[m4] * b[n2] * b[n4]))
    end # 31
  end # 31

  # Drop the unused slot 0 before handing the coefficients to the solver.
  rcof = rcof[1, rcof.size]
  poly = GSL::Poly.alloc(rcof)
  roots = poly.solve
  rootr = [nil]
  rooti = [nil]
  roots.each do |c|
    rootr.push(c.real)
    rooti.push(c.im)
  end
  @rootr = rootr
  @rooti = rooti

  # Keep the last real root found in [0, 1] when the score covariance is
  # non-negative, or in [-1, 0) when it is negative.
  norts = 0
  (1..7).each do |i| # do 43
    next if rooti[i] != 0.0
    if covxy >= 0.0
      next if rootr[i] < 0.0 || rootr[i] > 1.0
      pcorl = rootr[i]
      norts = norts + 1
    elsif rootr[i] >= -1.0 && rootr[i] < 0.0
      pcorl = rootr[i]
      norts = norts + 1
    end
  end # 43
  raise "Error" if norts == 0
  @r = pcorl

  @loglike_model = -loglike(@alpha, @beta, @r)
end
|
839
|
-
# Builds the flat, 1-based table of values used by the series expansion.
#
# +s+ is a 1-based array of points (s[1]..s[k]); +k+ is the number of points.
# For each point i the table holds seven entries at indices
# i, i+k, ..., i+6k: the first is 1.0, the second is s[i], and each later
# one follows the recurrence
#   next = (s[i] * current - sqrt(j - 1) * previous) / sqrt(j),  j = 2..6
# (the j = 2 step uses 1.0 as the multiplier of +previous+).
# Index 0 of the returned array is left unset.
def hermit(s, k) # :nodoc:
  table = []
  1.upto(k) do |pos|
    point = s[pos]
    table[pos] = 1.0
    table[pos + k] = point
    prev_norm = 1.0
    2.upto(6) do |order|
      norm = Math.sqrt(order)
      base = pos + (order - 2) * k
      table[base + 2 * k] = (point * table[base + k] - prev_norm * table[base]).quo(norm)
      prev_norm = norm
    end
  end
  table
end
|
860
|
-
# Returns exp(-t**2 / 2) / sqrt(2 * PI) for the number +t+.
def xnorm(t) # :nodoc:
  inv_sqrt_two_pi = 1.0 / Math.sqrt(2 * Math::PI)
  Math.exp(-0.5 * t**2) * inv_sqrt_two_pi
end
|
863
|
-
|
864
|
-
# Returns the plain-text report for this object: a ReportBuilder created
# with :no_title => true, with +self+ added, rendered through +to_text+.
def summary
  ReportBuilder.new(:no_title=>true).add(self).to_text
end
|
867
|
-
|
868
|
-
|
869
|
-
# Adds this analysis to +generator+ as one ReportBuilder section.
#
# Runs +compute+ first when +dirty?+ is true. The section contains, in order:
# the contingency table with row, column and grand totals; the value of +r+;
# a table of the X and Y thresholds; and a line with the bivariate-normality
# test (chi_square, chi_square_df and the matching upper-tail probability).
def report_building(generator) # :nodoc:
  compute if dirty?
  section=ReportBuilder::Section.new(:name=>@name)
  t=ReportBuilder::Table.new(:name=>_("Contingence Table"), :header=>[""]+(@n.times.collect {|i| "Y=#{i}"})+["Total"])
  @m.times do |i|
    t.row(["X = #{i}"]+(@n.times.collect {|j| @matrix[i,j]}) + [@sumr[i]])
  end
  t.hr
  t.row(["T"]+(@n.times.collect {|j| @sumc[j]})+[@total])
  section.add(t)
  section.add(sprintf("r: %0.4f",r))
  t=ReportBuilder::Table.new(:name=>_("Thresholds"), :header=>["","Value"])
  threshold_x.each_with_index {|val,i|
    t.row(["Threshold X #{i}", sprintf("%0.4f", val)])
  }
  threshold_y.each_with_index {|val,i|
    t.row(["Threshold Y #{i}", sprintf("%0.4f", val)])
  }
  section.add(t)
  # Translate the template first and interpolate afterwards: looking up the
  # already-formatted string would never match a catalog entry.
  section.add(_("Test of bivariate normality: X2 = %0.3f, df = %d, p= %0.5f") % [ chi_square, chi_square_df, 1-Distribution::ChiSquare.cdf(chi_square, chi_square_df)])
  generator.parse_element(section)
end
|
891
|
-
end
|
892
|
-
end
|
893
|
-
end
|