statsample 0.6.1 → 0.6.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (58) hide show
  1. data/History.txt +4 -0
  2. data/Manifest.txt +8 -19
  3. data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
  4. data/demo/dominance_analysis_bootstrap.rb +20 -0
  5. data/demo/dominanceanalysis.rb +11 -0
  6. data/demo/multiple_regression.rb +40 -0
  7. data/demo/polychoric.rb +13 -0
  8. data/demo/tetrachoric.rb +10 -0
  9. data/lib/distribution.rb +1 -0
  10. data/lib/distribution/normalbivariate.rb +100 -0
  11. data/lib/statsample.rb +4 -105
  12. data/lib/statsample/bivariate.rb +5 -1
  13. data/lib/statsample/bivariate/polychoric.rb +581 -0
  14. data/lib/statsample/bivariate/tetrachoric.rb +37 -5
  15. data/lib/statsample/converters.rb +11 -0
  16. data/lib/statsample/dominanceanalysis.rb +104 -90
  17. data/lib/statsample/dominanceanalysis/bootstrap.rb +160 -131
  18. data/lib/statsample/factor/pca.rb +1 -2
  19. data/lib/statsample/factor/principalaxis.rb +2 -2
  20. data/lib/statsample/graph/svghistogram.rb +170 -172
  21. data/lib/statsample/matrix.rb +79 -0
  22. data/lib/statsample/mle.rb +6 -4
  23. data/lib/statsample/mle/probit.rb +0 -1
  24. data/lib/statsample/regression/multiple/alglibengine.rb +23 -23
  25. data/lib/statsample/regression/multiple/baseengine.rb +112 -113
  26. data/lib/statsample/regression/multiple/gslengine.rb +91 -94
  27. data/lib/statsample/regression/multiple/rubyengine.rb +1 -1
  28. data/lib/statsample/srs.rb +1 -1
  29. data/lib/statsample/test.rb +0 -1
  30. data/lib/statsample/test/umannwhitney.rb +8 -5
  31. data/po/es/statsample.po +201 -39
  32. data/po/statsample.pot +184 -32
  33. data/test/test_bivariate.rb +21 -2
  34. data/test/test_distribution.rb +58 -40
  35. data/test/test_factor.rb +0 -1
  36. data/test/test_gsl.rb +13 -14
  37. data/test/test_regression.rb +1 -1
  38. data/test/test_statistics.rb +1 -4
  39. metadata +10 -21
  40. data/demo/benchmark.rb +0 -76
  41. data/demo/chi-square.rb +0 -44
  42. data/demo/crosstab.rb +0 -7
  43. data/demo/dice.rb +0 -13
  44. data/demo/distribution_t.rb +0 -95
  45. data/demo/graph.rb +0 -9
  46. data/demo/item_analysis.rb +0 -30
  47. data/demo/mean.rb +0 -81
  48. data/demo/nunnally_6.rb +0 -34
  49. data/demo/pca.rb +0 -29
  50. data/demo/proportion.rb +0 -57
  51. data/demo/regression.rb +0 -82
  52. data/demo/sample_test.csv +0 -113
  53. data/demo/spss_matrix.rb +0 -3
  54. data/demo/strata_proportion.rb +0 -152
  55. data/demo/stratum.rb +0 -141
  56. data/demo/t-student.rb +0 -17
  57. data/demo/umann.rb +0 -8
  58. data/lib/matrix_extension.rb +0 -92
data/History.txt CHANGED
@@ -1,3 +1,7 @@
1
+ === 0.6.2 / 2010-02-11
2
+ * New Statsample::Bivariate::Polychoric. Still to be implemented: X2 and G2
3
+ * New matrix.rb, for faster development of contingency tables and correlation matrices
4
+
1
5
  === 0.6.1 / 2010-02-08
2
6
  * Bug fix on DominanceAnalysis summary for Ruby1.9
3
7
  * Some extra documentation
data/Manifest.txt CHANGED
@@ -9,35 +9,23 @@ data/repeated_fields.csv
9
9
  data/test_binomial.csv
10
10
  data/tetmat_matrix.txt
11
11
  data/tetmat_test.txt
12
- demo/benchmark.rb
13
- demo/chi-square.rb
14
- demo/crosstab.rb
15
- demo/dice.rb
16
- demo/distribution_t.rb
17
- demo/graph.rb
18
- demo/item_analysis.rb
19
- demo/mean.rb
20
- demo/nunnally_6.rb
21
- demo/pca.rb
22
- demo/proportion.rb
23
- demo/regression.rb
24
- demo/sample_test.csv
25
- demo/spss_matrix.rb
26
- demo/strata_proportion.rb
27
- demo/stratum.rb
28
- demo/t-student.rb
29
- demo/umann.rb
12
+ demo/dominance_analysis_bootstrap.rb
13
+ demo/dominanceanalysis.rb
14
+ demo/multiple_regression.rb
15
+ demo/polychoric.rb
16
+ demo/tetrachoric.rb
30
17
  lib/distribution.rb
31
18
  lib/distribution/chisquare.rb
32
19
  lib/distribution/f.rb
33
20
  lib/distribution/normal.rb
21
+ lib/distribution/normalbivariate.rb
34
22
  lib/distribution/t.rb
35
- lib/matrix_extension.rb
36
23
  lib/spss.rb
37
24
  lib/statistics2.rb
38
25
  lib/statsample.rb
39
26
  lib/statsample/anova.rb
40
27
  lib/statsample/bivariate.rb
28
+ lib/statsample/bivariate/polychoric.rb
41
29
  lib/statsample/bivariate/tetrachoric.rb
42
30
  lib/statsample/codification.rb
43
31
  lib/statsample/combination.rb
@@ -60,6 +48,7 @@ lib/statsample/graph/svghistogram.rb
60
48
  lib/statsample/graph/svgscatterplot.rb
61
49
  lib/statsample/histogram.rb
62
50
  lib/statsample/htmlreport.rb
51
+ lib/statsample/matrix.rb
63
52
  lib/statsample/mle.rb
64
53
  lib/statsample/mle/logit.rb
65
54
  lib/statsample/mle/normal.rb
@@ -0,0 +1,20 @@
1
+ #!/usr/bin/ruby
2
+ $:.unshift(File.dirname(__FILE__)+'/../lib/')
3
+ require 'statsample'
4
+
5
+
6
+ a=100.times.collect {rand}.to_scale
7
+ b=100.times.collect {rand}.to_scale
8
+ c=100.times.collect {rand}.to_scale
9
+ d=100.times.collect {rand}.to_scale
10
+
11
+ ds={'a'=>a,'b'=>b,'c'=>c,'d'=>d}.to_dataset
12
+
13
+ ds['y']=ds.collect{|row| row['a']*5+row['b']*2+row['c']*2+row['d']*2+10*rand()}
14
+ dab=Statsample::DominanceAnalysis::Bootstrap.new(ds, 'y')
15
+ if HAS_GSL
16
+ # Use Gsl if available (faster calculation)
17
+ dab.regression_class=Statsample::Regression::Multiple::GslEngine
18
+ end
19
+ dab.bootstrap(100,nil,true)
20
+ puts dab.summary
@@ -0,0 +1,11 @@
1
+ #!/usr/bin/ruby
2
+ $:.unshift(File.dirname(__FILE__)+'/../lib/')
3
+
4
+ require 'statsample'
5
+ a=1000.times.collect {rand}.to_scale
6
+ b=1000.times.collect {rand}.to_scale
7
+ c=1000.times.collect {rand}.to_scale
8
+ ds={'a'=>a,'b'=>b,'c'=>c}.to_dataset
9
+ ds['y']=ds.collect{|row| row['a']*5+row['b']*3+row['c']*2+rand()}
10
+ da=Statsample::DominanceAnalysis.new(ds,'y')
11
+ puts da.summary
@@ -0,0 +1,40 @@
1
+ #!/usr/bin/ruby
2
+ $:.unshift(File.dirname(__FILE__)+'/../lib/')
3
+
4
+ require 'statsample'
5
+ require 'benchmark'
6
+ samples=10000
7
+ a=samples.times.collect {rand}.to_scale
8
+ b=samples.times.collect {rand}.to_scale
9
+ c=samples.times.collect {rand}.to_scale
10
+ d=samples.times.collect {rand}.to_scale
11
+
12
+ ds={'a'=>a,'b'=>b,'c'=>c,'d'=>d}.to_dataset
13
+ ds['y']=ds.collect{|row| row['a']*5+row['b']*3+row['c']*2+row['d']*1+rand()}
14
+
15
+ Benchmark.bm(7) do |x|
16
+
17
+
18
+ rb=ReportBuilder.new("Multiple Regression Engines")
19
+
20
+ if HAS_GSL
21
+ x.report("GSL:") {
22
+ lr=Statsample::Regression::Multiple::GslEngine.new(ds,'y',:name=>"Multiple Regression using GSL")
23
+ rb.add(lr.summary)
24
+ }
25
+ end
26
+
27
+
28
+ if HAS_ALGIB
29
+ x.report("Alglib:") {
30
+ lr=Statsample::Regression::Multiple::AlglibEngine.new(ds,'y', :name=>"Multiple Regression using Alglib")
31
+ rb.add(lr.summary)
32
+ }
33
+ end
34
+ x.report("Ruby:") {
35
+ lr=Statsample::Regression::Multiple::RubyEngine.new(ds,'y',:name=>"Multiple Regression using RubyEngine")
36
+ rb.add(lr.summary)
37
+ }
38
+
39
+ puts rb.to_text
40
+ end
@@ -0,0 +1,13 @@
1
+ #!/usr/bin/ruby
2
+ $:.unshift(File.dirname(__FILE__)+'/../lib/')
3
+
4
+ require 'statsample'
5
+ #ct=Matrix[[58,52,1],[26,58,3],[8,12,9]]
6
+
7
+ ct=Matrix[[30,1,0,0,0,0],[0,10,2,0,0,0], [0,4,8,3,1,0], [0,3,3,37,9,0], [0,0,1, 25, 71, 49], [ 0,0,0,2, 20, 181]]
8
+ poly=Statsample::Bivariate::Polychoric.new(ct)
9
+
10
+ puts poly.summary
11
+ puts poly.chi_square_independence
12
+ puts poly.chi_square_model
13
+ puts poly.chi_square_independence
@@ -0,0 +1,10 @@
1
+ #!/usr/bin/ruby
2
+ $:.unshift(File.dirname(__FILE__)+'/../lib/')
3
+
4
+ require 'statsample'
5
+ a=40
6
+ b=10
7
+ c=20
8
+ d=30
9
+ tetra=Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
10
+ puts tetra.summary
data/lib/distribution.rb CHANGED
@@ -12,4 +12,5 @@ module Distribution
12
12
  autoload(:T, 'distribution/t')
13
13
  autoload(:F, 'distribution/f')
14
14
  autoload(:Normal, 'distribution/normal')
15
+ autoload(:NormalBivariate, 'distribution/normalbivariate')
15
16
  end
@@ -0,0 +1,100 @@
1
+ module Distribution
2
+ # Calculate pdf and cdf for bivariate normal distribution
3
+ module NormalBivariate
4
+
5
+ class << self
6
+ SIDE=0.1
7
+ LIMIT=5
8
# Probability density function of the zero-mean bivariate normal distribution.
# Source: http://en.wikipedia.org/wiki/Multivariate_normal_distribution
#
# x, y::           point at which the density is evaluated
# rho::            correlation between both variables (|rho| < 1)
# sigma1, sigma2:: standard deviations of x and y (default 1.0)
#
# Returns the density as a number (Float for Float input).
def pdf(x,y, rho, sigma1=1.0, sigma2=1.0)
  one_minus_rho2 = 1 - rho**2
  scale = 1.quo(2 * Math::PI * sigma1 * sigma2 * Math::sqrt(one_minus_rho2))
  # Each squared deviate is divided by its variance (sigma**2), not by sigma.
  quad = (x**2).quo(sigma1**2) + (y**2).quo(sigma2**2) - (2*rho*x*y).quo(sigma1*sigma2)
  scale * Math::exp(-(quad.quo(2 * one_minus_rho2)))
end
15
+ def f(x,y,aprime,bprime,rho)
16
+ r=aprime*(2*x-aprime)+bprime*(2*y-bprime)+2*rho*(x-aprime)*(y-bprime)
17
+ Math::exp(r)
18
+ end
19
+ def cdf(a,b,rho)
20
+ cdf_math(a,b,rho)
21
+ end
22
+ def sgn(x)
23
+ if(x>=0)
24
+ 1
25
+ else
26
+ -1
27
+ end
28
+ end
29
+ # As http://finance.bi.no/~bernt/gcc_prog/recipes/recipes/node23.html
30
+ def cdf_math(a,b,rho)
31
+ #puts "a:#{a} - b:#{b} - rho:#{rho}"
32
+ if (a<=0 and b<=0 and rho<=0)
33
+ # puts "ruta 1"
34
+ aprime=a.quo(Math::sqrt(2.0*(1.0-rho**2)))
35
+ bprime=b.quo(Math::sqrt(2.0*(1.0-rho**2)))
36
+ aa=[0.3253030, 0.4211071, 0.1334425, 0.006374323]
37
+ bb=[0.1337764, 0.6243247, 1.3425378, 2.2626645]
38
+ sum=0
39
+ 4.times do |i|
40
+ 4.times do |j|
41
+ sum+=aa[i]*aa[j] * f(bb[i], bb[j], aprime, bprime,rho)
42
+ end
43
+ end
44
+ sum=sum*(Math::sqrt(1.0-rho**2).quo(Math::PI))
45
+ return sum
46
+ elsif(a*b*rho<=0.0)
47
+
48
+ #puts "ruta 2"
49
+ if(a<=0 and b>=0 and rho>=0)
50
+ return Distribution::Normal.cdf(a) - cdf(a,-b,-rho)
51
+ elsif (a>=0.0 and b<=0.0 and rho>=0)
52
+ return Distribution::Normal.cdf(b) - cdf(-a,b,-rho)
53
+ elsif (a>=0.0 and b>=0.0 and rho<=0)
54
+ return Distribution::Normal.cdf(a) + Distribution::Normal.cdf(b) - 1.0 + cdf(-a,-b,rho)
55
+ end
56
+ elsif (a*b*rho>=0.0)
57
+ #puts "ruta 3"
58
+ denum=Math::sqrt(a**2 - 2*rho*a*b + b**2)
59
+ rho1=((rho*a-b)*sgn(a)).quo(denum)
60
+ rho2=((rho*b-a)*sgn(b)).quo(denum)
61
+ delta=(1.0-sgn(a)*sgn(b)).quo(4)
62
+ #puts "#{rho1} - #{rho2}"
63
+ return cdf(a, 0.0, rho1) + cdf(b, 0.0, rho2) - delta
64
+ end
65
+ raise "Should'nt be here! #{a} - #{b} #{rho}"
66
+ end
67
# Cdf for a given x and y, approximated by summing the density over a grid
# of SIDE x SIDE squares starting at (-LIMIT, -LIMIT).
#
# x, y::   upper integration bounds
# rho::    correlation passed through to +pdf+
# s1, s2:: standard deviations passed through to +pdf+
#
# Reference:
# * Jantaravareerat, M. & Thomopoulos, N. (n/d). Tables for standard bivariate normal distribution
def cdf_iterate(x,y,rho,s1=1,s2=1)
  # Special cases: bounds outside [-LIMIT, LIMIT] are answered directly.
  return 1 if x>LIMIT and y>LIMIT
  return 0 if x<-LIMIT or y<-LIMIT
  return Distribution::Normal.cdf(y) if x>LIMIT
  return Distribution::Normal.cdf(x) if y>LIMIT

  # The guards above leave both bounds inside [-LIMIT, LIMIT],
  # so no clamping is required before building the grid.
  x_squares=((LIMIT+x) / SIDE).to_i
  y_squares=((LIMIT+y) / SIDE).to_i
  sum=0
  x_squares.times do |i|
    y_squares.times do |j|
      z1=-LIMIT+(i+1)*SIDE
      z2=-LIMIT+(j+1)*SIDE
      # Mean density of the four corners of the current square
      h=(pdf(z1,z2,rho,s1,s2)+pdf(z1-SIDE,z2,rho,s1,s2)+pdf(z1,z2-SIDE,rho,s1,s2) + pdf(z1-SIDE,z2-SIDE,rho,s1,s2)).quo(4)
      sum+= (SIDE**2)*h # area
    end
  end
  sum
end
98
+ end
99
+ end
100
+ end
data/lib/statsample.rb CHANGED
@@ -23,6 +23,7 @@ $:.unshift(File.expand_path(File.dirname(__FILE__)+"/../ext"))
23
23
 
24
24
  require 'matrix'
25
25
  require 'distribution'
26
+ require 'reportbuilder'
26
27
 
27
28
  class Numeric
28
29
  def square ; self * self ; end
@@ -108,7 +109,7 @@ end
108
109
  # * Dataset: An union of vectors.
109
110
  #
110
111
  module Statsample
111
- VERSION = '0.6.1'
112
+ VERSION = '0.6.2'
112
113
  SPLIT_TOKEN = ","
113
114
  autoload(:Database, 'statsample/converters')
114
115
  autoload(:Anova, 'statsample/anova')
@@ -135,6 +136,7 @@ module Statsample
135
136
  autoload(:Regression, 'statsample/regression')
136
137
  autoload(:Test, 'statsample/test')
137
138
  autoload(:Factor, 'statsample/factor')
139
+
138
140
  def self.load(filename)
139
141
  if File.exists? filename
140
142
  o=false
@@ -165,110 +167,6 @@ module Statsample
165
167
  fp.close
166
168
  end
167
169
  end
168
- module HtmlSummary
169
- def add_line(n=nil)
170
- self << "<hr />"
171
- end
172
- def nl
173
- self << "<br />"
174
- end
175
- def add(text)
176
- self << ("<p>"+text.gsub("\n","<br />")+"</p>")
177
- end
178
- def parse_table(table)
179
- self << table.parse_html
180
- end
181
- end
182
- module ConsoleSummary
183
- def add_line(n=80)
184
- self << "-"*n+"\n"
185
- end
186
- def nl
187
- self << "\n"
188
- end
189
- def add(text)
190
- self << text
191
- end
192
- def parse_table(table)
193
- self << table.parse_console
194
- end
195
- end
196
- class ReportTable
197
- attr_reader :header
198
- def initialize(h=[])
199
- @rows=[]
200
- @max_cols=[]
201
- self.header=(h)
202
- end
203
- def add_row(row)
204
- row.each_index{|i|
205
- @max_cols[i]=row[i].to_s.size if @max_cols[i].nil? or row[i].to_s.size > @max_cols[i]
206
- }
207
- @rows.push(row)
208
- end
209
- def add_horizontal_line
210
- @rows.push(:hr)
211
- end
212
- def header=(h)
213
- h.each_index{|i|
214
- @max_cols[i]=h[i].to_s.size if @max_cols[i].nil? or h[i].to_s.size>@max_cols[i]
215
- }
216
- @header=h
217
- end
218
- def parse_console_row(row)
219
- out="| "
220
- @max_cols.each_index{|i|
221
- if row[i].nil?
222
- out << " "*(@max_cols[i]+2)+"|"
223
- else
224
- t=row[i].to_s
225
- out << " "+t+" "*(@max_cols[i]-t.size+1)+"|"
226
- end
227
- }
228
- out << "\n"
229
- out
230
- end
231
- def parse_console_hr
232
- "-"*(@max_cols.inject(0){|a,v|a+v.size+3}+2)+"\n"
233
- end
234
- def parse_console
235
- out="\n"
236
- out << parse_console_hr
237
- out << parse_console_row(header)
238
- out << parse_console_hr
239
-
240
- @rows.each{|row|
241
- if row==:hr
242
- out << parse_console_hr
243
- else
244
- out << parse_console_row(row)
245
- end
246
- }
247
- out << parse_console_hr
248
-
249
- out
250
- end
251
- def parse_html
252
- out="<table>\n"
253
- if header.size>0
254
- out << "<thead><th>"+header.join("</th><th>")+"</thead><tbody>"
255
- end
256
- out << "<tbody>\n"
257
- row_with_line=false
258
- @rows.each{|row|
259
- if row==:hr
260
- row_with_line=true
261
- else
262
- out << "<tr class='"+(row_with_line ? 'line':'')+"'><td>"
263
- out << row.join("</td><td>") +"</td>"
264
- out << "</tr>\n"
265
- row_with_line=false
266
- end
267
- }
268
- out << "</tbody></table>\n"
269
- out
270
- end
271
- end
272
170
 
273
171
  module STATSAMPLE__ #:nodoc:
274
172
  end
@@ -288,3 +186,4 @@ end
288
186
  require 'statsample/vector'
289
187
  require 'statsample/dataset'
290
188
  require 'statsample/crosstab'
189
+ require 'statsample/matrix'
@@ -1,4 +1,5 @@
1
1
  require 'statsample/bivariate/tetrachoric'
2
+ require 'statsample/bivariate/polychoric'
2
3
  module Statsample
3
4
  # Diverse correlation methods
4
5
  module Bivariate
@@ -132,7 +133,7 @@ module Statsample
132
133
  # Order of rows and columns depends on Dataset#fields order
133
134
 
134
135
  def correlation_matrix(ds)
135
- ds.collect_matrix do |row,col|
136
+ cm=ds.collect_matrix do |row,col|
136
137
  if row==col
137
138
  1.0
138
139
  elsif (ds[row].type!=:scale or ds[col].type!=:scale)
@@ -141,6 +142,9 @@ module Statsample
141
142
  pearson(ds[row],ds[col])
142
143
  end
143
144
  end
145
+ cm.extend(Statsample::CorrelationMatrix)
146
+ cm.labels=ds.fields
147
+ cm
144
148
  end
145
149
 
146
150
  # Retrieves the n valid pairwise.
@@ -0,0 +1,581 @@
1
+ module Statsample
2
+ module Bivariate
3
+ # Calculate Polychoric correlation for two vectors.
4
+ def self.polychoric(v1,v2)
5
+ pc=Polychoric.new_with_vectors(v1,v2)
6
+ pc.r
7
+ end
8
+
9
+ # Polychoric correlation matrix.
10
+ # Order of rows and columns depends on Dataset#fields order
11
+ def self.polychoric_correlation_matrix(ds)
12
+ ds.collect_matrix do |row,col|
13
+ if row==col
14
+ 1.0
15
+ else
16
+ begin
17
+ polychoric(ds[row],ds[col])
18
+ rescue RuntimeError
19
+ nil
20
+ end
21
+ end
22
+ end
23
+ end
24
+ # Compute polychoric correlation.
25
+ #
26
+ # The polychoric correlation estimates what the correlation between raters, who classified on an ordered category scale, would be if ratings were made on a continuous scale; it is, theoretically, invariant over changes in the number or "width" of rating categories.
27
+ # See extensive documentation on http://www.john-uebersax.com/stat/tetra.htm
28
+
29
+ class Polychoric
30
+ include GetText
31
+ bindtextdomain("statsample")
32
+ # Name of the analysis
33
+ attr_accessor :name
34
+ # Max number of iterations used on iterative methods. Default to 100
35
+ attr_accessor :max_iterations
36
+ # Debug algorithm (See iterations, for example)
37
+ attr_accessor :debug
38
+ # Minimizer type. Default GSL::Min::FMinimizer::BRENT
39
+ # See http://rb-gsl.rubyforge.org/min.html for reference.
40
+ attr_accessor :minimizer_type
41
+ # Method of calculation.
42
+ #
43
+ # Drasgow (1988, cited by Uebersax, 2002) describes two methods: the joint maximum likelihood (ML) approach and two-step ML estimation.
44
+ # For now, only implemented two-step ML (:two_step), with algorithm
45
+ # based on Drasgow(1986, cited by Gegenfurtner, 1992)
46
+ #
47
+ attr_accessor :method
48
+ # Absolute error for iteration. Default to 0.001
49
+ attr_accessor :epsilon
50
+
51
+ # Number of iterations
52
+ attr_reader :iteration
53
+
54
+ # Log of algorithm
55
+ attr_reader :log
56
+ attr_reader :loglike
57
+ MAX_ITERATIONS=100
58
+ EPSILON=0.001
59
+ MINIMIZER_TYPE=GSL::Min::FMinimizer::BRENT
60
# Builds a Polychoric object from two vectors by cross-tabulating them.
#
# Defined on the class itself because it is invoked as
# <tt>Polychoric.new_with_vectors(v1,v2)</tt> (see Bivariate.polychoric).
#
# v1, v2:: vectors accepted by Crosstab.new
def self.new_with_vectors(v1,v2)
  Polychoric.new(Crosstab.new(v1,v2).to_matrix)
end
63
+
64
# Prepares a polychoric estimation for a contingency table.
#
# matrix:: contingency table; must answer +row_size+ and +column_size+
# opts::   Hash of attribute overrides (for example :name, :max_iterations,
#          :epsilon, :minimizer_type, :debug, :method). Keys without a
#          public writer are ignored.
#
# Raises RuntimeError when the table has one row/column or fewer.
def initialize(matrix, opts=Hash.new)
  @matrix=matrix
  @n=matrix.column_size
  @m=matrix.row_size
  raise "row size <1" if @m<=1
  raise "column size <1" if @n<=1

  @method=:two_step
  @name="Polychoric correlation"
  @max_iterations=MAX_ITERATIONS
  @epsilon=EPSILON
  @minimizer_type=MINIMIZER_TYPE
  @debug=false
  @iteration=nil
  opts.each{|k,v|
    # Test for the writer, not the reader: read-only attributes
    # (iteration, log, loglike, ...) have no "name=" method to send to.
    writer="#{k}="
    self.send(writer,v) if self.respond_to? writer
  }
  @r=nil
end
83
+ def r
84
+ if @r.nil?
85
+ compute
86
+ end
87
+ @r
88
+ end
89
+
90
+ def threshold_x
91
+ if @alpha.nil?
92
+ compute
93
+ end
94
+ @alpha[0,@alpha.size-1]
95
+ end
96
+
97
+ def threshold_y
98
+ if @beta.nil?
99
+ compute
100
+ end
101
+ @beta[0,@beta.size-1]
102
+ end
103
+
104
+
105
+
106
+ def compute
107
+ if @method==:two_step
108
+ compute_two_step_mle_drasgow
109
+ elsif @method==:as87
110
+ compute_two_step_as87
111
+ else
112
+ raise "Not implemented"
113
+ end
114
+ end
115
# *Computation of polychoric correlation using two-step ML estimation.*
#
# Two-step ML estimation "first estimates the thresholds from the one-way marginal frequencies, then estimates rho, conditional on these thresholds, via maximum likelihood" (Uebersax, 2006).
#
# The algorithm is based on Drasgow (1986, cited by Gegenfurtner, 1992).
#
# Sets @r (estimate), @alpha / @beta (row / column thresholds, last entry is
# the sentinel 10), @sumr, @sumc, @total, @loglike, @pd, @log and @iteration.
#
# References:
# * Gegenfurtner, K. (1992). PRAXIS: Brent's algorithm for function minimization. Behavior Research Methods, Instruments & Computers, 24(4), 560-564. Available on http://www.allpsych.uni-giessen.de/karl/pdf/03.praxis.pdf
# * Uebersax, J.S. (2006). The tetrachoric and polychoric correlation coefficients. Statistical Methods for Rater Agreement web site. 2006. Available at: http://john-uebersax.com/stat/tetra.htm . Accessed February, 11, 2010
#
def compute_two_step_mle_drasgow
  @nr=@matrix.row_size
  @nc=@matrix.column_size
  @sumr=[0]*@matrix.row_size
  @sumrac=[0]*@matrix.row_size
  @sumc=[0]*@matrix.column_size
  @sumcac=[0]*@matrix.column_size
  @alpha=[0]*@matrix.row_size
  # One threshold slot per column (not per row), so threshold_y
  # has the right length when the table is not square.
  @beta=[0]*@matrix.column_size
  @total=0
  # Marginal totals
  @nr.times do |i|
    @nc.times do |j|
      @sumr[i]+=@matrix[i,j]
      @sumc[j]+=@matrix[i,j]
      @total+=@matrix[i,j]
    end
  end
  # Row thresholds from cumulative marginal proportions
  ac=0
  (@nr-1).times do |i|
    @sumrac[i]=@sumr[i]+ac
    @alpha[i]=Distribution::Normal.p_value(@sumrac[i] / @total.to_f)
    ac=@sumrac[i]
  end
  # Column thresholds from cumulative marginal proportions
  ac=0
  (@nc-1).times do |i|
    @sumcac[i]=@sumc[i]+ac
    @beta[i]=Distribution::Normal.p_value(@sumcac[i] / @total.to_f)
    ac=@sumcac[i]
  end
  # Sentinel upper threshold for the last category
  @alpha[@nr-1]=10
  @beta[@nc-1]=10
  # Negative log-likelihood as a function of rho (x)
  fn1=GSL::Function.alloc {|x|
    loglike=0
    pd=@nr.times.collect{ [0]*@nc}
    pc=@nr.times.collect{ [0]*@nc}

    @nr.times { |i|
      @nc.times { |j|
        pd[i][j]=Distribution::NormalBivariate.cdf(@alpha[i], @beta[j], x)
        pc[i][j] = pd[i][j]
        # Cell probability by inclusion-exclusion over the cumulative values
        pd[i][j] = pd[i][j] - pc[i-1][j] if i>0
        pd[i][j] = pd[i][j] - pc[i][j-1] if j>0
        pd[i][j] = pd[i][j] + pc[i-1][j-1] if (i>0 and j>0)
        res= pd[i][j]

        # Avoid log(0)
        if res==0.0
          res=1e-15
        end

        loglike+= @matrix[i,j] * Math::log( res )
      }
    }
    @loglike=loglike
    @pd=pd
    -loglike
  }
  @iteration = 0
  m = 0 # initial guess
  m_expected = 0.5
  a=-0.99999
  b=+0.99999
  gmf = GSL::Min::FMinimizer.alloc(@minimizer_type)
  gmf.set(fn1, m, a, b)
  header=sprintf("using %s method\n", gmf.name)
  header+=sprintf("%5s [%9s, %9s] %9s %10s %9s\n", "iter", "lower", "upper", "min",
  "err", "err(est)")

  header+=sprintf("%5d [%.7f, %.7f] %.7f %+.7f %.7f\n", @iteration, a, b, m, m - m_expected, b - a)
  @log=header
  puts header if @debug
  begin
    @iteration += 1
    gmf.iterate
    # Honour the configurable absolute error (default EPSILON = 0.001)
    status = gmf.test_interval(@epsilon, 0.0)

    if status == GSL::SUCCESS
      @log+="Converged:"
      puts "Converged:" if @debug
    end
    a = gmf.x_lower
    b = gmf.x_upper
    m = gmf.x_minimum
    message=sprintf("%5d [%.7f, %.7f] %.7f %+.7f %.7f\n",
    @iteration, a, b, m, m - m_expected, b - a);
    @log+=message
    puts message if @debug
  end while status == GSL::CONTINUE and @iteration < @max_iterations
  @r=gmf.x_minimum
end
216
+ # Chi-square to test r=0
217
+ def chi_square_independence
218
+ Statsample::Test::chi_square(@matrix, expected)
219
+ end
220
# Chi-square to test model==independence.
#
# Compares the frequencies predicted by the fitted model (@pd scaled by
# @total) against the frequencies expected under independence.
# Runs +compute+ first when no estimate is available yet.
def chi_square_model_expected
  compute if @r.nil?
  model=Matrix.rows(@pd).collect {|c| c*@total}
  Statsample::Test::chi_square(model, expected)
end
228
# Chi-square to test real == calculated with rho.
#
# Compares the observed table (@matrix) against the frequencies predicted
# by the fitted model (@pd scaled by @total).
# Runs +compute+ first when no estimate is available yet.
def chi_square_model
  compute if @r.nil?
  e=Matrix.rows(@pd).collect {|c| c*@total}
  Statsample::Test::chi_square(@matrix, e)
end
234
+ def matrix_for_rho(rho)
235
+ pd=@nr.times.collect{ [0]*@nc}
236
+ pc=@nr.times.collect{ [0]*@nc}
237
+ @nr.times { |i|
238
+ @nc.times { |j|
239
+ pd[i][j]=Distribution::NormalBivariate.cdf(@alpha[i], @beta[j], rho)
240
+ pc[i][j] = pd[i][j]
241
+ pd[i][j] = pd[i][j] - pc[i-1][j] if i>0
242
+ pd[i][j] = pd[i][j] - pc[i][j-1] if j>0
243
+ pd[i][j] = pd[i][j] + pc[i-1][j-1] if (i>0 and j>0)
244
+ res= pd[i][j]
245
+ }
246
+ }
247
+ Matrix.rows(pc)
248
+ end
249
+ def g2
250
+ raise "Doesn't work"
251
+ e=expected
252
+ no_r_likehood=0
253
+ @nr.times {|i|
254
+ @nc.times {|j|
255
+ #p @matrix[i,j]
256
+ if @matrix[i,j]!=0
257
+ no_r_likehood+= @matrix[i,j]*Math::log(e[i,j])
258
+ end
259
+ }
260
+ }
261
+ p no_r_likehood
262
+ model=Matrix.rows(@pd).collect {|c| c*@total}
263
+
264
+ model_likehood=0
265
+ @nr.times {|i|
266
+ @nc.times {|j|
267
+ #p @matrix[i,j]
268
+ if @matrix[i,j]!=0
269
+ model_likehood+= @matrix[i,j] * Math::log(model[i,j])
270
+ end
271
+ }
272
+ }
273
+
274
+ p model_likehood
275
+
276
+ -2*(no_r_likehood-model_likehood)
277
+
278
+ end
279
+ def expected
280
+ rt=[]
281
+ ct=[]
282
+ t=0
283
+ @matrix.row_size.times {|i|
284
+ @matrix.column_size.times {|j|
285
+ rt[i]=0 if rt[i].nil?
286
+ ct[j]=0 if ct[j].nil?
287
+ rt[i]+=@matrix[i,j]
288
+ ct[j]+=@matrix[i,j]
289
+ t+=@matrix[i,j]
290
+ }
291
+ }
292
+ m=[]
293
+ @matrix.row_size.times {|i|
294
+ row=[]
295
+ @matrix.column_size.times {|j|
296
+ row[j]=(rt[i]*ct[j]).quo(t)
297
+ }
298
+ m.push(row)
299
+ }
300
+
301
+ Matrix.rows(m)
302
+ end
303
# Compute polychoric using AS87.
#
# Arrays are 1-based (index 0 unused) and the contingency table is
# flattened column by column into +cont+.
#
# NOTE(review): the original author marked this routine as not working.
# The covariance accumulation typo (result assigned to a misspelled
# variable, leaving covxy at 0.0) is fixed here; other discrepancies
# may remain, so verify results before relying on them.
#
# Sets @r, @alpha, @beta, @sumr, @sumc, @total, @rootr and @rooti.
# Raises RuntimeError on empty rows/columns or when no admissible root exists.
def compute_two_step_as87
  @nn=@n-1
  @mm=@m-1
  @nn7=7*@nn
  @mm7=7*@mm
  @mn=@n*@m
  @cont=[nil]
  @n.times {|j|
    @m.times {|i|
      @cont.push(@matrix[i,j])
    }
  }

  pcorl=0
  cont=@cont
  xmean=0.0
  sum=0.0
  row=[]
  colmn=[]
  # Row totals and mean row index
  (1..@m).each do |i|
    row[i]=0.0
    l=i
    (1..@n).each do |j|
      row[i]=row[i]+cont[l]
      l+=@m
    end
    raise "Should not be empty rows" if(row[i]==0.0)
    xmean=xmean+row[i]*i.to_f
    sum+=row[i]
  end
  xmean=xmean/sum.to_f
  # Column totals and mean column index
  ymean=0.0
  (1..@n).each do |j|
    colmn[j]=0.0
    l=(j-1)*@m
    (1..@m).each do |i|
      l=l+1
      colmn[j]=colmn[j]+cont[l] #12
    end
    raise "Should not be empty cols" if colmn[j]==0
    ymean=ymean+colmn[j]*j.to_f
  end
  ymean=ymean/sum.to_f
  # Covariance of the category indices; its sign selects the root below
  covxy=0.0
  (1..@m).each do |i|
    l=i
    (1..@n).each do |j|
      covxy=covxy+cont[l]*(i.to_f-xmean)*(j.to_f-ymean)
      l=l+@m
    end
  end

  chisq=0.0
  (1..@m).each do |i|
    l=i
    (1..@n).each do |j|
      chisq=chisq+((cont[l]**2).quo(row[i]*colmn[j]))
      l=l+@m
    end
  end

  phisq=chisq-1.0-(@mm*@nn).to_f / sum.to_f
  phisq=0 if(phisq<0.0)
  # Compute cumulative sum of columns and rows
  sumc=[]
  sumr=[]
  sumc[1]=colmn[1]
  sumr[1]=row[1]
  cum=0
  (1..@nn).each do |i| # goto 17 r20
    cum=cum+colmn[i]
    sumc[i]=cum
  end
  cum=0
  (1..@mm).each do |i|
    cum=cum+row[i]
    sumr[i]=cum
  end
  alpha=[]
  beta=[]
  # Compute points of polytomy
  (1..@mm).each do |i| #do 21
    alpha[i]=Distribution::Normal.p_value(sumr[i] / sum.to_f)
  end # 21
  (1..@nn).each do |i| #do 22
    beta[i]=Distribution::Normal.p_value(sumc[i] / sum.to_f)
  end # 22
  @alpha=alpha[1,alpha.size] << nil
  @beta=beta[1,beta.size] << nil
  @sumr=sumr
  @sumc=sumc
  @total=sum

  # Compute Fourier coefficients a and b. Verified
  h=hermit(alpha,@mm)
  hh=hermit(beta,@nn)
  a=[]
  b=[]
  if @m!=2 # goto 24
    mmm=@m-2
    (1..mmm).each do |i| #do 23
      a1=sum.quo(row[i+1] * sumr[i] * sumr[i+1])
      a2=sumr[i] * xnorm(alpha[i+1])
      a3=sumr[i+1] * xnorm(alpha[i])
      l=i
      (1..7).each do |j| #do 23
        a[l]=Math::sqrt(a1.quo(j))*(h[l+1] * a2 - h[l] * a3)
        l=l+@mm
      end
    end #23
  end
  # 24


  if @n!=2 # goto 26
    nnn=@n-2
    (1..nnn).each do |i| #do 25
      a1=sum.quo(colmn[i+1] * sumc[i] * sumc[i+1])
      a2=sumc[i] * xnorm(beta[i+1])
      a3=sumc[i+1] * xnorm(beta[i])
      l=i
      (1..7).each do |j| #do 25
        b[l]=Math::sqrt(a1.quo(j))*(a2 * hh[l+1] - a3*hh[l])
        l=l+@nn
      end # 25
    end # 25
  end
  #26 r20
  l = @mm
  a1 = -sum * xnorm(alpha[@mm])
  a2 = row[@m] * sumr[@mm]
  (1..7).each do |j| # do 27
    a[l]=a1 * h[l].quo(Math::sqrt(j*a2))
    l=l+@mm
  end # 27

  l = @nn
  a1 = -sum * xnorm(beta[@nn])
  a2 = colmn[@n] * sumc[@nn]

  (1..7).each do |j| # do 28
    b[l]=a1 * hh[l].quo(Math::sqrt(j*a2))
    l = l + @nn
  end # 28
  rcof=[]
  # compute coefficients rcof of polynomial of order 8
  rcof[1]=-phisq
  (2..9).each do |i| # do 30
    rcof[i]=0.0
  end #30
  m1=@mm
  (1..@mm).each do |i| # do 31
    m1=m1+1
    m2=m1+@mm
    m3=m2+@mm
    m4=m3+@mm
    m5=m4+@mm
    m6=m5+@mm
    n1=@nn
    (1..@nn).each do |j| # do 31
      n1=n1+1
      n2=n1+@nn
      n3=n2+@nn
      n4=n3+@nn
      n5=n4+@nn
      n6=n5+@nn

      rcof[3] = rcof[3] + a[i]**2 * b[j]**2

      rcof[4] = rcof[4] + 2.0 * a[i] * a[m1] * b[j] * b[n1]

      rcof[5] = rcof[5] + a[m1]**2 * b[n1]**2 +
        2.0 * a[i] * a[m2] * b[j] * b[n2]

      rcof[6] = rcof[6] + 2.0 * (a[i] * a[m3] * b[j] *
        b[n3] + a[m1] * a[m2] * b[n1] * b[n2])

      rcof[7] = rcof[7] + a[m2]**2 * b[n2]**2 +
        2.0 * (a[i] * a[m4] * b[j] * b[n4] + a[m1] * a[m3] *
        b[n1] * b[n3])

      rcof[8] = rcof[8] + 2.0 * (a[i] * a[m5] * b[j] * b[n5] +
        a[m1] * a[m4] * b[n1] * b[n4] + a[m2] * a[m3] * b[n2] * b[n3])

      rcof[9] = rcof[9] + a[m3]**2 * b[n3]**2 +
        2.0 * (a[i] * a[m6] * b[j] * b[n6] + a[m1] * a[m5] * b[n1] *
        b[n5] + (a[m2] * a[m4] * b[n2] * b[n4]))
    end # 31
  end # 31

  # Drop the unused 0 slot before handing the coefficients to GSL
  rcof=rcof[1,rcof.size]
  poly = GSL::Poly.alloc(rcof)
  roots=poly.solve
  rootr=[nil]
  rooti=[nil]
  roots.each {|c|
    rootr.push(c.real)
    rooti.push(c.im)
  }
  @rootr=rootr
  @rooti=rooti

  # Keep the real root whose sign agrees with the sign of covxy
  norts=0
  (1..7).each do |i| # do 43

    next if rooti[i]!=0.0
    if (covxy>=0.0)
      next if(rootr[i]<0.0 or rootr[i]>1.0)
      pcorl=rootr[i]
      norts=norts+1
    else
      if (rootr[i]>=-1.0 and rootr[i]<0.0)
        pcorl=rootr[i]
        norts=norts+1
      end
    end
  end # 43
  raise "Error" if norts==0
  @r=pcorl
end
526
+ #Computes vector h(mm7) of orthogonal hermite...
527
+ def hermit(s,k)
528
+ h=[]
529
+ (1..k).each do |i| # do 14
530
+ l=i
531
+ ll=i+k
532
+ lll=ll+k
533
+ h[i]=1.0
534
+ h[ll]=s[i]
535
+ v=1.0
536
+ (2..6).each do |j| #do 14
537
+ w=Math::sqrt(j)
538
+ h[lll]=(s[i]*h[ll] - v*h[l]).quo(w)
539
+ v=w
540
+ l=l+k
541
+ ll=ll+k
542
+ lll=lll+k
543
+ end
544
+ end
545
+ h
546
+ end
547
+ def xnorm(t)
548
+ Math::exp(-0.5 * t **2) * (1.0/Math::sqrt(2*Math::PI))
549
+ end
550
+
551
+ def summary
552
+ rp=ReportBuilder.new()
553
+ rp.add(self)
554
+ rp.to_text
555
+ end
556
+
557
+ def to_reportbuilder(generator)
558
+ compute if @r.nil?
559
+ section=ReportBuilder::Section.new(:name=>@name)
560
+ t=ReportBuilder::Table.new(:name=>_("Contingence Table"),:header=>[""]+(@n.times.collect {|i| "Y=#{i}"})+["Total"])
561
+ @m.times do |i|
562
+ t.add_row(["X = #{i}"]+(@n.times.collect {|j| @matrix[i,j]}) + [@sumr[i]])
563
+ end
564
+ t.add_hr
565
+ t.add_row(["T"]+(@n.times.collect {|j| @sumc[j]})+[@total])
566
+ section.add(t)
567
+ #generator.parse_element(t)
568
+ section.add(sprintf("r: %0.4f",r))
569
+ t=ReportBuilder::Table.new(:name=>_("Thresholds"), :header=>["","Value"])
570
+ threshold_x.each_with_index {|val,i|
571
+ t.add_row(["Threshold X #{i}", sprintf("%0.4f", val)])
572
+ }
573
+ threshold_y.each_with_index {|val,i|
574
+ t.add_row(["Threshold Y #{i}", sprintf("%0.4f", val)])
575
+ }
576
+ section.add(t)
577
+ generator.parse_element(section)
578
+ end
579
+ end
580
+ end
581
+ end