statsample 0.6.1 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. data/History.txt +4 -0
  2. data/Manifest.txt +8 -19
  3. data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
  4. data/demo/dominance_analysis_bootstrap.rb +20 -0
  5. data/demo/dominanceanalysis.rb +11 -0
  6. data/demo/multiple_regression.rb +40 -0
  7. data/demo/polychoric.rb +13 -0
  8. data/demo/tetrachoric.rb +10 -0
  9. data/lib/distribution.rb +1 -0
  10. data/lib/distribution/normalbivariate.rb +100 -0
  11. data/lib/statsample.rb +4 -105
  12. data/lib/statsample/bivariate.rb +5 -1
  13. data/lib/statsample/bivariate/polychoric.rb +581 -0
  14. data/lib/statsample/bivariate/tetrachoric.rb +37 -5
  15. data/lib/statsample/converters.rb +11 -0
  16. data/lib/statsample/dominanceanalysis.rb +104 -90
  17. data/lib/statsample/dominanceanalysis/bootstrap.rb +160 -131
  18. data/lib/statsample/factor/pca.rb +1 -2
  19. data/lib/statsample/factor/principalaxis.rb +2 -2
  20. data/lib/statsample/graph/svghistogram.rb +170 -172
  21. data/lib/statsample/matrix.rb +79 -0
  22. data/lib/statsample/mle.rb +6 -4
  23. data/lib/statsample/mle/probit.rb +0 -1
  24. data/lib/statsample/regression/multiple/alglibengine.rb +23 -23
  25. data/lib/statsample/regression/multiple/baseengine.rb +112 -113
  26. data/lib/statsample/regression/multiple/gslengine.rb +91 -94
  27. data/lib/statsample/regression/multiple/rubyengine.rb +1 -1
  28. data/lib/statsample/srs.rb +1 -1
  29. data/lib/statsample/test.rb +0 -1
  30. data/lib/statsample/test/umannwhitney.rb +8 -5
  31. data/po/es/statsample.po +201 -39
  32. data/po/statsample.pot +184 -32
  33. data/test/test_bivariate.rb +21 -2
  34. data/test/test_distribution.rb +58 -40
  35. data/test/test_factor.rb +0 -1
  36. data/test/test_gsl.rb +13 -14
  37. data/test/test_regression.rb +1 -1
  38. data/test/test_statistics.rb +1 -4
  39. metadata +10 -21
  40. data/demo/benchmark.rb +0 -76
  41. data/demo/chi-square.rb +0 -44
  42. data/demo/crosstab.rb +0 -7
  43. data/demo/dice.rb +0 -13
  44. data/demo/distribution_t.rb +0 -95
  45. data/demo/graph.rb +0 -9
  46. data/demo/item_analysis.rb +0 -30
  47. data/demo/mean.rb +0 -81
  48. data/demo/nunnally_6.rb +0 -34
  49. data/demo/pca.rb +0 -29
  50. data/demo/proportion.rb +0 -57
  51. data/demo/regression.rb +0 -82
  52. data/demo/sample_test.csv +0 -113
  53. data/demo/spss_matrix.rb +0 -3
  54. data/demo/strata_proportion.rb +0 -152
  55. data/demo/stratum.rb +0 -141
  56. data/demo/t-student.rb +0 -17
  57. data/demo/umann.rb +0 -8
  58. data/lib/matrix_extension.rb +0 -92
data/History.txt CHANGED
@@ -1,3 +1,7 @@
1
+ === 0.6.2 / 2010-02-11
2
+ * New Statsample::Bivariate::Polychoric. Still to be implemented: X2 and G2
3
+ * New matrix.rb, for faster development of contingency tables and correlation matrices
4
+
1
5
  === 0.6.1 / 2010-02-08
2
6
  * Bug fix on DominanceAnalysis summary for Ruby1.9
3
7
  * Some extra documentation
data/Manifest.txt CHANGED
@@ -9,35 +9,23 @@ data/repeated_fields.csv
9
9
  data/test_binomial.csv
10
10
  data/tetmat_matrix.txt
11
11
  data/tetmat_test.txt
12
- demo/benchmark.rb
13
- demo/chi-square.rb
14
- demo/crosstab.rb
15
- demo/dice.rb
16
- demo/distribution_t.rb
17
- demo/graph.rb
18
- demo/item_analysis.rb
19
- demo/mean.rb
20
- demo/nunnally_6.rb
21
- demo/pca.rb
22
- demo/proportion.rb
23
- demo/regression.rb
24
- demo/sample_test.csv
25
- demo/spss_matrix.rb
26
- demo/strata_proportion.rb
27
- demo/stratum.rb
28
- demo/t-student.rb
29
- demo/umann.rb
12
+ demo/dominance_analysis_bootstrap.rb
13
+ demo/dominanceanalysis.rb
14
+ demo/multiple_regression.rb
15
+ demo/polychoric.rb
16
+ demo/tetrachoric.rb
30
17
  lib/distribution.rb
31
18
  lib/distribution/chisquare.rb
32
19
  lib/distribution/f.rb
33
20
  lib/distribution/normal.rb
21
+ lib/distribution/normalbivariate.rb
34
22
  lib/distribution/t.rb
35
- lib/matrix_extension.rb
36
23
  lib/spss.rb
37
24
  lib/statistics2.rb
38
25
  lib/statsample.rb
39
26
  lib/statsample/anova.rb
40
27
  lib/statsample/bivariate.rb
28
+ lib/statsample/bivariate/polychoric.rb
41
29
  lib/statsample/bivariate/tetrachoric.rb
42
30
  lib/statsample/codification.rb
43
31
  lib/statsample/combination.rb
@@ -60,6 +48,7 @@ lib/statsample/graph/svghistogram.rb
60
48
  lib/statsample/graph/svgscatterplot.rb
61
49
  lib/statsample/histogram.rb
62
50
  lib/statsample/htmlreport.rb
51
+ lib/statsample/matrix.rb
63
52
  lib/statsample/mle.rb
64
53
  lib/statsample/mle/logit.rb
65
54
  lib/statsample/mle/normal.rb
@@ -0,0 +1,20 @@
1
#!/usr/bin/ruby
# Demo: bootstrap dominance analysis on a randomly generated dataset.
$LOAD_PATH.unshift(File.dirname(__FILE__)+'/../lib/')
require 'statsample'

# Four random predictors of 100 cases each, generated in the order a, b, c, d.
predictors={}
%w{a b c d}.each do |field|
  predictors[field]=100.times.collect {rand}.to_scale
end
ds=predictors.to_dataset

# Criterion: weighted sum of the predictors plus random noise scaled by 10.
ds['y']=ds.collect do |row|
  row['a']*5+row['b']*2+row['c']*2+row['d']*2+10*rand()
end

analysis=Statsample::DominanceAnalysis::Bootstrap.new(ds, 'y')
# Use Gsl if available (faster calculation)
analysis.regression_class=Statsample::Regression::Multiple::GslEngine if HAS_GSL
analysis.bootstrap(100,nil,true)
puts analysis.summary
@@ -0,0 +1,11 @@
1
#!/usr/bin/ruby
# Demo: dominance analysis of three random predictors on a synthetic criterion.
$LOAD_PATH.unshift(File.dirname(__FILE__)+'/../lib/')

require 'statsample'

# Three random predictors of 1000 cases each, generated in the order a, b, c.
predictors={}
%w{a b c}.each do |field|
  predictors[field]=1000.times.collect {rand}.to_scale
end
ds=predictors.to_dataset

# Criterion: weighted sum of the predictors plus random noise.
ds['y']=ds.collect do |row|
  row['a']*5+row['b']*3+row['c']*2+rand()
end

analysis=Statsample::DominanceAnalysis.new(ds,'y')
puts analysis.summary
@@ -0,0 +1,40 @@
1
#!/usr/bin/ruby
# Demo: benchmark the available multiple regression engines on the same
# synthetic dataset and print their summaries.
$LOAD_PATH.unshift(File.dirname(__FILE__)+'/../lib/')

require 'statsample'
require 'benchmark'

samples=10000
# Four random predictors, generated in the order a, b, c, d.
predictors={}
%w{a b c d}.each do |field|
  predictors[field]=samples.times.collect {rand}.to_scale
end
ds=predictors.to_dataset
ds['y']=ds.collect do |row|
  row['a']*5+row['b']*3+row['c']*2+row['d']*1+rand()
end

Benchmark.bm(7) do |bench|
  report=ReportBuilder.new("Multiple Regression Engines")

  # Times one engine and appends its summary to the shared report.
  # The engine class is looked up inside the timed block, as in a literal
  # constant reference.
  time_engine=lambda do |label, engine_name, title|
    bench.report(label) do
      engine=Statsample::Regression::Multiple.const_get(engine_name)
      regression=engine.new(ds,'y',:name=>title)
      report.add(regression.summary)
    end
  end

  time_engine.call("GSL:", :GslEngine, "Multiple Regression using GSL") if HAS_GSL
  time_engine.call("Alglib:", :AlglibEngine, "Multiple Regression using Alglib") if HAS_ALGIB
  time_engine.call("Ruby:", :RubyEngine, "Multiple Regression using RubyEngine")

  puts report.to_text
end
@@ -0,0 +1,13 @@
1
#!/usr/bin/ruby
# Demo: polychoric correlation computed from a 6x6 contingency table.
$LOAD_PATH.unshift(File.dirname(__FILE__)+'/../lib/')

require 'statsample'
# Alternative 3x3 table:
#ct=Matrix[[58,52,1],[26,58,3],[8,12,9]]

table_rows=[
  [30, 1, 0,  0,  0,   0],
  [ 0,10, 2,  0,  0,   0],
  [ 0, 4, 8,  3,  1,   0],
  [ 0, 3, 3, 37,  9,   0],
  [ 0, 0, 1, 25, 71,  49],
  [ 0, 0, 0,  2, 20, 181]
]
ct=Matrix.rows(table_rows)
poly=Statsample::Bivariate::Polychoric.new(ct)

puts poly.summary
# NOTE(review): chi_square_independence is printed twice, as in the original demo.
puts poly.chi_square_independence
puts poly.chi_square_model
puts poly.chi_square_independence
@@ -0,0 +1,10 @@
1
#!/usr/bin/ruby
# Demo: tetrachoric correlation from four frequencies.
$LOAD_PATH.unshift(File.dirname(__FILE__)+'/../lib/')

require 'statsample'
# Frequencies passed positionally as a, b, c, d
# (presumably the cells of a 2x2 table -- confirm against Tetrachoric#initialize).
frequencies=[40, 10, 20, 30]
tetra=Statsample::Bivariate::Tetrachoric.new(*frequencies)
puts tetra.summary
data/lib/distribution.rb CHANGED
@@ -12,4 +12,5 @@ module Distribution
12
12
  autoload(:T, 'distribution/t')
13
13
  autoload(:F, 'distribution/f')
14
14
  autoload(:Normal, 'distribution/normal')
15
+ autoload(:NormalBivariate, 'distribution/normalbivariate')
15
16
  end
@@ -0,0 +1,100 @@
1
module Distribution
  # Calculate pdf and cdf for bivariate normal distribution
  module NormalBivariate

    class << self
      # Side length of each square cell summed by cdf_iterate.
      SIDE=0.1
      # cdf_iterate integrates from -LIMIT on both axes and handles
      # arguments beyond +/-LIMIT as special cases.
      LIMIT=5

      # Probability density function at (x, y) for correlation +rho+ and
      # standard deviations +sigma1+, +sigma2+ (means are zero).
      #
      # The squared terms are divided by the variances (sigma**2) and the
      # division is never integer division. With the default sigmas of 1.0
      # the result is unchanged.
      # Source: http://en.wikipedia.org/wiki/Multivariate_normal_distribution
      def pdf(x,y, rho, sigma1=1.0, sigma2=1.0)
        one_minus_rho2=1 - rho**2
        scale=1.quo(2 * Math::PI * sigma1*sigma2 * Math::sqrt(one_minus_rho2))
        quadratic=(x**2).quo(sigma1**2) + (y**2).quo(sigma2**2) -
          (2*rho*x*y).quo(sigma1*sigma2)
        scale * Math::exp(-(1.quo(2*one_minus_rho2)) * quadratic)
      end

      # Integrand used by the quadrature in cdf_math.
      def f(x,y,aprime,bprime,rho)
        r=aprime*(2*x-aprime)+bprime*(2*y-bprime)+2*rho*(x-aprime)*(y-bprime)
        Math::exp(r)
      end

      # Cumulative distribution function P(X<=a, Y<=b) for correlation +rho+.
      # Delegates to cdf_math.
      def cdf(a,b,rho)
        cdf_math(a,b,rho)
      end

      # Sign of x as 1 or -1; zero counts as positive.
      def sgn(x)
        x>=0 ? 1 : -1
      end

      # Cdf by a 4x4 point quadrature when a, b and rho are all <= 0;
      # every other sign combination is reduced recursively to that case.
      # As http://finance.bi.no/~bernt/gcc_prog/recipes/recipes/node23.html
      def cdf_math(a,b,rho)
        if a<=0 && b<=0 && rho<=0
          denom=Math::sqrt(2.0*(1.0-rho**2))
          aprime=a.quo(denom)
          bprime=b.quo(denom)
          aa=[0.3253030, 0.4211071, 0.1334425, 0.006374323]
          bb=[0.1337764, 0.6243247, 1.3425378, 2.2626645]
          sum=0
          4.times do |i|
            4.times do |j|
              sum+=aa[i]*aa[j] * f(bb[i], bb[j], aprime, bprime,rho)
            end
          end
          return sum*(Math::sqrt(1.0-rho**2).quo(Math::PI))
        elsif a*b*rho<=0.0
          if a<=0 && b>=0 && rho>=0
            return Distribution::Normal.cdf(a) - cdf(a,-b,-rho)
          elsif a>=0.0 && b<=0.0 && rho>=0
            return Distribution::Normal.cdf(b) - cdf(-a,b,-rho)
          elsif a>=0.0 && b>=0.0 && rho<=0
            return Distribution::Normal.cdf(a) + Distribution::Normal.cdf(b) - 1.0 + cdf(-a,-b,rho)
          end
        elsif a*b*rho>=0.0
          denum=Math::sqrt(a**2 - 2*rho*a*b + b**2)
          rho1=((rho*a-b)*sgn(a)).quo(denum)
          rho2=((rho*b-a)*sgn(b)).quo(denum)
          delta=(1.0-sgn(a)*sgn(b)).quo(4)
          return cdf(a, 0.0, rho1) + cdf(b, 0.0, rho2) - delta
        end
        # Reached only when no branch above returned (e.g. NaN input).
        raise "Should'nt be here! #{a} - #{b} #{rho}"
      end

      # Cdf for a given x and y, by summing the average corner density of
      # SIDE x SIDE cells from -LIMIT up to x and y.
      # Reference:
      # * Jantaravareerat, M. & Thomopoulos, N. (n/d). Tables for standard bivariate normal distribution
      def cdf_iterate(x,y,rho,s1=1,s2=1)
        # Special cases
        return 1 if x>LIMIT and y>LIMIT
        return 0 if x<-LIMIT or y<-LIMIT
        return Distribution::Normal.cdf(y) if x>LIMIT
        return Distribution::Normal.cdf(x) if y>LIMIT
        # Past this point both x and y are within [-LIMIT, LIMIT].

        x_squares=((LIMIT+x) / SIDE).to_i
        y_squares=((LIMIT+y) / SIDE).to_i
        sum=0
        x_squares.times do |i|
          y_squares.times do |j|
            z1=-LIMIT+(i+1)*SIDE
            z2=-LIMIT+(j+1)*SIDE
            # Mean density over the four corners of the cell.
            h=(pdf(z1,z2,rho,s1,s2)+pdf(z1-SIDE,z2,rho,s1,s2)+pdf(z1,z2-SIDE,rho,s1,s2) + pdf(z1-SIDE,z2-SIDE,rho,s1,s2)).quo(4)
            sum+= (SIDE**2)*h # area
          end
        end
        sum
      end
    end
  end
end
data/lib/statsample.rb CHANGED
@@ -23,6 +23,7 @@ $:.unshift(File.expand_path(File.dirname(__FILE__)+"/../ext"))
23
23
 
24
24
  require 'matrix'
25
25
  require 'distribution'
26
+ require 'reportbuilder'
26
27
 
27
28
  class Numeric
28
29
  def square ; self * self ; end
@@ -108,7 +109,7 @@ end
108
109
  # * Dataset: An union of vectors.
109
110
  #
110
111
  module Statsample
111
- VERSION = '0.6.1'
112
+ VERSION = '0.6.2'
112
113
  SPLIT_TOKEN = ","
113
114
  autoload(:Database, 'statsample/converters')
114
115
  autoload(:Anova, 'statsample/anova')
@@ -135,6 +136,7 @@ module Statsample
135
136
  autoload(:Regression, 'statsample/regression')
136
137
  autoload(:Test, 'statsample/test')
137
138
  autoload(:Factor, 'statsample/factor')
139
+
138
140
  def self.load(filename)
139
141
  if File.exists? filename
140
142
  o=false
@@ -165,110 +167,6 @@ module Statsample
165
167
  fp.close
166
168
  end
167
169
  end
168
- module HtmlSummary
169
- def add_line(n=nil)
170
- self << "<hr />"
171
- end
172
- def nl
173
- self << "<br />"
174
- end
175
- def add(text)
176
- self << ("<p>"+text.gsub("\n","<br />")+"</p>")
177
- end
178
- def parse_table(table)
179
- self << table.parse_html
180
- end
181
- end
182
- module ConsoleSummary
183
- def add_line(n=80)
184
- self << "-"*n+"\n"
185
- end
186
- def nl
187
- self << "\n"
188
- end
189
- def add(text)
190
- self << text
191
- end
192
- def parse_table(table)
193
- self << table.parse_console
194
- end
195
- end
196
- class ReportTable
197
- attr_reader :header
198
- def initialize(h=[])
199
- @rows=[]
200
- @max_cols=[]
201
- self.header=(h)
202
- end
203
- def add_row(row)
204
- row.each_index{|i|
205
- @max_cols[i]=row[i].to_s.size if @max_cols[i].nil? or row[i].to_s.size > @max_cols[i]
206
- }
207
- @rows.push(row)
208
- end
209
- def add_horizontal_line
210
- @rows.push(:hr)
211
- end
212
- def header=(h)
213
- h.each_index{|i|
214
- @max_cols[i]=h[i].to_s.size if @max_cols[i].nil? or h[i].to_s.size>@max_cols[i]
215
- }
216
- @header=h
217
- end
218
- def parse_console_row(row)
219
- out="| "
220
- @max_cols.each_index{|i|
221
- if row[i].nil?
222
- out << " "*(@max_cols[i]+2)+"|"
223
- else
224
- t=row[i].to_s
225
- out << " "+t+" "*(@max_cols[i]-t.size+1)+"|"
226
- end
227
- }
228
- out << "\n"
229
- out
230
- end
231
- def parse_console_hr
232
- "-"*(@max_cols.inject(0){|a,v|a+v.size+3}+2)+"\n"
233
- end
234
- def parse_console
235
- out="\n"
236
- out << parse_console_hr
237
- out << parse_console_row(header)
238
- out << parse_console_hr
239
-
240
- @rows.each{|row|
241
- if row==:hr
242
- out << parse_console_hr
243
- else
244
- out << parse_console_row(row)
245
- end
246
- }
247
- out << parse_console_hr
248
-
249
- out
250
- end
251
- def parse_html
252
- out="<table>\n"
253
- if header.size>0
254
- out << "<thead><th>"+header.join("</th><th>")+"</thead><tbody>"
255
- end
256
- out << "<tbody>\n"
257
- row_with_line=false
258
- @rows.each{|row|
259
- if row==:hr
260
- row_with_line=true
261
- else
262
- out << "<tr class='"+(row_with_line ? 'line':'')+"'><td>"
263
- out << row.join("</td><td>") +"</td>"
264
- out << "</tr>\n"
265
- row_with_line=false
266
- end
267
- }
268
- out << "</tbody></table>\n"
269
- out
270
- end
271
- end
272
170
 
273
171
  module STATSAMPLE__ #:nodoc:
274
172
  end
@@ -288,3 +186,4 @@ end
288
186
  require 'statsample/vector'
289
187
  require 'statsample/dataset'
290
188
  require 'statsample/crosstab'
189
+ require 'statsample/matrix'
@@ -1,4 +1,5 @@
1
1
  require 'statsample/bivariate/tetrachoric'
2
+ require 'statsample/bivariate/polychoric'
2
3
  module Statsample
3
4
  # Diverse correlation methods
4
5
  module Bivariate
@@ -132,7 +133,7 @@ module Statsample
132
133
  # Order of rows and columns depends on Dataset#fields order
133
134
 
134
135
  def correlation_matrix(ds)
135
- ds.collect_matrix do |row,col|
136
+ cm=ds.collect_matrix do |row,col|
136
137
  if row==col
137
138
  1.0
138
139
  elsif (ds[row].type!=:scale or ds[col].type!=:scale)
@@ -141,6 +142,9 @@ module Statsample
141
142
  pearson(ds[row],ds[col])
142
143
  end
143
144
  end
145
+ cm.extend(Statsample::CorrelationMatrix)
146
+ cm.labels=ds.fields
147
+ cm
144
148
  end
145
149
 
146
150
  # Retrieves the n valid pairwise.
@@ -0,0 +1,581 @@
1
+ module Statsample
2
+ module Bivariate
3
+ # Calculate Polychoric correlation for two vectors.
4
def self.polychoric(v1,v2)
  # Build the analysis from both vectors and return only the coefficient.
  Polychoric.new_with_vectors(v1,v2).r
end
8
+
9
+ # Polychoric correlation matrix.
10
+ # Order of rows and columns depends on Dataset#fields order
11
def self.polychoric_correlation_matrix(ds)
  # The diagonal is fixed at 1.0; a pair whose polychoric computation
  # raises RuntimeError gets nil instead of a coefficient.
  ds.collect_matrix do |row,col|
    next 1.0 if row==col
    begin
      polychoric(ds[row],ds[col])
    rescue RuntimeError
      nil
    end
  end
end
24
+ # Compute polychoric correlation.
25
+ #
26
+ # The polychoric correlation estimates what the correlation between raters, who classified on an ordered category scale, would be if ratings were made on a continuous scale; such estimates are, theoretically, invariant over changes in the number or "width" of rating categories.
27
+ # See extensive documentation on http://www.john-uebersax.com/stat/tetra.htm
28
+
29
+ class Polychoric
30
+ include GetText
31
+ bindtextdomain("statsample")
32
+ # Name of the analysis
33
+ attr_accessor :name
34
+ # Max number of iterations used on iterative methods. Default to 100
35
+ attr_accessor :max_iterations
36
+ # Debug algorithm (See iterations, for example)
37
+ attr_accessor :debug
38
+ # Minimizer type. Default GSL::Min::FMinimizer::BRENT
39
+ # See http://rb-gsl.rubyforge.org/min.html for reference.
40
+ attr_accessor :minimizer_type
41
+ # Method of calculation.
42
+ #
43
+ # Drasgow (1988, cited by Uebersax, 2002) describes two methods: the joint maximum likelihood (ML) approach and two-step ML estimation.
44
+ # For now, only implemented two-step ML (:two_step), with algorithm
45
+ # based on Drasgow(1986, cited by Gegenfurtner, 1992)
46
+ #
47
+ attr_accessor :method
48
+ # Absolute error for iteration. Default to 0.001
49
+ attr_accessor :epsilon
50
+
51
+ # Number of iterations
52
+ attr_reader :iteration
53
+
54
+ # Log of algorithm
55
+ attr_reader :log
56
+ attr_reader :loglike
57
+ MAX_ITERATIONS=100
58
+ EPSILON=0.001
59
+ MINIMIZER_TYPE=GSL::Min::FMinimizer::BRENT
60
# Builds a Polychoric analysis from two vectors by cross-tabulating them.
#
# Defined on the class itself, because it is invoked as
# Polychoric.new_with_vectors(v1, v2) before any instance exists.
def self.new_with_vectors(v1,v2)
  Polychoric.new(Crosstab.new(v1,v2).to_matrix)
end
63
+
64
# Creates the analysis for a contingency table.
#
# matrix:: object responding to row_size, column_size and [i,j].
# opts::   hash of attribute names and values; each is assigned only when
#          a writer for that attribute exists, others are ignored.
#
# Raises RuntimeError when the table has fewer than two rows or columns.
def initialize(matrix, opts=Hash.new)
  @matrix=matrix
  @n=matrix.column_size
  @m=matrix.row_size
  raise "row size <1" if @m<=1
  raise "column size <1" if @n<=1

  @method=:two_step
  @name="Polychoric correlation"
  @max_iterations=MAX_ITERATIONS
  @epsilon=EPSILON
  @minimizer_type=GSL::Min::FMinimizer::BRENT
  @debug=false
  @iteration=nil
  opts.each do |k,v|
    # Test for the writer, not the reader: read-only attributes such as
    # :iteration or :log answer to the getter but have no "name=" method.
    setter="#{k}="
    self.send(setter,v) if self.respond_to? setter
  end
  @r=nil
end
83
# Correlation coefficient; computed on first access and cached in @r.
def r
  compute if @r.nil?
  @r
end
89
+
90
# Thresholds stored in @alpha, without its last element.
# Triggers the computation when @alpha is not yet set.
def threshold_x
  compute if @alpha.nil?
  upto=@alpha.size-1
  @alpha[0,upto]
end
96
+
97
# Thresholds stored in @beta, without its last element.
# Triggers the computation when @beta is not yet set.
def threshold_y
  compute if @beta.nil?
  upto=@beta.size-1
  @beta[0,upto]
end
103
+
104
+
105
+
106
# Runs the estimation routine selected by @method.
# Raises RuntimeError ("Not implemented") for any other value.
def compute
  case @method
  when :two_step
    compute_two_step_mle_drasgow
  when :as87
    compute_two_step_as87
  else
    raise "Not implemented"
  end
end
115
+ # *Computation of polychoric correlation using two-step ML estimation.*
116
+ #
117
+ # Two-step ML estimation "first estimates the thresholds from the one-way marginal frequencies, then estimates rho, conditional on these thresholds, via maximum likelihood" (Uebersax, 2006).
118
+ #
119
+ # The algorithm is based on Drasgow (1986, cited by Gegenfurtner, 1992).
120
+ # References:
121
+ # * Gegenfurtner, K. (1992). PRAXIS: Brent's algorithm for function minimization. Behavior Research Methods, Instruments & Computers, 24(4), 560-564. Available on http://www.allpsych.uni-giessen.de/karl/pdf/03.praxis.pdf
122
+ # * Uebersax, J.S. (2006). The tetrachoric and polychoric correlation coefficients. Statistical Methods for Rater Agreement web site. 2006. Available at: http://john-uebersax.com/stat/tetra.htm . Accessed February, 11, 2010
123
+ #
124
# Step 1 derives the row (@alpha) and column (@beta) thresholds from the
# cumulative marginal proportions. Step 2 minimises the negative
# log-likelihood over rho with a GSL one-dimensional minimizer.
#
# Sets @r, @loglike, @pd (cell probabilities at the last evaluated rho),
# @iteration and @log. Iteration stops when the bracketing interval is
# narrower than @epsilon or after @max_iterations steps.
def compute_two_step_mle_drasgow
  @nr=@matrix.row_size
  @nc=@matrix.column_size
  @sumr=[0]*@nr
  @sumrac=[0]*@nr
  @sumc=[0]*@nc
  @sumcac=[0]*@nc
  @alpha=[0]*@nr
  # One threshold slot per column (not per row), so threshold_y stays
  # correct for non-square tables.
  @beta=[0]*@nc
  @total=0
  @nr.times do |i|
    @nc.times do |j|
      @sumr[i]+=@matrix[i,j]
      @sumc[j]+=@matrix[i,j]
      @total+=@matrix[i,j]
    end
  end
  ac=0
  (@nr-1).times do |i|
    @sumrac[i]=@sumr[i]+ac
    @alpha[i]=Distribution::Normal.p_value(@sumrac[i] / @total.to_f)
    ac=@sumrac[i]
  end
  ac=0
  (@nc-1).times do |i|
    @sumcac[i]=@sumc[i]+ac
    @beta[i]=Distribution::Normal.p_value(@sumcac[i] / @total.to_f)
    ac=@sumcac[i]
  end
  # The last threshold on each axis is a large value standing in for +infinity.
  @alpha[@nr-1]=10
  @beta[@nc-1]=10
  fn1=GSL::Function.alloc {|x|
    loglike=0
    pd=@nr.times.collect{ [0]*@nc}
    pc=@nr.times.collect{ [0]*@nc}

    @nr.times { |i|
      @nc.times { |j|
        # pc holds the cumulative probability up to (alpha[i], beta[j]);
        # pd is the probability of cell (i,j) by inclusion-exclusion.
        pd[i][j]=Distribution::NormalBivariate.cdf(@alpha[i], @beta[j], x)
        pc[i][j] = pd[i][j]
        pd[i][j] = pd[i][j] - pc[i-1][j] if i>0
        pd[i][j] = pd[i][j] - pc[i][j-1] if j>0
        pd[i][j] = pd[i][j] + pc[i-1][j-1] if (i>0 and j>0)
        res= pd[i][j]

        # Avoid log(0) on empty-probability cells.
        if res==0.0
          res=1e-15
        end

        loglike+= @matrix[i,j] * Math::log( res )
      }
    }
    @loglike=loglike
    @pd=pd
    -loglike
  }
  @iteration = 0
  m = 0 # initial guess
  m_expected = 0.5 # only used for the "err" column of the log
  a=-0.99999
  b=+0.99999
  gmf = GSL::Min::FMinimizer.alloc(@minimizer_type)
  gmf.set(fn1, m, a, b)
  header=sprintf("using %s method\n", gmf.name)
  header+=sprintf("%5s [%9s, %9s] %9s %10s %9s\n", "iter", "lower", "upper", "min",
  "err", "err(est)")

  header+=sprintf("%5d [%.7f, %.7f] %.7f %+.7f %.7f\n", @iteration, a, b, m, m - m_expected, b - a)
  @log=header
  puts header if @debug
  # The body always runs at least once before the condition is checked.
  loop do
    @iteration += 1
    gmf.iterate
    # Honour the configurable absolute tolerance (default EPSILON=0.001).
    status = gmf.test_interval(@epsilon, 0.0)

    if status == GSL::SUCCESS
      @log+="Converged:"
      puts "Converged:" if @debug
    end
    a = gmf.x_lower
    b = gmf.x_upper
    m = gmf.x_minimum
    message=sprintf("%5d [%.7f, %.7f] %.7f %+.7f %.7f\n",
    @iteration, a, b, m, m - m_expected, b - a)
    @log+=message
    puts message if @debug
    break unless status == GSL::CONTINUE and @iteration < @max_iterations
  end
  @r=gmf.x_minimum
end
216
+ # Chi-square to test r=0
217
def chi_square_independence
  # Observed table against the frequencies returned by #expected.
  independence=expected
  Statsample::Test.chi_square(@matrix, independence)
end
220
+ # Chi-square to test model==independence
221
+
222
# Chi-square of the model frequencies (cell probabilities times the grand
# total) against the frequencies returned by #expected.
# Runs the estimation first when no coefficient is available yet.
def chi_square_model_expected
  compute if @r.nil?
  model=Matrix.rows(@pd).collect {|c| c*@total}
  Statsample::Test::chi_square(model, expected)
end
228
+ # Chi-square to test real == calculated with rho
229
# Chi-square of the observed table against the model frequencies (cell
# probabilities times the grand total).
# Runs the estimation first when no coefficient is available yet.
def chi_square_model
  compute if @r.nil?
  e=Matrix.rows(@pd).collect {|c| c*@total}
  Statsample::Test::chi_square(@matrix, e)
end
234
# Evaluates the bivariate normal cdf at every (@alpha[i], @beta[j]) pair for
# the given rho and returns those cumulative values as a Matrix.
# Needs @nr, @nc, @alpha and @beta to be set already.
#
# NOTE(review): the per-cell probabilities are derived as well but only the
# cumulative matrix is returned -- confirm this is intended.
def matrix_for_rho(rho)
  cell_prob=Array.new(@nr) { [0]*@nc }
  cumulative=Array.new(@nr) { [0]*@nc }
  (0...@nr).each do |i|
    (0...@nc).each do |j|
      value=Distribution::NormalBivariate.cdf(@alpha[i], @beta[j], rho)
      cumulative[i][j]=value
      value=value - cumulative[i-1][j] if i>0
      value=value - cumulative[i][j-1] if j>0
      value=value + cumulative[i-1][j-1] if i>0 and j>0
      cell_prob[i][j]=value
    end
  end
  Matrix.rows(cumulative)
end
249
# Not usable yet: always raises RuntimeError ("Doesn't work").
# The code after the raise is unreachable and kept as work in progress.
def g2
  raise "Doesn't work"
  independence=expected
  no_r_likehood=0
  (0...@nr).each do |i|
    (0...@nc).each do |j|
      next if @matrix[i,j]==0
      no_r_likehood+= @matrix[i,j]*Math::log(independence[i,j])
    end
  end
  p no_r_likehood
  model=Matrix.rows(@pd).collect {|c| c*@total}

  model_likehood=0
  (0...@nr).each do |i|
    (0...@nc).each do |j|
      next if @matrix[i,j]==0
      model_likehood+= @matrix[i,j] * Math::log(model[i,j])
    end
  end

  p model_likehood

  -2*(no_r_likehood-model_likehood)
end
279
+ def expected
280
+ rt=[]
281
+ ct=[]
282
+ t=0
283
+ @matrix.row_size.times {|i|
284
+ @matrix.column_size.times {|j|
285
+ rt[i]=0 if rt[i].nil?
286
+ ct[j]=0 if ct[j].nil?
287
+ rt[i]+=@matrix[i,j]
288
+ ct[j]+=@matrix[i,j]
289
+ t+=@matrix[i,j]
290
+ }
291
+ }
292
+ m=[]
293
+ @matrix.row_size.times {|i|
294
+ row=[]
295
+ @matrix.column_size.times {|j|
296
+ row[j]=(rt[i]*ct[j]).quo(t)
297
+ }
298
+ m.push(row)
299
+ }
300
+
301
+ Matrix.rows(m)
302
+ end
303
+ # Compute polychoric using AS87.
304
+ # Doesn't work for now! I can't find the error :(
305
+
306
# Polychoric estimate via the polynomial-root approach labelled AS87.
# Working arrays are 1-based, following the numbered-label (Fortran-style)
# layout the comments refer to.
#
# The sample covariance is now accumulated into covxy (a typo previously
# left it at 0.0, so negative roots could never be selected).
# NOTE(review): the method was marked as not working; this fix addresses
# only that defect and the overall result is still unverified.
#
# Raises RuntimeError on empty rows/columns or when no admissible root is found.
def compute_two_step_as87
  @nn=@n-1
  @mm=@m-1
  @nn7=7*@nn
  @mm7=7*@mm
  @mn=@n*@m
  # Flatten the table column by column into a 1-based array.
  @cont=[nil]
  @n.times {|j|
    @m.times {|i|
      @cont.push(@matrix[i,j])
    }
  }

  pcorl=0
  cont=@cont
  xmean=0.0
  sum=0.0
  row=[]
  colmn=[]
  (1..@m).each do |i|
    row[i]=0.0
    l=i
    (1..@n).each do |j|
      row[i]=row[i]+cont[l]
      l+=@m
    end
    raise "Should not be empty rows" if(row[i]==0.0)
    xmean=xmean+row[i]*i.to_f
    sum+=row[i]
  end
  xmean=xmean/sum.to_f
  ymean=0.0
  (1..@n).each do |j|
    colmn[j]=0.0
    l=(j-1)*@m
    (1..@m).each do |i|
      l=l+1
      colmn[j]=colmn[j]+cont[l] #12
    end
    raise "Should not be empty cols" if colmn[j]==0
    ymean=ymean+colmn[j]*j.to_f
  end
  ymean=ymean/sum.to_f
  # Sum of cross-products about the category means; only its sign is used
  # below, to choose between positive and negative roots.
  covxy=0.0
  (1..@m).each do |i|
    l=i
    (1..@n).each do |j|
      covxy=covxy+cont[l]*(i.to_f-xmean)*(j.to_f-ymean)
      l=l+@m
    end
  end

  chisq=0.0
  (1..@m).each do |i|
    l=i
    (1..@n).each do |j|
      chisq=chisq+((cont[l]**2).quo(row[i]*colmn[j]))
      l=l+@m
    end
  end

  phisq=chisq-1.0-(@mm*@nn).to_f / sum.to_f
  phisq=0 if(phisq<0.0)
  # Compute cumulative sum of columns and rows
  sumc=[]
  sumr=[]
  sumc[1]=colmn[1]
  sumr[1]=row[1]
  cum=0
  (1..@nn).each do |i| # goto 17 r20
    cum=cum+colmn[i]
    sumc[i]=cum
  end
  cum=0
  (1..@mm).each do |i|
    cum=cum+row[i]
    sumr[i]=cum
  end
  alpha=[]
  beta=[]
  # Compute points of polytomy
  (1..@mm).each do |i| #do 21
    alpha[i]=Distribution::Normal.p_value(sumr[i] / sum.to_f)
  end # 21
  (1..@nn).each do |i| #do 22
    beta[i]=Distribution::Normal.p_value(sumc[i] / sum.to_f)
  end # 22
  # Stored 0-based with a trailing nil, which threshold_x/threshold_y drop.
  @alpha=alpha[1,alpha.size] << nil
  @beta=beta[1,beta.size] << nil
  @sumr=sumr
  @sumc=sumc
  @total=sum

  # Compute Fourier coefficients a and b. Verified
  h=hermit(alpha,@mm)
  hh=hermit(beta,@nn)
  a=[]
  b=[]
  if @m!=2 # goto 24
    mmm=@m-2
    (1..mmm).each do |i| #do 23
      a1=sum.quo(row[i+1] * sumr[i] * sumr[i+1])
      a2=sumr[i] * xnorm(alpha[i+1])
      a3=sumr[i+1] * xnorm(alpha[i])
      l=i
      (1..7).each do |j| #do 23
        a[l]=Math::sqrt(a1.quo(j))*(h[l+1] * a2 - h[l] * a3)
        l=l+@mm
      end
    end #23
  end
  # 24


  if @n!=2 # goto 26
    nnn=@n-2
    (1..nnn).each do |i| #do 25
      a1=sum.quo(colmn[i+1] * sumc[i] * sumc[i+1])
      a2=sumc[i] * xnorm(beta[i+1])
      a3=sumc[i+1] * xnorm(beta[i])
      l=i
      (1..7).each do |j| #do 25
        b[l]=Math::sqrt(a1.quo(j))*(a2 * hh[l+1] - a3*hh[l])
        l=l+@nn
      end # 25
    end # 25
  end
  #26 r20
  l = @mm
  a1 = -sum * xnorm(alpha[@mm])
  a2 = row[@m] * sumr[@mm]
  (1..7).each do |j| # do 27
    a[l]=a1 * h[l].quo(Math::sqrt(j*a2))
    l=l+@mm
  end # 27

  l = @nn
  a1 = -sum * xnorm(beta[@nn])
  a2 = colmn[@n] * sumc[@nn]

  (1..7).each do |j| # do 28
    b[l]=a1 * hh[l].quo(Math::sqrt(j*a2))
    l = l + @nn
  end # 28
  rcof=[]
  # compute coefficients rcof of polynomial of order 8
  rcof[1]=-phisq
  (2..9).each do |i| # do 30
    rcof[i]=0.0
  end #30
  m1=@mm
  (1..@mm).each do |i| # do 31
    m1=m1+1
    m2=m1+@mm
    m3=m2+@mm
    m4=m3+@mm
    m5=m4+@mm
    m6=m5+@mm
    n1=@nn
    (1..@nn).each do |j| # do 31
      n1=n1+1
      n2=n1+@nn
      n3=n2+@nn
      n4=n3+@nn
      n5=n4+@nn
      n6=n5+@nn

      rcof[3] = rcof[3] + a[i]**2 * b[j]**2

      rcof[4] = rcof[4] + 2.0 * a[i] * a[m1] * b[j] * b[n1]

      rcof[5] = rcof[5] + a[m1]**2 * b[n1]**2 +
      2.0 * a[i] * a[m2] * b[j] * b[n2]

      rcof[6] = rcof[6] + 2.0 * (a[i] * a[m3] * b[j] *
      b[n3] + a[m1] * a[m2] * b[n1] * b[n2])

      rcof[7] = rcof[7] + a[m2]**2 * b[n2]**2 +
      2.0 * (a[i] * a[m4] * b[j] * b[n4] + a[m1] * a[m3] *
      b[n1] * b[n3])

      rcof[8] = rcof[8] + 2.0 * (a[i] * a[m5] * b[j] * b[n5] +
      a[m1] * a[m4] * b[n1] * b[n4] + a[m2] * a[m3] * b[n2] * b[n3])

      rcof[9] = rcof[9] + a[m3]**2 * b[n3]**2 +
      2.0 * (a[i] * a[m6] * b[j] * b[n6] + a[m1] * a[m5] * b[n1] *
      b[n5] + (a[m2] * a[m4] * b[n2] * b[n4]))
    end # 31
  end # 31

  # Drop the unused slot 0 before handing the coefficients to GSL.
  rcof=rcof[1,rcof.size]
  poly = GSL::Poly.alloc(rcof)
  roots=poly.solve
  rootr=[nil]
  rooti=[nil]
  roots.each {|c|
    rootr.push(c.real)
    rooti.push(c.im)
  }
  @rootr=rootr
  @rooti=rooti

  norts=0
  (1..7).each do |i| # do 43

    # Only real roots are candidates.
    next if rooti[i]!=0.0
    if (covxy>=0.0)
      next if(rootr[i]<0.0 or rootr[i]>1.0)
      pcorl=rootr[i]
      norts=norts+1
    else
      if (rootr[i]>=-1.0 and rootr[i]<0.0)
        pcorl=rootr[i]
        norts=norts+1
      end
    end
  end # 43
  raise "Error" if norts==0
  @r=pcorl
end
526
+ #Computes vector h(mm7) of orthogonal hermite...
527
# Fills a 1-based array h with seven values per threshold, stored with
# stride k: h[i]=1.0, h[i+k]=s[i], and for order 2..6
#   h[i+order*k] = (s[i]*h[i+(order-1)*k] - sqrt(order-1)*h[i+(order-2)*k]) / sqrt(order)
#
# s:: 1-based array of thresholds; k:: number of thresholds.
def hermit(s,k)
  h=[]
  1.upto(k) do |i|
    h[i]=1.0
    h[i+k]=s[i]
    previous_root=1.0
    2.upto(6) do |order|
      root=Math::sqrt(order)
      h[i+order*k]=(s[i]*h[i+(order-1)*k] - previous_root*h[i+(order-2)*k]).quo(root)
      previous_root=root
    end
  end
  h
end
547
# Standard normal density at t: exp(-t**2/2) / sqrt(2*pi).
def xnorm(t)
  inverse_root_two_pi=1.0/Math::sqrt(2*Math::PI)
  Math::exp(-0.5 * t **2) * inverse_root_two_pi
end
550
+
551
# Text report of this analysis, rendered through ReportBuilder.
def summary
  report=ReportBuilder.new()
  report.add(self)
  report.to_text
end
556
+
557
# Builds a report section with the contingency table (including marginal
# totals), the coefficient and the thresholds, then passes it to
# generator.parse_element. Computes the coefficient first if needed.
def to_reportbuilder(generator)
  compute if @r.nil?
  columns=(0...@n).to_a
  report_section=ReportBuilder::Section.new(:name=>@name)

  contingency=ReportBuilder::Table.new(:name=>_("Contingence Table"),:header=>[""]+columns.collect {|j| "Y=#{j}"}+["Total"])
  (0...@m).each do |i|
    cells=columns.collect {|j| @matrix[i,j]}
    contingency.add_row(["X = #{i}"]+cells+[@sumr[i]])
  end
  contingency.add_hr
  column_totals=columns.collect {|j| @sumc[j]}
  contingency.add_row(["T"]+column_totals+[@total])
  report_section.add(contingency)

  report_section.add(sprintf("r: %0.4f",r))

  thresholds=ReportBuilder::Table.new(:name=>_("Thresholds"), :header=>["","Value"])
  threshold_x.each_with_index do |val,i|
    thresholds.add_row(["Threshold X #{i}", sprintf("%0.4f", val)])
  end
  threshold_y.each_with_index do |val,i|
    thresholds.add_row(["Threshold Y #{i}", sprintf("%0.4f", val)])
  end
  report_section.add(thresholds)

  generator.parse_element(report_section)
end
579
+ end
580
+ end
581
+ end