statsample-bivariate-extension 0.13.3 → 0.13.4

Sign up to get free protection for your applications and to get access to all the features.
data.tar.gz.sig CHANGED
Binary file
data/History.txt CHANGED
@@ -1,3 +1,8 @@
1
+ === 0.13.4 / 2010-08-18
2
+
3
+ * Added cache for tetrachoric correlation matrix
4
+ * Extra documentation. Renamed internal two step estimation methods on Polychoric
5
+
1
6
  === 0.13.3 / 2010-06-22
2
7
 
3
8
  * Bug fix on Processor.fd_loglike_cell_rho.
data/README.txt CHANGED
@@ -11,7 +11,7 @@ Provides advanced bivariate statistics:
11
11
  == FEATURES/PROBLEMS:
12
12
 
13
13
  * Statsample::Bivariate::Polychoric class provides polychoric correlation
14
- * Statsample::Bivariate::Tetracoric class provides tetrachoric correlation
14
+ * Statsample::Bivariate::Tetrachoric class provides tetrachoric correlation
15
15
 
16
16
 
17
17
  == SYNOPSIS:
@@ -1,6 +1,7 @@
1
1
  module Statsample
2
2
  module Bivariate
3
- EXTENSION_VERSION="0.13.3"
3
+ # Version of bivariate extension
4
+ EXTENSION_VERSION="0.13.4"
4
5
  end
5
6
  end
6
7
 
@@ -42,14 +42,29 @@ module Statsample
42
42
  # the two underlying continuous variables (Drasgow, 2006)
43
43
  #
44
44
  # According to Drasgow(2006), there are three methods to estimate
45
- # the polychoric correlation:
45
+ # the polychoric correlation: ML Joint estimation, ML two-step estimation
46
+ # and polychoric series estimate. You can select
47
+ # the estimation method with <tt>method</tt> attribute.
48
+ #
49
+ # == ML Joint Estimation
50
+ # Requires gsl library and <tt>gsl</tt> gem.
51
+ # Joint estimation uses derivative based algorithm by default, based
52
+ # on Olsson (1979).
53
+ # A derivative-free algorithm is also available,
54
+ # compute_one_step_mle_without_derivatives() , based loosely
55
+ # on J.Fox R package 'polycor' algorithm.
56
+ #
57
+ # == Two-step Estimation
46
58
  #
47
- # 1. Maximum Likehood Estimator
48
- # 2. Two-step estimator
49
- # 3. Polychoric series estimate.
59
+ # Default method. Uses a no-derivative approach, based on J.Fox
60
+ # R package 'polycor'.
50
61
  #
51
- # By default, two-step estimation are used. You can select
52
- # the estimation method with method attribute. Joint estimate and polychoric series requires gsl library and rb-gsl. Joint estimate uses Olsson(1979) derivatives and two-step uses a derivative free method.
62
+ # == Polychoric series estimate.
63
+ # <b>Warning</b>: Results diverge a lot from Joint and two-step
64
+ # calculation.
65
+ #
66
+ # Requires gsl library and <tt>gsl</tt> gem.
67
+ # Based on Martinson and Hamdan (1975) algorithm.
53
68
  #
54
69
  # == Use
55
70
  #
@@ -90,19 +105,22 @@ module Statsample
90
105
  # See http://rb-gsl.rubyforge.org/min.html for reference.
91
106
  attr_accessor :minimizer_type_two_step
92
107
 
93
- # Minimizer type for joint estimate. Default "nmsimplex"
108
+ # Minimizer type for joint estimate, no derivative. Default "nmsimplex".
94
109
  # See http://rb-gsl.rubyforge.org/min.html for reference.
95
- attr_accessor :minimizer_type_joint
96
-
110
+ attr_accessor :minimizer_type_joint_no_derivative
111
+
112
+ # Minimizer type for joint estimate, using derivative. Default "conjugate_pr".
113
+ # See http://rb-gsl.rubyforge.org/min.html for reference.
114
+ attr_accessor :minimizer_type_joint_derivative
97
115
 
98
116
  # Method of calculation of polychoric series.
99
117
  # <tt>:two_step</tt> used by default.
100
118
  #
101
- # :two_step:: two-step ML, based on code by Gegenfurtner(1992).
119
+ # :two_step:: two-step ML, based on code by J.Fox
102
120
  # :polychoric_series:: polychoric series estimate, using
103
121
  # algorithm AS87 by Martinson and Hamdan (1975).
104
- # :joint:: one-step ML, based on R package 'polycor'
105
- # by J.Fox.
122
+ # :joint:: one-step ML, using derivatives by Olsson (1979)
123
+ #
106
124
  attr_accessor :method
107
125
  # Absolute error for iteration.
108
126
  attr_accessor :epsilon
@@ -113,21 +131,39 @@ module Statsample
113
131
  # Log of algorithm
114
132
  attr_reader :log
115
133
 
116
-
134
+ # Model ll
117
135
  attr_reader :loglike_model
118
136
 
137
+ # Returns the polychoric correlation
138
+ attr_reader :r
139
+ # Returns the rows thresholds
140
+ attr_reader :alpha
141
+ # Returns the columns thresholds
142
+ attr_reader :beta
143
+
144
+ dirty_writer :max_iterations, :epsilon, :minimizer_type_two_step, :minimizer_type_joint_no_derivative, :minimizer_type_joint_derivative, :method
145
+ dirty_memoize :r, :alpha, :beta
146
+ # Default method
119
147
  METHOD=:two_step
148
+ # Max number of iterations
120
149
  MAX_ITERATIONS=300
150
+ # Epsilon
121
151
  EPSILON=1e-6
152
+ # GSL unidimensional minimizer
122
153
  MINIMIZER_TYPE_TWO_STEP="brent"
123
- MINIMIZER_TYPE_JOINT="nmsimplex"
154
+ # GSL multidimensional minimizer, derivative based
155
+ MINIMIZER_TYPE_JOINT_DERIVATIVE="conjugate_pr"
156
+ # GSL multidimensional minimizer, non derivative based
157
+ MINIMIZER_TYPE_JOINT_NO_DERIVATIVE="nmsimplex"
158
+
159
+ # Create a Polychoric object, based on two vectors
124
160
  def self.new_with_vectors(v1,v2)
125
161
  Polychoric.new(Crosstab.new(v1,v2).to_matrix)
126
162
  end
127
163
  # Params:
128
- # * matrix: Contingence table
129
- # * opts: Any attribute
130
-
164
+ # * <tt>matrix</tt>: Contingency table
165
+ # * <tt>opts</tt>: Hash with options. Could be any
166
+ # accessible attribute of object
131
167
  def initialize(matrix, opts=Hash.new)
132
168
  @matrix=matrix
133
169
  @n=matrix.column_size
@@ -140,7 +176,9 @@ module Statsample
140
176
  @max_iterations=MAX_ITERATIONS
141
177
  @epsilon=EPSILON
142
178
  @minimizer_type_two_step=MINIMIZER_TYPE_TWO_STEP
143
- @minimizer_type_joint=MINIMIZER_TYPE_JOINT
179
+ @minimizer_type_joint_no_derivative=MINIMIZER_TYPE_JOINT_NO_DERIVATIVE
180
+ @minimizer_type_joint_derivative=MINIMIZER_TYPE_JOINT_DERIVATIVE
181
+
144
182
  @debug=false
145
183
  @iteration=nil
146
184
  opts.each{|k,v|
@@ -150,25 +188,17 @@ module Statsample
150
188
  @pd=nil
151
189
  compute_basic_parameters
152
190
  end
153
- # Returns the polychoric correlation
154
- attr_reader :r
155
- # Returns the rows thresholds
156
- attr_reader :alpha
157
- # Returns the columns thresholds
158
- attr_reader :beta
159
-
160
- dirty_writer :max_iterations, :epsilon, :minimizer_type_two_step, :minimizer_type_joint, :method
161
- dirty_memoize :r, :alpha, :beta
191
+
162
192
 
163
193
  alias :threshold_x :alpha
164
194
  alias :threshold_y :beta
165
195
 
166
196
 
167
197
  # Start the computation of polychoric correlation
168
- # based on attribute method
198
+ # based on attribute <tt>method</tt>.
169
199
  def compute
170
200
  if @method==:two_step
171
- compute_two_step_mle_drasgow
201
+ compute_two_step_mle
172
202
  elsif @method==:joint
173
203
  compute_one_step_mle
174
204
  elsif @method==:polychoric_series
@@ -177,6 +207,8 @@ module Statsample
177
207
  raise "Not implemented"
178
208
  end
179
209
  end
210
+
211
+ # :section: LL methods
180
212
  # Retrieve log likelihood for actual data.
181
213
  def loglike_data
182
214
  loglike=0
@@ -238,6 +270,7 @@ module Statsample
238
270
  end
239
271
  end
240
272
 
273
+ # :section: Estimation methods
241
274
 
242
275
  # Computation of polychoric correlation using two-step ML estimation.
243
276
  #
@@ -249,23 +282,24 @@ module Statsample
249
282
  # * Gegenfurtner, K. (1992). PRAXIS: Brent's algorithm for function minimization. Behavior Research Methods, Instruments & Computers, 24(4), 560-564. Available on http://www.allpsych.uni-giessen.de/karl/pdf/03.praxis.pdf
250
283
  # * Uebersax, J.S. (2006). The tetrachoric and polychoric correlation coefficients. Statistical Methods for Rater Agreement web site. 2006. Available at: http://john-uebersax.com/stat/tetra.htm . Accessed February, 11, 2010
251
284
  #
252
- def compute_two_step_mle_drasgow
285
+ def compute_two_step_mle
253
286
  if Statsample.has_gsl?
254
- compute_two_step_mle_drasgow_gsl
287
+ compute_two_step_mle_gsl
255
288
  else
256
- compute_two_step_mle_drasgow_ruby
289
+ compute_two_step_mle_ruby
257
290
  end
258
291
  end
259
292
 
260
- # Depends on minimization algorithm.
293
+ # Compute two step ML estimation using only ruby.
261
294
 
262
- def compute_two_step_mle_drasgow_ruby #:nodoc:
295
+ def compute_two_step_mle_ruby #:nodoc:
263
296
 
264
297
  f=proc {|rho|
265
298
  pr=Processor.new(@alpha,@beta, rho, @matrix)
266
299
  pr.loglike
267
300
  }
268
- @log=_("Minimizing using GSL Brent method\n")
301
+
302
+ @log=_("Two step minimization using GSL Brent method (pure ruby)\n")
269
303
  min=Minimization::Brent.new(-0.9999,0.9999,f)
270
304
  min.epsilon=@epsilon
271
305
  min.expected=0
@@ -277,8 +311,9 @@ module Statsample
277
311
 
278
312
  end
279
313
 
314
+ # Compute two step ML estimation using gsl.
280
315
 
281
- def compute_two_step_mle_drasgow_gsl #:nodoc:
316
+ def compute_two_step_mle_gsl
282
317
 
283
318
  fn1=GSL::Function.alloc {|rho|
284
319
  pr=Processor.new(@alpha,@beta, rho, @matrix)
@@ -321,7 +356,7 @@ module Statsample
321
356
  end
322
357
 
323
358
 
324
- def compute_derivatives_vector(v,df)
359
+ def compute_derivatives_vector(v,df) # :nodoc:
325
360
  new_rho=v[0]
326
361
  new_alpha=v[1, @nr-1]
327
362
  new_beta=v[@nr, @nc-1]
@@ -339,15 +374,19 @@ module Statsample
339
374
  df[offset+i]=-pr.fd_loglike_b(i)
340
375
  }
341
376
  end
342
-
377
+ # Compute joint ML estimation.
378
+ # Uses compute_one_step_mle_with_derivatives() by default.
343
379
  def compute_one_step_mle
344
380
  compute_one_step_mle_with_derivatives
345
381
  end
346
382
 
347
-
383
+ # Compute Polychoric correlation with joint estimate, using
384
+ # derivative based minimization method.
385
+ #
386
+ # Much faster than method without derivatives.
348
387
  def compute_one_step_mle_with_derivatives
349
388
  # Get initial values with two-step approach
350
- compute_two_step_mle_drasgow
389
+ compute_two_step_mle
351
390
  # Start iteration with past values
352
391
  rho=@r
353
392
  cut_alpha=@alpha
@@ -373,7 +412,7 @@ module Statsample
373
412
  my_func.set_params(parameters) # parameters
374
413
 
375
414
  x = GSL::Vector.alloc(parameters.dup)
376
- minimizer = GSL::MultiMin::FdfMinimizer.alloc('conjugate_pr',np)
415
+ minimizer = GSL::MultiMin::FdfMinimizer.alloc(minimizer_type_joint_derivative,np)
377
416
  minimizer.set(my_func, x, 1, 1e-3)
378
417
 
379
418
  iter = 0
@@ -409,14 +448,16 @@ module Statsample
409
448
 
410
449
  end
411
450
 
412
- # Compute Polychoric correlation with joint estimate.
451
+ # Compute Polychoric correlation with joint estimate, using
452
+ # derivative-less minimization method.
453
+ #
413
454
  # Rho and thresholds are estimated at same time.
414
455
  # Code based on R package "polycor", by J.Fox.
415
456
  #
416
457
 
417
458
  def compute_one_step_mle_without_derivatives
418
459
  # Get initial values with two-step approach
419
- compute_two_step_mle_drasgow
460
+ compute_two_step_mle
420
461
  # Start iteration with past values
421
462
  rho=@r
422
463
  cut_alpha=@alpha
@@ -452,7 +493,7 @@ module Statsample
452
493
  ss = GSL::Vector.alloc(np)
453
494
  ss.set_all(1.0)
454
495
 
455
- minimizer = GSL::MultiMin::FMinimizer.alloc(minimizer_type_joint,np)
496
+ minimizer = GSL::MultiMin::FMinimizer.alloc(minimizer_type_joint_no_derivative,np)
456
497
  minimizer.set(my_func, x, ss)
457
498
 
458
499
  iter = 0
@@ -9,17 +9,28 @@ module Statsample
9
9
  # Tetrachoric correlation matrix.
10
10
  # Order of rows and columns depends on Dataset#fields order
11
11
  def self.tetrachoric_correlation_matrix(ds)
12
- ds.collect_matrix do |row,col|
12
+ cache={}
13
+ matrix=ds.collect_matrix do |row,col|
13
14
  if row==col
14
15
  1.0
15
16
  else
16
17
  begin
17
- tetrachoric(ds[row],ds[col])
18
+ if cache[[col,row]].nil?
19
+ r=tetrachoric(ds[row],ds[col])
20
+ cache[[row,col]]=r
21
+ r
22
+ else
23
+ cache[[col,row]]
24
+ end
18
25
  rescue RuntimeError
19
26
  nil
20
27
  end
21
28
  end
22
29
  end
30
+
31
+ matrix.extend CovariateMatrix
32
+ matrix.fields=ds.fields
33
+ matrix
23
34
  end
24
35
  # Compute tetrachoric correlation.
25
36
  #
@@ -18,7 +18,7 @@ describe Statsample::Bivariate::Polychoric::Processor do
18
18
 
19
19
  end
20
20
  it "should return informacion matrix" do
21
- p @processor.information_matrix.inverse
21
+ @processor.information_matrix.inverse.should be_instance_of(::Matrix)
22
22
  end
23
23
  it "fd a loglike should be equal usign eq.6 and eq.13" do
24
24
  2.times {|k|
@@ -50,13 +50,13 @@ describe Statsample::Bivariate::Polychoric do
50
50
  @poly.threshold_x[1].should be_close(1.137 ,0.001)
51
51
  end
52
52
  it "should compute two step mle with ruby" do
53
- @poly.compute_two_step_mle_drasgow_ruby
53
+ @poly.compute_two_step_mle_ruby
54
54
  check_two_step
55
55
  end
56
56
 
57
57
  it "compute two-step with gsl" do
58
58
  pending("requires rb-gsl") unless Statsample.has_gsl?
59
- @poly.compute_two_step_mle_drasgow_gsl
59
+ @poly.compute_two_step_mle_gsl
60
60
  check_two_step
61
61
  end
62
62
 
@@ -26,12 +26,12 @@ describe Statsample::Bivariate::Tetrachoric do
26
26
  @poly = Statsample::Bivariate::Polychoric.new(@matrix)
27
27
  end
28
28
  it "should return similar values for two step ruby" do
29
- @poly.compute_two_step_mle_drasgow_ruby
29
+ @poly.compute_two_step_mle_ruby
30
30
  @tetra.r.should be_close(@poly.r,0.0001)
31
31
  end
32
32
  if Statsample.has_gsl?
33
33
  it "should return similar values for two step using gsl" do
34
- @poly.compute_two_step_mle_drasgow_gsl
34
+ @poly.compute_two_step_mle_gsl
35
35
  @tetra.r.should be_close(@poly.r,0.0001)
36
36
  end
37
37
  else
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 13
8
- - 3
9
- version: 0.13.3
8
+ - 4
9
+ version: 0.13.4
10
10
  platform: ruby
11
11
  authors:
12
12
  - Claudio Bustos
@@ -35,7 +35,7 @@ cert_chain:
35
35
  rpP0jjs0
36
36
  -----END CERTIFICATE-----
37
37
 
38
- date: 2010-06-22 00:00:00 -04:00
38
+ date: 2010-08-18 00:00:00 -04:00
39
39
  default_executable:
40
40
  dependencies:
41
41
  - !ruby/object:Gem::Dependency
metadata.gz.sig CHANGED
Binary file