statsample-bivariate-extension 0.13.3 → 0.13.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data.tar.gz.sig CHANGED
Binary file
data/History.txt CHANGED
@@ -1,3 +1,8 @@
1
+ === 0.13.4 / 2010-08-18
2
+
3
+ * Added cache for tetrachoric correlation matrix
4
+ * Extra documentation. Renamed internal two step estimation methods on Polychoric
5
+
1
6
  === 0.13.3 / 2010-06-22
2
7
 
3
8
  * Bug fix on Processor.fd_loglike_cell_rho.
data/README.txt CHANGED
@@ -11,7 +11,7 @@ Provides advanced bivariate statistics:
11
11
  == FEATURES/PROBLEMS:
12
12
 
13
13
  * Statsample::Bivariate::Polychoric class provides polychoric correlation
14
- * Statsample::Bivariate::Tetracoric class provides tetrachoric correlation
14
+ * Statsample::Bivariate::Tetrachoric class provides tetrachoric correlation
15
15
 
16
16
 
17
17
  == SYNOPSIS:
@@ -1,6 +1,7 @@
1
1
  module Statsample
2
2
  module Bivariate
3
- EXTENSION_VERSION="0.13.3"
3
+ # Version of bivariate extension
4
+ EXTENSION_VERSION="0.13.4"
4
5
  end
5
6
  end
6
7
 
@@ -42,14 +42,29 @@ module Statsample
42
42
  # the two underlying continuous variables (Drasgow, 2006)
43
43
  #
44
44
  # According to Drasgow(2006), there are three methods to estimate
45
- # the polychoric correlation:
45
+ # the polychoric correlation: ML Joint estimation, ML two-step estimation
46
+ # and polychoric series estimate. You can select
47
+ # the estimation method with <tt>method</tt> attribute.
48
+ #
49
+ # == ML Joint Estimation
50
+ # Requires gsl library and <tt>gsl</tt> gem.
51
+ # Joint estimation uses derivative based algorithm by default, based
52
+ # on Olsson(1979).
53
+ # A derivative free algorithm is also available,
54
+ # compute_one_step_mle_without_derivatives(), based loosely
55
+ # on J.Fox R package 'polycor' algorithm.
56
+ #
57
+ # == Two-step Estimation
46
58
  #
47
- # 1. Maximum Likehood Estimator
48
- # 2. Two-step estimator
49
- # 3. Polychoric series estimate.
59
+ # Default method. Uses a no-derivative approach, based on J.Fox
60
+ # R package 'polycor'.
50
61
  #
51
- # By default, two-step estimation are used. You can select
52
- # the estimation method with method attribute. Joint estimate and polychoric series requires gsl library and rb-gsl. Joint estimate uses Olsson(1979) derivatives and two-step uses a derivative free method.
62
+ # == Polychoric series estimate.
63
+ # <b>Warning</b>: Results diverge a lot from Joint and two-step
64
+ # calculation.
65
+ #
66
+ # Requires gsl library and <tt>gsl</tt> gem.
67
+ # Based on Martinson and Hamdan(1975) algorithm.
53
68
  #
54
69
  # == Use
55
70
  #
@@ -90,19 +105,22 @@ module Statsample
90
105
  # See http://rb-gsl.rubyforge.org/min.html for reference.
91
106
  attr_accessor :minimizer_type_two_step
92
107
 
93
- # Minimizer type for joint estimate. Default "nmsimplex"
108
+ # Minimizer type for joint estimate, no derivative. Default "nmsimplex".
94
109
  # See http://rb-gsl.rubyforge.org/min.html for reference.
95
- attr_accessor :minimizer_type_joint
96
-
110
+ attr_accessor :minimizer_type_joint_no_derivative
111
+
112
+ # Minimizer type for joint estimate, using derivative. Default "conjugate_pr".
113
+ # See http://rb-gsl.rubyforge.org/min.html for reference.
114
+ attr_accessor :minimizer_type_joint_derivative
97
115
 
98
116
  # Method of calculation of polychoric series.
99
117
  # <tt>:two_step</tt> used by default.
100
118
  #
101
- # :two_step:: two-step ML, based on code by Gegenfurtner(1992).
119
+ # :two_step:: two-step ML, based on code by J.Fox
102
120
  # :polychoric_series:: polychoric series estimate, using
103
121
  # algorithm AS87 by Martinson and Hamdan (1975).
104
- # :joint:: one-step ML, based on R package 'polycor'
105
- # by J.Fox.
122
+ # :joint:: one-step ML, using derivatives by Olsson (1979)
123
+ #
106
124
  attr_accessor :method
107
125
  # Absolute error for iteration.
108
126
  attr_accessor :epsilon
@@ -113,21 +131,39 @@ module Statsample
113
131
  # Log of algorithm
114
132
  attr_reader :log
115
133
 
116
-
134
+ # Model ll
117
135
  attr_reader :loglike_model
118
136
 
137
+ # Returns the polychoric correlation
138
+ attr_reader :r
139
+ # Returns the rows thresholds
140
+ attr_reader :alpha
141
+ # Returns the columns thresholds
142
+ attr_reader :beta
143
+
144
+ dirty_writer :max_iterations, :epsilon, :minimizer_type_two_step, :minimizer_type_joint_no_derivative, :minimizer_type_joint_derivative, :method
145
+ dirty_memoize :r, :alpha, :beta
146
+ # Default method
119
147
  METHOD=:two_step
148
+ # Max number of iterations
120
149
  MAX_ITERATIONS=300
150
+ # Epsilon
121
151
  EPSILON=1e-6
152
+ # GSL unidimensional minimizer
122
153
  MINIMIZER_TYPE_TWO_STEP="brent"
123
- MINIMIZER_TYPE_JOINT="nmsimplex"
154
+ # GSL multidimensional minimizer, derivative based
155
+ MINIMIZER_TYPE_JOINT_DERIVATIVE="conjugate_pr"
156
+ # GSL multidimensional minimizer, non derivative based
157
+ MINIMIZER_TYPE_JOINT_NO_DERIVATIVE="nmsimplex"
158
+
159
+ # Create a Polychoric object, based on two vectors
124
160
  def self.new_with_vectors(v1,v2)
125
161
  Polychoric.new(Crosstab.new(v1,v2).to_matrix)
126
162
  end
127
163
  # Params:
128
- # * matrix: Contingence table
129
- # * opts: Any attribute
130
-
164
+ # * <tt>matrix</tt>: Contingency table
165
+ # * <tt>opts</tt>: Hash with options. Could be any
166
+ # accessible attribute of object
131
167
  def initialize(matrix, opts=Hash.new)
132
168
  @matrix=matrix
133
169
  @n=matrix.column_size
@@ -140,7 +176,9 @@ module Statsample
140
176
  @max_iterations=MAX_ITERATIONS
141
177
  @epsilon=EPSILON
142
178
  @minimizer_type_two_step=MINIMIZER_TYPE_TWO_STEP
143
- @minimizer_type_joint=MINIMIZER_TYPE_JOINT
179
+ @minimizer_type_joint_no_derivative=MINIMIZER_TYPE_JOINT_NO_DERIVATIVE
180
+ @minimizer_type_joint_derivative=MINIMIZER_TYPE_JOINT_DERIVATIVE
181
+
144
182
  @debug=false
145
183
  @iteration=nil
146
184
  opts.each{|k,v|
@@ -150,25 +188,17 @@ module Statsample
150
188
  @pd=nil
151
189
  compute_basic_parameters
152
190
  end
153
- # Returns the polychoric correlation
154
- attr_reader :r
155
- # Returns the rows thresholds
156
- attr_reader :alpha
157
- # Returns the columns thresholds
158
- attr_reader :beta
159
-
160
- dirty_writer :max_iterations, :epsilon, :minimizer_type_two_step, :minimizer_type_joint, :method
161
- dirty_memoize :r, :alpha, :beta
191
+
162
192
 
163
193
  alias :threshold_x :alpha
164
194
  alias :threshold_y :beta
165
195
 
166
196
 
167
197
  # Start the computation of polychoric correlation
168
- # based on attribute method
198
+ # based on attribute <tt>method</tt>.
169
199
  def compute
170
200
  if @method==:two_step
171
- compute_two_step_mle_drasgow
201
+ compute_two_step_mle
172
202
  elsif @method==:joint
173
203
  compute_one_step_mle
174
204
  elsif @method==:polychoric_series
@@ -177,6 +207,8 @@ module Statsample
177
207
  raise "Not implemented"
178
208
  end
179
209
  end
210
+
211
+ # :section: LL methods
180
212
  # Retrieve log likelihood for actual data.
181
213
  def loglike_data
182
214
  loglike=0
@@ -238,6 +270,7 @@ module Statsample
238
270
  end
239
271
  end
240
272
 
273
+ # :section: Estimation methods
241
274
 
242
275
  # Computation of polychoric correlation using two-step ML estimation.
243
276
  #
@@ -249,23 +282,24 @@ module Statsample
249
282
  # * Gegenfurtner, K. (1992). PRAXIS: Brent's algorithm for function minimization. Behavior Research Methods, Instruments & Computers, 24(4), 560-564. Available on http://www.allpsych.uni-giessen.de/karl/pdf/03.praxis.pdf
250
283
  # * Uebersax, J.S. (2006). The tetrachoric and polychoric correlation coefficients. Statistical Methods for Rater Agreement web site. 2006. Available at: http://john-uebersax.com/stat/tetra.htm . Accessed February, 11, 2010
251
284
  #
252
- def compute_two_step_mle_drasgow
285
+ def compute_two_step_mle
253
286
  if Statsample.has_gsl?
254
- compute_two_step_mle_drasgow_gsl
287
+ compute_two_step_mle_gsl
255
288
  else
256
- compute_two_step_mle_drasgow_ruby
289
+ compute_two_step_mle_ruby
257
290
  end
258
291
  end
259
292
 
260
- # Depends on minimization algorithm.
293
+ # Compute two step ML estimation using only ruby.
261
294
 
262
- def compute_two_step_mle_drasgow_ruby #:nodoc:
295
+ def compute_two_step_mle_ruby #:nodoc:
263
296
 
264
297
  f=proc {|rho|
265
298
  pr=Processor.new(@alpha,@beta, rho, @matrix)
266
299
  pr.loglike
267
300
  }
268
- @log=_("Minimizing using GSL Brent method\n")
301
+
302
+ @log=_("Two step minimization using GSL Brent method (pure ruby)\n")
269
303
  min=Minimization::Brent.new(-0.9999,0.9999,f)
270
304
  min.epsilon=@epsilon
271
305
  min.expected=0
@@ -277,8 +311,9 @@ module Statsample
277
311
 
278
312
  end
279
313
 
314
+ # Compute two step ML estimation using gsl.
280
315
 
281
- def compute_two_step_mle_drasgow_gsl #:nodoc:
316
+ def compute_two_step_mle_gsl
282
317
 
283
318
  fn1=GSL::Function.alloc {|rho|
284
319
  pr=Processor.new(@alpha,@beta, rho, @matrix)
@@ -321,7 +356,7 @@ module Statsample
321
356
  end
322
357
 
323
358
 
324
- def compute_derivatives_vector(v,df)
359
+ def compute_derivatives_vector(v,df) # :nodoc:
325
360
  new_rho=v[0]
326
361
  new_alpha=v[1, @nr-1]
327
362
  new_beta=v[@nr, @nc-1]
@@ -339,15 +374,19 @@ module Statsample
339
374
  df[offset+i]=-pr.fd_loglike_b(i)
340
375
  }
341
376
  end
342
-
377
+ # Compute joint ML estimation.
378
+ # Uses compute_one_step_mle_with_derivatives() by default.
343
379
  def compute_one_step_mle
344
380
  compute_one_step_mle_with_derivatives
345
381
  end
346
382
 
347
-
383
+ # Compute Polychoric correlation with joint estimate, using
384
+ # derivative based minimization method.
385
+ #
386
+ # Much faster than method without derivatives.
348
387
  def compute_one_step_mle_with_derivatives
349
388
  # Get initial values with two-step approach
350
- compute_two_step_mle_drasgow
389
+ compute_two_step_mle
351
390
  # Start iteration with past values
352
391
  rho=@r
353
392
  cut_alpha=@alpha
@@ -373,7 +412,7 @@ module Statsample
373
412
  my_func.set_params(parameters) # parameters
374
413
 
375
414
  x = GSL::Vector.alloc(parameters.dup)
376
- minimizer = GSL::MultiMin::FdfMinimizer.alloc('conjugate_pr',np)
415
+ minimizer = GSL::MultiMin::FdfMinimizer.alloc(minimizer_type_joint_derivative,np)
377
416
  minimizer.set(my_func, x, 1, 1e-3)
378
417
 
379
418
  iter = 0
@@ -409,14 +448,16 @@ module Statsample
409
448
 
410
449
  end
411
450
 
412
- # Compute Polychoric correlation with joint estimate.
451
+ # Compute Polychoric correlation with joint estimate, using
452
+ # derivative-less minimization method.
453
+ #
413
454
  # Rho and thresholds are estimated at same time.
414
455
  # Code based on R package "polycor", by J.Fox.
415
456
  #
416
457
 
417
458
  def compute_one_step_mle_without_derivatives
418
459
  # Get initial values with two-step approach
419
- compute_two_step_mle_drasgow
460
+ compute_two_step_mle
420
461
  # Start iteration with past values
421
462
  rho=@r
422
463
  cut_alpha=@alpha
@@ -452,7 +493,7 @@ module Statsample
452
493
  ss = GSL::Vector.alloc(np)
453
494
  ss.set_all(1.0)
454
495
 
455
- minimizer = GSL::MultiMin::FMinimizer.alloc(minimizer_type_joint,np)
496
+ minimizer = GSL::MultiMin::FMinimizer.alloc(minimizer_type_joint_no_derivative,np)
456
497
  minimizer.set(my_func, x, ss)
457
498
 
458
499
  iter = 0
@@ -9,17 +9,28 @@ module Statsample
9
9
  # Tetrachoric correlation matrix.
10
10
  # Order of rows and columns depends on Dataset#fields order
11
11
  def self.tetrachoric_correlation_matrix(ds)
12
- ds.collect_matrix do |row,col|
12
+ cache={}
13
+ matrix=ds.collect_matrix do |row,col|
13
14
  if row==col
14
15
  1.0
15
16
  else
16
17
  begin
17
- tetrachoric(ds[row],ds[col])
18
+ if cache[[col,row]].nil?
19
+ r=tetrachoric(ds[row],ds[col])
20
+ cache[[row,col]]=r
21
+ r
22
+ else
23
+ cache[[col,row]]
24
+ end
18
25
  rescue RuntimeError
19
26
  nil
20
27
  end
21
28
  end
22
29
  end
30
+
31
+ matrix.extend CovariateMatrix
32
+ matrix.fields=ds.fields
33
+ matrix
23
34
  end
24
35
  # Compute tetrachoric correlation.
25
36
  #
@@ -18,7 +18,7 @@ describe Statsample::Bivariate::Polychoric::Processor do
18
18
 
19
19
  end
20
20
  it "should return informacion matrix" do
21
- p @processor.information_matrix.inverse
21
+ @processor.information_matrix.inverse.should be_instance_of(::Matrix)
22
22
  end
23
23
  it "fd a loglike should be equal usign eq.6 and eq.13" do
24
24
  2.times {|k|
@@ -50,13 +50,13 @@ describe Statsample::Bivariate::Polychoric do
50
50
  @poly.threshold_x[1].should be_close(1.137 ,0.001)
51
51
  end
52
52
  it "should compute two step mle with ruby" do
53
- @poly.compute_two_step_mle_drasgow_ruby
53
+ @poly.compute_two_step_mle_ruby
54
54
  check_two_step
55
55
  end
56
56
 
57
57
  it "compute two-step with gsl" do
58
58
  pending("requires rb-gsl") unless Statsample.has_gsl?
59
- @poly.compute_two_step_mle_drasgow_gsl
59
+ @poly.compute_two_step_mle_gsl
60
60
  check_two_step
61
61
  end
62
62
 
@@ -26,12 +26,12 @@ describe Statsample::Bivariate::Tetrachoric do
26
26
  @poly = Statsample::Bivariate::Polychoric.new(@matrix)
27
27
  end
28
28
  it "should return similar values for two step ruby" do
29
- @poly.compute_two_step_mle_drasgow_ruby
29
+ @poly.compute_two_step_mle_ruby
30
30
  @tetra.r.should be_close(@poly.r,0.0001)
31
31
  end
32
32
  if Statsample.has_gsl?
33
33
  it "should return similar values for two step using gsl" do
34
- @poly.compute_two_step_mle_drasgow_gsl
34
+ @poly.compute_two_step_mle_gsl
35
35
  @tetra.r.should be_close(@poly.r,0.0001)
36
36
  end
37
37
  else
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 13
8
- - 3
9
- version: 0.13.3
8
+ - 4
9
+ version: 0.13.4
10
10
  platform: ruby
11
11
  authors:
12
12
  - Claudio Bustos
@@ -35,7 +35,7 @@ cert_chain:
35
35
  rpP0jjs0
36
36
  -----END CERTIFICATE-----
37
37
 
38
- date: 2010-06-22 00:00:00 -04:00
38
+ date: 2010-08-18 00:00:00 -04:00
39
39
  default_executable:
40
40
  dependencies:
41
41
  - !ruby/object:Gem::Dependency
metadata.gz.sig CHANGED
Binary file