statsample-bivariate-extension 0.13.3 → 0.13.4
Sign up to get free protection for your applications and to get access to all the features.
- data.tar.gz.sig +0 -0
- data/History.txt +5 -0
- data/README.txt +1 -1
- data/lib/statsample/bivariate/extension_version.rb +2 -1
- data/lib/statsample/bivariate/polychoric.rb +85 -44
- data/lib/statsample/bivariate/tetrachoric.rb +13 -2
- data/spec/statsample/bivariate/polychoric_processor_spec.rb +1 -1
- data/spec/statsample/bivariate/polychoric_spec.rb +2 -2
- data/spec/statsample/bivariate/tetrachoric_spec.rb +2 -2
- metadata +3 -3
- metadata.gz.sig +0 -0
data.tar.gz.sig
CHANGED
Binary file
|
data/History.txt
CHANGED
data/README.txt
CHANGED
@@ -11,7 +11,7 @@ Provides advanced bivariate statistics:
|
|
11
11
|
== FEATURES/PROBLEMS:
|
12
12
|
|
13
13
|
* Statsample::Bivariate::Polychoric class provides polychoric correlation
|
14
|
-
* Statsample::Bivariate::
|
14
|
+
* Statsample::Bivariate::Tetrachoric class provides tetrachoric correlation
|
15
15
|
|
16
16
|
|
17
17
|
== SYNOPSIS:
|
@@ -42,14 +42,29 @@ module Statsample
|
|
42
42
|
# the two underlying continuous variables (Drasgow, 2006)
|
43
43
|
#
|
44
44
|
# According to Drasgow(2006), there are three methods to estimate
|
45
|
-
# the polychoric correlation:
|
45
|
+
# the polychoric correlation: ML Joint estimation, ML two-step estimation
|
46
|
+
# and polychoric series estimate. You can select
|
47
|
+
# the estimation method with <tt>method</tt> attribute.
|
48
|
+
#
|
49
|
+
# == ML Joint Estimation
|
50
|
+
# Requires gsl library and <tt>gsl</tt> gem.
|
51
|
+
# Joint estimation uses derivative based algorithm by default, based
|
52
|
+
# on Olsson(1979).
|
53
|
+
# A derivative free algorithm is also available:
|
54
|
+
# compute_one_step_mle_without_derivatives() , based loosely
|
55
|
+
# on J.Fox R package 'polycor' algorithm.
|
56
|
+
#
|
57
|
+
# == Two-step Estimation
|
46
58
|
#
|
47
|
-
#
|
48
|
-
#
|
49
|
-
# 3. Polychoric series estimate.
|
59
|
+
# Default method. Uses a no-derivative approach, based on J.Fox
|
60
|
+
# R package 'polycor'.
|
50
61
|
#
|
51
|
-
#
|
52
|
-
#
|
62
|
+
# == Polychoric series estimate.
|
63
|
+
# <b>Warning</b>: Results diverge a lot from Joint and two-step
|
64
|
+
# calculation.
|
65
|
+
#
|
66
|
+
# Requires gsl library and <tt>gsl</tt> gem.
|
67
|
+
# Based on Martinson and Hamdan(1975) algorithm.
|
53
68
|
#
|
54
69
|
# == Use
|
55
70
|
#
|
@@ -90,19 +105,22 @@ module Statsample
|
|
90
105
|
# See http://rb-gsl.rubyforge.org/min.html for reference.
|
91
106
|
attr_accessor :minimizer_type_two_step
|
92
107
|
|
93
|
-
# Minimizer type for joint estimate. Default "nmsimplex"
|
108
|
+
# Minimizer type for joint estimate, no derivative. Default "nmsimplex".
|
94
109
|
# See http://rb-gsl.rubyforge.org/min.html for reference.
|
95
|
-
attr_accessor :
|
96
|
-
|
110
|
+
attr_accessor :minimizer_type_joint_no_derivative
|
111
|
+
|
112
|
+
# Minimizer type for joint estimate, using derivative. Default "conjugate_pr".
|
113
|
+
# See http://rb-gsl.rubyforge.org/min.html for reference.
|
114
|
+
attr_accessor :minimizer_type_joint_derivative
|
97
115
|
|
98
116
|
# Method of calculation of polychoric series.
|
99
117
|
# <tt>:two_step</tt> used by default.
|
100
118
|
#
|
101
|
-
# :two_step:: two-step ML, based on code by
|
119
|
+
# :two_step:: two-step ML, based on code by J.Fox
|
102
120
|
# :polychoric_series:: polychoric series estimate, using
|
103
121
|
# algorithm AS87 by Martinson and Hamdan (1975).
|
104
|
-
# :joint:: one-step ML,
|
105
|
-
#
|
122
|
+
# :joint:: one-step ML, using derivatives by Olsson (1979)
|
123
|
+
#
|
106
124
|
attr_accessor :method
|
107
125
|
# Absolute error for iteration.
|
108
126
|
attr_accessor :epsilon
|
@@ -113,21 +131,39 @@ module Statsample
|
|
113
131
|
# Log of algorithm
|
114
132
|
attr_reader :log
|
115
133
|
|
116
|
-
|
134
|
+
# Model ll
|
117
135
|
attr_reader :loglike_model
|
118
136
|
|
137
|
+
# Returns the polychoric correlation
|
138
|
+
attr_reader :r
|
139
|
+
# Returns the rows thresholds
|
140
|
+
attr_reader :alpha
|
141
|
+
# Returns the columns thresholds
|
142
|
+
attr_reader :beta
|
143
|
+
|
144
|
+
dirty_writer :max_iterations, :epsilon, :minimizer_type_two_step, :minimizer_type_joint_no_derivative, :minimizer_type_joint_derivative, :method
|
145
|
+
dirty_memoize :r, :alpha, :beta
|
146
|
+
# Default method
|
119
147
|
METHOD=:two_step
|
148
|
+
# Max number of iterations
|
120
149
|
MAX_ITERATIONS=300
|
150
|
+
# Epsilon
|
121
151
|
EPSILON=1e-6
|
152
|
+
# GSL unidimensional minimizer
|
122
153
|
MINIMIZER_TYPE_TWO_STEP="brent"
|
123
|
-
|
154
|
+
# GSL multidimensional minimizer, derivative based
|
155
|
+
MINIMIZER_TYPE_JOINT_DERIVATIVE="conjugate_pr"
|
156
|
+
# GSL multidimensional minimizer, non derivative based
|
157
|
+
MINIMIZER_TYPE_JOINT_NO_DERIVATIVE="nmsimplex"
|
158
|
+
|
159
|
+
# Create a Polychoric object, based on two vectors
|
124
160
|
def self.new_with_vectors(v1,v2)
|
125
161
|
Polychoric.new(Crosstab.new(v1,v2).to_matrix)
|
126
162
|
end
|
127
163
|
# Params:
|
128
|
-
# * matrix
|
129
|
-
# * opts
|
130
|
-
|
164
|
+
# * <tt>matrix</tt>: Contingency table
|
165
|
+
# * <tt>opts</tt>: Hash with options. Could be any
|
166
|
+
# accessible attribute of object
|
131
167
|
def initialize(matrix, opts=Hash.new)
|
132
168
|
@matrix=matrix
|
133
169
|
@n=matrix.column_size
|
@@ -140,7 +176,9 @@ module Statsample
|
|
140
176
|
@max_iterations=MAX_ITERATIONS
|
141
177
|
@epsilon=EPSILON
|
142
178
|
@minimizer_type_two_step=MINIMIZER_TYPE_TWO_STEP
|
143
|
-
@
|
179
|
+
@minimizer_type_joint_no_derivative=MINIMIZER_TYPE_JOINT_NO_DERIVATIVE
|
180
|
+
@minimizer_type_joint_derivative=MINIMIZER_TYPE_JOINT_DERIVATIVE
|
181
|
+
|
144
182
|
@debug=false
|
145
183
|
@iteration=nil
|
146
184
|
opts.each{|k,v|
|
@@ -150,25 +188,17 @@ module Statsample
|
|
150
188
|
@pd=nil
|
151
189
|
compute_basic_parameters
|
152
190
|
end
|
153
|
-
|
154
|
-
attr_reader :r
|
155
|
-
# Returns the rows thresholds
|
156
|
-
attr_reader :alpha
|
157
|
-
# Returns the columns thresholds
|
158
|
-
attr_reader :beta
|
159
|
-
|
160
|
-
dirty_writer :max_iterations, :epsilon, :minimizer_type_two_step, :minimizer_type_joint, :method
|
161
|
-
dirty_memoize :r, :alpha, :beta
|
191
|
+
|
162
192
|
|
163
193
|
alias :threshold_x :alpha
|
164
194
|
alias :threshold_y :beta
|
165
195
|
|
166
196
|
|
167
197
|
# Start the computation of polychoric correlation
|
168
|
-
# based on attribute method
|
198
|
+
# based on attribute <tt>method</tt>.
|
169
199
|
def compute
|
170
200
|
if @method==:two_step
|
171
|
-
|
201
|
+
compute_two_step_mle
|
172
202
|
elsif @method==:joint
|
173
203
|
compute_one_step_mle
|
174
204
|
elsif @method==:polychoric_series
|
@@ -177,6 +207,8 @@ module Statsample
|
|
177
207
|
raise "Not implemented"
|
178
208
|
end
|
179
209
|
end
|
210
|
+
|
211
|
+
# :section: LL methods
|
180
212
|
# Retrieve log likelihood for actual data.
|
181
213
|
def loglike_data
|
182
214
|
loglike=0
|
@@ -238,6 +270,7 @@ module Statsample
|
|
238
270
|
end
|
239
271
|
end
|
240
272
|
|
273
|
+
# :section: Estimation methods
|
241
274
|
|
242
275
|
# Computation of polychoric correlation using two-step ML estimation.
|
243
276
|
#
|
@@ -249,23 +282,24 @@ module Statsample
|
|
249
282
|
# * Gegenfurtner, K. (1992). PRAXIS: Brent's algorithm for function minimization. Behavior Research Methods, Instruments & Computers, 24(4), 560-564. Available on http://www.allpsych.uni-giessen.de/karl/pdf/03.praxis.pdf
|
250
283
|
# * Uebersax, J.S. (2006). The tetrachoric and polychoric correlation coefficients. Statistical Methods for Rater Agreement web site. 2006. Available at: http://john-uebersax.com/stat/tetra.htm . Accessed February, 11, 2010
|
251
284
|
#
|
252
|
-
def
|
285
|
+
def compute_two_step_mle
|
253
286
|
if Statsample.has_gsl?
|
254
|
-
|
287
|
+
compute_two_step_mle_gsl
|
255
288
|
else
|
256
|
-
|
289
|
+
compute_two_step_mle_ruby
|
257
290
|
end
|
258
291
|
end
|
259
292
|
|
260
|
-
#
|
293
|
+
# Compute two step ML estimation using only ruby.
|
261
294
|
|
262
|
-
def
|
295
|
+
def compute_two_step_mle_ruby #:nodoc:
|
263
296
|
|
264
297
|
f=proc {|rho|
|
265
298
|
pr=Processor.new(@alpha,@beta, rho, @matrix)
|
266
299
|
pr.loglike
|
267
300
|
}
|
268
|
-
|
301
|
+
|
302
|
+
@log=_("Two step minimization using GSL Brent method (pure ruby)\n")
|
269
303
|
min=Minimization::Brent.new(-0.9999,0.9999,f)
|
270
304
|
min.epsilon=@epsilon
|
271
305
|
min.expected=0
|
@@ -277,8 +311,9 @@ module Statsample
|
|
277
311
|
|
278
312
|
end
|
279
313
|
|
314
|
+
# Compute two step ML estimation using gsl.
|
280
315
|
|
281
|
-
def
|
316
|
+
def compute_two_step_mle_gsl
|
282
317
|
|
283
318
|
fn1=GSL::Function.alloc {|rho|
|
284
319
|
pr=Processor.new(@alpha,@beta, rho, @matrix)
|
@@ -321,7 +356,7 @@ module Statsample
|
|
321
356
|
end
|
322
357
|
|
323
358
|
|
324
|
-
def compute_derivatives_vector(v,df)
|
359
|
+
def compute_derivatives_vector(v,df) # :nodoc:
|
325
360
|
new_rho=v[0]
|
326
361
|
new_alpha=v[1, @nr-1]
|
327
362
|
new_beta=v[@nr, @nc-1]
|
@@ -339,15 +374,19 @@ module Statsample
|
|
339
374
|
df[offset+i]=-pr.fd_loglike_b(i)
|
340
375
|
}
|
341
376
|
end
|
342
|
-
|
377
|
+
# Compute joint ML estimation.
|
378
|
+
# Uses compute_one_step_mle_with_derivatives() by default.
|
343
379
|
def compute_one_step_mle
|
344
380
|
compute_one_step_mle_with_derivatives
|
345
381
|
end
|
346
382
|
|
347
|
-
|
383
|
+
# Compute Polychoric correlation with joint estimate, using
|
384
|
+
# derivative based minimization method.
|
385
|
+
#
|
386
|
+
# Much faster than method without derivatives.
|
348
387
|
def compute_one_step_mle_with_derivatives
|
349
388
|
# Get initial values with two-step approach
|
350
|
-
|
389
|
+
compute_two_step_mle
|
351
390
|
# Start iteration with past values
|
352
391
|
rho=@r
|
353
392
|
cut_alpha=@alpha
|
@@ -373,7 +412,7 @@ module Statsample
|
|
373
412
|
my_func.set_params(parameters) # parameters
|
374
413
|
|
375
414
|
x = GSL::Vector.alloc(parameters.dup)
|
376
|
-
minimizer = GSL::MultiMin::FdfMinimizer.alloc(
|
415
|
+
minimizer = GSL::MultiMin::FdfMinimizer.alloc(minimizer_type_joint_derivative,np)
|
377
416
|
minimizer.set(my_func, x, 1, 1e-3)
|
378
417
|
|
379
418
|
iter = 0
|
@@ -409,14 +448,16 @@ module Statsample
|
|
409
448
|
|
410
449
|
end
|
411
450
|
|
412
|
-
# Compute Polychoric correlation with joint estimate
|
451
|
+
# Compute Polychoric correlation with joint estimate, using
|
452
|
+
# derivative-less minimization method.
|
453
|
+
#
|
413
454
|
# Rho and thresholds are estimated at same time.
|
414
455
|
# Code based on R package "polycor", by J.Fox.
|
415
456
|
#
|
416
457
|
|
417
458
|
def compute_one_step_mle_without_derivatives
|
418
459
|
# Get initial values with two-step approach
|
419
|
-
|
460
|
+
compute_two_step_mle
|
420
461
|
# Start iteration with past values
|
421
462
|
rho=@r
|
422
463
|
cut_alpha=@alpha
|
@@ -452,7 +493,7 @@ module Statsample
|
|
452
493
|
ss = GSL::Vector.alloc(np)
|
453
494
|
ss.set_all(1.0)
|
454
495
|
|
455
|
-
minimizer = GSL::MultiMin::FMinimizer.alloc(
|
496
|
+
minimizer = GSL::MultiMin::FMinimizer.alloc(minimizer_type_joint_no_derivative,np)
|
456
497
|
minimizer.set(my_func, x, ss)
|
457
498
|
|
458
499
|
iter = 0
|
@@ -9,17 +9,28 @@ module Statsample
|
|
9
9
|
# Tetrachoric correlation matrix.
|
10
10
|
# Order of rows and columns depends on Dataset#fields order
|
11
11
|
def self.tetrachoric_correlation_matrix(ds)
|
12
|
-
|
12
|
+
cache={}
|
13
|
+
matrix=ds.collect_matrix do |row,col|
|
13
14
|
if row==col
|
14
15
|
1.0
|
15
16
|
else
|
16
17
|
begin
|
17
|
-
|
18
|
+
if cache[[col,row]].nil?
|
19
|
+
r=tetrachoric(ds[row],ds[col])
|
20
|
+
cache[[row,col]]=r
|
21
|
+
r
|
22
|
+
else
|
23
|
+
cache[[col,row]]
|
24
|
+
end
|
18
25
|
rescue RuntimeError
|
19
26
|
nil
|
20
27
|
end
|
21
28
|
end
|
22
29
|
end
|
30
|
+
|
31
|
+
matrix.extend CovariateMatrix
|
32
|
+
matrix.fields=ds.fields
|
33
|
+
matrix
|
23
34
|
end
|
24
35
|
# Compute tetrachoric correlation.
|
25
36
|
#
|
@@ -18,7 +18,7 @@ describe Statsample::Bivariate::Polychoric::Processor do
|
|
18
18
|
|
19
19
|
end
|
20
20
|
it "should return informacion matrix" do
|
21
|
-
|
21
|
+
@processor.information_matrix.inverse.should be_instance_of(::Matrix)
|
22
22
|
end
|
23
23
|
it "fd a loglike should be equal usign eq.6 and eq.13" do
|
24
24
|
2.times {|k|
|
@@ -50,13 +50,13 @@ describe Statsample::Bivariate::Polychoric do
|
|
50
50
|
@poly.threshold_x[1].should be_close(1.137 ,0.001)
|
51
51
|
end
|
52
52
|
it "should compute two step mle with ruby" do
|
53
|
-
@poly.
|
53
|
+
@poly.compute_two_step_mle_ruby
|
54
54
|
check_two_step
|
55
55
|
end
|
56
56
|
|
57
57
|
it "compute two-step with gsl" do
|
58
58
|
pending("requires rb-gsl") unless Statsample.has_gsl?
|
59
|
-
@poly.
|
59
|
+
@poly.compute_two_step_mle_gsl
|
60
60
|
check_two_step
|
61
61
|
end
|
62
62
|
|
@@ -26,12 +26,12 @@ describe Statsample::Bivariate::Tetrachoric do
|
|
26
26
|
@poly = Statsample::Bivariate::Polychoric.new(@matrix)
|
27
27
|
end
|
28
28
|
it "should return similar values for two step ruby" do
|
29
|
-
@poly.
|
29
|
+
@poly.compute_two_step_mle_ruby
|
30
30
|
@tetra.r.should be_close(@poly.r,0.0001)
|
31
31
|
end
|
32
32
|
if Statsample.has_gsl?
|
33
33
|
it "should return similar values for two step using gsl" do
|
34
|
-
@poly.
|
34
|
+
@poly.compute_two_step_mle_gsl
|
35
35
|
@tetra.r.should be_close(@poly.r,0.0001)
|
36
36
|
end
|
37
37
|
else
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 13
|
8
|
-
-
|
9
|
-
version: 0.13.
|
8
|
+
- 4
|
9
|
+
version: 0.13.4
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Claudio Bustos
|
@@ -35,7 +35,7 @@ cert_chain:
|
|
35
35
|
rpP0jjs0
|
36
36
|
-----END CERTIFICATE-----
|
37
37
|
|
38
|
-
date: 2010-
|
38
|
+
date: 2010-08-18 00:00:00 -04:00
|
39
39
|
default_executable:
|
40
40
|
dependencies:
|
41
41
|
- !ruby/object:Gem::Dependency
|
metadata.gz.sig
CHANGED
Binary file
|