statsample-bivariate-extension 0.13.3 → 0.13.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data.tar.gz.sig +0 -0
- data/History.txt +5 -0
- data/README.txt +1 -1
- data/lib/statsample/bivariate/extension_version.rb +2 -1
- data/lib/statsample/bivariate/polychoric.rb +85 -44
- data/lib/statsample/bivariate/tetrachoric.rb +13 -2
- data/spec/statsample/bivariate/polychoric_processor_spec.rb +1 -1
- data/spec/statsample/bivariate/polychoric_spec.rb +2 -2
- data/spec/statsample/bivariate/tetrachoric_spec.rb +2 -2
- metadata +3 -3
- metadata.gz.sig +0 -0
data.tar.gz.sig
CHANGED
Binary file
|
data/History.txt
CHANGED
data/README.txt
CHANGED
@@ -11,7 +11,7 @@ Provides advanced bivariate statistics:
|
|
11
11
|
== FEATURES/PROBLEMS:
|
12
12
|
|
13
13
|
* Statsample::Bivariate::Polychoric class provides polychoric correlation
|
14
|
-
* Statsample::Bivariate::
|
14
|
+
* Statsample::Bivariate::Tetrachoric class provides tetrachoric correlation
|
15
15
|
|
16
16
|
|
17
17
|
== SYNOPSIS:
|
@@ -42,14 +42,29 @@ module Statsample
|
|
42
42
|
# the two underlying continuous variables (Drasgow, 2006)
|
43
43
|
#
|
44
44
|
# According to Drasgow (2006), there are three methods to estimate
|
45
|
-
# the polychoric correlation:
|
45
|
+
# the polychoric correlation: ML Joint estimation, ML two-step estimation
|
46
|
+
# and polychoric series estimate. You can select
|
47
|
+
# the estimation method with <tt>method</tt> attribute.
|
48
|
+
#
|
49
|
+
# == ML Joint Estimation
|
50
|
+
# Requires gsl library and <tt>gsl</tt> gem.
|
51
|
+
# Joint estimation uses derivative based algorithm by default, based
|
52
|
+
# on Olsson (1979).
|
53
|
+
# A derivative-free algorithm is also available:
|
54
|
+
# compute_one_step_mle_without_derivatives() , based loosely
|
55
|
+
# on J.Fox R package 'polycor' algorithm.
|
56
|
+
#
|
57
|
+
# == Two-step Estimation
|
46
58
|
#
|
47
|
-
#
|
48
|
-
#
|
49
|
-
# 3. Polychoric series estimate.
|
59
|
+
# Default method. Uses a no-derivative approach, based on J.Fox
|
60
|
+
# R package 'polycor'.
|
50
61
|
#
|
51
|
-
#
|
52
|
-
#
|
62
|
+
# == Polychoric series estimate.
|
63
|
+
# <b>Warning</b>: Results diverge a lot from Joint and two-step
|
64
|
+
# calculation.
|
65
|
+
#
|
66
|
+
# Requires gsl library and <tt>gsl</tt> gem.
|
67
|
+
# Based on Martinson and Hamdan (1975) algorithm.
|
53
68
|
#
|
54
69
|
# == Use
|
55
70
|
#
|
@@ -90,19 +105,22 @@ module Statsample
|
|
90
105
|
# See http://rb-gsl.rubyforge.org/min.html for reference.
|
91
106
|
attr_accessor :minimizer_type_two_step
|
92
107
|
|
93
|
-
# Minimizer type for joint estimate. Default "nmsimplex"
|
108
|
+
# Minimizer type for joint estimate, no derivative. Default "nmsimplex".
|
94
109
|
# See http://rb-gsl.rubyforge.org/min.html for reference.
|
95
|
-
attr_accessor :
|
96
|
-
|
110
|
+
attr_accessor :minimizer_type_joint_no_derivative
|
111
|
+
|
112
|
+
# Minimizer type for joint estimate, using derivative. Default "conjugate_pr".
|
113
|
+
# See http://rb-gsl.rubyforge.org/min.html for reference.
|
114
|
+
attr_accessor :minimizer_type_joint_derivative
|
97
115
|
|
98
116
|
# Method of calculation of polychoric series.
|
99
117
|
# <tt>:two_step</tt> used by default.
|
100
118
|
#
|
101
|
-
# :two_step:: two-step ML, based on code by
|
119
|
+
# :two_step:: two-step ML, based on code by J.Fox
|
102
120
|
# :polychoric_series:: polychoric series estimate, using
|
103
121
|
# algorithm AS87 by Martinson and Hamdan (1975).
|
104
|
-
# :joint:: one-step ML,
|
105
|
-
#
|
122
|
+
# :joint:: one-step ML, using derivatives by Olsson (1979)
|
123
|
+
#
|
106
124
|
attr_accessor :method
|
107
125
|
# Absolute error for iteration.
|
108
126
|
attr_accessor :epsilon
|
@@ -113,21 +131,39 @@ module Statsample
|
|
113
131
|
# Log of algorithm
|
114
132
|
attr_reader :log
|
115
133
|
|
116
|
-
|
134
|
+
# Model ll
|
117
135
|
attr_reader :loglike_model
|
118
136
|
|
137
|
+
# Returns the polychoric correlation
|
138
|
+
attr_reader :r
|
139
|
+
# Returns the rows thresholds
|
140
|
+
attr_reader :alpha
|
141
|
+
# Returns the columns thresholds
|
142
|
+
attr_reader :beta
|
143
|
+
|
144
|
+
dirty_writer :max_iterations, :epsilon, :minimizer_type_two_step, :minimizer_type_joint_no_derivative, :minimizer_type_joint_derivative, :method
|
145
|
+
dirty_memoize :r, :alpha, :beta
|
146
|
+
# Default method
|
119
147
|
METHOD=:two_step
|
148
|
+
# Max number of iterations
|
120
149
|
MAX_ITERATIONS=300
|
150
|
+
# Epsilon
|
121
151
|
EPSILON=1e-6
|
152
|
+
# GSL unidimensional minimizer
|
122
153
|
MINIMIZER_TYPE_TWO_STEP="brent"
|
123
|
-
|
154
|
+
# GSL multidimensional minimizer, derivative based
|
155
|
+
MINIMIZER_TYPE_JOINT_DERIVATIVE="conjugate_pr"
|
156
|
+
# GSL multidimensional minimizer, non derivative based
|
157
|
+
MINIMIZER_TYPE_JOINT_NO_DERIVATIVE="nmsimplex"
|
158
|
+
|
159
|
+
# Create a Polychoric object, based on two vectors
|
124
160
|
def self.new_with_vectors(v1,v2)
|
125
161
|
Polychoric.new(Crosstab.new(v1,v2).to_matrix)
|
126
162
|
end
|
127
163
|
# Params:
|
128
|
-
# * matrix
|
129
|
-
# * opts
|
130
|
-
|
164
|
+
# * <tt>matrix</tt>: Contingency table
|
165
|
+
# * <tt>opts</tt>: Hash with options. Could be any
|
166
|
+
# accessible attribute of object
|
131
167
|
def initialize(matrix, opts=Hash.new)
|
132
168
|
@matrix=matrix
|
133
169
|
@n=matrix.column_size
|
@@ -140,7 +176,9 @@ module Statsample
|
|
140
176
|
@max_iterations=MAX_ITERATIONS
|
141
177
|
@epsilon=EPSILON
|
142
178
|
@minimizer_type_two_step=MINIMIZER_TYPE_TWO_STEP
|
143
|
-
@
|
179
|
+
@minimizer_type_joint_no_derivative=MINIMIZER_TYPE_JOINT_NO_DERIVATIVE
|
180
|
+
@minimizer_type_joint_derivative=MINIMIZER_TYPE_JOINT_DERIVATIVE
|
181
|
+
|
144
182
|
@debug=false
|
145
183
|
@iteration=nil
|
146
184
|
opts.each{|k,v|
|
@@ -150,25 +188,17 @@ module Statsample
|
|
150
188
|
@pd=nil
|
151
189
|
compute_basic_parameters
|
152
190
|
end
|
153
|
-
|
154
|
-
attr_reader :r
|
155
|
-
# Returns the rows thresholds
|
156
|
-
attr_reader :alpha
|
157
|
-
# Returns the columns thresholds
|
158
|
-
attr_reader :beta
|
159
|
-
|
160
|
-
dirty_writer :max_iterations, :epsilon, :minimizer_type_two_step, :minimizer_type_joint, :method
|
161
|
-
dirty_memoize :r, :alpha, :beta
|
191
|
+
|
162
192
|
|
163
193
|
alias :threshold_x :alpha
|
164
194
|
alias :threshold_y :beta
|
165
195
|
|
166
196
|
|
167
197
|
# Start the computation of polychoric correlation
|
168
|
-
# based on attribute method
|
198
|
+
# based on attribute <tt>method</tt>.
|
169
199
|
def compute
|
170
200
|
if @method==:two_step
|
171
|
-
|
201
|
+
compute_two_step_mle
|
172
202
|
elsif @method==:joint
|
173
203
|
compute_one_step_mle
|
174
204
|
elsif @method==:polychoric_series
|
@@ -177,6 +207,8 @@ module Statsample
|
|
177
207
|
raise "Not implemented"
|
178
208
|
end
|
179
209
|
end
|
210
|
+
|
211
|
+
# :section: LL methods
|
180
212
|
# Retrieve log likelihood for actual data.
|
181
213
|
def loglike_data
|
182
214
|
loglike=0
|
@@ -238,6 +270,7 @@ module Statsample
|
|
238
270
|
end
|
239
271
|
end
|
240
272
|
|
273
|
+
# :section: Estimation methods
|
241
274
|
|
242
275
|
# Computation of polychoric correlation using two-step ML estimation.
|
243
276
|
#
|
@@ -249,23 +282,24 @@ module Statsample
|
|
249
282
|
# * Gegenfurtner, K. (1992). PRAXIS: Brent's algorithm for function minimization. Behavior Research Methods, Instruments & Computers, 24(4), 560-564. Available on http://www.allpsych.uni-giessen.de/karl/pdf/03.praxis.pdf
|
250
283
|
# * Uebersax, J.S. (2006). The tetrachoric and polychoric correlation coefficients. Statistical Methods for Rater Agreement web site. 2006. Available at: http://john-uebersax.com/stat/tetra.htm . Accessed February, 11, 2010
|
251
284
|
#
|
252
|
-
def
|
285
|
+
def compute_two_step_mle
|
253
286
|
if Statsample.has_gsl?
|
254
|
-
|
287
|
+
compute_two_step_mle_gsl
|
255
288
|
else
|
256
|
-
|
289
|
+
compute_two_step_mle_ruby
|
257
290
|
end
|
258
291
|
end
|
259
292
|
|
260
|
-
#
|
293
|
+
# Compute two step ML estimation using only ruby.
|
261
294
|
|
262
|
-
def
|
295
|
+
def compute_two_step_mle_ruby #:nodoc:
|
263
296
|
|
264
297
|
f=proc {|rho|
|
265
298
|
pr=Processor.new(@alpha,@beta, rho, @matrix)
|
266
299
|
pr.loglike
|
267
300
|
}
|
268
|
-
|
301
|
+
|
302
|
+
@log=_("Two step minimization using GSL Brent method (pure ruby)\n")
|
269
303
|
min=Minimization::Brent.new(-0.9999,0.9999,f)
|
270
304
|
min.epsilon=@epsilon
|
271
305
|
min.expected=0
|
@@ -277,8 +311,9 @@ module Statsample
|
|
277
311
|
|
278
312
|
end
|
279
313
|
|
314
|
+
# Compute two step ML estimation using gsl.
|
280
315
|
|
281
|
-
def
|
316
|
+
def compute_two_step_mle_gsl
|
282
317
|
|
283
318
|
fn1=GSL::Function.alloc {|rho|
|
284
319
|
pr=Processor.new(@alpha,@beta, rho, @matrix)
|
@@ -321,7 +356,7 @@ module Statsample
|
|
321
356
|
end
|
322
357
|
|
323
358
|
|
324
|
-
def compute_derivatives_vector(v,df)
|
359
|
+
def compute_derivatives_vector(v,df) # :nodoc:
|
325
360
|
new_rho=v[0]
|
326
361
|
new_alpha=v[1, @nr-1]
|
327
362
|
new_beta=v[@nr, @nc-1]
|
@@ -339,15 +374,19 @@ module Statsample
|
|
339
374
|
df[offset+i]=-pr.fd_loglike_b(i)
|
340
375
|
}
|
341
376
|
end
|
342
|
-
|
377
|
+
# Compute joint ML estimation.
|
378
|
+
# Uses compute_one_step_mle_with_derivatives() by default.
|
343
379
|
def compute_one_step_mle
|
344
380
|
compute_one_step_mle_with_derivatives
|
345
381
|
end
|
346
382
|
|
347
|
-
|
383
|
+
# Compute Polychoric correlation with joint estimate, using
|
384
|
+
# derivative based minimization method.
|
385
|
+
#
|
386
|
+
# Much faster than method without derivatives.
|
348
387
|
def compute_one_step_mle_with_derivatives
|
349
388
|
# Get initial values with two-step approach
|
350
|
-
|
389
|
+
compute_two_step_mle
|
351
390
|
# Start iteration with past values
|
352
391
|
rho=@r
|
353
392
|
cut_alpha=@alpha
|
@@ -373,7 +412,7 @@ module Statsample
|
|
373
412
|
my_func.set_params(parameters) # parameters
|
374
413
|
|
375
414
|
x = GSL::Vector.alloc(parameters.dup)
|
376
|
-
minimizer = GSL::MultiMin::FdfMinimizer.alloc(
|
415
|
+
minimizer = GSL::MultiMin::FdfMinimizer.alloc(minimizer_type_joint_derivative,np)
|
377
416
|
minimizer.set(my_func, x, 1, 1e-3)
|
378
417
|
|
379
418
|
iter = 0
|
@@ -409,14 +448,16 @@ module Statsample
|
|
409
448
|
|
410
449
|
end
|
411
450
|
|
412
|
-
# Compute Polychoric correlation with joint estimate
|
451
|
+
# Compute Polychoric correlation with joint estimate, using
|
452
|
+
# derivative-less minimization method.
|
453
|
+
#
|
413
454
|
# Rho and thresholds are estimated at same time.
|
414
455
|
# Code based on R package "polycor", by J.Fox.
|
415
456
|
#
|
416
457
|
|
417
458
|
def compute_one_step_mle_without_derivatives
|
418
459
|
# Get initial values with two-step approach
|
419
|
-
|
460
|
+
compute_two_step_mle
|
420
461
|
# Start iteration with past values
|
421
462
|
rho=@r
|
422
463
|
cut_alpha=@alpha
|
@@ -452,7 +493,7 @@ module Statsample
|
|
452
493
|
ss = GSL::Vector.alloc(np)
|
453
494
|
ss.set_all(1.0)
|
454
495
|
|
455
|
-
minimizer = GSL::MultiMin::FMinimizer.alloc(
|
496
|
+
minimizer = GSL::MultiMin::FMinimizer.alloc(minimizer_type_joint_no_derivative,np)
|
456
497
|
minimizer.set(my_func, x, ss)
|
457
498
|
|
458
499
|
iter = 0
|
@@ -9,17 +9,28 @@ module Statsample
|
|
9
9
|
# Tetrachoric correlation matrix.
|
10
10
|
# Order of rows and columns depends on Dataset#fields order
|
11
11
|
def self.tetrachoric_correlation_matrix(ds)
|
12
|
-
|
12
|
+
cache={}
|
13
|
+
matrix=ds.collect_matrix do |row,col|
|
13
14
|
if row==col
|
14
15
|
1.0
|
15
16
|
else
|
16
17
|
begin
|
17
|
-
|
18
|
+
if cache[[col,row]].nil?
|
19
|
+
r=tetrachoric(ds[row],ds[col])
|
20
|
+
cache[[row,col]]=r
|
21
|
+
r
|
22
|
+
else
|
23
|
+
cache[[col,row]]
|
24
|
+
end
|
18
25
|
rescue RuntimeError
|
19
26
|
nil
|
20
27
|
end
|
21
28
|
end
|
22
29
|
end
|
30
|
+
|
31
|
+
matrix.extend CovariateMatrix
|
32
|
+
matrix.fields=ds.fields
|
33
|
+
matrix
|
23
34
|
end
|
24
35
|
# Compute tetrachoric correlation.
|
25
36
|
#
|
@@ -18,7 +18,7 @@ describe Statsample::Bivariate::Polychoric::Processor do
|
|
18
18
|
|
19
19
|
end
|
20
20
|
it "should return informacion matrix" do
|
21
|
-
|
21
|
+
@processor.information_matrix.inverse.should be_instance_of(::Matrix)
|
22
22
|
end
|
23
23
|
it "fd a loglike should be equal usign eq.6 and eq.13" do
|
24
24
|
2.times {|k|
|
@@ -50,13 +50,13 @@ describe Statsample::Bivariate::Polychoric do
|
|
50
50
|
@poly.threshold_x[1].should be_close(1.137 ,0.001)
|
51
51
|
end
|
52
52
|
it "should compute two step mle with ruby" do
|
53
|
-
@poly.
|
53
|
+
@poly.compute_two_step_mle_ruby
|
54
54
|
check_two_step
|
55
55
|
end
|
56
56
|
|
57
57
|
it "compute two-step with gsl" do
|
58
58
|
pending("requires rb-gsl") unless Statsample.has_gsl?
|
59
|
-
@poly.
|
59
|
+
@poly.compute_two_step_mle_gsl
|
60
60
|
check_two_step
|
61
61
|
end
|
62
62
|
|
@@ -26,12 +26,12 @@ describe Statsample::Bivariate::Tetrachoric do
|
|
26
26
|
@poly = Statsample::Bivariate::Polychoric.new(@matrix)
|
27
27
|
end
|
28
28
|
it "should return similar values for two step ruby" do
|
29
|
-
@poly.
|
29
|
+
@poly.compute_two_step_mle_ruby
|
30
30
|
@tetra.r.should be_close(@poly.r,0.0001)
|
31
31
|
end
|
32
32
|
if Statsample.has_gsl?
|
33
33
|
it "should return similar values for two step using gsl" do
|
34
|
-
@poly.
|
34
|
+
@poly.compute_two_step_mle_gsl
|
35
35
|
@tetra.r.should be_close(@poly.r,0.0001)
|
36
36
|
end
|
37
37
|
else
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 13
|
8
|
-
-
|
9
|
-
version: 0.13.
|
8
|
+
- 4
|
9
|
+
version: 0.13.4
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Claudio Bustos
|
@@ -35,7 +35,7 @@ cert_chain:
|
|
35
35
|
rpP0jjs0
|
36
36
|
-----END CERTIFICATE-----
|
37
37
|
|
38
|
-
date: 2010-
|
38
|
+
date: 2010-08-18 00:00:00 -04:00
|
39
39
|
default_executable:
|
40
40
|
dependencies:
|
41
41
|
- !ruby/object:Gem::Dependency
|
metadata.gz.sig
CHANGED
Binary file
|