statsample 0.11.2 → 0.12.0
Sign up to get free protection for your applications and to get access to all the features.
- data.tar.gz.sig +1 -2
- data/History.txt +11 -0
- data/Manifest.txt +4 -0
- data/README.txt +14 -5
- data/Rakefile +24 -3
- data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
- data/doc_latex/manual/equations.tex +78 -0
- data/examples/reliability.rb +1 -1
- data/lib/distribution.rb +5 -1
- data/lib/distribution/normalbivariate.rb +7 -1
- data/lib/distribution/normalmultivariate.rb +73 -0
- data/lib/distribution/t.rb +34 -1
- data/lib/statsample.rb +2 -1
- data/lib/statsample/anova/twoway.rb +1 -1
- data/lib/statsample/bivariate/polychoric.rb +190 -69
- data/lib/statsample/factor/pca.rb +1 -1
- data/lib/statsample/graph/svgscatterplot.rb +10 -1
- data/lib/statsample/reliability.rb +38 -191
- data/lib/statsample/reliability/multiscaleanalysis.rb +87 -0
- data/lib/statsample/reliability/scaleanalysis.rb +204 -0
- data/po/es/statsample.mo +0 -0
- data/po/es/statsample.po +193 -49
- data/po/statsample.pot +173 -40
- data/test/test_bivariate_polychoric.rb +6 -6
- data/test/test_distribution.rb +1 -1
- data/test/test_reliability.rb +87 -8
- data/test/test_vector.rb +0 -8
- metadata +44 -36
- metadata.gz.sig +0 -0
data.tar.gz.sig
CHANGED
@@ -1,2 +1 @@
|
|
1
|
-
|
2
|
-
�VW��}�5�j�}�i������$�RR�U�����^c�B�Ҽ�^h��>*���������@�����QhR��Τ�v��[��W3�\���]{!��\P��J��M�D�,�Hq-��b��%g�{U5
|
1
|
+
�HG��@��^��uH�
|
data/History.txt
CHANGED
@@ -1,3 +1,14 @@
|
|
1
|
+
=== 0.12.0 / 2010-06-09
|
2
|
+
|
3
|
+
* Modified Rakefile to remove dependencies based on C extensions. These are moved to statsample-optimization
|
4
|
+
* T test with unequal variance fixed on i686
|
5
|
+
* API Change: Renamed Reliability::ItemAnalysis and moved to independent file
|
6
|
+
* New Reliability::MultiScaleAnalysis for easy analysis of scales on the same survey, including reliability, correlation matrix and Factor Analysis
|
7
|
+
* Updated README to reflect changes on Reliability module
|
8
|
+
* SvgGraph works with reportbuilder.
|
9
|
+
* Added methods on Polychoric based on Olsson(1979): the idea is to estimate using second derivatives.
|
10
|
+
* Distribution test changed (reduced precision on 32-bit systems)
|
11
|
+
|
1
12
|
=== 0.11.2 / 2010-05-05
|
2
13
|
* Updated dependency for 'extendedmatrix' to 0.2 (Matrix#build method)
|
3
14
|
|
data/Manifest.txt
CHANGED
@@ -10,6 +10,7 @@ data/repeated_fields.csv
|
|
10
10
|
data/test_binomial.csv
|
11
11
|
data/tetmat_matrix.txt
|
12
12
|
data/tetmat_test.txt
|
13
|
+
doc_latex/manual/equations.tex
|
13
14
|
examples/correlation_matrix.rb
|
14
15
|
examples/dataset.rb
|
15
16
|
examples/dominance_analysis.rb
|
@@ -30,6 +31,7 @@ lib/distribution/chisquare.rb
|
|
30
31
|
lib/distribution/f.rb
|
31
32
|
lib/distribution/normal.rb
|
32
33
|
lib/distribution/normalbivariate.rb
|
34
|
+
lib/distribution/normalmultivariate.rb
|
33
35
|
lib/distribution/t.rb
|
34
36
|
lib/spss.rb
|
35
37
|
lib/statsample.rb
|
@@ -79,6 +81,8 @@ lib/statsample/regression/multiple/matrixengine.rb
|
|
79
81
|
lib/statsample/regression/multiple/rubyengine.rb
|
80
82
|
lib/statsample/regression/simple.rb
|
81
83
|
lib/statsample/reliability.rb
|
84
|
+
lib/statsample/reliability/multiscaleanalysis.rb
|
85
|
+
lib/statsample/reliability/scaleanalysis.rb
|
82
86
|
lib/statsample/resample.rb
|
83
87
|
lib/statsample/srs.rb
|
84
88
|
lib/statsample/test.rb
|
data/README.txt
CHANGED
@@ -15,6 +15,7 @@ Include:
|
|
15
15
|
* Tests: F, T, Levene, U-Mannwhitney.
|
16
16
|
* Regression: Simple, Multiple (OLS), Probit and Logit
|
17
17
|
* Factorial Analysis: Extraction (PCA and Principal Axis), Rotation (Varimax, Equimax, Quartimax) and Parallel Analysis, for estimation of number of factors.
|
18
|
+
* Reliability analysis for simple scale and helpers to analyze multiple scales using factor analysis and correlations
|
18
19
|
* Dominance Analysis, with multivariate dependent and bootstrap (Azen & Budescu)
|
19
20
|
* Sample calculation related formulas
|
20
21
|
* Creates reports on text, html and rtf, using ReportBuilder gem
|
@@ -50,7 +51,9 @@ Include:
|
|
50
51
|
* Statsample::Mx : Write Mx Files
|
51
52
|
* Statsample::GGobi : Write Ggobi files
|
52
53
|
* Module Statsample::Crosstab provides function to create crosstab for categorical data
|
53
|
-
* Reliability
|
54
|
+
* Module Statsample::Reliability provides functions to analyze scales.
|
55
|
+
* Class ScaleAnalysis provides statistics like mean and standard deviation for a scale, Cronbach's alpha and standardized Cronbach's alpha, and for each item: mean, correlation with total scale, mean if deleted, Cronbach's alpha if deleted.
|
56
|
+
* Class MultiScaleAnalysis provides a DSL to easily analyze reliability of multiple scales and retrieve correlation matrix and factor analysis of them.
|
54
57
|
* Module Statsample::SRS (Simple Random Sampling) provides a lot of functions to estimate standard error for several type of samples
|
55
58
|
* Module Statsample::Test provides several methods and classes to perform inferential statistics
|
56
59
|
* Statsample::Test::Levene
|
@@ -104,16 +107,22 @@ Optional:
|
|
104
107
|
|
105
108
|
* Source code on github: http://github.com/clbustos/statsample
|
106
109
|
* API: http://ruby-statsample.rubyforge.org/statsample/
|
107
|
-
* Bug report and feature request: http://
|
110
|
+
* Bug report and feature request: http://github.com/clbustos/statsample/issues
|
108
111
|
|
109
112
|
|
110
113
|
== INSTALL:
|
111
114
|
|
112
|
-
sudo gem install
|
115
|
+
$ sudo gem install statsample
|
116
|
+
|
117
|
+
On *nix, you should install statsample-optimization to retrieve gems gsl, statistics2 and a C extension to speed some methods.
|
118
|
+
|
119
|
+
$ sudo gem install statsample-optimization
|
120
|
+
|
121
|
+
To use it on Ubuntu, I recommend installing build-essential and libgsl0-dev using apt-get and compiling ruby 1.8 or 1.9 from source code.
|
122
|
+
|
123
|
+
$ sudo apt-get install build-essential libgsl0-dev
|
113
124
|
|
114
|
-
For optimization on *nix env
|
115
125
|
|
116
|
-
sudo gem install gsl ruby-statsample-optimization
|
117
126
|
|
118
127
|
Available setup.rb file
|
119
128
|
|
data/Rakefile
CHANGED
@@ -5,7 +5,8 @@ $:.unshift(File.dirname(__FILE__)+'/lib/')
|
|
5
5
|
|
6
6
|
require 'rubygems'
|
7
7
|
require 'hoe'
|
8
|
-
require '
|
8
|
+
require 'statsample'
|
9
|
+
|
9
10
|
Hoe.plugin :git
|
10
11
|
|
11
12
|
desc "Ruby Lint"
|
@@ -39,8 +40,28 @@ h=Hoe.spec('statsample') do
|
|
39
40
|
#self.testlib=:minitest
|
40
41
|
self.rubyforge_name = "ruby-statsample"
|
41
42
|
self.developer('Claudio Bustos', 'clbustos@gmail.com')
|
42
|
-
self.extra_deps << ["spreadsheet","~>0.6.0"] << ["svg-graph", "~>1.0"] << ["reportbuilder", "~>1.0"] << ["minimization", "~>0.2.0"] << ["fastercsv"] << ["dirty-memoize", "~>0.0"] << ["
|
43
|
-
self.
|
43
|
+
self.extra_deps << ["spreadsheet","~>0.6.0"] << ["svg-graph", "~>1.0"] << ["reportbuilder", "~>1.0"] << ["minimization", "~>0.2.0"] << ["fastercsv"] << ["dirty-memoize", "~>0.0"] << ["extendmatrix","~>0.2.0"]
|
44
|
+
self.extra_dev_deps << ["shoulda"]
|
45
|
+
self.clean_globs << "test/images/*" << "demo/item_analysis/*" << "demo/Regression"
|
46
|
+
self.post_install_message = <<-EOF
|
47
|
+
***************************************************
|
48
|
+
Thanks for installing statsample.
|
49
|
+
|
50
|
+
On *nix, you should install statsample-optimization
|
51
|
+
to retrieve gems gsl, statistics2 and a C extension
|
52
|
+
to speed some methods.
|
53
|
+
|
54
|
+
$sudo gem install statsample-optimization
|
55
|
+
|
56
|
+
To use it, on Ubuntu I recommend install
|
57
|
+
build-essential and libgsl0-dev using apt-get and
|
58
|
+
compile ruby 1.8 or 1.9 from source code first.
|
59
|
+
|
60
|
+
$sudo apt-get install build-essential libgsl0-dev
|
61
|
+
|
62
|
+
|
63
|
+
*****************************************************
|
64
|
+
EOF
|
44
65
|
self.need_rdoc=false
|
45
66
|
end
|
46
67
|
|
Binary file
|
@@ -0,0 +1,78 @@
|
|
1
|
+
\part{Equations}
|
2
|
+
\section{Convention}
|
3
|
+
\begin{align*}
|
4
|
+
n &= \text{sample size}\\
|
5
|
+
N &= \text{population size}\\
|
6
|
+
p &= \text{proportion inside a sample}\\
|
7
|
+
P &= \text{proportion inside a population}
|
8
|
+
\end{align*}
|
9
|
+
\section{Ruby::Regression::Multiple}
|
10
|
+
|
11
|
+
To compute the standard error of coefficients, you obtain the estimated variance-covariance matrix of error.
|
12
|
+
|
13
|
+
Let $\mathbf{X}$ be the matrix of predictor data, including a constant column; $\mathbf{MSE}$ the mean square error; $SSE$ the sum of squares of errors; $n$ the number of cases; and $p$ the number of predictors.
|
14
|
+
|
15
|
+
\begin{equation}
|
16
|
+
\mathbf{MSE}=\frac{SSE}{n-p-1}
|
17
|
+
\end{equation}
|
18
|
+
|
19
|
+
\begin{equation}
|
20
|
+
\mathbf{E}=(\mathbf{X'}\mathbf{X})^{-1}\mathbf{MSE}
|
21
|
+
\end{equation}
|
22
|
+
|
23
|
+
The square roots of the diagonal elements should be the standard errors.
|
24
|
+
|
25
|
+
|
26
|
+
\section{Ruby::SRS}
|
27
|
+
Finite Population Correction (FPC) is used in the standard error calculation for populations below 10,000. Function
|
28
|
+
\begin{verbatim}
|
29
|
+
fpc_var(sam,pop)
|
30
|
+
\end{verbatim}
|
31
|
+
calculate FPC for variance with
|
32
|
+
\begin{equation}
|
33
|
+
fpc_{var} = \frac{N-n} {N-1}
|
34
|
+
\end{equation}
|
35
|
+
|
36
|
+
with n as sam and N as pop
|
37
|
+
|
38
|
+
Function
|
39
|
+
\begin{verbatim}
|
40
|
+
fpc = fpc(sam,pop)
|
41
|
+
\end{verbatim}
|
42
|
+
|
43
|
+
calculate FPC for standard deviation with
|
44
|
+
\begin{equation}
|
45
|
+
fpc_{sd} = \sqrt{\frac{N-n} {N-1}}
|
46
|
+
\label{fpc}
|
47
|
+
\end{equation}
|
48
|
+
with n as sample size and N as population size.
|
49
|
+
|
50
|
+
\subsection{Sample Size estimation for proportions}
|
51
|
+
|
52
|
+
On infinite populations, you should use the method
|
53
|
+
\begin{verbatim}
|
54
|
+
estimation_n0(d,prop,margin=0.95)
|
55
|
+
\end{verbatim}
|
56
|
+
which uses
|
57
|
+
\begin{equation}
|
58
|
+
n = \frac{t^2(pq)}{d^2}
|
59
|
+
\label{n_i}
|
60
|
+
\end{equation}
|
61
|
+
where
|
62
|
+
\begin{align*}
|
63
|
+
t &= \text{t value for given level of confidence ( 1.96 for 95\% )}\\
|
64
|
+
d &= \text{margin of error}
|
65
|
+
\end{align*}
|
66
|
+
|
67
|
+
On finite populations, you should use
|
68
|
+
\begin{verbatim}
|
69
|
+
estimation_n(d,prop,n_pobl, margin=0.95)
|
70
|
+
\end{verbatim}
|
71
|
+
which uses
|
72
|
+
\begin{equation}
|
73
|
+
n = \frac{n_i}{1+(\frac{n_i-1}{N})}
|
74
|
+
\end{equation}
|
75
|
+
|
76
|
+
where $n_i$ is $n$ from equation \ref{n_i} and $N$ is the population size.
|
77
|
+
|
78
|
+
|
data/examples/reliability.rb
CHANGED
data/lib/distribution.rb
CHANGED
@@ -15,7 +15,13 @@ module Distribution
|
|
15
15
|
class << self
|
16
16
|
SIDE=0.1 # :nodoc:
|
17
17
|
LIMIT=5 # :nodoc:
|
18
|
-
|
18
|
+
# Return the partial derivative of cdf over x, with y and rho constant
|
19
|
+
# Reference:
|
20
|
+
# * Tallis, 1962, p.346, cited by Olsson, 1979
|
21
|
+
def partial_derivative_cdf_x(x,y,rho)
|
22
|
+
Distribution::Normal.pdf(x) * Distribution::Normal.cdf((y-rho*x).quo( Math::sqrt( 1 - rho**2 )))
|
23
|
+
end
|
24
|
+
alias :pd_cdf_x :partial_derivative_cdf_x
|
19
25
|
# Probability density function for a given x, y and rho value.
|
20
26
|
#
|
21
27
|
# Source: http://en.wikipedia.org/wiki/Multivariate_normal_distribution
|
@@ -0,0 +1,73 @@
|
|
1
|
+
module Distribution
|
2
|
+
# Calculate cdf and inverse cdf for Multivariate Distribution.
|
3
|
+
module NormalMultivariate
|
4
|
+
class << self
|
5
|
+
# Returns multivariate cdf distribution
|
6
|
+
# * a is the array of lower values
|
7
|
+
# * b is the array of higher values
|
8
|
+
# * s is a symmetric positive definite covariance matrix
|
9
|
+
def cdf(aa,bb,sigma, epsilon=0.0001, alpha=2.5, max_iterations=100) # :nodoc:
|
10
|
+
raise "Doesn't work yet"
|
11
|
+
a=[nil]+aa
|
12
|
+
b=[nil]+bb
|
13
|
+
m=aa.size
|
14
|
+
sigma=sigma.to_gsl if sigma.respond_to? :to_gsl
|
15
|
+
|
16
|
+
cc=GSL::Linalg::Cholesky.decomp(sigma)
|
17
|
+
c=cc.lower
|
18
|
+
intsum=0
|
19
|
+
varsum=0
|
20
|
+
n=0
|
21
|
+
d=Array.new(m+1,nil)
|
22
|
+
e=Array.new(m+1,nil)
|
23
|
+
f=Array.new(m+1,nil)
|
24
|
+
(1..m).each {|i|
|
25
|
+
d[i]=0.0 if a[i].nil?
|
26
|
+
e[i]=1.0 if b[i].nil?
|
27
|
+
}
|
28
|
+
d[1]=uPhi(a[1].quo( c[0,0])) unless d[1]==0
|
29
|
+
e[1]=uPhi(b[1].quo( c[0,0])) unless e[1]==1
|
30
|
+
f[1]=e[1]-d[1]
|
31
|
+
|
32
|
+
error=1000
|
33
|
+
begin
|
34
|
+
w=(m+1).times.collect {|i| rand*epsilon}
|
35
|
+
y=[]
|
36
|
+
(2..m).each do |i|
|
37
|
+
y[i-1]=iPhi(d[i-1] + w[i-1] * (e[i-1] - d[i-1]))
|
38
|
+
sumc=0
|
39
|
+
(1..(i-1)).each do |j|
|
40
|
+
sumc+=c[i-1, j-1]*y[j]
|
41
|
+
end
|
42
|
+
|
43
|
+
if a[i]!=nil
|
44
|
+
d[i]=uPhi((a[i]-sumc).quo(c[i-1,i-1]))
|
45
|
+
end
|
46
|
+
# puts "sumc:#{sumc}"
|
47
|
+
|
48
|
+
if b[i]!=nil
|
49
|
+
#puts "e[#{i}] :#{c[i-1,i-1]}"
|
50
|
+
e[i]=uPhi((b[i]-sumc).quo(c[i-1, i-1]))
|
51
|
+
end
|
52
|
+
f[i]=(e[i]-d[i])*f[i-1]
|
53
|
+
end
|
54
|
+
intsum+=intsum+f[m]
|
55
|
+
varsum=varsum+f[m]**2
|
56
|
+
n+=1
|
57
|
+
error=alpha*Math::sqrt((varsum.quo(n) - (intsum.quo(n))**2).quo(n))
|
58
|
+
end while(error>epsilon and n<max_iterations)
|
59
|
+
|
60
|
+
f=intsum.quo(n)
|
61
|
+
#p intsum
|
62
|
+
#puts "f:#{f}, n:#{n}, error:#{error}"
|
63
|
+
f
|
64
|
+
end
|
65
|
+
def iPhi(pr)
|
66
|
+
Distribution::Normal.p_value(pr)
|
67
|
+
end
|
68
|
+
def uPhi(x)
|
69
|
+
Distribution::Normal.cdf(x)
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
73
|
+
end
|
data/lib/distribution/t.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
require 'rbconfig'
|
1
2
|
module Distribution
|
2
3
|
|
3
4
|
# Calculate cdf and inverse cdf for T Distribution.
|
@@ -15,8 +16,40 @@ module Distribution
|
|
15
16
|
# with n degrees of freedom over (-Infty, x].
|
16
17
|
#
|
17
18
|
def cdf(x,k)
|
18
|
-
|
19
|
+
if RbConfig::CONFIG['arch']=~/i686/
|
20
|
+
tdist(k, x)
|
21
|
+
else
|
22
|
+
Statistics2.tdist(k,x)
|
23
|
+
end
|
19
24
|
end
|
25
|
+
|
26
|
+
# Returns the integral of t-distribution with n degrees of freedom over (-Infty, x].
|
27
|
+
def tdist(n, t)
|
28
|
+
p_t(n, t)
|
29
|
+
end
|
30
|
+
|
31
|
+
# t-distribution ([1])
|
32
|
+
# (-\infty, x]
|
33
|
+
def p_t(df, t)
|
34
|
+
c2 = df.to_f / (df + t * t);
|
35
|
+
s = Math.sqrt(1.0 - c2)
|
36
|
+
s = -s if t < 0.0
|
37
|
+
p = 0.0;
|
38
|
+
i = df % 2 + 2
|
39
|
+
while i <= df
|
40
|
+
p += s
|
41
|
+
s *= (i - 1) * c2 / i
|
42
|
+
i += 2
|
43
|
+
end
|
44
|
+
if df.is_a? Float or df & 1 != 0
|
45
|
+
0.5+(p*Math.sqrt(c2)+Math.atan(t/Math.sqrt(df)))/Math::PI
|
46
|
+
else
|
47
|
+
(1.0 + p) / 2.0
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
|
52
|
+
|
20
53
|
end
|
21
54
|
end
|
22
55
|
end
|
data/lib/statsample.rb
CHANGED
@@ -23,6 +23,7 @@ require 'matrix'
|
|
23
23
|
require 'distribution'
|
24
24
|
require 'dirty-memoize'
|
25
25
|
require 'reportbuilder'
|
26
|
+
|
26
27
|
class Numeric
|
27
28
|
def square ; self * self ; end
|
28
29
|
end
|
@@ -111,7 +112,7 @@ module Statsample
|
|
111
112
|
false
|
112
113
|
end
|
113
114
|
end
|
114
|
-
VERSION = '0.
|
115
|
+
VERSION = '0.12.0'
|
115
116
|
SPLIT_TOKEN = ","
|
116
117
|
autoload(:Database, 'statsample/converters')
|
117
118
|
autoload(:Anova, 'statsample/anova')
|
@@ -175,7 +175,7 @@ module Statsample
|
|
175
175
|
df_b=_q-1
|
176
176
|
df_within=(_p*_q)*(n-1)
|
177
177
|
|
178
|
-
opts_default={:name=>_("Anova Two-Way on
|
178
|
+
opts_default={:name=>_("Anova Two-Way on %s") % @ds[dep_var].name,
|
179
179
|
:name_a=>@ds[a_var].name,
|
180
180
|
:name_b=>@ds[b_var].name,
|
181
181
|
:summary_descriptives=>true,
|
@@ -75,6 +75,65 @@ module Statsample
|
|
75
75
|
# * Drasgow F. (2006). Polychoric and polyserial correlations. In Kotz L, Johnson NL (Eds.), Encyclopedia of statistical sciences. Vol. 7 (pp. 69-74). New York: Wiley.
|
76
76
|
|
77
77
|
class Polychoric
|
78
|
+
|
79
|
+
class Processor
|
80
|
+
attr_reader :alpha, :beta, :rho
|
81
|
+
def initialize(alpha,beta,rho)
|
82
|
+
@alpha=alpha
|
83
|
+
@beta=beta
|
84
|
+
@nr=@alpha.size+1
|
85
|
+
@nc=@beta.size+1
|
86
|
+
@rho=rho
|
87
|
+
@pd=nil
|
88
|
+
end
|
89
|
+
def bipdf(i,j)
|
90
|
+
Distribution::NormalBivariate.pdf(a(i), b(j), rho)
|
91
|
+
end
|
92
|
+
def a(i)
|
93
|
+
i < 0 ? -100 : (i==@nr-1 ? 100 : alpha[i])
|
94
|
+
end
|
95
|
+
def b(j)
|
96
|
+
j < 0 ? -100 : (j==@nc-1 ? 100 : beta[j])
|
97
|
+
end
|
98
|
+
# Equation(10) from Olsson(1979)
|
99
|
+
def fd_loglike_cell_a(i,j,k)
|
100
|
+
if k==i
|
101
|
+
Distribution::NormalBivariate.pd_cdf_x(a(k),b(j), rho) - Distribution::NormalBivariate.pd_cdf_x(a(k),b(j-1),rho)
|
102
|
+
elsif k==(i-1)
|
103
|
+
-Distribution::NormalBivariate.pd_cdf_x(a(k),b(j),rho) + Distribution::NormalBivariate.pd_cdf_x(a(k),b(j-1),rho)
|
104
|
+
else
|
105
|
+
0
|
106
|
+
end
|
107
|
+
|
108
|
+
end
|
109
|
+
# phi_ij for each i and j
|
110
|
+
# Uses equation(4) from Olsson(1979)
|
111
|
+
def pd
|
112
|
+
if @pd.nil?
|
113
|
+
@pd=@nr.times.collect{ [0] * @nc}
|
114
|
+
pc=@nr.times.collect{ [0] * @nc}
|
115
|
+
@nr.times do |i|
|
116
|
+
@nc.times do |j|
|
117
|
+
|
118
|
+
if i==@nr-1 and j==@nc-1
|
119
|
+
@pd[i][j]=1.0
|
120
|
+
else
|
121
|
+
a=(i==@nr-1) ? 100: alpha[i]
|
122
|
+
b=(j==@nc-1) ? 100: beta[j]
|
123
|
+
#puts "a:#{a} b:#{b}"
|
124
|
+
@pd[i][j]=Distribution::NormalBivariate.cdf(a, b, rho)
|
125
|
+
end
|
126
|
+
pc[i][j] = @pd[i][j]
|
127
|
+
@pd[i][j] = @pd[i][j] - pc[i-1][j] if i>0
|
128
|
+
@pd[i][j] = @pd[i][j] - pc[i][j-1] if j>0
|
129
|
+
@pd[i][j] = @pd[i][j] + pc[i-1][j-1] if (i>0 and j>0)
|
130
|
+
end
|
131
|
+
end
|
132
|
+
end
|
133
|
+
@pd
|
134
|
+
end
|
135
|
+
end
|
136
|
+
|
78
137
|
include GetText
|
79
138
|
include DirtyMemoize
|
80
139
|
bindtextdomain("statsample")
|
@@ -145,6 +204,7 @@ module Statsample
|
|
145
204
|
self.send("#{k}=",v) if self.respond_to? k
|
146
205
|
}
|
147
206
|
@r=nil
|
207
|
+
@pd=nil
|
148
208
|
compute_basic_parameters
|
149
209
|
end
|
150
210
|
# Returns the polychoric correlation
|
@@ -174,7 +234,7 @@ module Statsample
|
|
174
234
|
raise "Not implemented"
|
175
235
|
end
|
176
236
|
end
|
177
|
-
|
237
|
+
# Retrieve log likelihood for actual data.
|
178
238
|
def loglike_data
|
179
239
|
loglike=0
|
180
240
|
@nr.times do |i|
|
@@ -188,97 +248,147 @@ module Statsample
|
|
188
248
|
end
|
189
249
|
loglike
|
190
250
|
end
|
251
|
+
|
252
|
+
# Chi Square of model
|
191
253
|
def chi_square
|
192
254
|
if @loglike_model.nil?
|
193
255
|
compute
|
194
256
|
end
|
195
257
|
-2*(@loglike_model-loglike_data)
|
196
258
|
end
|
259
|
+
|
197
260
|
def chi_square_df
|
198
261
|
(@nr*@nc)-@nc-@nr
|
199
262
|
end
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
@nc.times { |j|
|
263
|
+
|
264
|
+
|
265
|
+
|
266
|
+
|
267
|
+
# Retrieve all cell probabilities for given alpha, beta and rho
|
268
|
+
def cell_probabilities(alpha,beta,rho)
|
269
|
+
pd=@nr.times.collect{ [0] * @nc}
|
270
|
+
pc=@nr.times.collect{ [0] * @nc}
|
271
|
+
@nr.times do |i|
|
272
|
+
@nc.times do |j|
|
273
|
+
|
212
274
|
if i==@nr-1 and j==@nc-1
|
213
275
|
pd[i][j]=1.0
|
214
|
-
a=100
|
215
|
-
b=100
|
216
276
|
else
|
217
277
|
a=(i==@nr-1) ? 100: alpha[i]
|
218
278
|
b=(j==@nc-1) ? 100: beta[j]
|
279
|
+
#puts "a:#{a} b:#{b}"
|
219
280
|
pd[i][j]=Distribution::NormalBivariate.cdf(a, b, rho)
|
220
281
|
end
|
221
282
|
pc[i][j] = pd[i][j]
|
222
283
|
pd[i][j] = pd[i][j] - pc[i-1][j] if i>0
|
223
284
|
pd[i][j] = pd[i][j] - pc[i][j-1] if j>0
|
224
285
|
pd[i][j] = pd[i][j] + pc[i-1][j-1] if (i>0 and j>0)
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
else
|
230
|
-
alpha_m1=alpha[i-1]
|
231
|
-
end
|
232
|
-
|
233
|
-
if j==0
|
234
|
-
beta_m1=-10
|
235
|
-
else
|
236
|
-
beta_m1=beta[j-1]
|
237
|
-
end
|
238
|
-
|
239
|
-
loglike+= (@matrix[i,j].quo(pij))*(Distribution::NormalBivariate.pdf(a,b,rho) - Distribution::NormalBivariate.pdf(alpha_m1, b,rho) - Distribution::NormalBivariate.pdf(a, beta_m1,rho) + Distribution::NormalBivariate.pdf(alpha_m1, beta_m1,rho) )
|
240
|
-
|
241
|
-
}
|
242
|
-
}
|
243
|
-
#puts "derivative: #{loglike}"
|
244
|
-
-loglike
|
286
|
+
end
|
287
|
+
end
|
288
|
+
@pd=pd
|
289
|
+
pd
|
245
290
|
end
|
246
291
|
def loglike(alpha,beta,rho)
|
247
292
|
if rho.abs>0.9999
|
248
293
|
rho= (rho>0) ? 0.9999 : -0.9999
|
249
294
|
end
|
250
|
-
|
295
|
+
pr=Processor.new(alpha,beta,rho)
|
251
296
|
loglike=0
|
252
|
-
|
253
|
-
|
254
|
-
@nr.times
|
255
|
-
@nc.times
|
256
|
-
|
257
|
-
if i==@nr-1 and j==@nc-1
|
258
|
-
pd[i][j]=1.0
|
259
|
-
else
|
260
|
-
a=(i==@nr-1) ? 100: alpha[i]
|
261
|
-
b=(j==@nc-1) ? 100: beta[j]
|
262
|
-
#puts "a:#{a} b:#{b}"
|
263
|
-
pd[i][j]=Distribution::NormalBivariate.cdf(a, b, rho)
|
264
|
-
|
265
|
-
end
|
266
|
-
pc[i][j] = pd[i][j]
|
267
|
-
pd[i][j] = pd[i][j] - pc[i-1][j] if i>0
|
268
|
-
pd[i][j] = pd[i][j] - pc[i][j-1] if j>0
|
269
|
-
pd[i][j] = pd[i][j] + pc[i-1][j-1] if (i>0 and j>0)
|
270
|
-
res= pd[i][j]
|
271
|
-
#puts "i:#{i} | j:#{j} | ac: #{sprintf("%0.4f", pc[i][j])} | pd: #{sprintf("%0.4f", pd[i][j])} | res:#{sprintf("%0.4f", res)}"
|
272
|
-
if (res<=0)
|
273
|
-
# puts "Correccion"
|
274
|
-
res=1e-16
|
275
|
-
end
|
297
|
+
|
298
|
+
|
299
|
+
@nr.times do |i|
|
300
|
+
@nc.times do |j|
|
301
|
+
res=pr.pd[i][j]+EPSILON
|
276
302
|
loglike+= @matrix[i,j] * Math::log( res )
|
277
|
-
|
278
|
-
|
279
|
-
@pd=pd
|
303
|
+
end
|
304
|
+
end
|
280
305
|
-loglike
|
281
306
|
end
|
307
|
+
# First derivative for rho
|
308
|
+
# Uses equation (9) from Olsson(1979)
|
309
|
+
def fd_loglike_rho(alpha,beta,rho)
|
310
|
+
if rho.abs>0.9999
|
311
|
+
rho= (rho>0) ? 0.9999 : -0.9999
|
312
|
+
end
|
313
|
+
total=0
|
314
|
+
pr=Processor.new(alpha,beta,rho)
|
315
|
+
@nr.times do |i|
|
316
|
+
@nc.times do |j|
|
317
|
+
pi=pr.pd[i][j] + EPSILON
|
318
|
+
total+= (@matrix[i,j] / pi) * (pr.bipdf(i,j)-pr.bipdf(i-1,j)-pr.bipdf(i,j-1)+pr.bipdf(i-1,j-1))
|
319
|
+
end
|
320
|
+
end
|
321
|
+
total
|
322
|
+
end
|
323
|
+
|
324
|
+
# First derivative for alpha_k
|
325
|
+
def fd_loglike_a(alpha,beta,rho,k)
|
326
|
+
fd_loglike_a_eq6(alpha,beta,rho,k)
|
327
|
+
end
|
328
|
+
# Uses equation (6) from Olsson(1979)
|
329
|
+
def fd_loglike_a_eq6(alpha,beta,rho,k)
|
330
|
+
if rho.abs>0.9999
|
331
|
+
rho= (rho>0) ? 0.9999 : -0.9999
|
332
|
+
end
|
333
|
+
pr=Processor.new(alpha,beta,rho)
|
334
|
+
total=0
|
335
|
+
pd=pr.pd
|
336
|
+
@nr.times do |i|
|
337
|
+
@nc.times do |j|
|
338
|
+
total+=@matrix[i,j].quo(pd[i][j]+EPSILON) * pr.fd_loglike_cell_a(i,j,k)
|
339
|
+
end
|
340
|
+
end
|
341
|
+
total
|
342
|
+
end
|
343
|
+
# Uses equation(13) from Olsson(1979)
|
344
|
+
def fd_loglike_a_eq13(alpha,beta,rho,k)
|
345
|
+
if rho.abs>0.9999
|
346
|
+
rho= (rho>0) ? 0.9999 : -0.9999
|
347
|
+
end
|
348
|
+
pr=Processor.new(alpha,beta,rho)
|
349
|
+
total=0
|
350
|
+
a_k=pr.a(k)
|
351
|
+
pd=pr.pd
|
352
|
+
@nc.times do |j|
|
353
|
+
#puts "j: #{j}"
|
354
|
+
#puts "b #{j} : #{b.call(j)}"
|
355
|
+
#puts "b #{j-1} : #{b.call(j-1)}"
|
356
|
+
|
357
|
+
e_1=@matrix[k,j].quo(pd[k][j]+EPSILON) - @matrix[k+1,j].quo(pd[k+1][j]+EPSILON)
|
358
|
+
e_2=Distribution::Normal.pdf(a_k)
|
359
|
+
e_3=Distribution::Normal.cdf((pr.b(j)-rho*a_k).quo(Math::sqrt(1-rho**2))) - Distribution::Normal.cdf((pr.b(j-1)-rho*a_k).quo(Math::sqrt(1-rho**2)))
|
360
|
+
#puts "val #{j}: #{e_1} | #{e_2} | #{e_3}"
|
361
|
+
|
362
|
+
total+= e_1*e_2*e_3
|
363
|
+
end
|
364
|
+
total
|
365
|
+
end
|
366
|
+
# First derivative for beta_m
|
367
|
+
# Uses equation(14) from Olsson(1979)
|
368
|
+
def fd_loglike_b(alpha,beta,rho,m)
|
369
|
+
if rho.abs>0.9999
|
370
|
+
rho= (rho>0) ? 0.9999 : -0.9999
|
371
|
+
end
|
372
|
+
pr=Processor.new(alpha,beta,rho)
|
373
|
+
total=0
|
374
|
+
b_m=pr.b m
|
375
|
+
pd=pr.pd
|
376
|
+
@nr.times do |i|
|
377
|
+
#puts "j: #{j}"
|
378
|
+
#puts "b #{j} : #{b.call(j)}"
|
379
|
+
#puts "b #{j-1} : #{b.call(j-1)}"
|
380
|
+
|
381
|
+
e_1=@matrix[i,m].quo(pd[i][m]+EPSILON) - @matrix[i,m+1].quo(pd[i][m+1]+EPSILON)
|
382
|
+
e_2=Distribution::Normal.pdf(b_m)
|
383
|
+
e_3=Distribution::Normal.cdf((pr.a(i)-rho*b_m).quo(Math::sqrt(1-rho**2))) - Distribution::Normal.cdf((pr.a(i-1)-rho*b_m).quo(Math::sqrt(1-rho**2)))
|
384
|
+
#puts "val #{j}: #{e_1} | #{e_2} | #{e_3}"
|
385
|
+
|
386
|
+
total+= e_1*e_2*e_3
|
387
|
+
end
|
388
|
+
total
|
389
|
+
end
|
390
|
+
|
391
|
+
|
282
392
|
def compute_basic_parameters
|
283
393
|
@nr=@matrix.row_size
|
284
394
|
@nc=@matrix.column_size
|
@@ -333,7 +443,7 @@ module Statsample
|
|
333
443
|
|
334
444
|
def compute_two_step_mle_drasgow_ruby #:nodoc:
|
335
445
|
|
336
|
-
f=proc {|rho|
|
446
|
+
f=proc {|rho|
|
337
447
|
loglike(@alpha,@beta, rho)
|
338
448
|
}
|
339
449
|
@log="Minimizing using GSL Brent method\n"
|
@@ -351,9 +461,9 @@ module Statsample
|
|
351
461
|
|
352
462
|
def compute_two_step_mle_drasgow_gsl #:nodoc:
|
353
463
|
|
354
|
-
|
355
|
-
|
356
|
-
|
464
|
+
fn1=GSL::Function.alloc {|rho|
|
465
|
+
loglike(@alpha,@beta, rho)
|
466
|
+
}
|
357
467
|
@iteration = 0
|
358
468
|
max_iter = @max_iterations
|
359
469
|
m = 0 # initial guess
|
@@ -405,8 +515,19 @@ module Statsample
|
|
405
515
|
parameters=[rho]+cut_alpha+cut_beta
|
406
516
|
minimization = Proc.new { |v, params|
|
407
517
|
rho=v[0]
|
408
|
-
alpha=v[1
|
409
|
-
beta=v[@nr
|
518
|
+
alpha=v[1, @nr-1]
|
519
|
+
beta=v[@nr, @nc-1]
|
520
|
+
|
521
|
+
#puts "f'rho=#{fd_loglike_rho(alpha,beta,rho)}"
|
522
|
+
#(@nr-1).times {|k|
|
523
|
+
# puts "f'a(#{k}) = #{fd_loglike_a(alpha,beta,rho,k)}"
|
524
|
+
# puts "f'a(#{k}) v2 = #{fd_loglike_a2(alpha,beta,rho,k)}"
|
525
|
+
#
|
526
|
+
#}
|
527
|
+
#(@nc-1).times {|k|
|
528
|
+
# puts "f'b(#{k}) = #{fd_loglike_b(alpha,beta,rho,k)}"
|
529
|
+
#}
|
530
|
+
|
410
531
|
loglike(alpha,beta,rho)
|
411
532
|
}
|
412
533
|
np=@nc-1+@nr
|