statsample 0.11.2 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data.tar.gz.sig +1 -2
- data/History.txt +11 -0
- data/Manifest.txt +4 -0
- data/README.txt +14 -5
- data/Rakefile +24 -3
- data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
- data/doc_latex/manual/equations.tex +78 -0
- data/examples/reliability.rb +1 -1
- data/lib/distribution.rb +5 -1
- data/lib/distribution/normalbivariate.rb +7 -1
- data/lib/distribution/normalmultivariate.rb +73 -0
- data/lib/distribution/t.rb +34 -1
- data/lib/statsample.rb +2 -1
- data/lib/statsample/anova/twoway.rb +1 -1
- data/lib/statsample/bivariate/polychoric.rb +190 -69
- data/lib/statsample/factor/pca.rb +1 -1
- data/lib/statsample/graph/svgscatterplot.rb +10 -1
- data/lib/statsample/reliability.rb +38 -191
- data/lib/statsample/reliability/multiscaleanalysis.rb +87 -0
- data/lib/statsample/reliability/scaleanalysis.rb +204 -0
- data/po/es/statsample.mo +0 -0
- data/po/es/statsample.po +193 -49
- data/po/statsample.pot +173 -40
- data/test/test_bivariate_polychoric.rb +6 -6
- data/test/test_distribution.rb +1 -1
- data/test/test_reliability.rb +87 -8
- data/test/test_vector.rb +0 -8
- metadata +44 -36
- metadata.gz.sig +0 -0
data.tar.gz.sig
CHANGED
@@ -1,2 +1 @@
|
|
1
|
-
|
2
|
-
�VW��}�5�j�}�i������$�RR�U�����^c�B�Ҽ�^h��>*���������@�����QhR��Τ�v��[��W3�\���]{!��\P��J��M�D�,�Hq-��b��%g�{U5
|
1
|
+
�HG��@��^��uH�
|
data/History.txt
CHANGED
@@ -1,3 +1,14 @@
|
|
1
|
+
=== 0.12.0 / 2010-06-09
|
2
|
+
|
3
|
+
* Modified Rakefile to remove dependencies based on C extensions. These are moved to statsample-optimization
|
4
|
+
* T test with unequal variance fixed on i686
|
5
|
+
* API Change: Renamed Reliability::ItemAnalysis and moved to independent file
|
6
|
+
* New Reliability::MultiScaleAnalysis for easy analysis of scales on the same survey, including reliability, correlation matrix and Factor Analysis
|
7
|
+
* Updated README to reflect changes on Reliability module
|
8
|
+
* SvgGraph works with reportbuilder.
|
9
|
+
* Added methods on Polychoric based on Olsson(1979): the idea is to estimate using second derivatives.
|
10
|
+
* Distribution test changed (reduced precision on 32-bit systems)
|
11
|
+
|
1
12
|
=== 0.11.2 / 2010-05-05
|
2
13
|
* Updated dependency for 'extendedmatrix' to 0.2 (Matrix#build method)
|
3
14
|
|
data/Manifest.txt
CHANGED
@@ -10,6 +10,7 @@ data/repeated_fields.csv
|
|
10
10
|
data/test_binomial.csv
|
11
11
|
data/tetmat_matrix.txt
|
12
12
|
data/tetmat_test.txt
|
13
|
+
doc_latex/manual/equations.tex
|
13
14
|
examples/correlation_matrix.rb
|
14
15
|
examples/dataset.rb
|
15
16
|
examples/dominance_analysis.rb
|
@@ -30,6 +31,7 @@ lib/distribution/chisquare.rb
|
|
30
31
|
lib/distribution/f.rb
|
31
32
|
lib/distribution/normal.rb
|
32
33
|
lib/distribution/normalbivariate.rb
|
34
|
+
lib/distribution/normalmultivariate.rb
|
33
35
|
lib/distribution/t.rb
|
34
36
|
lib/spss.rb
|
35
37
|
lib/statsample.rb
|
@@ -79,6 +81,8 @@ lib/statsample/regression/multiple/matrixengine.rb
|
|
79
81
|
lib/statsample/regression/multiple/rubyengine.rb
|
80
82
|
lib/statsample/regression/simple.rb
|
81
83
|
lib/statsample/reliability.rb
|
84
|
+
lib/statsample/reliability/multiscaleanalysis.rb
|
85
|
+
lib/statsample/reliability/scaleanalysis.rb
|
82
86
|
lib/statsample/resample.rb
|
83
87
|
lib/statsample/srs.rb
|
84
88
|
lib/statsample/test.rb
|
data/README.txt
CHANGED
@@ -15,6 +15,7 @@ Include:
|
|
15
15
|
* Tests: F, T, Levene, U-Mannwhitney.
|
16
16
|
* Regression: Simple, Multiple (OLS), Probit and Logit
|
17
17
|
* Factorial Analysis: Extraction (PCA and Principal Axis), Rotation (Varimax, Equimax, Quartimax) and Parallel Analysis, for estimation of number of factors.
|
18
|
+
* Reliability analysis for simple scale and helpers to analyze multiple scales using factor analysis and correlations
|
18
19
|
* Dominance Analysis, with multivariate dependent and bootstrap (Azen & Budescu)
|
19
20
|
* Sample calculation related formulas
|
20
21
|
* Creates reports on text, html and rtf, using ReportBuilder gem
|
@@ -50,7 +51,9 @@ Include:
|
|
50
51
|
* Statsample::Mx : Write Mx Files
|
51
52
|
* Statsample::GGobi : Write Ggobi files
|
52
53
|
* Module Statsample::Crosstab provides function to create crosstab for categorical data
|
53
|
-
* Reliability
|
54
|
+
* Module Statsample::Reliability provides functions to analyze scales.
|
55
|
+
* Class ScaleAnalysis provides statistics like mean and standard deviation for a scale, Cronbach's alpha and standardized Cronbach's alpha, and for each item: mean, correlation with total scale, mean if deleted, Cronbach's alpha if deleted.
|
56
|
+
* Class MultiScaleAnalysis provides a DSL to easily analyze reliability of multiple scales and retrieve correlation matrix and factor analysis of them.
|
54
57
|
* Module Statsample::SRS (Simple Random Sampling) provides a lot of functions to estimate standard error for several type of samples
|
55
58
|
* Module Statsample::Test provides several methods and classes to perform inferential statistics
|
56
59
|
* Statsample::Test::Levene
|
@@ -104,16 +107,22 @@ Optional:
|
|
104
107
|
|
105
108
|
* Source code on github: http://github.com/clbustos/statsample
|
106
109
|
* API: http://ruby-statsample.rubyforge.org/statsample/
|
107
|
-
* Bug report and feature request: http://
|
110
|
+
* Bug report and feature request: http://github.com/clbustos/statsample/issues
|
108
111
|
|
109
112
|
|
110
113
|
== INSTALL:
|
111
114
|
|
112
|
-
sudo gem install
|
115
|
+
$ sudo gem install statsample
|
116
|
+
|
117
|
+
On *nix, you should install statsample-optimization to retrieve gems gsl, statistics2 and a C extension to speed some methods.
|
118
|
+
|
119
|
+
$ sudo gem install statsample-optimization
|
120
|
+
|
121
|
+
To use it, on Ubuntu I recommend installing build-essential and libgsl0-dev using apt-get and compiling ruby 1.8 or 1.9 from source code.
|
122
|
+
|
123
|
+
$ sudo apt-get install build-essential libgsl0-dev
|
113
124
|
|
114
|
-
For optimization on *nix env
|
115
125
|
|
116
|
-
sudo gem install gsl ruby-statsample-optimization
|
117
126
|
|
118
127
|
Available setup.rb file
|
119
128
|
|
data/Rakefile
CHANGED
@@ -5,7 +5,8 @@ $:.unshift(File.dirname(__FILE__)+'/lib/')
|
|
5
5
|
|
6
6
|
require 'rubygems'
|
7
7
|
require 'hoe'
|
8
|
-
require '
|
8
|
+
require 'statsample'
|
9
|
+
|
9
10
|
Hoe.plugin :git
|
10
11
|
|
11
12
|
desc "Ruby Lint"
|
@@ -39,8 +40,28 @@ h=Hoe.spec('statsample') do
|
|
39
40
|
#self.testlib=:minitest
|
40
41
|
self.rubyforge_name = "ruby-statsample"
|
41
42
|
self.developer('Claudio Bustos', 'clbustos@gmail.com')
|
42
|
-
self.extra_deps << ["spreadsheet","~>0.6.0"] << ["svg-graph", "~>1.0"] << ["reportbuilder", "~>1.0"] << ["minimization", "~>0.2.0"] << ["fastercsv"] << ["dirty-memoize", "~>0.0"] << ["
|
43
|
-
self.
|
43
|
+
self.extra_deps << ["spreadsheet","~>0.6.0"] << ["svg-graph", "~>1.0"] << ["reportbuilder", "~>1.0"] << ["minimization", "~>0.2.0"] << ["fastercsv"] << ["dirty-memoize", "~>0.0"] << ["extendmatrix","~>0.2.0"]
|
44
|
+
self.extra_dev_deps << ["shoulda"]
|
45
|
+
self.clean_globs << "test/images/*" << "demo/item_analysis/*" << "demo/Regression"
|
46
|
+
self.post_install_message = <<-EOF
|
47
|
+
***************************************************
|
48
|
+
Thanks for installing statsample.
|
49
|
+
|
50
|
+
On *nix, you should install statsample-optimization
|
51
|
+
to retrieve gems gsl, statistics2 and a C extension
|
52
|
+
to speed some methods.
|
53
|
+
|
54
|
+
$sudo gem install statsample-optimization
|
55
|
+
|
56
|
+
To use it, on Ubuntu I recommend install
|
57
|
+
build-essential and libgsl0-dev using apt-get and
|
58
|
+
compile ruby 1.8 or 1.9 from source code first.
|
59
|
+
|
60
|
+
$sudo apt-get install build-essential libgsl0-dev
|
61
|
+
|
62
|
+
|
63
|
+
*****************************************************
|
64
|
+
EOF
|
44
65
|
self.need_rdoc=false
|
45
66
|
end
|
46
67
|
|
Binary file
|
@@ -0,0 +1,78 @@
|
|
1
|
+
\part{Equations}
|
2
|
+
\section{Convention}
|
3
|
+
\begin{align*}
|
4
|
+
n &= \text{sample size}\\
|
5
|
+
N &= \text{population size}\\
|
6
|
+
p &= \text{proportion inside a sample}\\
|
7
|
+
P &= \text{proportion inside a population}
|
8
|
+
\end{align*}
|
9
|
+
\section{Ruby::Regression::Multiple}
|
10
|
+
|
11
|
+
To compute the standard error of coefficients, you obtain the estimated variance-covariance matrix of error.
|
12
|
+
|
13
|
+
Let $\mathbf{X}$ be the matrix of predictor data, including a constant column; $\mathbf{MSE}$ the mean square error; $SSE$ the sum of squares of errors; $n$ the number of cases; and $p$ the number of predictors.
|
14
|
+
|
15
|
+
\begin{equation}
|
16
|
+
\mathbf{MSE}=\frac{SSE}{n-p-1}
|
17
|
+
\end{equation}
|
18
|
+
|
19
|
+
\begin{equation}
|
20
|
+
\mathbf{E}=(\mathbf{X'}\mathbf{X})^{-1}\mathbf{MSE}
|
21
|
+
\end{equation}
|
22
|
+
|
23
|
+
The square roots of the diagonal elements of $\mathbf{E}$ are the standard errors of the coefficients.
|
24
|
+
|
25
|
+
|
26
|
+
\section{Ruby::SRS}
|
27
|
+
Finite population correction is used in the standard error calculation for populations below 10,000. Function
|
28
|
+
\begin{verbatim}
|
29
|
+
fpc_var(sam,pop)
|
30
|
+
\end{verbatim}
|
31
|
+
calculate FPC for variance with
|
32
|
+
\begin{equation}
|
33
|
+
fpc_{var} = \frac{N-n} {N-1}
|
34
|
+
\end{equation}
|
35
|
+
|
36
|
+
with n as sam and N as pop
|
37
|
+
|
38
|
+
Function
|
39
|
+
\begin{verbatim}
|
40
|
+
fpc = fpc(sam,pop)
|
41
|
+
\end{verbatim}
|
42
|
+
|
43
|
+
calculate FPC for standard deviation with
|
44
|
+
\begin{equation}
|
45
|
+
fpc_{sd} = \sqrt{\frac{N-n} {N-1}}
|
46
|
+
\label{fpc}
|
47
|
+
\end{equation}
|
48
|
+
with n as sample size and N as population size.
|
49
|
+
|
50
|
+
\subsection{Sample Size estimation for proportions}
|
51
|
+
|
52
|
+
On infinite populations, you should use the method
|
53
|
+
\begin{verbatim}
|
54
|
+
estimation_n0(d,prop,margin=0.95)
|
55
|
+
\end{verbatim}
|
56
|
+
which uses
|
57
|
+
\begin{equation}
|
58
|
+
n = \frac{t^2(pq)}{d^2}
|
59
|
+
\label{n_i}
|
60
|
+
\end{equation}
|
61
|
+
where
|
62
|
+
\begin{align*}
|
63
|
+
t &= \text{t value for given level of confidence ( 1.96 for 95\% )}\\
|
64
|
+
d &= \text{margin of error}
|
65
|
+
\end{align*}
|
66
|
+
|
67
|
+
On finite populations, you should use
|
68
|
+
\begin{verbatim}
|
69
|
+
estimation_n(d,prop,n_pobl, margin=0.95)
|
70
|
+
\end{verbatim}
|
71
|
+
which uses
|
72
|
+
\begin{equation}
|
73
|
+
n = \frac{n_i}{1+(\frac{n_i-1}{N})}
|
74
|
+
\end{equation}
|
75
|
+
|
76
|
+
where $n_i$ is $n$ from equation \ref{n_i} and $N$ is the population size.
|
77
|
+
|
78
|
+
|
data/examples/reliability.rb
CHANGED
data/lib/distribution.rb
CHANGED
@@ -15,7 +15,13 @@ module Distribution
|
|
15
15
|
class << self
|
16
16
|
SIDE=0.1 # :nodoc:
|
17
17
|
LIMIT=5 # :nodoc:
|
18
|
-
|
18
|
+
# Return the partial derivative of cdf over x, with y and rho constant
|
19
|
+
# Reference:
|
20
|
+
# * Tallis, 1962, p.346, cited by Olsson, 1979
|
21
|
+
def partial_derivative_cdf_x(x,y,rho)
|
22
|
+
Distribution::Normal.pdf(x) * Distribution::Normal.cdf((y-rho*x).quo( Math::sqrt( 1 - rho**2 )))
|
23
|
+
end
|
24
|
+
alias :pd_cdf_x :partial_derivative_cdf_x
|
19
25
|
# Probability density function for a given x, y and rho value.
|
20
26
|
#
|
21
27
|
# Source: http://en.wikipedia.org/wiki/Multivariate_normal_distribution
|
@@ -0,0 +1,73 @@
|
|
1
|
+
module Distribution
|
2
|
+
# Calculate cdf and inverse cdf for Multivariate Distribution.
|
3
|
+
module NormalMultivariate
|
4
|
+
class << self
|
5
|
+
# Returns multivariate cdf distribution
|
6
|
+
# * a is the array of lower values
|
7
|
+
# * b is the array of higher values
|
8
|
+
# * s is a symmetric positive definite covariance matrix
|
9
|
+
def cdf(aa,bb,sigma, epsilon=0.0001, alpha=2.5, max_iterations=100) # :nodoc:
|
10
|
+
raise "Doesn't work yet"
|
11
|
+
a=[nil]+aa
|
12
|
+
b=[nil]+bb
|
13
|
+
m=aa.size
|
14
|
+
sigma=sigma.to_gsl if sigma.respond_to? :to_gsl
|
15
|
+
|
16
|
+
cc=GSL::Linalg::Cholesky.decomp(sigma)
|
17
|
+
c=cc.lower
|
18
|
+
intsum=0
|
19
|
+
varsum=0
|
20
|
+
n=0
|
21
|
+
d=Array.new(m+1,nil)
|
22
|
+
e=Array.new(m+1,nil)
|
23
|
+
f=Array.new(m+1,nil)
|
24
|
+
(1..m).each {|i|
|
25
|
+
d[i]=0.0 if a[i].nil?
|
26
|
+
e[i]=1.0 if b[i].nil?
|
27
|
+
}
|
28
|
+
d[1]=uPhi(a[1].quo( c[0,0])) unless d[1]==0
|
29
|
+
e[1]=uPhi(b[1].quo( c[0,0])) unless e[1]==1
|
30
|
+
f[1]=e[1]-d[1]
|
31
|
+
|
32
|
+
error=1000
|
33
|
+
begin
|
34
|
+
w=(m+1).times.collect {|i| rand*epsilon}
|
35
|
+
y=[]
|
36
|
+
(2..m).each do |i|
|
37
|
+
y[i-1]=iPhi(d[i-1] + w[i-1] * (e[i-1] - d[i-1]))
|
38
|
+
sumc=0
|
39
|
+
(1..(i-1)).each do |j|
|
40
|
+
sumc+=c[i-1, j-1]*y[j]
|
41
|
+
end
|
42
|
+
|
43
|
+
if a[i]!=nil
|
44
|
+
d[i]=uPhi((a[i]-sumc).quo(c[i-1,i-1]))
|
45
|
+
end
|
46
|
+
# puts "sumc:#{sumc}"
|
47
|
+
|
48
|
+
if b[i]!=nil
|
49
|
+
#puts "e[#{i}] :#{c[i-1,i-1]}"
|
50
|
+
e[i]=uPhi((b[i]-sumc).quo(c[i-1, i-1]))
|
51
|
+
end
|
52
|
+
f[i]=(e[i]-d[i])*f[i-1]
|
53
|
+
end
|
54
|
+
intsum+=intsum+f[m]
|
55
|
+
varsum=varsum+f[m]**2
|
56
|
+
n+=1
|
57
|
+
error=alpha*Math::sqrt((varsum.quo(n) - (intsum.quo(n))**2).quo(n))
|
58
|
+
end while(error>epsilon and n<max_iterations)
|
59
|
+
|
60
|
+
f=intsum.quo(n)
|
61
|
+
#p intsum
|
62
|
+
#puts "f:#{f}, n:#{n}, error:#{error}"
|
63
|
+
f
|
64
|
+
end
|
65
|
+
def iPhi(pr)
|
66
|
+
Distribution::Normal.p_value(pr)
|
67
|
+
end
|
68
|
+
def uPhi(x)
|
69
|
+
Distribution::Normal.cdf(x)
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
73
|
+
end
|
data/lib/distribution/t.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
require 'rbconfig'
|
1
2
|
module Distribution
|
2
3
|
|
3
4
|
# Calculate cdf and inverse cdf for T Distribution.
|
@@ -15,8 +16,40 @@ module Distribution
|
|
15
16
|
# with n degrees of freedom over (-Infty, x].
|
16
17
|
#
|
17
18
|
def cdf(x,k)
|
18
|
-
|
19
|
+
if RbConfig::CONFIG['arch']=~/i686/
|
20
|
+
tdist(k, x)
|
21
|
+
else
|
22
|
+
Statistics2.tdist(k,x)
|
23
|
+
end
|
19
24
|
end
|
25
|
+
|
26
|
+
# Returns the integral of t-distribution with n degrees of freedom over (-Infty, x].
|
27
|
+
def tdist(n, t)
|
28
|
+
p_t(n, t)
|
29
|
+
end
|
30
|
+
|
31
|
+
# t-distribution ([1])
|
32
|
+
# (-\infty, x]
|
33
|
+
def p_t(df, t)
|
34
|
+
c2 = df.to_f / (df + t * t);
|
35
|
+
s = Math.sqrt(1.0 - c2)
|
36
|
+
s = -s if t < 0.0
|
37
|
+
p = 0.0;
|
38
|
+
i = df % 2 + 2
|
39
|
+
while i <= df
|
40
|
+
p += s
|
41
|
+
s *= (i - 1) * c2 / i
|
42
|
+
i += 2
|
43
|
+
end
|
44
|
+
if df.is_a? Float or df & 1 != 0
|
45
|
+
0.5+(p*Math.sqrt(c2)+Math.atan(t/Math.sqrt(df)))/Math::PI
|
46
|
+
else
|
47
|
+
(1.0 + p) / 2.0
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
|
52
|
+
|
20
53
|
end
|
21
54
|
end
|
22
55
|
end
|
data/lib/statsample.rb
CHANGED
@@ -23,6 +23,7 @@ require 'matrix'
|
|
23
23
|
require 'distribution'
|
24
24
|
require 'dirty-memoize'
|
25
25
|
require 'reportbuilder'
|
26
|
+
|
26
27
|
class Numeric
|
27
28
|
def square ; self * self ; end
|
28
29
|
end
|
@@ -111,7 +112,7 @@ module Statsample
|
|
111
112
|
false
|
112
113
|
end
|
113
114
|
end
|
114
|
-
VERSION = '0.
|
115
|
+
VERSION = '0.12.0'
|
115
116
|
SPLIT_TOKEN = ","
|
116
117
|
autoload(:Database, 'statsample/converters')
|
117
118
|
autoload(:Anova, 'statsample/anova')
|
@@ -175,7 +175,7 @@ module Statsample
|
|
175
175
|
df_b=_q-1
|
176
176
|
df_within=(_p*_q)*(n-1)
|
177
177
|
|
178
|
-
opts_default={:name=>_("Anova Two-Way on
|
178
|
+
opts_default={:name=>_("Anova Two-Way on %s") % @ds[dep_var].name,
|
179
179
|
:name_a=>@ds[a_var].name,
|
180
180
|
:name_b=>@ds[b_var].name,
|
181
181
|
:summary_descriptives=>true,
|
@@ -75,6 +75,65 @@ module Statsample
|
|
75
75
|
# * Drasgow F. (2006). Polychoric and polyserial correlations. In Kotz L, Johnson NL (Eds.), Encyclopedia of statistical sciences. Vol. 7 (pp. 69-74). New York: Wiley.
|
76
76
|
|
77
77
|
class Polychoric
|
78
|
+
|
79
|
+
class Processor
|
80
|
+
attr_reader :alpha, :beta, :rho
|
81
|
+
def initialize(alpha,beta,rho)
|
82
|
+
@alpha=alpha
|
83
|
+
@beta=beta
|
84
|
+
@nr=@alpha.size+1
|
85
|
+
@nc=@beta.size+1
|
86
|
+
@rho=rho
|
87
|
+
@pd=nil
|
88
|
+
end
|
89
|
+
def bipdf(i,j)
|
90
|
+
Distribution::NormalBivariate.pdf(a(i), b(j), rho)
|
91
|
+
end
|
92
|
+
def a(i)
|
93
|
+
i < 0 ? -100 : (i==@nr-1 ? 100 : alpha[i])
|
94
|
+
end
|
95
|
+
def b(j)
|
96
|
+
j < 0 ? -100 : (j==@nc-1 ? 100 : beta[j])
|
97
|
+
end
|
98
|
+
# Equation(10) from Olsson(1979)
|
99
|
+
def fd_loglike_cell_a(i,j,k)
|
100
|
+
if k==i
|
101
|
+
Distribution::NormalBivariate.pd_cdf_x(a(k),b(j), rho) - Distribution::NormalBivariate.pd_cdf_x(a(k),b(j-1),rho)
|
102
|
+
elsif k==(i-1)
|
103
|
+
-Distribution::NormalBivariate.pd_cdf_x(a(k),b(j),rho) + Distribution::NormalBivariate.pd_cdf_x(a(k),b(j-1),rho)
|
104
|
+
else
|
105
|
+
0
|
106
|
+
end
|
107
|
+
|
108
|
+
end
|
109
|
+
# phi_ij for each i and j
|
110
|
+
# Uses equation(4) from Olsson(1979)
|
111
|
+
def pd
|
112
|
+
if @pd.nil?
|
113
|
+
@pd=@nr.times.collect{ [0] * @nc}
|
114
|
+
pc=@nr.times.collect{ [0] * @nc}
|
115
|
+
@nr.times do |i|
|
116
|
+
@nc.times do |j|
|
117
|
+
|
118
|
+
if i==@nr-1 and j==@nc-1
|
119
|
+
@pd[i][j]=1.0
|
120
|
+
else
|
121
|
+
a=(i==@nr-1) ? 100: alpha[i]
|
122
|
+
b=(j==@nc-1) ? 100: beta[j]
|
123
|
+
#puts "a:#{a} b:#{b}"
|
124
|
+
@pd[i][j]=Distribution::NormalBivariate.cdf(a, b, rho)
|
125
|
+
end
|
126
|
+
pc[i][j] = @pd[i][j]
|
127
|
+
@pd[i][j] = @pd[i][j] - pc[i-1][j] if i>0
|
128
|
+
@pd[i][j] = @pd[i][j] - pc[i][j-1] if j>0
|
129
|
+
@pd[i][j] = @pd[i][j] + pc[i-1][j-1] if (i>0 and j>0)
|
130
|
+
end
|
131
|
+
end
|
132
|
+
end
|
133
|
+
@pd
|
134
|
+
end
|
135
|
+
end
|
136
|
+
|
78
137
|
include GetText
|
79
138
|
include DirtyMemoize
|
80
139
|
bindtextdomain("statsample")
|
@@ -145,6 +204,7 @@ module Statsample
|
|
145
204
|
self.send("#{k}=",v) if self.respond_to? k
|
146
205
|
}
|
147
206
|
@r=nil
|
207
|
+
@pd=nil
|
148
208
|
compute_basic_parameters
|
149
209
|
end
|
150
210
|
# Returns the polychoric correlation
|
@@ -174,7 +234,7 @@ module Statsample
|
|
174
234
|
raise "Not implemented"
|
175
235
|
end
|
176
236
|
end
|
177
|
-
|
237
|
+
# Retrieve log likelihood for actual data.
|
178
238
|
def loglike_data
|
179
239
|
loglike=0
|
180
240
|
@nr.times do |i|
|
@@ -188,97 +248,147 @@ module Statsample
|
|
188
248
|
end
|
189
249
|
loglike
|
190
250
|
end
|
251
|
+
|
252
|
+
# Chi Square of model
|
191
253
|
def chi_square
|
192
254
|
if @loglike_model.nil?
|
193
255
|
compute
|
194
256
|
end
|
195
257
|
-2*(@loglike_model-loglike_data)
|
196
258
|
end
|
259
|
+
|
197
260
|
def chi_square_df
|
198
261
|
(@nr*@nc)-@nc-@nr
|
199
262
|
end
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
@nc.times { |j|
|
263
|
+
|
264
|
+
|
265
|
+
|
266
|
+
|
267
|
+
# Retrieve all cell probabilities for given alpha, beta and rho
|
268
|
+
def cell_probabilities(alpha,beta,rho)
|
269
|
+
pd=@nr.times.collect{ [0] * @nc}
|
270
|
+
pc=@nr.times.collect{ [0] * @nc}
|
271
|
+
@nr.times do |i|
|
272
|
+
@nc.times do |j|
|
273
|
+
|
212
274
|
if i==@nr-1 and j==@nc-1
|
213
275
|
pd[i][j]=1.0
|
214
|
-
a=100
|
215
|
-
b=100
|
216
276
|
else
|
217
277
|
a=(i==@nr-1) ? 100: alpha[i]
|
218
278
|
b=(j==@nc-1) ? 100: beta[j]
|
279
|
+
#puts "a:#{a} b:#{b}"
|
219
280
|
pd[i][j]=Distribution::NormalBivariate.cdf(a, b, rho)
|
220
281
|
end
|
221
282
|
pc[i][j] = pd[i][j]
|
222
283
|
pd[i][j] = pd[i][j] - pc[i-1][j] if i>0
|
223
284
|
pd[i][j] = pd[i][j] - pc[i][j-1] if j>0
|
224
285
|
pd[i][j] = pd[i][j] + pc[i-1][j-1] if (i>0 and j>0)
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
else
|
230
|
-
alpha_m1=alpha[i-1]
|
231
|
-
end
|
232
|
-
|
233
|
-
if j==0
|
234
|
-
beta_m1=-10
|
235
|
-
else
|
236
|
-
beta_m1=beta[j-1]
|
237
|
-
end
|
238
|
-
|
239
|
-
loglike+= (@matrix[i,j].quo(pij))*(Distribution::NormalBivariate.pdf(a,b,rho) - Distribution::NormalBivariate.pdf(alpha_m1, b,rho) - Distribution::NormalBivariate.pdf(a, beta_m1,rho) + Distribution::NormalBivariate.pdf(alpha_m1, beta_m1,rho) )
|
240
|
-
|
241
|
-
}
|
242
|
-
}
|
243
|
-
#puts "derivative: #{loglike}"
|
244
|
-
-loglike
|
286
|
+
end
|
287
|
+
end
|
288
|
+
@pd=pd
|
289
|
+
pd
|
245
290
|
end
|
246
291
|
def loglike(alpha,beta,rho)
|
247
292
|
if rho.abs>0.9999
|
248
293
|
rho= (rho>0) ? 0.9999 : -0.9999
|
249
294
|
end
|
250
|
-
|
295
|
+
pr=Processor.new(alpha,beta,rho)
|
251
296
|
loglike=0
|
252
|
-
|
253
|
-
|
254
|
-
@nr.times
|
255
|
-
@nc.times
|
256
|
-
|
257
|
-
if i==@nr-1 and j==@nc-1
|
258
|
-
pd[i][j]=1.0
|
259
|
-
else
|
260
|
-
a=(i==@nr-1) ? 100: alpha[i]
|
261
|
-
b=(j==@nc-1) ? 100: beta[j]
|
262
|
-
#puts "a:#{a} b:#{b}"
|
263
|
-
pd[i][j]=Distribution::NormalBivariate.cdf(a, b, rho)
|
264
|
-
|
265
|
-
end
|
266
|
-
pc[i][j] = pd[i][j]
|
267
|
-
pd[i][j] = pd[i][j] - pc[i-1][j] if i>0
|
268
|
-
pd[i][j] = pd[i][j] - pc[i][j-1] if j>0
|
269
|
-
pd[i][j] = pd[i][j] + pc[i-1][j-1] if (i>0 and j>0)
|
270
|
-
res= pd[i][j]
|
271
|
-
#puts "i:#{i} | j:#{j} | ac: #{sprintf("%0.4f", pc[i][j])} | pd: #{sprintf("%0.4f", pd[i][j])} | res:#{sprintf("%0.4f", res)}"
|
272
|
-
if (res<=0)
|
273
|
-
# puts "Correccion"
|
274
|
-
res=1e-16
|
275
|
-
end
|
297
|
+
|
298
|
+
|
299
|
+
@nr.times do |i|
|
300
|
+
@nc.times do |j|
|
301
|
+
res=pr.pd[i][j]+EPSILON
|
276
302
|
loglike+= @matrix[i,j] * Math::log( res )
|
277
|
-
|
278
|
-
|
279
|
-
@pd=pd
|
303
|
+
end
|
304
|
+
end
|
280
305
|
-loglike
|
281
306
|
end
|
307
|
+
# First derivative for rho
|
308
|
+
# Uses equation (9) from Olsson(1979)
|
309
|
+
def fd_loglike_rho(alpha,beta,rho)
|
310
|
+
if rho.abs>0.9999
|
311
|
+
rho= (rho>0) ? 0.9999 : -0.9999
|
312
|
+
end
|
313
|
+
total=0
|
314
|
+
pr=Processor.new(alpha,beta,rho)
|
315
|
+
@nr.times do |i|
|
316
|
+
@nc.times do |j|
|
317
|
+
pi=pr.pd[i][j] + EPSILON
|
318
|
+
total+= (@matrix[i,j] / pi) * (pr.bipdf(i,j)-pr.bipdf(i-1,j)-pr.bipdf(i,j-1)+pr.bipdf(i-1,j-1))
|
319
|
+
end
|
320
|
+
end
|
321
|
+
total
|
322
|
+
end
|
323
|
+
|
324
|
+
# First derivative for alpha_k
|
325
|
+
def fd_loglike_a(alpha,beta,rho,k)
|
326
|
+
fd_loglike_a_eq6(alpha,beta,rho,k)
|
327
|
+
end
|
328
|
+
# Uses equation (6) from Olsson(1979)
|
329
|
+
def fd_loglike_a_eq6(alpha,beta,rho,k)
|
330
|
+
if rho.abs>0.9999
|
331
|
+
rho= (rho>0) ? 0.9999 : -0.9999
|
332
|
+
end
|
333
|
+
pr=Processor.new(alpha,beta,rho)
|
334
|
+
total=0
|
335
|
+
pd=pr.pd
|
336
|
+
@nr.times do |i|
|
337
|
+
@nc.times do |j|
|
338
|
+
total+=@matrix[i,j].quo(pd[i][j]+EPSILON) * pr.fd_loglike_cell_a(i,j,k)
|
339
|
+
end
|
340
|
+
end
|
341
|
+
total
|
342
|
+
end
|
343
|
+
# Uses equation(13) from Olsson(1979)
|
344
|
+
def fd_loglike_a_eq13(alpha,beta,rho,k)
|
345
|
+
if rho.abs>0.9999
|
346
|
+
rho= (rho>0) ? 0.9999 : -0.9999
|
347
|
+
end
|
348
|
+
pr=Processor.new(alpha,beta,rho)
|
349
|
+
total=0
|
350
|
+
a_k=pr.a(k)
|
351
|
+
pd=pr.pd
|
352
|
+
@nc.times do |j|
|
353
|
+
#puts "j: #{j}"
|
354
|
+
#puts "b #{j} : #{b.call(j)}"
|
355
|
+
#puts "b #{j-1} : #{b.call(j-1)}"
|
356
|
+
|
357
|
+
e_1=@matrix[k,j].quo(pd[k][j]+EPSILON) - @matrix[k+1,j].quo(pd[k+1][j]+EPSILON)
|
358
|
+
e_2=Distribution::Normal.pdf(a_k)
|
359
|
+
e_3=Distribution::Normal.cdf((pr.b(j)-rho*a_k).quo(Math::sqrt(1-rho**2))) - Distribution::Normal.cdf((pr.b(j-1)-rho*a_k).quo(Math::sqrt(1-rho**2)))
|
360
|
+
#puts "val #{j}: #{e_1} | #{e_2} | #{e_3}"
|
361
|
+
|
362
|
+
total+= e_1*e_2*e_3
|
363
|
+
end
|
364
|
+
total
|
365
|
+
end
|
366
|
+
# First derivative for beta_m
|
367
|
+
# Uses equation(14) from Olsson(1979)
|
368
|
+
def fd_loglike_b(alpha,beta,rho,m)
|
369
|
+
if rho.abs>0.9999
|
370
|
+
rho= (rho>0) ? 0.9999 : -0.9999
|
371
|
+
end
|
372
|
+
pr=Processor.new(alpha,beta,rho)
|
373
|
+
total=0
|
374
|
+
b_m=pr.b m
|
375
|
+
pd=pr.pd
|
376
|
+
@nr.times do |i|
|
377
|
+
#puts "j: #{j}"
|
378
|
+
#puts "b #{j} : #{b.call(j)}"
|
379
|
+
#puts "b #{j-1} : #{b.call(j-1)}"
|
380
|
+
|
381
|
+
e_1=@matrix[i,m].quo(pd[i][m]+EPSILON) - @matrix[i,m+1].quo(pd[i][m+1]+EPSILON)
|
382
|
+
e_2=Distribution::Normal.pdf(b_m)
|
383
|
+
e_3=Distribution::Normal.cdf((pr.a(i)-rho*b_m).quo(Math::sqrt(1-rho**2))) - Distribution::Normal.cdf((pr.a(i-1)-rho*b_m).quo(Math::sqrt(1-rho**2)))
|
384
|
+
#puts "val #{j}: #{e_1} | #{e_2} | #{e_3}"
|
385
|
+
|
386
|
+
total+= e_1*e_2*e_3
|
387
|
+
end
|
388
|
+
total
|
389
|
+
end
|
390
|
+
|
391
|
+
|
282
392
|
def compute_basic_parameters
|
283
393
|
@nr=@matrix.row_size
|
284
394
|
@nc=@matrix.column_size
|
@@ -333,7 +443,7 @@ module Statsample
|
|
333
443
|
|
334
444
|
def compute_two_step_mle_drasgow_ruby #:nodoc:
|
335
445
|
|
336
|
-
f=proc {|rho|
|
446
|
+
f=proc {|rho|
|
337
447
|
loglike(@alpha,@beta, rho)
|
338
448
|
}
|
339
449
|
@log="Minimizing using GSL Brent method\n"
|
@@ -351,9 +461,9 @@ module Statsample
|
|
351
461
|
|
352
462
|
def compute_two_step_mle_drasgow_gsl #:nodoc:
|
353
463
|
|
354
|
-
|
355
|
-
|
356
|
-
|
464
|
+
fn1=GSL::Function.alloc {|rho|
|
465
|
+
loglike(@alpha,@beta, rho)
|
466
|
+
}
|
357
467
|
@iteration = 0
|
358
468
|
max_iter = @max_iterations
|
359
469
|
m = 0 # initial guess
|
@@ -405,8 +515,19 @@ module Statsample
|
|
405
515
|
parameters=[rho]+cut_alpha+cut_beta
|
406
516
|
minimization = Proc.new { |v, params|
|
407
517
|
rho=v[0]
|
408
|
-
alpha=v[1
|
409
|
-
beta=v[@nr
|
518
|
+
alpha=v[1, @nr-1]
|
519
|
+
beta=v[@nr, @nc-1]
|
520
|
+
|
521
|
+
#puts "f'rho=#{fd_loglike_rho(alpha,beta,rho)}"
|
522
|
+
#(@nr-1).times {|k|
|
523
|
+
# puts "f'a(#{k}) = #{fd_loglike_a(alpha,beta,rho,k)}"
|
524
|
+
# puts "f'a(#{k}) v2 = #{fd_loglike_a2(alpha,beta,rho,k)}"
|
525
|
+
#
|
526
|
+
#}
|
527
|
+
#(@nc-1).times {|k|
|
528
|
+
# puts "f'b(#{k}) = #{fd_loglike_b(alpha,beta,rho,k)}"
|
529
|
+
#}
|
530
|
+
|
410
531
|
loglike(alpha,beta,rho)
|
411
532
|
}
|
412
533
|
np=@nc-1+@nr
|