statsample 1.3.1 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,2 +0,0 @@
#!/usr/bin/env ruby
# Placeholder script: prints a fixed message and exits.
#
# The shebang selects a Ruby interpreter, so the body has to be Ruby.
# The shell builtin `echo` is not a Ruby method and raised NoMethodError
# when this file was executed; `puts` emits the same line of text.
puts "Nothing today!"
@@ -1,139 +0,0 @@
module Statsample
  # Module for generic MLE calculations.
  # Use a subclass of BaseMLE for a specific MLE model estimation.
  # You should visit Statsample::Regression for methods to perform fast
  # regression analysis.
  # == Usage:
  #
  #   mle=Statsample::MLE::Probit.new
  #   mle.newton_raphson(x,y)
  #   beta=mle.parameters
  #   likehood=mle.likehood(x,y,beta)
  #   iterations=mle.iterations
  #
  module MLE
    # Base class for Newton-Raphson maximum likelihood estimation.
    #
    # Subclasses must implement <tt>first_derivative(x,y,b)</tt> and
    # <tt>second_derivative(x,y,b)</tt>, which #newton_raphson calls on every
    # iteration. #likehood and #log_likehood additionally require
    # <tt>likehood_i(xi,yi,b)</tt> and <tt>log_likehood_i(xi,yi,b)</tt>.
    class BaseMLE
      # When true, progress messages are written to #output
      attr_accessor :verbose
      # IO-like object that receives messages (defaults to STDOUT)
      attr_accessor :output
      # Could be :parameters or :mle
      attr_accessor :stop_criteria
      # Variance - Covariance matrix
      attr_reader :var_cov_matrix
      # Iterations
      attr_reader :iterations
      # Parameters (beta coefficients)
      attr_reader :parameters
      # Maximum number of Newton-Raphson iterations
      ITERATIONS=100
      # Relative log-likelihood change below which :mle criteria stops
      MIN_DIFF=1e-5
      # Relative parameter change below which :parameters criteria stops
      MIN_DIFF_PARAMETERS=1e-2

      # Sets defaults: quiet, output on STDOUT, :parameters stop criteria.
      def initialize()
        @verbose = false
        @output = STDOUT
        @stop_criteria = :parameters
        @var_cov_matrix = nil
        @iterations = nil
        @parameters = nil
      end

      # Calculate likehood for matrices x and y, given b parameters.
      # It is the product of likehood_i over every row of x.
      def likehood(x,y,b)
        prod=1
        x.row_size.times do |i|
          prod*=likehood_i(observation_row(x,i), y[i,0].to_f, b)
        end
        prod
      end

      # Calculate log likehood for matrices x and y, given b parameters.
      # It is the sum of log_likehood_i over every row of x.
      def log_likehood(x,y,b)
        sum=0
        x.row_size.times do |i|
          sum+=log_likehood_i(observation_row(x,i), y[i,0].to_f, b)
        end
        sum
      end

      # Creates the default start values: a column matrix with one 0.0 per
      # column of x. For Statsample::MLE::Normal an extra 0.1 is appended
      # (that model carries one parameter more than x has columns).
      def set_default_parameters(x)
        fd=[0.0]*x.column_size
        fd.push(0.1) if self.is_a? Statsample::MLE::Normal
        Matrix.columns([fd])
      end

      # Newton Raphson with automatic stopping criteria.
      # Based on: Von Tessin, P. (2005). Maximum Likelihood Estimation With Java and Ruby
      #
      # <tt>x</tt>:: matrix of independent variables. Should have nxk dimensions
      # <tt>y</tt>:: matrix of dependent values. Should have nx1 dimensions
      # <tt>start_values</tt>:: column matrix of initial coefficients, one row
      #                         per parameter. Defaults to
      #                         set_default_parameters(x)
      #
      # With stop criteria :parameters, iteration stops once the relative
      # change of every parameter is below MIN_DIFF_PARAMETERS. With any
      # other criteria (:mle), it stops once the log-likelihood decreases or
      # its relative change is below MIN_DIFF.
      #
      # Returns the parameter matrix, which is also stored in #parameters.
      # #var_cov_matrix is only assigned when a stop criteria is met; if
      # ITERATIONS is exhausted first, the last parameters are returned.
      #
      # Raises RuntimeError if x and y differ in rows or the Hessian is
      # singular.
      def newton_raphson(x,y, start_values=nil)
        parameters = start_values.nil? ? set_default_parameters(x) : start_values.dup
        k=parameters.row_size
        raise "n on y != n on x" if x.row_size!=y.row_size
        if @stop_criteria==:mle
          old_likehood=log_likehood(x, y, parameters)
        else
          old_parameters=parameters
        end
        ITERATIONS.times do |i|
          @iterations=i+1
          @output.puts "Set #{i}" if @verbose
          h = second_derivative(x,y,parameters)
          raise "Hessian is singular!" if h.singular?
          fd = first_derivative(x,y,parameters)
          parameters = parameters-(h.inverse*(fd))

          if @stop_criteria==:parameters
            flag=true
            k.times do |j|
              diff= ( parameters[j,0] - old_parameters[j,0] ) / parameters[j,0]
              flag=false if diff.abs >= MIN_DIFF_PARAMETERS
              @output.puts "Parameters #{j}: #{diff}" if @verbose
            end
            if flag
              @var_cov_matrix = h.inverse*-1.0
              # Leave the loop instead of returning, so @parameters is
              # stored below exactly as it is for the :mle criteria.
              break
            end
            old_parameters=parameters
          else
            begin
              new_likehood = log_likehood(x,y,parameters)
              @output.puts "[#{i}]Log-MLE:#{new_likehood} (Diff:#{(new_likehood-old_likehood) / new_likehood})" if @verbose
              if (new_likehood < old_likehood) || ((new_likehood - old_likehood) / new_likehood).abs < MIN_DIFF
                @var_cov_matrix = h.inverse*-1.0
                break
              end
              old_likehood=new_likehood
            rescue => e
              # Best effort: report the failure and keep iterating.
              @output.puts "#{e}"
            end
          end
        end
        @parameters=parameters
        parameters
      end

      private

      # Row i of x as a 1xk Matrix of floats.
      def observation_row(x,i)
        Matrix.rows([x.row(i).to_a.collect{|v| v.to_f}])
      end
    end
  end
end
137
- require 'statsample/mle/normal'
138
- require 'statsample/mle/logit'
139
- require 'statsample/mle/probit'
@@ -1,97 +0,0 @@
module Statsample
  module MLE
    # Logit MLE estimation.
    # See Statsample::Regression for methods to generate a logit regression.
    # Usage:
    #
    #   mle=Statsample::MLE::Logit.new
    #   mle.newton_raphson(x,y)
    #   beta=mle.parameters
    #   likehood=mle.likehood(x, y, beta)
    #   iterations=mle.iterations
    #
    class Logit < BaseMLE
      # F(B'Xi): logistic function of the linear predictor. A result of
      # exactly 0.0 or 1.0 is replaced by 1e-15 or 0.999999999999999.
      def f(b,xi)
        linear=(xi*b)[0,0]
        prob=(1.0/(1.0+Math::exp(-linear)))
        return 1e-15 if prob==0.0
        return 0.999999999999999 if prob==1.0
        prob
      end

      # Likehood for x_i vector, y_i scalar and b parameters
      def likehood_i(xi,yi,b)
        success=f(b,xi)**yi
        failure=(1-f(b,xi))**(1-yi)
        success*failure
      end

      # Log Likehood for x_i vector, y_i scalar and b parameters
      def log_likehood_i(xi,yi,b)
        prob=f(b,xi)
        outcome=yi.to_f
        (outcome*Math::log(prob))+((1.0-outcome)*Math::log(1.0-prob))
      end

      # First derivative of log-likehood function
      # x: Matrix (NxM)
      # y: Matrix (Nx1)
      # p: Matrix (Mx1)
      # Returns a Mx1 Matrix.
      def first_derivative(x,y,p)
        raise "x.rows!=y.rows" if x.row_size!=y.row_size
        raise "x.columns!=p.rows" if x.column_size!=p.row_size
        gradient = Array.new(x.column_size) { [0.0] }
        x.row_size.times do |obs|
          values = x.row(obs).to_a
          residual = (1-y[obs,0]) -p_plus(values,p)
          gradient.each_with_index do |cell, col|
            cell[0] -= residual*values[col]
          end
        end
        Matrix.rows(gradient, true)
      end

      # Second derivative of log-likehood function
      # x: Matrix (NxM)
      # y: Matrix (Nx1)
      # p2: Matrix (Mx1)
      # Returns a MxM Matrix.
      def second_derivative(x,y,p2)
        raise "x.rows!=y.rows" if x.row_size!=y.row_size
        raise "x.columns!=p.rows" if x.column_size!=p2.row_size
        size = x.column_size
        hessian = Array.new(size) { Array.new(size, 0.0) }
        x.row_size.times do |obs|
          values = x.row(obs).to_a
          prob = p_minus(values,p2)
          weight = prob*(1-prob)
          size.times do |r|
            size.times do |c|
              hessian[r][c] -= (weight*values[r]*values[c])
            end
          end
        end
        Matrix.rows(hessian, true)
      end

      private

      # 1/(1+exp(-x_row.p)), with p a column matrix
      def p_minus(x_row,p)
        1/(1+Math.exp(-linear_predictor(x_row,p)))
      end

      # 1/(1+exp(x_row.p)), with p a column matrix
      def p_plus(x_row,p)
        1/(1+Math.exp(linear_predictor(x_row,p)))
      end

      # Dot product of the array x_row with the first column of p
      def linear_predictor(x_row,p)
        x_row.each_with_index.inject(0.0) do |acc, (value, idx)|
          acc + value*p[idx,0]
        end
      end

    end # Logit
  end # MLE
end # Statsample
@@ -1,83 +0,0 @@
module Statsample
  module MLE
    # Normal Distribution MLE estimation.
    # Usage:
    #
    #   mle=Statsample::MLE::Normal.new
    #   mle.newton_raphson(x,y)
    #   beta=mle.parameters
    #   likehood=mle.likehood(x,y,beta)
    #   iterations=mle.iterations
    #
    # The parameter matrix has k+1 rows for a x with k columns: the k betas
    # followed by sigma^2.
    class Normal < BaseMLE
      # Total log-likelihood for given X, Y and B matrices.
      # The last row of b is sigma^2; the rows before it are the betas.
      def log_likehood(x,y,b)
        n=x.row_size.to_f
        sigma2=b[b.row_size-1,0]
        betas=Matrix.columns([b.column(0).to_a[0...b.row_size-1]])
        e=y-(x*betas)
        # Float literal keeps the division exact even for an Integer sigma2
        # (1 / (2*sigma2) would truncate to 0).
        scaled_ss=(e.t*e)[0,0] / (2.0*sigma2)
        (-(n / 2.0) * Math::log(2*Math::PI))-((n / 2.0)*Math::log(sigma2)) - scaled_ss
      end

      # First derivative for Normal Model.
      # p should be [k+1,1], because the last parameter is sigma^2
      # Returns a (k+1)x1 Matrix: X'e/sigma^2 for the betas and
      # e'e/(2*sigma^4) - n/(2*sigma^2) for sigma^2.
      def first_derivative(x,y,p)
        raise "x.rows!=y.rows" if x.row_size!=y.row_size
        raise "x.columns+1!=p.rows" if x.column_size+1!=p.row_size

        n = x.row_size
        k = x.column_size
        beta, sigma2 = split_parameters(p,k)
        sigma4=sigma2*sigma2
        e = y-(x*(beta))
        xte = x.transpose*(e)
        ete = e.transpose*(e)
        #rows of the Jacobian
        rows = Array.new(k+1)
        k.times{|i| rows[i] = [xte[i,0] / sigma2]}
        rows[k] = [ete[0,0] / (2*sigma4) - n / (2.0*sigma2)]
        Matrix.rows(rows, true)
      end

      # Second derivative for normal model
      # p should be [k+1,1], because the last parameter is sigma^2
      # Returns a (k+1)x(k+1) Matrix:
      # * beta/beta block:       -X'X/sigma^2
      # * beta/sigma^2 entries:  -X'e/sigma^4
      # * sigma^2/sigma^2 entry: n/(2*sigma^4) - e'e/sigma^6
      def second_derivative(x,y,p)
        raise "x.rows!=y.rows" if x.row_size!=y.row_size
        raise "x.columns+1!=p.rows" if x.column_size+1!=p.row_size

        n = x.row_size
        k = x.column_size
        beta, sigma2 = split_parameters(p,k)
        sigma4=sigma2*sigma2
        sigma6 = sigma2*sigma2*sigma2
        e = y-(x*(beta))
        xtx = x.transpose*(x)
        xte = x.transpose*(e)
        ete = e.transpose*(e)
        #rows of the Hessian
        rows = Array.new(k+1)
        k.times do |i|
          row = Array.new(k+1)
          k.times do |j|
            row[j] = -xtx[i,j] / sigma2
          end
          row[k] = -xte[i,0] / sigma4
          rows[i] = row
        end
        last_row = Array.new(k+1)
        k.times do |i|
          last_row[i] = -xte[i,0] / sigma4
        end
        # Derivative of the sigma^2 score used in first_derivative,
        # e'e/(2*sigma^4) - n/(2*sigma^2), with respect to sigma^2.
        last_row[k] = n / (2.0*sigma4) - ete[0,0] / sigma6
        rows[k] = last_row
        Matrix.rows(rows, true)
      end

      private

      # Splits p into [beta (kx1 Matrix), sigma^2 (scalar at row k)].
      def split_parameters(p,k)
        b = Array.new(k)
        k.times{|i| b[i]=[p[i,0]]}
        [Matrix.rows(b), p[k,0]]
      end
    end
  end
end
@@ -1,93 +0,0 @@
module Statsample
  module MLE
    # Probit MLE estimation.
    # See Statsample::Regression for methods to generate a probit regression.
    #
    # == Usage:
    #
    #   mle=Statsample::MLE::Probit.new
    #   mle.newton_raphson(x,y)
    #   beta=mle.parameters
    #   likehood=mle.likehood(x,y,beta)
    #   iterations=mle.iterations
    class Probit < BaseMLE
      # The implementation of f and ff is chosen once, when the class body
      # is evaluated, depending on Statsample.has_gsl?
      if Statsample.has_gsl?
        # F(B'Xi)
        def f(b,x)
          p_bx=(x*b)[0,0]
          GSL::Cdf::ugaussian_P(p_bx)
        end
        # f(B'Xi)
        def ff(b,x)
          p_bx=(x*b)[0,0]
          GSL::Ran::ugaussian_pdf(p_bx)
        end
      else
        def f(b,x) #:nodoc:
          p_bx=(x*b)[0,0]
          Distribution::Normal.cdf(p_bx)
        end
        def ff(b,x) #:nodoc:
          p_bx=(x*b)[0,0]
          Distribution::Normal.pdf(p_bx)
        end
      end

      # Likehood for x_i vector, y_i scalar and b parameters.
      # Needed by BaseMLE#likehood, which calls likehood_i for every row;
      # without it the documented mle.likehood(x,y,beta) raised
      # NoMethodError for this model.
      def likehood_i(xi,yi,b)
        fbx=f(b,xi)
        (fbx**yi)*((1-fbx)**(1-yi))
      end

      # Log Likehood for x_i vector, y_i scalar and b parameters
      def log_likehood_i(xi,yi,b)
        fbx=f(b,xi)
        (yi.to_f*Math::log(fbx))+((1.0-yi.to_f)*Math::log(1.0-fbx))
      end

      # First derivative of log-likehood probit function
      # x: Matrix (NxM)
      # y: Matrix (Nx1)
      # b: Matrix (Mx1)
      # Returns a Mx1 Matrix.
      def first_derivative(x,y,b)
        raise "x.rows!=y.rows" if x.row_size!=y.row_size
        raise "x.columns!=p.rows" if x.column_size!=b.row_size
        n = x.row_size
        k = x.column_size
        fd = Array.new(k)
        k.times {|i| fd[i] = [0.0]}
        n.times do |i|
          xi = Matrix.rows([x.row(i).to_a])
          fbx=f(b,xi)
          value1 = (y[i,0]-fbx)/ ( fbx*(1-fbx))*ff(b,xi)
          k.times do |j|
            fd[j][0] += value1*xi[0,j]
          end
        end
        Matrix.rows(fd, true)
      end

      # Second derivative of log-likehood probit function
      # x: Matrix (NxM)
      # y: Matrix (Nx1)
      # b: Matrix (Mx1)
      # Returns a MxM Matrix: minus the sum over rows of
      # ff^2/(f*(1-f)) * xi'xi. With GSL available the sum is accumulated
      # in a GSL::Matrix and converted back at the end.
      def second_derivative(x,y,b)
        raise "x.rows!=y.rows" if x.row_size!=y.row_size
        raise "x.columns!=p.rows" if x.column_size!=b.row_size
        n = x.row_size
        k = x.column_size
        if Statsample.has_gsl?
          sum=GSL::Matrix.zeros(k)
        else
          sum=Matrix.zero(k)
        end
        n.times do |i|
          xi=Matrix.rows([x.row(i).to_a])
          fbx=f(b,xi)
          val=((ff(b,xi)**2) / (fbx*(1.0-fbx)))*xi.t*xi
          if Statsample.has_gsl?
            val=val.to_gsl
          end
          sum-=val
        end
        if Statsample.has_gsl?
          sum=sum.to_matrix
        end
        sum
      end
    end # Probit
  end # MLE
end # Statsample
@@ -1,72 +0,0 @@
module Statsample
  module Regression
    module Binomial
      # Base Engine for binomial regression analysis.
      # Use Statsample::Regression.logit and Statsample::Regression.probit
      # for fast access methods.
      #
      # == Usage:
      #   dataset=Statsample::CSV.read("data.csv")
      #   y="y"
      #   model=Statsample::MLE::Logit.new
      #   lr=Statsample::Regression::Binomial::BaseEngine.new(dataset, y, model)
      class BaseEngine
        attr_reader :log_likehood, :iterations

        # Fits the model as soon as the engine is built.
        #
        # Parameters
        # * ds: Dataset
        # * y_var: Name of dependent variable
        # * model: object used for the estimation; it must respond to
        #   newton_raphson, iterations, var_cov_matrix and log_likehood
        #   (the usage example passes a Statsample::MLE::Logit instance)
        #
        # A "_constant" vector of 1.0 values is appended to the independent
        # variables before they are turned into the x matrix.
        def initialize(ds,y_var,model)
          @ds=ds
          @y_var=y_var
          @dy=@ds[@y_var]
          @ds_indep=ds.dup(ds.fields-[y_var])
          ones=([1.0]*ds.cases).to_vector(:scale)
          @ds_indep.add_vector("_constant",ones)
          x_matrix=@ds_indep.to_matrix
          y_matrix=@dy.to_matrix(:vertical)
          @fields=@ds_indep.fields
          @model=model
          estimates=model.newton_raphson(x_matrix, y_matrix)
          @coeffs=assign_names(estimates.column(0).to_a)
          @iterations=model.iterations
          @var_cov_matrix=model.var_cov_matrix
          @log_likehood=model.log_likehood(x_matrix, y_matrix, estimates)
        end # init

        # Coefficients standard error, as a hash keyed by field name.
        # The "_constant" entry is left out; see constant_se.
        def coeffs_se
          errors={}
          @fields.each_with_index do |field, idx|
            errors[field]=Math::sqrt(@var_cov_matrix[idx,idx])
          end
          errors.delete("_constant")
          errors
        end

        # Value of constant on regression
        def constant
          @coeffs['_constant']
        end

        # Constant standard error
        def constant_se
          position=@fields.index "_constant"
          Math::sqrt(@var_cov_matrix[position,position])
        end

        # Regression coefficients, without the "_constant" entry
        def coeffs
          @coeffs.reject { |field, _value| field=="_constant" }
        end

        # Pairs every field name with the value at the same position of c.
        def assign_names(c) # :nodoc:
          named={}
          @fields.each_with_index do |field, idx|
            named[field]=c[idx]
          end
          named
        end
      end # Base Engine
    end # Binomial
  end # Regression
end # Statsample