statsample-sem 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Binary file
@@ -0,0 +1,3 @@
1
+ === 0.1.0 / 2010-07-03
2
+
3
+ * First operational version, with support for the 'sem' and 'OpenMx' R libraries.
@@ -0,0 +1,18 @@
1
+ History.txt
2
+ Manifest.txt
3
+ README.txt
4
+ Rakefile
5
+ bin/statsample_sem
6
+ example/normal_vs_saturated.rb
7
+ lib/statsample/sem.rb
8
+ lib/statsample/sem/model.rb
9
+ lib/statsample/sem/openmxengine.rb
10
+ lib/statsample/sem/semjfoxengine.rb
11
+ spec/fixtures/demo_open_mx.csv
12
+ spec/fixtures/demo_open_mx.ds
13
+ spec/spec.opts
14
+ spec/spec_helper.rb
15
+ spec/statsample_sem_model_spec.rb
16
+ spec/statsample_sem_openmxengine_spec.rb
17
+ spec/statsample_sem_semjfoxengine_spec.rb
18
+ spec/statsample_sem_spec.rb
@@ -0,0 +1,56 @@
1
+ = statsample-sem
2
+
3
+ * http://ruby-statsample.rubyforge.org/
4
+
5
+ == DESCRIPTION:
6
+
7
+ Structural equation modeling (SEM) for the statsample gem, using Ruby and R
8
+
9
+ == FEATURES/PROBLEMS:
10
+
11
+ * Abstract generation of models. You can use either of the R packages sem or OpenMx just by changing the engine configuration
12
+ * Generates visual representations of output models using GraphViz (to implement)
13
+
14
+ == SYNOPSIS:
15
+
16
+ require 'statsample'
17
+ require 'statsample/sem'
18
+ require 'matrix'
19
+ matrix=Matrix[[0.1985443,0.1999953,0.2311884,0.2783865,0.3155943],
20
+ [0.1999953,0.2916950,0.2924566,0.3515298,0.4019234],
21
+ [0.2311884,0.2924566,0.3740354,0.4061291,0.4573587],
22
+ [0.2783865,0.3515298,0.4061291,0.5332788,0.5610769],
23
+ [0.3155943,0.4019234,0.4573587,0.5610769,0.6703023]]
24
+
25
+ cases=500
26
+ sem1=Statsample::SEM.new do |m|
27
+ m.data_from_matrix(matrix,:cases=>cases)
28
+ m.variables=%w{x1 x2 x3 x4 x5} # Variables on matrix
29
+ m.manifests %w{x1 x2 x3 x4 x5}
30
+ m.latents %w{G}
31
+ m.path :from=>m.latents, :to=>m.manifests
32
+ m.path :from=>m.manifests
33
+ m.path :from=>m.latents, :free=>false, :values=>1.0
34
+ end
35
+ sem1.compute
36
+ puts sem1.chi_square
37
+
38
+ sem2=sem1.dup
39
+ sem2.make_null # Generate a null model
40
+ sem2.compute
41
+ puts sem2.chi_square
42
+ puts sem2.df
43
+
44
+
45
+ == REQUIREMENTS:
46
+
47
+ * statsample
48
+ * R with sem and/or OpenMx packages installed
49
+
50
+ == INSTALL:
51
+
52
+ * sudo gem install statsample-sem
53
+
54
+ == LICENSE:
55
+
56
+ GPL-2
@@ -0,0 +1,16 @@
1
# -*- ruby -*-
# Hoe-based build definition for the statsample-sem gem.
# The local lib/ directory is put first on the load path so that the
# version constant is read from this checkout.
$:.unshift(File.dirname(__FILE__)+"/lib")
require 'rubygems'
require 'hoe'
require 'statsample/sem'

Hoe.plugin :git

Hoe.spec 'statsample-sem' do
  self.version = Statsample::SEM::VERSION
  self.rubyforge_name = 'ruby-statsample'
  self.developer('Claudio Bustos', 'clbustos_at_gmail.com')
  # Runtime dependencies, added one at a time.
  self.extra_deps << ["statsample","~>0.13.1"]
  self.extra_deps << ["rserve-client", "~>0.2.0"]
  self.extra_deps << ['dirty-memoize']
end

# vim: syntax=ruby
@@ -0,0 +1,3 @@
1
#!/usr/bin/env ruby

# Placeholder executable: no command line interface has been written yet,
# so the script just terminates with an error message.
abort("you need to write me")
@@ -0,0 +1,36 @@
1
# Example: fit a one-factor model on a 5x5 matrix, then fit the
# corresponding null model and print its chi-square and degrees of freedom.
$:.unshift(File.dirname(__FILE__)+"/../lib/")
require 'statsample'
require 'statsample/sem'
require 'matrix'

observed = Matrix[
  [0.1985443, 0.1999953, 0.2311884, 0.2783865, 0.3155943],
  [0.1999953, 0.2916950, 0.2924566, 0.3515298, 0.4019234],
  [0.2311884, 0.2924566, 0.3740354, 0.4061291, 0.4573587],
  [0.2783865, 0.3515298, 0.4061291, 0.5332788, 0.5610769],
  [0.3155943, 0.4019234, 0.4573587, 0.5610769, 0.6703023]
]
sample_size = 500

factor_model = Statsample::SEM.new do |m|
  m.data_from_matrix(observed, :cases=>sample_size)
  m.variables = %w{x1 x2 x3 x4 x5}
  m.manifests %w{x1 x2 x3 x4 x5}
  m.latents %w{G}
  # Paths from the latent factor to every manifest variable
  m.path :from=>m.latents, :to=>m.manifests
  # Variances of the manifest variables
  m.path :from=>m.manifests
  # Variance of the latent factor, fixed at 1.0
  m.path :from=>m.latents, :free=>false, :values=>1.0
end
factor_model.compute
puts factor_model.chi_square

null_fit = factor_model.dup
null_fit.make_null
null_fit.compute

puts null_fit.chi_square
puts null_fit.df
36
+
@@ -0,0 +1,46 @@
1
+ require 'rserve'
2
+ require 'dirty-memoize'
3
+ require 'forwardable'
4
+ require 'statsample'
5
+ require 'statsample/rserve_extension'
6
+ require 'statsample/sem/model'
7
+ require 'statsample/sem/openmxengine'
8
+ require 'statsample/sem/semjfoxengine'
9
+
10
+ module Statsample
11
+ class SEM
12
+ extend Forwardable
13
+ VERSION='0.1.0'
14
+ attr_accessor :name
15
+ attr_accessor :engine
16
+ def_delegators :@model, :path, :data_from_matrix, :data_from_dataset, :manifests, :manifests=, :latents, :latents=, :variables=, :make_null
17
+ def_delegators :@engine_obj, :chi_square, :df
18
# Creates a SEM analysis backed by a new Statsample::SEM::Model.
#
# opts:: Hash. +:name+ (default "SEM Analysis") and +:engine+ (default
#        +:sem+) are extracted; any other key is kept in @opts.
# block:: optional configuration block. It is instance_eval'ed when it
#         declares no parameters, otherwise it is called with self.
def initialize(opts=Hash.new, &block)
  settings = {:name=>"SEM Analysis", :engine=>:sem}.merge(opts)
  @engine_obj = nil
  @name = settings.delete(:name)
  @engine = settings.delete(:engine)
  @opts = settings
  @model = Statsample::SEM::Model.new(:name=>@name)
  return unless block
  if block.arity < 1
    instance_eval(&block)
  else
    block.call(self)
  end
end
29
# Builds the engine selected by @engine for the current model, runs its
# +compute+ method and returns the engine object (also stored in
# @engine_obj).
#
# Raises ArgumentError when @engine is neither +:openmx+ nor +:sem+.
def compute
  engine_class = case @engine
    when :openmx
      OpenMxEngine
    when :sem
      SemJFoxEngine
    else
      # Fail early with a clear message instead of calling #compute on nil
      raise ArgumentError, "Unknown engine #{@engine.inspect}: use :openmx or :sem"
  end
  @engine_obj = engine_class.new(@model)
  @engine_obj.compute
  @engine_obj
end
39
# Engine object for this analysis. When none is stored yet, #compute is
# called and its result is kept.
def engine_obj
  @engine_obj = compute unless @engine_obj
  @engine_obj
end
# Value of +r_summary+ as reported by the engine object.
def r_summary
  engine_obj.r_summary
end
45
+ end
46
+ end
@@ -0,0 +1,244 @@
1
+ module Statsample
2
+ class SEM
3
+ class Model
4
+ include Summarizable
5
+ # Type of data used. Could be +:covariance+, +:correlation+ or +:raw+
6
+ attr_reader :data_type
7
+ # Covariance/correlation matrix
8
+ attr_reader :matrix
9
+ # Raw data on a dataset
10
+ attr_reader :ds
11
+ # Number of cases
12
+ attr_accessor :cases
13
+ # Optional array of mean for use when data is matrix based
14
+ attr_accessor :means
15
+ # Name of variables
16
+ attr_reader :variables
17
+
18
+ attr_reader :paths
19
# Creates an empty model (no data, no paths, manifests and latents unset).
#
# opts:: Hash merged over the default +:name+. Anything that is not a
#        Hash raises ArgumentError.
# block:: optional configuration block. It is instance_eval'ed when it
#         declares no parameters, otherwise it is called with self.
def initialize(opts=Hash.new, &block)
  raise ArgumentError,"opts should be a Hash" unless opts.is_a? Hash
  @opts = {:name=>_("SEM Model")}.merge(opts)
  @paths = Hash.new
  @manifests = nil
  @latents = nil
  return unless block
  if block.arity < 1
    instance_eval(&block)
  else
    block.call(self)
  end
end
30
# Returns a new Model with the same options, data source, variable names,
# latents, manifests and paths as this one.
#
# Variable names are passed along explicitly for matrix based data:
# names set with #variables= are not stored on the matrix itself, so
# without this the copy would have no variable names and #make_null
# would fail on it.
# A model without data yet is copied without trying to load data.
def dup
  mod = Model.new(@opts)
  if @data_type == :raw
    mod.data_from_dataset(@ds)
  elsif !@data_type.nil?
    mod.data_from_matrix(@matrix, :type=>@data_type, :cases=>@cases,
                         :means=>@means, :variable_names=>@variables)
  end
  mod.latents = latents
  mod.manifests = manifests
  if @paths
    @paths.each_pair do |k, v|
      mod.add_raw_path(k[0], k[1], v[:arrow], v[:label], v[:free], v[:value])
    end
  end
  mod
end
46
# Replaces every path of the model with one variance path (arrows=2,
# label "var NAME") per variable that was present on the old paths.
# Variables listed on @variables get a free variance; any other variable
# gets a variance fixed at 1.0.
def make_null
  # Must be collected before the paths are wiped
  involved = variables_on_paths
  @paths.clear
  involved.each do |name|
    if @variables.include? name
      add_raw_path(name, name, 2, "var #{name}", true, nil)
    else
      add_raw_path(name, name, 2, "var #{name}", false, 1.0)
    end
  end
end
61
# True if the model has enough information to be processed: a data type,
# manifests, latents and at least one path.
def complete?
  return false if @data_type.nil? || @manifests.nil? || @latents.nil?
  @paths.size > 0
end
65
# Stores a path exactly as given, without defaults or validation.
# The path is indexed by the sorted pair of its two variable names, so an
# existing path between the same two variables is overwritten.
def add_raw_path(f1,f2,arrows,label,free,value)
  key = [f1, f2].sort
  @paths[key] = {
    :from=>f1, :to=>f2, :arrow=>arrows,
    :label=>label, :free=>free, :value=>value
  }
end
68
# Adds one path between +f1+ and +f2+, resolving per-path settings from
# the (possibly array valued) options handed to #path.
#
# arrows:: 1 for regression, any other value is labelled as covariance
# free:: bool, or Array indexed by +i+
# values:: Numeric applied as is, or Array indexed by +i+; ignored
#          (stored as nil) when the path is free
# labels:: object responding to +[]+, indexed by +i+; when missing, the
#          label is "F1 to F2" (arrows=1) or "F1 cov F2"
# i:: position of this path inside the current #path call
#
# Raises RuntimeError if a path between the same two variables exists.
def add_path(f1,f2,arrows,free,values,labels,i)
  key = [f1, f2].sort
  # Paths are indexed by the sorted pair itself, so that is what must be
  # looked up to detect a repetition.
  raise "Path repeated : #{f1},#{f2}" if @paths.has_key? key

  arrow_s = (arrows == 1) ? "to" : "cov"
  given_label = labels.respond_to?(:[]) ? labels[i] : nil
  label = given_label.nil? ? "#{f1} #{arrow_s} #{f2}" : given_label

  free_v = (free.is_a? Array) ? free[i] : free
  value = case values
    when Array
      values[i]
    when Numeric
      values
  end
  # A free parameter carries no fixed value
  value = nil if free_v

  @paths[key] = { :from=>f1, :to=>f2, :arrow=>arrows, :label=>label, :free=>free_v, :value=>value }
end
84
# Set one or more paths. Based on OpenMx mxPath method.
#
# ==Options:
# * +:from+ : String or Array. Sources of new paths. Required.
# * +:to+ : String or Array. Sinks of new paths. Defaults to +:from+
# * +:all+ : bool. If true, connect all sources to all sinks. If false,
#   connect one-on-one sources to sinks, which must be the same size.
#   Always treated as true when there is only one source or one sink
# * +:arrows+ : 1 for regression, 2 for variance-covariance. See rules
#   for specific automatic setting
# * +:free+ : Indicates whether paths are free or fixed. By default, true
# * +:values+ : Values of the parameters. Numeric or Array
# * +:labels+ : The names of paths
# ==Rules
# * sources equal to sinks : variances -> one path from each variable to
#   itself, arrows=2 unless given, label "var NAME" unless given
# * otherwise : arrows=1 unless given; the label, unless given, is set
#   by #add_path
# * each path is created with #add_path, which receives the position of
#   the path inside this call to index +:free+, +:values+ and +:labels+
def path(opts)
  raise "Requires at least :from option" unless opts.has_key? :from

  from   = opts[:from]
  to     = opts[:to]
  all    = opts.has_key?(:all)  ? opts[:all]  : false
  free   = opts.has_key?(:free) ? opts[:free] : true
  labels = opts[:labels]
  arrows = opts[:arrows]
  values = opts[:values]

  from = [from] if from.is_a? String
  to = from unless to
  to = [to] if to.is_a? String

  if from == to
    # Variances
    arrows = 2 unless arrows
    variance_labels = Array.new(from.size)
    from.each_with_index do |name, idx|
      given = (labels.nil? || !labels.respond_to?(:[])) ? nil : labels[idx]
      variance_labels[idx] = given.nil? ? "var #{name}" : given
    end
    from.each_with_index do |name, idx|
      add_path(name, name, arrows, free, values, variance_labels, idx)
    end
  else
    # Regressions and covariances
    arrows = 1 unless arrows
    all = true if from.size == 1 || to.size == 1
    if all
      position = 0
      from.each do |source|
        to.each do |sink|
          add_path(source, sink, arrows, free, values, labels, position)
          position += 1
        end
      end
    else
      raise ":from and :to should be the same size" if from.size != to.size
      from.size.times do |idx|
        add_path(from[idx], to[idx], arrows, free, values, labels, idx)
      end
    end
  end
end
158
# Sets the names of the observed variables; every name is stored as a
# String.
#
# When a matrix is loaded (data is not raw), the number of names must
# match the number of rows of the matrix; otherwise ArgumentError is
# raised. The expected size is read from the matrix: @variables is an
# Array (or nil) and has no row_size.
def variables=(v)
  if @data_type != :raw && !@matrix.nil? && v.size != @matrix.row_size
    raise ArgumentError, "Should be size=#{@matrix.row_size}"
  end
  @variables = v.map {|i| i.to_s}
end
162
# Names of all variables used on at least one path, without repetitions,
# in order of first appearance on the path keys.
def variables_on_paths
  @paths.keys.inject(Array.new) do |seen, pair|
    seen << pair[0] unless seen.include? pair[0]
    seen << pair[1] unless seen.include? pair[1]
    seen
  end
end
# Number of variables on model, i.e. variables used on at least one path
def k
  variables_on_paths.length
end
# Latents will be any variable set on a path not present
# on @variables
def infer_latents
  on_paths = variables_on_paths
  on_paths - @variables
end
# Manifests will be any variable set on a path present on @variables
def infer_manifests
  on_paths = variables_on_paths
  on_paths & @variables
end
# Label of the path stored under +key+ (sorted pair of variable names)
def get_label(key)
  entry = @paths[key]
  entry[:label]
end
187
# Reader and DSL style writer for the latent variables.
#
# * Without arguments: returns the latents. If they were never set, and
#   the model already has paths and data, they are inferred with
#   #infer_latents and remembered.
# * With an Array: the array becomes the list of latents.
# * With one or more names: those names become the list of latents.
#   The splat is already an Array, so it is stored as is and not wrapped
#   into another Array.
def latents(*argv)
  if argv.empty?
    if @latents.nil? and @paths.size>0 and !@data_type.nil?
      @latents = infer_latents
    end
    @latents
  elsif argv[0].is_a? Array
    @latents = argv[0]
  else
    @latents = argv
  end
end
# Sets the list of latent variables as given
def latents=(argv)
  @latents = argv
end
202
+
203
# Reader and DSL style writer for the manifest variables.
#
# * Without arguments: returns the manifests. If they were never set, and
#   the model already has paths and data, they are inferred with
#   #infer_manifests and remembered.
# * With an Array: the array becomes the list of manifests.
# * With one or more names: those names become the list of manifests.
#   The splat is already an Array, so it is stored as is and not wrapped
#   into another Array.
def manifests(*argv)
  if argv.empty?
    if @manifests.nil? and @paths.size>0 and !@data_type.nil?
      @manifests = infer_manifests
    end
    @manifests
  elsif argv[0].is_a? Array
    @manifests = argv[0]
  else
    @manifests = argv
  end
end
# Sets the list of manifest variables as given
def manifests=(argv)
  @manifests = argv
end
218
# Uses raw data as source: stores the dataset, marks the data type as
# +:raw+ and takes the variable names from the fields of the dataset.
def data_from_dataset(ds)
  @ds = ds
  @data_type = :raw
  @variables = ds.fields
end
223
+
224
# Uses a covariance or correlation matrix as source of data.
#
# ==Options:
# * +:type+ : +:covariance+ selects covariance; any other value selects
#   correlation. If missing, the matrix is asked for its +type+ when it
#   responds to that method; otherwise +:covariance+ is used
# * +:cases+ : number of cases. Required; RuntimeError is raised without it
# * +:means+ : optional means
# * +:variable_names+ : names of variables. If missing, the +fields+ of
#   the matrix are used when it responds to that method
def data_from_matrix(matrix,opts=Hash.new)
  kind = opts[:type]
  unless kind
    kind = matrix.respond_to?(:type) ? matrix.type : :covariance
  end
  names = opts[:variable_names]
  n_cases = opts[:cases]
  raise "You should set number of cases" if n_cases.nil?

  @data_type = (kind == :covariance) ? :covariance : :correlation
  @matrix = matrix
  @cases = n_cases
  @means = opts[:means]
  if !names.nil?
    @variables = names
  elsif @matrix.respond_to? :fields
    @variables = @matrix.fields
  end
end
242
+ end
243
+ end
244
+ end
@@ -0,0 +1,144 @@
1
+ require 'rserve'
2
+ require 'tempfile'
3
+ module Statsample
4
+ class SEM
5
+ class OpenMxEngine
6
+ include DirtyMemoize
7
+ include Summarizable
8
+ attr_accessor :summarizable
9
+ attr_accessor :name
10
+ attr_reader :r_summary
11
# Creates an OpenMx engine for +model+.
#
# model:: model to fit
# opts:: Hash merged over the defaults; +:name+ sets the name of the
#        analysis. The defaults used to be merged with themselves, so
#        every option given by the caller was ignored.
def initialize(model,opts=Hash.new)
  @model = model
  defaults = {
    :name=>_("SEM analysis using OpenMx")
  }
  @opts = defaults.merge(opts)
  @name = @opts[:name]
end
19
# Rserve connection used by this engine, opened on first use and kept.
def r
  @r = Rserve::Connection.new unless @r
  @r
end
22
# R source with one mxPath() call per path of the model, separated by
# a comma and a newline. The values and labels arguments are only
# written for paths that have a value or a label.
def r_mxpaths
  calls = @model.paths.values.map do |path|
    value = path[:value] ? ", values = #{path[:value]}" : ""
    label = path[:label] ? ", labels = \"#{path[:label]}\"" : ""
    free = path[:free] ? "TRUE" : "FALSE"
    "mxPath(from=c(\"#{path[:from]}\"), to=c(\"#{path[:to]}\"), arrows = #{path[:arrow]}, free = #{free} #{value} #{label})"
  end
  calls.join(",\n")
end
29
# R source of the mxData() call for the model. The type is 'raw', 'cov'
# or 'cor' according to the data type of the model. For matrix based
# data the number of observations is added, and so are the means when
# the model has them.
def r_mxdata
  data_type = @model.data_type
  type = {:raw=>'raw', :covariance=>'cov', :correlation=>'cor'}[data_type]
  matrix_based = (data_type != :raw)
  means = (matrix_based && !@model.means.nil?) ? ", means = d_means " : ""
  num = matrix_based ? ", numObs = #{@model.cases} " : ""

  "mxData(observed=data, type='#{type}' #{means} #{num})"
end
43
+
44
# Complete R script for the analysis: loads OpenMx, builds a RAM model
# named after this engine from the paths and data of the model, runs it
# into +factorFit+ and removes the temporary R variables.
# The script expects +data+, +manifests+ and +latents+ to be already
# assigned on the R side.
def r_query
  model_name = name
  paths_source = r_mxpaths
  data_source = r_mxdata
  <<-EOF
library(OpenMx);
factorModel <- mxModel(
name="#{model_name}",
type="RAM",
manifestVars = manifests,
latentVars = latents,
#{paths_source},
#{data_source}
);
factorFit<-mxRun(factorModel);
rm(data,manifests,latents,d_means);
  EOF
end
60
+
61
# Sends the model to R, runs the OpenMx script and stores the Ruby
# version of <tt>summary(factorFit)</tt> on @r_summary. Returns true.
#
# Raises RuntimeError if the model is not complete.
#
# Every call to R goes through the #r accessor, including the final
# eval, which used to read @r directly.
def compute
  raise "Insuficient information" unless @model.complete?
  observed = (@model.data_type == :raw) ? @model.ds : @model.matrix
  r.assign 'data', observed
  if @model.matrix
    r.assign 'vn', @model.variables
    # Names of variables should be assigned to the dimensions of the matrix
    r.void_eval('dimnames(data)<-list(vn,vn)')
  end
  r.assign 'manifests', @model.manifests
  r.assign 'latents', @model.latents
  means = @model.means
  r.assign 'd_means', means unless means.nil?
  r.void_eval r_query
  @r_summary = r.eval('summary(factorFit)').to_ruby
  true
end
76
# Returns the Graphviz (dot) source that OpenMx's omxGraphviz writes for
# the R object +factorModel+. The analysis is computed first if there is
# no summary yet.
#
# R writes the output to the path of a temporary file. The content is
# read back from that path, not from the handle opened before R wrote to
# it, and the temporary file is always closed and removed.
def graphviz
  compute if @r_summary.nil?
  tf = Tempfile.new('model.dot')
  begin
    r.void_eval("omxGraphviz(factorModel,'#{tf.path}')")
    File.read(tf.path)
  ensure
    tf.close!
  end
end
84
# 'Chi' entry of the R summary of the fitted model
def chi_square
  @r_summary['Chi']
end
# 'degreesOfFreedom' entry of the R summary of the fitted model
def df
  @r_summary['degreesOfFreedom']
end
# 'Chi' entry of the R summary of the null model
def chi_square_null
  null_summary = null_model.r_summary
  null_summary['Chi']
end
# 'degreesOfFreedom' entry of the R summary of the null model
def df_null
  null_summary = null_model.r_summary
  null_summary['degreesOfFreedom']
end

# Engine for the null model, created on first use and kept
def null_model
  @null_model = compute_null_model unless @null_model
  @null_model
end
# Engine of the same class and options as this one, for a copy of the
# model reduced with make_null
def compute_null_model #:nodoc:
  null_version = @model.dup
  null_version.make_null
  self.class.new(null_version, @opts)
end
105
# 'RMSEA' entry of the R summary of the fitted model
def rmsea
  @r_summary['RMSEA']
end
# [χ2(Null Model) - χ2(Proposed Model)]/ [χ2(Null Model)]
def nfi
  null_chi = chi_square_null
  (null_chi - chi_square).quo(null_chi)
end

# [χ2/df(Null Model) - χ2/df(Proposed Model)]/[χ2/df(Null Model) - 1]
def nnfi
  null_ratio = chi_square_null.quo(df_null)
  model_ratio = chi_square.quo(df)
  (null_ratio - model_ratio).quo(null_ratio - 1)
end
# [(χ2-df)(Null Model) - (χ2-df)(Proposed Model)]/[(χ2-df)(Null Model)]
def cfi
  null_excess = chi_square_null - df_null
  model_excess = chi_square - df
  (null_excess - model_excess).quo(null_excess)
end
121
# χ2 + [k*(k-1)/2 - df] * ln(N), where k is the number of variables on
# the model and N the number of cases.
#
# Nothing is printed: a leftover debug print of k was removed.
# NOTE(review): the parameter count uses k*(k-1)/2 — confirm this is the
# intended term before relying on the absolute value.
def bic
  k = @model.k
  ln_n = Math.log(@model.cases)
  chi_square + ((k * (k - 1).quo(2)) - df) * ln_n
end
127
# Hash of parameter estimates taken from the 'parameters' entry of the R
# summary. Each key is the sorted pair [row, col] of the parameter; each
# value holds +:estimate+, +:se+ and +:label+, plus +:z+ and +:p+, which
# are always nil.
def coefficients
  params = @r_summary['parameters']
  # Columns: 0:name, 1:matrix, 2:row, 3:col, 4:estimate, 5:Std.error
  estimates = Hash.new
  params[0].each_with_index do |label, idx|
    key = [params[2][idx], params[3][idx]].sort
    estimates[key] = {
      :estimate=>params[4][idx], :se=>params[5][idx],
      :z=>nil, :p=>nil, :label=>label
    }
  end
  estimates
end
140
+ dirty_memoize :chi_square, :df, :rmsea, :coefficients, :r_summary, :chi_square_null, :df_null, :nfi, :nnfi
141
+
142
+ end
143
+ end
144
+ end