statsample-sem 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Binary file
@@ -0,0 +1,3 @@
1
+ === 0.1.0 / 2010-07-03
2
+
3
+ * First operational version, with support for the 'sem' and 'OpenMx' R libraries.
@@ -0,0 +1,18 @@
1
+ History.txt
2
+ Manifest.txt
3
+ README.txt
4
+ Rakefile
5
+ bin/statsample_sem
6
+ example/normal_vs_saturated.rb
7
+ lib/statsample/sem.rb
8
+ lib/statsample/sem/model.rb
9
+ lib/statsample/sem/openmxengine.rb
10
+ lib/statsample/sem/semjfoxengine.rb
11
+ spec/fixtures/demo_open_mx.csv
12
+ spec/fixtures/demo_open_mx.ds
13
+ spec/spec.opts
14
+ spec/spec_helper.rb
15
+ spec/statsample_sem_model_spec.rb
16
+ spec/statsample_sem_openmxengine_spec.rb
17
+ spec/statsample_sem_semjfoxengine_spec.rb
18
+ spec/statsample_sem_spec.rb
@@ -0,0 +1,56 @@
1
+ = statsample-sem
2
+
3
+ * http://ruby-statsample.rubyforge.org/
4
+
5
+ == DESCRIPTION:
6
+
7
+ Structural equation modeling (SEM) for the statsample gem, using Ruby and R
8
+
9
+ == FEATURES/PROBLEMS:
10
+
11
+ * Abstract generation of models. You can use either the sem or the OpenMx R package just by changing the engine configuration
12
+ * Generates visual representations of output models using GraphViz (not yet implemented)
13
+
14
+ == SYNOPSIS:
15
+
16
+ require 'statsample'
17
+ require 'statsample/sem'
18
+ require 'matrix'
19
+ matrix=Matrix[[0.1985443,0.1999953,0.2311884,0.2783865,0.3155943],
20
+ [0.1999953,0.2916950,0.2924566,0.3515298,0.4019234],
21
+ [0.2311884,0.2924566,0.3740354,0.4061291,0.4573587],
22
+ [0.2783865,0.3515298,0.4061291,0.5332788,0.5610769],
23
+ [0.3155943,0.4019234,0.4573587,0.5610769,0.6703023]]
24
+
25
+ cases=500
26
+ sem1=Statsample::SEM.new do |m|
27
+ m.data_from_matrix(matrix,:cases=>cases)
28
+ m.variables=%w{x1 x2 x3 x4 x5} # Variables on matrix
29
+ m.manifests %w{x1 x2 x3 x4 x5}
30
+ m.latents %w{G}
31
+ m.path :from=>m.latents, :to=>m.manifests
32
+ m.path :from=>m.manifests
33
+ m.path :from=>m.latents, :free=>false, :values=>1.0
34
+ end
35
+ sem1.compute
36
+ puts sem1.chi_square
37
+
38
+ sem2=sem1.dup
39
+ sem2.make_null # Generate a null model
40
+ sem2.compute
41
+ puts sem2.chi_square
42
+ puts sem2.df
43
+
44
+
45
+ == REQUIREMENTS:
46
+
47
+ * statsample
48
+ * R with sem and/or OpenMx packages installed
49
+
50
+ == INSTALL:
51
+
52
+ * sudo gem install statsample-sem
53
+
54
+ == LICENSE:
55
+
56
+ GPL-2
@@ -0,0 +1,16 @@
1
# -*- ruby -*-
# Build file: declares the statsample-sem gem through Hoe.
# The local lib directory is put first on the load path so the version
# constant is read from this checkout.
$:.unshift("#{File.dirname(__FILE__)}/lib")
require 'rubygems'
require 'hoe'
require 'statsample/sem'
Hoe.plugin :git


Hoe.spec 'statsample-sem' do
  self.version = Statsample::SEM::VERSION
  self.rubyforge_name = 'ruby-statsample'
  self.developer('Claudio Bustos', 'clbustos_at_gmail.com')
  # Runtime dependencies; dirty-memoize is declared without a version constraint
  self.extra_deps << ["statsample","~>0.13.1"]
  self.extra_deps << ["rserve-client", "~>0.2.0"]
  self.extra_deps << ['dirty-memoize']
end

# vim: syntax=ruby
@@ -0,0 +1,3 @@
1
#!/usr/bin/env ruby

# Placeholder executable: it only aborts with a reminder message.
abort("you need to write me")
@@ -0,0 +1,36 @@
1
# Example: fits a one-factor model on a 5x5 matrix and then a null model
# derived from it, printing the chi-square of each and the null model's df.
$:.unshift("#{File.dirname(__FILE__)}/../lib/")
require 'statsample'
require 'statsample/sem'
require 'matrix'

# Input matrix for the five observed variables
matrix = Matrix[
  [0.1985443, 0.1999953, 0.2311884, 0.2783865, 0.3155943],
  [0.1999953, 0.2916950, 0.2924566, 0.3515298, 0.4019234],
  [0.2311884, 0.2924566, 0.3740354, 0.4061291, 0.4573587],
  [0.2783865, 0.3515298, 0.4061291, 0.5332788, 0.5610769],
  [0.3155943, 0.4019234, 0.4573587, 0.5610769, 0.6703023]
]

cases = 500

sem1 = Statsample::SEM.new do |m|
  m.data_from_matrix(matrix, :cases => cases)
  m.variables = %w{x1 x2 x3 x4 x5}
  m.manifests %w{x1 x2 x3 x4 x5}
  m.latents %w{G}
  # Paths from the latent to every manifest
  m.path :from => m.latents, :to => m.manifests
  # Variance of every manifest
  m.path :from => m.manifests
  # Variance of the latent, fixed to 1.0
  m.path :from => m.latents, :free => false, :values => 1.0
end
sem1.compute
puts sem1.chi_square

sem2 = sem1.dup
sem2.make_null
sem2.compute

puts sem2.chi_square
puts sem2.df
36
+
@@ -0,0 +1,46 @@
1
+ require 'rserve'
2
+ require 'dirty-memoize'
3
+ require 'forwardable'
4
+ require 'statsample'
5
+ require 'statsample/rserve_extension'
6
+ require 'statsample/sem/model'
7
+ require 'statsample/sem/openmxengine'
8
+ require 'statsample/sem/semjfoxengine'
9
+
10
module Statsample
  # Front end for structural equation modeling. It owns a
  # Statsample::SEM::Model, to which the model-building methods are
  # delegated, and hands that model to an engine chosen with the +:engine+
  # option: +:sem+ (SemJFoxEngine, the default) or +:openmx+ (OpenMxEngine).
  class SEM
    extend Forwardable
    VERSION='0.1.0'
    # Name of the analysis, also given to the model
    attr_accessor :name
    # Engine selector: +:sem+ or +:openmx+
    attr_accessor :engine
    def_delegators :@model, :path, :data_from_matrix, :data_from_dataset, :manifests, :manifests=, :latents, :latents=, :variables=, :make_null
    # Results go through #engine_obj (like #r_summary does), so they are
    # computed on demand instead of failing on a nil engine.
    def_delegators :engine_obj, :chi_square, :df

    # Creates the analysis.
    #
    # opts:: Hash with +:name+ (default "SEM Analysis") and +:engine+
    #        (default +:sem+)
    # block:: optional; called with this object when it takes an argument,
    #         evaluated in this object's context otherwise
    def initialize(opts=Hash.new, &block)
      default_opts={:name=>"SEM Analysis", :engine=>:sem}
      @opts=default_opts.merge(opts)
      @engine_obj=nil
      @name=@opts.delete :name
      @engine=@opts.delete :engine
      @model=Statsample::SEM::Model.new(:name=>@name)
      if block
        block.arity<1 ? instance_eval(&block) : block.call(self)
      end
    end

    # Called by #dup and #clone. Gives the copy its own model, so that
    # changing the copy (for example with #make_null) leaves the original
    # untouched, and drops the engine computed for the original.
    # The model copy itself is delegated to Model#dup.
    def initialize_copy(source)
      super
      shared_model=@model
      @opts=@opts.dup
      @model=shared_model.dup
      # Keep variable names that the model copy did not carry over
      if @model.variables.nil? && !shared_model.variables.nil?
        @model.variables=shared_model.variables
      end
      @engine_obj=nil
    end

    # Builds the engine selected by #engine, runs it and returns it.
    # Raises ArgumentError when the engine selector is unknown.
    def compute
      @engine_obj=case @engine
        when :openmx
          OpenMxEngine.new(@model)
        when :sem
          SemJFoxEngine.new(@model)
        else
          raise ArgumentError, "Unknown engine #{@engine.inspect}, use :sem or :openmx"
      end
      @engine_obj.compute
      @engine_obj
    end

    # Engine holding the results; computed on first use
    def engine_obj
      @engine_obj||=compute
    end

    # Summary reported by the engine
    def r_summary
      engine_obj.r_summary
    end
  end
end
@@ -0,0 +1,244 @@
1
+ module Statsample
2
+ class SEM
3
+ class Model
4
+ include Summarizable
5
+ # Type of data used. Could be +:covariance+, +:correlation+ or +:raw+
6
+ attr_reader :data_type
7
+ # Covariance/correlation matrix
8
+ attr_reader :matrix
9
+ # Raw data on a dataset
10
+ attr_reader :ds
11
+ # Number of cases
12
+ attr_accessor :cases
13
+ # Optional array of mean for use when data is matrix based
14
+ attr_accessor :means
15
+ # Name of variables
16
+ attr_reader :variables
17
+
18
+ attr_reader :paths
19
# Creates an empty model: no paths, manifests or latents yet.
#
# opts:: Hash merged over the default +:name+; anything that is not a
#        Hash raises ArgumentError
# block:: optional; called with the model when it takes an argument,
#         evaluated in the model's context otherwise
def initialize(opts=Hash.new, &block)
  unless opts.is_a?(Hash)
    raise ArgumentError, "opts should be a Hash"
  end
  @opts = {:name=>_("SEM Model")}.merge(opts)
  @paths = Hash.new
  @manifests = nil
  @latents = nil
  return if block.nil?
  if block.arity < 1
    instance_eval(&block)
  else
    block.call(self)
  end
end
30
# Builds a new Model with the same options, data, latents, manifests
# and paths as this one.
#
# For matrix data the current variable names are passed along explicitly;
# otherwise names assigned by hand with #variables= would be lost on the
# copy. A model without data yet is copied without loading any data.
def dup
  mod=Model.new(@opts)
  if @data_type==:raw
    mod.data_from_dataset(@ds)
  elsif !@data_type.nil?
    mod.data_from_matrix(@matrix, :type=>@data_type, :cases=>@cases, :means=>@means, :variable_names=>@variables)
  end
  mod.latents=latents
  mod.manifests=manifests
  @paths.each_value do |v|
    # Use the stored endpoints: the hash key is sorted and may not
    # reflect the direction of the path
    mod.add_raw_path(v[:from],v[:to],v[:arrow],v[:label],v[:free],v[:value])
  end
  mod
end
46
# Turns the model into a null model: every path is dropped and each
# variable that was on a path keeps only a variance path (arrows=2)
# labeled "var NAME". Variables listed in @variables get a free variance;
# the others get a variance fixed to 1.0.
def make_null
  names = variables_on_paths
  @paths.clear
  names.each do |name|
    observed = @variables.include?(name)
    fixed_value = observed ? nil : 1.0
    add_raw_path(name, name, 2, "var #{name}", observed, fixed_value)
  end
end
61
# True when the model has enough information to be processed: a data
# type, manifests, latents and at least one path.
def complete?
  return false if @data_type.nil? || @manifests.nil? || @latents.nil?
  @paths.size > 0
end
65
# Stores a path as given, without any validation or defaults. The entry
# is keyed by the sorted pair of endpoints, so a path already stored for
# the same pair is replaced.
def add_raw_path(f1,f2,arrows,label,free,value)
  key = [f1, f2].sort
  @paths[key] = {
    :from  => f1,
    :to    => f2,
    :arrow => arrows,
    :label => label,
    :free  => free,
    :value => value
  }
end
68
# Adds one path between +f1+ and +f2+, resolving per-path settings.
#
# arrows:: 1 or 2; only used as given, plus to build the default label
# free:: a single value, or an Array indexed by +i+
# values:: a Numeric used for every path, or an Array indexed by +i+;
#          anything else means no value
# labels:: indexable by +i+; when missing, the label is "F1 to F2"
#          for arrows=1 and "F1 cov F2" otherwise
# i:: position of this path within the caller's batch
#
# A free path never keeps a value. Raises if a path between the same
# pair of variables, in either direction, already exists.
def add_path(f1,f2,arrows,free,values,labels,i)
  # Paths are keyed by the sorted pair; look up that very key
  key = [f1,f2].sort
  raise "Path repeated : #{f1},#{f2}" if @paths.has_key? key

  arrow_s = (arrows==1) ? "to" : "cov"
  label = (labels.nil? or !labels.respond_to?(:[]) or labels[i].nil?) ? "#{f1} #{arrow_s} #{f2}" : labels[i]

  free_v = free.is_a?(Array) ? free[i] : free
  value = case values
          when Array then values[i]
          when Numeric then values
          end
  value = nil if free_v

  @paths[key]={ :from=>f1, :to=>f2, :arrow=>arrows, :label=>label, :free=>free_v, :value=>value}
end
84
# Set one or more paths. Based on OpenMx mxPath method.
#
# ==Options:
# * +:from+ : String or Array. Sources of the new paths. Required
# * +:to+ : String or Array. Sinks of the new paths. Defaults to +:from+
# * +:all+ : bool. If true, connect all sources to all sinks. If false,
#   connect sources to sinks one-on-one; both must have the same size.
#   All-to-all is always used when either side has a single element
# * +:arrows+ : 1 for regression, 2 for variance-covariance. See rules
#   for the automatic setting
# * +:free+ : Indicates whether paths are free or fixed. By default, true
# * +:values+ : The starting values of the parameters
# * +:labels+ : The names of paths
# ==Rules
# * +:from+ equal to +:to+ (or no +:to+): variances. arrows=2 unless
#   given, label "var NAME" unless given
# * +:from+ different from +:to+: arrows=1 unless given; a missing label
#   is filled in by #add_path
# * Raises if +:from+ is missing, or if one-on-one pairing is requested
#   with sides of different size
def path(opts)
  raise "Requires at least :from option" unless opts.has_key? :from

  free = opts.has_key?(:free) ? opts[:free] : true
  all  = opts.has_key?(:all)  ? opts[:all]  : false
  labels = opts[:labels]
  arrows = opts[:arrows]
  values = opts[:values]

  from = opts[:from]
  from = [from] if from.is_a? String
  to = opts[:to] || from
  to = [to] if to.is_a? String

  if from == to
    # Variances
    arrows ||= 2
    variance_labels = from.each_with_index.map do |f, idx|
      (labels.nil? or !labels.respond_to?(:[]) or labels[idx].nil?) ? "var #{f}" : labels[idx]
    end
    from.each_with_index do |f, idx|
      add_path(f, f, arrows, free, values, variance_labels, idx)
    end
  else
    # Regressions and covariances
    arrows ||= 1
    if from.size==1 or to.size==1 or all
      # Every source to every sink; the counter indexes per-path settings
      counter = 0
      from.each do |f1|
        to.each do |f2|
          add_path(f1, f2, arrows, free, values, labels, counter)
          counter += 1
        end
      end
    else
      raise ":from and :to should be the same size" if from.size!=to.size
      from.size.times do |idx|
        add_path(from[idx], to[idx], arrows, free, values, labels, idx)
      end
    end
  end
end
158
# Sets the names of the variables; every name is stored as a String.
#
# When a matrix is loaded, the number of names must match its number of
# rows, otherwise ArgumentError is raised. With raw data, or before any
# matrix is loaded, no size check is done.
def variables=(v)
  if @data_type!=:raw and !@matrix.nil? and v.size!=@matrix.row_size
    # The expected size is the matrix's, not the current variable list's
    raise ArgumentError, "Should be size=#{@matrix.row_size}"
  end
  @variables=v.map {|i| i.to_s}
end
162
# Names of all the variables used as an endpoint of some path, without
# repetitions, in order of first appearance on the path keys.
def variables_on_paths
  @paths.keys.each_with_object([]) do |pair, seen|
    seen << pair[0] unless seen.include?(pair[0])
    seen << pair[1] unless seen.include?(pair[1])
  end
end
171
# Number of distinct variables used on the model's paths
def k
  variables_on_paths.length
end
175
# Latents are inferred as the variables used on some path that are
# not listed on @variables
def infer_latents
  on_paths = variables_on_paths
  on_paths - @variables
end
180
# Manifests are inferred as the variables used on some path that are
# also listed on @variables
def infer_manifests
  on_paths = variables_on_paths
  on_paths & @variables
end
184
# Label of the path stored under +key+ (the sorted pair of endpoints)
def get_label(key)
  entry = @paths[key]
  entry[:label]
end
187
# Reader and DSL-style writer for the latent variables.
#
# Without arguments, returns the latents; when none were set and the
# model already has paths and data, they are inferred first (see
# #infer_latents). With an Array as first argument, that array becomes
# the latents. With one or more names, the list of those names becomes
# the latents.
def latents(*argv)
  if argv.size==0
    if @latents.nil? and @paths.size>0 and !@data_type.nil?
      @latents=infer_latents
    end
    @latents
  elsif argv[0].is_a? Array
    @latents=argv[0]
  else
    # argv is already the array of names; wrapping it again would nest it
    @latents=argv
  end
end

# Sets the latent variables to +argv+ as given
def latents=(argv)
  @latents=argv
end
202
+
203
# Reader and DSL-style writer for the manifest variables.
#
# Without arguments, returns the manifests; when none were set and the
# model already has paths and data, they are inferred first (see
# #infer_manifests). With an Array as first argument, that array becomes
# the manifests. With one or more names, the list of those names becomes
# the manifests.
def manifests(*argv)
  if argv.size==0
    if @manifests.nil? and @paths.size>0 and !@data_type.nil?
      @manifests=infer_manifests
    end
    @manifests
  elsif argv[0].is_a? Array
    @manifests=argv[0]
  else
    # argv is already the array of names; wrapping it again would nest it
    @manifests=argv
  end
end

# Sets the manifest variables to +argv+ as given
def manifests=(argv)
  @manifests=argv
end
218
# Uses the raw data of dataset +ds+: the data type becomes +:raw+ and
# the variable names are taken from the dataset's fields.
def data_from_dataset(ds)
  @ds = ds
  @data_type = :raw
  @variables = ds.fields
end
223
+
224
# Uses a covariance or correlation matrix as data.
#
# ==Options:
# * +:cases+ : number of cases. Required; raises if missing
# * +:type+ : +:covariance+; any other value is taken as correlation.
#   When not given, it is asked to the matrix if it responds to +type+,
#   and is +:covariance+ otherwise
# * +:means+ : optional means, stored as given
# * +:variable_names+ : names of the variables. When not given, the
#   matrix's +fields+ are used if it responds to that method; otherwise
#   the current names are left untouched
def data_from_matrix(matrix,opts=Hash.new)
  kind = opts[:type] || (matrix.respond_to?(:type) ? matrix.type : :covariance)
  names = opts[:variable_names]
  n_cases = opts[:cases]
  raise "You should set number of cases" if n_cases.nil?

  @data_type = (kind == :covariance) ? :covariance : :correlation
  @matrix = matrix
  @cases = n_cases
  @means = opts[:means]
  if !names.nil?
    @variables = names
  elsif @matrix.respond_to?(:fields)
    @variables = @matrix.fields
  end
end
242
+ end
243
+ end
244
+ end
@@ -0,0 +1,144 @@
1
+ require 'rserve'
2
+ require 'tempfile'
3
+ module Statsample
4
+ class SEM
5
+ class OpenMxEngine
6
+ include DirtyMemoize
7
+ include Summarizable
8
+ attr_accessor :summarizable
9
+ attr_accessor :name
10
+ attr_reader :r_summary
11
# Creates an engine for +model+.
#
# opts:: Hash merged over the defaults; +:name+ gives the engine's name
def initialize(model,opts=Hash.new)
  @model=model
  defaults = {
    :name=>_("SEM analysis using OpenMx")
  }
  # Merge the caller's options over the defaults, so they take effect
  @opts=defaults.merge opts
  @name=@opts[:name]
end
19
# Connection to Rserve, opened on first use and reused afterwards
def r
  @r || (@r = Rserve::Connection.new)
end
22
# R source with one mxPath() call per path of the model, separated by
# a comma and a newline. The values and labels arguments are only
# written for paths that have them.
def r_mxpaths
  statements = @model.paths.values.map do |spec|
    value_arg = spec[:value] ? ", values = #{spec[:value]}" : ""
    label_arg = spec[:label] ? ", labels = \"#{spec[:label]}\"" : ""
    free_flag = spec[:free] ? "TRUE" : "FALSE"
    "mxPath(from=c(\"#{spec[:from]}\"), to=c(\"#{spec[:to]}\"), arrows = #{spec[:arrow]}, free = #{free_flag} #{value_arg} #{label_arg})"
  end
  statements.join(",\n")
end
29
# R source of the mxData() call for the model's data. The type is 'raw',
# 'cov' or 'cor'; for matrix data the number of observations is added,
# and the means too when the model has them.
def r_mxdata
  kind = @model.data_type
  type = {:raw=>'raw', :covariance=>'cov', :correlation=>'cor'}[kind]
  matrix_based = (kind != :raw)
  means = (matrix_based and !@model.means.nil?) ? ", means = d_means " : ""
  num = matrix_based ? ", numObs = #{@model.cases} " : ""

  "mxData(observed=data, type='#{type}' #{means} #{num})"
end
43
+
44
# R script that loads OpenMx, defines a RAM model named after this
# engine from the paths and data of the model, runs it into factorFit
# and removes the variables assigned for the run.
def r_query
  script = [
    "library(OpenMx);",
    "factorModel <- mxModel(",
    "name=\"#{name}\",",
    "type=\"RAM\",",
    "manifestVars = manifests,",
    "latentVars = latents,",
    "#{r_mxpaths},",
    "#{r_mxdata}",
    ");",
    "factorFit<-mxRun(factorModel);",
    "rm(data,manifests,latents,d_means);"
  ]
  script.join("\n") + "\n"
end
60
+
61
# Runs the model on R and stores the summary of the fit.
#
# Assigns to the R session the data (dataset or matrix), the variable
# names as dimnames when there is a matrix, the manifests, the latents
# and, if present, the means; then evaluates #r_query and keeps
# summary(factorFit), converted to Ruby, on @r_summary.
# Raises if the model is not complete. Returns true.
def compute
  raise "Insuficient information" unless @model.complete?
  conn = r
  conn.assign 'data', (@model.data_type==:raw ? @model.ds : @model.matrix)
  if @model.matrix
    # Name the rows and columns of the matrix after the variables
    conn.assign 'vn', @model.variables
    conn.void_eval('dimnames(data)<-list(vn,vn)')
  end
  conn.assign 'manifests', @model.manifests
  conn.assign 'latents', @model.latents
  conn.assign('d_means', @model.means) unless @model.means.nil?
  conn.void_eval r_query
  @r_summary = conn.eval('summary(factorFit)').to_ruby
  true
end
76
# Returns the text that OpenMx's omxGraphviz writes for the model.
# Computes the model first if there is no summary yet.
def graphviz
  compute if @r_summary.nil?
  tf=Tempfile.new('model.dot')
  begin
    r.void_eval("omxGraphviz(factorModel,'#{tf.path}')")
    # Read by path: the file is written by R, not through this handle
    File.read(tf.path)
  ensure
    # Close and remove the temporary file instead of leaving it to the GC
    tf.close!
  end
end
84
# Chi-square of the model: the 'Chi' entry of the R summary
def chi_square
  summary = @r_summary
  summary['Chi']
end

# Degrees of freedom: the 'degreesOfFreedom' entry of the R summary
def df
  summary = @r_summary
  summary['degreesOfFreedom']
end
90
# Chi-square of the null model: the 'Chi' entry of its R summary
def chi_square_null
  summary = null_model.r_summary
  summary['Chi']
end

# Degrees of freedom of the null model: the 'degreesOfFreedom' entry of
# its R summary
def df_null
  summary = null_model.r_summary
  summary['degreesOfFreedom']
end
96
+
97
# Engine for the null model, built on first use and reused afterwards
def null_model
  @null_model || (@null_model = compute_null_model)
end

# Builds an engine of this same class, with the same options, over a
# copy of the model turned into a null model.
def compute_null_model #:nodoc:
  baseline = @model.dup
  baseline.make_null
  self.class.new(baseline, @opts)
end
105
# The 'RMSEA' entry of the R summary
def rmsea
  summary = @r_summary
  summary['RMSEA']
end
108
# [χ2(Null Model) - χ2(Proposed Model)] / χ2(Null Model)
def nfi
  baseline = chi_square_null
  (baseline - chi_square).quo(baseline)
end
112
+
113
# [χ2/df(Null Model) - χ2/df(Proposed Model)] / [χ2/df(Null Model) - 1]
def nnfi
  null_ratio = chi_square_null.quo(df_null)
  model_ratio = chi_square.quo(df)
  (null_ratio - model_ratio).quo(null_ratio - 1)
end
117
# [(χ2 - df)(Null Model) - (χ2 - df)(Proposed Model)] / (χ2 - df)(Null Model)
def cfi
  null_excess = chi_square_null - df_null
  model_excess = chi_square - df
  (null_excess - model_excess).quo(null_excess)
end
121
# Computes chi_square + (k*(k-1)/2 - df) * ln(cases), where k is the
# number of variables on the model's paths.
def bic
  k=@model.k
  ln_n=Math.log(@model.cases)
  chi_square+((k*(k-1)).quo(2) - df)*ln_n
end
127
# Estimates from the 'parameters' entry of the R summary, as a Hash
# keyed by the sorted pair [row, col] of each parameter. Every value
# holds +:estimate+, +:se+ and +:label+; +:z+ and +:p+ are always nil.
def coefficients
  # Columns: 0:name, 1:matrix, 2:row, 3:col, 4:estimate, 5:Std.error
  params = @r_summary['parameters']
  result = Hash.new
  params[0].each_with_index do |label, idx|
    pair = [params[2][idx], params[3][idx]].sort
    result[pair] = {
      :estimate => params[4][idx],
      :se       => params[5][idx],
      :z        => nil,
      :p        => nil,
      :label    => label
    }
  end
  result
end
140
+ dirty_memoize :chi_square, :df, :rmsea, :coefficients, :r_summary, :chi_square_null, :df_null, :nfi, :nnfi
141
+
142
+ end
143
+ end
144
+ end