statsample 0.11.2 → 0.12.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -39,7 +39,7 @@ module Factor
39
39
  include GetText
40
40
  bindtextdomain("statsample")
41
41
 
42
- def initialize(matrix ,opts=Hash.new)
42
+ def initialize(matrix, opts=Hash.new)
43
43
  @use_gsl=nil
44
44
  @name=_("Principal Component Analysis")
45
45
  @matrix=matrix
@@ -1,3 +1,4 @@
1
+ require 'tempfile'
1
2
  module Statsample
2
3
  module Graph
3
4
  class SvgScatterplot < SVG::Graph::Plot # :nodoc:
@@ -7,6 +8,14 @@ module Statsample
7
8
  @ds=ds
8
9
  set_x(@ds.fields[0])
9
10
  end
11
+ def report_building_html(g)
12
+ self.parse()
13
+ tf=Tempfile.new(['image','.svg'])
14
+ tf.write self.burn
15
+ tf.close
16
+ image=ReportBuilder::Image.new(tf.path)
17
+ g.parse_element(image)
18
+ end
10
19
  def set_defaults
11
20
  super
12
21
  init_with(
@@ -27,7 +36,7 @@ module Statsample
27
36
  }
28
37
  data.each{|y,d|
29
38
  add_data({
30
- :data=>d, :title=>@ds.vector_label(y)
39
+ :data=>d, :title=>@ds[y].name
31
40
  })
32
41
  }
33
42
  end
@@ -21,13 +21,44 @@ module Statsample
21
21
  }.to_dataset
22
22
  cronbach_alpha(ds)
23
23
  end
24
+ def cronbach_alpha_from_n_s2_cov(n,s2,cov)
25
+ (n.quo(n-1)) * (1-(s2.quo(s2+(n-1)*cov)))
26
+ end
27
+ # Returns the number of items (n) necessary to obtain a specific alpha,
28
+ # given the mean variance and the mean covariance of the items
29
+ def n_for_desired_alpha(alpha,s2,cov)
30
+ # Start the search from a regular test length: 50 items
31
+ min=2
32
+ max=1000
33
+ n=50
34
+ prev_n=0
35
+ epsilon=0.0001
36
+ dif=1000
37
+ c_a=cronbach_alpha_from_n_s2_cov(n,s2,cov)
38
+ dif=c_a - alpha
39
+ while(dif.abs>epsilon and n!=prev_n)
40
+ prev_n=n
41
+ if dif<0
42
+ min=n
43
+ n=(n+(max-min).quo(2)).to_i
44
+ else
45
+ max=n
46
+ n=(n-(max-min).quo(2)).to_i
47
+ end
48
+ c_a=cronbach_alpha_from_n_s2_cov(n,s2,cov)
49
+ dif=c_a - alpha
50
+ #puts "#{n} , #{c_a}"
51
+
52
+ end
53
+ n
54
+ end
24
55
  # First derivative for alpha
25
56
  # Parameters
26
57
  # <tt>n</tt>: Number of items
27
58
  # <tt>sx</tt>: mean of variances
28
59
  # <tt>sxy</tt>: mean of covariances
29
60
 
30
- def alfa_first_derivative(n,sx,sxy)
61
+ def alpha_first_derivative(n,sx,sxy)
31
62
  (sxy*(sx-sxy)).quo(((sxy*(n-1))+sx)**2)
32
63
  end
33
64
  # Second derivative for alpha
@@ -75,193 +106,9 @@ module Statsample
75
106
  out[value]=count_value.quo(n)
76
107
  end
77
108
  out
78
- end
79
- end
80
- class ItemAnalysis
81
- attr_reader :mean, :sd,:valid_n, :alpha , :alpha_standarized, :variances_mean, :covariances_mean
82
- attr_accessor :name
83
- def initialize(ds,opts=Hash.new)
84
- @ds=ds.dup_only_valid
85
- @k=@ds.fields.size
86
- @total=@ds.vector_sum
87
- @item_mean=@ds.vector_mean.mean
88
- @mean=@total.mean
89
- @median=@total.median
90
- @skew=@total.skew
91
- @kurtosis=@total.kurtosis
92
- @sd = @total.sd
93
- @variance=@total.variance
94
- @valid_n = @total.size
95
- opts_default={:name=>"Reliability Analisis"}
96
- @opts=opts_default.merge(opts)
97
- @name=@opts[:name]
98
- # Mean for covariances and variances
99
- @variances=@ds.fields.map {|f| @ds[f].variance}.to_scale
100
- @variances_mean=@variances.mean
101
- @covariances_mean=(@variance-@variances.sum).quo(@k**2-@k)
102
- begin
103
- @alpha = Statsample::Reliability.cronbach_alpha(ds)
104
- @alpha_standarized = Statsample::Reliability.cronbach_alpha_standarized(ds)
105
- rescue => e
106
- raise DatasetException.new(@ds,e), "Error calculating alpha"
107
- end
108
- end
109
- # Returns a hash with structure
110
- def item_characteristic_curve
111
- i=0
112
- out={}
113
- total={}
114
- @ds.each do |row|
115
- tot=@total[i]
116
- @ds.fields.each do |f|
117
- out[f]||= {}
118
- total[f]||={}
119
- out[f][tot]||= 0
120
- total[f][tot]||=0
121
- out[f][tot]+= row[f]
122
- total[f][tot]+=1
123
- end
124
- i+=1
125
- end
126
- total.each do |f,var|
127
- var.each do |tot,v|
128
- out[f][tot]=out[f][tot].to_f / total[f][tot]
129
- end
130
- end
131
- out
132
- end
133
- def gnuplot_item_characteristic_curve(directory, base="crd",options={})
134
- require 'gnuplot'
135
-
136
- crd=item_characteristic_curve
137
- @ds.fields.each do |f|
138
- x=[]
139
- y=[]
140
- Gnuplot.open do |gp|
141
- Gnuplot::Plot.new( gp ) do |plot|
142
- crd[f].sort.each do |tot,prop|
143
- x.push(tot)
144
- y.push((prop*100).to_i.to_f/100)
145
- end
146
- plot.data << Gnuplot::DataSet.new( [x, y] ) do |ds|
147
- ds.with = "linespoints"
148
- ds.notitle
149
- end
150
-
151
- end
152
- end
153
- end
154
- end
155
- def svggraph_item_characteristic_curve(directory, base="icc",options={})
156
- require 'statsample/graph/svggraph'
157
- crd=ItemCharacteristicCurve.new(@ds)
158
- @ds.fields.each do |f|
159
- factors=@ds[f].factors.sort
160
- options={
161
- :height=>500,
162
- :width=>800,
163
- :key=>true
164
- }.update(options)
165
- graph = ::SVG::Graph::Plot.new(options)
166
- factors.each do |factor|
167
- factor=factor.to_s
168
- dataset=[]
169
- crd.curve_field(f, factor).each do |tot,prop|
170
- dataset.push(tot)
171
- dataset.push((prop*100).to_i.to_f/100)
172
- end
173
- graph.add_data({
174
- :title=>"#{factor}",
175
- :data=>dataset
176
- })
177
- end
178
- File.open(directory+"/"+base+"_#{f}.svg","w") {|fp|
179
- fp.puts(graph.burn())
180
- }
181
- end
182
- end
183
- def item_total_correlation
184
- @ds.fields.inject({}) do |a,v|
185
- vector=@ds[v].dup
186
- ds2=@ds.dup
187
- ds2.delete_vector(v)
188
- total=ds2.vector_sum
189
- a[v]=Statsample::Bivariate.pearson(vector,total)
190
- a
191
- end
192
- end
193
- def item_statistics
194
- @ds.fields.inject({}) do |a,v|
195
- a[v]={:mean=>@ds[v].mean,:sds=>@ds[v].sds}
196
- a
197
- end
198
- end
199
- # Returns a dataset with cases ordered by score
200
- # and variables ordered by difficulty
201
-
202
- def item_difficulty_analysis
203
- dif={}
204
- @ds.fields.each{|f| dif[f]=@ds[f].mean }
205
- dif_sort=dif.sort{|a,b| -(a[1]<=>b[1])}
206
- scores_sort={}
207
- scores=@ds.vector_mean
208
- scores.each_index{|i| scores_sort[i]=scores[i] }
209
- scores_sort=scores_sort.sort{|a,b| a[1]<=>b[1]}
210
- ds_new=Statsample::Dataset.new(['case','score'] + dif_sort.collect{|a,b| a})
211
- scores_sort.each do |i,score|
212
- row=[i, score]
213
- case_row=@ds.case_as_hash(i)
214
- dif_sort.each{|variable,dif_value| row.push(case_row[variable]) }
215
- ds_new.add_case_array(row)
216
- end
217
- ds_new.update_valid_data
218
- ds_new
219
- end
220
- def stats_if_deleted
221
- @ds.fields.inject({}) do |a,v|
222
- ds2=@ds.dup
223
- ds2.delete_vector(v)
224
- total=ds2.vector_sum
225
- a[v]={}
226
- a[v][:mean]=total.mean
227
- a[v][:sds]=total.sds
228
- a[v][:variance_sample]=total.variance_sample
229
- a[v][:alpha]=Statsample::Reliability.cronbach_alpha(ds2)
230
- a
231
- end
232
- end
233
- def summary
234
- ReportBuilder.new(:no_title=>true).add(self).to_text
235
- end
236
- def report_building(builder)
237
- builder.section(:name=>@name) do |s|
238
- s.table(:name=>"Summary") do |t|
239
- t.row ["Items", @ds.fields.size]
240
- t.row ["Total Mean", @mean]
241
- t.row ["Total S.D.", @sd]
242
- t.row ["Total Variance", @variance]
243
- t.row ["Item Mean", @item_mean]
244
- t.row ["Median", @median]
245
- t.row ["Skewness", "%0.4f" % @skew]
246
- t.row ["Kurtosis", "%0.4f" % @kurtosis]
247
- t.row ["Valid n", @valid_n]
248
- t.row ["Cronbach's alpha", "%0.4f" % @alpha]
249
- t.row ["Standarized Cronbach's alpha", "%0.4f" % @alpha_standarized]
250
- t.row ["Variances mean", "%g" % @variances_mean]
251
- t.row ["Covariances mean" , "%g" % @covariances_mean]
252
- end
253
-
254
- itc=item_total_correlation
255
- sid=stats_if_deleted
256
- is=item_statistics
257
-
258
- s.table(:name=>"Items report", :header=>["item","mean","sd", "mean if deleted", "var if deleted", "sd if deleted"," item-total correl.", "alpha if deleted"]) do |t|
259
- @ds.fields.each do |f|
260
- t.row(["#{@ds[f].name}(#{f})", sprintf("%0.5f",is[f][:mean]), sprintf("%0.5f",is[f][:sds]), sprintf("%0.5f",sid[f][:mean]), sprintf("%0.5f",sid[f][:variance_sample]), sprintf("%0.5f",sid[f][:sds]), sprintf("%0.5f",itc[f]), sprintf("%0.5f",sid[f][:alpha])])
261
- end
262
- end
263
- end
264
- end
265
- end
266
- end
267
- end
109
+ end # def
110
+ end # self
111
+ end # Reliability
112
+ end # Statsample
113
+ require 'statsample/reliability/scaleanalysis.rb'
114
+ require 'statsample/reliability/multiscaleanalysis.rb'
@@ -0,0 +1,87 @@
1
+ module Statsample
2
+ module Reliability
3
+ # DSL for the analysis of multiple scales. Analogue of the Scale Reliability analysis in SPSS.
4
+ # Returns several statistics for the complete scale and for each item
5
+ # == Usage
6
+ # @x1=[1,1,1,1,2,2,2,2,3,3,3,30].to_vector(:scale)
7
+ # @x2=[1,1,1,2,2,3,3,3,3,4,4,50].to_vector(:scale)
8
+ # @x3=[2,2,1,1,1,2,2,2,3,4,5,40].to_vector(:scale)
9
+ # @x4=[1,2,3,4,4,4,4,3,4,4,5,30].to_vector(:scale)
10
+ # ds={'x1'=>@x1,'x2'=>@x2,'x3'=>@x3,'x4'=>@x4}.to_dataset
11
+ # msa=Statsample::Reliability::MultiScaleAnalysis.new(:name=>"Scales") do |m|
12
+ # m.scale :s1, ds.clone(%w{x1 x2}), {:name=>"Section 1"}
13
+ # m.scale :s2, ds.clone(%w{x3 x4}), {:name=>"Section 2"}
14
+ # m.correlation_matrix
15
+ # m.factor_analysis
16
+ # end
17
+ # puts msa.summary
18
+ class MultiScaleAnalysis
19
+ include Statsample::Summarizable
20
+ attr_reader :scales
21
+ attr_accessor :name
22
+ attr_accessor :summary_correlation_matrix
23
+ attr_accessor :summary_pca
24
+ attr_accessor :pca_options
25
+ def initialize(opts=Hash.new, &block)
26
+ @scales=Hash.new
27
+ opts_default={ :name=>_("Multiple Scale analysis"),
28
+ :summary_correlation_matrix=>false,
29
+ :summary_pca=>false,
30
+ :pca_options=>Hash.new}
31
+ @opts=opts_default.merge(opts)
32
+ @opts.each{|k,v|
33
+ self.send("#{k}=",v) if self.respond_to? k
34
+ }
35
+
36
+ if block
37
+ block.arity<1 ? instance_eval(&block) : block.call(self)
38
+ end
39
+ end
40
+ def scale(code,ds=nil, opts=nil)
41
+ if ds.nil?
42
+ @scales[code]
43
+ else
44
+ opts={:name=>_("Scale %s") % code} if opts.nil?
45
+ @scales[code]=ScaleAnalysis.new(ds, opts)
46
+ end
47
+ end
48
+ def delete_scale(code)
49
+ @scales.delete code
50
+ end
51
+ def pca(opts=Hash.new)
52
+ Statsample::Factor::PCA.new(correlation_matrix,opts)
53
+ end
54
+ def factor_analysis(opts=nil)
55
+ opts||=pca_options
56
+ Statsample::Factor::FactorAnalysis.new(correlation_matrix,opts)
57
+ end
58
+
59
+ def correlation_matrix
60
+ vectors=Hash.new
61
+ @scales.each_pair do |code,scale|
62
+ vectors[code.to_s]=scale.ds.vector_sum
63
+ end
64
+ Statsample::Bivariate.correlation_matrix(vectors.to_dataset)
65
+ end
66
+ def report_building(b)
67
+ b.section(:name=>name) do |s|
68
+ s.section(:name=>_("Reliability analysis of scales")) do |s2|
69
+ @scales.each_pair do |k,scale|
70
+ s2.parse_element(scale)
71
+ end
72
+ end
73
+ if summary_correlation_matrix
74
+ s.section(:name=>_("Correlation matrix for %s") % name) do |s2|
75
+ s2.parse_element(correlation_matrix)
76
+ end
77
+ end
78
+ if summary_pca
79
+ s.section(:name=>_("PCA for %s") % name) do |s2|
80
+ s2.parse_element(pca)
81
+ end
82
+ end
83
+ end
84
+ end
85
+ end
86
+ end
87
+ end
@@ -0,0 +1,204 @@
1
+ module Statsample
2
+ module Reliability
3
+ # Analysis of a scale. Analogue of the Scale Reliability analysis in SPSS.
4
+ # Returns several statistics for the complete scale and for each item
5
+ # == Usage
6
+ # @x1=[1,1,1,1,2,2,2,2,3,3,3,30].to_vector(:scale)
7
+ # @x2=[1,1,1,2,2,3,3,3,3,4,4,50].to_vector(:scale)
8
+ # @x3=[2,2,1,1,1,2,2,2,3,4,5,40].to_vector(:scale)
9
+ # @x4=[1,2,3,4,4,4,4,3,4,4,5,30].to_vector(:scale)
10
+ # ds={'x1'=>@x1,'x2'=>@x2,'x3'=>@x3,'x4'=>@x4}.to_dataset
11
+ # ia=Statsample::Reliability::ScaleAnalysis.new(ds)
12
+ # puts ia.summary
13
+ class ScaleAnalysis
14
+ include Summarizable
15
+ attr_reader :ds,:mean, :sd,:valid_n, :alpha , :alpha_standarized, :variances_mean, :covariances_mean
16
+ attr_accessor :name
17
+ def initialize(ds, opts=Hash.new)
18
+ @ds=ds.dup_only_valid
19
+ @k=@ds.fields.size
20
+ @total=@ds.vector_sum
21
+ @item_mean=@ds.vector_mean.mean
22
+ @mean=@total.mean
23
+ @median=@total.median
24
+ @skew=@total.skew
25
+ @kurtosis=@total.kurtosis
26
+ @sd = @total.sd
27
+ @variance=@total.variance
28
+ @valid_n = @total.size
29
+ opts_default={:name=>"Reliability Analisis"}
30
+ @opts=opts_default.merge(opts)
31
+ @name=@opts[:name]
32
+ # Mean for covariances and variances
33
+ @variances=@ds.fields.map {|f| @ds[f].variance}.to_scale
34
+ @variances_mean=@variances.mean
35
+ @covariances_mean=(@variance-@variances.sum).quo(@k**2-@k)
36
+ begin
37
+ @alpha = Statsample::Reliability.cronbach_alpha(ds)
38
+ @alpha_standarized = Statsample::Reliability.cronbach_alpha_standarized(ds)
39
+ rescue => e
40
+ raise DatasetException.new(@ds,e), "Error calculating alpha"
41
+ end
42
+ end
43
+ # Returns a hash with structure {field => {total_score => mean value of the field among cases with that total score}}
44
+ def item_characteristic_curve
45
+ i=0
46
+ out={}
47
+ total={}
48
+ @ds.each do |row|
49
+ tot=@total[i]
50
+ @ds.fields.each do |f|
51
+ out[f]||= {}
52
+ total[f]||={}
53
+ out[f][tot]||= 0
54
+ total[f][tot]||=0
55
+ out[f][tot]+= row[f]
56
+ total[f][tot]+=1
57
+ end
58
+ i+=1
59
+ end
60
+ total.each do |f,var|
61
+ var.each do |tot,v|
62
+ out[f][tot]=out[f][tot].to_f / total[f][tot]
63
+ end
64
+ end
65
+ out
66
+ end
67
+ def gnuplot_item_characteristic_curve(directory, base="crd",options={})
68
+ require 'gnuplot'
69
+
70
+ crd=item_characteristic_curve
71
+ @ds.fields.each do |f|
72
+ x=[]
73
+ y=[]
74
+ Gnuplot.open do |gp|
75
+ Gnuplot::Plot.new( gp ) do |plot|
76
+ crd[f].sort.each do |tot,prop|
77
+ x.push(tot)
78
+ y.push((prop*100).to_i.to_f/100)
79
+ end
80
+ plot.data << Gnuplot::DataSet.new( [x, y] ) do |ds|
81
+ ds.with = "linespoints"
82
+ ds.notitle
83
+ end
84
+
85
+ end
86
+ end
87
+ end
88
+ end
89
+ def svggraph_item_characteristic_curve(directory, base="icc",options={})
90
+ require 'statsample/graph/svggraph'
91
+ crd=ItemCharacteristicCurve.new(@ds)
92
+ @ds.fields.each do |f|
93
+ factors=@ds[f].factors.sort
94
+ options={
95
+ :height=>500,
96
+ :width=>800,
97
+ :key=>true
98
+ }.update(options)
99
+ graph = ::SVG::Graph::Plot.new(options)
100
+ factors.each do |factor|
101
+ factor=factor.to_s
102
+ dataset=[]
103
+ crd.curve_field(f, factor).each do |tot,prop|
104
+ dataset.push(tot)
105
+ dataset.push((prop*100).to_i.to_f/100)
106
+ end
107
+ graph.add_data({
108
+ :title=>"#{factor}",
109
+ :data=>dataset
110
+ })
111
+ end
112
+ File.open(directory+"/"+base+"_#{f}.svg","w") {|fp|
113
+ fp.puts(graph.burn())
114
+ }
115
+ end
116
+ end
117
+ def item_total_correlation
118
+ @ds.fields.inject({}) do |a,v|
119
+ vector=@ds[v].dup
120
+ ds2=@ds.dup
121
+ ds2.delete_vector(v)
122
+ total=ds2.vector_sum
123
+ a[v]=Statsample::Bivariate.pearson(vector,total)
124
+ a
125
+ end
126
+ end
127
+ def item_statistics
128
+ @ds.fields.inject({}) do |a,v|
129
+ a[v]={:mean=>@ds[v].mean,:sds=>@ds[v].sds}
130
+ a
131
+ end
132
+ end
133
+ # Returns a dataset with cases ordered by score
134
+ # and variables ordered by difficulty
135
+
136
+ def item_difficulty_analysis
137
+ dif={}
138
+ @ds.fields.each{|f| dif[f]=@ds[f].mean }
139
+ dif_sort=dif.sort{|a,b| -(a[1]<=>b[1])}
140
+ scores_sort={}
141
+ scores=@ds.vector_mean
142
+ scores.each_index{|i| scores_sort[i]=scores[i] }
143
+ scores_sort=scores_sort.sort{|a,b| a[1]<=>b[1]}
144
+ ds_new=Statsample::Dataset.new(['case','score'] + dif_sort.collect{|a,b| a})
145
+ scores_sort.each do |i,score|
146
+ row=[i, score]
147
+ case_row=@ds.case_as_hash(i)
148
+ dif_sort.each{|variable,dif_value| row.push(case_row[variable]) }
149
+ ds_new.add_case_array(row)
150
+ end
151
+ ds_new.update_valid_data
152
+ ds_new
153
+ end
154
+ def stats_if_deleted
155
+ @ds.fields.inject({}) do |a,v|
156
+ ds2=@ds.dup
157
+ ds2.delete_vector(v)
158
+ total=ds2.vector_sum
159
+ a[v]={}
160
+ a[v][:mean]=total.mean
161
+ a[v][:sds]=total.sds
162
+ a[v][:variance_sample]=total.variance_sample
163
+ a[v][:alpha]=Statsample::Reliability.cronbach_alpha(ds2)
164
+ a
165
+ end
166
+ end
167
+ def report_building(builder)
168
+ builder.section(:name=>@name) do |s|
169
+ s.table(:name=>_("Summary for %s") % @name) do |t|
170
+ t.row [_("Items"), @ds.fields.size]
171
+ t.row [_("Sum mean"), @mean]
172
+ t.row [_("Sum sd"), @sd]
173
+ t.row [_("Sum variance"), @variance]
174
+ t.row [_("Sum median"), @median]
175
+ t.hr
176
+ t.row [_("Item mean"), @item_mean]
177
+ t.row [_("Skewness"), "%0.4f" % @skew]
178
+ t.row [_("Kurtosis"), "%0.4f" % @kurtosis]
179
+ t.hr
180
+ t.row [_("Valid n"), @valid_n]
181
+ t.row [_("Cronbach's alpha"), "%0.4f" % @alpha]
182
+ t.row [_("Standarized Cronbach's alpha"), "%0.4f" % @alpha_standarized]
183
+ t.hr
184
+ t.row [_("Variances mean"), "%g" % @variances_mean]
185
+ t.row [_("Covariances mean") , "%g" % @covariances_mean]
186
+ end
187
+ s.text _("items for obtain alpha(0.8) : %d" % Statsample::Reliability::n_for_desired_alpha(0.8, @variances_mean,@covariances_mean))
188
+ s.text _("items for obtain alpha(0.9) : %d" % Statsample::Reliability::n_for_desired_alpha(0.9, @variances_mean,@covariances_mean))
189
+ itc=item_total_correlation
190
+ sid=stats_if_deleted
191
+ is=item_statistics
192
+
193
+
194
+
195
+ s.table(:name=>_("Items report for %s") % @name, :header=>["item","mean","sd", "mean if deleted", "var if deleted", "sd if deleted"," item-total correl.", "alpha if deleted"]) do |t|
196
+ @ds.fields.each do |f|
197
+ t.row(["#{@ds[f].name}(#{f})", sprintf("%0.5f",is[f][:mean]), sprintf("%0.5f",is[f][:sds]), sprintf("%0.5f",sid[f][:mean]), sprintf("%0.5f",sid[f][:variance_sample]), sprintf("%0.5f",sid[f][:sds]), sprintf("%0.5f",itc[f]), sprintf("%0.5f",sid[f][:alpha])])
198
+ end # end each
199
+ end # table
200
+ end # section
201
+ end # def
202
+ end # class
203
+ end # module
204
+ end # module