statsample 0.11.2 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data.tar.gz.sig +1 -2
- data/History.txt +11 -0
- data/Manifest.txt +4 -0
- data/README.txt +14 -5
- data/Rakefile +24 -3
- data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
- data/doc_latex/manual/equations.tex +78 -0
- data/examples/reliability.rb +1 -1
- data/lib/distribution.rb +5 -1
- data/lib/distribution/normalbivariate.rb +7 -1
- data/lib/distribution/normalmultivariate.rb +73 -0
- data/lib/distribution/t.rb +34 -1
- data/lib/statsample.rb +2 -1
- data/lib/statsample/anova/twoway.rb +1 -1
- data/lib/statsample/bivariate/polychoric.rb +190 -69
- data/lib/statsample/factor/pca.rb +1 -1
- data/lib/statsample/graph/svgscatterplot.rb +10 -1
- data/lib/statsample/reliability.rb +38 -191
- data/lib/statsample/reliability/multiscaleanalysis.rb +87 -0
- data/lib/statsample/reliability/scaleanalysis.rb +204 -0
- data/po/es/statsample.mo +0 -0
- data/po/es/statsample.po +193 -49
- data/po/statsample.pot +173 -40
- data/test/test_bivariate_polychoric.rb +6 -6
- data/test/test_distribution.rb +1 -1
- data/test/test_reliability.rb +87 -8
- data/test/test_vector.rb +0 -8
- metadata +44 -36
- metadata.gz.sig +0 -0
@@ -1,3 +1,4 @@
|
|
1
|
+
require 'tempfile'
|
1
2
|
module Statsample
|
2
3
|
module Graph
|
3
4
|
class SvgScatterplot < SVG::Graph::Plot # :nodoc:
|
@@ -7,6 +8,14 @@ module Statsample
|
|
7
8
|
@ds=ds
|
8
9
|
set_x(@ds.fields[0])
|
9
10
|
end
|
11
|
+
# Adds this plot to an HTML report as an image.
#
# The graph is parsed, the output of #burn is written to a temporary
# '.svg' file, and a ReportBuilder::Image pointing at that file is handed
# to the generator.
#
# g:: report generator; only +parse_element+ is called on it.
def report_building_html(g)
  parse
  tf = Tempfile.new(['image', '.svg'])
  begin
    tf.write(burn)
  ensure
    # Release the file handle even when #burn raises. The file itself is
    # deliberately not unlinked here: the image element refers to its path.
    tf.close
  end
  image = ReportBuilder::Image.new(tf.path)
  g.parse_element(image)
end
|
10
19
|
def set_defaults
|
11
20
|
super
|
12
21
|
init_with(
|
@@ -27,7 +36,7 @@ module Statsample
|
|
27
36
|
}
|
28
37
|
data.each{|y,d|
|
29
38
|
add_data({
|
30
|
-
|
39
|
+
:data=>d, :title=>@ds[y].name
|
31
40
|
})
|
32
41
|
}
|
33
42
|
end
|
@@ -21,13 +21,44 @@ module Statsample
|
|
21
21
|
}.to_dataset
|
22
22
|
cronbach_alpha(ds)
|
23
23
|
end
|
24
|
+
# Cronbach's alpha computed from summary figures rather than raw data:
#
#   (n / (n-1)) * (1 - s2 / (s2 + (n-1)*cov))
#
# n::   number of items
# s2::  mean of the item variances
# cov:: mean of the inter-item covariances
#
# Divisions use +quo+, so Integer arguments produce an exact Rational.
def cronbach_alpha_from_n_s2_cov(n,s2,cov)
  item_ratio = n.quo(n - 1)
  scale_term = s2 + (n - 1) * cov
  item_ratio * (1 - s2.quo(scale_term))
end
|
27
|
+
# Estimates the number of items needed to obtain a desired alpha,
# given the mean variance and mean covariance of the items.
#
# alpha:: target Cronbach's alpha
# s2::    mean of the item variances
# cov::   mean of the inter-item covariances
#
# The search starts at 50 items and repeatedly halves the interval
# between a lower bound (initially 2) and an upper bound (initially 1000),
# evaluating cronbach_alpha_from_n_s2_cov at each candidate. It stops when
# the alpha for the candidate is within 0.0001 of the target, or when the
# truncated candidate no longer changes. In the second case the returned
# Integer is simply where the search stalled, so its alpha may differ
# from the target (for example when the target is not reachable inside
# the bounds).
def n_for_desired_alpha(alpha,s2,cov)
  min = 2
  max = 1000
  n = 50 # start with a regular test: 50 items
  prev_n = 0
  epsilon = 0.0001
  dif = cronbach_alpha_from_n_s2_cov(n, s2, cov) - alpha
  while dif.abs > epsilon && n != prev_n
    prev_n = n
    if dif < 0
      # alpha too low: raise the lower bound and move up half the interval
      min = n
      n = (n + (max - min).quo(2)).to_i
    else
      # alpha too high: lower the upper bound and move down half the interval
      max = n
      n = (n - (max - min).quo(2)).to_i
    end
    dif = cronbach_alpha_from_n_s2_cov(n, s2, cov) - alpha
  end
  n
end
|
24
55
|
# First derivative for alpha
# Parameters
# <tt>n</tt>: Number of items
# <tt>sx</tt>: mean of variances
# <tt>sxy</tt>: mean of covariances
#
# Evaluates sxy*(sx-sxy) / (sxy*(n-1) + sx)**2 using +quo+, so Integer
# arguments give an exact Rational.
def alpha_first_derivative(n,sx,sxy)
  numerator = sxy * (sx - sxy)
  denominator = ((sxy * (n - 1)) + sx)**2
  numerator.quo(denominator)
end
|
33
64
|
# Second derivative for alpha
|
@@ -75,193 +106,9 @@ module Statsample
|
|
75
106
|
out[value]=count_value.quo(n)
|
76
107
|
end
|
77
108
|
out
|
78
|
-
end
|
79
|
-
end
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
@ds=ds.dup_only_valid
|
85
|
-
@k=@ds.fields.size
|
86
|
-
@total=@ds.vector_sum
|
87
|
-
@item_mean=@ds.vector_mean.mean
|
88
|
-
@mean=@total.mean
|
89
|
-
@median=@total.median
|
90
|
-
@skew=@total.skew
|
91
|
-
@kurtosis=@total.kurtosis
|
92
|
-
@sd = @total.sd
|
93
|
-
@variance=@total.variance
|
94
|
-
@valid_n = @total.size
|
95
|
-
opts_default={:name=>"Reliability Analisis"}
|
96
|
-
@opts=opts_default.merge(opts)
|
97
|
-
@name=@opts[:name]
|
98
|
-
# Mean for covariances and variances
|
99
|
-
@variances=@ds.fields.map {|f| @ds[f].variance}.to_scale
|
100
|
-
@variances_mean=@variances.mean
|
101
|
-
@covariances_mean=(@variance-@variances.sum).quo(@k**2-@k)
|
102
|
-
begin
|
103
|
-
@alpha = Statsample::Reliability.cronbach_alpha(ds)
|
104
|
-
@alpha_standarized = Statsample::Reliability.cronbach_alpha_standarized(ds)
|
105
|
-
rescue => e
|
106
|
-
raise DatasetException.new(@ds,e), "Error calculating alpha"
|
107
|
-
end
|
108
|
-
end
|
109
|
-
# Returns a hash with structure
|
110
|
-
def item_characteristic_curve
|
111
|
-
i=0
|
112
|
-
out={}
|
113
|
-
total={}
|
114
|
-
@ds.each do |row|
|
115
|
-
tot=@total[i]
|
116
|
-
@ds.fields.each do |f|
|
117
|
-
out[f]||= {}
|
118
|
-
total[f]||={}
|
119
|
-
out[f][tot]||= 0
|
120
|
-
total[f][tot]||=0
|
121
|
-
out[f][tot]+= row[f]
|
122
|
-
total[f][tot]+=1
|
123
|
-
end
|
124
|
-
i+=1
|
125
|
-
end
|
126
|
-
total.each do |f,var|
|
127
|
-
var.each do |tot,v|
|
128
|
-
out[f][tot]=out[f][tot].to_f / total[f][tot]
|
129
|
-
end
|
130
|
-
end
|
131
|
-
out
|
132
|
-
end
|
133
|
-
def gnuplot_item_characteristic_curve(directory, base="crd",options={})
|
134
|
-
require 'gnuplot'
|
135
|
-
|
136
|
-
crd=item_characteristic_curve
|
137
|
-
@ds.fields.each do |f|
|
138
|
-
x=[]
|
139
|
-
y=[]
|
140
|
-
Gnuplot.open do |gp|
|
141
|
-
Gnuplot::Plot.new( gp ) do |plot|
|
142
|
-
crd[f].sort.each do |tot,prop|
|
143
|
-
x.push(tot)
|
144
|
-
y.push((prop*100).to_i.to_f/100)
|
145
|
-
end
|
146
|
-
plot.data << Gnuplot::DataSet.new( [x, y] ) do |ds|
|
147
|
-
ds.with = "linespoints"
|
148
|
-
ds.notitle
|
149
|
-
end
|
150
|
-
|
151
|
-
end
|
152
|
-
end
|
153
|
-
end
|
154
|
-
end
|
155
|
-
def svggraph_item_characteristic_curve(directory, base="icc",options={})
|
156
|
-
require 'statsample/graph/svggraph'
|
157
|
-
crd=ItemCharacteristicCurve.new(@ds)
|
158
|
-
@ds.fields.each do |f|
|
159
|
-
factors=@ds[f].factors.sort
|
160
|
-
options={
|
161
|
-
:height=>500,
|
162
|
-
:width=>800,
|
163
|
-
:key=>true
|
164
|
-
}.update(options)
|
165
|
-
graph = ::SVG::Graph::Plot.new(options)
|
166
|
-
factors.each do |factor|
|
167
|
-
factor=factor.to_s
|
168
|
-
dataset=[]
|
169
|
-
crd.curve_field(f, factor).each do |tot,prop|
|
170
|
-
dataset.push(tot)
|
171
|
-
dataset.push((prop*100).to_i.to_f/100)
|
172
|
-
end
|
173
|
-
graph.add_data({
|
174
|
-
:title=>"#{factor}",
|
175
|
-
:data=>dataset
|
176
|
-
})
|
177
|
-
end
|
178
|
-
File.open(directory+"/"+base+"_#{f}.svg","w") {|fp|
|
179
|
-
fp.puts(graph.burn())
|
180
|
-
}
|
181
|
-
end
|
182
|
-
end
|
183
|
-
def item_total_correlation
|
184
|
-
@ds.fields.inject({}) do |a,v|
|
185
|
-
vector=@ds[v].dup
|
186
|
-
ds2=@ds.dup
|
187
|
-
ds2.delete_vector(v)
|
188
|
-
total=ds2.vector_sum
|
189
|
-
a[v]=Statsample::Bivariate.pearson(vector,total)
|
190
|
-
a
|
191
|
-
end
|
192
|
-
end
|
193
|
-
def item_statistics
|
194
|
-
@ds.fields.inject({}) do |a,v|
|
195
|
-
a[v]={:mean=>@ds[v].mean,:sds=>@ds[v].sds}
|
196
|
-
a
|
197
|
-
end
|
198
|
-
end
|
199
|
-
# Returns a dataset with cases ordered by score
|
200
|
-
# and variables ordered by difficulty
|
201
|
-
|
202
|
-
def item_difficulty_analysis
|
203
|
-
dif={}
|
204
|
-
@ds.fields.each{|f| dif[f]=@ds[f].mean }
|
205
|
-
dif_sort=dif.sort{|a,b| -(a[1]<=>b[1])}
|
206
|
-
scores_sort={}
|
207
|
-
scores=@ds.vector_mean
|
208
|
-
scores.each_index{|i| scores_sort[i]=scores[i] }
|
209
|
-
scores_sort=scores_sort.sort{|a,b| a[1]<=>b[1]}
|
210
|
-
ds_new=Statsample::Dataset.new(['case','score'] + dif_sort.collect{|a,b| a})
|
211
|
-
scores_sort.each do |i,score|
|
212
|
-
row=[i, score]
|
213
|
-
case_row=@ds.case_as_hash(i)
|
214
|
-
dif_sort.each{|variable,dif_value| row.push(case_row[variable]) }
|
215
|
-
ds_new.add_case_array(row)
|
216
|
-
end
|
217
|
-
ds_new.update_valid_data
|
218
|
-
ds_new
|
219
|
-
end
|
220
|
-
def stats_if_deleted
|
221
|
-
@ds.fields.inject({}) do |a,v|
|
222
|
-
ds2=@ds.dup
|
223
|
-
ds2.delete_vector(v)
|
224
|
-
total=ds2.vector_sum
|
225
|
-
a[v]={}
|
226
|
-
a[v][:mean]=total.mean
|
227
|
-
a[v][:sds]=total.sds
|
228
|
-
a[v][:variance_sample]=total.variance_sample
|
229
|
-
a[v][:alpha]=Statsample::Reliability.cronbach_alpha(ds2)
|
230
|
-
a
|
231
|
-
end
|
232
|
-
end
|
233
|
-
def summary
|
234
|
-
ReportBuilder.new(:no_title=>true).add(self).to_text
|
235
|
-
end
|
236
|
-
def report_building(builder)
|
237
|
-
builder.section(:name=>@name) do |s|
|
238
|
-
s.table(:name=>"Summary") do |t|
|
239
|
-
t.row ["Items", @ds.fields.size]
|
240
|
-
t.row ["Total Mean", @mean]
|
241
|
-
t.row ["Total S.D.", @sd]
|
242
|
-
t.row ["Total Variance", @variance]
|
243
|
-
t.row ["Item Mean", @item_mean]
|
244
|
-
t.row ["Median", @median]
|
245
|
-
t.row ["Skewness", "%0.4f" % @skew]
|
246
|
-
t.row ["Kurtosis", "%0.4f" % @kurtosis]
|
247
|
-
t.row ["Valid n", @valid_n]
|
248
|
-
t.row ["Cronbach's alpha", "%0.4f" % @alpha]
|
249
|
-
t.row ["Standarized Cronbach's alpha", "%0.4f" % @alpha_standarized]
|
250
|
-
t.row ["Variances mean", "%g" % @variances_mean]
|
251
|
-
t.row ["Covariances mean" , "%g" % @covariances_mean]
|
252
|
-
end
|
253
|
-
|
254
|
-
itc=item_total_correlation
|
255
|
-
sid=stats_if_deleted
|
256
|
-
is=item_statistics
|
257
|
-
|
258
|
-
s.table(:name=>"Items report", :header=>["item","mean","sd", "mean if deleted", "var if deleted", "sd if deleted"," item-total correl.", "alpha if deleted"]) do |t|
|
259
|
-
@ds.fields.each do |f|
|
260
|
-
t.row(["#{@ds[f].name}(#{f})", sprintf("%0.5f",is[f][:mean]), sprintf("%0.5f",is[f][:sds]), sprintf("%0.5f",sid[f][:mean]), sprintf("%0.5f",sid[f][:variance_sample]), sprintf("%0.5f",sid[f][:sds]), sprintf("%0.5f",itc[f]), sprintf("%0.5f",sid[f][:alpha])])
|
261
|
-
end
|
262
|
-
end
|
263
|
-
end
|
264
|
-
end
|
265
|
-
end
|
266
|
-
end
|
267
|
-
end
|
109
|
+
end # def
|
110
|
+
end # self
|
111
|
+
end # Reliability
|
112
|
+
end # Statsample
|
113
|
+
require 'statsample/reliability/scaleanalysis.rb'
|
114
|
+
require 'statsample/reliability/multiscaleanalysis.rb'
|
@@ -0,0 +1,87 @@
|
|
1
|
+
module Statsample
  module Reliability
    # DSL for the reliability analysis of several scales at once.
    # Every scale is wrapped in a ScaleAnalysis; the report can optionally
    # include the correlation matrix of the scale totals and a PCA on it.
    #
    # == Usage
    #  @x1=[1,1,1,1,2,2,2,2,3,3,3,30].to_vector(:scale)
    #  @x2=[1,1,1,2,2,3,3,3,3,4,4,50].to_vector(:scale)
    #  @x3=[2,2,1,1,1,2,2,2,3,4,5,40].to_vector(:scale)
    #  @x4=[1,2,3,4,4,4,4,3,4,4,5,30].to_vector(:scale)
    #  ds={'x1'=>@x1,'x2'=>@x2,'x3'=>@x3,'x4'=>@x4}.to_dataset
    #  msa=Statsample::Reliability::MultiScaleAnalysis.new(:name=>"Scales") do |m|
    #    m.scale :s1, ds.clone(%w{x1 x2}), :name=>"Section 1"
    #    m.scale :s2, ds.clone(%w{x3 x4}), :name=>"Section 2"
    #    m.summary_correlation_matrix=true
    #    m.summary_pca=true
    #  end
    #  puts msa.summary
    class MultiScaleAnalysis
      include Statsample::Summarizable
      # Hash of code => ScaleAnalysis
      attr_reader :scales
      # Name of the analysis, used as the title of the report section
      attr_accessor :name
      # If true, the report includes the correlation matrix of scale totals
      attr_accessor :summary_correlation_matrix
      # If true, the report includes a PCA of the scale totals
      attr_accessor :summary_pca
      # Default options for #pca and #factor_analysis
      attr_accessor :pca_options

      # opts:: Hash; keys matching a writer of this class (:name,
      #        :summary_correlation_matrix, :summary_pca, :pca_options)
      #        are assigned, any other key is ignored.
      # block:: optional. A block without parameters is instance_eval'ed;
      #         otherwise the new object is passed as its argument.
      def initialize(opts=Hash.new, &block)
        @scales=Hash.new
        opts_default={ :name=>_("Multiple Scale analysis"),
          :summary_correlation_matrix=>false,
          :summary_pca=>false,
          :pca_options=>Hash.new}
        @opts=opts_default.merge(opts)
        @opts.each do |k,v|
          # Test for the writer that is actually called, not for the reader:
          # a key such as :scales has a reader but no writer.
          writer="#{k}="
          send(writer,v) if respond_to?(writer)
        end

        if block
          block.arity<1 ? instance_eval(&block) : block.call(self)
        end
      end

      # Retrieves or defines a scale.
      #
      # With only +code+, returns the ScaleAnalysis stored under that code
      # (nil if there is none). With a dataset, builds a ScaleAnalysis from
      # it, stores it under +code+ and returns it. +opts+ is passed to
      # ScaleAnalysis.new; when omitted the scale is named "Scale <code>".
      def scale(code,ds=nil, opts=nil)
        return @scales[code] if ds.nil?
        opts={:name=>_("Scale %s") % code} if opts.nil?
        @scales[code]=ScaleAnalysis.new(ds, opts)
      end

      # Removes the scale stored under +code+.
      def delete_scale(code)
        @scales.delete code
      end

      # PCA on the correlation matrix of the scale totals.
      # Uses +pca_options+ when no options are given.
      def pca(opts=nil)
        opts||=pca_options
        Statsample::Factor::PCA.new(correlation_matrix,opts)
      end

      # Factor analysis on the correlation matrix of the scale totals.
      # Uses +pca_options+ when no options are given.
      # NOTE(review): Statsample::Factor::FactorAnalysis is not defined in
      # this file; confirm that the class exists.
      def factor_analysis(opts=nil)
        opts||=pca_options
        Statsample::Factor::FactorAnalysis.new(correlation_matrix,opts)
      end

      # Correlation matrix between the scales, each one represented by
      # the vector sum of its dataset.
      def correlation_matrix
        vectors=Hash.new
        @scales.each_pair do |code,scale|
          vectors[code.to_s]=scale.ds.vector_sum
        end
        Statsample::Bivariate.correlation_matrix(vectors.to_dataset)
      end

      # Builds the report: one section per scale, followed by the
      # correlation matrix and the PCA when the matching flag is set.
      def report_building(b)
        b.section(:name=>name) do |s|
          s.section(:name=>_("Reliability analysis of scales")) do |s2|
            @scales.each_value do |scale|
              s2.parse_element(scale)
            end
          end
          if summary_correlation_matrix
            s.section(:name=>_("Correlation matrix for %s") % name) do |s2|
              s2.parse_element(correlation_matrix)
            end
          end
          if summary_pca
            s.section(:name=>_("PCA for %s") % name) do |s2|
              s2.parse_element(pca)
            end
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,204 @@
|
|
1
|
+
module Statsample
  module Reliability
    # Analysis of a scale. Analogous to the Scale Reliability analysis of SPSS.
    # Returns several statistics for the complete scale and for each item.
    # == Usage
    #  @x1=[1,1,1,1,2,2,2,2,3,3,3,30].to_vector(:scale)
    #  @x2=[1,1,1,2,2,3,3,3,3,4,4,50].to_vector(:scale)
    #  @x3=[2,2,1,1,1,2,2,2,3,4,5,40].to_vector(:scale)
    #  @x4=[1,2,3,4,4,4,4,3,4,4,5,30].to_vector(:scale)
    #  ds={'x1'=>@x1,'x2'=>@x2,'x3'=>@x3,'x4'=>@x4}.to_dataset
    #  ia=Statsample::Reliability::ScaleAnalysis.new(ds)
    #  puts ia.summary
    class ScaleAnalysis
      include Summarizable
      attr_reader :ds,:mean, :sd,:valid_n, :alpha , :alpha_standarized, :variances_mean, :covariances_mean
      attr_accessor :name

      # ds::   dataset with one vector per item. A copy produced by
      #        +dup_only_valid+ is stored, and every statistic is computed
      #        on that copy.
      # opts:: Hash; only :name is read.
      #
      # Raises DatasetException if either alpha cannot be calculated.
      def initialize(ds, opts=Hash.new)
        @ds=ds.dup_only_valid
        @k=@ds.fields.size
        @total=@ds.vector_sum
        @item_mean=@ds.vector_mean.mean
        @mean=@total.mean
        @median=@total.median
        @skew=@total.skew
        @kurtosis=@total.kurtosis
        @sd = @total.sd
        @variance=@total.variance
        @valid_n = @total.size
        opts_default={:name=>"Reliability Analisis"}
        @opts=opts_default.merge(opts)
        @name=@opts[:name]
        # Mean for covariances and variances
        @variances=@ds.fields.map {|f| @ds[f].variance}.to_scale
        @variances_mean=@variances.mean
        # Variance of the total minus the item variances leaves the sum of
        # the k*(k-1) off-diagonal covariances.
        @covariances_mean=(@variance-@variances.sum).quo(@k**2-@k)
        begin
          # Computed on @ds, the same data used for every other statistic
          # and the data reported by the exception below.
          @alpha = Statsample::Reliability.cronbach_alpha(@ds)
          @alpha_standarized = Statsample::Reliability.cronbach_alpha_standarized(@ds)
        rescue => e
          raise DatasetException.new(@ds,e), "Error calculating alpha"
        end
      end

      # Returns a hash with structure
      #   {field => {total score => mean value of the field}}
      # where the mean is taken over the cases that share that total score.
      def item_characteristic_curve
        sums={}
        counts={}
        i=0
        @ds.each do |row|
          tot=@total[i]
          @ds.fields.each do |f|
            sums[f]||={}
            counts[f]||={}
            sums[f][tot]=(sums[f][tot] || 0) + row[f]
            counts[f][tot]=(counts[f][tot] || 0) + 1
          end
          i+=1
        end
        counts.each do |f,by_total|
          by_total.each do |tot,count|
            sums[f][tot]=sums[f][tot].to_f / count
          end
        end
        sums
      end

      # Plots the item characteristic curve of every field with gnuplot.
      # Values are truncated to two decimals before plotting.
      # +directory+, +base+ and +options+ are accepted but not referenced
      # in this method.
      def gnuplot_item_characteristic_curve(directory, base="crd",options={})
        require 'gnuplot'

        crd=item_characteristic_curve
        @ds.fields.each do |f|
          x=[]
          y=[]
          Gnuplot.open do |gp|
            Gnuplot::Plot.new( gp ) do |plot|
              crd[f].sort.each do |tot,prop|
                x.push(tot)
                y.push((prop*100).to_i.to_f/100)
              end
              plot.data << Gnuplot::DataSet.new( [x, y] ) do |ds|
                ds.with = "linespoints"
                ds.notitle
              end
            end
          end
        end
      end

      # Writes one SVG file per field, named "<base>_<field>.svg", into
      # +directory+. Each file has one data series per factor of the field.
      #
      # options:: merged over the defaults :height=>500, :width=>800,
      #           :key=>true and passed to SVG::Graph::Plot.new
      def svggraph_item_characteristic_curve(directory, base="icc",options={})
        require 'statsample/graph/svggraph'
        crd=ItemCharacteristicCurve.new(@ds)
        @ds.fields.each do |f|
          # A fresh hash per graph; the caller's options are left untouched.
          graph_options={
            :height=>500,
            :width=>800,
            :key=>true
          }.update(options)
          graph = ::SVG::Graph::Plot.new(graph_options)
          @ds[f].factors.sort.each do |factor|
            factor=factor.to_s
            dataset=[]
            crd.curve_field(f, factor).each do |tot,prop|
              dataset.push(tot)
              dataset.push((prop*100).to_i.to_f/100)
            end
            graph.add_data({
              :title=>factor,
              :data=>dataset
            })
          end
          File.open(File.join(directory, "#{base}_#{f}.svg"),"w") {|fp|
            fp.puts(graph.burn())
          }
        end
      end

      # Returns a hash field => Pearson correlation between the item and
      # the sum of the remaining items.
      def item_total_correlation
        @ds.fields.inject({}) do |a,v|
          vector=@ds[v].dup
          ds2=@ds.dup
          ds2.delete_vector(v)
          total=ds2.vector_sum
          a[v]=Statsample::Bivariate.pearson(vector,total)
          a
        end
      end

      # Returns a hash field => {:mean, :sds} for every item.
      def item_statistics
        @ds.fields.inject({}) do |a,v|
          a[v]={:mean=>@ds[v].mean,:sds=>@ds[v].sds}
          a
        end
      end

      # Returns a dataset with cases ordered by score (ascending)
      # and variables ordered by their mean (descending).
      # The first two fields are 'case' (index of the case in the
      # analyzed dataset) and 'score' (mean of the items for that case).
      def item_difficulty_analysis
        dif={}
        @ds.fields.each{|f| dif[f]=@ds[f].mean }
        dif_sort=dif.sort{|a,b| b[1]<=>a[1]}
        scores_sort={}
        scores=@ds.vector_mean
        scores.each_index{|i| scores_sort[i]=scores[i] }
        scores_sort=scores_sort.sort{|a,b| a[1]<=>b[1]}
        ds_new=Statsample::Dataset.new(['case','score'] + dif_sort.map{|field,_| field})
        scores_sort.each do |i,score|
          row=[i, score]
          case_row=@ds.case_as_hash(i)
          dif_sort.each{|variable,_| row.push(case_row[variable]) }
          ds_new.add_case_array(row)
        end
        ds_new.update_valid_data
        ds_new
      end

      # Returns a hash field => {:mean, :sds, :variance_sample, :alpha}
      # with the statistics of the scale total if that item is deleted.
      def stats_if_deleted
        @ds.fields.inject({}) do |a,v|
          ds2=@ds.dup
          ds2.delete_vector(v)
          total=ds2.vector_sum
          a[v]={
            :mean=>total.mean,
            :sds=>total.sds,
            :variance_sample=>total.variance_sample,
            :alpha=>Statsample::Reliability.cronbach_alpha(ds2)
          }
          a
        end
      end

      # Builds the report: a summary table, the estimated number of items
      # for alpha 0.8 and 0.9, and a per-item table.
      def report_building(builder)
        builder.section(:name=>@name) do |s|
          s.table(:name=>_("Summary for %s") % @name) do |t|
            t.row [_("Items"), @ds.fields.size]
            t.row [_("Sum mean"), @mean]
            t.row [_("Sum sd"), @sd]
            t.row [_("Sum variance"), @variance]
            t.row [_("Sum median"), @median]
            t.hr
            t.row [_("Item mean"), @item_mean]
            t.row [_("Skewness"), "%0.4f" % @skew]
            t.row [_("Kurtosis"), "%0.4f" % @kurtosis]
            t.hr
            t.row [_("Valid n"), @valid_n]
            t.row [_("Cronbach's alpha"), "%0.4f" % @alpha]
            t.row [_("Standarized Cronbach's alpha"), "%0.4f" % @alpha_standarized]
            t.hr
            t.row [_("Variances mean"), "%g" % @variances_mean]
            t.row [_("Covariances mean") , "%g" % @covariances_mean]
          end
          # Translate the template first and format afterwards: formatting
          # inside _() would look up the already interpolated string, which
          # cannot match the message id.
          s.text(_("items for obtain alpha(0.8) : %d") % Statsample::Reliability::n_for_desired_alpha(0.8, @variances_mean,@covariances_mean))
          s.text(_("items for obtain alpha(0.9) : %d") % Statsample::Reliability::n_for_desired_alpha(0.9, @variances_mean,@covariances_mean))
          itc=item_total_correlation
          sid=stats_if_deleted
          is=item_statistics

          s.table(:name=>_("Items report for %s") % @name, :header=>["item","mean","sd", "mean if deleted", "var if deleted", "sd if deleted"," item-total correl.", "alpha if deleted"]) do |t|
            @ds.fields.each do |f|
              values=[is[f][:mean], is[f][:sds], sid[f][:mean], sid[f][:variance_sample], sid[f][:sds], itc[f], sid[f][:alpha]]
              t.row(["#{@ds[f].name}(#{f})"] + values.map {|v| sprintf("%0.5f",v) })
            end # end each
          end # table
        end # section
      end # def
    end # class
  end # module
end # module
|