statsample-ekatena 2.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/.travis.yml +23 -0
- data/CONTRIBUTING.md +17 -0
- data/Gemfile +2 -0
- data/History.txt +457 -0
- data/LICENSE.txt +12 -0
- data/README.md +175 -0
- data/Rakefile +44 -0
- data/benchmarks/correlation_matrix_15_variables.rb +32 -0
- data/benchmarks/correlation_matrix_5_variables.rb +33 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.ds +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.html +93 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +71 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.xls +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix_gsl_ruby.ods +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix_with_graphics.ods +0 -0
- data/benchmarks/correlation_matrix_methods/results.ds +0 -0
- data/benchmarks/factor_map.rb +37 -0
- data/benchmarks/helpers_benchmark.rb +5 -0
- data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
- data/doc_latex/manual/equations.tex +78 -0
- data/examples/boxplot.rb +28 -0
- data/examples/chisquare_test.rb +23 -0
- data/examples/correlation_matrix.rb +32 -0
- data/examples/dataset.rb +30 -0
- data/examples/dominance_analysis.rb +33 -0
- data/examples/dominance_analysis_bootstrap.rb +32 -0
- data/examples/histogram.rb +26 -0
- data/examples/icc.rb +24 -0
- data/examples/levene.rb +29 -0
- data/examples/multiple_regression.rb +20 -0
- data/examples/multivariate_correlation.rb +33 -0
- data/examples/parallel_analysis.rb +40 -0
- data/examples/polychoric.rb +40 -0
- data/examples/principal_axis.rb +26 -0
- data/examples/reliability.rb +31 -0
- data/examples/scatterplot.rb +25 -0
- data/examples/t_test.rb +27 -0
- data/examples/tetrachoric.rb +17 -0
- data/examples/u_test.rb +24 -0
- data/examples/vector.rb +20 -0
- data/examples/velicer_map_test.rb +46 -0
- data/grab_references.rb +29 -0
- data/lib/spss.rb +134 -0
- data/lib/statsample-ekatena/analysis.rb +100 -0
- data/lib/statsample-ekatena/analysis/suite.rb +89 -0
- data/lib/statsample-ekatena/analysis/suitereportbuilder.rb +44 -0
- data/lib/statsample-ekatena/anova.rb +24 -0
- data/lib/statsample-ekatena/anova/contrast.rb +79 -0
- data/lib/statsample-ekatena/anova/oneway.rb +187 -0
- data/lib/statsample-ekatena/anova/twoway.rb +207 -0
- data/lib/statsample-ekatena/bivariate.rb +406 -0
- data/lib/statsample-ekatena/bivariate/pearson.rb +54 -0
- data/lib/statsample-ekatena/codification.rb +182 -0
- data/lib/statsample-ekatena/converter/csv.rb +28 -0
- data/lib/statsample-ekatena/converter/spss.rb +48 -0
- data/lib/statsample-ekatena/converters.rb +211 -0
- data/lib/statsample-ekatena/crosstab.rb +188 -0
- data/lib/statsample-ekatena/daru.rb +115 -0
- data/lib/statsample-ekatena/dataset.rb +10 -0
- data/lib/statsample-ekatena/dominanceanalysis.rb +425 -0
- data/lib/statsample-ekatena/dominanceanalysis/bootstrap.rb +232 -0
- data/lib/statsample-ekatena/factor.rb +104 -0
- data/lib/statsample-ekatena/factor/map.rb +124 -0
- data/lib/statsample-ekatena/factor/parallelanalysis.rb +166 -0
- data/lib/statsample-ekatena/factor/pca.rb +242 -0
- data/lib/statsample-ekatena/factor/principalaxis.rb +243 -0
- data/lib/statsample-ekatena/factor/rotation.rb +198 -0
- data/lib/statsample-ekatena/formula/fit_model.rb +46 -0
- data/lib/statsample-ekatena/formula/formula.rb +306 -0
- data/lib/statsample-ekatena/graph.rb +11 -0
- data/lib/statsample-ekatena/graph/boxplot.rb +236 -0
- data/lib/statsample-ekatena/graph/histogram.rb +198 -0
- data/lib/statsample-ekatena/graph/scatterplot.rb +213 -0
- data/lib/statsample-ekatena/histogram.rb +180 -0
- data/lib/statsample-ekatena/matrix.rb +329 -0
- data/lib/statsample-ekatena/multiset.rb +310 -0
- data/lib/statsample-ekatena/regression.rb +65 -0
- data/lib/statsample-ekatena/regression/multiple.rb +89 -0
- data/lib/statsample-ekatena/regression/multiple/alglibengine.rb +128 -0
- data/lib/statsample-ekatena/regression/multiple/baseengine.rb +251 -0
- data/lib/statsample-ekatena/regression/multiple/gslengine.rb +129 -0
- data/lib/statsample-ekatena/regression/multiple/matrixengine.rb +205 -0
- data/lib/statsample-ekatena/regression/multiple/rubyengine.rb +86 -0
- data/lib/statsample-ekatena/regression/simple.rb +121 -0
- data/lib/statsample-ekatena/reliability.rb +150 -0
- data/lib/statsample-ekatena/reliability/icc.rb +415 -0
- data/lib/statsample-ekatena/reliability/multiscaleanalysis.rb +181 -0
- data/lib/statsample-ekatena/reliability/scaleanalysis.rb +233 -0
- data/lib/statsample-ekatena/reliability/skillscaleanalysis.rb +114 -0
- data/lib/statsample-ekatena/resample.rb +15 -0
- data/lib/statsample-ekatena/shorthand.rb +125 -0
- data/lib/statsample-ekatena/srs.rb +169 -0
- data/lib/statsample-ekatena/test.rb +82 -0
- data/lib/statsample-ekatena/test/bartlettsphericity.rb +45 -0
- data/lib/statsample-ekatena/test/chisquare.rb +73 -0
- data/lib/statsample-ekatena/test/f.rb +52 -0
- data/lib/statsample-ekatena/test/kolmogorovsmirnov.rb +63 -0
- data/lib/statsample-ekatena/test/levene.rb +88 -0
- data/lib/statsample-ekatena/test/t.rb +309 -0
- data/lib/statsample-ekatena/test/umannwhitney.rb +208 -0
- data/lib/statsample-ekatena/test/wilcoxonsignedrank.rb +90 -0
- data/lib/statsample-ekatena/vector.rb +19 -0
- data/lib/statsample-ekatena/version.rb +3 -0
- data/lib/statsample.rb +282 -0
- data/po/es/statsample.mo +0 -0
- data/po/es/statsample.po +959 -0
- data/po/statsample.pot +947 -0
- data/references.txt +24 -0
- data/statsample-ekatena.gemspec +49 -0
- data/test/fixtures/bank2.dat +200 -0
- data/test/fixtures/correlation_matrix.rb +17 -0
- data/test/fixtures/df.csv +15 -0
- data/test/fixtures/hartman_23.matrix +9 -0
- data/test/fixtures/stock_data.csv +500 -0
- data/test/fixtures/tetmat_matrix.txt +5 -0
- data/test/fixtures/tetmat_test.txt +1001 -0
- data/test/helpers_tests.rb +83 -0
- data/test/test_analysis.rb +176 -0
- data/test/test_anova_contrast.rb +36 -0
- data/test/test_anovaoneway.rb +26 -0
- data/test/test_anovatwoway.rb +37 -0
- data/test/test_anovatwowaywithdataset.rb +47 -0
- data/test/test_anovawithvectors.rb +102 -0
- data/test/test_awesome_print_bug.rb +16 -0
- data/test/test_bartlettsphericity.rb +25 -0
- data/test/test_bivariate.rb +164 -0
- data/test/test_codification.rb +78 -0
- data/test/test_crosstab.rb +67 -0
- data/test/test_dominance_analysis.rb +39 -0
- data/test/test_factor.rb +228 -0
- data/test/test_factor_map.rb +38 -0
- data/test/test_factor_pa.rb +56 -0
- data/test/test_fit_model.rb +88 -0
- data/test/test_ggobi.rb +35 -0
- data/test/test_gsl.rb +15 -0
- data/test/test_histogram.rb +109 -0
- data/test/test_matrix.rb +48 -0
- data/test/test_multiset.rb +176 -0
- data/test/test_regression.rb +231 -0
- data/test/test_reliability.rb +223 -0
- data/test/test_reliability_icc.rb +198 -0
- data/test/test_reliability_skillscale.rb +57 -0
- data/test/test_resample.rb +24 -0
- data/test/test_srs.rb +9 -0
- data/test/test_statistics.rb +69 -0
- data/test/test_stest.rb +69 -0
- data/test/test_stratified.rb +17 -0
- data/test/test_test_f.rb +33 -0
- data/test/test_test_kolmogorovsmirnov.rb +34 -0
- data/test/test_test_t.rb +62 -0
- data/test/test_umannwhitney.rb +27 -0
- data/test/test_vector.rb +12 -0
- data/test/test_wilcoxonsignedrank.rb +64 -0
- metadata +570 -0
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
module Statsample
|
|
2
|
+
module Reliability
|
|
3
|
+
# Analysis of a Scale. Analoge of Scale Reliability analysis on SPSS.
|
|
4
|
+
# Returns several statistics for complete scale and each item
|
|
5
|
+
# == Usage
|
|
6
|
+
# @x1 = Daru::Vector.new([1,1,1,1,2,2,2,2,3,3,3,30])
|
|
7
|
+
# @x2 = Daru::Vector.new([1,1,1,2,2,3,3,3,3,4,4,50])
|
|
8
|
+
# @x3 = Daru::Vector.new([2,2,1,1,1,2,2,2,3,4,5,40])
|
|
9
|
+
# @x4 = Daru::Vector.new([1,2,3,4,4,4,4,3,4,4,5,30])
|
|
10
|
+
# ds = Daru::DataFrame.new({:x1 => @x1,:x2 => @x2,:x3 => @x3,:x4 => @x4})
|
|
11
|
+
# ia = Statsample::Reliability::ScaleAnalysis.new(ds)
|
|
12
|
+
# puts ia.summary
|
|
13
|
+
class ScaleAnalysis
|
|
14
|
+
include Summarizable
|
|
15
|
+
attr_reader :ds,:mean, :sd,:valid_n, :alpha , :alpha_standarized, :variances_mean, :covariances_mean, :cov_m
|
|
16
|
+
attr_accessor :name
|
|
17
|
+
attr_accessor :summary_histogram
|
|
18
|
+
def initialize(ds, opts=Hash.new)
|
|
19
|
+
@dumped=ds.vectors.to_a.find_all {|f|
|
|
20
|
+
ds[f].variance == 0
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
@ods = ds
|
|
24
|
+
@ds = ds.reject_values(*Daru::MISSING_VALUES).dup(ds.vectors.to_a - @dumped)
|
|
25
|
+
@ds.rename ds.name
|
|
26
|
+
|
|
27
|
+
@k = @ds.ncols
|
|
28
|
+
@total = @ds.vector_sum
|
|
29
|
+
@o_total=@dumped.size > 0 ? @ods.vector_sum : nil
|
|
30
|
+
|
|
31
|
+
@vector_mean = @ds.vector_mean
|
|
32
|
+
@item_mean = @vector_mean.mean
|
|
33
|
+
@item_sd = @vector_mean.sd
|
|
34
|
+
|
|
35
|
+
@mean = @total.mean
|
|
36
|
+
@median = @total.median
|
|
37
|
+
@skew = @total.skew
|
|
38
|
+
@kurtosis = @total.kurtosis
|
|
39
|
+
@sd = @total.sd
|
|
40
|
+
@variance = @total.variance
|
|
41
|
+
@valid_n = @total.size
|
|
42
|
+
|
|
43
|
+
opts_default = {
|
|
44
|
+
:name => _("Reliability Analysis"),
|
|
45
|
+
:summary_histogram => true
|
|
46
|
+
}
|
|
47
|
+
@opts = opts_default.merge(opts)
|
|
48
|
+
@opts.each{ |k,v| self.send("#{k}=",v) if self.respond_to? k }
|
|
49
|
+
|
|
50
|
+
@cov_m=Statsample::Bivariate.covariance_matrix(@ds)
|
|
51
|
+
# Mean for covariances and variances
|
|
52
|
+
@variances = Daru::Vector.new(@k.times.map { |i| @cov_m[i,i] })
|
|
53
|
+
@variances_mean=@variances.mean
|
|
54
|
+
@covariances_mean=(@variance-@variances.sum).quo(@k**2-@k)
|
|
55
|
+
#begin
|
|
56
|
+
@alpha = Statsample::Reliability.cronbach_alpha(@ds)
|
|
57
|
+
@alpha_standarized = Statsample::Reliability.cronbach_alpha_standarized(@ds)
|
|
58
|
+
#rescue => e
|
|
59
|
+
# raise DatasetException.new(@ds,e), "Error calculating alpha"
|
|
60
|
+
#end
|
|
61
|
+
end
|
|
62
|
+
# Returns a hash with structure
|
|
63
|
+
def item_characteristic_curve
|
|
64
|
+
i=0
|
|
65
|
+
out={}
|
|
66
|
+
total={}
|
|
67
|
+
@ds.each do |row|
|
|
68
|
+
tot=@total[i]
|
|
69
|
+
@ds.vectors.each do |f|
|
|
70
|
+
out[f]||= {}
|
|
71
|
+
total[f]||={}
|
|
72
|
+
out[f][tot]||= 0
|
|
73
|
+
total[f][tot]||=0
|
|
74
|
+
out[f][tot]+= row[f]
|
|
75
|
+
total[f][tot]+=1
|
|
76
|
+
end
|
|
77
|
+
i+=1
|
|
78
|
+
end
|
|
79
|
+
total.each do |f,var|
|
|
80
|
+
var.each do |tot,v|
|
|
81
|
+
out[f][tot]=out[f][tot].quo(total[f][tot])
|
|
82
|
+
end
|
|
83
|
+
end
|
|
84
|
+
out
|
|
85
|
+
end
|
|
86
|
+
# =Adjusted R.P.B. for each item
|
|
87
|
+
# Adjusted RPB(Point biserial-correlation) for each item
|
|
88
|
+
#
|
|
89
|
+
def item_total_correlation
|
|
90
|
+
vecs = @ds.vectors.to_a
|
|
91
|
+
@itc ||= vecs.inject({}) do |a,v|
|
|
92
|
+
total=@ds.vector_sum(vecs - [v])
|
|
93
|
+
a[v]=Statsample::Bivariate.pearson(@ds[v],total)
|
|
94
|
+
a
|
|
95
|
+
end
|
|
96
|
+
end
|
|
97
|
+
def mean_rpb
|
|
98
|
+
Daru::Vector.new(item_total_correlation.values).mean
|
|
99
|
+
end
|
|
100
|
+
def item_statistics
|
|
101
|
+
@is||=@ds.vectors.to_a.inject({}) do |a,v|
|
|
102
|
+
a[v]={:mean=>@ds[v].mean, :sds=>Math::sqrt(@cov_m.variance(v))}
|
|
103
|
+
a
|
|
104
|
+
end
|
|
105
|
+
end
|
|
106
|
+
# Returns a dataset with cases ordered by score
|
|
107
|
+
# and variables ordered by difficulty
|
|
108
|
+
|
|
109
|
+
def item_difficulty_analysis
|
|
110
|
+
dif={}
|
|
111
|
+
@ds.vectors.each{|f| dif[f]=@ds[f].mean }
|
|
112
|
+
dif_sort = dif.sort { |a,b| -(a[1]<=>b[1]) }
|
|
113
|
+
scores_sort={}
|
|
114
|
+
scores=@ds.vector_mean
|
|
115
|
+
scores.each_index{ |i| scores_sort[i]=scores[i] }
|
|
116
|
+
scores_sort=scores_sort.sort{|a,b| a[1]<=>b[1]}
|
|
117
|
+
ds_new = Daru::DataFrame.new({}, order: ([:case,:score] + dif_sort.collect{|a,b| a.to_sym}))
|
|
118
|
+
scores_sort.each do |i,score|
|
|
119
|
+
row = [i, score]
|
|
120
|
+
case_row = @ds.row[i].to_hash
|
|
121
|
+
dif_sort.each{ |variable,dif_value| row.push(case_row[variable]) }
|
|
122
|
+
ds_new.add_row(row)
|
|
123
|
+
end
|
|
124
|
+
ds_new
|
|
125
|
+
end
|
|
126
|
+
|
|
127
|
+
def stats_if_deleted
|
|
128
|
+
@sif||=stats_if_deleted_intern
|
|
129
|
+
end
|
|
130
|
+
|
|
131
|
+
def stats_if_deleted_intern # :nodoc:
|
|
132
|
+
return Hash.new if @ds.ncols == 1
|
|
133
|
+
vecs = @ds.vectors.to_a
|
|
134
|
+
vecs.inject({}) do |a,v|
|
|
135
|
+
cov_2=@cov_m.submatrix(vecs - [v])
|
|
136
|
+
#ds2=@ds.clone
|
|
137
|
+
#ds2.delete_vector(v)
|
|
138
|
+
#total=ds2.vector_sum
|
|
139
|
+
a[v]={}
|
|
140
|
+
#a[v][:mean]=total.mean
|
|
141
|
+
a[v][:mean]=@mean-item_statistics[v][:mean]
|
|
142
|
+
a[v][:variance_sample]=cov_2.total_sum
|
|
143
|
+
a[v][:sds]=Math::sqrt(a[v][:variance_sample])
|
|
144
|
+
n=cov_2.row_size
|
|
145
|
+
a[v][:alpha] = (n>=2) ? Statsample::Reliability.cronbach_alpha_from_covariance_matrix(cov_2) : nil
|
|
146
|
+
a
|
|
147
|
+
end
|
|
148
|
+
end
|
|
149
|
+
def report_building(builder) #:nodoc:
|
|
150
|
+
builder.section(:name=>@name) do |s|
|
|
151
|
+
|
|
152
|
+
if @dumped.size>0
|
|
153
|
+
s.section(:name=>"Items with variance=0") do |s1|
|
|
154
|
+
s.table(:name=>_("Summary for %s with all items") % @name) do |t|
|
|
155
|
+
t.row [_("Items"), @ods.ncols]
|
|
156
|
+
t.row [_("Sum mean"), "%0.4f" % @o_total.mean]
|
|
157
|
+
t.row [_("S.d. mean"), "%0.4f" % @o_total.sd]
|
|
158
|
+
end
|
|
159
|
+
s.table(:name=>_("Deleted items"), :header=>['item','mean']) do |t|
|
|
160
|
+
@dumped.each do |f|
|
|
161
|
+
t.row(["#{@ods[f].name}(#{f})", "%0.5f" % @ods[f].mean])
|
|
162
|
+
end
|
|
163
|
+
end
|
|
164
|
+
s.parse_element(Statsample::Graph::Histogram.new(@o_total, :name=>"Histogram (complete data) for %s" % @name)) if @summary_histogram
|
|
165
|
+
end
|
|
166
|
+
end
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
s.table(:name=>_("Summary for %s") % @name) do |t|
|
|
170
|
+
t.row [_("Valid Items"), @ds.ncols]
|
|
171
|
+
|
|
172
|
+
t.row [_("Valid cases"), @valid_n]
|
|
173
|
+
t.row [_("Sum mean"), "%0.4f" % @mean]
|
|
174
|
+
t.row [_("Sum sd"), "%0.4f" % @sd ]
|
|
175
|
+
# t.row [_("Sum variance"), "%0.4f" % @variance]
|
|
176
|
+
t.row [_("Sum median"), @median]
|
|
177
|
+
t.hr
|
|
178
|
+
t.row [_("Item mean"), "%0.4f" % @item_mean]
|
|
179
|
+
t.row [_("Item sd"), "%0.4f" % @item_sd]
|
|
180
|
+
t.hr
|
|
181
|
+
t.row [_("Skewness"), "%0.4f" % @skew]
|
|
182
|
+
t.row [_("Kurtosis"), "%0.4f" % @kurtosis]
|
|
183
|
+
t.hr
|
|
184
|
+
t.row [_("Cronbach's alpha"), @alpha ? ("%0.4f" % @alpha) : "--"]
|
|
185
|
+
t.row [_("Standarized Cronbach's alpha"), @alpha_standarized ? ("%0.4f" % @alpha_standarized) : "--" ]
|
|
186
|
+
t.row [_("Mean rpb"), "%0.4f" % mean_rpb]
|
|
187
|
+
|
|
188
|
+
t.row [_("Variances mean"), "%g" % @variances_mean]
|
|
189
|
+
t.row [_("Covariances mean") , "%g" % @covariances_mean]
|
|
190
|
+
end
|
|
191
|
+
|
|
192
|
+
if (@alpha)
|
|
193
|
+
s.text _("Items for obtain alpha(0.8) : %d" % Statsample::Reliability::n_for_desired_reliability(@alpha, 0.8, @ds.ncols))
|
|
194
|
+
s.text _("Items for obtain alpha(0.9) : %d" % Statsample::Reliability::n_for_desired_reliability(@alpha, 0.9, @ds.ncols))
|
|
195
|
+
end
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
sid=stats_if_deleted
|
|
199
|
+
is=item_statistics
|
|
200
|
+
itc=item_total_correlation
|
|
201
|
+
|
|
202
|
+
s.table(:name=>_("Items report for %s") % @name, :header=>["item","mean","sd", "mean if deleted", "var if deleted", "sd if deleted"," item-total correl.", "alpha if deleted"]) do |t|
|
|
203
|
+
@ds.vectors.each do |f|
|
|
204
|
+
row=["#{@ds[f].name}(#{f})"]
|
|
205
|
+
if is[f]
|
|
206
|
+
row+=[sprintf("%0.5f",is[f][:mean]), sprintf("%0.5f", is[f][:sds])]
|
|
207
|
+
else
|
|
208
|
+
row+=["-","-"]
|
|
209
|
+
end
|
|
210
|
+
if sid[f]
|
|
211
|
+
row+= [sprintf("%0.5f",sid[f][:mean]), sprintf("%0.5f",sid[f][:variance_sample]), sprintf("%0.5f",sid[f][:sds])]
|
|
212
|
+
else
|
|
213
|
+
row+=%w{- - -}
|
|
214
|
+
end
|
|
215
|
+
if itc[f]
|
|
216
|
+
row+= [sprintf("%0.5f",itc[f])]
|
|
217
|
+
else
|
|
218
|
+
row+=['-']
|
|
219
|
+
end
|
|
220
|
+
if sid[f] and !sid[f][:alpha].nil?
|
|
221
|
+
row+=[sprintf("%0.5f",sid[f][:alpha])]
|
|
222
|
+
else
|
|
223
|
+
row+=["-"]
|
|
224
|
+
end
|
|
225
|
+
t.row row
|
|
226
|
+
end # end each
|
|
227
|
+
end # table
|
|
228
|
+
s.parse_element(Statsample::Graph::Histogram.new(@total, :name=>"Histogram (valid data) for %s" % @name)) if @summary_histogram
|
|
229
|
+
end # section
|
|
230
|
+
end # def
|
|
231
|
+
end # class
|
|
232
|
+
end # module
|
|
233
|
+
end # module
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
module Statsample
|
|
2
|
+
module Reliability
|
|
3
|
+
# Analysis of a Skill Scale
|
|
4
|
+
# Given a dataset with results and a correct answers hash,
|
|
5
|
+
# generates a ScaleAnalysis
|
|
6
|
+
# == Usage
|
|
7
|
+
# x1 = Daru::Vector.new(%{a b b c})
|
|
8
|
+
# x2 = Daru::Vector.new(%{b a b c})
|
|
9
|
+
# x3 = Daru::Vector.new(%{a c b a})
|
|
10
|
+
# ds = Daru::DataFrame.new({:x1 => @x1, :x2 => @x2, :x3 => @x3})
|
|
11
|
+
# key={ :x1 => 'a',:x2 => 'b', :x3 => 'a'}
|
|
12
|
+
# ssa=Statsample::Reliability::SkillScaleAnalysis.new(ds,key)
|
|
13
|
+
# puts ssa.summary
|
|
14
|
+
class SkillScaleAnalysis
|
|
15
|
+
include Summarizable
|
|
16
|
+
attr_accessor :name
|
|
17
|
+
attr_accessor :summary_minimal_item_correlation
|
|
18
|
+
attr_accessor :summary_show_problematic_items
|
|
19
|
+
def initialize(ds,key,opts=Hash.new)
|
|
20
|
+
opts_default={
|
|
21
|
+
:name=>_("Skill Scale Reliability Analysis (%s)") % ds.name,
|
|
22
|
+
:summary_minimal_item_correlation=>0.10,
|
|
23
|
+
:summary_show_problematic_items=>true
|
|
24
|
+
}
|
|
25
|
+
@ds=ds
|
|
26
|
+
@key=key
|
|
27
|
+
@opts=opts_default.merge(opts)
|
|
28
|
+
@opts.each{|k,v| self.send("#{k}=",v) if self.respond_to? k }
|
|
29
|
+
@cds=nil
|
|
30
|
+
end
|
|
31
|
+
# Dataset only corrected vectors
|
|
32
|
+
def corrected_dataset_minimal
|
|
33
|
+
cds = corrected_dataset
|
|
34
|
+
dsm = Daru::DataFrame.new(
|
|
35
|
+
@key.keys.inject({}) do |ac,v|
|
|
36
|
+
ac[v] = cds[v]
|
|
37
|
+
ac
|
|
38
|
+
end
|
|
39
|
+
)
|
|
40
|
+
|
|
41
|
+
dsm.rename _("Corrected dataset from %s") % @ds.name
|
|
42
|
+
dsm
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
def vector_sum
|
|
46
|
+
corrected_dataset_minimal.vector_sum
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
def vector_mean
|
|
50
|
+
corrected_dataset_minimal.vector_mean
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
def scale_analysis
|
|
54
|
+
sa = ScaleAnalysis.new(corrected_dataset_minimal)
|
|
55
|
+
sa.name=_("%s (Scale Analysis)") % @name
|
|
56
|
+
sa
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
def corrected_dataset
|
|
60
|
+
if @cds.nil?
|
|
61
|
+
@cds = Daru::DataFrame.new({}, order: @ds.vectors, name: @ds.name)
|
|
62
|
+
@ds.each_row do |row|
|
|
63
|
+
out = {}
|
|
64
|
+
row.each_with_index do |v, k|
|
|
65
|
+
if @key.has_key? k
|
|
66
|
+
if @ds[k].reject_values(*Daru::MISSING_VALUES).include_values? v
|
|
67
|
+
out[k]= @key[k] == v ? 1 : 0
|
|
68
|
+
else
|
|
69
|
+
out[k] = nil
|
|
70
|
+
end
|
|
71
|
+
else
|
|
72
|
+
out[k] = v
|
|
73
|
+
end
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
@cds.add_row(Daru::Vector.new(out))
|
|
77
|
+
end
|
|
78
|
+
@cds.update
|
|
79
|
+
end
|
|
80
|
+
@cds
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
def report_building(builder)
|
|
84
|
+
builder.section(:name=>@name) do |s|
|
|
85
|
+
sa = scale_analysis
|
|
86
|
+
s.parse_element(sa)
|
|
87
|
+
if summary_show_problematic_items
|
|
88
|
+
s.section(:name=>_("Problematic Items")) do |spi|
|
|
89
|
+
count=0
|
|
90
|
+
sa.item_total_correlation.each do |k,v|
|
|
91
|
+
if v < summary_minimal_item_correlation
|
|
92
|
+
count+=1
|
|
93
|
+
spi.section(:name=>_("Item: %s") % @ds[k].name) do |spii|
|
|
94
|
+
spii.text _("Correct answer: %s") % @key[k]
|
|
95
|
+
spii.text _("p: %0.3f") % corrected_dataset[k].mean
|
|
96
|
+
props=@ds[k].proportions.inject({}) {|ac,v| ac[v[0]] = v[1].to_f;ac}
|
|
97
|
+
|
|
98
|
+
spi.table(:name=>"Proportions",:header=>[_("Value"), _("%")]) do |table|
|
|
99
|
+
props.each do |k1,v|
|
|
100
|
+
table.row [ @ds[k].index_of(k1), "%0.3f" % v]
|
|
101
|
+
end
|
|
102
|
+
end
|
|
103
|
+
end
|
|
104
|
+
end
|
|
105
|
+
end
|
|
106
|
+
|
|
107
|
+
spi.text _("No problematic items") if count==0
|
|
108
|
+
end
|
|
109
|
+
end
|
|
110
|
+
end
|
|
111
|
+
end
|
|
112
|
+
end
|
|
113
|
+
end
|
|
114
|
+
end
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
module Statsample
|
|
2
|
+
module Resample
|
|
3
|
+
class << self
|
|
4
|
+
def repeat_and_save(times,&action)
|
|
5
|
+
(1..times).inject([]) {|a,x| a.push(action.call); a}
|
|
6
|
+
end
|
|
7
|
+
|
|
8
|
+
def generate (size,low,upper)
|
|
9
|
+
range=upper-low+1
|
|
10
|
+
Daru::Vector.new((0...size).collect {|x| rand(range)+low })
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
end
|
|
14
|
+
end
|
|
15
|
+
end
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
class Object
|
|
2
|
+
# Shorthand for Statsample::Analysis.store(*args,&block)
|
|
3
|
+
def ss_analysis(*args,&block)
|
|
4
|
+
Statsample::Analysis.store(*args,&block)
|
|
5
|
+
end
|
|
6
|
+
end
|
|
7
|
+
|
|
8
|
+
module Statsample
|
|
9
|
+
# Module which provide shorthands for many methods.
|
|
10
|
+
module Shorthand
|
|
11
|
+
###
|
|
12
|
+
# :section: R like methods
|
|
13
|
+
###
|
|
14
|
+
|
|
15
|
+
# Import an Excel file. Cache result by default
|
|
16
|
+
def read_excel(filename, opts=Hash.new)
|
|
17
|
+
Daru::DataFrame.from_excel filename, opts
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
# Import an CSV file. Cache result by default
|
|
21
|
+
def read_csv(filename, opts=Hash.new)
|
|
22
|
+
Daru::DataFrame.from_csv filename, opts
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
# Retrieve names (fields) from dataset
|
|
26
|
+
def names(ds)
|
|
27
|
+
ds.vectors.to_a
|
|
28
|
+
end
|
|
29
|
+
# Create a correlation matrix from a dataset
|
|
30
|
+
def cor(ds)
|
|
31
|
+
Statsample::Bivariate.correlation_matrix(ds)
|
|
32
|
+
end
|
|
33
|
+
# Create a variance/covariance matrix from a dataset
|
|
34
|
+
def cov(ds)
|
|
35
|
+
Statsample::Bivariate.covariate_matrix(ds)
|
|
36
|
+
end
|
|
37
|
+
# Create a Daru::Vector
|
|
38
|
+
# Analog to R's c
|
|
39
|
+
def vector(*args)
|
|
40
|
+
Daru::Vector[*args]
|
|
41
|
+
end
|
|
42
|
+
# Random generation for the normal distribution
|
|
43
|
+
def rnorm(n,mean=0,sd=1)
|
|
44
|
+
rng=Distribution::Normal.rng(mean,sd)
|
|
45
|
+
Daru::Vector.new_with_size(n) { rng.call}
|
|
46
|
+
end
|
|
47
|
+
# Creates a new Daru::DataFrame
|
|
48
|
+
# Each key is transformed into a Symbol wherever possible.
|
|
49
|
+
def dataset(vectors=Hash.new)
|
|
50
|
+
vectors = vectors.inject({}) do |ac,v|
|
|
51
|
+
n = v[0].respond_to?(:to_sym) ? v[0].to_sym : v[0]
|
|
52
|
+
ac[n] = v[1]
|
|
53
|
+
ac
|
|
54
|
+
end
|
|
55
|
+
Daru::DataFrame.new(vectors)
|
|
56
|
+
end
|
|
57
|
+
alias :data_frame :dataset
|
|
58
|
+
# Returns a Statsample::Graph::Boxplot
|
|
59
|
+
def boxplot(*args)
|
|
60
|
+
Statsample::Graph::Boxplot.new(*args)
|
|
61
|
+
end
|
|
62
|
+
# Returns a Statsample::Graph::Histogram
|
|
63
|
+
def histogram(*args)
|
|
64
|
+
Statsample::Graph::Histogram.new(*args)
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
# Returns a Statsample::Graph::Scatterplot
|
|
68
|
+
def scatterplot(*args)
|
|
69
|
+
Statsample::Graph::Scatterplot.new(*args)
|
|
70
|
+
end
|
|
71
|
+
# Returns a Statsample::Test::Levene
|
|
72
|
+
def levene(*args)
|
|
73
|
+
Statsample::Test::Levene.new(*args)
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
def principal_axis(*args)
|
|
77
|
+
Statsample::Factor::PrincipalAxis.new(*args)
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
def polychoric(*args)
|
|
81
|
+
Statsample::Bivariate::Polychoric.new(*args)
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
def tetrachoric(*args)
|
|
85
|
+
Statsample::Bivariate::Tetrachoric.new(*args)
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
###
|
|
89
|
+
# Other Shortcuts
|
|
90
|
+
###
|
|
91
|
+
def lr(*args)
|
|
92
|
+
Statsample::Regression.multiple(*args)
|
|
93
|
+
end
|
|
94
|
+
|
|
95
|
+
def pca(ds,opts=Hash.new)
|
|
96
|
+
Statsample::Factor::PCA.new(ds,opts)
|
|
97
|
+
end
|
|
98
|
+
|
|
99
|
+
def dominance_analysis(*args)
|
|
100
|
+
Statsample::DominanceAnalysis.new(*args)
|
|
101
|
+
end
|
|
102
|
+
|
|
103
|
+
def dominance_analysis_bootstrap(*args)
|
|
104
|
+
Statsample::DominanceAnalysis::Bootstrap.new(*args)
|
|
105
|
+
end
|
|
106
|
+
|
|
107
|
+
def scale_analysis(*args)
|
|
108
|
+
Statsample::Reliability::ScaleAnalysis.new(*args)
|
|
109
|
+
end
|
|
110
|
+
|
|
111
|
+
def skill_scale_analysis(*args)
|
|
112
|
+
Statsample::Reliability::SkillScaleAnalysis.new(*args)
|
|
113
|
+
end
|
|
114
|
+
|
|
115
|
+
def multiscale_analysis(*args,&block)
|
|
116
|
+
Statsample::Reliability::MultiScaleAnalysis.new(*args,&block)
|
|
117
|
+
end
|
|
118
|
+
|
|
119
|
+
def test_u(*args)
|
|
120
|
+
Statsample::Test::UMannWhitney.new(*args)
|
|
121
|
+
end
|
|
122
|
+
|
|
123
|
+
module_function :test_u, :rnorm
|
|
124
|
+
end
|
|
125
|
+
end
|