statsample-ekatena 2.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/.travis.yml +23 -0
- data/CONTRIBUTING.md +17 -0
- data/Gemfile +2 -0
- data/History.txt +457 -0
- data/LICENSE.txt +12 -0
- data/README.md +175 -0
- data/Rakefile +44 -0
- data/benchmarks/correlation_matrix_15_variables.rb +32 -0
- data/benchmarks/correlation_matrix_5_variables.rb +33 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.ds +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.html +93 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +71 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.xls +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix_gsl_ruby.ods +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix_with_graphics.ods +0 -0
- data/benchmarks/correlation_matrix_methods/results.ds +0 -0
- data/benchmarks/factor_map.rb +37 -0
- data/benchmarks/helpers_benchmark.rb +5 -0
- data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
- data/doc_latex/manual/equations.tex +78 -0
- data/examples/boxplot.rb +28 -0
- data/examples/chisquare_test.rb +23 -0
- data/examples/correlation_matrix.rb +32 -0
- data/examples/dataset.rb +30 -0
- data/examples/dominance_analysis.rb +33 -0
- data/examples/dominance_analysis_bootstrap.rb +32 -0
- data/examples/histogram.rb +26 -0
- data/examples/icc.rb +24 -0
- data/examples/levene.rb +29 -0
- data/examples/multiple_regression.rb +20 -0
- data/examples/multivariate_correlation.rb +33 -0
- data/examples/parallel_analysis.rb +40 -0
- data/examples/polychoric.rb +40 -0
- data/examples/principal_axis.rb +26 -0
- data/examples/reliability.rb +31 -0
- data/examples/scatterplot.rb +25 -0
- data/examples/t_test.rb +27 -0
- data/examples/tetrachoric.rb +17 -0
- data/examples/u_test.rb +24 -0
- data/examples/vector.rb +20 -0
- data/examples/velicer_map_test.rb +46 -0
- data/grab_references.rb +29 -0
- data/lib/spss.rb +134 -0
- data/lib/statsample-ekatena/analysis.rb +100 -0
- data/lib/statsample-ekatena/analysis/suite.rb +89 -0
- data/lib/statsample-ekatena/analysis/suitereportbuilder.rb +44 -0
- data/lib/statsample-ekatena/anova.rb +24 -0
- data/lib/statsample-ekatena/anova/contrast.rb +79 -0
- data/lib/statsample-ekatena/anova/oneway.rb +187 -0
- data/lib/statsample-ekatena/anova/twoway.rb +207 -0
- data/lib/statsample-ekatena/bivariate.rb +406 -0
- data/lib/statsample-ekatena/bivariate/pearson.rb +54 -0
- data/lib/statsample-ekatena/codification.rb +182 -0
- data/lib/statsample-ekatena/converter/csv.rb +28 -0
- data/lib/statsample-ekatena/converter/spss.rb +48 -0
- data/lib/statsample-ekatena/converters.rb +211 -0
- data/lib/statsample-ekatena/crosstab.rb +188 -0
- data/lib/statsample-ekatena/daru.rb +115 -0
- data/lib/statsample-ekatena/dataset.rb +10 -0
- data/lib/statsample-ekatena/dominanceanalysis.rb +425 -0
- data/lib/statsample-ekatena/dominanceanalysis/bootstrap.rb +232 -0
- data/lib/statsample-ekatena/factor.rb +104 -0
- data/lib/statsample-ekatena/factor/map.rb +124 -0
- data/lib/statsample-ekatena/factor/parallelanalysis.rb +166 -0
- data/lib/statsample-ekatena/factor/pca.rb +242 -0
- data/lib/statsample-ekatena/factor/principalaxis.rb +243 -0
- data/lib/statsample-ekatena/factor/rotation.rb +198 -0
- data/lib/statsample-ekatena/formula/fit_model.rb +46 -0
- data/lib/statsample-ekatena/formula/formula.rb +306 -0
- data/lib/statsample-ekatena/graph.rb +11 -0
- data/lib/statsample-ekatena/graph/boxplot.rb +236 -0
- data/lib/statsample-ekatena/graph/histogram.rb +198 -0
- data/lib/statsample-ekatena/graph/scatterplot.rb +213 -0
- data/lib/statsample-ekatena/histogram.rb +180 -0
- data/lib/statsample-ekatena/matrix.rb +329 -0
- data/lib/statsample-ekatena/multiset.rb +310 -0
- data/lib/statsample-ekatena/regression.rb +65 -0
- data/lib/statsample-ekatena/regression/multiple.rb +89 -0
- data/lib/statsample-ekatena/regression/multiple/alglibengine.rb +128 -0
- data/lib/statsample-ekatena/regression/multiple/baseengine.rb +251 -0
- data/lib/statsample-ekatena/regression/multiple/gslengine.rb +129 -0
- data/lib/statsample-ekatena/regression/multiple/matrixengine.rb +205 -0
- data/lib/statsample-ekatena/regression/multiple/rubyengine.rb +86 -0
- data/lib/statsample-ekatena/regression/simple.rb +121 -0
- data/lib/statsample-ekatena/reliability.rb +150 -0
- data/lib/statsample-ekatena/reliability/icc.rb +415 -0
- data/lib/statsample-ekatena/reliability/multiscaleanalysis.rb +181 -0
- data/lib/statsample-ekatena/reliability/scaleanalysis.rb +233 -0
- data/lib/statsample-ekatena/reliability/skillscaleanalysis.rb +114 -0
- data/lib/statsample-ekatena/resample.rb +15 -0
- data/lib/statsample-ekatena/shorthand.rb +125 -0
- data/lib/statsample-ekatena/srs.rb +169 -0
- data/lib/statsample-ekatena/test.rb +82 -0
- data/lib/statsample-ekatena/test/bartlettsphericity.rb +45 -0
- data/lib/statsample-ekatena/test/chisquare.rb +73 -0
- data/lib/statsample-ekatena/test/f.rb +52 -0
- data/lib/statsample-ekatena/test/kolmogorovsmirnov.rb +63 -0
- data/lib/statsample-ekatena/test/levene.rb +88 -0
- data/lib/statsample-ekatena/test/t.rb +309 -0
- data/lib/statsample-ekatena/test/umannwhitney.rb +208 -0
- data/lib/statsample-ekatena/test/wilcoxonsignedrank.rb +90 -0
- data/lib/statsample-ekatena/vector.rb +19 -0
- data/lib/statsample-ekatena/version.rb +3 -0
- data/lib/statsample.rb +282 -0
- data/po/es/statsample.mo +0 -0
- data/po/es/statsample.po +959 -0
- data/po/statsample.pot +947 -0
- data/references.txt +24 -0
- data/statsample-ekatena.gemspec +49 -0
- data/test/fixtures/bank2.dat +200 -0
- data/test/fixtures/correlation_matrix.rb +17 -0
- data/test/fixtures/df.csv +15 -0
- data/test/fixtures/hartman_23.matrix +9 -0
- data/test/fixtures/stock_data.csv +500 -0
- data/test/fixtures/tetmat_matrix.txt +5 -0
- data/test/fixtures/tetmat_test.txt +1001 -0
- data/test/helpers_tests.rb +83 -0
- data/test/test_analysis.rb +176 -0
- data/test/test_anova_contrast.rb +36 -0
- data/test/test_anovaoneway.rb +26 -0
- data/test/test_anovatwoway.rb +37 -0
- data/test/test_anovatwowaywithdataset.rb +47 -0
- data/test/test_anovawithvectors.rb +102 -0
- data/test/test_awesome_print_bug.rb +16 -0
- data/test/test_bartlettsphericity.rb +25 -0
- data/test/test_bivariate.rb +164 -0
- data/test/test_codification.rb +78 -0
- data/test/test_crosstab.rb +67 -0
- data/test/test_dominance_analysis.rb +39 -0
- data/test/test_factor.rb +228 -0
- data/test/test_factor_map.rb +38 -0
- data/test/test_factor_pa.rb +56 -0
- data/test/test_fit_model.rb +88 -0
- data/test/test_ggobi.rb +35 -0
- data/test/test_gsl.rb +15 -0
- data/test/test_histogram.rb +109 -0
- data/test/test_matrix.rb +48 -0
- data/test/test_multiset.rb +176 -0
- data/test/test_regression.rb +231 -0
- data/test/test_reliability.rb +223 -0
- data/test/test_reliability_icc.rb +198 -0
- data/test/test_reliability_skillscale.rb +57 -0
- data/test/test_resample.rb +24 -0
- data/test/test_srs.rb +9 -0
- data/test/test_statistics.rb +69 -0
- data/test/test_stest.rb +69 -0
- data/test/test_stratified.rb +17 -0
- data/test/test_test_f.rb +33 -0
- data/test/test_test_kolmogorovsmirnov.rb +34 -0
- data/test/test_test_t.rb +62 -0
- data/test/test_umannwhitney.rb +27 -0
- data/test/test_vector.rb +12 -0
- data/test/test_wilcoxonsignedrank.rb +64 -0
- metadata +570 -0
@@ -0,0 +1,233 @@
|
|
1
|
+
module Statsample
  module Reliability
    # Analysis of a Scale. Analoge of Scale Reliability analysis on SPSS.
    # Returns several statistics for complete scale and each item
    # == Usage
    #   @x1 = Daru::Vector.new([1,1,1,1,2,2,2,2,3,3,3,30])
    #   @x2 = Daru::Vector.new([1,1,1,2,2,3,3,3,3,4,4,50])
    #   @x3 = Daru::Vector.new([2,2,1,1,1,2,2,2,3,4,5,40])
    #   @x4 = Daru::Vector.new([1,2,3,4,4,4,4,3,4,4,5,30])
    #   ds = Daru::DataFrame.new({:x1 => @x1,:x2 => @x2,:x3 => @x3,:x4 => @x4})
    #   ia = Statsample::Reliability::ScaleAnalysis.new(ds)
    #   puts ia.summary
    class ScaleAnalysis
      include Summarizable
      # NOTE: :alpha_standarized (sic) is the established public spelling;
      # renaming it would break callers.
      attr_reader :ds,:mean, :sd,:valid_n, :alpha , :alpha_standarized, :variances_mean, :covariances_mean, :cov_m
      # Name used for the report section.
      attr_accessor :name
      # When truthy, report_building appends a histogram of the scale totals.
      attr_accessor :summary_histogram
      # Builds the analysis for dataset +ds+.
      # +opts+ may set :name and :summary_histogram (any key matching a
      # writer on this object is applied via send).
      def initialize(ds, opts=Hash.new)
        # Zero-variance items are split off: they are excluded from the
        # working dataset and only reported separately.
        @dumped=ds.vectors.to_a.find_all {|f|
          ds[f].variance == 0
        }

        # Keep the original dataset for the "all items" summary.
        @ods = ds
        # Working dataset: rows with missing values dropped, constant
        # (dumped) items removed.
        @ds = ds.reject_values(*Daru::MISSING_VALUES).dup(ds.vectors.to_a - @dumped)
        @ds.rename ds.name

        @k = @ds.ncols
        # Per-case scale total (sum across items).
        @total = @ds.vector_sum
        # Totals over the *original* items, only needed when something was dumped.
        @o_total=@dumped.size > 0 ? @ods.vector_sum : nil

        # Per-case mean across items, and its distribution over cases.
        @vector_mean = @ds.vector_mean
        @item_mean = @vector_mean.mean
        @item_sd = @vector_mean.sd

        # Descriptive statistics of the scale total.
        @mean = @total.mean
        @median = @total.median
        @skew = @total.skew
        @kurtosis = @total.kurtosis
        @sd = @total.sd
        @variance = @total.variance
        @valid_n = @total.size

        opts_default = {
          :name => _("Reliability Analysis"),
          :summary_histogram => true
        }
        @opts = opts_default.merge(opts)
        @opts.each{ |k,v| self.send("#{k}=",v) if self.respond_to? k }

        @cov_m=Statsample::Bivariate.covariance_matrix(@ds)
        # Mean for covariances and variances
        # Item variances come from the covariance-matrix diagonal.
        @variances = Daru::Vector.new(@k.times.map { |i| @cov_m[i,i] })
        @variances_mean=@variances.mean
        # Mean off-diagonal covariance, using the identity
        # Var(total) = sum(variances) + sum(covariances); there are k^2 - k
        # off-diagonal cells.
        @covariances_mean=(@variance-@variances.sum).quo(@k**2-@k)
        #begin
        @alpha = Statsample::Reliability.cronbach_alpha(@ds)
        @alpha_standarized = Statsample::Reliability.cronbach_alpha_standarized(@ds)
        #rescue => e
        #  raise DatasetException.new(@ds,e), "Error calculating alpha"
        #end
      end
      # Returns a hash with structure
      #   { item => { total_score => mean_item_response } }
      # i.e. the empirical item characteristic curve: for each item, the
      # average response among cases sharing each observed scale total.
      def item_characteristic_curve
        i=0
        out={}    # running sums per item per total score
        total={}  # case counts per item per total score
        @ds.each do |row|
          tot=@total[i]
          @ds.vectors.each do |f|
            out[f]||= {}
            total[f]||={}
            out[f][tot]||= 0
            total[f][tot]||=0
            out[f][tot]+= row[f]
            total[f][tot]+=1
          end
          i+=1
        end
        # Convert sums into means (quo keeps exact rationals for integers).
        total.each do |f,var|
          var.each do |tot,v|
            out[f][tot]=out[f][tot].quo(total[f][tot])
          end
        end
        out
      end
      # =Adjusted R.P.B. for each item
      # Adjusted RPB(Point biserial-correlation) for each item
      #
      # Pearson correlation of each item with the sum of the *remaining*
      # items (the item itself is excluded from the total). Memoized.
      # @return [Hash] item name => correlation
      def item_total_correlation
        vecs = @ds.vectors.to_a
        @itc ||= vecs.inject({}) do |a,v|
          total=@ds.vector_sum(vecs - [v])
          a[v]=Statsample::Bivariate.pearson(@ds[v],total)
          a
        end
      end
      # Mean of the item-total correlations.
      def mean_rpb
        Daru::Vector.new(item_total_correlation.values).mean
      end
      # Per-item mean and standard deviation (sd taken from the
      # covariance-matrix diagonal). Memoized.
      # @return [Hash] item name => {:mean=>..., :sds=>...}
      def item_statistics
        @is||=@ds.vectors.to_a.inject({}) do |a,v|
          a[v]={:mean=>@ds[v].mean, :sds=>Math::sqrt(@cov_m.variance(v))}
          a
        end
      end
      # Returns a dataset with cases ordered by score
      # and variables ordered by difficulty
      # (items descending by mean, cases ascending by mean score).

      def item_difficulty_analysis
        dif={}
        @ds.vectors.each{|f| dif[f]=@ds[f].mean }
        # Items sorted by descending mean (easiest first).
        dif_sort = dif.sort { |a,b| -(a[1]<=>b[1]) }
        scores_sort={}
        scores=@ds.vector_mean
        scores.each_index{ |i| scores_sort[i]=scores[i] }
        # Cases sorted by ascending mean score.
        scores_sort=scores_sort.sort{|a,b| a[1]<=>b[1]}
        ds_new = Daru::DataFrame.new({}, order: ([:case,:score] + dif_sort.collect{|a,b| a.to_sym}))
        scores_sort.each do |i,score|
          row = [i, score]
          case_row = @ds.row[i].to_hash
          dif_sort.each{ |variable,dif_value| row.push(case_row[variable]) }
          ds_new.add_row(row)
        end
        ds_new
      end

      # Memoized wrapper over stats_if_deleted_intern.
      def stats_if_deleted
        @sif||=stats_if_deleted_intern
      end

      # For every item, the scale statistics recomputed as if that item
      # were removed: mean, variance (sum of the reduced covariance
      # matrix), sd and Cronbach's alpha (nil when fewer than 2 items
      # would remain).
      def stats_if_deleted_intern # :nodoc:
        return Hash.new if @ds.ncols == 1
        vecs = @ds.vectors.to_a
        vecs.inject({}) do |a,v|
          cov_2=@cov_m.submatrix(vecs - [v])
          #ds2=@ds.clone
          #ds2.delete_vector(v)
          #total=ds2.vector_sum
          a[v]={}
          #a[v][:mean]=total.mean
          # Mean of the reduced scale = full-scale mean minus the item mean.
          a[v][:mean]=@mean-item_statistics[v][:mean]
          a[v][:variance_sample]=cov_2.total_sum
          a[v][:sds]=Math::sqrt(a[v][:variance_sample])
          n=cov_2.row_size
          a[v][:alpha] = (n>=2) ? Statsample::Reliability.cronbach_alpha_from_covariance_matrix(cov_2) : nil
          a
        end
      end
      # Builds the report: optional dumped-items section, scale summary
      # table, projected item counts for alpha 0.8/0.9, per-item table and
      # an optional histogram of the totals.
      def report_building(builder) #:nodoc:
        builder.section(:name=>@name) do |s|

          if @dumped.size>0
            s.section(:name=>"Items with variance=0") do |s1|
              # NOTE(review): the tables below are attached to the outer
              # section `s`, not `s1` — looks unintended; confirm against
              # ReportBuilder semantics before changing.
              s.table(:name=>_("Summary for %s with all items") % @name) do |t|
                t.row [_("Items"), @ods.ncols]
                t.row [_("Sum mean"), "%0.4f" % @o_total.mean]
                t.row [_("S.d. mean"), "%0.4f" % @o_total.sd]
              end
              s.table(:name=>_("Deleted items"), :header=>['item','mean']) do |t|
                @dumped.each do |f|
                  t.row(["#{@ods[f].name}(#{f})", "%0.5f" % @ods[f].mean])
                end
              end
              s.parse_element(Statsample::Graph::Histogram.new(@o_total, :name=>"Histogram (complete data) for %s" % @name)) if @summary_histogram
            end
          end


          s.table(:name=>_("Summary for %s") % @name) do |t|
            t.row [_("Valid Items"), @ds.ncols]

            t.row [_("Valid cases"), @valid_n]
            t.row [_("Sum mean"), "%0.4f" % @mean]
            t.row [_("Sum sd"), "%0.4f" % @sd ]
            # t.row [_("Sum variance"), "%0.4f" % @variance]
            t.row [_("Sum median"), @median]
            t.hr
            t.row [_("Item mean"), "%0.4f" % @item_mean]
            t.row [_("Item sd"), "%0.4f" % @item_sd]
            t.hr
            t.row [_("Skewness"), "%0.4f" % @skew]
            t.row [_("Kurtosis"), "%0.4f" % @kurtosis]
            t.hr
            t.row [_("Cronbach's alpha"), @alpha ? ("%0.4f" % @alpha) : "--"]
            t.row [_("Standarized Cronbach's alpha"), @alpha_standarized ? ("%0.4f" % @alpha_standarized) : "--" ]
            t.row [_("Mean rpb"), "%0.4f" % mean_rpb]

            t.row [_("Variances mean"), "%g" % @variances_mean]
            t.row [_("Covariances mean") , "%g" % @covariances_mean]
          end

          if (@alpha)
            s.text _("Items for obtain alpha(0.8) : %d" % Statsample::Reliability::n_for_desired_reliability(@alpha, 0.8, @ds.ncols))
            s.text _("Items for obtain alpha(0.9) : %d" % Statsample::Reliability::n_for_desired_reliability(@alpha, 0.9, @ds.ncols))
          end


          sid=stats_if_deleted
          is=item_statistics
          itc=item_total_correlation

          s.table(:name=>_("Items report for %s") % @name, :header=>["item","mean","sd", "mean if deleted", "var if deleted", "sd if deleted"," item-total correl.", "alpha if deleted"]) do |t|
            @ds.vectors.each do |f|
              row=["#{@ds[f].name}(#{f})"]
              if is[f]
                row+=[sprintf("%0.5f",is[f][:mean]), sprintf("%0.5f", is[f][:sds])]
              else
                row+=["-","-"]
              end
              if sid[f]
                row+= [sprintf("%0.5f",sid[f][:mean]), sprintf("%0.5f",sid[f][:variance_sample]), sprintf("%0.5f",sid[f][:sds])]
              else
                row+=%w{- - -}
              end
              if itc[f]
                row+= [sprintf("%0.5f",itc[f])]
              else
                row+=['-']
              end
              if sid[f] and !sid[f][:alpha].nil?
                row+=[sprintf("%0.5f",sid[f][:alpha])]
              else
                row+=["-"]
              end
              t.row row
            end # end each
          end # table
          s.parse_element(Statsample::Graph::Histogram.new(@total, :name=>"Histogram (valid data) for %s" % @name)) if @summary_histogram
        end # section
      end # def
    end # class
  end # module
end # module
|
@@ -0,0 +1,114 @@
|
|
1
|
+
module Statsample
  module Reliability
    # Analysis of a Skill Scale
    # Given a dataset with results and a correct answers hash,
    # generates a ScaleAnalysis
    # == Usage
    #   x1 = Daru::Vector.new(%w(a b b c))
    #   x2 = Daru::Vector.new(%w(b a b c))
    #   x3 = Daru::Vector.new(%w(a c b a))
    #   ds = Daru::DataFrame.new({:x1 => x1, :x2 => x2, :x3 => x3})
    #   key={ :x1 => 'a',:x2 => 'b', :x3 => 'a'}
    #   ssa=Statsample::Reliability::SkillScaleAnalysis.new(ds,key)
    #   puts ssa.summary
    class SkillScaleAnalysis
      include Summarizable
      # Name used for the report section.
      attr_accessor :name
      # Items whose item-total correlation falls below this threshold are
      # reported as problematic (default 0.10).
      attr_accessor :summary_minimal_item_correlation
      # When truthy (default), the report includes the problematic-items section.
      attr_accessor :summary_show_problematic_items
      # +ds+   : dataset of raw answers.
      # +key+  : hash of item name => correct answer; only keyed items are scored.
      # +opts+ : overrides for the attr_accessor options above.
      def initialize(ds,key,opts=Hash.new)
        opts_default={
          :name=>_("Skill Scale Reliability Analysis (%s)") % ds.name,
          :summary_minimal_item_correlation=>0.10,
          :summary_show_problematic_items=>true
        }
        @ds=ds
        @key=key
        @opts=opts_default.merge(opts)
        @opts.each{|k,v| self.send("#{k}=",v) if self.respond_to? k }
        # Corrected dataset is built lazily by corrected_dataset.
        @cds=nil
      end
      # Dataset only corrected vectors
      # (restricted to the items present in the answer key).
      def corrected_dataset_minimal
        cds = corrected_dataset
        dsm = Daru::DataFrame.new(
          @key.keys.inject({}) do |ac,v|
            ac[v] = cds[v]
            ac
          end
        )

        dsm.rename _("Corrected dataset from %s") % @ds.name
        dsm
      end

      # Per-case sum of correct answers.
      def vector_sum
        corrected_dataset_minimal.vector_sum
      end

      # Per-case proportion of correct answers.
      def vector_mean
        corrected_dataset_minimal.vector_mean
      end

      # ScaleAnalysis over the corrected (0/1) dataset.
      def scale_analysis
        sa = ScaleAnalysis.new(corrected_dataset_minimal)
        sa.name=_("%s (Scale Analysis)") % @name
        sa
      end

      # Lazily builds and caches the corrected dataset: keyed items are
      # scored 1 (matches key), 0 (observed but wrong) or nil (value not
      # among the column's non-missing observations); unkeyed items are
      # copied through unchanged.
      def corrected_dataset
        if @cds.nil?
          @cds = Daru::DataFrame.new({}, order: @ds.vectors, name: @ds.name)
          @ds.each_row do |row|
            out = {}
            row.each_with_index do |v, k|
              if @key.has_key? k
                if @ds[k].reject_values(*Daru::MISSING_VALUES).include_values? v
                  out[k]= @key[k] == v ? 1 : 0
                else
                  out[k] = nil
                end
              else
                out[k] = v
              end
            end

            @cds.add_row(Daru::Vector.new(out))
          end
          @cds.update
        end
        @cds
      end

      # Embeds the ScaleAnalysis report and, optionally, a section listing
      # items whose item-total correlation is below the configured minimum.
      def report_building(builder)
        builder.section(:name=>@name) do |s|
          sa = scale_analysis
          s.parse_element(sa)
          if summary_show_problematic_items
            s.section(:name=>_("Problematic Items")) do |spi|
              count=0
              sa.item_total_correlation.each do |k,v|
                if v < summary_minimal_item_correlation
                  count+=1
                  spi.section(:name=>_("Item: %s") % @ds[k].name) do |spii|
                    spii.text _("Correct answer: %s") % @key[k]
                    # p = proportion correct for this item.
                    spii.text _("p: %0.3f") % corrected_dataset[k].mean
                    # Answer distribution of the raw (uncorrected) item.
                    props=@ds[k].proportions.inject({}) {|ac,v| ac[v[0]] = v[1].to_f;ac}

                    # NOTE(review): this table is attached to the outer
                    # section `spi`, not `spii`, and the first column uses
                    # index_of(k1) rather than the answer value itself —
                    # both look unintended; confirm before changing.
                    spi.table(:name=>"Proportions",:header=>[_("Value"), _("%")]) do |table|
                      props.each do |k1,v|
                        table.row [ @ds[k].index_of(k1), "%0.3f" % v]
                      end
                    end
                  end
                end
              end

              spi.text _("No problematic items") if count==0
            end
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
module Statsample
  # Small resampling helpers.
  module Resample
    class << self
      # Invokes +action+ exactly +times+ times and collects every return
      # value, in call order.
      #
      # @param times [Integer] number of invocations (0 returns [])
      # @param action [Proc] block to evaluate on each iteration
      # @return [Array] the results of each call
      def repeat_and_save(times, &action)
        # Array.new with a block is the idiomatic form of the original
        # (1..times).inject([]) { push(action.call) } accumulation.
        Array.new(times) { action.call }
      end

      # Generates +size+ uniform random integers in the inclusive range
      # [low, upper], wrapped in a Daru::Vector.
      #
      # @param size [Integer] number of samples
      # @param low [Integer] inclusive lower bound
      # @param upper [Integer] inclusive upper bound
      # @return [Daru::Vector]
      def generate(size, low, upper)
        span = upper - low + 1
        Daru::Vector.new(Array.new(size) { rand(span) + low })
      end
    end
  end
end
|
@@ -0,0 +1,125 @@
|
|
1
|
+
class Object
  # Shorthand for Statsample::Analysis.store(*args,&block)
  #
  # Monkey-patches Object so scripts can declare an analysis with a bare
  # +ss_analysis(...) { ... }+ call; all arguments and the block are
  # forwarded unchanged.
  def ss_analysis(*args,&block)
    Statsample::Analysis.store(*args,&block)
  end
end
|
7
|
+
|
8
|
+
module Statsample
  # Convenience shorthands: thin wrappers that delegate to Daru and to the
  # Statsample constructors, so analyses read closer to R.
  module Shorthand
    ###
    # :section: R like methods
    ###

    # Loads an Excel file into a Daru::DataFrame.
    def read_excel(filename, opts=Hash.new)
      Daru::DataFrame.from_excel(filename, opts)
    end

    # Loads a CSV file into a Daru::DataFrame.
    def read_csv(filename, opts=Hash.new)
      Daru::DataFrame.from_csv(filename, opts)
    end

    # Field (vector) names of a dataset, as an Array.
    def names(ds)
      ds.vectors.to_a
    end

    # Correlation matrix of a dataset.
    def cor(ds)
      Statsample::Bivariate.correlation_matrix(ds)
    end

    # Variance/covariance matrix of a dataset.
    # NOTE(review): this calls Bivariate.covariate_matrix while
    # ScaleAnalysis uses Bivariate.covariance_matrix — confirm both
    # methods exist as intended in Statsample::Bivariate.
    def cov(ds)
      Statsample::Bivariate.covariate_matrix(ds)
    end

    # Builds a Daru::Vector from its arguments; analog of R's c().
    def vector(*args)
      Daru::Vector[*args]
    end

    # Draws n samples from a Normal(mean, sd) distribution.
    def rnorm(n, mean=0, sd=1)
      generator = Distribution::Normal.rng(mean, sd)
      Daru::Vector.new_with_size(n) { generator.call }
    end

    # Builds a Daru::DataFrame from a hash of vectors, converting each key
    # to a Symbol wherever possible.
    def dataset(vectors=Hash.new)
      symbolized = vectors.each_with_object({}) do |(key, values), acc|
        acc[key.respond_to?(:to_sym) ? key.to_sym : key] = values
      end
      Daru::DataFrame.new(symbolized)
    end
    alias_method :data_frame, :dataset

    # Builds a Statsample::Graph::Boxplot.
    def boxplot(*args)
      Statsample::Graph::Boxplot.new(*args)
    end

    # Builds a Statsample::Graph::Histogram.
    def histogram(*args)
      Statsample::Graph::Histogram.new(*args)
    end

    # Builds a Statsample::Graph::Scatterplot.
    def scatterplot(*args)
      Statsample::Graph::Scatterplot.new(*args)
    end

    # Builds a Statsample::Test::Levene.
    def levene(*args)
      Statsample::Test::Levene.new(*args)
    end

    # Builds a Statsample::Factor::PrincipalAxis.
    def principal_axis(*args)
      Statsample::Factor::PrincipalAxis.new(*args)
    end

    # Builds a Statsample::Bivariate::Polychoric.
    def polychoric(*args)
      Statsample::Bivariate::Polychoric.new(*args)
    end

    # Builds a Statsample::Bivariate::Tetrachoric.
    def tetrachoric(*args)
      Statsample::Bivariate::Tetrachoric.new(*args)
    end

    ###
    # Other Shortcuts
    ###

    # Multiple linear regression.
    def lr(*args)
      Statsample::Regression.multiple(*args)
    end

    # Principal component analysis.
    def pca(ds, opts=Hash.new)
      Statsample::Factor::PCA.new(ds, opts)
    end

    # Dominance analysis.
    def dominance_analysis(*args)
      Statsample::DominanceAnalysis.new(*args)
    end

    # Bootstrapped dominance analysis.
    def dominance_analysis_bootstrap(*args)
      Statsample::DominanceAnalysis::Bootstrap.new(*args)
    end

    # Scale reliability analysis.
    def scale_analysis(*args)
      Statsample::Reliability::ScaleAnalysis.new(*args)
    end

    # Skill-scale reliability analysis.
    def skill_scale_analysis(*args)
      Statsample::Reliability::SkillScaleAnalysis.new(*args)
    end

    # Multi-scale reliability analysis (accepts a configuration block).
    def multiscale_analysis(*args, &block)
      Statsample::Reliability::MultiScaleAnalysis.new(*args, &block)
    end

    # Mann-Whitney U test.
    def test_u(*args)
      Statsample::Test::UMannWhitney.new(*args)
    end

    module_function :test_u, :rnorm
  end
end
|