statsample 1.5.0 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.build.sh +15 -0
- data/.gitignore +1 -0
- data/.travis.yml +19 -7
- data/CONTRIBUTING.md +33 -0
- data/History.txt +5 -0
- data/README.md +41 -53
- data/benchmarks/correlation_matrix_15_variables.rb +6 -5
- data/benchmarks/correlation_matrix_5_variables.rb +6 -5
- data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +23 -26
- data/examples/boxplot.rb +17 -5
- data/examples/correlation_matrix.rb +36 -7
- data/examples/dataset.rb +25 -5
- data/examples/dominance_analysis.rb +8 -7
- data/examples/dominance_analysis_bootstrap.rb +16 -11
- data/examples/histogram.rb +16 -2
- data/examples/icc.rb +5 -6
- data/examples/levene.rb +17 -3
- data/examples/multiple_regression.rb +6 -3
- data/examples/parallel_analysis.rb +11 -6
- data/examples/polychoric.rb +26 -13
- data/examples/principal_axis.rb +8 -4
- data/examples/reliability.rb +10 -10
- data/examples/scatterplot.rb +8 -0
- data/examples/t_test.rb +7 -0
- data/examples/u_test.rb +10 -2
- data/examples/vector.rb +9 -6
- data/examples/velicer_map_test.rb +12 -8
- data/lib/statsample.rb +13 -47
- data/lib/statsample/analysis/suite.rb +1 -1
- data/lib/statsample/anova/oneway.rb +6 -6
- data/lib/statsample/anova/twoway.rb +26 -24
- data/lib/statsample/bivariate.rb +78 -61
- data/lib/statsample/bivariate/pearson.rb +2 -2
- data/lib/statsample/codification.rb +45 -32
- data/lib/statsample/converter/csv.rb +15 -53
- data/lib/statsample/converter/spss.rb +6 -5
- data/lib/statsample/converters.rb +50 -211
- data/lib/statsample/crosstab.rb +26 -25
- data/lib/statsample/daru.rb +117 -0
- data/lib/statsample/dataset.rb +70 -942
- data/lib/statsample/dominanceanalysis.rb +16 -17
- data/lib/statsample/dominanceanalysis/bootstrap.rb +26 -28
- data/lib/statsample/factor/parallelanalysis.rb +17 -19
- data/lib/statsample/factor/pca.rb +21 -20
- data/lib/statsample/factor/principalaxis.rb +3 -3
- data/lib/statsample/graph/boxplot.rb +8 -16
- data/lib/statsample/graph/histogram.rb +4 -4
- data/lib/statsample/graph/scatterplot.rb +8 -7
- data/lib/statsample/histogram.rb +128 -119
- data/lib/statsample/matrix.rb +20 -16
- data/lib/statsample/multiset.rb +39 -38
- data/lib/statsample/regression.rb +3 -3
- data/lib/statsample/regression/multiple.rb +8 -10
- data/lib/statsample/regression/multiple/alglibengine.rb +96 -89
- data/lib/statsample/regression/multiple/baseengine.rb +32 -32
- data/lib/statsample/regression/multiple/gslengine.rb +33 -36
- data/lib/statsample/regression/multiple/matrixengine.rb +7 -9
- data/lib/statsample/regression/multiple/rubyengine.rb +39 -41
- data/lib/statsample/reliability.rb +23 -25
- data/lib/statsample/reliability/icc.rb +8 -7
- data/lib/statsample/reliability/multiscaleanalysis.rb +14 -12
- data/lib/statsample/reliability/scaleanalysis.rb +58 -60
- data/lib/statsample/reliability/skillscaleanalysis.rb +34 -29
- data/lib/statsample/resample.rb +1 -1
- data/lib/statsample/shorthand.rb +29 -25
- data/lib/statsample/test/kolmogorovsmirnov.rb +5 -3
- data/lib/statsample/test/levene.rb +28 -27
- data/lib/statsample/test/t.rb +7 -9
- data/lib/statsample/test/umannwhitney.rb +28 -28
- data/lib/statsample/test/wilcoxonsignedrank.rb +45 -43
- data/lib/statsample/vector.rb +70 -1013
- data/lib/statsample/version.rb +1 -1
- data/statsample.gemspec +12 -16
- data/test/helpers_tests.rb +1 -1
- data/test/test_analysis.rb +17 -17
- data/test/test_anova_contrast.rb +6 -6
- data/test/test_anovatwowaywithdataset.rb +8 -8
- data/test/test_anovawithvectors.rb +8 -8
- data/test/test_awesome_print_bug.rb +1 -1
- data/test/test_bartlettsphericity.rb +4 -4
- data/test/test_bivariate.rb +48 -43
- data/test/test_codification.rb +33 -33
- data/test/test_crosstab.rb +9 -9
- data/test/test_dataset.rb +28 -458
- data/test/test_factor.rb +46 -38
- data/test/test_factor_pa.rb +22 -13
- data/test/test_ggobi.rb +4 -4
- data/test/test_gsl.rb +4 -4
- data/test/test_histogram.rb +3 -3
- data/test/test_matrix.rb +13 -13
- data/test/test_multiset.rb +103 -91
- data/test/test_regression.rb +57 -52
- data/test/test_reliability.rb +55 -45
- data/test/test_reliability_icc.rb +8 -8
- data/test/test_reliability_skillscale.rb +26 -24
- data/test/test_resample.rb +1 -1
- data/test/test_statistics.rb +3 -13
- data/test/test_stest.rb +9 -9
- data/test/test_stratified.rb +3 -3
- data/test/test_test_t.rb +12 -12
- data/test/test_umannwhitney.rb +2 -2
- data/test/test_vector.rb +76 -613
- data/test/test_wilcoxonsignedrank.rb +4 -4
- metadata +57 -28
- data/lib/statsample/rserve_extension.rb +0 -20
- data/lib/statsample/vector/gsl.rb +0 -106
- data/test/fixtures/repeated_fields.csv +0 -7
- data/test/fixtures/scientific_notation.csv +0 -4
- data/test/fixtures/test_csv.csv +0 -7
- data/test/fixtures/test_xls.xls +0 -0
- data/test/test_csv.rb +0 -63
- data/test/test_rserve_extension.rb +0 -42
- data/test/test_xls.rb +0 -52
@@ -59,8 +59,6 @@ class MatrixEngine < BaseEngine
|
|
59
59
|
@matrix_y = @matrix_cor.submatrix(@fields, [y_var])
|
60
60
|
@matrix_y_cov = @matrix_cov.submatrix(@fields, [y_var])
|
61
61
|
|
62
|
-
|
63
|
-
|
64
62
|
@y_sd=Math::sqrt(@matrix_cov.submatrix([y_var])[0,0])
|
65
63
|
|
66
64
|
@x_sd=@n_predictors.times.inject({}) {|ac,i|
|
@@ -77,14 +75,14 @@ class MatrixEngine < BaseEngine
|
|
77
75
|
@y_mean=0.0
|
78
76
|
@name=_("Multiple reggresion of %s on %s") % [@fields.join(","), @y_var]
|
79
77
|
|
80
|
-
opts_default={:digits=>3}
|
81
|
-
opts=opts_default.merge opts
|
78
|
+
opts_default = {:digits=>3}
|
79
|
+
opts = opts_default.merge opts
|
82
80
|
opts.each{|k,v|
|
83
81
|
self.send("#{k}=",v) if self.respond_to? k
|
84
82
|
}
|
85
83
|
result_matrix=@matrix_x_cov.inverse * @matrix_y_cov
|
86
84
|
|
87
|
-
if matrix._type
|
85
|
+
if matrix._type == :covariance
|
88
86
|
@coeffs=result_matrix.column(0).to_a
|
89
87
|
@coeffs_stan=coeffs.collect {|k,v|
|
90
88
|
coeffs[k]*@x_sd[k].quo(@y_sd)
|
@@ -116,12 +114,12 @@ class MatrixEngine < BaseEngine
|
|
116
114
|
end
|
117
115
|
# Value of constant
|
118
116
|
def constant
|
119
|
-
c=coeffs
|
120
|
-
@y_mean - @fields.inject(0){|a,k| a + (c[k] * @x_mean[k])}
|
117
|
+
c = coeffs
|
118
|
+
@y_mean - @fields.inject(0) { |a,k| a + (c[k] * @x_mean[k])}
|
121
119
|
end
|
122
120
|
# Hash of b or raw coefficients
|
123
121
|
def coeffs
|
124
|
-
assign_names(@coeffs)
|
122
|
+
assign_names(@coeffs)
|
125
123
|
end
|
126
124
|
# Hash of beta or standardized coefficients
|
127
125
|
|
@@ -185,7 +183,7 @@ class MatrixEngine < BaseEngine
|
|
185
183
|
sd[:constant]=0
|
186
184
|
fields=[:constant]+@matrix_cov.fields-[@y_var]
|
187
185
|
# Recreate X'X using the variance-covariance matrix
|
188
|
-
xt_x
|
186
|
+
xt_x=::Matrix.rows(fields.collect {|i|
|
189
187
|
fields.collect {|j|
|
190
188
|
if i==:constant or j==:constant
|
191
189
|
cov=0
|
@@ -8,76 +8,74 @@ module Multiple
|
|
8
8
|
#
|
9
9
|
# Example:
|
10
10
|
#
|
11
|
-
# @a=[1,3,2,4,3,5,4,6,5,7]
|
12
|
-
# @b=[3,3,4,4,5,5,6,6,4,4]
|
13
|
-
# @c=[11,22,30,40,50,65,78,79,99,100]
|
14
|
-
# @y=[3,4,5,6,7,8,9,10,20,30]
|
15
|
-
# ds={
|
16
|
-
# lr=Statsample::Regression::Multiple::RubyEngine.new(ds
|
11
|
+
# @a = Daru::Vector.new([1,3,2,4,3,5,4,6,5,7])
|
12
|
+
# @b = Daru::Vector.new([3,3,4,4,5,5,6,6,4,4])
|
13
|
+
# @c = Daru::Vector.new([11,22,30,40,50,65,78,79,99,100])
|
14
|
+
# @y = Daru::Vector.new([3,4,5,6,7,8,9,10,20,30])
|
15
|
+
# ds = Daru::DataFrame.new({:a => @a,:b => @b,:c => @c,:y => @y})
|
16
|
+
# lr=Statsample::Regression::Multiple::RubyEngine.new(ds,:y)
|
17
17
|
|
18
18
|
class RubyEngine < MatrixEngine
|
19
19
|
def initialize(ds,y_var, opts=Hash.new)
|
20
|
-
matrix=
|
21
|
-
fields_indep=ds.
|
22
|
-
default={
|
23
|
-
:y_mean=>ds[y_var].mean,
|
24
|
-
:x_mean=>fields_indep.inject({}) {|ac,f| ac[f]=ds[f].mean; ac},
|
25
|
-
:y_sd=>ds[y_var].sd,
|
26
|
-
:x_sd=>fields_indep.inject({}) {|ac,f| ac[f]=ds[f].sd; ac},
|
27
|
-
:cases=>Statsample::Bivariate.min_n_valid(ds)
|
20
|
+
matrix = Statsample::Bivariate.correlation_matrix ds
|
21
|
+
fields_indep=ds.vectors.to_a - [y_var]
|
22
|
+
default= {
|
23
|
+
:y_mean => ds[y_var].mean,
|
24
|
+
:x_mean => fields_indep.inject({}) {|ac,f| ac[f]=ds[f].mean; ac},
|
25
|
+
:y_sd => ds[y_var].sd,
|
26
|
+
:x_sd => fields_indep.inject({}) {|ac,f| ac[f]=ds[f].sd; ac},
|
27
|
+
:cases => Statsample::Bivariate.min_n_valid(ds)
|
28
28
|
}
|
29
|
-
opts=opts.merge(default)
|
29
|
+
opts = opts.merge(default)
|
30
30
|
super(matrix, y_var, opts)
|
31
|
-
@ds=ds
|
32
|
-
@dy=ds[@y_var]
|
33
|
-
@ds_valid=ds.dup_only_valid
|
34
|
-
@total_cases
|
35
|
-
@valid_cases
|
36
|
-
@ds_indep
|
31
|
+
@ds = ds
|
32
|
+
@dy = ds[@y_var]
|
33
|
+
@ds_valid = ds.dup_only_valid
|
34
|
+
@total_cases = @ds.nrows
|
35
|
+
@valid_cases = @ds_valid.nrows
|
36
|
+
@ds_indep = ds.dup(ds.vectors.to_a - [y_var])
|
37
37
|
set_dep_columns
|
38
38
|
end
|
39
39
|
|
40
40
|
def set_dep_columns
|
41
|
-
@dep_columns=[]
|
42
|
-
@ds_indep.each_vector{|
|
43
|
-
@dep_columns.push(v.data_with_nils)
|
44
|
-
}
|
41
|
+
@dep_columns = []
|
42
|
+
@ds_indep.each_vector { |v| @dep_columns.push(v.to_a) }
|
45
43
|
end
|
46
44
|
|
47
45
|
def fix_with_mean
|
48
46
|
i=0
|
49
|
-
@ds_indep.each do |row|
|
47
|
+
@ds_indep.each(:row) do |row|
|
50
48
|
empty=[]
|
51
49
|
row.each do |k,v|
|
52
50
|
empty.push(k) if v.nil?
|
53
51
|
end
|
52
|
+
|
54
53
|
if empty.size==1
|
55
54
|
@ds_indep[empty[0]][i]=@ds[empty[0]].mean
|
56
55
|
end
|
57
|
-
i+=1
|
56
|
+
i += 1
|
58
57
|
end
|
59
|
-
@ds_indep.
|
58
|
+
@ds_indep.update
|
60
59
|
set_dep_columns
|
61
60
|
end
|
62
61
|
def fix_with_regression
|
63
|
-
i=0
|
64
|
-
@ds_indep.each
|
65
|
-
empty=[]
|
66
|
-
row.each{|k,v|
|
67
|
-
empty.push(k) if v.nil?
|
68
|
-
}
|
62
|
+
i = 0
|
63
|
+
@ds_indep.each(:row) do |row|
|
64
|
+
empty = []
|
65
|
+
row.each { |k,v| empty.push(k) if v.nil? }
|
69
66
|
if empty.size==1
|
70
|
-
field=empty[0]
|
71
|
-
lr=MultipleRegression.new(@ds_indep,field)
|
72
|
-
fields=[]
|
73
|
-
@ds_indep.
|
74
|
-
|
67
|
+
field = empty[0]
|
68
|
+
lr = MultipleRegression.new(@ds_indep,field)
|
69
|
+
fields = []
|
70
|
+
@ds_indep.vectors.each { |f|
|
71
|
+
fields.push(row[f]) unless f == field
|
75
72
|
}
|
73
|
+
|
76
74
|
@ds_indep[field][i]=lr.process(fields)
|
77
75
|
end
|
78
76
|
i+=1
|
79
|
-
|
80
|
-
@ds_indep.
|
77
|
+
end
|
78
|
+
@ds_indep.update
|
81
79
|
set_dep_columns
|
82
80
|
end
|
83
81
|
# Standard error for constant
|
@@ -4,30 +4,30 @@ module Statsample
|
|
4
4
|
# Calculate Cronbach's alpha for a given dataset.
|
5
5
|
# only uses tuples without missing data
|
6
6
|
def cronbach_alpha(ods)
|
7
|
-
ds=ods.dup_only_valid
|
8
|
-
n_items=ds.
|
9
|
-
return nil if n_items<=1
|
10
|
-
s2_items=ds.
|
11
|
-
|
12
|
-
total=ds.vector_sum
|
7
|
+
ds = ods.dup_only_valid
|
8
|
+
n_items = ds.ncols
|
9
|
+
return nil if n_items <= 1
|
10
|
+
s2_items = ds.to_hash.values.inject(0) { |ac,v|
|
11
|
+
ac + v.variance }
|
12
|
+
total = ds.vector_sum
|
13
13
|
|
14
|
-
(n_items.quo(n_items-1)) * (1-(s2_items.quo(total.variance)))
|
14
|
+
(n_items.quo(n_items - 1)) * (1 - (s2_items.quo(total.variance)))
|
15
15
|
end
|
16
16
|
# Calculate Cronbach's alpha for a given dataset
|
17
17
|
# using standardized values for every vector.
|
18
18
|
# Only uses tuples without missing data
|
19
19
|
# Returns nil if one or more vectors have 0 variance
|
20
20
|
def cronbach_alpha_standarized(ods)
|
21
|
+
ds = ods.dup_only_valid
|
22
|
+
return nil if ds.any? { |v| v.variance==0}
|
21
23
|
|
22
|
-
ds=
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
}.to_dataset
|
30
|
-
|
24
|
+
ds = Daru::DataFrame.new(
|
25
|
+
ds.vectors.to_a.inject({}) { |a,i|
|
26
|
+
a[i] = ods[i].standardize
|
27
|
+
a
|
28
|
+
}
|
29
|
+
)
|
30
|
+
|
31
31
|
cronbach_alpha(ds)
|
32
32
|
end
|
33
33
|
# Predicted reliability of a test by replicating
|
@@ -54,10 +54,10 @@ module Statsample
|
|
54
54
|
end
|
55
55
|
# Get Cronbach's alpha from a covariance matrix
|
56
56
|
def cronbach_alpha_from_covariance_matrix(cov)
|
57
|
-
n=cov.row_size
|
57
|
+
n = cov.row_size
|
58
58
|
raise "covariance matrix should have at least 2 variables" if n < 2
|
59
|
-
s2=n.times.inject(0) {|ac,i| ac+cov[i,i]}
|
60
|
-
(n.quo(n-1))*(1-(s2.quo(cov.total_sum)))
|
59
|
+
s2 = n.times.inject(0) { |ac,i| ac + cov[i,i] }
|
60
|
+
(n.quo(n - 1)) * (1 - (s2.quo(cov.total_sum)))
|
61
61
|
end
|
62
62
|
# Returns n necessary to obtain specific alpha
|
63
63
|
# given variance and covariance mean of items
|
@@ -82,8 +82,6 @@ module Statsample
|
|
82
82
|
end
|
83
83
|
c_a=cronbach_alpha_from_n_s2_cov(n,s2,cov)
|
84
84
|
dif=c_a - alpha
|
85
|
-
#puts "#{n} , #{c_a}"
|
86
|
-
|
87
85
|
end
|
88
86
|
n
|
89
87
|
end
|
@@ -110,20 +108,20 @@ module Statsample
|
|
110
108
|
attr_reader :totals, :counts, :vector_total
|
111
109
|
def initialize (ds, vector_total=nil)
|
112
110
|
vector_total||=ds.vector_sum
|
113
|
-
raise ArgumentError, "Total size != Dataset size" if vector_total.size!=ds.
|
111
|
+
raise ArgumentError, "Total size != Dataset size" if vector_total.size != ds.nrows
|
114
112
|
@vector_total=vector_total
|
115
113
|
@ds=ds
|
116
114
|
@totals={}
|
117
|
-
@counts=@ds.
|
115
|
+
@counts=@ds.vectors.to_a.inject({}) {|a,v| a[v]={};a}
|
118
116
|
process
|
119
117
|
end
|
120
118
|
def process
|
121
119
|
i=0
|
122
|
-
@ds.
|
120
|
+
@ds.each_row do |row|
|
123
121
|
tot=@vector_total[i]
|
124
122
|
@totals[tot]||=0
|
125
123
|
@totals[tot]+=1
|
126
|
-
@ds.
|
124
|
+
@ds.vectors.each do |f|
|
127
125
|
item=row[f].to_s
|
128
126
|
@counts[f][tot]||={}
|
129
127
|
@counts[f][tot][item]||=0
|
@@ -6,12 +6,12 @@ module Statsample
|
|
6
6
|
# several ratings) on a target and another measurement obtained on that target"
|
7
7
|
# == Usage
|
8
8
|
# require 'statsample'
|
9
|
-
# size=1000
|
10
|
-
# a = size.times.map {rand(10)}
|
9
|
+
# size = 1000
|
10
|
+
# a = Daru::Vector.new(size.times.map {rand(10)})
|
11
11
|
# b = a.recode{|i|i+rand(4)-2}
|
12
|
-
# c
|
12
|
+
# c = a.recode{|i|i+rand(4)-2}
|
13
13
|
# d = a.recode{|i|i+rand(4)-2}
|
14
|
-
# ds={
|
14
|
+
# ds = Daru::DataFrame.new({:a => a,:b => b,:c => c,:d => d})
|
15
15
|
# # Use :type attribute to set type to summarize
|
16
16
|
# icc=Statsample::Reliability::ICC.new(ds, :type=>:icc_1_k)
|
17
17
|
# puts icc.summary
|
@@ -96,10 +96,11 @@ module Statsample
|
|
96
96
|
attr_accessor :alpha
|
97
97
|
attr_accessor :name
|
98
98
|
def initialize(ds, opts=Hash.new)
|
99
|
+
ds.update
|
99
100
|
@ds=ds.dup_only_valid
|
100
|
-
@vectors=@ds.
|
101
|
-
@n=@ds.
|
102
|
-
@k=@ds.
|
101
|
+
@vectors=@ds.map { |e| e }
|
102
|
+
@n=@ds.nrows
|
103
|
+
@k=@ds.ncols
|
103
104
|
compute
|
104
105
|
@g_rho=0
|
105
106
|
@alpha=0.05
|
@@ -6,17 +6,17 @@ module Statsample
|
|
6
6
|
# PCA and Factor Analysis.
|
7
7
|
#
|
8
8
|
# == Usage
|
9
|
-
# @x1=[1,1,1,1,2,2,2,2,3,3,3,30]
|
10
|
-
# @x2=[1,1,1,2,2,3,3,3,3,4,4,50]
|
11
|
-
# @x3=[2,2,1,1,1,2,2,2,3,4,5,40]
|
12
|
-
# @x4=[1,2,3,4,4,4,4,3,4,4,5,30]
|
13
|
-
# ds={
|
9
|
+
# @x1 = Daru::Vector.new([1,1,1,1,2,2,2,2,3,3,3,30])
|
10
|
+
# @x2 = Daru::Vector.new([1,1,1,2,2,3,3,3,3,4,4,50])
|
11
|
+
# @x3 = Daru::Vector.new([2,2,1,1,1,2,2,2,3,4,5,40])
|
12
|
+
# @x4 = Daru::Vector.new([1,2,3,4,4,4,4,3,4,4,5,30])
|
13
|
+
# ds = Daru::DataFrame.new({:x1 => @x1,:x2 => @x2,:x3 => @x3,:x4 => @x4})
|
14
14
|
# opts={:name=>"Scales", # Name of analysis
|
15
15
|
# :summary_correlation_matrix=>true, # Add correlation matrix
|
16
16
|
# :summary_pca } # Add PCA between scales
|
17
17
|
# msa=Statsample::Reliability::MultiScaleAnalysis.new(opts) do |m|
|
18
|
-
# m.scale :s1, ds.clone(
|
19
|
-
# m.scale :s2, ds.clone(
|
18
|
+
# m.scale :s1, ds.clone([:x1, :x2])
|
19
|
+
# m.scale :s2, ds.clone([:x3, :x4]), {:name=>"Scale 2"}
|
20
20
|
# end
|
21
21
|
# # Retrieve summary
|
22
22
|
# puts msa.summary
|
@@ -107,7 +107,7 @@ module Statsample
|
|
107
107
|
# Retrieves a Principal Component Analysis (Factor::PCA)
|
108
108
|
# using all scales, using <tt>opts</tt> as options.
|
109
109
|
def pca(opts=nil)
|
110
|
-
opts||=pca_options
|
110
|
+
opts ||= pca_options
|
111
111
|
Statsample::Factor::PCA.new(correlation_matrix, opts)
|
112
112
|
end
|
113
113
|
# Retrieve Velicer's MAP
|
@@ -123,14 +123,15 @@ module Statsample
|
|
123
123
|
Statsample::Factor::PrincipalAxis.new(correlation_matrix, opts)
|
124
124
|
end
|
125
125
|
def dataset_from_scales
|
126
|
-
ds=
|
126
|
+
ds = Daru::DataFrame.new({}, order: @scales_keys.map(&:to_sym))
|
127
127
|
@scales.each_pair do |code,scale|
|
128
|
-
ds[code.
|
129
|
-
ds[code.to_s].name=scale.name
|
128
|
+
ds[code.to_sym] = scale.ds.vector_sum
|
130
129
|
end
|
131
|
-
|
130
|
+
|
131
|
+
ds.update
|
132
132
|
ds
|
133
133
|
end
|
134
|
+
|
134
135
|
def parallel_analysis(opts=nil)
|
135
136
|
opts||=parallel_analysis_options
|
136
137
|
Statsample::Factor::ParallelAnalysis.new(dataset_from_scales, opts)
|
@@ -140,6 +141,7 @@ module Statsample
|
|
140
141
|
def correlation_matrix
|
141
142
|
Statsample::Bivariate.correlation_matrix(dataset_from_scales)
|
142
143
|
end
|
144
|
+
|
143
145
|
def report_building(b) # :nodoc:
|
144
146
|
b.section(:name=>name) do |s|
|
145
147
|
s.section(:name=>_("Reliability analysis of scales")) do |s2|
|
@@ -3,12 +3,12 @@ module Statsample
|
|
3
3
|
# Analysis of a Scale. Analogous to the Scale Reliability analysis in SPSS.
|
4
4
|
# Returns several statistics for complete scale and each item
|
5
5
|
# == Usage
|
6
|
-
# @x1=[1,1,1,1,2,2,2,2,3,3,3,30]
|
7
|
-
# @x2=[1,1,1,2,2,3,3,3,3,4,4,50]
|
8
|
-
# @x3=[2,2,1,1,1,2,2,2,3,4,5,40]
|
9
|
-
# @x4=[1,2,3,4,4,4,4,3,4,4,5,30]
|
10
|
-
# ds={
|
11
|
-
# ia=Statsample::Reliability::ScaleAnalysis.new(ds)
|
6
|
+
# @x1 = Daru::Vector.new([1,1,1,1,2,2,2,2,3,3,3,30])
|
7
|
+
# @x2 = Daru::Vector.new([1,1,1,2,2,3,3,3,3,4,4,50])
|
8
|
+
# @x3 = Daru::Vector.new([2,2,1,1,1,2,2,2,3,4,5,40])
|
9
|
+
# @x4 = Daru::Vector.new([1,2,3,4,4,4,4,3,4,4,5,30])
|
10
|
+
# ds = Daru::DataFrame.new({:x1 => @x1,:x2 => @x2,:x3 => @x3,:x4 => @x4})
|
11
|
+
# ia = Statsample::Reliability::ScaleAnalysis.new(ds)
|
12
12
|
# puts ia.summary
|
13
13
|
class ScaleAnalysis
|
14
14
|
include Summarizable
|
@@ -16,40 +16,40 @@ module Statsample
|
|
16
16
|
attr_accessor :name
|
17
17
|
attr_accessor :summary_histogram
|
18
18
|
def initialize(ds, opts=Hash.new)
|
19
|
-
@dumped=ds.
|
20
|
-
ds[f].variance==0
|
19
|
+
@dumped=ds.vectors.to_a.find_all {|f|
|
20
|
+
ds[f].variance == 0
|
21
21
|
}
|
22
22
|
|
23
|
-
@ods=ds
|
24
|
-
@ds=ds.dup_only_valid(ds.
|
25
|
-
@ds.
|
23
|
+
@ods = ds
|
24
|
+
@ds = ds.dup_only_valid(ds.vectors.to_a - @dumped)
|
25
|
+
@ds.rename ds.name
|
26
26
|
|
27
|
-
@k
|
28
|
-
@total
|
27
|
+
@k = @ds.ncols
|
28
|
+
@total = @ds.vector_sum
|
29
29
|
@o_total=@dumped.size > 0 ? @ods.vector_sum : nil
|
30
30
|
|
31
|
-
@vector_mean
|
32
|
-
@item_mean
|
33
|
-
@item_sd
|
31
|
+
@vector_mean = @ds.vector_mean
|
32
|
+
@item_mean = @vector_mean.mean
|
33
|
+
@item_sd = @vector_mean.sd
|
34
34
|
|
35
|
-
@mean
|
36
|
-
@median
|
37
|
-
|
38
|
-
@
|
39
|
-
@
|
40
|
-
@
|
41
|
-
@
|
42
|
-
|
43
|
-
opts_default={
|
44
|
-
:name=>_("Reliability Analysis"),
|
45
|
-
:summary_histogram=>true
|
35
|
+
@mean = @total.mean
|
36
|
+
@median = @total.median
|
37
|
+
@skew = @total.skew
|
38
|
+
@kurtosis = @total.kurtosis
|
39
|
+
@sd = @total.sd
|
40
|
+
@variance = @total.variance
|
41
|
+
@valid_n = @total.size
|
42
|
+
|
43
|
+
opts_default = {
|
44
|
+
:name => _("Reliability Analysis"),
|
45
|
+
:summary_histogram => true
|
46
46
|
}
|
47
|
-
@opts=opts_default.merge(opts)
|
48
|
-
@opts.each{|k,v| self.send("#{k}=",v) if self.respond_to? k }
|
47
|
+
@opts = opts_default.merge(opts)
|
48
|
+
@opts.each{ |k,v| self.send("#{k}=",v) if self.respond_to? k }
|
49
49
|
|
50
50
|
@cov_m=Statsample::Bivariate.covariance_matrix(@ds)
|
51
51
|
# Mean for covariances and variances
|
52
|
-
@variances
|
52
|
+
@variances = Daru::Vector.new(@k.times.map { |i| @cov_m[i,i] })
|
53
53
|
@variances_mean=@variances.mean
|
54
54
|
@covariances_mean=(@variance-@variances.sum).quo(@k**2-@k)
|
55
55
|
#begin
|
@@ -66,7 +66,7 @@ module Statsample
|
|
66
66
|
total={}
|
67
67
|
@ds.each do |row|
|
68
68
|
tot=@total[i]
|
69
|
-
@ds.
|
69
|
+
@ds.vectors.each do |f|
|
70
70
|
out[f]||= {}
|
71
71
|
total[f]||={}
|
72
72
|
out[f][tot]||= 0
|
@@ -87,43 +87,41 @@ module Statsample
|
|
87
87
|
# Adjusted RPB (point-biserial correlation) for each item
|
88
88
|
#
|
89
89
|
def item_total_correlation
|
90
|
-
@
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
total=ds2.vector_sum
|
95
|
-
a[v]=Statsample::Bivariate.pearson(vector,total)
|
90
|
+
vecs = @ds.vectors.to_a
|
91
|
+
@itc ||= vecs.inject({}) do |a,v|
|
92
|
+
total=@ds.vector_sum(vecs - [v])
|
93
|
+
a[v]=Statsample::Bivariate.pearson(@ds[v],total)
|
96
94
|
a
|
97
95
|
end
|
98
96
|
end
|
99
97
|
def mean_rpb
|
100
|
-
item_total_correlation.values.
|
98
|
+
Daru::Vector.new(item_total_correlation.values).mean
|
101
99
|
end
|
102
100
|
def item_statistics
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
101
|
+
@is||=@ds.vectors.to_a.inject({}) do |a,v|
|
102
|
+
a[v]={:mean=>@ds[v].mean, :sds=>Math::sqrt(@cov_m.variance(v))}
|
103
|
+
a
|
104
|
+
end
|
107
105
|
end
|
108
106
|
# Returns a dataset with cases ordered by score
|
109
107
|
# and variables ordered by difficulty
|
110
108
|
|
111
109
|
def item_difficulty_analysis
|
112
110
|
dif={}
|
113
|
-
@ds.
|
114
|
-
dif_sort=dif.sort{|a,b| -(a[1]<=>b[1])}
|
111
|
+
@ds.vectors.each{|f| dif[f]=@ds[f].mean }
|
112
|
+
dif_sort = dif.sort { |a,b| -(a[1]<=>b[1]) }
|
115
113
|
scores_sort={}
|
116
114
|
scores=@ds.vector_mean
|
117
|
-
scores.each_index{|i| scores_sort[i]=scores[i] }
|
115
|
+
scores.each_index{ |i| scores_sort[i]=scores[i] }
|
118
116
|
scores_sort=scores_sort.sort{|a,b| a[1]<=>b[1]}
|
119
|
-
ds_new=
|
117
|
+
ds_new = Daru::DataFrame.new({}, order: ([:case,:score] + dif_sort.collect{|a,b| a.to_sym}))
|
120
118
|
scores_sort.each do |i,score|
|
121
|
-
row=[i, score]
|
122
|
-
case_row
|
123
|
-
dif_sort.each{|variable,dif_value| row.push(case_row[variable]) }
|
124
|
-
ds_new.
|
119
|
+
row = [i, score]
|
120
|
+
case_row = @ds.row[i].to_hash
|
121
|
+
dif_sort.each{ |variable,dif_value| row.push(case_row[variable]) }
|
122
|
+
ds_new.add_row(row)
|
125
123
|
end
|
126
|
-
ds_new.
|
124
|
+
ds_new.update
|
127
125
|
ds_new
|
128
126
|
end
|
129
127
|
|
@@ -132,9 +130,10 @@ module Statsample
|
|
132
130
|
end
|
133
131
|
|
134
132
|
def stats_if_deleted_intern # :nodoc:
|
135
|
-
return Hash.new if @ds.
|
136
|
-
@ds.
|
137
|
-
|
133
|
+
return Hash.new if @ds.ncols == 1
|
134
|
+
vecs = @ds.vectors.to_a
|
135
|
+
vecs.inject({}) do |a,v|
|
136
|
+
cov_2=@cov_m.submatrix(vecs - [v])
|
138
137
|
#ds2=@ds.clone
|
139
138
|
#ds2.delete_vector(v)
|
140
139
|
#total=ds2.vector_sum
|
@@ -151,11 +150,10 @@ module Statsample
|
|
151
150
|
def report_building(builder) #:nodoc:
|
152
151
|
builder.section(:name=>@name) do |s|
|
153
152
|
|
154
|
-
|
155
153
|
if @dumped.size>0
|
156
154
|
s.section(:name=>"Items with variance=0") do |s1|
|
157
155
|
s.table(:name=>_("Summary for %s with all items") % @name) do |t|
|
158
|
-
t.row [_("Items"), @ods.
|
156
|
+
t.row [_("Items"), @ods.ncols]
|
159
157
|
t.row [_("Sum mean"), "%0.4f" % @o_total.mean]
|
160
158
|
t.row [_("S.d. mean"), "%0.4f" % @o_total.sd]
|
161
159
|
end
|
@@ -170,7 +168,7 @@ module Statsample
|
|
170
168
|
|
171
169
|
|
172
170
|
s.table(:name=>_("Summary for %s") % @name) do |t|
|
173
|
-
t.row [_("Valid Items"), @ds.
|
171
|
+
t.row [_("Valid Items"), @ds.ncols]
|
174
172
|
|
175
173
|
t.row [_("Valid cases"), @valid_n]
|
176
174
|
t.row [_("Sum mean"), "%0.4f" % @mean]
|
@@ -193,8 +191,8 @@ module Statsample
|
|
193
191
|
end
|
194
192
|
|
195
193
|
if (@alpha)
|
196
|
-
s.text _("Items for obtain alpha(0.8) : %d" % Statsample::Reliability::n_for_desired_reliability(@alpha, 0.8, @ds.
|
197
|
-
s.text _("Items for obtain alpha(0.9) : %d" % Statsample::Reliability::n_for_desired_reliability(@alpha, 0.9, @ds.
|
194
|
+
s.text _("Items for obtain alpha(0.8) : %d" % Statsample::Reliability::n_for_desired_reliability(@alpha, 0.8, @ds.ncols))
|
195
|
+
s.text _("Items for obtain alpha(0.9) : %d" % Statsample::Reliability::n_for_desired_reliability(@alpha, 0.9, @ds.ncols))
|
198
196
|
end
|
199
197
|
|
200
198
|
|
@@ -203,7 +201,7 @@ module Statsample
|
|
203
201
|
itc=item_total_correlation
|
204
202
|
|
205
203
|
s.table(:name=>_("Items report for %s") % @name, :header=>["item","mean","sd", "mean if deleted", "var if deleted", "sd if deleted"," item-total correl.", "alpha if deleted"]) do |t|
|
206
|
-
@ds.
|
204
|
+
@ds.vectors.each do |f|
|
207
205
|
row=["#{@ds[f].name}(#{f})"]
|
208
206
|
if is[f]
|
209
207
|
row+=[sprintf("%0.5f",is[f][:mean]), sprintf("%0.5f", is[f][:sds])]
|