statsample 1.5.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.build.sh +15 -0
- data/.gitignore +1 -0
- data/.travis.yml +19 -7
- data/CONTRIBUTING.md +33 -0
- data/History.txt +5 -0
- data/README.md +41 -53
- data/benchmarks/correlation_matrix_15_variables.rb +6 -5
- data/benchmarks/correlation_matrix_5_variables.rb +6 -5
- data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +23 -26
- data/examples/boxplot.rb +17 -5
- data/examples/correlation_matrix.rb +36 -7
- data/examples/dataset.rb +25 -5
- data/examples/dominance_analysis.rb +8 -7
- data/examples/dominance_analysis_bootstrap.rb +16 -11
- data/examples/histogram.rb +16 -2
- data/examples/icc.rb +5 -6
- data/examples/levene.rb +17 -3
- data/examples/multiple_regression.rb +6 -3
- data/examples/parallel_analysis.rb +11 -6
- data/examples/polychoric.rb +26 -13
- data/examples/principal_axis.rb +8 -4
- data/examples/reliability.rb +10 -10
- data/examples/scatterplot.rb +8 -0
- data/examples/t_test.rb +7 -0
- data/examples/u_test.rb +10 -2
- data/examples/vector.rb +9 -6
- data/examples/velicer_map_test.rb +12 -8
- data/lib/statsample.rb +13 -47
- data/lib/statsample/analysis/suite.rb +1 -1
- data/lib/statsample/anova/oneway.rb +6 -6
- data/lib/statsample/anova/twoway.rb +26 -24
- data/lib/statsample/bivariate.rb +78 -61
- data/lib/statsample/bivariate/pearson.rb +2 -2
- data/lib/statsample/codification.rb +45 -32
- data/lib/statsample/converter/csv.rb +15 -53
- data/lib/statsample/converter/spss.rb +6 -5
- data/lib/statsample/converters.rb +50 -211
- data/lib/statsample/crosstab.rb +26 -25
- data/lib/statsample/daru.rb +117 -0
- data/lib/statsample/dataset.rb +70 -942
- data/lib/statsample/dominanceanalysis.rb +16 -17
- data/lib/statsample/dominanceanalysis/bootstrap.rb +26 -28
- data/lib/statsample/factor/parallelanalysis.rb +17 -19
- data/lib/statsample/factor/pca.rb +21 -20
- data/lib/statsample/factor/principalaxis.rb +3 -3
- data/lib/statsample/graph/boxplot.rb +8 -16
- data/lib/statsample/graph/histogram.rb +4 -4
- data/lib/statsample/graph/scatterplot.rb +8 -7
- data/lib/statsample/histogram.rb +128 -119
- data/lib/statsample/matrix.rb +20 -16
- data/lib/statsample/multiset.rb +39 -38
- data/lib/statsample/regression.rb +3 -3
- data/lib/statsample/regression/multiple.rb +8 -10
- data/lib/statsample/regression/multiple/alglibengine.rb +96 -89
- data/lib/statsample/regression/multiple/baseengine.rb +32 -32
- data/lib/statsample/regression/multiple/gslengine.rb +33 -36
- data/lib/statsample/regression/multiple/matrixengine.rb +7 -9
- data/lib/statsample/regression/multiple/rubyengine.rb +39 -41
- data/lib/statsample/reliability.rb +23 -25
- data/lib/statsample/reliability/icc.rb +8 -7
- data/lib/statsample/reliability/multiscaleanalysis.rb +14 -12
- data/lib/statsample/reliability/scaleanalysis.rb +58 -60
- data/lib/statsample/reliability/skillscaleanalysis.rb +34 -29
- data/lib/statsample/resample.rb +1 -1
- data/lib/statsample/shorthand.rb +29 -25
- data/lib/statsample/test/kolmogorovsmirnov.rb +5 -3
- data/lib/statsample/test/levene.rb +28 -27
- data/lib/statsample/test/t.rb +7 -9
- data/lib/statsample/test/umannwhitney.rb +28 -28
- data/lib/statsample/test/wilcoxonsignedrank.rb +45 -43
- data/lib/statsample/vector.rb +70 -1013
- data/lib/statsample/version.rb +1 -1
- data/statsample.gemspec +12 -16
- data/test/helpers_tests.rb +1 -1
- data/test/test_analysis.rb +17 -17
- data/test/test_anova_contrast.rb +6 -6
- data/test/test_anovatwowaywithdataset.rb +8 -8
- data/test/test_anovawithvectors.rb +8 -8
- data/test/test_awesome_print_bug.rb +1 -1
- data/test/test_bartlettsphericity.rb +4 -4
- data/test/test_bivariate.rb +48 -43
- data/test/test_codification.rb +33 -33
- data/test/test_crosstab.rb +9 -9
- data/test/test_dataset.rb +28 -458
- data/test/test_factor.rb +46 -38
- data/test/test_factor_pa.rb +22 -13
- data/test/test_ggobi.rb +4 -4
- data/test/test_gsl.rb +4 -4
- data/test/test_histogram.rb +3 -3
- data/test/test_matrix.rb +13 -13
- data/test/test_multiset.rb +103 -91
- data/test/test_regression.rb +57 -52
- data/test/test_reliability.rb +55 -45
- data/test/test_reliability_icc.rb +8 -8
- data/test/test_reliability_skillscale.rb +26 -24
- data/test/test_resample.rb +1 -1
- data/test/test_statistics.rb +3 -13
- data/test/test_stest.rb +9 -9
- data/test/test_stratified.rb +3 -3
- data/test/test_test_t.rb +12 -12
- data/test/test_umannwhitney.rb +2 -2
- data/test/test_vector.rb +76 -613
- data/test/test_wilcoxonsignedrank.rb +4 -4
- metadata +57 -28
- data/lib/statsample/rserve_extension.rb +0 -20
- data/lib/statsample/vector/gsl.rb +0 -106
- data/test/fixtures/repeated_fields.csv +0 -7
- data/test/fixtures/scientific_notation.csv +0 -4
- data/test/fixtures/test_csv.csv +0 -7
- data/test/fixtures/test_xls.xls +0 -0
- data/test/test_csv.rb +0 -63
- data/test/test_rserve_extension.rb +0 -42
- data/test/test_xls.rb +0 -52
@@ -59,8 +59,6 @@ class MatrixEngine < BaseEngine
|
|
59
59
|
@matrix_y = @matrix_cor.submatrix(@fields, [y_var])
|
60
60
|
@matrix_y_cov = @matrix_cov.submatrix(@fields, [y_var])
|
61
61
|
|
62
|
-
|
63
|
-
|
64
62
|
@y_sd=Math::sqrt(@matrix_cov.submatrix([y_var])[0,0])
|
65
63
|
|
66
64
|
@x_sd=@n_predictors.times.inject({}) {|ac,i|
|
@@ -77,14 +75,14 @@ class MatrixEngine < BaseEngine
|
|
77
75
|
@y_mean=0.0
|
78
76
|
@name=_("Multiple reggresion of %s on %s") % [@fields.join(","), @y_var]
|
79
77
|
|
80
|
-
opts_default={:digits=>3}
|
81
|
-
opts=opts_default.merge opts
|
78
|
+
opts_default = {:digits=>3}
|
79
|
+
opts = opts_default.merge opts
|
82
80
|
opts.each{|k,v|
|
83
81
|
self.send("#{k}=",v) if self.respond_to? k
|
84
82
|
}
|
85
83
|
result_matrix=@matrix_x_cov.inverse * @matrix_y_cov
|
86
84
|
|
87
|
-
if matrix._type
|
85
|
+
if matrix._type == :covariance
|
88
86
|
@coeffs=result_matrix.column(0).to_a
|
89
87
|
@coeffs_stan=coeffs.collect {|k,v|
|
90
88
|
coeffs[k]*@x_sd[k].quo(@y_sd)
|
@@ -116,12 +114,12 @@ class MatrixEngine < BaseEngine
|
|
116
114
|
end
|
117
115
|
# Value of constant
|
118
116
|
def constant
|
119
|
-
c=coeffs
|
120
|
-
@y_mean - @fields.inject(0){|a,k| a + (c[k] * @x_mean[k])}
|
117
|
+
c = coeffs
|
118
|
+
@y_mean - @fields.inject(0) { |a,k| a + (c[k] * @x_mean[k])}
|
121
119
|
end
|
122
120
|
# Hash of b or raw coefficients
|
123
121
|
def coeffs
|
124
|
-
assign_names(@coeffs)
|
122
|
+
assign_names(@coeffs)
|
125
123
|
end
|
126
124
|
# Hash of beta or standarized coefficients
|
127
125
|
|
@@ -185,7 +183,7 @@ class MatrixEngine < BaseEngine
|
|
185
183
|
sd[:constant]=0
|
186
184
|
fields=[:constant]+@matrix_cov.fields-[@y_var]
|
187
185
|
# Recreate X'X using the variance-covariance matrix
|
188
|
-
xt_x
|
186
|
+
xt_x=::Matrix.rows(fields.collect {|i|
|
189
187
|
fields.collect {|j|
|
190
188
|
if i==:constant or j==:constant
|
191
189
|
cov=0
|
@@ -8,76 +8,74 @@ module Multiple
|
|
8
8
|
#
|
9
9
|
# Example:
|
10
10
|
#
|
11
|
-
# @a=[1,3,2,4,3,5,4,6,5,7]
|
12
|
-
# @b=[3,3,4,4,5,5,6,6,4,4]
|
13
|
-
# @c=[11,22,30,40,50,65,78,79,99,100]
|
14
|
-
# @y=[3,4,5,6,7,8,9,10,20,30]
|
15
|
-
# ds={
|
16
|
-
# lr=Statsample::Regression::Multiple::RubyEngine.new(ds
|
11
|
+
# @a = Daru::Vector.new([1,3,2,4,3,5,4,6,5,7])
|
12
|
+
# @b = Daru::Vector.new([3,3,4,4,5,5,6,6,4,4])
|
13
|
+
# @c = Daru::Vector.new([11,22,30,40,50,65,78,79,99,100])
|
14
|
+
# @y = Daru::Vector.new([3,4,5,6,7,8,9,10,20,30])
|
15
|
+
# ds = Daru::DataFrame.new({:a => @a,:b => @b,:c => @c,:y => @y})
|
16
|
+
# lr=Statsample::Regression::Multiple::RubyEngine.new(ds,:y)
|
17
17
|
|
18
18
|
class RubyEngine < MatrixEngine
|
19
19
|
def initialize(ds,y_var, opts=Hash.new)
|
20
|
-
matrix=
|
21
|
-
fields_indep=ds.
|
22
|
-
default={
|
23
|
-
:y_mean=>ds[y_var].mean,
|
24
|
-
:x_mean=>fields_indep.inject({}) {|ac,f| ac[f]=ds[f].mean; ac},
|
25
|
-
:y_sd=>ds[y_var].sd,
|
26
|
-
:x_sd=>fields_indep.inject({}) {|ac,f| ac[f]=ds[f].sd; ac},
|
27
|
-
:cases=>Statsample::Bivariate.min_n_valid(ds)
|
20
|
+
matrix = Statsample::Bivariate.correlation_matrix ds
|
21
|
+
fields_indep=ds.vectors.to_a - [y_var]
|
22
|
+
default= {
|
23
|
+
:y_mean => ds[y_var].mean,
|
24
|
+
:x_mean => fields_indep.inject({}) {|ac,f| ac[f]=ds[f].mean; ac},
|
25
|
+
:y_sd => ds[y_var].sd,
|
26
|
+
:x_sd => fields_indep.inject({}) {|ac,f| ac[f]=ds[f].sd; ac},
|
27
|
+
:cases => Statsample::Bivariate.min_n_valid(ds)
|
28
28
|
}
|
29
|
-
opts=opts.merge(default)
|
29
|
+
opts = opts.merge(default)
|
30
30
|
super(matrix, y_var, opts)
|
31
|
-
@ds=ds
|
32
|
-
@dy=ds[@y_var]
|
33
|
-
@ds_valid=ds.dup_only_valid
|
34
|
-
@total_cases
|
35
|
-
@valid_cases
|
36
|
-
@ds_indep
|
31
|
+
@ds = ds
|
32
|
+
@dy = ds[@y_var]
|
33
|
+
@ds_valid = ds.dup_only_valid
|
34
|
+
@total_cases = @ds.nrows
|
35
|
+
@valid_cases = @ds_valid.nrows
|
36
|
+
@ds_indep = ds.dup(ds.vectors.to_a - [y_var])
|
37
37
|
set_dep_columns
|
38
38
|
end
|
39
39
|
|
40
40
|
def set_dep_columns
|
41
|
-
@dep_columns=[]
|
42
|
-
@ds_indep.each_vector{|
|
43
|
-
@dep_columns.push(v.data_with_nils)
|
44
|
-
}
|
41
|
+
@dep_columns = []
|
42
|
+
@ds_indep.each_vector { |v| @dep_columns.push(v.to_a) }
|
45
43
|
end
|
46
44
|
|
47
45
|
def fix_with_mean
|
48
46
|
i=0
|
49
|
-
@ds_indep.each do |row|
|
47
|
+
@ds_indep.each(:row) do |row|
|
50
48
|
empty=[]
|
51
49
|
row.each do |k,v|
|
52
50
|
empty.push(k) if v.nil?
|
53
51
|
end
|
52
|
+
|
54
53
|
if empty.size==1
|
55
54
|
@ds_indep[empty[0]][i]=@ds[empty[0]].mean
|
56
55
|
end
|
57
|
-
i+=1
|
56
|
+
i += 1
|
58
57
|
end
|
59
|
-
@ds_indep.
|
58
|
+
@ds_indep.update
|
60
59
|
set_dep_columns
|
61
60
|
end
|
62
61
|
def fix_with_regression
|
63
|
-
i=0
|
64
|
-
@ds_indep.each
|
65
|
-
empty=[]
|
66
|
-
row.each{|k,v|
|
67
|
-
empty.push(k) if v.nil?
|
68
|
-
}
|
62
|
+
i = 0
|
63
|
+
@ds_indep.each(:row) do |row|
|
64
|
+
empty = []
|
65
|
+
row.each { |k,v| empty.push(k) if v.nil? }
|
69
66
|
if empty.size==1
|
70
|
-
field=empty[0]
|
71
|
-
lr=MultipleRegression.new(@ds_indep,field)
|
72
|
-
fields=[]
|
73
|
-
@ds_indep.
|
74
|
-
|
67
|
+
field = empty[0]
|
68
|
+
lr = MultipleRegression.new(@ds_indep,field)
|
69
|
+
fields = []
|
70
|
+
@ds_indep.vectors.each { |f|
|
71
|
+
fields.push(row[f]) unless f == field
|
75
72
|
}
|
73
|
+
|
76
74
|
@ds_indep[field][i]=lr.process(fields)
|
77
75
|
end
|
78
76
|
i+=1
|
79
|
-
|
80
|
-
@ds_indep.
|
77
|
+
end
|
78
|
+
@ds_indep.update
|
81
79
|
set_dep_columns
|
82
80
|
end
|
83
81
|
# Standard error for constant
|
@@ -4,30 +4,30 @@ module Statsample
|
|
4
4
|
# Calculate Chonbach's alpha for a given dataset.
|
5
5
|
# only uses tuples without missing data
|
6
6
|
def cronbach_alpha(ods)
|
7
|
-
ds=ods.dup_only_valid
|
8
|
-
n_items=ds.
|
9
|
-
return nil if n_items<=1
|
10
|
-
s2_items=ds.
|
11
|
-
|
12
|
-
total=ds.vector_sum
|
7
|
+
ds = ods.dup_only_valid
|
8
|
+
n_items = ds.ncols
|
9
|
+
return nil if n_items <= 1
|
10
|
+
s2_items = ds.to_hash.values.inject(0) { |ac,v|
|
11
|
+
ac + v.variance }
|
12
|
+
total = ds.vector_sum
|
13
13
|
|
14
|
-
(n_items.quo(n_items-1)) * (1-(s2_items.quo(total.variance)))
|
14
|
+
(n_items.quo(n_items - 1)) * (1 - (s2_items.quo(total.variance)))
|
15
15
|
end
|
16
16
|
# Calculate Chonbach's alpha for a given dataset
|
17
17
|
# using standarized values for every vector.
|
18
18
|
# Only uses tuples without missing data
|
19
19
|
# Return nil if one or more vectors has 0 variance
|
20
20
|
def cronbach_alpha_standarized(ods)
|
21
|
+
ds = ods.dup_only_valid
|
22
|
+
return nil if ds.any? { |v| v.variance==0}
|
21
23
|
|
22
|
-
ds=
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
}.to_dataset
|
30
|
-
|
24
|
+
ds = Daru::DataFrame.new(
|
25
|
+
ds.vectors.to_a.inject({}) { |a,i|
|
26
|
+
a[i] = ods[i].standardize
|
27
|
+
a
|
28
|
+
}
|
29
|
+
)
|
30
|
+
|
31
31
|
cronbach_alpha(ds)
|
32
32
|
end
|
33
33
|
# Predicted reliability of a test by replicating
|
@@ -54,10 +54,10 @@ module Statsample
|
|
54
54
|
end
|
55
55
|
# Get Cronbach's alpha from a covariance matrix
|
56
56
|
def cronbach_alpha_from_covariance_matrix(cov)
|
57
|
-
n=cov.row_size
|
57
|
+
n = cov.row_size
|
58
58
|
raise "covariance matrix should have at least 2 variables" if n < 2
|
59
|
-
s2=n.times.inject(0) {|ac,i| ac+cov[i,i]}
|
60
|
-
(n.quo(n-1))*(1-(s2.quo(cov.total_sum)))
|
59
|
+
s2 = n.times.inject(0) { |ac,i| ac + cov[i,i] }
|
60
|
+
(n.quo(n - 1)) * (1 - (s2.quo(cov.total_sum)))
|
61
61
|
end
|
62
62
|
# Returns n necessary to obtain specific alpha
|
63
63
|
# given variance and covariance mean of items
|
@@ -82,8 +82,6 @@ module Statsample
|
|
82
82
|
end
|
83
83
|
c_a=cronbach_alpha_from_n_s2_cov(n,s2,cov)
|
84
84
|
dif=c_a - alpha
|
85
|
-
#puts "#{n} , #{c_a}"
|
86
|
-
|
87
85
|
end
|
88
86
|
n
|
89
87
|
end
|
@@ -110,20 +108,20 @@ module Statsample
|
|
110
108
|
attr_reader :totals, :counts, :vector_total
|
111
109
|
def initialize (ds, vector_total=nil)
|
112
110
|
vector_total||=ds.vector_sum
|
113
|
-
raise ArgumentError, "Total size != Dataset size" if vector_total.size!=ds.
|
111
|
+
raise ArgumentError, "Total size != Dataset size" if vector_total.size != ds.nrows
|
114
112
|
@vector_total=vector_total
|
115
113
|
@ds=ds
|
116
114
|
@totals={}
|
117
|
-
@counts=@ds.
|
115
|
+
@counts=@ds.vectors.to_a.inject({}) {|a,v| a[v]={};a}
|
118
116
|
process
|
119
117
|
end
|
120
118
|
def process
|
121
119
|
i=0
|
122
|
-
@ds.
|
120
|
+
@ds.each_row do |row|
|
123
121
|
tot=@vector_total[i]
|
124
122
|
@totals[tot]||=0
|
125
123
|
@totals[tot]+=1
|
126
|
-
@ds.
|
124
|
+
@ds.vectors.each do |f|
|
127
125
|
item=row[f].to_s
|
128
126
|
@counts[f][tot]||={}
|
129
127
|
@counts[f][tot][item]||=0
|
@@ -6,12 +6,12 @@ module Statsample
|
|
6
6
|
# several ratings) on a target and another measurement obtained on that target"
|
7
7
|
# == Usage
|
8
8
|
# require 'statsample'
|
9
|
-
# size=1000
|
10
|
-
# a = size.times.map {rand(10)}
|
9
|
+
# size = 1000
|
10
|
+
# a = Daru::Vector.new(size.times.map {rand(10)})
|
11
11
|
# b = a.recode{|i|i+rand(4)-2}
|
12
|
-
# c
|
12
|
+
# c = a.recode{|i|i+rand(4)-2}
|
13
13
|
# d = a.recode{|i|i+rand(4)-2}
|
14
|
-
# ds={
|
14
|
+
# ds = Daru::DataFrame.new({:a => a,:b => b,:c => c,:d => d})
|
15
15
|
# # Use :type attribute to set type to summarize
|
16
16
|
# icc=Statsample::Reliability::ICC.new(ds, :type=>:icc_1_k)
|
17
17
|
# puts icc.summary
|
@@ -96,10 +96,11 @@ module Statsample
|
|
96
96
|
attr_accessor :alpha
|
97
97
|
attr_accessor :name
|
98
98
|
def initialize(ds, opts=Hash.new)
|
99
|
+
ds.update
|
99
100
|
@ds=ds.dup_only_valid
|
100
|
-
@vectors=@ds.
|
101
|
-
@n=@ds.
|
102
|
-
@k=@ds.
|
101
|
+
@vectors=@ds.map { |e| e }
|
102
|
+
@n=@ds.nrows
|
103
|
+
@k=@ds.ncols
|
103
104
|
compute
|
104
105
|
@g_rho=0
|
105
106
|
@alpha=0.05
|
@@ -6,17 +6,17 @@ module Statsample
|
|
6
6
|
# PCA and Factor Analysis.
|
7
7
|
#
|
8
8
|
# == Usage
|
9
|
-
# @x1=[1,1,1,1,2,2,2,2,3,3,3,30]
|
10
|
-
# @x2=[1,1,1,2,2,3,3,3,3,4,4,50]
|
11
|
-
# @x3=[2,2,1,1,1,2,2,2,3,4,5,40]
|
12
|
-
# @x4=[1,2,3,4,4,4,4,3,4,4,5,30]
|
13
|
-
# ds={
|
9
|
+
# @x1 = Daru::Vector.new([1,1,1,1,2,2,2,2,3,3,3,30])
|
10
|
+
# @x2 = Daru::Vector.new([1,1,1,2,2,3,3,3,3,4,4,50])
|
11
|
+
# @x3 = Daru::Vector.new([2,2,1,1,1,2,2,2,3,4,5,40])
|
12
|
+
# @x4 = Daru::Vector.new([1,2,3,4,4,4,4,3,4,4,5,30])
|
13
|
+
# ds = Daru::DataFrame.new({:x1 => @x1,:x2 => @x2,:x3 => @x3,:x4 => @x4})
|
14
14
|
# opts={:name=>"Scales", # Name of analysis
|
15
15
|
# :summary_correlation_matrix=>true, # Add correlation matrix
|
16
16
|
# :summary_pca } # Add PCA between scales
|
17
17
|
# msa=Statsample::Reliability::MultiScaleAnalysis.new(opts) do |m|
|
18
|
-
# m.scale :s1, ds.clone(
|
19
|
-
# m.scale :s2, ds.clone(
|
18
|
+
# m.scale :s1, ds.clone([:x1, :x2])
|
19
|
+
# m.scale :s2, ds.clone([:x3, :x4]), {:name=>"Scale 2"}
|
20
20
|
# end
|
21
21
|
# # Retrieve summary
|
22
22
|
# puts msa.summary
|
@@ -107,7 +107,7 @@ module Statsample
|
|
107
107
|
# Retrieves a Principal Component Analysis (Factor::PCA)
|
108
108
|
# using all scales, using <tt>opts</tt> a options.
|
109
109
|
def pca(opts=nil)
|
110
|
-
opts||=pca_options
|
110
|
+
opts ||= pca_options
|
111
111
|
Statsample::Factor::PCA.new(correlation_matrix, opts)
|
112
112
|
end
|
113
113
|
# Retrieve Velicer's MAP
|
@@ -123,14 +123,15 @@ module Statsample
|
|
123
123
|
Statsample::Factor::PrincipalAxis.new(correlation_matrix, opts)
|
124
124
|
end
|
125
125
|
def dataset_from_scales
|
126
|
-
ds=
|
126
|
+
ds = Daru::DataFrame.new({}, order: @scales_keys.map(&:to_sym))
|
127
127
|
@scales.each_pair do |code,scale|
|
128
|
-
ds[code.
|
129
|
-
ds[code.to_s].name=scale.name
|
128
|
+
ds[code.to_sym] = scale.ds.vector_sum
|
130
129
|
end
|
131
|
-
|
130
|
+
|
131
|
+
ds.update
|
132
132
|
ds
|
133
133
|
end
|
134
|
+
|
134
135
|
def parallel_analysis(opts=nil)
|
135
136
|
opts||=parallel_analysis_options
|
136
137
|
Statsample::Factor::ParallelAnalysis.new(dataset_from_scales, opts)
|
@@ -140,6 +141,7 @@ module Statsample
|
|
140
141
|
def correlation_matrix
|
141
142
|
Statsample::Bivariate.correlation_matrix(dataset_from_scales)
|
142
143
|
end
|
144
|
+
|
143
145
|
def report_building(b) # :nodoc:
|
144
146
|
b.section(:name=>name) do |s|
|
145
147
|
s.section(:name=>_("Reliability analysis of scales")) do |s2|
|
@@ -3,12 +3,12 @@ module Statsample
|
|
3
3
|
# Analysis of a Scale. Analoge of Scale Reliability analysis on SPSS.
|
4
4
|
# Returns several statistics for complete scale and each item
|
5
5
|
# == Usage
|
6
|
-
# @x1=[1,1,1,1,2,2,2,2,3,3,3,30]
|
7
|
-
# @x2=[1,1,1,2,2,3,3,3,3,4,4,50]
|
8
|
-
# @x3=[2,2,1,1,1,2,2,2,3,4,5,40]
|
9
|
-
# @x4=[1,2,3,4,4,4,4,3,4,4,5,30]
|
10
|
-
# ds={
|
11
|
-
# ia=Statsample::Reliability::ScaleAnalysis.new(ds)
|
6
|
+
# @x1 = Daru::Vector.new([1,1,1,1,2,2,2,2,3,3,3,30])
|
7
|
+
# @x2 = Daru::Vector.new([1,1,1,2,2,3,3,3,3,4,4,50])
|
8
|
+
# @x3 = Daru::Vector.new([2,2,1,1,1,2,2,2,3,4,5,40])
|
9
|
+
# @x4 = Daru::Vector.new([1,2,3,4,4,4,4,3,4,4,5,30])
|
10
|
+
# ds = Daru::DataFrame.new({:x1 => @x1,:x2 => @x2,:x3 => @x3,:x4 => @x4})
|
11
|
+
# ia = Statsample::Reliability::ScaleAnalysis.new(ds)
|
12
12
|
# puts ia.summary
|
13
13
|
class ScaleAnalysis
|
14
14
|
include Summarizable
|
@@ -16,40 +16,40 @@ module Statsample
|
|
16
16
|
attr_accessor :name
|
17
17
|
attr_accessor :summary_histogram
|
18
18
|
def initialize(ds, opts=Hash.new)
|
19
|
-
@dumped=ds.
|
20
|
-
ds[f].variance==0
|
19
|
+
@dumped=ds.vectors.to_a.find_all {|f|
|
20
|
+
ds[f].variance == 0
|
21
21
|
}
|
22
22
|
|
23
|
-
@ods=ds
|
24
|
-
@ds=ds.dup_only_valid(ds.
|
25
|
-
@ds.
|
23
|
+
@ods = ds
|
24
|
+
@ds = ds.dup_only_valid(ds.vectors.to_a - @dumped)
|
25
|
+
@ds.rename ds.name
|
26
26
|
|
27
|
-
@k
|
28
|
-
@total
|
27
|
+
@k = @ds.ncols
|
28
|
+
@total = @ds.vector_sum
|
29
29
|
@o_total=@dumped.size > 0 ? @ods.vector_sum : nil
|
30
30
|
|
31
|
-
@vector_mean
|
32
|
-
@item_mean
|
33
|
-
@item_sd
|
31
|
+
@vector_mean = @ds.vector_mean
|
32
|
+
@item_mean = @vector_mean.mean
|
33
|
+
@item_sd = @vector_mean.sd
|
34
34
|
|
35
|
-
@mean
|
36
|
-
@median
|
37
|
-
|
38
|
-
@
|
39
|
-
@
|
40
|
-
@
|
41
|
-
@
|
42
|
-
|
43
|
-
opts_default={
|
44
|
-
:name=>_("Reliability Analysis"),
|
45
|
-
:summary_histogram=>true
|
35
|
+
@mean = @total.mean
|
36
|
+
@median = @total.median
|
37
|
+
@skew = @total.skew
|
38
|
+
@kurtosis = @total.kurtosis
|
39
|
+
@sd = @total.sd
|
40
|
+
@variance = @total.variance
|
41
|
+
@valid_n = @total.size
|
42
|
+
|
43
|
+
opts_default = {
|
44
|
+
:name => _("Reliability Analysis"),
|
45
|
+
:summary_histogram => true
|
46
46
|
}
|
47
|
-
@opts=opts_default.merge(opts)
|
48
|
-
@opts.each{|k,v| self.send("#{k}=",v) if self.respond_to? k }
|
47
|
+
@opts = opts_default.merge(opts)
|
48
|
+
@opts.each{ |k,v| self.send("#{k}=",v) if self.respond_to? k }
|
49
49
|
|
50
50
|
@cov_m=Statsample::Bivariate.covariance_matrix(@ds)
|
51
51
|
# Mean for covariances and variances
|
52
|
-
@variances
|
52
|
+
@variances = Daru::Vector.new(@k.times.map { |i| @cov_m[i,i] })
|
53
53
|
@variances_mean=@variances.mean
|
54
54
|
@covariances_mean=(@variance-@variances.sum).quo(@k**2-@k)
|
55
55
|
#begin
|
@@ -66,7 +66,7 @@ module Statsample
|
|
66
66
|
total={}
|
67
67
|
@ds.each do |row|
|
68
68
|
tot=@total[i]
|
69
|
-
@ds.
|
69
|
+
@ds.vectors.each do |f|
|
70
70
|
out[f]||= {}
|
71
71
|
total[f]||={}
|
72
72
|
out[f][tot]||= 0
|
@@ -87,43 +87,41 @@ module Statsample
|
|
87
87
|
# Adjusted RPB(Point biserial-correlation) for each item
|
88
88
|
#
|
89
89
|
def item_total_correlation
|
90
|
-
@
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
total=ds2.vector_sum
|
95
|
-
a[v]=Statsample::Bivariate.pearson(vector,total)
|
90
|
+
vecs = @ds.vectors.to_a
|
91
|
+
@itc ||= vecs.inject({}) do |a,v|
|
92
|
+
total=@ds.vector_sum(vecs - [v])
|
93
|
+
a[v]=Statsample::Bivariate.pearson(@ds[v],total)
|
96
94
|
a
|
97
95
|
end
|
98
96
|
end
|
99
97
|
def mean_rpb
|
100
|
-
item_total_correlation.values.
|
98
|
+
Daru::Vector.new(item_total_correlation.values).mean
|
101
99
|
end
|
102
100
|
def item_statistics
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
101
|
+
@is||=@ds.vectors.to_a.inject({}) do |a,v|
|
102
|
+
a[v]={:mean=>@ds[v].mean, :sds=>Math::sqrt(@cov_m.variance(v))}
|
103
|
+
a
|
104
|
+
end
|
107
105
|
end
|
108
106
|
# Returns a dataset with cases ordered by score
|
109
107
|
# and variables ordered by difficulty
|
110
108
|
|
111
109
|
def item_difficulty_analysis
|
112
110
|
dif={}
|
113
|
-
@ds.
|
114
|
-
dif_sort=dif.sort{|a,b| -(a[1]<=>b[1])}
|
111
|
+
@ds.vectors.each{|f| dif[f]=@ds[f].mean }
|
112
|
+
dif_sort = dif.sort { |a,b| -(a[1]<=>b[1]) }
|
115
113
|
scores_sort={}
|
116
114
|
scores=@ds.vector_mean
|
117
|
-
scores.each_index{|i| scores_sort[i]=scores[i] }
|
115
|
+
scores.each_index{ |i| scores_sort[i]=scores[i] }
|
118
116
|
scores_sort=scores_sort.sort{|a,b| a[1]<=>b[1]}
|
119
|
-
ds_new=
|
117
|
+
ds_new = Daru::DataFrame.new({}, order: ([:case,:score] + dif_sort.collect{|a,b| a.to_sym}))
|
120
118
|
scores_sort.each do |i,score|
|
121
|
-
row=[i, score]
|
122
|
-
case_row
|
123
|
-
dif_sort.each{|variable,dif_value| row.push(case_row[variable]) }
|
124
|
-
ds_new.
|
119
|
+
row = [i, score]
|
120
|
+
case_row = @ds.row[i].to_hash
|
121
|
+
dif_sort.each{ |variable,dif_value| row.push(case_row[variable]) }
|
122
|
+
ds_new.add_row(row)
|
125
123
|
end
|
126
|
-
ds_new.
|
124
|
+
ds_new.update
|
127
125
|
ds_new
|
128
126
|
end
|
129
127
|
|
@@ -132,9 +130,10 @@ module Statsample
|
|
132
130
|
end
|
133
131
|
|
134
132
|
def stats_if_deleted_intern # :nodoc:
|
135
|
-
return Hash.new if @ds.
|
136
|
-
@ds.
|
137
|
-
|
133
|
+
return Hash.new if @ds.ncols == 1
|
134
|
+
vecs = @ds.vectors.to_a
|
135
|
+
vecs.inject({}) do |a,v|
|
136
|
+
cov_2=@cov_m.submatrix(vecs - [v])
|
138
137
|
#ds2=@ds.clone
|
139
138
|
#ds2.delete_vector(v)
|
140
139
|
#total=ds2.vector_sum
|
@@ -151,11 +150,10 @@ module Statsample
|
|
151
150
|
def report_building(builder) #:nodoc:
|
152
151
|
builder.section(:name=>@name) do |s|
|
153
152
|
|
154
|
-
|
155
153
|
if @dumped.size>0
|
156
154
|
s.section(:name=>"Items with variance=0") do |s1|
|
157
155
|
s.table(:name=>_("Summary for %s with all items") % @name) do |t|
|
158
|
-
t.row [_("Items"), @ods.
|
156
|
+
t.row [_("Items"), @ods.ncols]
|
159
157
|
t.row [_("Sum mean"), "%0.4f" % @o_total.mean]
|
160
158
|
t.row [_("S.d. mean"), "%0.4f" % @o_total.sd]
|
161
159
|
end
|
@@ -170,7 +168,7 @@ module Statsample
|
|
170
168
|
|
171
169
|
|
172
170
|
s.table(:name=>_("Summary for %s") % @name) do |t|
|
173
|
-
t.row [_("Valid Items"), @ds.
|
171
|
+
t.row [_("Valid Items"), @ds.ncols]
|
174
172
|
|
175
173
|
t.row [_("Valid cases"), @valid_n]
|
176
174
|
t.row [_("Sum mean"), "%0.4f" % @mean]
|
@@ -193,8 +191,8 @@ module Statsample
|
|
193
191
|
end
|
194
192
|
|
195
193
|
if (@alpha)
|
196
|
-
s.text _("Items for obtain alpha(0.8) : %d" % Statsample::Reliability::n_for_desired_reliability(@alpha, 0.8, @ds.
|
197
|
-
s.text _("Items for obtain alpha(0.9) : %d" % Statsample::Reliability::n_for_desired_reliability(@alpha, 0.9, @ds.
|
194
|
+
s.text _("Items for obtain alpha(0.8) : %d" % Statsample::Reliability::n_for_desired_reliability(@alpha, 0.8, @ds.ncols))
|
195
|
+
s.text _("Items for obtain alpha(0.9) : %d" % Statsample::Reliability::n_for_desired_reliability(@alpha, 0.9, @ds.ncols))
|
198
196
|
end
|
199
197
|
|
200
198
|
|
@@ -203,7 +201,7 @@ module Statsample
|
|
203
201
|
itc=item_total_correlation
|
204
202
|
|
205
203
|
s.table(:name=>_("Items report for %s") % @name, :header=>["item","mean","sd", "mean if deleted", "var if deleted", "sd if deleted"," item-total correl.", "alpha if deleted"]) do |t|
|
206
|
-
@ds.
|
204
|
+
@ds.vectors.each do |f|
|
207
205
|
row=["#{@ds[f].name}(#{f})"]
|
208
206
|
if is[f]
|
209
207
|
row+=[sprintf("%0.5f",is[f][:mean]), sprintf("%0.5f", is[f][:sds])]
|