statsample-ekatena 2.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (156) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +15 -0
  3. data/.travis.yml +23 -0
  4. data/CONTRIBUTING.md +17 -0
  5. data/Gemfile +2 -0
  6. data/History.txt +457 -0
  7. data/LICENSE.txt +12 -0
  8. data/README.md +175 -0
  9. data/Rakefile +44 -0
  10. data/benchmarks/correlation_matrix_15_variables.rb +32 -0
  11. data/benchmarks/correlation_matrix_5_variables.rb +33 -0
  12. data/benchmarks/correlation_matrix_methods/correlation_matrix.ds +0 -0
  13. data/benchmarks/correlation_matrix_methods/correlation_matrix.html +93 -0
  14. data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +71 -0
  15. data/benchmarks/correlation_matrix_methods/correlation_matrix.xls +0 -0
  16. data/benchmarks/correlation_matrix_methods/correlation_matrix_gsl_ruby.ods +0 -0
  17. data/benchmarks/correlation_matrix_methods/correlation_matrix_with_graphics.ods +0 -0
  18. data/benchmarks/correlation_matrix_methods/results.ds +0 -0
  19. data/benchmarks/factor_map.rb +37 -0
  20. data/benchmarks/helpers_benchmark.rb +5 -0
  21. data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
  22. data/doc_latex/manual/equations.tex +78 -0
  23. data/examples/boxplot.rb +28 -0
  24. data/examples/chisquare_test.rb +23 -0
  25. data/examples/correlation_matrix.rb +32 -0
  26. data/examples/dataset.rb +30 -0
  27. data/examples/dominance_analysis.rb +33 -0
  28. data/examples/dominance_analysis_bootstrap.rb +32 -0
  29. data/examples/histogram.rb +26 -0
  30. data/examples/icc.rb +24 -0
  31. data/examples/levene.rb +29 -0
  32. data/examples/multiple_regression.rb +20 -0
  33. data/examples/multivariate_correlation.rb +33 -0
  34. data/examples/parallel_analysis.rb +40 -0
  35. data/examples/polychoric.rb +40 -0
  36. data/examples/principal_axis.rb +26 -0
  37. data/examples/reliability.rb +31 -0
  38. data/examples/scatterplot.rb +25 -0
  39. data/examples/t_test.rb +27 -0
  40. data/examples/tetrachoric.rb +17 -0
  41. data/examples/u_test.rb +24 -0
  42. data/examples/vector.rb +20 -0
  43. data/examples/velicer_map_test.rb +46 -0
  44. data/grab_references.rb +29 -0
  45. data/lib/spss.rb +134 -0
  46. data/lib/statsample-ekatena/analysis.rb +100 -0
  47. data/lib/statsample-ekatena/analysis/suite.rb +89 -0
  48. data/lib/statsample-ekatena/analysis/suitereportbuilder.rb +44 -0
  49. data/lib/statsample-ekatena/anova.rb +24 -0
  50. data/lib/statsample-ekatena/anova/contrast.rb +79 -0
  51. data/lib/statsample-ekatena/anova/oneway.rb +187 -0
  52. data/lib/statsample-ekatena/anova/twoway.rb +207 -0
  53. data/lib/statsample-ekatena/bivariate.rb +406 -0
  54. data/lib/statsample-ekatena/bivariate/pearson.rb +54 -0
  55. data/lib/statsample-ekatena/codification.rb +182 -0
  56. data/lib/statsample-ekatena/converter/csv.rb +28 -0
  57. data/lib/statsample-ekatena/converter/spss.rb +48 -0
  58. data/lib/statsample-ekatena/converters.rb +211 -0
  59. data/lib/statsample-ekatena/crosstab.rb +188 -0
  60. data/lib/statsample-ekatena/daru.rb +115 -0
  61. data/lib/statsample-ekatena/dataset.rb +10 -0
  62. data/lib/statsample-ekatena/dominanceanalysis.rb +425 -0
  63. data/lib/statsample-ekatena/dominanceanalysis/bootstrap.rb +232 -0
  64. data/lib/statsample-ekatena/factor.rb +104 -0
  65. data/lib/statsample-ekatena/factor/map.rb +124 -0
  66. data/lib/statsample-ekatena/factor/parallelanalysis.rb +166 -0
  67. data/lib/statsample-ekatena/factor/pca.rb +242 -0
  68. data/lib/statsample-ekatena/factor/principalaxis.rb +243 -0
  69. data/lib/statsample-ekatena/factor/rotation.rb +198 -0
  70. data/lib/statsample-ekatena/formula/fit_model.rb +46 -0
  71. data/lib/statsample-ekatena/formula/formula.rb +306 -0
  72. data/lib/statsample-ekatena/graph.rb +11 -0
  73. data/lib/statsample-ekatena/graph/boxplot.rb +236 -0
  74. data/lib/statsample-ekatena/graph/histogram.rb +198 -0
  75. data/lib/statsample-ekatena/graph/scatterplot.rb +213 -0
  76. data/lib/statsample-ekatena/histogram.rb +180 -0
  77. data/lib/statsample-ekatena/matrix.rb +329 -0
  78. data/lib/statsample-ekatena/multiset.rb +310 -0
  79. data/lib/statsample-ekatena/regression.rb +65 -0
  80. data/lib/statsample-ekatena/regression/multiple.rb +89 -0
  81. data/lib/statsample-ekatena/regression/multiple/alglibengine.rb +128 -0
  82. data/lib/statsample-ekatena/regression/multiple/baseengine.rb +251 -0
  83. data/lib/statsample-ekatena/regression/multiple/gslengine.rb +129 -0
  84. data/lib/statsample-ekatena/regression/multiple/matrixengine.rb +205 -0
  85. data/lib/statsample-ekatena/regression/multiple/rubyengine.rb +86 -0
  86. data/lib/statsample-ekatena/regression/simple.rb +121 -0
  87. data/lib/statsample-ekatena/reliability.rb +150 -0
  88. data/lib/statsample-ekatena/reliability/icc.rb +415 -0
  89. data/lib/statsample-ekatena/reliability/multiscaleanalysis.rb +181 -0
  90. data/lib/statsample-ekatena/reliability/scaleanalysis.rb +233 -0
  91. data/lib/statsample-ekatena/reliability/skillscaleanalysis.rb +114 -0
  92. data/lib/statsample-ekatena/resample.rb +15 -0
  93. data/lib/statsample-ekatena/shorthand.rb +125 -0
  94. data/lib/statsample-ekatena/srs.rb +169 -0
  95. data/lib/statsample-ekatena/test.rb +82 -0
  96. data/lib/statsample-ekatena/test/bartlettsphericity.rb +45 -0
  97. data/lib/statsample-ekatena/test/chisquare.rb +73 -0
  98. data/lib/statsample-ekatena/test/f.rb +52 -0
  99. data/lib/statsample-ekatena/test/kolmogorovsmirnov.rb +63 -0
  100. data/lib/statsample-ekatena/test/levene.rb +88 -0
  101. data/lib/statsample-ekatena/test/t.rb +309 -0
  102. data/lib/statsample-ekatena/test/umannwhitney.rb +208 -0
  103. data/lib/statsample-ekatena/test/wilcoxonsignedrank.rb +90 -0
  104. data/lib/statsample-ekatena/vector.rb +19 -0
  105. data/lib/statsample-ekatena/version.rb +3 -0
  106. data/lib/statsample.rb +282 -0
  107. data/po/es/statsample.mo +0 -0
  108. data/po/es/statsample.po +959 -0
  109. data/po/statsample.pot +947 -0
  110. data/references.txt +24 -0
  111. data/statsample-ekatena.gemspec +49 -0
  112. data/test/fixtures/bank2.dat +200 -0
  113. data/test/fixtures/correlation_matrix.rb +17 -0
  114. data/test/fixtures/df.csv +15 -0
  115. data/test/fixtures/hartman_23.matrix +9 -0
  116. data/test/fixtures/stock_data.csv +500 -0
  117. data/test/fixtures/tetmat_matrix.txt +5 -0
  118. data/test/fixtures/tetmat_test.txt +1001 -0
  119. data/test/helpers_tests.rb +83 -0
  120. data/test/test_analysis.rb +176 -0
  121. data/test/test_anova_contrast.rb +36 -0
  122. data/test/test_anovaoneway.rb +26 -0
  123. data/test/test_anovatwoway.rb +37 -0
  124. data/test/test_anovatwowaywithdataset.rb +47 -0
  125. data/test/test_anovawithvectors.rb +102 -0
  126. data/test/test_awesome_print_bug.rb +16 -0
  127. data/test/test_bartlettsphericity.rb +25 -0
  128. data/test/test_bivariate.rb +164 -0
  129. data/test/test_codification.rb +78 -0
  130. data/test/test_crosstab.rb +67 -0
  131. data/test/test_dominance_analysis.rb +39 -0
  132. data/test/test_factor.rb +228 -0
  133. data/test/test_factor_map.rb +38 -0
  134. data/test/test_factor_pa.rb +56 -0
  135. data/test/test_fit_model.rb +88 -0
  136. data/test/test_ggobi.rb +35 -0
  137. data/test/test_gsl.rb +15 -0
  138. data/test/test_histogram.rb +109 -0
  139. data/test/test_matrix.rb +48 -0
  140. data/test/test_multiset.rb +176 -0
  141. data/test/test_regression.rb +231 -0
  142. data/test/test_reliability.rb +223 -0
  143. data/test/test_reliability_icc.rb +198 -0
  144. data/test/test_reliability_skillscale.rb +57 -0
  145. data/test/test_resample.rb +24 -0
  146. data/test/test_srs.rb +9 -0
  147. data/test/test_statistics.rb +69 -0
  148. data/test/test_stest.rb +69 -0
  149. data/test/test_stratified.rb +17 -0
  150. data/test/test_test_f.rb +33 -0
  151. data/test/test_test_kolmogorovsmirnov.rb +34 -0
  152. data/test/test_test_t.rb +62 -0
  153. data/test/test_umannwhitney.rb +27 -0
  154. data/test/test_vector.rb +12 -0
  155. data/test/test_wilcoxonsignedrank.rb +64 -0
  156. metadata +570 -0
@@ -0,0 +1,205 @@
1
+ module Statsample
2
+ module Regression
3
+ module Multiple
4
+ # Pure Ruby Class for Multiple Regression Analysis, based on a covariance or correlation matrix.
5
+ #
6
+ # Use Statsample::Regression::Multiple::RubyEngine if you have a
7
+ # Dataset, to avoid setting all details.
8
+ #
9
+ # <b>Remember:</b> NEVER use a Covariance data if you have missing data. Use only correlation matrix on that case.
10
+ #
11
+ #
12
+ # Example:
13
+ #
14
+ # matrix=[[1.0, 0.5, 0.2], [0.5, 1.0, 0.7], [0.2, 0.7, 1.0]]
15
+ #
16
+ # lr=Statsample::Regression::Multiple::MatrixEngine.new(matrix,2)
17
+
18
+ class MatrixEngine < BaseEngine
19
+ # Hash of standard deviation of predictors.
20
+ # Only useful for Correlation Matrix, because by default is set to 1
21
+ attr_accessor :x_sd
22
+ # Standard deviation of criterion
23
+ # Only useful for Correlation Matrix, because by default is set to 1
24
+ attr_accessor :y_sd
25
+ # Hash of mean for predictors. By default, set to 0
26
+ attr_accessor :x_mean
27
+
28
+ # Mean for criteria. By default, set to 0
29
+ attr_accessor :y_mean
30
+
31
+ # Number of cases
32
+ attr_writer :cases
33
+ attr_writer :digits
34
+ # Create object
35
+ #
36
+ def initialize(matrix,y_var, opts=Hash.new)
37
+ matrix.extend Statsample::CovariateMatrix
38
+ raise "#{y_var} variable should be on data" unless matrix.fields.include? y_var
39
+ if matrix._type==:covariance
40
+ @matrix_cov=matrix
41
+ @matrix_cor=matrix.correlation
42
+ @no_covariance=false
43
+ else
44
+ @matrix_cor=matrix
45
+ @matrix_cov=matrix
46
+ @no_covariance=true
47
+ end
48
+
49
+ @y_var=y_var
50
+ @fields=matrix.fields-[y_var]
51
+
52
+ @n_predictors=@fields.size
53
+ @predictors_n=@n_predictors
54
+ @matrix_x= @matrix_cor.submatrix(@fields)
55
+ @matrix_x_cov= @matrix_cov.submatrix(@fields)
56
+ raise LinearDependency, "Regressors are linearly dependent" if @matrix_x.determinant<1e-15
57
+
58
+
59
+ @matrix_y = @matrix_cor.submatrix(@fields, [y_var])
60
+ @matrix_y_cov = @matrix_cov.submatrix(@fields, [y_var])
61
+
62
+ @y_sd=Math::sqrt(@matrix_cov.submatrix([y_var])[0,0])
63
+
64
+ @x_sd=@n_predictors.times.inject({}) {|ac,i|
65
+ ac[@matrix_x_cov.fields[i]]=Math::sqrt(@matrix_x_cov[i,i])
66
+ ac;
67
+ }
68
+
69
+ @cases=nil
70
+ @x_mean=@fields.inject({}) {|ac,f|
71
+ ac[f]=0.0
72
+ ac;
73
+ }
74
+
75
+ @y_mean=0.0
76
+ @name=_("Multiple reggresion of %s on %s") % [@fields.join(","), @y_var]
77
+
78
+ opts_default = {:digits=>3}
79
+ opts = opts_default.merge opts
80
+ opts.each{|k,v|
81
+ self.send("#{k}=",v) if self.respond_to? k
82
+ }
83
+ result_matrix=@matrix_x_cov.inverse * @matrix_y_cov
84
+
85
+ if matrix._type == :covariance
86
+ @coeffs=result_matrix.column(0).to_a
87
+ @coeffs_stan=coeffs.collect {|k,v|
88
+ coeffs[k]*@x_sd[k].quo(@y_sd)
89
+ }
90
+ else
91
+ @coeffs_stan=result_matrix.column(0).to_a
92
+ @coeffs=standarized_coeffs.collect {|k,v|
93
+ standarized_coeffs[k]*@y_sd.quo(@x_sd[k])
94
+ }
95
+ end
96
+ @total_cases=@valid_cases=@cases
97
+ end
98
# Number of valid cases used by the analysis.
#
# Returns the value stored in @cases.
# Raises RuntimeError when @cases is still nil.
def cases
  return @cases unless @cases.nil?

  raise "You should define the number of valid cases first"
end
102
+ # Get R^2 for the regression
103
+ # For fixed models is the coefficient of determination.
104
+ # On random models, is the 'squared-multiple correlation'
105
+ # Equal to
106
+ # * 1-(|R| / |R_x|) or
107
+ # * Sum(b_i*r_yi) <- used
108
# R^2 computed as Sum(beta_i * r_yi): each standardized coefficient
# multiplied by the matching entry in column 0 of @matrix_y.
def r2
  total = 0
  (0...@n_predictors).each do |idx|
    total += @coeffs_stan[idx] * @matrix_y[idx, 0]
  end
  total
end
111
+ # Multiple correlation, on random models.
112
+ def r
113
+ Math::sqrt(r2)
114
+ end
115
+ # Value of constant
116
+ def constant
117
+ c = coeffs
118
+ @y_mean - @fields.inject(0) { |a,k| a + (c[k] * @x_mean[k])}
119
+ end
120
+ # Hash of b or raw coefficients
121
+ def coeffs
122
+ assign_names(@coeffs)
123
+ end
124
+ # Hash of beta or standarized coefficients
125
+
126
+ def standarized_coeffs
127
+ assign_names(@coeffs_stan)
128
+ end
129
+ # Total sum of squares
130
+ def sst
131
+ @y_sd**2*(cases-1.0)
132
+ end
133
+
134
+ # Degrees of freedom for regression
135
+ def df_r
136
+ @n_predictors
137
+ end
138
+ # Degrees of freedom for error
139
+ def df_e
140
+ cases-@n_predictors-1
141
+ end
142
+ # Tolerance for a given variable
143
+ # defined as (1-R^2) of regression of other independent variables
144
+ # over the selected
145
+ # == Reference:
146
+ # * http://talkstats.com/showthread.php?t=5056
147
+ def tolerance(var)
148
+ return 1 if @matrix_x.column_size==1
149
+ lr=Statsample::Regression::Multiple::MatrixEngine.new(@matrix_x, var)
150
+ 1-lr.r2
151
+ end
152
+ # Standard Error for coefficients.
153
+ # Standard error of a coefficients depends on
154
+ # * Tolerance of the coefficients: higher tolerance implies lower error
155
+ # * Higher r2 implies lower error
156
+ # == Reference:
157
+ # * Cohen et al. (2003). Applied Multiple Reggression / Correlation Analysis for the Behavioral Sciences
158
+ #
159
+ def coeffs_se
160
+ out={}
161
+ #mse=sse.quo(df_e)
162
+ coeffs.each {|k,v|
163
+ out[k]=@y_sd.quo(@x_sd[k])*Math::sqrt( 1.quo(tolerance(k)))*Math::sqrt((1-r2).quo(df_e))
164
+ }
165
+ out
166
+ end
167
+ # t value for constant
168
+ def constant_t
169
+ return nil if constant_se.nil?
170
+ constant.to_f / constant_se
171
+ end
172
+ # Standard error for constant.
173
+ # This method recreates the estimated variance-covariance matrix
174
+ # using means, standard deviation and covariance matrix.
175
+ # So, needs the covariance matrix.
176
# Standard error for the constant.
#
# Rebuilds X'X (with a leading :constant row/column) from the predictor
# means, the predictor standard deviations and the covariance matrix,
# then scales its inverse by +mse+ and takes the square root of the
# [0,0] entry.
#
# Returns nil when only a correlation matrix is available
# (@no_covariance), or when that [0,0] entry is not positive.
def constant_se
  return nil if @no_covariance

  # Work on a copy: assigning :constant directly on @x_mean (and @x_sd)
  # leaked that key into the hashes exposed by the public accessors.
  means = @x_mean.merge(:constant => 1)
  sd = @x_sd
  fields = [:constant] + @matrix_cov.fields - [@y_var]
  n = @cases

  # Recreate X'X using the variance-covariance matrix
  xt_x = ::Matrix.rows(fields.collect { |i|
    fields.collect { |j|
      cov = if i == :constant || j == :constant
              0
            elsif i == j
              sd[i]**2
            else
              @matrix_cov.submatrix(i..i, j..j)[0, 0]
            end
      cov * (n - 1) + n * means[i] * means[j]
    }
  })

  # Only the [0,0] entry is needed, so avoid mapping the whole matrix.
  variance = (xt_x.inverse * mse)[0, 0]
  Math.sqrt(variance) if variance > 0
end
201
+
202
+ end
203
+ end
204
+ end
205
+ end
@@ -0,0 +1,86 @@
1
+ module Statsample
2
+ module Regression
3
+ module Multiple
4
+ # Pure Ruby Class for Multiple Regression Analysis.
5
+ # Slower than AlglibEngine, but is pure ruby and can use a pairwise approach for missing values.
6
+ # Coefficient calculation uses correlation matrix between the vectors
7
+ # If you need a listwise approach for missing values, use AlglibEngine, because it is faster.
8
+ #
9
+ # Example:
10
+ #
11
+ # @a = Daru::Vector.new([1,3,2,4,3,5,4,6,5,7])
12
+ # @b = Daru::Vector.new([3,3,4,4,5,5,6,6,4,4])
13
+ # @c = Daru::Vector.new([11,22,30,40,50,65,78,79,99,100])
14
+ # @y = Daru::Vector.new([3,4,5,6,7,8,9,10,20,30])
15
+ # ds = Daru::DataFrame.new({:a => @a,:b => @b,:c => @c,:y => @y})
16
+ # lr=Statsample::Regression::Multiple::RubyEngine.new(ds,:y)
17
+
18
+ class RubyEngine < MatrixEngine
19
+ def initialize(ds,y_var, opts=Hash.new)
20
+ matrix = Statsample::Bivariate.correlation_matrix ds
21
+ fields_indep=ds.vectors.to_a - [y_var]
22
+ default= {
23
+ :y_mean => ds[y_var].mean,
24
+ :x_mean => fields_indep.inject({}) {|ac,f| ac[f]=ds[f].mean; ac},
25
+ :y_sd => ds[y_var].sd,
26
+ :x_sd => fields_indep.inject({}) {|ac,f| ac[f]=ds[f].sd; ac},
27
+ :cases => Statsample::Bivariate.min_n_valid(ds)
28
+ }
29
+ opts = opts.merge(default)
30
+ super(matrix, y_var, opts)
31
+ @ds = ds
32
+ @dy = ds[@y_var]
33
+ @ds_valid = ds.reject_values(*Daru::MISSING_VALUES)
34
+ @total_cases = @ds.nrows
35
+ @valid_cases = @ds_valid.nrows
36
+ @ds_indep = ds.dup(ds.vectors.to_a - [y_var])
37
+ set_dep_columns
38
+ end
39
+
40
# Rebuilds @dep_columns as an array holding, for every vector yielded
# by @ds_indep.each_vector, that vector converted with +to_a+.
def set_dep_columns
  @dep_columns = []
  @ds_indep.each_vector do |vector|
    @dep_columns << vector.to_a
  end
end
44
+
45
+ def fix_with_mean
46
+ i=0
47
+ @ds_indep.each(:row) do |row|
48
+ empty=[]
49
+ row.each do |k,v|
50
+ empty.push(k) if v.nil?
51
+ end
52
+
53
+ if empty.size==1
54
+ @ds_indep[empty[0]][i]=@ds[empty[0]].mean
55
+ end
56
+ i += 1
57
+ end
58
+ set_dep_columns
59
+ end
60
+ def fix_with_regression
61
+ i = 0
62
+ @ds_indep.each(:row) do |row|
63
+ empty = []
64
+ row.each { |k,v| empty.push(k) if v.nil? }
65
+ if empty.size==1
66
+ field = empty[0]
67
+ lr = MultipleRegression.new(@ds_indep,field)
68
+ fields = []
69
+ @ds_indep.vectors.each { |f|
70
+ fields.push(row[f]) unless f == field
71
+ }
72
+
73
+ @ds_indep[field][i]=lr.process(fields)
74
+ end
75
+ i+=1
76
+ end
77
+ set_dep_columns
78
+ end
79
+ # Standard error for constant
80
+ def constant_se
81
+ estimated_variance_covariance_matrix[0,0]
82
+ end
83
+ end
84
+ end
85
+ end
86
+ end
@@ -0,0 +1,121 @@
1
+ module Statsample
2
+ module Regression
3
+ # Class for calculation of linear regressions with form
4
+ # y = a+bx
5
+ # To create a Statsample::Regression::Simple object:
6
+ # * <tt> Statsample::Regression::Simple.new_from_dataset(ds,x,y)</tt>
7
+ # * <tt> Statsample::Regression::Simple.new_from_vectors(vx,vy)</tt>
8
+ # * <tt> Statsample::Regression::Simple.new_from_gsl(gsl) </tt>
9
+ #
10
+ class Simple
11
+ include Summarizable
12
+ attr_accessor :a,:b,:cov00, :cov01, :covx1, :chisq, :status
13
+ attr_accessor :name
14
+ attr_accessor :digits
15
+ def initialize(init_method, *argv)
16
+ self.send(init_method, *argv)
17
+ end
18
+ private_class_method :new
19
+ # Obtain y value given x value
20
+ # y=a+bx
21
+
22
# Predicted y for a given x, using the intercept @a and slope @b.
def y(val_x)
  slope_term = @b * val_x
  @a + slope_term
end
25
+ # Obtain x value given y value
26
+ # x=(y-a)/b
27
# x value for a given y: (y - a) / b, with the slope @b converted to
# Float before dividing.
def x(val_y)
  offset = val_y - @a
  offset / @b.to_f
end
30
+ # Sum of square error
31
+ def sse
32
+ (0...@vx.size).inject(0) {|acum,i| acum+((@vy[i]-y(@vx[i]))**2)
33
+ }
34
+ end
35
+ def standard_error
36
+ Math::sqrt(sse / (@vx.size-2).to_f)
37
+ end
38
+ # Sum of square regression
39
+ def ssr
40
+ vy_mean=@vy.mean
41
+ (0...@vx.size).inject(0) {|a,i|
42
+ a+((y(@vx[i])-vy_mean)**2)
43
+ }
44
+
45
+ end
46
+ # Sum of square total
47
+ def sst
48
+ @vy.sum_of_squared_deviation
49
+ end
50
+ # Value of r
51
+ def r
52
+ @b * (@vx.sds / @vy.sds)
53
+ end
54
+ # Value of r^2
55
+ def r2
56
+ r**2
57
+ end
58
+ class << self
59
+ # Create a regression object giving an array with following parameters:
60
+ # <tt>a,b,cov00, cov01, covx1, chisq, status</tt>
61
+ # Useful to obtain x and y values with a and b values.
62
+ def new_from_gsl(ar)
63
+ new(:init_gsl, *ar)
64
+ end
65
+ # Create a simple regression using two vectors
66
+ def new_from_vectors(vx,vy, opts=Hash.new)
67
+ new(:init_vectors,vx,vy, opts)
68
+ end
69
+ # Create a simple regression using a dataset and two vector names.
70
+ def new_from_dataset(ds,x,y, opts=Hash.new)
71
+ new(:init_vectors,ds[x],ds[y], opts)
72
+ end
73
+ end
74
+ def init_vectors(vx,vy, opts=Hash.new)
75
+ @vx,@vy=Statsample.only_valid_clone(vx,vy)
76
+ x_m=@vx.mean
77
+ y_m=@vy.mean
78
+ num=den=0
79
+ (0...@vx.size).each {|i|
80
+ num+=(@vx[i]-x_m)*(@vy[i]-y_m)
81
+ den+=(@vx[i]-x_m)**2
82
+ }
83
+ @b=num.to_f/den
84
+ @a=y_m - @b*x_m
85
+
86
+ opts_default={
87
+ :digits=>3,
88
+ :name=>_("Regression of %s over %s") % [@vx.name, @vy.name]
89
+ }
90
+ @opts=opts_default.merge opts
91
+
92
+ @opts.each{|k,v|
93
+ self.send("#{k}=",v) if self.respond_to? k
94
+ }
95
+
96
+ end
97
# Initializer used by new_from_gsl: stores the seven values as-is in
# the matching instance variables. No vectors are set here.
def init_gsl(a, b, cov00, cov01, covx1, chisq, status)
  @a, @b = a, b
  @cov00, @cov01, @covx1 = cov00, cov01, covx1
  @chisq = chisq
  @status = status
end
106
+ def report_building(gen)
107
+ f="%0.#{digits}f"
108
+ gen.section(:name=>name) do |s|
109
+ s.table(:header=>[_("Variable"), _("Value")]) do |t|
110
+ t.row [_("r"), f % r]
111
+ t.row [_("r^2"), f % r2]
112
+ t.row [_("a"), f % a]
113
+ t.row [_("b"), f % b]
114
+ t.row [_("s.e"), f % standard_error]
115
+ end
116
+ end
117
+ end
118
+ private :init_vectors, :init_gsl
119
+ end
120
+ end
121
+ end
@@ -0,0 +1,150 @@
1
+ module Statsample
2
+ module Reliability
3
+ class << self
4
+ # Calculate Cronbach's alpha for a given dataset.
5
+ # only uses tuples without missing data
6
+ def cronbach_alpha(ods)
7
+ ds = ods.reject_values(*Daru::MISSING_VALUES)
8
+ n_items = ds.ncols
9
+ return nil if n_items <= 1
10
+ s2_items = ds.to_hash.values.inject(0) { |ac,v|
11
+ ac + v.variance }
12
+ total = ds.vector_sum
13
+
14
+ (n_items.quo(n_items - 1)) * (1 - (s2_items.quo(total.variance)))
15
+ end
16
+ # Calculate Cronbach's alpha for a given dataset
17
+ # using standarized values for every vector.
18
+ # Only uses tuples without missing data
19
+ # Return nil if one or more vectors has 0 variance
20
+ def cronbach_alpha_standarized(ods)
21
+ ds = ods.reject_values(*Daru::MISSING_VALUES)
22
+ return nil if ds.any? { |v| v.variance==0}
23
+
24
+ ds = Daru::DataFrame.new(
25
+ ds.vectors.to_a.inject({}) { |a,i|
26
+ a[i] = ods[i].standardize
27
+ a
28
+ }
29
+ )
30
+
31
+ cronbach_alpha(ds)
32
+ end
33
+ # Predicted reliability of a test by replicating
34
+ # +n+ times the number of items
35
# Spearman-Brown prophecy: (n*r) / (1 + (n-1)*r), where +r+ is the
# current reliability and +n+ the factor by which the number of items
# is replicated.
def spearman_brown_prophecy(r, n)
  numerator = n * r
  denominator = 1 + (n - 1) * r
  numerator.quo(denominator)
end
38
+
39
+ alias :sbp :spearman_brown_prophecy
40
+ # Returns the number of items
41
+ # to obtain +r_d+ desired reliability
42
+ # from +r+ current reliability, achieved with
43
+ # +n+ items
44
# Number of items needed to reach reliability +r_d+, given the current
# reliability +r+ obtained with +n+ items (default 1).
#
# Returns nil when +r+ is nil.
def n_for_desired_reliability(r, r_d, n = 1)
  return nil if r.nil?

  ratio = (r_d * (1 - r)).quo(r * (1 - r_d))
  ratio * n
end
48
+
49
+ # Get Cronbach alpha from <tt>n</tt> cases,
50
+ # <tt>s2</tt> mean variance and <tt>cov</tt>
51
+ # mean covariance
52
# Cronbach's alpha computed from +n+, the mean variance +s2+ and the
# mean covariance +cov+:
#   (n / (n-1)) * (1 - s2 / (s2 + (n-1)*cov))
def cronbach_alpha_from_n_s2_cov(n, s2, cov)
  item_factor = n.quo(n - 1)
  total_variance = s2 + (n - 1) * cov
  item_factor * (1 - s2.quo(total_variance))
end
55
+ # Get Cronbach's alpha from a covariance matrix
56
# Cronbach's alpha from a covariance matrix: the diagonal is summed and
# compared against cov.total_sum.
#
# Raises RuntimeError when the matrix has fewer than 2 rows.
def cronbach_alpha_from_covariance_matrix(cov)
  size = cov.row_size
  if size < 2
    raise "covariance matrix should have at least 2 variables"
  end

  diagonal_sum = 0
  (0...size).each { |idx| diagonal_sum += cov[idx, idx] }
  scale = size.quo(size - 1)
  scale * (1 - diagonal_sum.quo(cov.total_sum))
end
62
+ # Returns n necessary to obtain specific alpha
63
+ # given variance and covariance mean of items
64
# Searches for the number of items whose alpha, as computed by
# cronbach_alpha_from_n_s2_cov(n, s2, cov), matches +alpha+.
#
# alpha:: target alpha
# s2::    mean of item variances
# cov::   mean of item covariances
#
# The search starts at 50 items and repeatedly halves the distance to
# the current lower (initially 2) or upper (initially 1000) bound. It
# stops when the difference to +alpha+ is within 0.0001 or when n no
# longer changes. Returns the last n tried (an Integer).
def n_for_desired_alpha(alpha, s2, cov)
  # Start with a regular test : 50 items
  min = 2
  max = 1000
  n = 50
  prev_n = 0
  epsilon = 0.0001
  dif = cronbach_alpha_from_n_s2_cov(n, s2, cov) - alpha
  while dif.abs > epsilon && n != prev_n
    prev_n = n
    if dif < 0
      # Alpha too low: raise the lower bound and move up.
      min = n
      n = (n + (max - min).quo(2)).to_i
    else
      # Alpha too high: lower the upper bound and move down.
      max = n
      n = (n - (max - min).quo(2)).to_i
    end
    dif = cronbach_alpha_from_n_s2_cov(n, s2, cov) - alpha
  end
  n
end
88
+ # First derivative for alpha
89
+ # Parameters
90
+ # <tt>n</tt>: Number of items
91
+ # <tt>sx</tt>: mean of variances
92
+ # <tt>sxy</tt>: mean of covariances
93
+
94
# First derivative of alpha with respect to the number of items:
#   sxy*(sx - sxy) / (sxy*(n-1) + sx)^2
# n:: number of items, sx:: mean of variances, sxy:: mean of covariances
def alpha_first_derivative(n, sx, sxy)
  numerator = sxy * (sx - sxy)
  base = (sxy * (n - 1)) + sx
  numerator.quo(base**2)
end
97
+ # Second derivative for alfa
98
+ # Parameters
99
+ # <tt>n</tt>: Number of items
100
+ # <tt>sx</tt>: mean of variances
101
+ # <tt>sxy</tt>: mean of covariances
102
+
103
# Second derivative of alpha with respect to the number of items:
#   2*sxy^2*(sxy - sx) / (sxy*(n-1) + sx)^3
# n:: number of items, sx:: mean of variances, sxy:: mean of covariances
def alfa_second_derivative(n, sx, sxy)
  (2 * (sxy**2) * (sxy - sx)).quo(((sxy * (n - 1)) + sx)**3)
end

# Spelling consistent with alpha_first_derivative; the original name
# stays available for existing callers.
alias :alpha_second_derivative :alfa_second_derivative
106
+ end
107
# Tabulates, for every row total, how many rows have that total and how
# often each (stringified) value occurs on each vector of the dataset.
class ItemCharacteristicCurve
  attr_reader :totals, :counts, :vector_total

  # ds::           dataset; must respond to +vectors+, +nrows+ and
  #                +each_row+ (and +vector_sum+ when no totals are given)
  # vector_total:: per-row totals; defaults to ds.vector_sum
  #
  # Raises ArgumentError when the totals and the dataset differ in size.
  def initialize(ds, vector_total = nil)
    vector_total ||= ds.vector_sum
    unless vector_total.size == ds.nrows
      raise ArgumentError, "Total size != Dataset size"
    end

    @vector_total = vector_total
    @ds = ds
    @totals = {}
    @counts = {}
    @ds.vectors.to_a.each { |name| @counts[name] = {} }
    process
  end

  # Walks the dataset once, filling @totals (total => number of rows)
  # and @counts (vector => total => value string => occurrences).
  def process
    row_number = 0
    @ds.each_row do |row|
      total = @vector_total[row_number]
      @totals[total] = (@totals[total] || 0) + 1
      @ds.vectors.each do |name|
        answer = row[name].to_s
        per_total = (@counts[name][total] ||= {})
        per_total[answer] = (per_total[answer] || 0) + 1
      end
      row_number += 1
    end
  end

  # Returns a hash total => proportion of rows with that total whose
  # value on +field+ equals +item+ (compared as strings).
  def curve_field(field, item)
    key = item.to_s
    @totals.each_with_object({}) do |(value, n), curve|
      hits = @counts[field][value][key]
      curve[value] = (hits.nil? ? 0 : hits).quo(n)
    end
  end
end
144
+ end # Reliability
145
+ end # Statsample
146
+
147
+ require 'statsample/reliability/icc.rb'
148
+ require 'statsample/reliability/scaleanalysis.rb'
149
+ require 'statsample/reliability/skillscaleanalysis.rb'
150
+ require 'statsample/reliability/multiscaleanalysis.rb'