statsample-ekatena 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +15 -0
  3. data/.travis.yml +23 -0
  4. data/CONTRIBUTING.md +17 -0
  5. data/Gemfile +2 -0
  6. data/History.txt +457 -0
  7. data/LICENSE.txt +12 -0
  8. data/README.md +175 -0
  9. data/Rakefile +44 -0
  10. data/benchmarks/correlation_matrix_15_variables.rb +32 -0
  11. data/benchmarks/correlation_matrix_5_variables.rb +33 -0
  12. data/benchmarks/correlation_matrix_methods/correlation_matrix.ds +0 -0
  13. data/benchmarks/correlation_matrix_methods/correlation_matrix.html +93 -0
  14. data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +71 -0
  15. data/benchmarks/correlation_matrix_methods/correlation_matrix.xls +0 -0
  16. data/benchmarks/correlation_matrix_methods/correlation_matrix_gsl_ruby.ods +0 -0
  17. data/benchmarks/correlation_matrix_methods/correlation_matrix_with_graphics.ods +0 -0
  18. data/benchmarks/correlation_matrix_methods/results.ds +0 -0
  19. data/benchmarks/factor_map.rb +37 -0
  20. data/benchmarks/helpers_benchmark.rb +5 -0
  21. data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
  22. data/doc_latex/manual/equations.tex +78 -0
  23. data/examples/boxplot.rb +28 -0
  24. data/examples/chisquare_test.rb +23 -0
  25. data/examples/correlation_matrix.rb +32 -0
  26. data/examples/dataset.rb +30 -0
  27. data/examples/dominance_analysis.rb +33 -0
  28. data/examples/dominance_analysis_bootstrap.rb +32 -0
  29. data/examples/histogram.rb +26 -0
  30. data/examples/icc.rb +24 -0
  31. data/examples/levene.rb +29 -0
  32. data/examples/multiple_regression.rb +20 -0
  33. data/examples/multivariate_correlation.rb +33 -0
  34. data/examples/parallel_analysis.rb +40 -0
  35. data/examples/polychoric.rb +40 -0
  36. data/examples/principal_axis.rb +26 -0
  37. data/examples/reliability.rb +31 -0
  38. data/examples/scatterplot.rb +25 -0
  39. data/examples/t_test.rb +27 -0
  40. data/examples/tetrachoric.rb +17 -0
  41. data/examples/u_test.rb +24 -0
  42. data/examples/vector.rb +20 -0
  43. data/examples/velicer_map_test.rb +46 -0
  44. data/grab_references.rb +29 -0
  45. data/lib/spss.rb +134 -0
  46. data/lib/statsample-ekatena/analysis.rb +100 -0
  47. data/lib/statsample-ekatena/analysis/suite.rb +89 -0
  48. data/lib/statsample-ekatena/analysis/suitereportbuilder.rb +44 -0
  49. data/lib/statsample-ekatena/anova.rb +24 -0
  50. data/lib/statsample-ekatena/anova/contrast.rb +79 -0
  51. data/lib/statsample-ekatena/anova/oneway.rb +187 -0
  52. data/lib/statsample-ekatena/anova/twoway.rb +207 -0
  53. data/lib/statsample-ekatena/bivariate.rb +406 -0
  54. data/lib/statsample-ekatena/bivariate/pearson.rb +54 -0
  55. data/lib/statsample-ekatena/codification.rb +182 -0
  56. data/lib/statsample-ekatena/converter/csv.rb +28 -0
  57. data/lib/statsample-ekatena/converter/spss.rb +48 -0
  58. data/lib/statsample-ekatena/converters.rb +211 -0
  59. data/lib/statsample-ekatena/crosstab.rb +188 -0
  60. data/lib/statsample-ekatena/daru.rb +115 -0
  61. data/lib/statsample-ekatena/dataset.rb +10 -0
  62. data/lib/statsample-ekatena/dominanceanalysis.rb +425 -0
  63. data/lib/statsample-ekatena/dominanceanalysis/bootstrap.rb +232 -0
  64. data/lib/statsample-ekatena/factor.rb +104 -0
  65. data/lib/statsample-ekatena/factor/map.rb +124 -0
  66. data/lib/statsample-ekatena/factor/parallelanalysis.rb +166 -0
  67. data/lib/statsample-ekatena/factor/pca.rb +242 -0
  68. data/lib/statsample-ekatena/factor/principalaxis.rb +243 -0
  69. data/lib/statsample-ekatena/factor/rotation.rb +198 -0
  70. data/lib/statsample-ekatena/formula/fit_model.rb +46 -0
  71. data/lib/statsample-ekatena/formula/formula.rb +306 -0
  72. data/lib/statsample-ekatena/graph.rb +11 -0
  73. data/lib/statsample-ekatena/graph/boxplot.rb +236 -0
  74. data/lib/statsample-ekatena/graph/histogram.rb +198 -0
  75. data/lib/statsample-ekatena/graph/scatterplot.rb +213 -0
  76. data/lib/statsample-ekatena/histogram.rb +180 -0
  77. data/lib/statsample-ekatena/matrix.rb +329 -0
  78. data/lib/statsample-ekatena/multiset.rb +310 -0
  79. data/lib/statsample-ekatena/regression.rb +65 -0
  80. data/lib/statsample-ekatena/regression/multiple.rb +89 -0
  81. data/lib/statsample-ekatena/regression/multiple/alglibengine.rb +128 -0
  82. data/lib/statsample-ekatena/regression/multiple/baseengine.rb +251 -0
  83. data/lib/statsample-ekatena/regression/multiple/gslengine.rb +129 -0
  84. data/lib/statsample-ekatena/regression/multiple/matrixengine.rb +205 -0
  85. data/lib/statsample-ekatena/regression/multiple/rubyengine.rb +86 -0
  86. data/lib/statsample-ekatena/regression/simple.rb +121 -0
  87. data/lib/statsample-ekatena/reliability.rb +150 -0
  88. data/lib/statsample-ekatena/reliability/icc.rb +415 -0
  89. data/lib/statsample-ekatena/reliability/multiscaleanalysis.rb +181 -0
  90. data/lib/statsample-ekatena/reliability/scaleanalysis.rb +233 -0
  91. data/lib/statsample-ekatena/reliability/skillscaleanalysis.rb +114 -0
  92. data/lib/statsample-ekatena/resample.rb +15 -0
  93. data/lib/statsample-ekatena/shorthand.rb +125 -0
  94. data/lib/statsample-ekatena/srs.rb +169 -0
  95. data/lib/statsample-ekatena/test.rb +82 -0
  96. data/lib/statsample-ekatena/test/bartlettsphericity.rb +45 -0
  97. data/lib/statsample-ekatena/test/chisquare.rb +73 -0
  98. data/lib/statsample-ekatena/test/f.rb +52 -0
  99. data/lib/statsample-ekatena/test/kolmogorovsmirnov.rb +63 -0
  100. data/lib/statsample-ekatena/test/levene.rb +88 -0
  101. data/lib/statsample-ekatena/test/t.rb +309 -0
  102. data/lib/statsample-ekatena/test/umannwhitney.rb +208 -0
  103. data/lib/statsample-ekatena/test/wilcoxonsignedrank.rb +90 -0
  104. data/lib/statsample-ekatena/vector.rb +19 -0
  105. data/lib/statsample-ekatena/version.rb +3 -0
  106. data/lib/statsample.rb +282 -0
  107. data/po/es/statsample.mo +0 -0
  108. data/po/es/statsample.po +959 -0
  109. data/po/statsample.pot +947 -0
  110. data/references.txt +24 -0
  111. data/statsample-ekatena.gemspec +49 -0
  112. data/test/fixtures/bank2.dat +200 -0
  113. data/test/fixtures/correlation_matrix.rb +17 -0
  114. data/test/fixtures/df.csv +15 -0
  115. data/test/fixtures/hartman_23.matrix +9 -0
  116. data/test/fixtures/stock_data.csv +500 -0
  117. data/test/fixtures/tetmat_matrix.txt +5 -0
  118. data/test/fixtures/tetmat_test.txt +1001 -0
  119. data/test/helpers_tests.rb +83 -0
  120. data/test/test_analysis.rb +176 -0
  121. data/test/test_anova_contrast.rb +36 -0
  122. data/test/test_anovaoneway.rb +26 -0
  123. data/test/test_anovatwoway.rb +37 -0
  124. data/test/test_anovatwowaywithdataset.rb +47 -0
  125. data/test/test_anovawithvectors.rb +102 -0
  126. data/test/test_awesome_print_bug.rb +16 -0
  127. data/test/test_bartlettsphericity.rb +25 -0
  128. data/test/test_bivariate.rb +164 -0
  129. data/test/test_codification.rb +78 -0
  130. data/test/test_crosstab.rb +67 -0
  131. data/test/test_dominance_analysis.rb +39 -0
  132. data/test/test_factor.rb +228 -0
  133. data/test/test_factor_map.rb +38 -0
  134. data/test/test_factor_pa.rb +56 -0
  135. data/test/test_fit_model.rb +88 -0
  136. data/test/test_ggobi.rb +35 -0
  137. data/test/test_gsl.rb +15 -0
  138. data/test/test_histogram.rb +109 -0
  139. data/test/test_matrix.rb +48 -0
  140. data/test/test_multiset.rb +176 -0
  141. data/test/test_regression.rb +231 -0
  142. data/test/test_reliability.rb +223 -0
  143. data/test/test_reliability_icc.rb +198 -0
  144. data/test/test_reliability_skillscale.rb +57 -0
  145. data/test/test_resample.rb +24 -0
  146. data/test/test_srs.rb +9 -0
  147. data/test/test_statistics.rb +69 -0
  148. data/test/test_stest.rb +69 -0
  149. data/test/test_stratified.rb +17 -0
  150. data/test/test_test_f.rb +33 -0
  151. data/test/test_test_kolmogorovsmirnov.rb +34 -0
  152. data/test/test_test_t.rb +62 -0
  153. data/test/test_umannwhitney.rb +27 -0
  154. data/test/test_vector.rb +12 -0
  155. data/test/test_wilcoxonsignedrank.rb +64 -0
  156. metadata +570 -0
@@ -0,0 +1,205 @@
1
+ module Statsample
2
+ module Regression
3
+ module Multiple
4
+ # Pure Ruby Class for Multiple Regression Analysis, based on a covariance or correlation matrix.
5
+ #
6
+ # Use Statsample::Regression::Multiple::RubyEngine if you have a
7
+ # Dataset, to avoid setting all details.
8
+ #
9
+ # <b>Remember:</b> NEVER use a covariance matrix if you have missing data; use only a correlation matrix in that case.
10
+ #
11
+ #
12
+ # Example:
13
+ #
14
+ # matrix=[[1.0, 0.5, 0.2], [0.5, 1.0, 0.7], [0.2, 0.7, 1.0]]
15
+ #
16
+ # lr=Statsample::Regression::Multiple::MatrixEngine.new(matrix,2)
17
+
18
+ class MatrixEngine < BaseEngine
19
+ # Hash of standard deviation of predictors.
20
+ # Only useful for Correlation Matrix, because by default is set to 1
21
+ attr_accessor :x_sd
22
+ # Standard deviation of criterion
23
+ # Only useful for Correlation Matrix, because by default is set to 1
24
+ attr_accessor :y_sd
25
+ # Hash of mean for predictors. By default, set to 0
26
+ attr_accessor :x_mean
27
+
28
+ # Mean for criteria. By default, set to 0
29
+ attr_accessor :y_mean
30
+
31
+ # Number of cases
32
+ attr_writer :cases
33
+ attr_writer :digits
34
# Create a regression engine from a covariance or correlation matrix.
#
# * <tt>matrix</tt>: matrix holding every variable. It is extended with
#   Statsample::CovariateMatrix and must answer +fields+ and +_type+.
# * <tt>y_var</tt>: name of the criterion; must be one of +matrix.fields+.
# * <tt>opts</tt>: attributes to assign through their writer methods
#   (<tt>:digits</tt> defaults to 3). Keys without a writer are ignored.
#
# Raises RuntimeError when +y_var+ is not a field of the matrix and
# LinearDependency when the determinant of the predictor correlation
# matrix is below 1e-15.
def initialize(matrix, y_var, opts = Hash.new)
  matrix.extend Statsample::CovariateMatrix
  raise "#{y_var} variable should be on data" unless matrix.fields.include? y_var
  is_covariance = matrix._type == :covariance
  if is_covariance
    @matrix_cov = matrix
    @matrix_cor = matrix.correlation
    @no_covariance = false
  else
    # Only correlations are available, so they stand in for the covariances.
    @matrix_cor = matrix
    @matrix_cov = matrix
    @no_covariance = true
  end

  @y_var = y_var
  @fields = matrix.fields - [y_var]

  @n_predictors = @fields.size
  @predictors_n = @n_predictors
  @matrix_x = @matrix_cor.submatrix(@fields)
  @matrix_x_cov = @matrix_cov.submatrix(@fields)
  raise LinearDependency, "Regressors are linearly dependent" if @matrix_x.determinant < 1e-15

  @matrix_y = @matrix_cor.submatrix(@fields, [y_var])
  @matrix_y_cov = @matrix_cov.submatrix(@fields, [y_var])

  # Defaults taken from the diagonal of the (co)variance matrix; the
  # options applied below may override them.
  @y_sd = Math::sqrt(@matrix_cov.submatrix([y_var])[0, 0])
  @x_sd = {}
  @n_predictors.times do |i|
    @x_sd[@matrix_x_cov.fields[i]] = Math::sqrt(@matrix_x_cov[i, i])
  end

  @cases = nil
  @x_mean = {}
  @fields.each { |f| @x_mean[f] = 0.0 }
  @y_mean = 0.0
  @name = _("Multiple reggresion of %s on %s") % [@fields.join(","), @y_var]

  opts_default = {:digits => 3}
  opts = opts_default.merge opts
  opts.each do |k, v|
    # Check for the writer itself: a reader may exist without a writer
    # (and vice versa), and it is the writer that gets called.
    writer = "#{k}="
    send(writer, v) if respond_to?(writer)
  end

  # Must run after the options are applied: the conversions below use the
  # (possibly overridden) standard deviations.
  result_matrix = @matrix_x_cov.inverse * @matrix_y_cov

  if is_covariance
    @coeffs = result_matrix.column(0).to_a
    @coeffs_stan = coeffs.collect { |k, b| b * @x_sd[k].quo(@y_sd) }
  else
    @coeffs_stan = result_matrix.column(0).to_a
    @coeffs = standarized_coeffs.collect { |k, beta| beta * @y_sd.quo(@x_sd[k]) }
  end
  @total_cases = @valid_cases = @cases
end
98
# Number of valid cases.
# Raises RuntimeError while no value has been assigned.
def cases
  return @cases unless @cases.nil?
  raise "You should define the number of valid cases first"
end
102
# R^2 for the regression.
# For fixed models it is the coefficient of determination; on random
# models, the squared multiple correlation.
# Computed as Sum(beta_i * r_yi), which equals 1 - (|R| / |R_x|).
def r2
  total = 0
  @n_predictors.times do |idx|
    total += @coeffs_stan[idx] * @matrix_y[idx, 0]
  end
  total
end
111
# Multiple correlation (square root of #r2), on random models.
def r
  Math.sqrt(r2)
end
115
# Value of the constant (intercept): the criterion mean minus the sum of
# each raw coefficient times the mean of its predictor.
def constant
  b = coeffs
  weighted = 0
  @fields.each { |f| weighted += b[f] * @x_mean[f] }
  @y_mean - weighted
end
120
# Raw (b) coefficients, labelled through +assign_names+.
def coeffs
  assign_names @coeffs
end
124
# Standardized (beta) coefficients, labelled through +assign_names+.
def standarized_coeffs
  assign_names @coeffs_stan
end
129
# Total sum of squares: variance of the criterion times (cases - 1).
def sst
  variance_y = @y_sd**2
  variance_y * (cases - 1.0)
end
133
+
134
# Degrees of freedom for regression: the number of predictors.
def df_r
  @n_predictors
end
138
# Degrees of freedom for error: cases - predictors - 1.
# Raises (through #cases) when the number of cases is not set.
def df_e
  n = cases
  n - @n_predictors - 1
end
142
# Tolerance for a given variable, defined as (1 - R^2) of the regression
# of that predictor on the remaining predictors.
# With a single predictor the tolerance is 1.
# == Reference:
# * http://talkstats.com/showthread.php?t=5056
def tolerance(var)
  if @matrix_x.column_size == 1
    1
  else
    1 - Statsample::Regression::Multiple::MatrixEngine.new(@matrix_x, var).r2
  end
end
152
# Standard error of each raw coefficient, as a hash keyed like #coeffs.
# For predictor k it is
#   (sd_y / sd_k) * sqrt(1 / tolerance_k) * sqrt((1 - R^2) / df_e)
# so a lower tolerance or a lower R^2 gives a larger error.
# == Reference:
# * Cohen et al. (2003). Applied Multiple Regression / Correlation Analysis for the Behavioral Sciences
def coeffs_se
  coeffs.each_with_object({}) do |(k, _b), out|
    scale = @y_sd.quo(@x_sd[k])
    inflation = Math.sqrt(1.quo(tolerance(k)))
    residual = Math.sqrt((1 - r2).quo(df_e))
    out[k] = scale * inflation * residual
  end
end
167
# t value for the constant: constant / constant_se.
# Returns nil when no standard error is available.
def constant_t
  # constant_se is evaluated only once and reused.
  se = constant_se
  return nil if se.nil?
  constant.to_f / se
end
172
# Standard error for the constant.
# Rebuilds X'X (with a leading constant column) from the predictor means,
# their standard deviations, the covariance matrix and the number of
# cases, then takes the square root of cell [0,0] of (X'X)^-1 * mse.
# Returns nil when only a correlation matrix is available, or when that
# cell is not positive.
def constant_se
  return nil if @no_covariance
  # Work on a copy: writing :constant into @x_mean itself would leave a
  # bogus entry behind in the public x_mean hash after every call.
  means = @x_mean.merge(:constant => 1)
  fields = [:constant] + @matrix_cov.fields - [@y_var]
  # Recreate X'X using the variance-covariance matrix
  xt_x = ::Matrix.rows(fields.collect { |i|
    fields.collect { |j|
      cov = if i == :constant || j == :constant
              0
            elsif i == j
              @x_sd[i]**2
            else
              @matrix_cov.submatrix(i..i, j..j)[0, 0]
            end
      cov * (@cases - 1) + @cases * means[i] * means[j]
    }
  })
  variance = (xt_x.inverse * mse)[0, 0]
  Math.sqrt(variance) if variance > 0
end
201
+
202
+ end
203
+ end
204
+ end
205
+ end
@@ -0,0 +1,86 @@
1
+ module Statsample
2
+ module Regression
3
+ module Multiple
4
+ # Pure Ruby Class for Multiple Regression Analysis.
5
+ # Slower than AlglibEngine, but it is pure Ruby and can use a pairwise approach for missing values.
6
+ # Coefficient calculation uses the correlation matrix between the vectors.
7
+ # If you need a listwise approach for missing values, use AlglibEngine, because it is faster.
8
+ #
9
+ # Example:
10
+ #
11
+ # @a = Daru::Vector.new([1,3,2,4,3,5,4,6,5,7])
12
+ # @b = Daru::Vector.new([3,3,4,4,5,5,6,6,4,4])
13
+ # @c = Daru::Vector.new([11,22,30,40,50,65,78,79,99,100])
14
+ # @y = Daru::Vector.new([3,4,5,6,7,8,9,10,20,30])
15
+ # ds = Daru::DataFrame.new({:a => @a,:b => @b,:c => @c,:y => @y})
16
+ # lr=Statsample::Regression::Multiple::RubyEngine.new(ds,:y)
17
+
18
+ class RubyEngine < MatrixEngine
19
# Build the engine from a dataset +ds+ and the name of the criterion
# +y_var+. The correlation matrix, means, standard deviations and number
# of cases are taken from the dataset and handed to MatrixEngine.
#
# NOTE(review): the data-derived values are merged over +opts+, so a
# caller cannot override :y_mean, :x_mean, :y_sd, :x_sd or :cases.
def initialize(ds, y_var, opts = Hash.new)
  matrix = Statsample::Bivariate.correlation_matrix ds
  predictors = ds.vectors.to_a - [y_var]
  default = {
    :y_mean => ds[y_var].mean,
    :x_mean => predictors.each_with_object({}) { |f, acc| acc[f] = ds[f].mean },
    :y_sd => ds[y_var].sd,
    :x_sd => predictors.each_with_object({}) { |f, acc| acc[f] = ds[f].sd },
    :cases => Statsample::Bivariate.min_n_valid(ds)
  }
  super(matrix, y_var, opts.merge(default))
  @ds = ds
  @dy = ds[@y_var]
  @ds_valid = ds.reject_values(*Daru::MISSING_VALUES)
  @total_cases = @ds.nrows
  @valid_cases = @ds_valid.nrows
  @ds_indep = ds.dup(ds.vectors.to_a - [y_var])
  set_dep_columns
end
39
+
40
# Refresh @dep_columns with one plain array per vector of the
# predictors dataset.
def set_dep_columns
  @dep_columns = []
  @ds_indep.each_vector do |vec|
    @dep_columns << vec.to_a
  end
end
44
+
45
# For every row of the predictors dataset with exactly one missing (nil)
# value, replace that value with the mean of the corresponding vector of
# the full dataset, then refresh the cached columns.
def fix_with_mean
  names = @ds_indep.vectors.to_a
  i = 0
  @ds_indep.each(:row) do |row|
    # Look each field up by name: iterating the row itself only yields
    # key/value pairs for hash-like rows, not for row vectors.
    missing = names.select { |f| row[f].nil? }
    if missing.size == 1
      @ds_indep[missing[0]][i] = @ds[missing[0]].mean
    end
    i += 1
  end
  set_dep_columns
end
60
# For every row of the predictors dataset with exactly one missing (nil)
# value, replace that value with the prediction of a regression of that
# field on the other predictors, then refresh the cached columns.
#
# NOTE(review): MultipleRegression is not defined in this file; confirm
# that the constant resolves before relying on this method.
def fix_with_regression
  names = @ds_indep.vectors.to_a
  i = 0
  @ds_indep.each(:row) do |row|
    # Look each field up by name: iterating the row itself only yields
    # key/value pairs for hash-like rows, not for row vectors.
    missing = names.select { |f| row[f].nil? }
    if missing.size == 1
      field = missing[0]
      lr = MultipleRegression.new(@ds_indep, field)
      known = names.reject { |f| f == field }.collect { |f| row[f] }
      @ds_indep[field][i] = lr.process(known)
    end
    i += 1
  end
  set_dep_columns
end
79
# Standard error for constant: cell [0,0] of
# +estimated_variance_covariance_matrix+.
def constant_se
  evcm = estimated_variance_covariance_matrix
  evcm[0, 0]
end
83
+ end
84
+ end
85
+ end
86
+ end
@@ -0,0 +1,121 @@
1
+ module Statsample
2
+ module Regression
3
+ # Class for calculation of linear regressions with form
4
+ # y = a+bx
5
+ # To create a Statsample::Regression::Simple object:
6
+ # * <tt> Statsample::Regression::Simple.new_from_dataset(ds,x,y)</tt>
7
+ # * <tt> Statsample::Regression::Simple.new_from_vectors(vx,vy)</tt>
8
+ # * <tt> Statsample::Regression::Simple.new_from_gsl(gsl) </tt>
9
+ #
10
+ class Simple
11
+ include Summarizable
12
+ attr_accessor :a,:b,:cov00, :cov01, :covx1, :chisq, :status
13
+ attr_accessor :name
14
+ attr_accessor :digits
15
# Dispatch construction to the private initializer named by
# +init_method+, forwarding the remaining arguments to it.
def initialize(init_method, *argv)
  send(init_method, *argv)
end
18
+ private_class_method :new
19
# Obtain y value given x value
# y = a + b*x
def y(val_x)
  slope_part = @b * val_x
  @a + slope_part
end
25
# Obtain x value given y value
# x = (y - a) / b
def x(val_y)
  offset = val_y - @a
  offset / @b.to_f
end
30
# Sum of square error: squared differences between each observed y and
# the y predicted from its x.
def sse
  total = 0
  @vx.size.times do |idx|
    residual = @vy[idx] - y(@vx[idx])
    total += residual**2
  end
  total
end
35
# Square root of the sum of square error divided by (n - 2), where n is
# the number of x values.
def standard_error
  df = (@vx.size - 2).to_f
  Math.sqrt(sse / df)
end
38
# Sum of square regression: squared differences between each predicted y
# and the mean of the observed y values.
def ssr
  center = @vy.mean
  total = 0
  @vx.size.times do |idx|
    total += (y(@vx[idx]) - center)**2
  end
  total
end
46
# Sum of square total, as reported by the y vector's
# +sum_of_squared_deviation+.
def sst
  observed = @vy
  observed.sum_of_squared_deviation
end
50
# Value of r: the slope scaled by the ratio of the +sds+ of x to the
# +sds+ of y.
def r
  ratio = @vx.sds / @vy.sds
  @b * ratio
end
54
# Value of r^2 (the square of #r).
def r2
  correlation = r
  correlation**2
end
58
# Create a regression object from an array with the parameters
# <tt>a, b, cov00, cov01, covx1, chisq, status</tt>.
# Useful to obtain x and y values from known a and b values.
def self.new_from_gsl(ar)
  new(:init_gsl, *ar)
end

# Create a simple regression using two vectors.
def self.new_from_vectors(vx, vy, opts = {})
  new(:init_vectors, vx, vy, opts)
end

# Create a simple regression using a dataset and two vector names.
def self.new_from_dataset(ds, x, y, opts = {})
  new(:init_vectors, ds[x], ds[y], opts)
end
74
# Fit y = a + b*x by least squares on the cases returned by
# Statsample.only_valid_clone for both vectors, then apply the options
# (defaults: 3 digits and a name built from both vector names) through
# the matching writers.
def init_vectors(vx, vy, opts = Hash.new)
  @vx, @vy = Statsample.only_valid_clone(vx, vy)
  mean_x = @vx.mean
  mean_y = @vy.mean
  cross = 0
  spread = 0
  @vx.size.times do |idx|
    cross += (@vx[idx] - mean_x) * (@vy[idx] - mean_y)
    spread += (@vx[idx] - mean_x)**2
  end
  @b = cross.to_f / spread
  @a = mean_y - @b * mean_x

  defaults = {
    :digits => 3,
    :name => _("Regression of %s over %s") % [@vx.name, @vy.name]
  }
  @opts = defaults.merge opts

  @opts.each do |key, value|
    send("#{key}=", value) if respond_to? key
  end
end
97
# Store precomputed regression parameters as given; no vectors are kept.
def init_gsl(a, b, cov00, cov01, covx1, chisq, status)
  @a, @b = a, b
  @cov00, @cov01, @covx1 = cov00, cov01, covx1
  @chisq, @status = chisq, status
end
106
# Add a section named after the regression to +gen+, holding a
# two-column table with r, r^2, a, b and the standard error, each
# formatted with +digits+ decimals.
def report_building(gen)
  fmt = "%0.#{digits}f"
  gen.section(:name => name) do |section|
    section.table(:header => [_("Variable"), _("Value")]) do |table|
      table.row [_("r"), format(fmt, r)]
      table.row [_("r^2"), format(fmt, r2)]
      table.row [_("a"), format(fmt, a)]
      table.row [_("b"), format(fmt, b)]
      table.row [_("s.e"), format(fmt, standard_error)]
    end
  end
end
118
+ private :init_vectors, :init_gsl
119
+ end
120
+ end
121
+ end
@@ -0,0 +1,150 @@
1
+ module Statsample
2
+ module Reliability
3
+ class << self
4
# Calculate Cronbach's alpha for a given dataset.
# Only uses tuples without missing data.
# Returns nil when the dataset has one item or none.
def cronbach_alpha(ods)
  ds = ods.reject_values(*Daru::MISSING_VALUES)
  n_items = ds.ncols
  return nil if n_items <= 1

  item_variances = 0
  ds.to_hash.values.each { |v| item_variances += v.variance }
  total_variance = ds.vector_sum.variance

  length_factor = n_items.quo(n_items - 1)
  length_factor * (1 - item_variances.quo(total_variance))
end
16
# Calculate Cronbach's alpha for a given dataset
# using standardized values for every vector.
# Only uses tuples without missing data.
# Returns nil if one or more vectors has 0 variance.
def cronbach_alpha_standarized(ods)
  ds = ods.reject_values(*Daru::MISSING_VALUES)
  return nil if ds.any? { |v| v.variance == 0 }

  # Standardize the filtered vectors, not the original ones, so the
  # z-scores come from the same tuples that enter the alpha computation.
  standardized = ds.vectors.to_a.each_with_object({}) do |name, acc|
    acc[name] = ds[name].standardize
  end

  cronbach_alpha(Daru::DataFrame.new(standardized))
end
33
# Predicted reliability of a test by replicating
# +n+ times the number of items: (n*r) / (1 + (n-1)*r)
def spearman_brown_prophecy(r, n)
  lengthened = n * r
  lengthened.quo(1 + (n - 1) * r)
end

alias sbp spearman_brown_prophecy
40
# Returns the number of items needed
# to obtain +r_d+ desired reliability
# from +r+ current reliability, achieved with
# +n+ items. Returns nil when +r+ is nil.
def n_for_desired_reliability(r, r_d, n = 1)
  return nil if r.nil?
  wanted = r_d * (1 - r)
  current = r * (1 - r_d)
  wanted.quo(current) * n
end
48
+
49
# Get Cronbach's alpha from <tt>n</tt> items,
# <tt>s2</tt> mean variance and <tt>cov</tt>
# mean covariance
def cronbach_alpha_from_n_s2_cov(n, s2, cov)
  length_factor = n.quo(n - 1)
  variance_share = s2.quo(s2 + (n - 1) * cov)
  length_factor * (1 - variance_share)
end
55
# Get Cronbach's alpha from a covariance matrix.
# Raises RuntimeError when the matrix has fewer than 2 variables.
def cronbach_alpha_from_covariance_matrix(cov)
  n = cov.row_size
  raise "covariance matrix should have at least 2 variables" if n < 2
  diagonal = 0
  n.times { |i| diagonal += cov[i, i] }
  n.quo(n - 1) * (1 - diagonal.quo(cov.total_sum))
end
62
# Returns n necessary to obtain specific alpha
# given variance and covariance mean of items.
# Searches between 2 and 1000 items by repeated halving, starting from
# 50, and stops when alpha is within 0.0001 of the target or n no longer
# changes.
def n_for_desired_alpha(alpha, s2, cov)
  lower = 2
  upper = 1000
  n = 50 # start with a regular test: 50 items
  last_n = 0
  precision = 0.0001
  gap = cronbach_alpha_from_n_s2_cov(n, s2, cov) - alpha
  while gap.abs > precision && n != last_n
    last_n = n
    if gap < 0
      # alpha too low: more items are needed
      lower = n
      n = (n + (upper - lower).quo(2)).to_i
    else
      upper = n
      n = (n - (upper - lower).quo(2)).to_i
    end
    gap = cronbach_alpha_from_n_s2_cov(n, s2, cov) - alpha
  end
  n
end
88
# First derivative of alpha with respect to the number of items.
# Parameters
# <tt>n</tt>: Number of items
# <tt>sx</tt>: mean of variances
# <tt>sxy</tt>: mean of covariances
def alpha_first_derivative(n, sx, sxy)
  numerator = sxy * (sx - sxy)
  base = (sxy * (n - 1)) + sx
  numerator.quo(base**2)
end
97
# Second derivative of alpha with respect to the number of items.
# Parameters
# <tt>n</tt>: Number of items
# <tt>sx</tt>: mean of variances
# <tt>sxy</tt>: mean of covariances
def alfa_second_derivative(n, sx, sxy)
  numerator = 2 * (sxy**2) * (sxy - sx)
  base = (sxy * (n - 1)) + sx
  numerator.quo(base**3)
end

# Spelling consistent with alpha_first_derivative; the original name is
# kept for existing callers.
alias alpha_second_derivative alfa_second_derivative
106
+ end
107
# Tabulates, for every vector of a dataset, how often each response
# (compared as a string) occurs at each total score.
#
# * +totals+: total score => number of rows with that score
# * +counts+: vector name => { total score => { response => count } }
# * +vector_total+: the total score used for each row
class ItemCharacteristicCurve
  attr_reader :totals, :counts, :vector_total

  # +vector_total+ defaults to +ds.vector_sum+.
  # Raises ArgumentError when its size differs from the number of rows.
  def initialize(ds, vector_total = nil)
    vector_total ||= ds.vector_sum
    raise ArgumentError, "Total size != Dataset size" if vector_total.size != ds.nrows
    @vector_total = vector_total
    @ds = ds
    @totals = {}
    @counts = {}
    @ds.vectors.to_a.each { |v| @counts[v] = {} }
    process
  end

  # Walk the rows once, filling +totals+ and +counts+.
  def process
    position = 0
    @ds.each_row do |row|
      score = @vector_total[position]
      @totals[score] = (@totals[score] || 0) + 1
      @ds.vectors.each do |f|
        answer = row[f].to_s
        per_score = (@counts[f][score] ||= {})
        per_score[answer] = (per_score[answer] || 0) + 1
      end
      position += 1
    end
  end

  # Return a hash, keyed by total score, with the proportion of rows at
  # that score whose response on +field+ equals +item+.
  def curve_field(field, item)
    key = item.to_s
    @totals.each_with_object({}) do |(score, n), out|
      hits = @counts[field][score][key] || 0
      out[score] = hits.quo(n)
    end
  end
end # ItemCharacteristicCurve
144
+ end # Reliability
145
+ end # Statsample
146
+
147
+ require 'statsample/reliability/icc.rb'
148
+ require 'statsample/reliability/scaleanalysis.rb'
149
+ require 'statsample/reliability/skillscaleanalysis.rb'
150
+ require 'statsample/reliability/multiscaleanalysis.rb'