statsample-ekatena 2.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (156) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +15 -0
  3. data/.travis.yml +23 -0
  4. data/CONTRIBUTING.md +17 -0
  5. data/Gemfile +2 -0
  6. data/History.txt +457 -0
  7. data/LICENSE.txt +12 -0
  8. data/README.md +175 -0
  9. data/Rakefile +44 -0
  10. data/benchmarks/correlation_matrix_15_variables.rb +32 -0
  11. data/benchmarks/correlation_matrix_5_variables.rb +33 -0
  12. data/benchmarks/correlation_matrix_methods/correlation_matrix.ds +0 -0
  13. data/benchmarks/correlation_matrix_methods/correlation_matrix.html +93 -0
  14. data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +71 -0
  15. data/benchmarks/correlation_matrix_methods/correlation_matrix.xls +0 -0
  16. data/benchmarks/correlation_matrix_methods/correlation_matrix_gsl_ruby.ods +0 -0
  17. data/benchmarks/correlation_matrix_methods/correlation_matrix_with_graphics.ods +0 -0
  18. data/benchmarks/correlation_matrix_methods/results.ds +0 -0
  19. data/benchmarks/factor_map.rb +37 -0
  20. data/benchmarks/helpers_benchmark.rb +5 -0
  21. data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
  22. data/doc_latex/manual/equations.tex +78 -0
  23. data/examples/boxplot.rb +28 -0
  24. data/examples/chisquare_test.rb +23 -0
  25. data/examples/correlation_matrix.rb +32 -0
  26. data/examples/dataset.rb +30 -0
  27. data/examples/dominance_analysis.rb +33 -0
  28. data/examples/dominance_analysis_bootstrap.rb +32 -0
  29. data/examples/histogram.rb +26 -0
  30. data/examples/icc.rb +24 -0
  31. data/examples/levene.rb +29 -0
  32. data/examples/multiple_regression.rb +20 -0
  33. data/examples/multivariate_correlation.rb +33 -0
  34. data/examples/parallel_analysis.rb +40 -0
  35. data/examples/polychoric.rb +40 -0
  36. data/examples/principal_axis.rb +26 -0
  37. data/examples/reliability.rb +31 -0
  38. data/examples/scatterplot.rb +25 -0
  39. data/examples/t_test.rb +27 -0
  40. data/examples/tetrachoric.rb +17 -0
  41. data/examples/u_test.rb +24 -0
  42. data/examples/vector.rb +20 -0
  43. data/examples/velicer_map_test.rb +46 -0
  44. data/grab_references.rb +29 -0
  45. data/lib/spss.rb +134 -0
  46. data/lib/statsample-ekatena/analysis.rb +100 -0
  47. data/lib/statsample-ekatena/analysis/suite.rb +89 -0
  48. data/lib/statsample-ekatena/analysis/suitereportbuilder.rb +44 -0
  49. data/lib/statsample-ekatena/anova.rb +24 -0
  50. data/lib/statsample-ekatena/anova/contrast.rb +79 -0
  51. data/lib/statsample-ekatena/anova/oneway.rb +187 -0
  52. data/lib/statsample-ekatena/anova/twoway.rb +207 -0
  53. data/lib/statsample-ekatena/bivariate.rb +406 -0
  54. data/lib/statsample-ekatena/bivariate/pearson.rb +54 -0
  55. data/lib/statsample-ekatena/codification.rb +182 -0
  56. data/lib/statsample-ekatena/converter/csv.rb +28 -0
  57. data/lib/statsample-ekatena/converter/spss.rb +48 -0
  58. data/lib/statsample-ekatena/converters.rb +211 -0
  59. data/lib/statsample-ekatena/crosstab.rb +188 -0
  60. data/lib/statsample-ekatena/daru.rb +115 -0
  61. data/lib/statsample-ekatena/dataset.rb +10 -0
  62. data/lib/statsample-ekatena/dominanceanalysis.rb +425 -0
  63. data/lib/statsample-ekatena/dominanceanalysis/bootstrap.rb +232 -0
  64. data/lib/statsample-ekatena/factor.rb +104 -0
  65. data/lib/statsample-ekatena/factor/map.rb +124 -0
  66. data/lib/statsample-ekatena/factor/parallelanalysis.rb +166 -0
  67. data/lib/statsample-ekatena/factor/pca.rb +242 -0
  68. data/lib/statsample-ekatena/factor/principalaxis.rb +243 -0
  69. data/lib/statsample-ekatena/factor/rotation.rb +198 -0
  70. data/lib/statsample-ekatena/formula/fit_model.rb +46 -0
  71. data/lib/statsample-ekatena/formula/formula.rb +306 -0
  72. data/lib/statsample-ekatena/graph.rb +11 -0
  73. data/lib/statsample-ekatena/graph/boxplot.rb +236 -0
  74. data/lib/statsample-ekatena/graph/histogram.rb +198 -0
  75. data/lib/statsample-ekatena/graph/scatterplot.rb +213 -0
  76. data/lib/statsample-ekatena/histogram.rb +180 -0
  77. data/lib/statsample-ekatena/matrix.rb +329 -0
  78. data/lib/statsample-ekatena/multiset.rb +310 -0
  79. data/lib/statsample-ekatena/regression.rb +65 -0
  80. data/lib/statsample-ekatena/regression/multiple.rb +89 -0
  81. data/lib/statsample-ekatena/regression/multiple/alglibengine.rb +128 -0
  82. data/lib/statsample-ekatena/regression/multiple/baseengine.rb +251 -0
  83. data/lib/statsample-ekatena/regression/multiple/gslengine.rb +129 -0
  84. data/lib/statsample-ekatena/regression/multiple/matrixengine.rb +205 -0
  85. data/lib/statsample-ekatena/regression/multiple/rubyengine.rb +86 -0
  86. data/lib/statsample-ekatena/regression/simple.rb +121 -0
  87. data/lib/statsample-ekatena/reliability.rb +150 -0
  88. data/lib/statsample-ekatena/reliability/icc.rb +415 -0
  89. data/lib/statsample-ekatena/reliability/multiscaleanalysis.rb +181 -0
  90. data/lib/statsample-ekatena/reliability/scaleanalysis.rb +233 -0
  91. data/lib/statsample-ekatena/reliability/skillscaleanalysis.rb +114 -0
  92. data/lib/statsample-ekatena/resample.rb +15 -0
  93. data/lib/statsample-ekatena/shorthand.rb +125 -0
  94. data/lib/statsample-ekatena/srs.rb +169 -0
  95. data/lib/statsample-ekatena/test.rb +82 -0
  96. data/lib/statsample-ekatena/test/bartlettsphericity.rb +45 -0
  97. data/lib/statsample-ekatena/test/chisquare.rb +73 -0
  98. data/lib/statsample-ekatena/test/f.rb +52 -0
  99. data/lib/statsample-ekatena/test/kolmogorovsmirnov.rb +63 -0
  100. data/lib/statsample-ekatena/test/levene.rb +88 -0
  101. data/lib/statsample-ekatena/test/t.rb +309 -0
  102. data/lib/statsample-ekatena/test/umannwhitney.rb +208 -0
  103. data/lib/statsample-ekatena/test/wilcoxonsignedrank.rb +90 -0
  104. data/lib/statsample-ekatena/vector.rb +19 -0
  105. data/lib/statsample-ekatena/version.rb +3 -0
  106. data/lib/statsample.rb +282 -0
  107. data/po/es/statsample.mo +0 -0
  108. data/po/es/statsample.po +959 -0
  109. data/po/statsample.pot +947 -0
  110. data/references.txt +24 -0
  111. data/statsample-ekatena.gemspec +49 -0
  112. data/test/fixtures/bank2.dat +200 -0
  113. data/test/fixtures/correlation_matrix.rb +17 -0
  114. data/test/fixtures/df.csv +15 -0
  115. data/test/fixtures/hartman_23.matrix +9 -0
  116. data/test/fixtures/stock_data.csv +500 -0
  117. data/test/fixtures/tetmat_matrix.txt +5 -0
  118. data/test/fixtures/tetmat_test.txt +1001 -0
  119. data/test/helpers_tests.rb +83 -0
  120. data/test/test_analysis.rb +176 -0
  121. data/test/test_anova_contrast.rb +36 -0
  122. data/test/test_anovaoneway.rb +26 -0
  123. data/test/test_anovatwoway.rb +37 -0
  124. data/test/test_anovatwowaywithdataset.rb +47 -0
  125. data/test/test_anovawithvectors.rb +102 -0
  126. data/test/test_awesome_print_bug.rb +16 -0
  127. data/test/test_bartlettsphericity.rb +25 -0
  128. data/test/test_bivariate.rb +164 -0
  129. data/test/test_codification.rb +78 -0
  130. data/test/test_crosstab.rb +67 -0
  131. data/test/test_dominance_analysis.rb +39 -0
  132. data/test/test_factor.rb +228 -0
  133. data/test/test_factor_map.rb +38 -0
  134. data/test/test_factor_pa.rb +56 -0
  135. data/test/test_fit_model.rb +88 -0
  136. data/test/test_ggobi.rb +35 -0
  137. data/test/test_gsl.rb +15 -0
  138. data/test/test_histogram.rb +109 -0
  139. data/test/test_matrix.rb +48 -0
  140. data/test/test_multiset.rb +176 -0
  141. data/test/test_regression.rb +231 -0
  142. data/test/test_reliability.rb +223 -0
  143. data/test/test_reliability_icc.rb +198 -0
  144. data/test/test_reliability_skillscale.rb +57 -0
  145. data/test/test_resample.rb +24 -0
  146. data/test/test_srs.rb +9 -0
  147. data/test/test_statistics.rb +69 -0
  148. data/test/test_stest.rb +69 -0
  149. data/test/test_stratified.rb +17 -0
  150. data/test/test_test_f.rb +33 -0
  151. data/test/test_test_kolmogorovsmirnov.rb +34 -0
  152. data/test/test_test_t.rb +62 -0
  153. data/test/test_umannwhitney.rb +27 -0
  154. data/test/test_vector.rb +12 -0
  155. data/test/test_wilcoxonsignedrank.rb +64 -0
  156. metadata +570 -0
@@ -0,0 +1,310 @@
1
+ module Statsample
2
+ # Multiset joins multiple datasets with the same fields and vectors
3
+ # but with different number of cases.
4
+ # This is the base class for stratified and cluster sampling estimation
5
class Multiset
  # Names of the fields every dataset added to this multiset must have.
  attr_reader :fields
  # Hash mapping each dataset key to its dataset (Daru::DataFrame).
  attr_reader :datasets

  # To create a multiset
  # * Multiset.new(%w{f1 f2 f3}) # define only fields
  def initialize(fields)
    @fields = fields
    @datasets = {}
  end

  # Builds a multiset holding one empty Daru::DataFrame (ordered by
  # +fields+) for every key in +ds_names+.
  def self.new_empty_vectors(fields, ds_names)
    ms = Multiset.new(fields)
    ds_names.each do |d|
      ms.add_dataset(d, Daru::DataFrame.new({}, order: fields))
    end
    ms
  end

  # Generates a new dataset as the union of the partial datasets.
  # If a block is given, it is called with (key, copy_of_dataset) before the
  # copy's values are appended, so the stored dataset object is not replaced.
  # Datasets with zero rows are skipped (see #each).
  # Each resulting vector takes its name from the first non-empty dataset.
  def union(&block)
    # Pre-seed every field so a multiset with only empty datasets still
    # produces (empty) vectors instead of passing nil around.
    values = @fields.each_with_object({}) { |f, acc| acc[f] = [] }
    names = {}

    each do |k, ds|
      if block
        ds = ds.dup
        yield k, ds
      end
      @fields.each do |f|
        vector = ds[f]
        values[f].concat(vector.to_a)
        names[f] ||= vector.name
      end
    end

    vectors = @fields.each_with_object({}) do |f, acc|
      acc[f] = Daru::Vector.new(values[f], name: names[f])
    end

    Daru::DataFrame.new(vectors, order: @fields)
  end

  # Sorted list of dataset keys.
  def datasets_names
    @datasets.keys.sort
  end

  # Number of datasets stored, including empty ones.
  def n_datasets
    @datasets.size
  end

  # Stores +ds+ under +key+.
  # Raises ArgumentError unless the dataset's vectors equal +fields+
  # (same names, same order).
  def add_dataset(key, ds)
    if ds.vectors.to_a != @fields
      raise ArgumentError, "Dataset(#{ds.vectors.to_a.to_s})must have the same fields of the Multiset(#{@fields})"
    end

    @datasets[key] = ds
  end

  # Sums, over every dataset, the value the block returns for
  # (dataset_key, dataset[field]).
  def sum_field(field)
    @datasets.inject(0) do |acc, (stratum_name, ds)|
      acc + yield(stratum_name, ds[field])
    end
  end

  # Array with the block's result for (dataset_key, dataset[field]) of
  # every dataset.
  def collect_vector(field)
    @datasets.collect { |k, v| yield k, v[field] }
  end

  # Yields (dataset_key, dataset[field]) for every dataset.
  def each_vector(field)
    @datasets.each { |k, v| yield k, v[field] }
  end

  # Dataset stored under key +i+, or nil.
  def [](i)
    @datasets[i]
  end

  # Yields (key, dataset) for every dataset that has at least one row.
  # Without a block, returns an Enumerator over the same pairs.
  def each(&block)
    return enum_for(:each) unless block

    @datasets.each do |k, ds|
      next if ds.nrows == 0
      block.call(k, ds)
    end
  end
end
95
# Estimators for stratified samples stored in a Multiset (one dataset per
# stratum).
#
# The class-level helpers take +es+, an Array with one Hash per stratum:
# * 'N': stratum population size
# * 'n': stratum sample size
# * 's': stratum standard deviation (mean estimators)
# * 'p': stratum proportion (proportion estimators)
#
# Suffixes: +wr+ / +wor+ = with / without replacement; +esd+ = estimated
# population variance. +ksd+ is presumably "known standard deviation" —
# NOTE(review): confirm naming with the original author.
class StratifiedSample
  class << self
    # Pooled mean of an array of vectors: sum of all values divided by the
    # total number of values.
    def mean(*vectors)
      n_total = 0
      total = vectors.inject(0) do |acc, v|
        n_total += v.size
        acc + v.sum
      end
      total.to_f / n_total
    end

    # (1/N) * sqrt( sum( N_h^2 * s_h^2 / n_h ) )
    def standard_error_ksd_wr(es)
      n_total = 0
      sum = es.inject(0) do |acc, h|
        n_total += h['N']
        acc + ((h['N']**2 * h['s']**2) / h['n'].to_f)
      end
      (1.to_f / n_total) * Math.sqrt(sum)
    end

    # Square of #standard_error_ksd_wr.
    def variance_ksd_wr(es)
      standard_error_ksd_wr(es)**2
    end

    # Total population size: sum of 'N' over the strata.
    def calculate_n_total(es)
      es.inject(0) { |acc, h| acc + h['N'] }
    end

    # sum( (N_h/N)^2 * (s_h^2 / n_h) * (1 - n_h/N_h) )
    # Source : Cochran (1972)
    def variance_ksd_wor(es)
      n_total = calculate_n_total(es)
      es.inject(0) do |acc, h|
        val = ((h['N'].to_f / n_total)**2) * (h['s']**2 / h['n'].to_f) * (1 - (h['n'].to_f / h['N']))
        acc + val
      end
    end

    # Square root of #variance_ksd_wor.
    def standard_error_ksd_wor(es)
      Math.sqrt(variance_ksd_wor(es))
    end

    # (1/N^2) * sum( N_h * (N_h - n_h) * s_h^2 / n_h )
    def variance_esd_wor(es)
      n_total = calculate_n_total(es)
      sum = es.inject(0) do |acc, h|
        val = h['N'] * (h['N'] - h['n']) * (h['s']**2 / h['n'].to_f)
        acc + val
      end
      (1.0 / (n_total**2)) * sum
    end

    # Square root of #variance_esd_wor.
    # The previous version called variance_ksd_wor; the two formulas are
    # algebraically identical, so results only differ by rounding.
    def standard_error_esd_wor(es)
      Math.sqrt(variance_esd_wor(es))
    end

    # (1/N^2) * sum( s_h^2 * N_h^2 / n_h )
    # Based on http://stattrek.com/Lesson6/STRAnalysis.aspx
    def variance_esd_wr(es)
      n_total = calculate_n_total(es)
      sum = es.inject(0) do |acc, h|
        val = ((h['s']**2 * h['N']**2) / h['n'].to_f)
        acc + val
      end
      (1.0 / (n_total**2)) * sum
    end

    # Square root of #variance_esd_wr.
    def standard_error_esd_wr(es)
      Math.sqrt(variance_esd_wr(es))
    end

    # sum( (N_h/N)^2 * p_h(1-p_h) / n_h * (1 - n_h/N_h) )
    def proportion_variance_ksd_wor(es)
      n_total = calculate_n_total(es)
      es.inject(0) do |acc, h|
        val = (((h['N'].to_f / n_total)**2 * h['p'] * (1 - h['p'])) / (h['n'])) * (1 - (h['n'].to_f / h['N']))
        acc + val
      end
    end

    # Square root of #proportion_variance_ksd_wor.
    def proportion_sd_ksd_wor(es)
      Math.sqrt(proportion_variance_ksd_wor(es))
    end

    # (1/N) * sqrt( sum( N_h^2 * p_h(1-p_h) / n_h ) )
    def proportion_sd_ksd_wr(es)
      n_total = calculate_n_total(es)
      sum = es.inject(0) do |acc, h|
        val = (h['N']**2 * h['p'] * (1 - h['p'])) / h['n'].to_f
        acc + val
      end
      Math.sqrt(sum) * (1.0 / n_total)
    end

    # Square of #proportion_sd_ksd_wr.
    # (Previously squared the *without replacement variance*, which is
    # neither a standard deviation nor a with-replacement quantity.)
    def proportion_variance_ksd_wr(es)
      proportion_sd_ksd_wr(es)**2
    end

    # (1/N^2) * sum( N_h^2 * (N_h - n_h) * p_h(1-p_h) / ((n_h-1)(N_h-1)) )
    #
    # Same per-stratum term as the instance method #variance_pst.
    # Previously this raised NameError (undefined +val+), overwrote the
    # accumulator and applied a square root to what is named a variance.
    def proportion_variance_esd_wor(es)
      n_total = calculate_n_total(es)
      sum = es.inject(0) do |acc, h|
        val = (h['N']**2 * (h['N'] - h['n']) * h['p'] * (1.0 - h['p'])) / ((h['n'] - 1) * (h['N'] - 1))
        acc + val
      end
      (1.0 / n_total**2) * sum
    end

    # Square root of #proportion_variance_esd_wor.
    # (Previously delegated to the ksd variance.)
    def proportion_sd_esd_wor(es)
      Math.sqrt(proportion_variance_esd_wor(es))
    end
  end

  # Number of strata
  attr_reader :strata_number
  # Population size. Equal to sum of strata sizes
  # Symbol: N
  attr_reader :population_size
  # Sample size. Equal to sum of sample of each stratum
  attr_reader :sample_size

  # * ms: a Statsample::Multiset with one dataset per stratum
  # * strata_sizes: Hash {dataset key => stratum population size}
  #
  # Raises TypeError if +ms+ is not a Multiset and ArgumentError unless
  # +strata_sizes+ has exactly the multiset's dataset keys.
  def initialize(ms, strata_sizes)
    raise TypeError, "ms should be a Multiset" unless ms.is_a? Statsample::Multiset
    @ms = ms
    raise ArgumentError, "You should put a strata size for each dataset" if strata_sizes.keys.sort != ms.datasets_names
    @strata_sizes = strata_sizes
    @population_size = @strata_sizes.inject(0) { |acc, (_name, size)| acc + size }
    @strata_number = @ms.n_datasets
    @sample_size = @ms.datasets.inject(0) { |acc, (_name, ds)| acc + ds.nrows }
  end

  # Size of stratum h
  def stratum_size(h)
    @strata_sizes[h]
  end

  # Array with the vector +field+ of every stratum dataset.
  def vectors_by_field(field)
    @ms.datasets.collect { |_k, ds| ds[field] }
  end

  # Population proportion based on strata
  def proportion(field, v = 1)
    @ms.sum_field(field) do |s_name, vector|
      stratum_ponderation(s_name) * vector.proportion(v)
    end
  end

  # Stratum ponderation.
  # Symbol: W\<sub>h\</sub>
  def stratum_ponderation(h)
    @strata_sizes[h].to_f / @population_size
  end
  alias_method :wh, :stratum_ponderation

  # Population mean based on strata
  def mean(field)
    @ms.sum_field(field) do |s_name, vector|
      stratum_ponderation(s_name) * vector.mean
    end
  end

  # Standard error with estimated population variance and without replacement.
  # Source: Cochran (1972)
  def standard_error_wor(field)
    es = @ms.collect_vector(field) do |s_n, vector|
      { 'N' => @strata_sizes[s_n], 'n' => vector.size, 's' => vector.sds }
    end

    StratifiedSample.standard_error_esd_wor(es)
  end

  # Standard error with estimated population variance and without replacement.
  # Source: http://stattrek.com/Lesson6/STRAnalysis.aspx
  def standard_error_wor_2(field)
    sum = @ms.sum_field(field) do |s_name, vector|
      s_size = @strata_sizes[s_name]
      (s_size**2 * (1 - (vector.size.to_f / s_size)) * vector.variance_sample / vector.size.to_f)
    end
    (1 / @population_size.to_f) * Math.sqrt(sum)
  end

  # Standard error with estimated population variance and with replacement.
  def standard_error_wr(field)
    es = @ms.collect_vector(field) do |s_n, vector|
      { 'N' => @strata_sizes[s_n], 'n' => vector.size, 's' => vector.sds }
    end

    StratifiedSample.standard_error_esd_wr(es)
  end

  # Standard deviation of the proportion of value +v+, estimated variance,
  # without replacement (see StratifiedSample.proportion_sd_esd_wor).
  def proportion_sd_esd_wor(field, v = 1)
    es = @ms.collect_vector(field) do |s_n, vector|
      { 'N' => @strata_sizes[s_n], 'n' => vector.size, 'p' => vector.proportion(v) }
    end

    StratifiedSample.proportion_sd_esd_wor(es)
  end

  # (1/N) * sqrt( sum( N_h^2 * (1 - n_h/N_h) * p(1-p) / (n_h - 1) ) )
  #
  # NOTE(review): +p+ here is the overall stratified proportion, not the
  # per-stratum proportion — confirm this is intended.
  def proportion_standard_error(field, v = 1)
    prop = proportion(field, v)
    sum = @ms.sum_field(field) do |s_name, vector|
      nh = vector.size
      s_size = @strata_sizes[s_name]
      # nh.to_f: with two Integers the sampling fraction truncated to 0
      # and the finite population correction was silently dropped.
      (s_size**2 * (1 - (nh.to_f / s_size)) * prop * (1 - prop) / (nh - 1))
    end
    (1.quo(@population_size)) * Math.sqrt(sum)
  end

  # (1/N^2) * sum( N_h^2 * (N_h - n_h) / (N_h - 1) * p_h(1-p_h) / (n_h - 1) )
  # Cochran(1971), p. 150
  def variance_pst(field, v = 1)
    sum = @ms.datasets.inject(0) do |acc, (stratum_name, ds)|
      # nrows is the row count used everywhere else in this file
      # (the former ds.cases is not called anywhere else here).
      nh = ds.nrows.to_f
      s_size = @strata_sizes[stratum_name]
      prop = ds[field].proportion(v)
      acc + (((s_size**2 * (s_size - nh)) / (s_size - 1)) * (prop * (1 - prop) / (nh - 1)))
    end
    (1 / @population_size.to_f**2) * sum
  end
end
310
+ end
@@ -0,0 +1,65 @@
1
+ require 'statsample/regression/simple'
2
+ require 'statsample/regression/multiple'
3
+
4
+ require 'statsample/regression/multiple/matrixengine'
5
+ require 'statsample/regression/multiple/rubyengine'
6
+ require 'statsample/regression/multiple/gslengine'
7
+
8
+ module Statsample
9
+ # = Module for regression procedures.
10
+ # Use the methods on this module to generate
11
+ # analysis.
12
+ # If you need more control, you can
13
+ # create and directly control the objects that compute
14
+ # the regressions.
15
+ #
16
+ # * Simple Regression : Statsample::Regression::Simple
17
+ # * Multiple Regression: Statsample::Regression::Multiple
18
module Regression
  # Error class signalling a linear dependency. Nothing in this module
  # raises it directly. It derives from StandardError so that a plain
  # +rescue+ can handle it; rescuing LinearDependency by name still works.
  LinearDependency = Class.new(StandardError)

  # Create a Statsample::Regression::Simple object, for simple regression
  # * x: independent Vector
  # * y: dependent Vector
  # <b>Usage:</b>
  #   x = Daru::Vector.new(100.times.collect {|i| rand(100)})
  #   y = Daru::Vector.new(100.times.collect {|i| 2+x[i]*2+rand()})
  #   sr=Statsample::Regression.simple(x,y)
  #   sr.a
  #   => 2.51763295177808
  #   sr.b
  #   => 1.99973746599856
  #   sr.r
  #   => 0.999987881153254
  def self.simple(x, y)
    Statsample::Regression::Simple.new_from_vectors(x, y)
  end

  # Creates a Statsample::Regression::Multiple::RubyEngine
  # for OLS multiple regression.
  # Parameters:
  # * <tt>ds</tt>: Dataset.
  # * y_var: Name of dependent variable.
  # * opts: A hash with options. It is not modified; every option other
  #   than :missing_data is forwarded to the engine.
  #   * missing_data: Could be
  #     * :listwise: delete cases with one or more empty data (default).
  #     * :pairwise: the dataset is handed to the engine unfiltered.
  #       Use with caution.
  #
  # <b>Usage:</b>
  #   lr=Statsample::Regression::multiple(ds,:y)
  def self.multiple(ds, y_var, opts = Hash.new)
    # Work on a copy: the original code deleted :missing_data from the
    # caller's hash.
    opts = opts.dup
    missing_data = opts[:missing_data].nil? ? :listwise : opts.delete(:missing_data)

    # The GSL engine was already unreachable (guarded by `... and false`),
    # so RubyEngine is used in every case.
    data = if missing_data == :pairwise
             ds
           else
             ds.reject_values(*Daru::MISSING_VALUES)
           end

    Statsample::Regression::Multiple::RubyEngine.new(data, y_var, opts)
  end
end
65
+ end
@@ -0,0 +1,89 @@
1
+ require 'statsample/regression/multiple/baseengine'
2
+ module Statsample
3
+ module Regression
4
+ # Module for OLS Multiple Regression Analysis.
5
+ #
6
+ # Usage:
7
+ #
8
+ # require 'statsample'
9
+ # a = Daru::Vector.new(1000.times.collect {rand})
10
+ # b = Daru::Vector.new(1000.times.collect {rand})
11
+ # c = Daru::Vector.new(1000.times.collect {rand})
12
+ # ds= Daru::DataFrame.new({:a => a,:b => b,:c => c})
13
+ # ds[:y]=ds.collect{|row| row[:a]*5 + row[:b]*3 + row[:c]*2 + rand()}
14
+ # lr=Statsample::Regression.multiple(ds, :y)
15
+ # puts lr.summary
16
+ # Summary for regression of a,b,c over y
17
+ # *************************************************************
18
+ # Engine: Statsample::Regression::Multiple::AlglibEngine
19
+ # Cases(listwise)=1000(1000)
20
+ # r=0.986
21
+ # r2=0.973
22
+ # Equation=0.504+5.011a + 2.995b + 1.988c
23
+ # ----------------------------
24
+ # ANOVA TABLE
25
+ # --------------------------------------------------------------
26
+ # | source | ss | df | ms | f | s |
27
+ # --------------------------------------------------------------
28
+ # | Regression | 2979.321 | 3 | 993.107 | 12040.067 | 0.000 |
29
+ # | Error | 82.154 | 996 | 0.082 | | |
30
+ # | Total | 3061.475 | 999 | | | |
31
+ # --------------------------------------------------------------
32
+ # Beta coefficientes
33
+ # -----------------------------------------------
34
+ # | coeff | b | beta | se | t |
35
+ # -----------------------------------------------
36
+ # | Constant | 0.504 | - | 0.030 | 16.968 |
37
+ # | a | 5.011 | 0.832 | 0.031 | 159.486 |
38
+ # | b | 2.995 | 0.492 | 0.032 | 94.367 |
39
+ # | c | 1.988 | 0.323 | 0.032 | 62.132 |
40
+ # -----------------------------------------------
41
+ #
42
module Multiple
  # Returns element [0,0] of rxy' * rxx^-1 * rxy, i.e. the r2 obtained
  # from the regressors' matrix +rxx+ and the regressors/criterion
  # matrix +rxy+.
  def self.r2_from_matrices(rxx, rxy)
    product = rxy.transpose * rxx.inverse * rxy
    product[0, 0]
  end

  # Measures for a set of dependent variables (+y_var+) against the
  # remaining fields of a covariance matrix.
  class MultipleDependent
    # * matrix: extended in place with Statsample::CovariateMatrix
    # * y_var: list of dependent field names; all other fields of
    #   +matrix+ are treated as predictors
    # * opts: accepted but not used here
    def initialize(matrix, y_var, opts = Hash.new)
      matrix.extend Statsample::CovariateMatrix
      @matrix = matrix
      @fields = matrix.fields - y_var
      @y_var = y_var
      @q = @y_var.size

      # Correlation matrix and its x / y blocks
      @matrix_cor = matrix.correlation
      @matrix_cor_xx = @matrix_cor.submatrix(@fields)
      @matrix_cor_yy = @matrix_cor.submatrix(y_var, y_var)

      # Blocks of the original matrix
      @sxx = @matrix.submatrix(@fields)
      @syy = @matrix.submatrix(y_var, y_var)
      @sxy = @matrix.submatrix(@fields, y_var)
      @syx = @sxy.t
    end

    # Always 0.0.
    def significance
      0.0
    end

    # 1 - |R| / (|Ryy| * |Rxx|), computed on the correlation matrix.
    def r2yx
      denominator = @matrix_cor_yy.determinant * @matrix_cor_xx.determinant
      1 - @matrix_cor.determinant.quo(denominator)
    end

    # Residual covariance of Y after accounting for its linear relation with X
    def syyx
      @syy - @syx * @sxx.inverse * @sxy
    end

    # 1 - |Syy.x| / |Syy|
    def r2yx_covariance
      1 - syyx.determinant.quo(@syy.determinant)
    end

    # q - trace(Syy^-1 * Syy.x), with q the number of dependent variables.
    def vxy
      residual_share = @syy.inverse * syyx
      @q - residual_share.trace
    end

    # vxy divided by the number of dependent variables.
    def p2yx
      vxy.quo(@q)
    end
  end
end
88
+ end
89
+ end