statsample-ekatena 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +15 -0
  3. data/.travis.yml +23 -0
  4. data/CONTRIBUTING.md +17 -0
  5. data/Gemfile +2 -0
  6. data/History.txt +457 -0
  7. data/LICENSE.txt +12 -0
  8. data/README.md +175 -0
  9. data/Rakefile +44 -0
  10. data/benchmarks/correlation_matrix_15_variables.rb +32 -0
  11. data/benchmarks/correlation_matrix_5_variables.rb +33 -0
  12. data/benchmarks/correlation_matrix_methods/correlation_matrix.ds +0 -0
  13. data/benchmarks/correlation_matrix_methods/correlation_matrix.html +93 -0
  14. data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +71 -0
  15. data/benchmarks/correlation_matrix_methods/correlation_matrix.xls +0 -0
  16. data/benchmarks/correlation_matrix_methods/correlation_matrix_gsl_ruby.ods +0 -0
  17. data/benchmarks/correlation_matrix_methods/correlation_matrix_with_graphics.ods +0 -0
  18. data/benchmarks/correlation_matrix_methods/results.ds +0 -0
  19. data/benchmarks/factor_map.rb +37 -0
  20. data/benchmarks/helpers_benchmark.rb +5 -0
  21. data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
  22. data/doc_latex/manual/equations.tex +78 -0
  23. data/examples/boxplot.rb +28 -0
  24. data/examples/chisquare_test.rb +23 -0
  25. data/examples/correlation_matrix.rb +32 -0
  26. data/examples/dataset.rb +30 -0
  27. data/examples/dominance_analysis.rb +33 -0
  28. data/examples/dominance_analysis_bootstrap.rb +32 -0
  29. data/examples/histogram.rb +26 -0
  30. data/examples/icc.rb +24 -0
  31. data/examples/levene.rb +29 -0
  32. data/examples/multiple_regression.rb +20 -0
  33. data/examples/multivariate_correlation.rb +33 -0
  34. data/examples/parallel_analysis.rb +40 -0
  35. data/examples/polychoric.rb +40 -0
  36. data/examples/principal_axis.rb +26 -0
  37. data/examples/reliability.rb +31 -0
  38. data/examples/scatterplot.rb +25 -0
  39. data/examples/t_test.rb +27 -0
  40. data/examples/tetrachoric.rb +17 -0
  41. data/examples/u_test.rb +24 -0
  42. data/examples/vector.rb +20 -0
  43. data/examples/velicer_map_test.rb +46 -0
  44. data/grab_references.rb +29 -0
  45. data/lib/spss.rb +134 -0
  46. data/lib/statsample-ekatena/analysis.rb +100 -0
  47. data/lib/statsample-ekatena/analysis/suite.rb +89 -0
  48. data/lib/statsample-ekatena/analysis/suitereportbuilder.rb +44 -0
  49. data/lib/statsample-ekatena/anova.rb +24 -0
  50. data/lib/statsample-ekatena/anova/contrast.rb +79 -0
  51. data/lib/statsample-ekatena/anova/oneway.rb +187 -0
  52. data/lib/statsample-ekatena/anova/twoway.rb +207 -0
  53. data/lib/statsample-ekatena/bivariate.rb +406 -0
  54. data/lib/statsample-ekatena/bivariate/pearson.rb +54 -0
  55. data/lib/statsample-ekatena/codification.rb +182 -0
  56. data/lib/statsample-ekatena/converter/csv.rb +28 -0
  57. data/lib/statsample-ekatena/converter/spss.rb +48 -0
  58. data/lib/statsample-ekatena/converters.rb +211 -0
  59. data/lib/statsample-ekatena/crosstab.rb +188 -0
  60. data/lib/statsample-ekatena/daru.rb +115 -0
  61. data/lib/statsample-ekatena/dataset.rb +10 -0
  62. data/lib/statsample-ekatena/dominanceanalysis.rb +425 -0
  63. data/lib/statsample-ekatena/dominanceanalysis/bootstrap.rb +232 -0
  64. data/lib/statsample-ekatena/factor.rb +104 -0
  65. data/lib/statsample-ekatena/factor/map.rb +124 -0
  66. data/lib/statsample-ekatena/factor/parallelanalysis.rb +166 -0
  67. data/lib/statsample-ekatena/factor/pca.rb +242 -0
  68. data/lib/statsample-ekatena/factor/principalaxis.rb +243 -0
  69. data/lib/statsample-ekatena/factor/rotation.rb +198 -0
  70. data/lib/statsample-ekatena/formula/fit_model.rb +46 -0
  71. data/lib/statsample-ekatena/formula/formula.rb +306 -0
  72. data/lib/statsample-ekatena/graph.rb +11 -0
  73. data/lib/statsample-ekatena/graph/boxplot.rb +236 -0
  74. data/lib/statsample-ekatena/graph/histogram.rb +198 -0
  75. data/lib/statsample-ekatena/graph/scatterplot.rb +213 -0
  76. data/lib/statsample-ekatena/histogram.rb +180 -0
  77. data/lib/statsample-ekatena/matrix.rb +329 -0
  78. data/lib/statsample-ekatena/multiset.rb +310 -0
  79. data/lib/statsample-ekatena/regression.rb +65 -0
  80. data/lib/statsample-ekatena/regression/multiple.rb +89 -0
  81. data/lib/statsample-ekatena/regression/multiple/alglibengine.rb +128 -0
  82. data/lib/statsample-ekatena/regression/multiple/baseengine.rb +251 -0
  83. data/lib/statsample-ekatena/regression/multiple/gslengine.rb +129 -0
  84. data/lib/statsample-ekatena/regression/multiple/matrixengine.rb +205 -0
  85. data/lib/statsample-ekatena/regression/multiple/rubyengine.rb +86 -0
  86. data/lib/statsample-ekatena/regression/simple.rb +121 -0
  87. data/lib/statsample-ekatena/reliability.rb +150 -0
  88. data/lib/statsample-ekatena/reliability/icc.rb +415 -0
  89. data/lib/statsample-ekatena/reliability/multiscaleanalysis.rb +181 -0
  90. data/lib/statsample-ekatena/reliability/scaleanalysis.rb +233 -0
  91. data/lib/statsample-ekatena/reliability/skillscaleanalysis.rb +114 -0
  92. data/lib/statsample-ekatena/resample.rb +15 -0
  93. data/lib/statsample-ekatena/shorthand.rb +125 -0
  94. data/lib/statsample-ekatena/srs.rb +169 -0
  95. data/lib/statsample-ekatena/test.rb +82 -0
  96. data/lib/statsample-ekatena/test/bartlettsphericity.rb +45 -0
  97. data/lib/statsample-ekatena/test/chisquare.rb +73 -0
  98. data/lib/statsample-ekatena/test/f.rb +52 -0
  99. data/lib/statsample-ekatena/test/kolmogorovsmirnov.rb +63 -0
  100. data/lib/statsample-ekatena/test/levene.rb +88 -0
  101. data/lib/statsample-ekatena/test/t.rb +309 -0
  102. data/lib/statsample-ekatena/test/umannwhitney.rb +208 -0
  103. data/lib/statsample-ekatena/test/wilcoxonsignedrank.rb +90 -0
  104. data/lib/statsample-ekatena/vector.rb +19 -0
  105. data/lib/statsample-ekatena/version.rb +3 -0
  106. data/lib/statsample.rb +282 -0
  107. data/po/es/statsample.mo +0 -0
  108. data/po/es/statsample.po +959 -0
  109. data/po/statsample.pot +947 -0
  110. data/references.txt +24 -0
  111. data/statsample-ekatena.gemspec +49 -0
  112. data/test/fixtures/bank2.dat +200 -0
  113. data/test/fixtures/correlation_matrix.rb +17 -0
  114. data/test/fixtures/df.csv +15 -0
  115. data/test/fixtures/hartman_23.matrix +9 -0
  116. data/test/fixtures/stock_data.csv +500 -0
  117. data/test/fixtures/tetmat_matrix.txt +5 -0
  118. data/test/fixtures/tetmat_test.txt +1001 -0
  119. data/test/helpers_tests.rb +83 -0
  120. data/test/test_analysis.rb +176 -0
  121. data/test/test_anova_contrast.rb +36 -0
  122. data/test/test_anovaoneway.rb +26 -0
  123. data/test/test_anovatwoway.rb +37 -0
  124. data/test/test_anovatwowaywithdataset.rb +47 -0
  125. data/test/test_anovawithvectors.rb +102 -0
  126. data/test/test_awesome_print_bug.rb +16 -0
  127. data/test/test_bartlettsphericity.rb +25 -0
  128. data/test/test_bivariate.rb +164 -0
  129. data/test/test_codification.rb +78 -0
  130. data/test/test_crosstab.rb +67 -0
  131. data/test/test_dominance_analysis.rb +39 -0
  132. data/test/test_factor.rb +228 -0
  133. data/test/test_factor_map.rb +38 -0
  134. data/test/test_factor_pa.rb +56 -0
  135. data/test/test_fit_model.rb +88 -0
  136. data/test/test_ggobi.rb +35 -0
  137. data/test/test_gsl.rb +15 -0
  138. data/test/test_histogram.rb +109 -0
  139. data/test/test_matrix.rb +48 -0
  140. data/test/test_multiset.rb +176 -0
  141. data/test/test_regression.rb +231 -0
  142. data/test/test_reliability.rb +223 -0
  143. data/test/test_reliability_icc.rb +198 -0
  144. data/test/test_reliability_skillscale.rb +57 -0
  145. data/test/test_resample.rb +24 -0
  146. data/test/test_srs.rb +9 -0
  147. data/test/test_statistics.rb +69 -0
  148. data/test/test_stest.rb +69 -0
  149. data/test/test_stratified.rb +17 -0
  150. data/test/test_test_f.rb +33 -0
  151. data/test/test_test_kolmogorovsmirnov.rb +34 -0
  152. data/test/test_test_t.rb +62 -0
  153. data/test/test_umannwhitney.rb +27 -0
  154. data/test/test_vector.rb +12 -0
  155. data/test/test_wilcoxonsignedrank.rb +64 -0
  156. metadata +570 -0
@@ -0,0 +1,310 @@
1
+ module Statsample
2
# Multiset joins multiple datasets that share the same fields (vectors)
# but may have a different number of cases.
# This is the base class for stratified and cluster sampling estimation.
class Multiset
  # Names of the fields every dataset must provide, in order.
  attr_reader :fields
  # Hash that maps a dataset key to its dataset (a Daru::DataFrame
  # when built through the helpers of this class).
  attr_reader :datasets

  # Creates a multiset that only defines its fields; datasets are added
  # afterwards with #add_dataset.
  # * Multiset.new(%w{f1 f2 f3}) # define only fields
  def initialize(fields)
    @fields = fields
    @datasets = {}
  end

  # Builds a multiset holding one empty Daru::DataFrame (ordered by
  # +fields+) for every name in +ds_names+.
  def self.new_empty_vectors(fields, ds_names)
    ms = Multiset.new(fields)
    ds_names.each do |d|
      ms.add_dataset(d, Daru::DataFrame.new({}, order: fields))
    end

    ms
  end

  # Generates a new dataset as the union of the partial datasets.
  # If a block is given, it is applied to a copy of each dataset
  # (as +key, dataset+) before the union.
  # Datasets without rows are skipped, because iteration goes through #each.
  def union(&block)
    union_field = {}
    names = {}
    each do |k, ds|
      if block
        # Work on a copy so the block cannot alter the stored dataset.
        ds = ds.dup
        yield k, ds
      end
      @fields.each do |f|
        (union_field[f] ||= []).concat(ds[f].to_a)
        # The name of the first vector seen for a field wins.
        names[f] ||= ds[f].name
      end
    end

    @fields.each do |f|
      union_field[f] = Daru::Vector.new(union_field[f], name: names[f])
    end

    Daru::DataFrame.new(union_field, order: @fields)
  end

  # Sorted list of the dataset keys.
  def datasets_names
    @datasets.keys.sort
  end

  # Number of datasets stored.
  def n_datasets
    @datasets.size
  end

  # Stores +ds+ under +key+.
  # Raises ArgumentError when the vectors of +ds+ differ from the fields
  # of the multiset (names and order are compared).
  def add_dataset(key, ds)
    if ds.vectors.to_a != @fields
      raise ArgumentError, "Dataset(#{ds.vectors.to_a.to_s})must have the same fields of the Multiset(#{@fields})"
    end

    @datasets[key] = ds
  end

  # Yields +key, vector+ for +field+ on every dataset (empty ones
  # included) and returns the sum of the block results.
  def sum_field(field)
    @datasets.inject(0) do |acc, (stratum_name, ds)|
      acc + yield(stratum_name, ds[field])
    end
  end

  # Yields +key, vector+ for +field+ on every dataset and returns an
  # Array with the block results.
  def collect_vector(field)
    @datasets.collect { |k, v| yield k, v[field] }
  end

  # Yields +key, vector+ for +field+ on every dataset.
  def each_vector(field)
    @datasets.each { |k, v| yield k, v[field] }
  end

  # Dataset stored under key +i+, or nil.
  def [](i)
    @datasets[i]
  end

  # Yields +key, dataset+ for every dataset that has at least one row.
  # Without a block an Enumerator is returned instead of failing on
  # a nil block.
  def each(&block)
    return enum_for(:each) unless block

    @datasets.each do |k, ds|
      next if ds.nrows == 0
      block.call(k, ds)
    end
  end
end
95
# Estimators for stratified samples built on top of a Multiset, where every
# dataset of the multiset is the sample taken from one stratum.
#
# The class-level helpers receive +es+, an Array with one Hash per stratum:
# * 'N': size of the stratum in the population
# * 'n': size of the sample taken from the stratum
# * 's': standard deviation of the stratum (scale estimators)
# * 'p': proportion observed in the stratum (proportion estimators)
#
# Naming, as far as this file shows: +esd+ marks estimators that use an
# estimated variance and +wor+/+wr+ sampling without/with replacement
# (see #standard_error_wor). +ksd+ presumably stands for a known standard
# deviation -- TODO confirm.
class StratifiedSample
  class << self
    # Pooled mean of an array of vectors: total sum over total size.
    def mean(*vectors)
      n_total = 0
      total = vectors.inject(0) do |a, v|
        n_total += v.size
        a + v.sum
      end
      total.to_f / n_total
    end

    # (1 / N) * sqrt(sum(N_h^2 * s_h^2 / n_h))
    def standard_error_ksd_wr(es)
      n_total = 0
      sum = es.inject(0) do |a, h|
        n_total += h['N']
        a + ((h['N']**2 * h['s']**2) / h['n'].to_f)
      end
      (1.to_f / n_total) * Math.sqrt(sum)
    end

    # Square of standard_error_ksd_wr.
    def variance_ksd_wr(es)
      standard_error_ksd_wr(es)**2
    end

    # Population size: sum of the 'N' of every stratum.
    def calculate_n_total(es)
      es.inject(0) { |a, h| a + h['N'] }
    end

    # sum((N_h / N)^2 * (s_h^2 / n_h) * (1 - n_h / N_h))
    # Source : Cochran (1972)
    def variance_ksd_wor(es)
      n_total = calculate_n_total(es)
      es.inject(0) do |a, h|
        val = ((h['N'].to_f / n_total)**2) * (h['s']**2 / h['n'].to_f) * (1 - (h['n'].to_f / h['N']))
        a + val
      end
    end

    # Square root of variance_ksd_wor.
    def standard_error_ksd_wor(es)
      Math.sqrt(variance_ksd_wor(es))
    end

    # (1 / N^2) * sum(N_h * (N_h - n_h) * s_h^2 / n_h)
    def variance_esd_wor(es)
      n_total = calculate_n_total(es)
      sum = es.inject(0) do |a, h|
        val = h['N'] * (h['N'] - h['n']) * (h['s']**2 / h['n'].to_f)
        a + val
      end
      (1.0 / (n_total**2)) * sum
    end

    # Square root of variance_esd_wor.
    # The original delegated to variance_ksd_wor; both formulas are
    # algebraically the same, so this only makes the pairing consistent.
    def standard_error_esd_wor(es)
      Math.sqrt(variance_esd_wor(es))
    end

    # (1 / N^2) * sum(s_h^2 * N_h^2 / n_h)
    # Based on http://stattrek.com/Lesson6/STRAnalysis.aspx
    def variance_esd_wr(es)
      n_total = calculate_n_total(es)
      sum = es.inject(0) do |a, h|
        val = ((h['s']**2 * h['N']**2) / h['n'].to_f)
        a + val
      end
      (1.0 / (n_total**2)) * sum
    end

    # Square root of variance_esd_wr.
    def standard_error_esd_wr(es)
      Math.sqrt(variance_esd_wr(es))
    end

    # sum((N_h / N)^2 * p_h * (1 - p_h) / n_h * (1 - n_h / N_h))
    def proportion_variance_ksd_wor(es)
      n_total = calculate_n_total(es)
      es.inject(0) do |a, h|
        val = (((h['N'].to_f / n_total)**2 * h['p'] * (1 - h['p'])) / (h['n'])) * (1 - (h['n'].to_f / h['N']))
        a + val
      end
    end

    # Square root of proportion_variance_ksd_wor.
    def proportion_sd_ksd_wor(es)
      Math.sqrt(proportion_variance_ksd_wor(es))
    end

    # (1 / N) * sqrt(sum(N_h^2 * p_h * (1 - p_h) / n_h))
    def proportion_sd_ksd_wr(es)
      n_total = calculate_n_total(es)
      sum = es.inject(0) do |a, h|
        val = (h['N']**2 * h['p'] * (1 - h['p'])) / h['n'].to_f
        a + val
      end
      Math.sqrt(sum) * (1.0 / n_total)
    end

    # Square of proportion_sd_ksd_wr, mirroring variance_ksd_wr.
    # (The original squared the *without replacement variance*, which is
    # neither a "wr" figure nor a standard deviation.)
    def proportion_variance_ksd_wr(es)
      proportion_sd_ksd_wr(es)**2
    end

    # (1 / N^2) * sum(N_h^2 * (N_h - n_h) * p_h * (1 - p_h) / ((n_h - 1) * (N_h - 1)))
    # Same per-stratum term and scaling as #variance_pst.
    # The original always raised NameError (undefined +val+, accumulator
    # overwritten) and applied a square root to what is named a variance.
    def proportion_variance_esd_wor(es)
      n_total = calculate_n_total(es)
      sum = es.inject(0) do |a, h|
        val = (h['N']**2 * (h['N'] - h['n']) * h['p'] * (1.0 - h['p'])) / ((h['n'] - 1) * (h['N'] - 1))
        a + val
      end
      (1.0 / (n_total**2)) * sum
    end

    # NOTE(review): despite the "esd" name this takes the square root of
    # proportion_variance_ksd_wor, exactly as the original did. It probably
    # should use proportion_variance_esd_wor, but that would change the
    # numbers callers currently get -- confirm before switching.
    def proportion_sd_esd_wor(es)
      Math.sqrt(proportion_variance_ksd_wor(es))
    end
  end

  # Number of strata
  attr_reader :strata_number
  # Population size. Equal to sum of strata sizes
  # Symbol: N<sub>h</sub>
  attr_reader :population_size
  # Sample size. Equal to sum of sample of each stratum
  attr_reader :sample_size

  # * ms: a Statsample::Multiset with one dataset per stratum.
  # * strata_sizes: Hash mapping every dataset name to the population
  #   size of that stratum.
  # Raises TypeError when +ms+ is not a Multiset and ArgumentError when
  # the keys of +strata_sizes+ do not match the dataset names.
  def initialize(ms, strata_sizes)
    raise TypeError, "ms should be a Multiset" unless ms.is_a? Statsample::Multiset
    @ms = ms
    raise ArgumentError, "You should put a strata size for each dataset" if strata_sizes.keys.sort != ms.datasets_names
    @strata_sizes = strata_sizes
    @population_size = @strata_sizes.inject(0) { |a, x| a + x[1] }
    @strata_number = @ms.n_datasets
    @sample_size = @ms.datasets.inject(0) { |a, x| a + x[1].nrows }
  end

  # Size of stratum +h+ in the population.
  def stratum_size(h)
    @strata_sizes[h]
  end

  # Array with the vector of +field+ taken from every dataset.
  def vectors_by_field(field)
    @ms.datasets.collect { |_k, ds| ds[field] }
  end

  # Population proportion of value +v+ based on strata: the sum of each
  # stratum proportion weighted by its ponderation.
  def proportion(field, v = 1)
    @ms.sum_field(field) do |s_name, vector|
      stratum_ponderation(s_name) * vector.proportion(v)
    end
  end

  # Stratum ponderation: stratum size over population size.
  # Symbol: W\<sub>h\</sub>
  def stratum_ponderation(h)
    @strata_sizes[h].to_f / @population_size
  end
  alias_method :wh, :stratum_ponderation

  # Population mean based on strata: the sum of each stratum mean
  # weighted by its ponderation.
  def mean(field)
    @ms.sum_field(field) do |s_name, vector|
      stratum_ponderation(s_name) * vector.mean
    end
  end

  # Standard error with estimated population variance and without replacement.
  # Source: Cochran (1972)
  def standard_error_wor(field)
    es = @ms.collect_vector(field) do |s_n, vector|
      { 'N' => @strata_sizes[s_n], 'n' => vector.size, 's' => vector.sds }
    end

    StratifiedSample.standard_error_esd_wor(es)
  end

  # Standard error with estimated population variance and without replacement.
  # Source: http://stattrek.com/Lesson6/STRAnalysis.aspx
  def standard_error_wor_2(field)
    sum = @ms.sum_field(field) do |s_name, vector|
      s_size = @strata_sizes[s_name]
      (s_size**2 * (1 - (vector.size.to_f / s_size)) * vector.variance_sample / vector.size.to_f)
    end
    (1 / @population_size.to_f) * Math.sqrt(sum)
  end

  # Standard error computed with StratifiedSample.standard_error_esd_wr.
  def standard_error_wr(field)
    es = @ms.collect_vector(field) do |s_n, vector|
      { 'N' => @strata_sizes[s_n], 'n' => vector.size, 's' => vector.sds }
    end

    StratifiedSample.standard_error_esd_wr(es)
  end

  # Standard deviation of the proportion of value +v+, computed with
  # StratifiedSample.proportion_sd_esd_wor.
  def proportion_sd_esd_wor(field, v = 1)
    es = @ms.collect_vector(field) do |s_n, vector|
      { 'N' => @strata_sizes[s_n], 'n' => vector.size, 'p' => vector.proportion(v) }
    end

    StratifiedSample.proportion_sd_esd_wor(es)
  end

  # (1 / N) * sqrt(sum(N_h^2 * (1 - n_h / N_h) * p * (1 - p) / (n_h - 1)))
  # where p is the overall stratified proportion of value +v+.
  def proportion_standard_error(field, v = 1)
    prop = proportion(field, v)
    sum = @ms.sum_field(field) do |s_name, vector|
      nh = vector.size
      s_size = @strata_sizes[s_name]
      # nh.to_f: with two Integers, nh / s_size truncates to 0 and the
      # finite population correction silently disappears.
      (s_size**2 * (1 - (nh.to_f / s_size)) * prop * (1 - prop) / (nh - 1))
    end
    (1.quo(@population_size)) * Math.sqrt(sum)
  end

  # (1 / N^2) * sum(N_h^2 * (N_h - n_h) / (N_h - 1) * p_h * (1 - p_h) / (n_h - 1))
  # Cochran(1971), p. 150
  def variance_pst(field, v = 1)
    sum = @ms.datasets.inject(0) do |a, (stratum_name, ds)|
      # nrows is the row count used everywhere else in this file; the
      # original called ds.cases here.
      nh = ds.nrows.to_f
      s_size = @strata_sizes[stratum_name]
      prop = ds[field].proportion(v)
      a + (((s_size**2 * (s_size - nh)) / (s_size - 1)) * (prop * (1 - prop) / (nh - 1)))
    end
    (1 / @population_size.to_f**2) * sum
  end
end
310
+ end
@@ -0,0 +1,65 @@
1
+ require 'statsample/regression/simple'
2
+ require 'statsample/regression/multiple'
3
+
4
+ require 'statsample/regression/multiple/matrixengine'
5
+ require 'statsample/regression/multiple/rubyengine'
6
+ require 'statsample/regression/multiple/gslengine'
7
+
8
module Statsample
  # = Module for regression procedures.
  # Use the methods of this module to generate an analysis.
  # If you need more control, you can create and drive directly
  # the objects that compute the regressions.
  #
  # * Simple Regression :  Statsample::Regression::Simple
  # * Multiple Regression: Statsample::Regression::Multiple
  module Regression
    # Error class for linearly dependent variables (presumably raised by
    # the regression engines; nothing in this file raises it).
    # Derives from StandardError so that a plain +rescue+ can handle it.
    LinearDependency = Class.new(StandardError)

    # Create a Statsample::Regression::Simple object, for simple regression
    # * x: independent Vector
    # * y: dependent Vector
    # <b>Usage:</b>
    #   x = Daru::Vector.new(100.times.collect {|i| rand(100)})
    #   y = Daru::Vector.new(100.times.collect {|i| 2+x[i]*2+rand()})
    #   sr=Statsample::Regression.simple(x,y)
    #   sr.a
    #   => 2.51763295177808
    #   sr.b
    #   => 1.99973746599856
    #   sr.r
    #   => 0.999987881153254
    def self.simple(x, y)
      Statsample::Regression::Simple.new_from_vectors(x, y)
    end

    # Creates one of the Statsample::Regression::Multiple objects,
    # for OLS multiple regression.
    # Parameters:
    # * <tt>ds</tt>: Dataset.
    # * <tt>y_var</tt>: Name of dependent variable.
    # * <tt>opts</tt>: A hash with options. It is not modified; the engine
    #   receives a copy without the <tt>:missing_data</tt> entry.
    #   * missing_data: Could be
    #     * :listwise: delete cases with one or more empty data (default).
    #     * :pairwise: the dataset is handed to the engine untouched.
    #       Use with caution.
    #
    # <b>Usage:</b>
    #   lr=Statsample::Regression.multiple(ds,:y)
    def self.multiple(ds, y_var, opts = Hash.new)
      # Work on a copy: the original removed :missing_data from the hash
      # owned by the caller.
      opts = opts.dup
      missing_data = opts[:missing_data].nil? ? :listwise : opts.delete(:missing_data)

      if missing_data == :pairwise
        Statsample::Regression::Multiple::RubyEngine.new(ds, y_var, opts)
      elsif Statsample.has_gsl? && false
        # Unreachable because of the trailing `&& false`; the GSL engine
        # looks deliberately switched off, so the branch is kept as found.
        Statsample::Regression::Multiple::GslEngine.new(ds, y_var, opts)
      else
        ds2 = ds.reject_values(*Daru::MISSING_VALUES)
        Statsample::Regression::Multiple::RubyEngine.new(ds2, y_var, opts)
      end
    end
  end
end
@@ -0,0 +1,89 @@
1
+ require 'statsample/regression/multiple/baseengine'
2
module Statsample
  module Regression
    # Module for OLS Multiple Regression Analysis.
    #
    # Use:
    #
    #  require 'statsample'
    #  a = Daru::Vector.new(1000.times.collect {rand})
    #  b = Daru::Vector.new(1000.times.collect {rand})
    #  c = Daru::Vector.new(1000.times.collect {rand})
    #  ds= Daru::DataFrame.new({:a => a,:b => b,:c => c})
    #  ds[:y]=ds.collect{|row| row[:a]*5 + row[:b]*3 + row[:c]*2 + rand()}
    #  lr=Statsample::Regression.multiple(ds, :y)
    #  puts lr.summary
    #  Summary for regression of a,b,c over y
    #  *************************************************************
    #  Engine: Statsample::Regression::Multiple::AlglibEngine
    #  Cases(listwise)=1000(1000)
    #  r=0.986
    #  r2=0.973
    #  Equation=0.504+5.011a + 2.995b + 1.988c
    #  ----------------------------
    #  ANOVA TABLE
    #  --------------------------------------------------------------
    #  | source     | ss       | df  | ms      | f         | s     |
    #  --------------------------------------------------------------
    #  | Regression | 2979.321 | 3   | 993.107 | 12040.067 | 0.000 |
    #  | Error      | 82.154   | 996 | 0.082   |           |       |
    #  | Total      | 3061.475 | 999 |         |           |       |
    #  --------------------------------------------------------------
    #  Beta coefficientes
    #  -----------------------------------------------
    #  | coeff    | b     | beta  | se    | t       |
    #  -----------------------------------------------
    #  | Constant | 0.504 | -     | 0.030 | 16.968  |
    #  | a        | 5.011 | 0.832 | 0.031 | 159.486 |
    #  | b        | 2.995 | 0.492 | 0.032 | 94.367  |
    #  | c        | 1.988 | 0.323 | 0.032 | 62.132  |
    #  -----------------------------------------------
    #
    module Multiple
      # Obtain r2 for regressors: the single cell of rxy' * rxx^-1 * rxy.
      # * rxx: matrix among the regressors.
      # * rxy: column matrix between the regressors and the criterion.
      def self.r2_from_matrices(rxx, rxy)
        (rxy.transpose * rxx.inverse * rxy)[0, 0]
      end

      # Regression statistics for several dependent variables at once,
      # computed from a covariance matrix.
      class MultipleDependent
        # * matrix: matrix that answers to +fields+, +correlation+ and
        #   +submatrix+; it is extended with Statsample::CovariateMatrix.
        # * y_var: Array with the names of the dependent variables.
        # * opts: accepted but not used here.
        def initialize(matrix, y_var, opts = Hash.new)
          matrix.extend Statsample::CovariateMatrix
          @matrix = matrix
          # Every field that is not a dependent variable is a predictor.
          @fields = matrix.fields - y_var
          @y_var = y_var
          @q = @y_var.size

          # Correlation matrix and its predictor / dependent blocks.
          @matrix_cor = matrix.correlation
          @matrix_cor_xx = @matrix_cor.submatrix(@fields)
          @matrix_cor_yy = @matrix_cor.submatrix(y_var, y_var)

          # Blocks of the original matrix.
          @sxx = @matrix.submatrix(@fields)
          @syy = @matrix.submatrix(y_var, y_var)
          @sxy = @matrix.submatrix(@fields, y_var)
          @syx = @sxy.t
        end

        # Placeholder: always returns 0.0.
        def significance
          0.0
        end

        # 1 - |R| / (|Ryy| * |Rxx|), computed on the correlation matrix.
        def r2yx
          whole = @matrix_cor.determinant
          blocks = @matrix_cor_yy.determinant * @matrix_cor_xx.determinant
          1 - whole.quo(blocks)
        end

        # Residual covariance of Y after accounting for its linear
        # relation with X.
        def syyx
          explained = @syx * @sxx.inverse * @sxy
          @syy - explained
        end

        # 1 - |Syy.x| / |Syy|, computed on the original matrix.
        def r2yx_covariance
          residual = syyx.determinant
          1 - residual.quo(@syy.determinant)
        end

        # Number of dependent variables minus trace(Syy^-1 * Syy.x).
        def vxy
          unexplained = (@syy.inverse * syyx).trace
          @q - unexplained
        end

        # vxy divided by the number of dependent variables.
        def p2yx
          vxy.quo(@q)
        end
      end
    end
  end
end