statsample-ekatena 2.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/.travis.yml +23 -0
- data/CONTRIBUTING.md +17 -0
- data/Gemfile +2 -0
- data/History.txt +457 -0
- data/LICENSE.txt +12 -0
- data/README.md +175 -0
- data/Rakefile +44 -0
- data/benchmarks/correlation_matrix_15_variables.rb +32 -0
- data/benchmarks/correlation_matrix_5_variables.rb +33 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.ds +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.html +93 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +71 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.xls +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix_gsl_ruby.ods +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix_with_graphics.ods +0 -0
- data/benchmarks/correlation_matrix_methods/results.ds +0 -0
- data/benchmarks/factor_map.rb +37 -0
- data/benchmarks/helpers_benchmark.rb +5 -0
- data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
- data/doc_latex/manual/equations.tex +78 -0
- data/examples/boxplot.rb +28 -0
- data/examples/chisquare_test.rb +23 -0
- data/examples/correlation_matrix.rb +32 -0
- data/examples/dataset.rb +30 -0
- data/examples/dominance_analysis.rb +33 -0
- data/examples/dominance_analysis_bootstrap.rb +32 -0
- data/examples/histogram.rb +26 -0
- data/examples/icc.rb +24 -0
- data/examples/levene.rb +29 -0
- data/examples/multiple_regression.rb +20 -0
- data/examples/multivariate_correlation.rb +33 -0
- data/examples/parallel_analysis.rb +40 -0
- data/examples/polychoric.rb +40 -0
- data/examples/principal_axis.rb +26 -0
- data/examples/reliability.rb +31 -0
- data/examples/scatterplot.rb +25 -0
- data/examples/t_test.rb +27 -0
- data/examples/tetrachoric.rb +17 -0
- data/examples/u_test.rb +24 -0
- data/examples/vector.rb +20 -0
- data/examples/velicer_map_test.rb +46 -0
- data/grab_references.rb +29 -0
- data/lib/spss.rb +134 -0
- data/lib/statsample-ekatena/analysis.rb +100 -0
- data/lib/statsample-ekatena/analysis/suite.rb +89 -0
- data/lib/statsample-ekatena/analysis/suitereportbuilder.rb +44 -0
- data/lib/statsample-ekatena/anova.rb +24 -0
- data/lib/statsample-ekatena/anova/contrast.rb +79 -0
- data/lib/statsample-ekatena/anova/oneway.rb +187 -0
- data/lib/statsample-ekatena/anova/twoway.rb +207 -0
- data/lib/statsample-ekatena/bivariate.rb +406 -0
- data/lib/statsample-ekatena/bivariate/pearson.rb +54 -0
- data/lib/statsample-ekatena/codification.rb +182 -0
- data/lib/statsample-ekatena/converter/csv.rb +28 -0
- data/lib/statsample-ekatena/converter/spss.rb +48 -0
- data/lib/statsample-ekatena/converters.rb +211 -0
- data/lib/statsample-ekatena/crosstab.rb +188 -0
- data/lib/statsample-ekatena/daru.rb +115 -0
- data/lib/statsample-ekatena/dataset.rb +10 -0
- data/lib/statsample-ekatena/dominanceanalysis.rb +425 -0
- data/lib/statsample-ekatena/dominanceanalysis/bootstrap.rb +232 -0
- data/lib/statsample-ekatena/factor.rb +104 -0
- data/lib/statsample-ekatena/factor/map.rb +124 -0
- data/lib/statsample-ekatena/factor/parallelanalysis.rb +166 -0
- data/lib/statsample-ekatena/factor/pca.rb +242 -0
- data/lib/statsample-ekatena/factor/principalaxis.rb +243 -0
- data/lib/statsample-ekatena/factor/rotation.rb +198 -0
- data/lib/statsample-ekatena/formula/fit_model.rb +46 -0
- data/lib/statsample-ekatena/formula/formula.rb +306 -0
- data/lib/statsample-ekatena/graph.rb +11 -0
- data/lib/statsample-ekatena/graph/boxplot.rb +236 -0
- data/lib/statsample-ekatena/graph/histogram.rb +198 -0
- data/lib/statsample-ekatena/graph/scatterplot.rb +213 -0
- data/lib/statsample-ekatena/histogram.rb +180 -0
- data/lib/statsample-ekatena/matrix.rb +329 -0
- data/lib/statsample-ekatena/multiset.rb +310 -0
- data/lib/statsample-ekatena/regression.rb +65 -0
- data/lib/statsample-ekatena/regression/multiple.rb +89 -0
- data/lib/statsample-ekatena/regression/multiple/alglibengine.rb +128 -0
- data/lib/statsample-ekatena/regression/multiple/baseengine.rb +251 -0
- data/lib/statsample-ekatena/regression/multiple/gslengine.rb +129 -0
- data/lib/statsample-ekatena/regression/multiple/matrixengine.rb +205 -0
- data/lib/statsample-ekatena/regression/multiple/rubyengine.rb +86 -0
- data/lib/statsample-ekatena/regression/simple.rb +121 -0
- data/lib/statsample-ekatena/reliability.rb +150 -0
- data/lib/statsample-ekatena/reliability/icc.rb +415 -0
- data/lib/statsample-ekatena/reliability/multiscaleanalysis.rb +181 -0
- data/lib/statsample-ekatena/reliability/scaleanalysis.rb +233 -0
- data/lib/statsample-ekatena/reliability/skillscaleanalysis.rb +114 -0
- data/lib/statsample-ekatena/resample.rb +15 -0
- data/lib/statsample-ekatena/shorthand.rb +125 -0
- data/lib/statsample-ekatena/srs.rb +169 -0
- data/lib/statsample-ekatena/test.rb +82 -0
- data/lib/statsample-ekatena/test/bartlettsphericity.rb +45 -0
- data/lib/statsample-ekatena/test/chisquare.rb +73 -0
- data/lib/statsample-ekatena/test/f.rb +52 -0
- data/lib/statsample-ekatena/test/kolmogorovsmirnov.rb +63 -0
- data/lib/statsample-ekatena/test/levene.rb +88 -0
- data/lib/statsample-ekatena/test/t.rb +309 -0
- data/lib/statsample-ekatena/test/umannwhitney.rb +208 -0
- data/lib/statsample-ekatena/test/wilcoxonsignedrank.rb +90 -0
- data/lib/statsample-ekatena/vector.rb +19 -0
- data/lib/statsample-ekatena/version.rb +3 -0
- data/lib/statsample.rb +282 -0
- data/po/es/statsample.mo +0 -0
- data/po/es/statsample.po +959 -0
- data/po/statsample.pot +947 -0
- data/references.txt +24 -0
- data/statsample-ekatena.gemspec +49 -0
- data/test/fixtures/bank2.dat +200 -0
- data/test/fixtures/correlation_matrix.rb +17 -0
- data/test/fixtures/df.csv +15 -0
- data/test/fixtures/hartman_23.matrix +9 -0
- data/test/fixtures/stock_data.csv +500 -0
- data/test/fixtures/tetmat_matrix.txt +5 -0
- data/test/fixtures/tetmat_test.txt +1001 -0
- data/test/helpers_tests.rb +83 -0
- data/test/test_analysis.rb +176 -0
- data/test/test_anova_contrast.rb +36 -0
- data/test/test_anovaoneway.rb +26 -0
- data/test/test_anovatwoway.rb +37 -0
- data/test/test_anovatwowaywithdataset.rb +47 -0
- data/test/test_anovawithvectors.rb +102 -0
- data/test/test_awesome_print_bug.rb +16 -0
- data/test/test_bartlettsphericity.rb +25 -0
- data/test/test_bivariate.rb +164 -0
- data/test/test_codification.rb +78 -0
- data/test/test_crosstab.rb +67 -0
- data/test/test_dominance_analysis.rb +39 -0
- data/test/test_factor.rb +228 -0
- data/test/test_factor_map.rb +38 -0
- data/test/test_factor_pa.rb +56 -0
- data/test/test_fit_model.rb +88 -0
- data/test/test_ggobi.rb +35 -0
- data/test/test_gsl.rb +15 -0
- data/test/test_histogram.rb +109 -0
- data/test/test_matrix.rb +48 -0
- data/test/test_multiset.rb +176 -0
- data/test/test_regression.rb +231 -0
- data/test/test_reliability.rb +223 -0
- data/test/test_reliability_icc.rb +198 -0
- data/test/test_reliability_skillscale.rb +57 -0
- data/test/test_resample.rb +24 -0
- data/test/test_srs.rb +9 -0
- data/test/test_statistics.rb +69 -0
- data/test/test_stest.rb +69 -0
- data/test/test_stratified.rb +17 -0
- data/test/test_test_f.rb +33 -0
- data/test/test_test_kolmogorovsmirnov.rb +34 -0
- data/test/test_test_t.rb +62 -0
- data/test/test_umannwhitney.rb +27 -0
- data/test/test_vector.rb +12 -0
- data/test/test_wilcoxonsignedrank.rb +64 -0
- metadata +570 -0
@@ -0,0 +1,310 @@
|
|
1
|
+
module Statsample
|
2
|
+
# Multiset joins multiple datasets with the same fields and vectors
|
3
|
+
# but with different number of cases.
|
4
|
+
# This is the base class for stratified and cluster sampling estimation
|
5
|
+
class Multiset
  # Names of the fields that every dataset added to the multiset must have.
  attr_reader :fields
  # Hash mapping each dataset key to its Daru::DataFrame.
  attr_reader :datasets

  # To create a multiset
  # * Multiset.new(%w{f1 f2 f3}) # define only fields
  def initialize(fields)
    @fields = fields
    @datasets = {}
  end

  # Builds a multiset with one empty Daru::DataFrame (ordered by +fields+)
  # for every key in +ds_names+.
  def self.new_empty_vectors(fields, ds_names)
    ms = Multiset.new(fields)
    ds_names.each do |name|
      ms.add_dataset(name, Daru::DataFrame.new({}, order: fields))
    end
    ms
  end

  # Generates a new dataset as the union of the partial datasets.
  # Datasets with zero rows are skipped (see #each).
  # If a block is given, it is called with (key, copy_of_dataset) before the
  # copy's values are appended, so the stored datasets are never handed to
  # the block directly.
  # Each resulting vector takes the first truthy name found for that field.
  def union(&block)
    values = {}
    names = {}
    each do |key, ds|
      if block
        ds = ds.dup
        block.call(key, ds)
      end
      @fields.each do |f|
        vector = ds[f]
        (values[f] ||= []).concat(vector.to_a)
        names[f] ||= vector.name
      end
    end

    vectors = @fields.each_with_object({}) do |f, acc|
      acc[f] = Daru::Vector.new(values[f], name: names[f])
    end
    Daru::DataFrame.new(vectors, order: @fields)
  end

  # Sorted list of dataset keys.
  def datasets_names
    @datasets.keys.sort
  end

  # Number of datasets stored, including empty ones.
  def n_datasets
    @datasets.size
  end

  # Stores +ds+ under +key+.
  # Raises ArgumentError unless the dataset's vectors equal the multiset
  # fields (same names, same order).
  def add_dataset(key, ds)
    if ds.vectors.to_a != @fields
      raise ArgumentError, "Dataset(#{ds.vectors.to_a.to_s})must have the same fields of the Multiset(#{@fields})"
    end
    @datasets[key] = ds
  end

  # Sums, over every dataset, the value the block returns for
  # (dataset_key, dataset[field]).
  def sum_field(field)
    @datasets.inject(0) do |acc, (stratum_name, ds)|
      acc + yield(stratum_name, ds[field])
    end
  end

  # Collects the block results for (dataset_key, dataset[field]).
  def collect_vector(field)
    @datasets.collect { |key, ds| yield key, ds[field] }
  end

  # Yields (dataset_key, dataset[field]) for every dataset.
  def each_vector(field)
    @datasets.each { |key, ds| yield key, ds[field] }
  end

  # Dataset stored under key +i+.
  def [](i)
    @datasets[i]
  end

  # Yields (key, dataset) for every dataset that has at least one row.
  def each(&block)
    @datasets.each do |key, ds|
      next if ds.nrows == 0
      block.call(key, ds)
    end
  end
end
|
95
|
+
class StratifiedSample
  # Class-level estimators. Each +es+ argument is an array with one hash per
  # stratum, using the string keys:
  # * 'N': stratum population size
  # * 'n': stratum sample size
  # * 's': stratum standard deviation (mean estimators)
  # * 'p': stratum proportion (proportion estimators)
  # Suffixes: +wr+ / +wor+ = with / without replacement; +esd+ = estimated
  # standard deviation; +ksd+ presumably stands for known standard deviation.
  class << self
    # mean for an array of vectors: sum of all values over the total count.
    def mean(*vectors)
      n_total = 0
      total = vectors.inject(0) do |acc, v|
        n_total += v.size
        acc + v.sum
      end
      total.to_f / n_total
    end

    def standard_error_ksd_wr(es)
      n_total = 0
      sum = es.inject(0) do |acc, h|
        n_total += h['N']
        acc + ((h['N']**2 * h['s']**2) / h['n'].to_f)
      end
      (1.to_f / n_total) * Math.sqrt(sum)
    end

    def variance_ksd_wr(es)
      standard_error_ksd_wr(es)**2
    end

    # Total population size: sum of the 'N' of every stratum.
    def calculate_n_total(es)
      es.inject(0) { |acc, h| acc + h['N'] }
    end

    # Source : Cochran (1972)
    def variance_ksd_wor(es)
      n_total = calculate_n_total(es)
      es.inject(0) do |acc, h|
        val = ((h['N'].to_f / n_total)**2) * (h['s']**2 / h['n'].to_f) * (1 - (h['n'].to_f / h['N']))
        acc + val
      end
    end

    def standard_error_ksd_wor(es)
      Math.sqrt(variance_ksd_wor(es))
    end

    def variance_esd_wor(es)
      n_total = calculate_n_total(es)
      sum = es.inject(0) do |acc, h|
        val = h['N'] * (h['N'] - h['n']) * (h['s']**2 / h['n'].to_f)
        acc + val
      end
      (1.0 / (n_total**2)) * sum
    end

    # Square root of variance_esd_wor. That formula is algebraically equal to
    # variance_ksd_wor, which this method previously called; it now uses its
    # own namesake so the two cannot drift apart.
    def standard_error_esd_wor(es)
      Math.sqrt(variance_esd_wor(es))
    end

    # Based on http://stattrek.com/Lesson6/STRAnalysis.aspx
    def variance_esd_wr(es)
      n_total = calculate_n_total(es)
      sum = es.inject(0) do |acc, h|
        val = ((h['s']**2 * h['N']**2) / h['n'].to_f)
        acc + val
      end
      (1.0 / (n_total**2)) * sum
    end

    def standard_error_esd_wr(es)
      Math.sqrt(variance_esd_wr(es))
    end

    def proportion_variance_ksd_wor(es)
      n_total = calculate_n_total(es)
      es.inject(0) do |acc, h|
        val = (((h['N'].to_f / n_total)**2 * h['p'] * (1 - h['p'])) / (h['n'])) * (1 - (h['n'].to_f / h['N']))
        acc + val
      end
    end

    def proportion_sd_ksd_wor(es)
      Math.sqrt(proportion_variance_ksd_wor(es))
    end

    def proportion_sd_ksd_wr(es)
      n_total = calculate_n_total(es)
      sum = es.inject(0) do |acc, h|
        val = (h['N']**2 * h['p'] * (1 - h['p'])) / h['n'].to_f
        acc + val
      end
      Math.sqrt(sum) * (1.0 / n_total)
    end

    # Variance with replacement: the square of proportion_sd_ksd_wr.
    # (Previously squared the without-replacement variance.)
    def proportion_variance_ksd_wr(es)
      proportion_sd_ksd_wr(es)**2
    end

    # Estimated variance of a stratified proportion, without replacement:
    #   (1 / N**2) * sum( N_h**2 * (N_h - n_h) / (N_h - 1) * p_h * (1 - p_h) / (n_h - 1) )
    # Same per-stratum term as the instance method #variance_pst.
    # (Previously raised NameError on an undefined local and applied a
    # square root to the sum.)
    def proportion_variance_esd_wor(es)
      n_total = calculate_n_total(es)
      sum = es.inject(0) do |acc, h|
        val = (h['N']**2 * (h['N'] - h['n']) * h['p'] * (1.0 - h['p'])) / ((h['n'] - 1) * (h['N'] - 1))
        acc + val
      end
      (1.0 / n_total**2) * sum
    end

    # Square root of proportion_variance_esd_wor.
    # (Previously used the ksd variance, which is a different formula.)
    def proportion_sd_esd_wor(es)
      Math.sqrt(proportion_variance_esd_wor(es))
    end
  end

  # * ms: a Statsample::Multiset, one dataset per stratum
  # * strata_sizes: hash of stratum key => population size of that stratum
  # Raises TypeError if +ms+ is not a Multiset and ArgumentError if the keys
  # of +strata_sizes+ do not match the dataset names.
  def initialize(ms, strata_sizes)
    raise TypeError, "ms should be a Multiset" unless ms.is_a? Statsample::Multiset
    @ms = ms
    raise ArgumentError, "You should put a strata size for each dataset" if strata_sizes.keys.sort != ms.datasets_names
    @strata_sizes = strata_sizes
    @population_size = @strata_sizes.inject(0) { |acc, pair| acc + pair[1] }
    @strata_number = @ms.n_datasets
    @sample_size = @ms.datasets.inject(0) { |acc, pair| acc + pair[1].nrows }
  end

  # Number of strata
  attr_reader :strata_number

  # Population size. Equal to sum of strata sizes
  # Symbol: N
  attr_reader :population_size

  # Sample size. Equal to sum of sample of each stratum
  attr_reader :sample_size

  # Size of stratum +h+
  # Symbol: N<sub>h</sub>
  def stratum_size(h)
    @strata_sizes[h]
  end

  # The +field+ vector of every stratum dataset.
  def vectors_by_field(field)
    @ms.datasets.collect { |_key, ds| ds[field] }
  end

  # Population proportion based on strata
  def proportion(field, v = 1)
    @ms.sum_field(field) do |s_name, vector|
      stratum_ponderation(s_name) * vector.proportion(v)
    end
  end

  # Stratum ponderation.
  # Symbol: W\<sub>h\</sub>
  def stratum_ponderation(h)
    @strata_sizes[h].to_f / @population_size
  end
  alias_method :wh, :stratum_ponderation

  # Population mean based on strata
  def mean(field)
    @ms.sum_field(field) do |s_name, vector|
      stratum_ponderation(s_name) * vector.mean
    end
  end

  # Standard error with estimated population variance and without replacement.
  # Source: Cochran (1972)
  def standard_error_wor(field)
    StratifiedSample.standard_error_esd_wor(mean_estimates(field))
  end

  # Standard error with estimated population variance and without replacement.
  # Source: http://stattrek.com/Lesson6/STRAnalysis.aspx
  def standard_error_wor_2(field)
    sum = @ms.sum_field(field) do |s_name, vector|
      s_size = @strata_sizes[s_name]
      (s_size**2 * (1 - (vector.size.to_f / s_size)) * vector.variance_sample / vector.size.to_f)
    end
    (1 / @population_size.to_f) * Math.sqrt(sum)
  end

  # Standard error with estimated population variance and with replacement.
  def standard_error_wr(field)
    StratifiedSample.standard_error_esd_wr(mean_estimates(field))
  end

  # Standard deviation of the proportion of value +v+ in +field+,
  # estimated variance, without replacement.
  def proportion_sd_esd_wor(field, v = 1)
    es = @ms.collect_vector(field) do |s_n, vector|
      { 'N' => @strata_sizes[s_n], 'n' => vector.size, 'p' => vector.proportion(v) }
    end
    StratifiedSample.proportion_sd_esd_wor(es)
  end

  # Standard error of the proportion of value +v+ in +field+.
  # NOTE(review): every stratum term uses the overall proportion rather than
  # the stratum's own proportion; kept as found - confirm against the source
  # formula.
  def proportion_standard_error(field, v = 1)
    prop = proportion(field, v)
    sum = @ms.sum_field(field) do |s_name, vector|
      nh = vector.size
      s_size = @strata_sizes[s_name]
      # Float division: with integers nh / s_size truncates to 0 and the
      # finite population correction disappears.
      (s_size**2 * (1 - (nh.to_f / s_size)) * prop * (1 - prop) / (nh - 1))
    end
    (1.quo(@population_size)) * Math.sqrt(sum)
  end

  # Cochran(1971), p. 150
  def variance_pst(field, v = 1)
    sum = @ms.datasets.inject(0) do |acc, (stratum_name, ds)|
      # nrows is the row-count call used everywhere else in this file.
      nh = ds.nrows.to_f
      s_size = @strata_sizes[stratum_name]
      prop = ds[field].proportion(v)
      acc + (((s_size**2 * (s_size - nh)) / (s_size - 1)) * (prop * (1 - prop) / (nh - 1)))
    end
    (1 / @population_size.to_f**2) * sum
  end

  private

  # One {'N', 'n', 's'} hash per stratum for +field+, as expected by the
  # class-level mean estimators.
  def mean_estimates(field)
    @ms.collect_vector(field) do |s_n, vector|
      { 'N' => @strata_sizes[s_n], 'n' => vector.size, 's' => vector.sds }
    end
  end
end
|
310
|
+
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
require 'statsample/regression/simple'
|
2
|
+
require 'statsample/regression/multiple'
|
3
|
+
|
4
|
+
require 'statsample/regression/multiple/matrixengine'
|
5
|
+
require 'statsample/regression/multiple/rubyengine'
|
6
|
+
require 'statsample/regression/multiple/gslengine'
|
7
|
+
|
8
|
+
module Statsample
  # = Module for regression procedures.
  # Use the methods of this module to generate an analysis.
  # If you need more control, you can create and drive directly the objects
  # that compute the regressions.
  #
  # * Simple Regression :  Statsample::Regression::Simple
  # * Multiple Regression: Statsample::Regression::Multiple
  module Regression
    # Error class for linear dependency between variables. It is only defined
    # here; whatever raises it lives outside this file.
    # NOTE(review): inherits from Exception, so a bare `rescue` will not
    # catch it - kept as-is for backward compatibility.
    LinearDependency = Class.new(Exception)

    # Create a Statsample::Regression::Simple object, for simple regression
    # * x: independent Vector
    # * y: dependent Vector
    # <b>Usage:</b>
    #   x = Daru::Vector.new(100.times.collect {|i| rand(100)})
    #   y = Daru::Vector.new(100.times.collect {|i| 2+x[i]*2+rand()})
    #   sr=Statsample::Regression.simple(x,y)
    #   sr.a
    #   => 2.51763295177808
    #   sr.b
    #   => 1.99973746599856
    #   sr.r
    #   => 0.999987881153254
    def self.simple(x, y)
      Statsample::Regression::Simple.new_from_vectors(x, y)
    end

    # Creates one of the Statsample::Regression::Multiple objects,
    # for OLS multiple regression.
    # Parameters:
    # * <tt>ds</tt>: Dataset.
    # * y: Name of dependent variable.
    # * opts: A hash with options. It is not modified; a copy without
    #   :missing_data is handed to the engine.
    #   * missing_data: Could be
    #     * :listwise: delete cases with one or more empty data (default).
    #     * :pairwise: uses correlation matrix. Use with caution.
    #
    # <b>Usage:</b>
    #   lr=Statsample::Regression::multiple(ds,:y)
    def self.multiple(ds, y_var, opts = Hash.new)
      # Work on a copy so the caller's hash keeps its :missing_data entry.
      opts = opts.dup
      missing_data = opts[:missing_data].nil? ? :listwise : opts.delete(:missing_data)

      if missing_data == :pairwise
        # The dataset is passed through untouched in pairwise mode.
        return Statsample::Regression::Multiple::RubyEngine.new(ds, y_var, opts)
      end

      # The GSL engine is switched off by the trailing `&& false`; the
      # has_gsl? call itself is preserved.
      if Statsample.has_gsl? && false
        Statsample::Regression::Multiple::GslEngine.new(ds, y_var, opts)
      else
        # Listwise deletion: drop rows containing any Daru missing value.
        complete_cases = ds.reject_values(*Daru::MISSING_VALUES)
        Statsample::Regression::Multiple::RubyEngine.new(complete_cases, y_var, opts)
      end
    end
  end
end
|
@@ -0,0 +1,89 @@
|
|
1
|
+
require 'statsample/regression/multiple/baseengine'
|
2
|
+
module Statsample
  module Regression
    # Module for OLS Multiple Regression Analysis.
    #
    # Use:
    #
    #  require 'statsample'
    #  a = Daru::Vector.new(1000.times.collect {rand})
    #  b = Daru::Vector.new(1000.times.collect {rand})
    #  c = Daru::Vector.new(1000.times.collect {rand})
    #  ds= Daru::DataFrame.new({:a => a,:b => b,:c => c})
    #  ds[:y]=ds.collect{|row| row[:a]*5 + row[:b]*3 + row[:c]*2 + rand()}
    #  lr=Statsample::Regression.multiple(ds, :y)
    #  puts lr.summary
    #  Summary for regression of a,b,c over y
    #  *************************************************************
    #  Engine: Statsample::Regression::Multiple::AlglibEngine
    #  Cases(listwise)=1000(1000)
    #  r=0.986
    #  r2=0.973
    #  Equation=0.504+5.011a + 2.995b + 1.988c
    #  ----------------------------
    #  ANOVA TABLE
    #  --------------------------------------------------------------
    #  |  source     | ss       | df  | ms      | f         | s     |
    #  --------------------------------------------------------------
    #  |  Regression | 2979.321 | 3   | 993.107 | 12040.067 | 0.000 |
    #  |  Error      | 82.154   | 996 | 0.082   |           |       |
    #  |  Total      | 3061.475 | 999 |         |           |       |
    #  --------------------------------------------------------------
    #  Beta coefficientes
    #  -----------------------------------------------
    #  |  coeff    | b     | beta  | se    | t       |
    #  -----------------------------------------------
    #  |  Constant | 0.504 | -     | 0.030 | 16.968  |
    #  |  a        | 5.011 | 0.832 | 0.031 | 159.486 |
    #  |  b        | 2.995 | 0.492 | 0.032 | 94.367  |
    #  |  c        | 1.988 | 0.323 | 0.032 | 62.132  |
    #  -----------------------------------------------
    #
    module Multiple
      # Obtain r2 for regressors: the [0, 0] cell of rxy' * rxx^-1 * rxy.
      def self.r2_from_matrices(rxx, rxy)
        (rxy.transpose * rxx.inverse * rxy)[0, 0]
      end

      # Statistics for a set of dependent variables (+y_var+, a list of
      # field names) against the remaining fields of a covariance matrix.
      class MultipleDependent
        # * matrix: extended in place with Statsample::CovariateMatrix
        # * y_var: names of the dependent variables
        # * opts: accepted but not read here
        def initialize(matrix, y_var, opts = Hash.new)
          matrix.extend Statsample::CovariateMatrix
          @matrix = matrix
          @y_var = y_var
          @fields = matrix.fields - y_var
          @q = @y_var.size

          # Correlation matrix and its X / Y blocks.
          @matrix_cor = matrix.correlation
          @matrix_cor_xx = @matrix_cor.submatrix(@fields)
          @matrix_cor_yy = @matrix_cor.submatrix(y_var, y_var)

          # Blocks of the original matrix.
          @sxx = @matrix.submatrix(@fields)
          @syy = @matrix.submatrix(y_var, y_var)
          @sxy = @matrix.submatrix(@fields, y_var)
          @syx = @sxy.t
        end

        # Always 0.0.
        def significance
          0.0
        end

        # 1 - |R| / (|Ryy| * |Rxx|), computed on the correlation matrix.
        def r2yx
          denominator = @matrix_cor_yy.determinant * @matrix_cor_xx.determinant
          1 - @matrix_cor.determinant.quo(denominator)
        end

        # Residual covariance of Y after accounting for its linear relation
        # with X.
        def syyx
          @syy - @syx * @sxx.inverse * @sxy
        end

        # 1 - |Syy.x| / |Syy|.
        def r2yx_covariance
          1 - syyx.determinant.quo(@syy.determinant)
        end

        # q - trace(Syy^-1 * Syy.x), where q is the number of dependent
        # variables.
        def vxy
          @q - (@syy.inverse * syyx).trace
        end

        # vxy divided by the number of dependent variables.
        def p2yx
          vxy.quo(@q)
        end
      end
    end
  end
end
|