statsample 0.6.1 → 0.6.2
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +4 -0
- data/Manifest.txt +8 -19
- data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
- data/demo/dominance_analysis_bootstrap.rb +20 -0
- data/demo/dominanceanalysis.rb +11 -0
- data/demo/multiple_regression.rb +40 -0
- data/demo/polychoric.rb +13 -0
- data/demo/tetrachoric.rb +10 -0
- data/lib/distribution.rb +1 -0
- data/lib/distribution/normalbivariate.rb +100 -0
- data/lib/statsample.rb +4 -105
- data/lib/statsample/bivariate.rb +5 -1
- data/lib/statsample/bivariate/polychoric.rb +581 -0
- data/lib/statsample/bivariate/tetrachoric.rb +37 -5
- data/lib/statsample/converters.rb +11 -0
- data/lib/statsample/dominanceanalysis.rb +104 -90
- data/lib/statsample/dominanceanalysis/bootstrap.rb +160 -131
- data/lib/statsample/factor/pca.rb +1 -2
- data/lib/statsample/factor/principalaxis.rb +2 -2
- data/lib/statsample/graph/svghistogram.rb +170 -172
- data/lib/statsample/matrix.rb +79 -0
- data/lib/statsample/mle.rb +6 -4
- data/lib/statsample/mle/probit.rb +0 -1
- data/lib/statsample/regression/multiple/alglibengine.rb +23 -23
- data/lib/statsample/regression/multiple/baseengine.rb +112 -113
- data/lib/statsample/regression/multiple/gslengine.rb +91 -94
- data/lib/statsample/regression/multiple/rubyengine.rb +1 -1
- data/lib/statsample/srs.rb +1 -1
- data/lib/statsample/test.rb +0 -1
- data/lib/statsample/test/umannwhitney.rb +8 -5
- data/po/es/statsample.po +201 -39
- data/po/statsample.pot +184 -32
- data/test/test_bivariate.rb +21 -2
- data/test/test_distribution.rb +58 -40
- data/test/test_factor.rb +0 -1
- data/test/test_gsl.rb +13 -14
- data/test/test_regression.rb +1 -1
- data/test/test_statistics.rb +1 -4
- metadata +10 -21
- data/demo/benchmark.rb +0 -76
- data/demo/chi-square.rb +0 -44
- data/demo/crosstab.rb +0 -7
- data/demo/dice.rb +0 -13
- data/demo/distribution_t.rb +0 -95
- data/demo/graph.rb +0 -9
- data/demo/item_analysis.rb +0 -30
- data/demo/mean.rb +0 -81
- data/demo/nunnally_6.rb +0 -34
- data/demo/pca.rb +0 -29
- data/demo/proportion.rb +0 -57
- data/demo/regression.rb +0 -82
- data/demo/sample_test.csv +0 -113
- data/demo/spss_matrix.rb +0 -3
- data/demo/strata_proportion.rb +0 -152
- data/demo/stratum.rb +0 -141
- data/demo/t-student.rb +0 -17
- data/demo/umann.rb +0 -8
- data/lib/matrix_extension.rb +0 -92
@@ -27,7 +27,7 @@ module Statsample
|
|
27
27
|
# See http://www.john-uebersax.com/stat/tetra.htm for extensive
|
28
28
|
# documentation about tetrachoric correlation.
|
29
29
|
#
|
30
|
-
# This class uses Brown(1977) algorithm. You can see FORTRAN code on http://lib.stat.cmu.edu/apstat/116
|
30
|
+
# This class uses the Brown (1977) algorithm. You can see the FORTRAN code at http://lib.stat.cmu.edu/apstat/116
|
31
31
|
#
|
32
32
|
# == References:
|
33
33
|
# * Brown, MB. (1977) Algorithm AS 116: the tetrachoric correlation and its standard error. _Applied Statistics, 26_, 343-351.
|
@@ -52,8 +52,10 @@ module Statsample
|
|
52
52
|
|
53
53
|
|
54
54
|
class Tetrachoric
|
55
|
-
|
55
|
+
include GetText
|
56
|
+
bindtextdomain("statsample")
|
56
57
|
attr_reader :r
|
58
|
+
attr_accessor :name
|
57
59
|
|
58
60
|
TWOPI=Math::PI*2
|
59
61
|
SQT2PI= 2.50662827
|
@@ -67,10 +69,14 @@ module Statsample
|
|
67
69
|
NITER = 25
|
68
70
|
X=[0,0.9972638618, 0.9856115115, 0.9647622556, 0.9349060759, 0.8963211558, 0.8493676137, 0.7944837960, 0.7321821187, 0.6630442669, 0.5877157572, 0.5068999089, 0.4213512761, 0.3318686023, 0.2392873623, 0.1444719616, 0.0483076657]
|
69
71
|
W=[0, 0.0070186100, 0.0162743947, 0.0253920653, 0.0342738629, 0.0428358980, 0.0509980593, 0.0586840935, 0.0658222228, 0.0723457941, 0.0781938958, 0.0833119242, 0.0876520930, 0.0911738787, 0.0938443991, 0.0956387201, 0.0965400885]
|
72
|
+
# Creates a Tetrachoric object based on a 2x2 Matrix.
|
73
|
+
def self.new_with_matrix(m)
|
74
|
+
Tetrachoric.new(m[0,0], m[0,1], m[1,0],m[1,1])
|
75
|
+
end
|
70
76
|
# Creates a Tetrachoric object based on two vectors.
|
71
77
|
# The vectors are dichotomized by this method before the object is built.
|
72
78
|
def self.new_with_vectors(v1,v2)
|
73
|
-
v1a,v2a=Statsample.only_valid(v1,v2)
|
79
|
+
v1a, v2a=Statsample.only_valid(v1,v2)
|
74
80
|
v1a=v1a.dichotomize
|
75
81
|
v2a=v2a.dichotomize
|
76
82
|
raise "v1 have only 0" if v1a.factors==[0]
|
@@ -90,17 +96,42 @@ module Statsample
|
|
90
96
|
@sdr
|
91
97
|
end
|
92
98
|
# Threshold for variable x (rows)
|
99
|
+
# Point on gauss curve under X rater select cases
|
93
100
|
def threshold_x
|
94
|
-
@
|
101
|
+
@zab
|
95
102
|
end
|
96
103
|
|
97
104
|
# Threshold for variable y (columns)
|
105
|
+
# Point on gauss curve under Y rater select cases
|
106
|
+
|
98
107
|
def threshold_y
|
99
|
-
@
|
108
|
+
@zac
|
109
|
+
end
|
110
|
+
def summary
|
111
|
+
rp=ReportBuilder.new()
|
112
|
+
rp.add(self)
|
113
|
+
rp.to_text
|
114
|
+
end
|
115
|
+
|
116
|
+
def to_reportbuilder(generator)
|
117
|
+
section=ReportBuilder::Section.new(:name=>@name)
|
118
|
+
t=ReportBuilder::Table.new(:name=>_("Contingence Table"),:header=>["","Y=0","Y=1", "T"])
|
119
|
+
t.add_row(["X=0", @a,@b,@a+@b])
|
120
|
+
t.add_row(["X=1", @c,@d,@c+@d])
|
121
|
+
t.add_hr
|
122
|
+
t.add_row(["T", @a+@c,@b+@d,@a+@b+@c+@d])
|
123
|
+
section.add(t)
|
124
|
+
#generator.parse_element(t)
|
125
|
+
section.add(sprintf("r: %0.3f",r))
|
126
|
+
section.add(_("SE: %0.3f") % se)
|
127
|
+
section.add(_("Threshold X: %0.3f ") % [threshold_x] )
|
128
|
+
section.add(_("Threshold Y: %0.3f ") % [threshold_y] )
|
129
|
+
generator.parse_element(section)
|
100
130
|
end
|
101
131
|
|
102
132
|
def initialize(a,b,c,d)
|
103
133
|
@a,@b,@c,@d=a,b,c,d
|
134
|
+
@name=_("Tetrachoric correlation")
|
104
135
|
#
|
105
136
|
# CHECK IF ANY CELL FREQUENCY IS NEGATIVE
|
106
137
|
#
|
@@ -396,6 +427,7 @@ module Statsample
|
|
396
427
|
pdf = Math::exp(-0.5 * (@zac ** 2 - 2 * @r * @zac * @zab + @zab ** 2) / rrsq ** 2) / (TWOPI * rrsq)
|
397
428
|
@pac = Distribution::Normal.cdf((@zac - @r * @zab) / rrsq) - 0.5
|
398
429
|
@pab = Distribution::Normal.cdf((@zab - @r * @zac) / rrsq) - 0.5
|
430
|
+
|
399
431
|
@sdr = ((@aa+@dd) * (@bb + @cc)).quo(4) + @pab ** 2 * (@aa + @cc) * (@bb + @dd) + @pac ** 2 * (@aa + @bb) * (@cc + @dd) + 2.0 * @pab * @pac * (@aa * @dd - @bb * @cc) - @pab * (@aa * @bb - @cc * @dd) - @pac * (@aa * @cc - @bb * @dd)
|
400
432
|
@sdr=0 if (@sdr<0)
|
401
433
|
@sdr= Math::sqrt(@sdr) / (@tot * pdf * Math::sqrt(@tot))
|
@@ -76,6 +76,17 @@ module Statsample
|
|
76
76
|
class SpreadsheetBase
|
77
77
|
class << self
|
78
78
|
def extract_fields(row)
|
79
|
+
=begin
|
80
|
+
fields=[]
|
81
|
+
row.to_a.collect {|c|
|
82
|
+
if c.nil?
|
83
|
+
break
|
84
|
+
else
|
85
|
+
fields.push(c)
|
86
|
+
end
|
87
|
+
}
|
88
|
+
=end
|
89
|
+
raise "Should'nt be empty headers: [#{row.to_a.join(",")}]" if row.to_a.find_all {|c| c.nil?}.count>0
|
79
90
|
fields=row.to_a.collect{|c| c.downcase}
|
80
91
|
fields.recode_repeated
|
81
92
|
end
|
@@ -1,11 +1,13 @@
|
|
1
1
|
require 'statsample/dominanceanalysis/bootstrap'
|
2
2
|
module Statsample
|
3
|
-
# Dominance Analysis is a procedure based on an examination of the
|
3
|
+
# Dominance Analysis is a procedure based on an examination of the R^2 values
|
4
4
|
# for all possible subset models, to identify the relevance of one or more
|
5
5
|
# predictors in the prediction of the criterion.
|
6
6
|
#
|
7
7
|
# See Budescu (1993) and Azen & Budescu (2003) for more information.
|
8
|
-
#
|
8
|
+
#
|
9
|
+
# Example:
|
10
|
+
#
|
9
11
|
# a=1000.times.collect {rand}.to_scale
|
10
12
|
# b=1000.times.collect {rand}.to_scale
|
11
13
|
# c=1000.times.collect {rand}.to_scale
|
@@ -13,40 +15,42 @@ module Statsample
|
|
13
15
|
# ds['y']=ds.collect{|row| row['a']*5+row['b']*3+row['c']*2+rand()}
|
14
16
|
# da=Statsample::DominanceAnalysis.new(ds,'y')
|
15
17
|
# puts da.summary
|
16
|
-
# ==>
|
17
|
-
# Resultado del Analisis de Dominancia de a, b, c en y
|
18
18
|
#
|
19
|
-
#
|
20
|
-
#
|
21
|
-
#
|
22
|
-
#
|
23
|
-
#
|
24
|
-
# |
|
25
|
-
#
|
26
|
-
# |
|
27
|
-
#
|
28
|
-
# |
|
29
|
-
#
|
30
|
-
# |
|
31
|
-
#
|
32
|
-
# |
|
33
|
-
#
|
34
|
-
# |
|
35
|
-
#
|
36
|
-
# |
|
37
|
-
#
|
38
|
-
# |
|
39
|
-
#
|
40
|
-
#
|
41
|
-
#
|
19
|
+
# Output:
|
20
|
+
#
|
21
|
+
# Report: Report 2010-02-08 19:10:11 -0300
|
22
|
+
# Table: Dominance Analysis result
|
23
|
+
# ------------------------------------------------------------
|
24
|
+
# | | r2 | sign | a | b | c |
|
25
|
+
# ------------------------------------------------------------
|
26
|
+
# | Model 0 | | | 0.648 | 0.265 | 0.109 |
|
27
|
+
# ------------------------------------------------------------
|
28
|
+
# | a | 0.648 | 0.000 | -- | 0.229 | 0.104 |
|
29
|
+
# | b | 0.265 | 0.000 | 0.612 | -- | 0.104 |
|
30
|
+
# | c | 0.109 | 0.000 | 0.643 | 0.260 | -- |
|
31
|
+
# ------------------------------------------------------------
|
32
|
+
# | k=1 Average | | | 0.627 | 0.244 | 0.104 |
|
33
|
+
# ------------------------------------------------------------
|
34
|
+
# | a*b | 0.877 | 0.000 | -- | -- | 0.099 |
|
35
|
+
# | a*c | 0.752 | 0.000 | -- | 0.224 | -- |
|
36
|
+
# | b*c | 0.369 | 0.000 | 0.607 | -- | -- |
|
37
|
+
# ------------------------------------------------------------
|
38
|
+
# | k=2 Average | | | 0.607 | 0.224 | 0.099 |
|
39
|
+
# ------------------------------------------------------------
|
40
|
+
# | a*b*c | 0.976 | 0.000 | -- | -- | -- |
|
41
|
+
# ------------------------------------------------------------
|
42
|
+
# | Overall averages | | | 0.628 | 0.245 | 0.104 |
|
43
|
+
# ------------------------------------------------------------
|
42
44
|
#
|
43
|
-
#
|
44
|
-
#
|
45
|
-
#
|
46
|
-
#
|
47
|
-
# |
|
48
|
-
# |
|
49
|
-
#
|
45
|
+
# Table: Pairwise dominance
|
46
|
+
# -----------------------------------------
|
47
|
+
# | Pairs | Total | Conditional | General |
|
48
|
+
# -----------------------------------------
|
49
|
+
# | a - b | 1.0 | 1.0 | 1.0 |
|
50
|
+
# | a - c | 1.0 | 1.0 | 1.0 |
|
51
|
+
# | b - c | 1.0 | 1.0 | 1.0 |
|
52
|
+
# -----------------------------------------
|
53
|
+
|
50
54
|
#
|
51
55
|
# == References:
|
52
56
|
# * Budescu, D. V. (1993). Dominance analysis: a new approach to the problem of relative importance of predictors in multiple regression. _Psychological Bulletin, 114_, 542-551.
|
@@ -54,22 +58,30 @@ module Statsample
|
|
54
58
|
class DominanceAnalysis
|
55
59
|
include GetText
|
56
60
|
bindtextdomain("statsample")
|
61
|
+
# Class used to generate the regressions. Defaults to Statsample::Regression::Multiple::RubyEngine
|
62
|
+
attr_accessor :regression_class
|
63
|
+
# Name of analysis
|
64
|
+
attr_accessor :name
|
65
|
+
|
57
66
|
# Creates a new DominanceAnalysis object
|
58
67
|
# Params:
|
59
68
|
# * ds: A Dataset object
|
60
69
|
# * y_var: Name of dependent variable
|
61
|
-
# *
|
62
|
-
# Statsample::Regression::Multiple::BaseEngine
|
70
|
+
# * opts: Any other attribute of the class
|
63
71
|
#
|
64
|
-
def initialize(ds,y_var,
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
72
|
+
def initialize(ds,y_var, opts=Hash.new)
|
73
|
+
@y_var=y_var
|
74
|
+
@dy=ds[@y_var]
|
75
|
+
@ds=ds
|
76
|
+
@ds_indep=ds.dup(ds.fields-[y_var])
|
77
|
+
@fields=@ds_indep.fields
|
78
|
+
@regression_class=Statsample::Regression::Multiple::RubyEngine
|
79
|
+
@name=_("Dominance Analysis: %s over %s") % [ ds.fields.join(",") , @y_var]
|
80
|
+
opts.each{|k,v|
|
81
|
+
self.send("#{k}=",v) if self.respond_to? k
|
82
|
+
}
|
83
|
+
create_models
|
84
|
+
fill_models
|
73
85
|
end
|
74
86
|
def fill_models
|
75
87
|
@models.each do |m|
|
@@ -215,63 +227,65 @@ module Statsample
|
|
215
227
|
convert=data.collect {|i1| @fields[i1] }
|
216
228
|
@models.push(convert)
|
217
229
|
ds_prev=@ds.dup(convert+[@y_var])
|
218
|
-
modeldata=ModelData.new(convert,ds_prev, @y_var, @fields, @
|
230
|
+
modeldata=ModelData.new(convert,ds_prev, @y_var, @fields, @regression_class)
|
219
231
|
@models_data[convert.sort]=modeldata
|
220
232
|
end
|
221
233
|
end
|
222
234
|
end
|
223
|
-
def summary
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
235
|
+
def summary
|
236
|
+
rp=ReportBuilder.new()
|
237
|
+
rp.add(self)
|
238
|
+
rp.to_text
|
239
|
+
end
|
240
|
+
def to_reportbuilder(generator)
|
241
|
+
anchor=generator.add_toc_entry(_("DA: ")+@name)
|
242
|
+
generator.add_html "<div class='dominance-analysis'>#{@name}<a name='#{anchor}'></a>"
|
243
|
+
t=ReportBuilder::Table.new(:name=>_("Dominance Analysis result"))
|
244
|
+
t.header=["","r2",_("sign")]+@fields
|
229
245
|
row=[_("Model 0"),"",""]+@fields.collect{|f|
|
230
246
|
sprintf("%0.3f", md([f]).r2)
|
231
247
|
}
|
232
248
|
t.add_row(row)
|
233
249
|
t.add_horizontal_line
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
250
|
+
for i in 1..@fields.size
|
251
|
+
mk=md_k(i)
|
252
|
+
mk.each{|m|
|
253
|
+
t.add_row(m.add_table_row)
|
254
|
+
}
|
255
|
+
# Report averages
|
256
|
+
a=average_k(i)
|
257
|
+
if !a.nil?
|
258
|
+
t.add_horizontal_line
|
259
|
+
row=[_("k=%d Average") % i,"",""] + @fields.collect{|f|
|
260
|
+
sprintf("%0.3f",a[f])
|
261
|
+
}
|
262
|
+
t.add_row(row)
|
263
|
+
t.add_horizontal_line
|
264
|
+
|
265
|
+
end
|
266
|
+
|
249
267
|
end
|
250
268
|
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
sprintf("%0.3f",g[f])
|
258
|
-
}
|
259
|
-
t.add_row(row)
|
260
|
-
out.parse_table(t)
|
261
|
-
|
262
|
-
out.nl
|
263
|
-
out << _("Pairwise")+"\n"
|
264
|
-
td=total_dominance
|
265
|
-
cd=conditional_dominance
|
266
|
-
gd=general_dominance
|
267
|
-
t=Statsample::ReportTable.new([_("Pairs"),"T","C","G"])
|
268
|
-
pairs.each{|p|
|
269
|
-
name=p.join(" - ")
|
270
|
-
row=[name, sprintf("%0.1f",td[p]), sprintf("%0.1f",cd[p]), sprintf("%0.1f",gd[p])]
|
269
|
+
g=general_averages
|
270
|
+
t.add_horizontal_line
|
271
|
+
|
272
|
+
row=[_("Overall averages"),"",""]+@fields.collect{|f|
|
273
|
+
sprintf("%0.3f",g[f])
|
274
|
+
}
|
271
275
|
t.add_row(row)
|
272
|
-
|
273
|
-
|
274
|
-
|
276
|
+
generator.parse_element(t)
|
277
|
+
|
278
|
+
td=total_dominance
|
279
|
+
cd=conditional_dominance
|
280
|
+
gd=general_dominance
|
281
|
+
t=ReportBuilder::Table.new(:name=>_("Pairwise dominance"), :header=>[_("Pairs"),_("Total"),_("Conditional"),_("General")])
|
282
|
+
pairs.each{|p|
|
283
|
+
name=p.join(" - ")
|
284
|
+
row=[name, sprintf("%0.1f",td[p]), sprintf("%0.1f",cd[p]), sprintf("%0.1f",gd[p])]
|
285
|
+
t.add_row(row)
|
286
|
+
}
|
287
|
+
generator.parse_element(t)
|
288
|
+
generator.add_html("</div>")
|
275
289
|
end
|
276
290
|
class ModelData
|
277
291
|
attr_reader :contributions
|
@@ -1,137 +1,166 @@
|
|
1
1
|
module Statsample
|
2
|
-
class DominanceAnalysis
|
3
|
-
|
4
|
-
|
5
|
-
|
2
|
+
class DominanceAnalysis
|
3
|
+
# Generates bootstrap samples to identify the replicability of a Dominance Analysis. See Azen & Budescu (2003) for more information.
|
4
|
+
# References:
|
5
|
+
# * Azen, R. & Budescu, D.V. (2003). The dominance analysis approach for comparing predictors in multiple regression. _Psychological Methods, 8_(2), 129-148.
|
6
6
|
class Bootstrap
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
7
|
+
include GetText
|
8
|
+
include Writable
|
9
|
+
bindtextdomain("statsample")
|
10
|
+
# Total Dominance results
|
11
|
+
attr_reader :samples_td
|
12
|
+
# Conditional Dominance results
|
13
|
+
attr_reader :samples_cd
|
14
|
+
# General Dominance results
|
15
|
+
attr_reader :samples_gd
|
16
|
+
# General average results
|
17
|
+
attr_reader :samples_ga
|
18
|
+
# Name of fields
|
19
|
+
attr_reader :fields
|
20
|
+
# Regression class used for analysis
|
21
|
+
attr_accessor :regression_class
|
22
|
+
# Dataset
|
23
|
+
attr_accessor :ds
|
24
|
+
# Name of analysis
|
25
|
+
attr_accessor :name
|
26
|
+
# Confidence level used to compute the t value (defaults to 0.95)
|
27
|
+
attr_accessor :alpha
|
28
|
+
# Create a new Dominance Analysis Bootstrap Object
|
29
|
+
#
|
30
|
+
# * ds: A Dataset object
|
31
|
+
# * y_var: Name of dependent variable
|
32
|
+
# * opts: Any other attribute of the class
|
33
|
+
def initialize(ds,y_var, opts=Hash.new)
|
34
|
+
@ds=ds
|
35
|
+
@y_var=y_var
|
36
|
+
@n=ds.cases
|
37
|
+
@fields=ds.fields-[y_var]
|
38
|
+
@samples_ga=@fields.inject({}){|a,v| a[v]=[];a}
|
39
|
+
@n_samples=0
|
40
|
+
@alpha=0.95
|
41
|
+
@regression_class=Regression::Multiple::RubyEngine
|
42
|
+
@name=_("Bootstrap dominance Analysis: %s over %s") % [ ds.fields.join(",") , @y_var]
|
43
|
+
opts.each{|k,v|
|
44
|
+
self.send("#{k}=",v) if self.respond_to? k
|
45
|
+
}
|
46
|
+
create_samples_pairs
|
47
|
+
end
|
48
|
+
# lr_class is deprecated; use regression_class instead
|
49
|
+
alias_method :lr_class, :regression_class
|
50
|
+
def da
|
51
|
+
if @da.nil?
|
52
|
+
@da=DominanceAnalysis.new(@ds,@y_var, :regression_class => @regression_class)
|
53
|
+
end
|
54
|
+
@da
|
55
|
+
end
|
56
|
+
# Creates number_samples re-samples from the original dataset and stores the result of
|
57
|
+
# each sample on @samples_td, @samples_cd, @samples_gd, @samples_ga
|
58
|
+
#
|
59
|
+
# * number_samples: Number of new samples to add
|
60
|
+
# * n: size of each new sample. If nil, equal to original sample size
|
61
|
+
# * report: if true, echo number of current resample and total
|
62
|
+
def bootstrap(number_samples,n=nil,report=false)
|
63
|
+
number_samples.times{ |t|
|
64
|
+
@n_samples+=1
|
65
|
+
puts _("Bootstrap %d of %d") % [t+1, number_samples] if report
|
66
|
+
ds_boot=@ds.bootstrap(n)
|
67
|
+
da_1=DominanceAnalysis.new(ds_boot, @y_var, :regression_class => @regression_class)
|
68
|
+
da_1.total_dominance.each{|k,v|
|
69
|
+
@samples_td[k].push(v)
|
70
|
+
}
|
71
|
+
da_1.conditional_dominance.each{|k,v|
|
72
|
+
@samples_cd[k].push(v)
|
73
|
+
}
|
74
|
+
da_1.general_dominance.each{|k,v|
|
75
|
+
@samples_gd[k].push(v)
|
76
|
+
}
|
77
|
+
da_1.general_averages.each{|k,v|
|
78
|
+
@samples_ga[k].push(v)
|
79
|
+
}
|
80
|
+
}
|
81
|
+
end
|
82
|
+
def create_samples_pairs
|
83
|
+
@samples_td={}
|
84
|
+
@samples_cd={}
|
85
|
+
@samples_gd={}
|
86
|
+
@pairs=[]
|
87
|
+
c=Statsample::Combination.new(2,@fields.size)
|
88
|
+
c.each do |data|
|
89
|
+
convert=data.collect {|i| @fields[i] }
|
90
|
+
@pairs.push(convert)
|
91
|
+
[@samples_td, @samples_cd, @samples_gd].each{|s|
|
92
|
+
s[convert]=[]
|
54
93
|
}
|
55
94
|
end
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
[0, 0.5, 1].each{|n|
|
126
|
-
freqs[n]=0 if freqs[n].nil?
|
127
|
-
}
|
128
|
-
name=@ds.label(pair[0])+" - "+@ds.label(pair[1])
|
129
|
-
[name,f(ttd,1),f(std.mean,4),f(std.sd),f(freqs[1]), f(freqs[0]), f(freqs[0.5]), f(freqs[ttd])]
|
130
|
-
end
|
131
|
-
def f(v,n=3)
|
132
|
-
prec="%0.#{n}f"
|
133
|
-
sprintf(prec,v)
|
134
|
-
end
|
95
|
+
end
|
96
|
+
def summary
|
97
|
+
rp=ReportBuilder.new()
|
98
|
+
rp.add(self)
|
99
|
+
rp.to_text
|
100
|
+
end
|
101
|
+
def t
|
102
|
+
Distribution::T.p_value(1-((1-@alpha) / 2), @n_samples - 1)
|
103
|
+
end
|
104
|
+
def to_reportbuilder(generator)
|
105
|
+
raise "You should bootstrap first" if @n_samples==0
|
106
|
+
anchor=generator.add_toc_entry(_("DAB: ")+@name)
|
107
|
+
generator.add_html "<div class='dominance-analysis-bootstrap'>#{@name}<a name='#{anchor}'></a>"
|
108
|
+
|
109
|
+
generator.add_text _("Sample size: %d\n") % @n_samples
|
110
|
+
generator.add_text "t: #{t}\n"
|
111
|
+
generator.add_text _("Linear Regression Engine: %s") % @regression_class.name
|
112
|
+
|
113
|
+
table=ReportBuilder::Table.new(:name=>"Bootstrap report", :header => [_("pairs"), "sD","Dij", _("SE(Dij)"), "Pij", "Pji", "Pno", _("Reproducibility")])
|
114
|
+
table.add_row([_("Complete dominance")])
|
115
|
+
table.add_horizontal_line
|
116
|
+
@pairs.each{|pair|
|
117
|
+
std=@samples_td[pair].to_vector(:scale)
|
118
|
+
ttd=da.total_dominance_pairwise(pair[0],pair[1])
|
119
|
+
table.add_row(summary_pairs(pair,std,ttd))
|
120
|
+
}
|
121
|
+
table.add_horizontal_line
|
122
|
+
table.add_row([_("Conditional dominance")])
|
123
|
+
table.add_horizontal_line
|
124
|
+
@pairs.each{|pair|
|
125
|
+
std=@samples_cd[pair].to_vector(:scale)
|
126
|
+
ttd=da.conditional_dominance_pairwise(pair[0],pair[1])
|
127
|
+
table.add_row(summary_pairs(pair,std,ttd))
|
128
|
+
|
129
|
+
}
|
130
|
+
table.add_horizontal_line
|
131
|
+
table.add_row([_("General Dominance")])
|
132
|
+
table.add_horizontal_line
|
133
|
+
@pairs.each{|pair|
|
134
|
+
std=@samples_gd[pair].to_vector(:scale)
|
135
|
+
ttd=da.general_dominance_pairwise(pair[0],pair[1])
|
136
|
+
table.add_row(summary_pairs(pair,std,ttd))
|
137
|
+
}
|
138
|
+
generator.parse_element(table)
|
139
|
+
|
140
|
+
table=ReportBuilder::Table.new(:name=>_("General averages"), :header=>[_("var"), _("mean"), _("se"), _("p.5"), _("p.95")])
|
141
|
+
|
142
|
+
@fields.each{|f|
|
143
|
+
v=@samples_ga[f].to_vector(:scale)
|
144
|
+
row=[@ds.label(f), sprintf("%0.3f",v.mean), sprintf("%0.3f",v.sd), sprintf("%0.3f",v.percentil(5)),sprintf("%0.3f",v.percentil(95))]
|
145
|
+
table.add_row(row)
|
146
|
+
|
147
|
+
}
|
148
|
+
|
149
|
+
generator.parse_element(table)
|
150
|
+
generator.add_html("</div>")
|
151
|
+
end
|
152
|
+
def summary_pairs(pair,std,ttd)
|
153
|
+
freqs=std.proportions
|
154
|
+
[0, 0.5, 1].each{|n|
|
155
|
+
freqs[n]=0 if freqs[n].nil?
|
156
|
+
}
|
157
|
+
name=@ds.label(pair[0])+" - "+@ds.label(pair[1])
|
158
|
+
[name,f(ttd,1),f(std.mean,4),f(std.sd),f(freqs[1]), f(freqs[0]), f(freqs[0.5]), f(freqs[ttd])]
|
159
|
+
end
|
160
|
+
def f(v,n=3)
|
161
|
+
prec="%0.#{n}f"
|
162
|
+
sprintf(prec,v)
|
163
|
+
end
|
135
164
|
end
|
136
|
-
end
|
165
|
+
end
|
137
166
|
end
|