statsample 0.6.1 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +4 -0
- data/Manifest.txt +8 -19
- data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
- data/demo/dominance_analysis_bootstrap.rb +20 -0
- data/demo/dominanceanalysis.rb +11 -0
- data/demo/multiple_regression.rb +40 -0
- data/demo/polychoric.rb +13 -0
- data/demo/tetrachoric.rb +10 -0
- data/lib/distribution.rb +1 -0
- data/lib/distribution/normalbivariate.rb +100 -0
- data/lib/statsample.rb +4 -105
- data/lib/statsample/bivariate.rb +5 -1
- data/lib/statsample/bivariate/polychoric.rb +581 -0
- data/lib/statsample/bivariate/tetrachoric.rb +37 -5
- data/lib/statsample/converters.rb +11 -0
- data/lib/statsample/dominanceanalysis.rb +104 -90
- data/lib/statsample/dominanceanalysis/bootstrap.rb +160 -131
- data/lib/statsample/factor/pca.rb +1 -2
- data/lib/statsample/factor/principalaxis.rb +2 -2
- data/lib/statsample/graph/svghistogram.rb +170 -172
- data/lib/statsample/matrix.rb +79 -0
- data/lib/statsample/mle.rb +6 -4
- data/lib/statsample/mle/probit.rb +0 -1
- data/lib/statsample/regression/multiple/alglibengine.rb +23 -23
- data/lib/statsample/regression/multiple/baseengine.rb +112 -113
- data/lib/statsample/regression/multiple/gslengine.rb +91 -94
- data/lib/statsample/regression/multiple/rubyengine.rb +1 -1
- data/lib/statsample/srs.rb +1 -1
- data/lib/statsample/test.rb +0 -1
- data/lib/statsample/test/umannwhitney.rb +8 -5
- data/po/es/statsample.po +201 -39
- data/po/statsample.pot +184 -32
- data/test/test_bivariate.rb +21 -2
- data/test/test_distribution.rb +58 -40
- data/test/test_factor.rb +0 -1
- data/test/test_gsl.rb +13 -14
- data/test/test_regression.rb +1 -1
- data/test/test_statistics.rb +1 -4
- metadata +10 -21
- data/demo/benchmark.rb +0 -76
- data/demo/chi-square.rb +0 -44
- data/demo/crosstab.rb +0 -7
- data/demo/dice.rb +0 -13
- data/demo/distribution_t.rb +0 -95
- data/demo/graph.rb +0 -9
- data/demo/item_analysis.rb +0 -30
- data/demo/mean.rb +0 -81
- data/demo/nunnally_6.rb +0 -34
- data/demo/pca.rb +0 -29
- data/demo/proportion.rb +0 -57
- data/demo/regression.rb +0 -82
- data/demo/sample_test.csv +0 -113
- data/demo/spss_matrix.rb +0 -3
- data/demo/strata_proportion.rb +0 -152
- data/demo/stratum.rb +0 -141
- data/demo/t-student.rb +0 -17
- data/demo/umann.rb +0 -8
- data/lib/matrix_extension.rb +0 -92
@@ -27,7 +27,7 @@ module Statsample
|
|
27
27
|
# See http://www.john-uebersax.com/stat/tetra.htm for extensive
|
28
28
|
# documentation about tetrachoric correlation.
|
29
29
|
#
|
30
|
-
# This class uses Brown(1977) algorithm. You can see FORTRAN code on http://lib.stat.cmu.edu/apstat/116
|
30
|
+
# This class uses Brown (1977) algorithm. You can see FORTRAN code on http://lib.stat.cmu.edu/apstat/116
|
31
31
|
#
|
32
32
|
# == References:
|
33
33
|
# * Brown, MB. (1977) Algorithm AS 116: the tetrachoric correlation and its standard error. _Applied Statistics, 26_, 343-351.
|
@@ -52,8 +52,10 @@ module Statsample
|
|
52
52
|
|
53
53
|
|
54
54
|
class Tetrachoric
|
55
|
-
|
55
|
+
include GetText
|
56
|
+
bindtextdomain("statsample")
|
56
57
|
attr_reader :r
|
58
|
+
attr_accessor :name
|
57
59
|
|
58
60
|
TWOPI=Math::PI*2
|
59
61
|
SQT2PI= 2.50662827
|
@@ -67,10 +69,14 @@ module Statsample
|
|
67
69
|
NITER = 25
|
68
70
|
X=[0,0.9972638618, 0.9856115115, 0.9647622556, 0.9349060759, 0.8963211558, 0.8493676137, 0.7944837960, 0.7321821187, 0.6630442669, 0.5877157572, 0.5068999089, 0.4213512761, 0.3318686023, 0.2392873623, 0.1444719616, 0.0483076657]
|
69
71
|
W=[0, 0.0070186100, 0.0162743947, 0.0253920653, 0.0342738629, 0.0428358980, 0.0509980593, 0.0586840935, 0.0658222228, 0.0723457941, 0.0781938958, 0.0833119242, 0.0876520930, 0.0911738787, 0.0938443991, 0.0956387201, 0.0965400885]
|
72
|
+
# Creates a Tetrachoric object based on a 2x2 Matrix.
|
73
|
+
def self.new_with_matrix(m)
|
74
|
+
Tetrachoric.new(m[0,0], m[0,1], m[1,0],m[1,1])
|
75
|
+
end
|
70
76
|
# Creates a Tetrachoric object based on two vectors.
|
71
77
|
# The vectors are dichotomized before the correlation is computed.
|
72
78
|
def self.new_with_vectors(v1,v2)
|
73
|
-
v1a,v2a=Statsample.only_valid(v1,v2)
|
79
|
+
v1a, v2a=Statsample.only_valid(v1,v2)
|
74
80
|
v1a=v1a.dichotomize
|
75
81
|
v2a=v2a.dichotomize
|
76
82
|
raise "v1 have only 0" if v1a.factors==[0]
|
@@ -90,17 +96,42 @@ module Statsample
|
|
90
96
|
@sdr
|
91
97
|
end
|
92
98
|
# Threshold for variable x (rows)
|
99
|
+
# Point on the Gaussian curve under which rater X selects cases
|
93
100
|
def threshold_x
|
94
|
-
@
|
101
|
+
@zab
|
95
102
|
end
|
96
103
|
|
97
104
|
# Threshold for variable y (columns)
|
105
|
+
# Point on the Gaussian curve under which rater Y selects cases
|
106
|
+
|
98
107
|
def threshold_y
|
99
|
-
@
|
108
|
+
@zac
|
109
|
+
end
|
110
|
+
def summary
|
111
|
+
rp=ReportBuilder.new()
|
112
|
+
rp.add(self)
|
113
|
+
rp.to_text
|
114
|
+
end
|
115
|
+
|
116
|
+
def to_reportbuilder(generator)
|
117
|
+
section=ReportBuilder::Section.new(:name=>@name)
|
118
|
+
t=ReportBuilder::Table.new(:name=>_("Contingence Table"),:header=>["","Y=0","Y=1", "T"])
|
119
|
+
t.add_row(["X=0", @a,@b,@a+@b])
|
120
|
+
t.add_row(["X=1", @c,@d,@c+@d])
|
121
|
+
t.add_hr
|
122
|
+
t.add_row(["T", @a+@c,@b+@d,@a+@b+@c+@d])
|
123
|
+
section.add(t)
|
124
|
+
#generator.parse_element(t)
|
125
|
+
section.add(sprintf("r: %0.3f",r))
|
126
|
+
section.add(_("SE: %0.3f") % se)
|
127
|
+
section.add(_("Threshold X: %0.3f ") % [threshold_x] )
|
128
|
+
section.add(_("Threshold Y: %0.3f ") % [threshold_y] )
|
129
|
+
generator.parse_element(section)
|
100
130
|
end
|
101
131
|
|
102
132
|
def initialize(a,b,c,d)
|
103
133
|
@a,@b,@c,@d=a,b,c,d
|
134
|
+
@name=_("Tetrachoric correlation")
|
104
135
|
#
|
105
136
|
# CHECK IF ANY CELL FREQUENCY IS NEGATIVE
|
106
137
|
#
|
@@ -396,6 +427,7 @@ module Statsample
|
|
396
427
|
pdf = Math::exp(-0.5 * (@zac ** 2 - 2 * @r * @zac * @zab + @zab ** 2) / rrsq ** 2) / (TWOPI * rrsq)
|
397
428
|
@pac = Distribution::Normal.cdf((@zac - @r * @zab) / rrsq) - 0.5
|
398
429
|
@pab = Distribution::Normal.cdf((@zab - @r * @zac) / rrsq) - 0.5
|
430
|
+
|
399
431
|
@sdr = ((@aa+@dd) * (@bb + @cc)).quo(4) + @pab ** 2 * (@aa + @cc) * (@bb + @dd) + @pac ** 2 * (@aa + @bb) * (@cc + @dd) + 2.0 * @pab * @pac * (@aa * @dd - @bb * @cc) - @pab * (@aa * @bb - @cc * @dd) - @pac * (@aa * @cc - @bb * @dd)
|
400
432
|
@sdr=0 if (@sdr<0)
|
401
433
|
@sdr= Math::sqrt(@sdr) / (@tot * pdf * Math::sqrt(@tot))
|
@@ -76,6 +76,17 @@ module Statsample
|
|
76
76
|
class SpreadsheetBase
|
77
77
|
class << self
|
78
78
|
def extract_fields(row)
|
79
|
+
=begin
|
80
|
+
fields=[]
|
81
|
+
row.to_a.collect {|c|
|
82
|
+
if c.nil?
|
83
|
+
break
|
84
|
+
else
|
85
|
+
fields.push(c)
|
86
|
+
end
|
87
|
+
}
|
88
|
+
=end
|
89
|
+
raise "Should'nt be empty headers: [#{row.to_a.join(",")}]" if row.to_a.find_all {|c| c.nil?}.count>0
|
79
90
|
fields=row.to_a.collect{|c| c.downcase}
|
80
91
|
fields.recode_repeated
|
81
92
|
end
|
@@ -1,11 +1,13 @@
|
|
1
1
|
require 'statsample/dominanceanalysis/bootstrap'
|
2
2
|
module Statsample
|
3
|
-
# Dominance Analysis is a procedure based on an examination of the
|
3
|
+
# Dominance Analysis is a procedure based on an examination of the R^2 values
|
4
4
|
# for all possible subset models, to identify the relevance of one or more
|
5
5
|
# predictors in the prediction of the criterion.
|
6
6
|
#
|
7
7
|
# See Budescu (1993) and Azen & Budescu (2003) for more information.
|
8
|
-
#
|
8
|
+
#
|
9
|
+
# Example:
|
10
|
+
#
|
9
11
|
# a=1000.times.collect {rand}.to_scale
|
10
12
|
# b=1000.times.collect {rand}.to_scale
|
11
13
|
# c=1000.times.collect {rand}.to_scale
|
@@ -13,40 +15,42 @@ module Statsample
|
|
13
15
|
# ds['y']=ds.collect{|row| row['a']*5+row['b']*3+row['c']*2+rand()}
|
14
16
|
# da=Statsample::DominanceAnalysis.new(ds,'y')
|
15
17
|
# puts da.summary
|
16
|
-
# ==>
|
17
|
-
# Resultado del Analisis de Dominancia de a, b, c en y
|
18
18
|
#
|
19
|
-
#
|
20
|
-
#
|
21
|
-
#
|
22
|
-
#
|
23
|
-
#
|
24
|
-
# |
|
25
|
-
#
|
26
|
-
# |
|
27
|
-
#
|
28
|
-
# |
|
29
|
-
#
|
30
|
-
# |
|
31
|
-
#
|
32
|
-
# |
|
33
|
-
#
|
34
|
-
# |
|
35
|
-
#
|
36
|
-
# |
|
37
|
-
#
|
38
|
-
# |
|
39
|
-
#
|
40
|
-
#
|
41
|
-
#
|
19
|
+
# Output:
|
20
|
+
#
|
21
|
+
# Report: Report 2010-02-08 19:10:11 -0300
|
22
|
+
# Table: Dominance Analysis result
|
23
|
+
# ------------------------------------------------------------
|
24
|
+
# | | r2 | sign | a | b | c |
|
25
|
+
# ------------------------------------------------------------
|
26
|
+
# | Model 0 | | | 0.648 | 0.265 | 0.109 |
|
27
|
+
# ------------------------------------------------------------
|
28
|
+
# | a | 0.648 | 0.000 | -- | 0.229 | 0.104 |
|
29
|
+
# | b | 0.265 | 0.000 | 0.612 | -- | 0.104 |
|
30
|
+
# | c | 0.109 | 0.000 | 0.643 | 0.260 | -- |
|
31
|
+
# ------------------------------------------------------------
|
32
|
+
# | k=1 Average | | | 0.627 | 0.244 | 0.104 |
|
33
|
+
# ------------------------------------------------------------
|
34
|
+
# | a*b | 0.877 | 0.000 | -- | -- | 0.099 |
|
35
|
+
# | a*c | 0.752 | 0.000 | -- | 0.224 | -- |
|
36
|
+
# | b*c | 0.369 | 0.000 | 0.607 | -- | -- |
|
37
|
+
# ------------------------------------------------------------
|
38
|
+
# | k=2 Average | | | 0.607 | 0.224 | 0.099 |
|
39
|
+
# ------------------------------------------------------------
|
40
|
+
# | a*b*c | 0.976 | 0.000 | -- | -- | -- |
|
41
|
+
# ------------------------------------------------------------
|
42
|
+
# | Overall averages | | | 0.628 | 0.245 | 0.104 |
|
43
|
+
# ------------------------------------------------------------
|
42
44
|
#
|
43
|
-
#
|
44
|
-
#
|
45
|
-
#
|
46
|
-
#
|
47
|
-
# |
|
48
|
-
# |
|
49
|
-
#
|
45
|
+
# Table: Pairwise dominance
|
46
|
+
# -----------------------------------------
|
47
|
+
# | Pairs | Total | Conditional | General |
|
48
|
+
# -----------------------------------------
|
49
|
+
# | a - b | 1.0 | 1.0 | 1.0 |
|
50
|
+
# | a - c | 1.0 | 1.0 | 1.0 |
|
51
|
+
# | b - c | 1.0 | 1.0 | 1.0 |
|
52
|
+
# -----------------------------------------
|
53
|
+
|
50
54
|
#
|
51
55
|
# == References:
|
52
56
|
# * Budescu, D. V. (1993). Dominance analysis: a new approach to the problem of relative importance of predictors in multiple regression. _Psychological Bulletin, 114_, 542-551.
|
@@ -54,22 +58,30 @@ module Statsample
|
|
54
58
|
class DominanceAnalysis
|
55
59
|
include GetText
|
56
60
|
bindtextdomain("statsample")
|
61
|
+
# Class used to generate the regressions. Defaults to Statsample::Regression::Multiple::RubyEngine
|
62
|
+
attr_accessor :regression_class
|
63
|
+
# Name of analysis
|
64
|
+
attr_accessor :name
|
65
|
+
|
57
66
|
# Creates a new DominanceAnalysis object
|
58
67
|
# Params:
|
59
68
|
# * ds: A Dataset object
|
60
69
|
# * y_var: Name of dependent variable
|
61
|
-
# *
|
62
|
-
# Statsample::Regression::Multiple::BaseEngine
|
70
|
+
# * opts: Any other attribute of the class
|
63
71
|
#
|
64
|
-
def initialize(ds,y_var,
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
72
|
+
def initialize(ds,y_var, opts=Hash.new)
|
73
|
+
@y_var=y_var
|
74
|
+
@dy=ds[@y_var]
|
75
|
+
@ds=ds
|
76
|
+
@ds_indep=ds.dup(ds.fields-[y_var])
|
77
|
+
@fields=@ds_indep.fields
|
78
|
+
@regression_class=Statsample::Regression::Multiple::RubyEngine
|
79
|
+
@name=_("Dominance Analysis: %s over %s") % [ ds.fields.join(",") , @y_var]
|
80
|
+
opts.each{|k,v|
|
81
|
+
self.send("#{k}=",v) if self.respond_to? k
|
82
|
+
}
|
83
|
+
create_models
|
84
|
+
fill_models
|
73
85
|
end
|
74
86
|
def fill_models
|
75
87
|
@models.each do |m|
|
@@ -215,63 +227,65 @@ module Statsample
|
|
215
227
|
convert=data.collect {|i1| @fields[i1] }
|
216
228
|
@models.push(convert)
|
217
229
|
ds_prev=@ds.dup(convert+[@y_var])
|
218
|
-
modeldata=ModelData.new(convert,ds_prev, @y_var, @fields, @
|
230
|
+
modeldata=ModelData.new(convert,ds_prev, @y_var, @fields, @regression_class)
|
219
231
|
@models_data[convert.sort]=modeldata
|
220
232
|
end
|
221
233
|
end
|
222
234
|
end
|
223
|
-
def summary
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
235
|
+
def summary
|
236
|
+
rp=ReportBuilder.new()
|
237
|
+
rp.add(self)
|
238
|
+
rp.to_text
|
239
|
+
end
|
240
|
+
def to_reportbuilder(generator)
|
241
|
+
anchor=generator.add_toc_entry(_("DA: ")+@name)
|
242
|
+
generator.add_html "<div class='dominance-analysis'>#{@name}<a name='#{anchor}'></a>"
|
243
|
+
t=ReportBuilder::Table.new(:name=>_("Dominance Analysis result"))
|
244
|
+
t.header=["","r2",_("sign")]+@fields
|
229
245
|
row=[_("Model 0"),"",""]+@fields.collect{|f|
|
230
246
|
sprintf("%0.3f", md([f]).r2)
|
231
247
|
}
|
232
248
|
t.add_row(row)
|
233
249
|
t.add_horizontal_line
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
250
|
+
for i in 1..@fields.size
|
251
|
+
mk=md_k(i)
|
252
|
+
mk.each{|m|
|
253
|
+
t.add_row(m.add_table_row)
|
254
|
+
}
|
255
|
+
# Report averages
|
256
|
+
a=average_k(i)
|
257
|
+
if !a.nil?
|
258
|
+
t.add_horizontal_line
|
259
|
+
row=[_("k=%d Average") % i,"",""] + @fields.collect{|f|
|
260
|
+
sprintf("%0.3f",a[f])
|
261
|
+
}
|
262
|
+
t.add_row(row)
|
263
|
+
t.add_horizontal_line
|
264
|
+
|
265
|
+
end
|
266
|
+
|
249
267
|
end
|
250
268
|
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
sprintf("%0.3f",g[f])
|
258
|
-
}
|
259
|
-
t.add_row(row)
|
260
|
-
out.parse_table(t)
|
261
|
-
|
262
|
-
out.nl
|
263
|
-
out << _("Pairwise")+"\n"
|
264
|
-
td=total_dominance
|
265
|
-
cd=conditional_dominance
|
266
|
-
gd=general_dominance
|
267
|
-
t=Statsample::ReportTable.new([_("Pairs"),"T","C","G"])
|
268
|
-
pairs.each{|p|
|
269
|
-
name=p.join(" - ")
|
270
|
-
row=[name, sprintf("%0.1f",td[p]), sprintf("%0.1f",cd[p]), sprintf("%0.1f",gd[p])]
|
269
|
+
g=general_averages
|
270
|
+
t.add_horizontal_line
|
271
|
+
|
272
|
+
row=[_("Overall averages"),"",""]+@fields.collect{|f|
|
273
|
+
sprintf("%0.3f",g[f])
|
274
|
+
}
|
271
275
|
t.add_row(row)
|
272
|
-
|
273
|
-
|
274
|
-
|
276
|
+
generator.parse_element(t)
|
277
|
+
|
278
|
+
td=total_dominance
|
279
|
+
cd=conditional_dominance
|
280
|
+
gd=general_dominance
|
281
|
+
t=ReportBuilder::Table.new(:name=>_("Pairwise dominance"), :header=>[_("Pairs"),_("Total"),_("Conditional"),_("General")])
|
282
|
+
pairs.each{|p|
|
283
|
+
name=p.join(" - ")
|
284
|
+
row=[name, sprintf("%0.1f",td[p]), sprintf("%0.1f",cd[p]), sprintf("%0.1f",gd[p])]
|
285
|
+
t.add_row(row)
|
286
|
+
}
|
287
|
+
generator.parse_element(t)
|
288
|
+
generator.add_html("</div>")
|
275
289
|
end
|
276
290
|
class ModelData
|
277
291
|
attr_reader :contributions
|
@@ -1,137 +1,166 @@
|
|
1
1
|
module Statsample
|
2
|
-
class DominanceAnalysis
|
3
|
-
|
4
|
-
|
5
|
-
|
2
|
+
class DominanceAnalysis
|
3
|
+
# Generates bootstrap samples to identify the replicability of a Dominance Analysis. See Azen & Budescu (2003) for more information.
|
4
|
+
# References:
|
5
|
+
# * Azen, R. & Budescu, D.V. (2003). The dominance analysis approach for comparing predictors in multiple regression. _Psychological Methods, 8_(2), 129-148.
|
6
6
|
class Bootstrap
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
7
|
+
include GetText
|
8
|
+
include Writable
|
9
|
+
bindtextdomain("statsample")
|
10
|
+
# Total Dominance results
|
11
|
+
attr_reader :samples_td
|
12
|
+
# Conditional Dominance results
|
13
|
+
attr_reader :samples_cd
|
14
|
+
# General Dominance results
|
15
|
+
attr_reader :samples_gd
|
16
|
+
# General average results
|
17
|
+
attr_reader :samples_ga
|
18
|
+
# Name of fields
|
19
|
+
attr_reader :fields
|
20
|
+
# Regression class used for analysis
|
21
|
+
attr_accessor :regression_class
|
22
|
+
# Dataset
|
23
|
+
attr_accessor :ds
|
24
|
+
# Name of analysis
|
25
|
+
attr_accessor :name
|
26
|
+
# Confidence level used for the t statistic (initialized to 0.95)
|
27
|
+
attr_accessor :alpha
|
28
|
+
# Create a new Dominance Analysis Bootstrap Object
|
29
|
+
#
|
30
|
+
# * ds: A Dataset object
|
31
|
+
# * y_var: Name of dependent variable
|
32
|
+
# * opts: Any other attribute of the class
|
33
|
+
def initialize(ds,y_var, opts=Hash.new)
|
34
|
+
@ds=ds
|
35
|
+
@y_var=y_var
|
36
|
+
@n=ds.cases
|
37
|
+
@fields=ds.fields-[y_var]
|
38
|
+
@samples_ga=@fields.inject({}){|a,v| a[v]=[];a}
|
39
|
+
@n_samples=0
|
40
|
+
@alpha=0.95
|
41
|
+
@regression_class=Regression::Multiple::RubyEngine
|
42
|
+
@name=_("Bootstrap dominance Analysis: %s over %s") % [ ds.fields.join(",") , @y_var]
|
43
|
+
opts.each{|k,v|
|
44
|
+
self.send("#{k}=",v) if self.respond_to? k
|
45
|
+
}
|
46
|
+
create_samples_pairs
|
47
|
+
end
|
48
|
+
# lr_class is deprecated; use regression_class instead
|
49
|
+
alias_method :lr_class, :regression_class
|
50
|
+
def da
|
51
|
+
if @da.nil?
|
52
|
+
@da=DominanceAnalysis.new(@ds,@y_var, :regression_class => @regression_class)
|
53
|
+
end
|
54
|
+
@da
|
55
|
+
end
|
56
|
+
# Creates number_samples re-samples from the original dataset and stores the result of
|
57
|
+
# each sample on @samples_td, @samples_cd, @samples_gd, @samples_ga
|
58
|
+
#
|
59
|
+
# * number_samples: Number of new samples to add
|
60
|
+
# * n: size of each new sample. If nil, equal to original sample size
|
61
|
+
# * report: if true, prints the number of the current re-sample and the total
|
62
|
+
def bootstrap(number_samples,n=nil,report=false)
|
63
|
+
number_samples.times{ |t|
|
64
|
+
@n_samples+=1
|
65
|
+
puts _("Bootstrap %d of %d") % [t+1, number_samples] if report
|
66
|
+
ds_boot=@ds.bootstrap(n)
|
67
|
+
da_1=DominanceAnalysis.new(ds_boot, @y_var, :regression_class => @regression_class)
|
68
|
+
da_1.total_dominance.each{|k,v|
|
69
|
+
@samples_td[k].push(v)
|
70
|
+
}
|
71
|
+
da_1.conditional_dominance.each{|k,v|
|
72
|
+
@samples_cd[k].push(v)
|
73
|
+
}
|
74
|
+
da_1.general_dominance.each{|k,v|
|
75
|
+
@samples_gd[k].push(v)
|
76
|
+
}
|
77
|
+
da_1.general_averages.each{|k,v|
|
78
|
+
@samples_ga[k].push(v)
|
79
|
+
}
|
80
|
+
}
|
81
|
+
end
|
82
|
+
def create_samples_pairs
|
83
|
+
@samples_td={}
|
84
|
+
@samples_cd={}
|
85
|
+
@samples_gd={}
|
86
|
+
@pairs=[]
|
87
|
+
c=Statsample::Combination.new(2,@fields.size)
|
88
|
+
c.each do |data|
|
89
|
+
convert=data.collect {|i| @fields[i] }
|
90
|
+
@pairs.push(convert)
|
91
|
+
[@samples_td, @samples_cd, @samples_gd].each{|s|
|
92
|
+
s[convert]=[]
|
54
93
|
}
|
55
94
|
end
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
[0, 0.5, 1].each{|n|
|
126
|
-
freqs[n]=0 if freqs[n].nil?
|
127
|
-
}
|
128
|
-
name=@ds.label(pair[0])+" - "+@ds.label(pair[1])
|
129
|
-
[name,f(ttd,1),f(std.mean,4),f(std.sd),f(freqs[1]), f(freqs[0]), f(freqs[0.5]), f(freqs[ttd])]
|
130
|
-
end
|
131
|
-
def f(v,n=3)
|
132
|
-
prec="%0.#{n}f"
|
133
|
-
sprintf(prec,v)
|
134
|
-
end
|
95
|
+
end
|
96
|
+
def summary
|
97
|
+
rp=ReportBuilder.new()
|
98
|
+
rp.add(self)
|
99
|
+
rp.to_text
|
100
|
+
end
|
101
|
+
def t
|
102
|
+
Distribution::T.p_value(1-((1-@alpha) / 2), @n_samples - 1)
|
103
|
+
end
|
104
|
+
def to_reportbuilder(generator)
|
105
|
+
raise "You should bootstrap first" if @n_samples==0
|
106
|
+
anchor=generator.add_toc_entry(_("DAB: ")+@name)
|
107
|
+
generator.add_html "<div class='dominance-analysis-bootstrap'>#{@name}<a name='#{anchor}'></a>"
|
108
|
+
|
109
|
+
generator.add_text _("Sample size: %d\n") % @n_samples
|
110
|
+
generator.add_text "t: #{t}\n"
|
111
|
+
generator.add_text _("Linear Regression Engine: %s") % @regression_class.name
|
112
|
+
|
113
|
+
table=ReportBuilder::Table.new(:name=>"Bootstrap report", :header => [_("pairs"), "sD","Dij", _("SE(Dij)"), "Pij", "Pji", "Pno", _("Reproducibility")])
|
114
|
+
table.add_row([_("Complete dominance")])
|
115
|
+
table.add_horizontal_line
|
116
|
+
@pairs.each{|pair|
|
117
|
+
std=@samples_td[pair].to_vector(:scale)
|
118
|
+
ttd=da.total_dominance_pairwise(pair[0],pair[1])
|
119
|
+
table.add_row(summary_pairs(pair,std,ttd))
|
120
|
+
}
|
121
|
+
table.add_horizontal_line
|
122
|
+
table.add_row([_("Conditional dominance")])
|
123
|
+
table.add_horizontal_line
|
124
|
+
@pairs.each{|pair|
|
125
|
+
std=@samples_cd[pair].to_vector(:scale)
|
126
|
+
ttd=da.conditional_dominance_pairwise(pair[0],pair[1])
|
127
|
+
table.add_row(summary_pairs(pair,std,ttd))
|
128
|
+
|
129
|
+
}
|
130
|
+
table.add_horizontal_line
|
131
|
+
table.add_row([_("General Dominance")])
|
132
|
+
table.add_horizontal_line
|
133
|
+
@pairs.each{|pair|
|
134
|
+
std=@samples_gd[pair].to_vector(:scale)
|
135
|
+
ttd=da.general_dominance_pairwise(pair[0],pair[1])
|
136
|
+
table.add_row(summary_pairs(pair,std,ttd))
|
137
|
+
}
|
138
|
+
generator.parse_element(table)
|
139
|
+
|
140
|
+
table=ReportBuilder::Table.new(:name=>_("General averages"), :header=>[_("var"), _("mean"), _("se"), _("p.5"), _("p.95")])
|
141
|
+
|
142
|
+
@fields.each{|f|
|
143
|
+
v=@samples_ga[f].to_vector(:scale)
|
144
|
+
row=[@ds.label(f), sprintf("%0.3f",v.mean), sprintf("%0.3f",v.sd), sprintf("%0.3f",v.percentil(5)),sprintf("%0.3f",v.percentil(95))]
|
145
|
+
table.add_row(row)
|
146
|
+
|
147
|
+
}
|
148
|
+
|
149
|
+
generator.parse_element(table)
|
150
|
+
generator.add_html("</div>")
|
151
|
+
end
|
152
|
+
def summary_pairs(pair,std,ttd)
|
153
|
+
freqs=std.proportions
|
154
|
+
[0, 0.5, 1].each{|n|
|
155
|
+
freqs[n]=0 if freqs[n].nil?
|
156
|
+
}
|
157
|
+
name=@ds.label(pair[0])+" - "+@ds.label(pair[1])
|
158
|
+
[name,f(ttd,1),f(std.mean,4),f(std.sd),f(freqs[1]), f(freqs[0]), f(freqs[0.5]), f(freqs[ttd])]
|
159
|
+
end
|
160
|
+
def f(v,n=3)
|
161
|
+
prec="%0.#{n}f"
|
162
|
+
sprintf(prec,v)
|
163
|
+
end
|
135
164
|
end
|
136
|
-
end
|
165
|
+
end
|
137
166
|
end
|