statsample 0.6.1 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. data/History.txt +4 -0
  2. data/Manifest.txt +8 -19
  3. data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
  4. data/demo/dominance_analysis_bootstrap.rb +20 -0
  5. data/demo/dominanceanalysis.rb +11 -0
  6. data/demo/multiple_regression.rb +40 -0
  7. data/demo/polychoric.rb +13 -0
  8. data/demo/tetrachoric.rb +10 -0
  9. data/lib/distribution.rb +1 -0
  10. data/lib/distribution/normalbivariate.rb +100 -0
  11. data/lib/statsample.rb +4 -105
  12. data/lib/statsample/bivariate.rb +5 -1
  13. data/lib/statsample/bivariate/polychoric.rb +581 -0
  14. data/lib/statsample/bivariate/tetrachoric.rb +37 -5
  15. data/lib/statsample/converters.rb +11 -0
  16. data/lib/statsample/dominanceanalysis.rb +104 -90
  17. data/lib/statsample/dominanceanalysis/bootstrap.rb +160 -131
  18. data/lib/statsample/factor/pca.rb +1 -2
  19. data/lib/statsample/factor/principalaxis.rb +2 -2
  20. data/lib/statsample/graph/svghistogram.rb +170 -172
  21. data/lib/statsample/matrix.rb +79 -0
  22. data/lib/statsample/mle.rb +6 -4
  23. data/lib/statsample/mle/probit.rb +0 -1
  24. data/lib/statsample/regression/multiple/alglibengine.rb +23 -23
  25. data/lib/statsample/regression/multiple/baseengine.rb +112 -113
  26. data/lib/statsample/regression/multiple/gslengine.rb +91 -94
  27. data/lib/statsample/regression/multiple/rubyengine.rb +1 -1
  28. data/lib/statsample/srs.rb +1 -1
  29. data/lib/statsample/test.rb +0 -1
  30. data/lib/statsample/test/umannwhitney.rb +8 -5
  31. data/po/es/statsample.po +201 -39
  32. data/po/statsample.pot +184 -32
  33. data/test/test_bivariate.rb +21 -2
  34. data/test/test_distribution.rb +58 -40
  35. data/test/test_factor.rb +0 -1
  36. data/test/test_gsl.rb +13 -14
  37. data/test/test_regression.rb +1 -1
  38. data/test/test_statistics.rb +1 -4
  39. metadata +10 -21
  40. data/demo/benchmark.rb +0 -76
  41. data/demo/chi-square.rb +0 -44
  42. data/demo/crosstab.rb +0 -7
  43. data/demo/dice.rb +0 -13
  44. data/demo/distribution_t.rb +0 -95
  45. data/demo/graph.rb +0 -9
  46. data/demo/item_analysis.rb +0 -30
  47. data/demo/mean.rb +0 -81
  48. data/demo/nunnally_6.rb +0 -34
  49. data/demo/pca.rb +0 -29
  50. data/demo/proportion.rb +0 -57
  51. data/demo/regression.rb +0 -82
  52. data/demo/sample_test.csv +0 -113
  53. data/demo/spss_matrix.rb +0 -3
  54. data/demo/strata_proportion.rb +0 -152
  55. data/demo/stratum.rb +0 -141
  56. data/demo/t-student.rb +0 -17
  57. data/demo/umann.rb +0 -8
  58. data/lib/matrix_extension.rb +0 -92
@@ -27,7 +27,7 @@ module Statsample
27
27
  # See http://www.john-uebersax.com/stat/tetra.htm for extensive
28
28
  # documentation about tetrachoric correlation.
29
29
  #
30
- # This class uses Brown(1977) algorithm. You can see FORTRAN code on http://lib.stat.cmu.edu/apstat/116
30
+ # This class uses Brown (1977) algorithm. You can see FORTRAN code on http://lib.stat.cmu.edu/apstat/116
31
31
  #
32
32
  # == References:
33
33
  # * Brown, MB. (1977) Algorithm AS 116: the tetrachoric correlation and its standard error. _Applied Statistics, 26_, 343-351.
@@ -52,8 +52,10 @@ module Statsample
52
52
 
53
53
 
54
54
  class Tetrachoric
55
-
55
+ include GetText
56
+ bindtextdomain("statsample")
56
57
  attr_reader :r
58
+ attr_accessor :name
57
59
 
58
60
  TWOPI=Math::PI*2
59
61
  SQT2PI= 2.50662827
@@ -67,10 +69,14 @@ module Statsample
67
69
  NITER = 25
68
70
  X=[0,0.9972638618, 0.9856115115, 0.9647622556, 0.9349060759, 0.8963211558, 0.8493676137, 0.7944837960, 0.7321821187, 0.6630442669, 0.5877157572, 0.5068999089, 0.4213512761, 0.3318686023, 0.2392873623, 0.1444719616, 0.0483076657]
69
71
  W=[0, 0.0070186100, 0.0162743947, 0.0253920653, 0.0342738629, 0.0428358980, 0.0509980593, 0.0586840935, 0.0658222228, 0.0723457941, 0.0781938958, 0.0833119242, 0.0876520930, 0.0911738787, 0.0938443991, 0.0956387201, 0.0965400885]
72
+ # Creates a Tetrachoric object based on a 2x2 Matrix.
73
+ def self.new_with_matrix(m)
74
+ Tetrachoric.new(m[0,0], m[0,1], m[1,0],m[1,1])
75
+ end
70
76
  # Creates a Tetrachoric object based on two vectors.
71
77
  # The vectors are dichotomized previously.
72
78
  def self.new_with_vectors(v1,v2)
73
- v1a,v2a=Statsample.only_valid(v1,v2)
79
+ v1a, v2a=Statsample.only_valid(v1,v2)
74
80
  v1a=v1a.dichotomize
75
81
  v2a=v2a.dichotomize
76
82
  raise "v1 have only 0" if v1a.factors==[0]
@@ -90,17 +96,42 @@ module Statsample
90
96
  @sdr
91
97
  end
92
98
  # Threshold for variable x (rows)
99
+ # Point on gauss curve under X rater select cases
93
100
  def threshold_x
94
- @zac
101
+ @zab
95
102
  end
96
103
 
97
104
  # Threshold for variable y (columns)
105
+ # Point on gauss curve under Y rater select cases
106
+
98
107
  def threshold_y
99
- @zab
108
+ @zac
109
+ end
110
+ def summary
111
+ rp=ReportBuilder.new()
112
+ rp.add(self)
113
+ rp.to_text
114
+ end
115
+
116
+ def to_reportbuilder(generator)
117
+ section=ReportBuilder::Section.new(:name=>@name)
118
+ t=ReportBuilder::Table.new(:name=>_("Contingence Table"),:header=>["","Y=0","Y=1", "T"])
119
+ t.add_row(["X=0", @a,@b,@a+@b])
120
+ t.add_row(["X=1", @c,@d,@c+@d])
121
+ t.add_hr
122
+ t.add_row(["T", @a+@c,@b+@d,@a+@b+@c+@d])
123
+ section.add(t)
124
+ #generator.parse_element(t)
125
+ section.add(sprintf("r: %0.3f",r))
126
+ section.add(_("SE: %0.3f") % se)
127
+ section.add(_("Threshold X: %0.3f ") % [threshold_x] )
128
+ section.add(_("Threshold Y: %0.3f ") % [threshold_y] )
129
+ generator.parse_element(section)
100
130
  end
101
131
 
102
132
  def initialize(a,b,c,d)
103
133
  @a,@b,@c,@d=a,b,c,d
134
+ @name=_("Tetrachoric correlation")
104
135
  #
105
136
  # CHECK IF ANY CELL FREQUENCY IS NEGATIVE
106
137
  #
@@ -396,6 +427,7 @@ module Statsample
396
427
  pdf = Math::exp(-0.5 * (@zac ** 2 - 2 * @r * @zac * @zab + @zab ** 2) / rrsq ** 2) / (TWOPI * rrsq)
397
428
  @pac = Distribution::Normal.cdf((@zac - @r * @zab) / rrsq) - 0.5
398
429
  @pab = Distribution::Normal.cdf((@zab - @r * @zac) / rrsq) - 0.5
430
+
399
431
  @sdr = ((@aa+@dd) * (@bb + @cc)).quo(4) + @pab ** 2 * (@aa + @cc) * (@bb + @dd) + @pac ** 2 * (@aa + @bb) * (@cc + @dd) + 2.0 * @pab * @pac * (@aa * @dd - @bb * @cc) - @pab * (@aa * @bb - @cc * @dd) - @pac * (@aa * @cc - @bb * @dd)
400
432
  @sdr=0 if (@sdr<0)
401
433
  @sdr= Math::sqrt(@sdr) / (@tot * pdf * Math::sqrt(@tot))
@@ -76,6 +76,17 @@ module Statsample
76
76
  class SpreadsheetBase
77
77
  class << self
78
78
  def extract_fields(row)
79
+ =begin
80
+ fields=[]
81
+ row.to_a.collect {|c|
82
+ if c.nil?
83
+ break
84
+ else
85
+ fields.push(c)
86
+ end
87
+ }
88
+ =end
89
+ raise "Should'nt be empty headers: [#{row.to_a.join(",")}]" if row.to_a.find_all {|c| c.nil?}.count>0
79
90
  fields=row.to_a.collect{|c| c.downcase}
80
91
  fields.recode_repeated
81
92
  end
@@ -1,11 +1,13 @@
1
1
  require 'statsample/dominanceanalysis/bootstrap'
2
2
  module Statsample
3
- # Dominance Analysis is a procedure based on an examination of the R2 values
3
+ # Dominance Analysis is a procedure based on an examination of the R^2 values
4
4
  # for all possible subset models, to identify the relevance of one or more
5
5
  # predictors in the prediction of the criterion.
6
6
  #
7
7
  # See Budescu (1993) and Azen & Budescu (2003) for more information.
8
- # Use:
8
+ #
9
+ # Example:
10
+ #
9
11
  # a=1000.times.collect {rand}.to_scale
10
12
  # b=1000.times.collect {rand}.to_scale
11
13
  # c=1000.times.collect {rand}.to_scale
@@ -13,40 +15,42 @@ module Statsample
13
15
  # ds['y']=ds.collect{|row| row['a']*5+row['b']*3+row['c']*2+rand()}
14
16
  # da=Statsample::DominanceAnalysis.new(ds,'y')
15
17
  # puts da.summary
16
- # ==>
17
- # Resultado del Analisis de Dominancia de a, b, c en y
18
18
  #
19
- # ----------------------------------------------------------------
20
- # | | r2 | sign | a | b | c |
21
- # ----------------------------------------------------------------
22
- # | Modelo 0 | | | 0.637 | 0.260 | 0.115 |
23
- # ----------------------------------------------------------------
24
- # | a | 0.637 | 0.000 | -- | 0.239 | 0.109 |
25
- # | b | 0.260 | 0.000 | 0.617 | -- | 0.103 |
26
- # | c | 0.115 | 0.000 | 0.632 | 0.249 | -- |
27
- # ----------------------------------------------------------------
28
- # | k=1 Promedio | | | 0.624 | 0.244 | 0.106 |
29
- # ----------------------------------------------------------------
30
- # | a*b | 0.877 | 0.000 | -- | -- | 0.098 |
31
- # | a*c | 0.746 | 0.000 | -- | 0.229 | -- |
32
- # | b*c | 0.363 | 0.000 | 0.612 | -- | -- |
33
- # ----------------------------------------------------------------
34
- # | k=2 Promedio | | | 0.612 | 0.229 | 0.098 |
35
- # ----------------------------------------------------------------
36
- # | a*b*c | 0.975 | 0.000 | -- | -- | -- |
37
- # ----------------------------------------------------------------
38
- # | Promedios generales | | | 0.624 | 0.244 | 0.106 |
39
- # ----------------------------------------------------------------
40
- #
41
- # De a pares
19
+ # Output:
20
+ #
21
+ # Report: Report 2010-02-08 19:10:11 -0300
22
+ # Table: Dominance Analysis result
23
+ # ------------------------------------------------------------
24
+ # | | r2 | sign | a | b | c |
25
+ # ------------------------------------------------------------
26
+ # | Model 0 | | | 0.648 | 0.265 | 0.109 |
27
+ # ------------------------------------------------------------
28
+ # | a | 0.648 | 0.000 | -- | 0.229 | 0.104 |
29
+ # | b | 0.265 | 0.000 | 0.612 | -- | 0.104 |
30
+ # | c | 0.109 | 0.000 | 0.643 | 0.260 | -- |
31
+ # ------------------------------------------------------------
32
+ # | k=1 Average | | | 0.627 | 0.244 | 0.104 |
33
+ # ------------------------------------------------------------
34
+ # | a*b | 0.877 | 0.000 | -- | -- | 0.099 |
35
+ # | a*c | 0.752 | 0.000 | -- | 0.224 | -- |
36
+ # | b*c | 0.369 | 0.000 | 0.607 | -- | -- |
37
+ # ------------------------------------------------------------
38
+ # | k=2 Average | | | 0.607 | 0.224 | 0.099 |
39
+ # ------------------------------------------------------------
40
+ # | a*b*c | 0.976 | 0.000 | -- | -- | -- |
41
+ # ------------------------------------------------------------
42
+ # | Overall averages | | | 0.628 | 0.245 | 0.104 |
43
+ # ------------------------------------------------------------
42
44
  #
43
- # ----------------------------
44
- # | Pares | T | C | G |
45
- # ----------------------------
46
- # | a - b | 1.0 | 1.0 | 1.0 |
47
- # | a - c | 1.0 | 1.0 | 1.0 |
48
- # | b - c | 1.0 | 1.0 | 1.0 |
49
- # ----------------------------
45
+ # Table: Pairwise dominance
46
+ # -----------------------------------------
47
+ # | Pairs | Total | Conditional | General |
48
+ # -----------------------------------------
49
+ # | a - b | 1.0 | 1.0 | 1.0 |
50
+ # | a - c | 1.0 | 1.0 | 1.0 |
51
+ # | b - c | 1.0 | 1.0 | 1.0 |
52
+ # -----------------------------------------
53
+
50
54
  #
51
55
  # == References:
52
56
  # * Budescu, D. V. (1993). Dominance analysis: a new approach to the problem of relative importance of predictors in multiple regression. _Psychological Bulletin, 114_, 542-551.
@@ -54,22 +58,30 @@ module Statsample
54
58
  class DominanceAnalysis
55
59
  include GetText
56
60
  bindtextdomain("statsample")
61
+ # Class used to generate the regressions. Defaults to Statsample::Regression::Multiple::RubyEngine
62
+ attr_accessor :regression_class
63
+ # Name of analysis
64
+ attr_accessor :name
65
+
57
66
  # Creates a new DominanceAnalysis object
58
67
  # Params:
59
68
  # * ds: A Dataset object
60
69
  # * y_var: Name of dependent variable
61
- # * r_class: Class to generate the regressions. Could be any subclass of
62
- # Statsample::Regression::Multiple::BaseEngine
70
+ # * opts: Any other attribute of the class
63
71
  #
64
- def initialize(ds,y_var, r_class = Regression::Multiple::RubyEngine)
65
- @y_var=y_var
66
- @dy=ds[@y_var]
67
- @ds=ds
68
- @r_class=r_class
69
- @ds_indep=ds.dup(ds.fields-[y_var])
70
- @fields=@ds_indep.fields
71
- create_models
72
- fill_models
72
+ def initialize(ds,y_var, opts=Hash.new)
73
+ @y_var=y_var
74
+ @dy=ds[@y_var]
75
+ @ds=ds
76
+ @ds_indep=ds.dup(ds.fields-[y_var])
77
+ @fields=@ds_indep.fields
78
+ @regression_class=Statsample::Regression::Multiple::RubyEngine
79
+ @name=_("Dominance Analysis: %s over %s") % [ ds.fields.join(",") , @y_var]
80
+ opts.each{|k,v|
81
+ self.send("#{k}=",v) if self.respond_to? k
82
+ }
83
+ create_models
84
+ fill_models
73
85
  end
74
86
  def fill_models
75
87
  @models.each do |m|
@@ -215,63 +227,65 @@ module Statsample
215
227
  convert=data.collect {|i1| @fields[i1] }
216
228
  @models.push(convert)
217
229
  ds_prev=@ds.dup(convert+[@y_var])
218
- modeldata=ModelData.new(convert,ds_prev, @y_var, @fields, @r_class)
230
+ modeldata=ModelData.new(convert,ds_prev, @y_var, @fields, @regression_class)
219
231
  @models_data[convert.sort]=modeldata
220
232
  end
221
233
  end
222
234
  end
223
- def summary(report_type=ConsoleSummary)
224
- out=""
225
- out.extend report_type
226
- out << _("Summary for Dominance Analysis of %s on %s\n") % [@fields.join(", "),@y_var]
227
- t=Statsample::ReportTable.new
228
- t.header=["","r2","sign"]+@fields
235
+ def summary
236
+ rp=ReportBuilder.new()
237
+ rp.add(self)
238
+ rp.to_text
239
+ end
240
+ def to_reportbuilder(generator)
241
+ anchor=generator.add_toc_entry(_("DA: ")+@name)
242
+ generator.add_html "<div class='dominance-analysis'>#{@name}<a name='#{anchor}'></a>"
243
+ t=ReportBuilder::Table.new(:name=>_("Dominance Analysis result"))
244
+ t.header=["","r2",_("sign")]+@fields
229
245
  row=[_("Model 0"),"",""]+@fields.collect{|f|
230
246
  sprintf("%0.3f", md([f]).r2)
231
247
  }
232
248
  t.add_row(row)
233
249
  t.add_horizontal_line
234
- for i in 1..@fields.size
235
- mk=md_k(i)
236
- mk.each{|m|
237
- t.add_row(m.add_table_row)
238
- }
239
- # Report averages
240
- a=average_k(i)
241
- if !a.nil?
242
- t.add_horizontal_line
243
- row=[_("k=%d Average") % i,"",""] + @fields.collect{|f|
244
- sprintf("%0.3f",a[f])
245
- }
246
- t.add_row(row)
247
- t.add_horizontal_line
248
-
250
+ for i in 1..@fields.size
251
+ mk=md_k(i)
252
+ mk.each{|m|
253
+ t.add_row(m.add_table_row)
254
+ }
255
+ # Report averages
256
+ a=average_k(i)
257
+ if !a.nil?
258
+ t.add_horizontal_line
259
+ row=[_("k=%d Average") % i,"",""] + @fields.collect{|f|
260
+ sprintf("%0.3f",a[f])
261
+ }
262
+ t.add_row(row)
263
+ t.add_horizontal_line
264
+
265
+ end
266
+
249
267
  end
250
268
 
251
- end
252
-
253
- g=general_averages
254
- t.add_horizontal_line
255
-
256
- row=[_("Overall averages"),"",""]+@fields.collect{|f|
257
- sprintf("%0.3f",g[f])
258
- }
259
- t.add_row(row)
260
- out.parse_table(t)
261
-
262
- out.nl
263
- out << _("Pairwise")+"\n"
264
- td=total_dominance
265
- cd=conditional_dominance
266
- gd=general_dominance
267
- t=Statsample::ReportTable.new([_("Pairs"),"T","C","G"])
268
- pairs.each{|p|
269
- name=p.join(" - ")
270
- row=[name, sprintf("%0.1f",td[p]), sprintf("%0.1f",cd[p]), sprintf("%0.1f",gd[p])]
269
+ g=general_averages
270
+ t.add_horizontal_line
271
+
272
+ row=[_("Overall averages"),"",""]+@fields.collect{|f|
273
+ sprintf("%0.3f",g[f])
274
+ }
271
275
  t.add_row(row)
272
- }
273
- out.parse_table(t)
274
- return out
276
+ generator.parse_element(t)
277
+
278
+ td=total_dominance
279
+ cd=conditional_dominance
280
+ gd=general_dominance
281
+ t=ReportBuilder::Table.new(:name=>_("Pairwise dominance"), :header=>[_("Pairs"),_("Total"),_("Conditional"),_("General")])
282
+ pairs.each{|p|
283
+ name=p.join(" - ")
284
+ row=[name, sprintf("%0.1f",td[p]), sprintf("%0.1f",cd[p]), sprintf("%0.1f",gd[p])]
285
+ t.add_row(row)
286
+ }
287
+ generator.parse_element(t)
288
+ generator.add_html("</div>")
275
289
  end
276
290
  class ModelData
277
291
  attr_reader :contributions
@@ -1,137 +1,166 @@
1
1
  module Statsample
2
- class DominanceAnalysis
3
- # Generates Bootstrap sample to identity the replicability of a Dominance Analysis. See Azen & Bodescu (2003) for more information.
4
- # References:
5
- # * Azen, R. & Budescu, D.V. (2003). The dominance analysis approach for comparing predictors in multiple regression. _Psychological Methods, 8_(2), 129-148.
2
+ class DominanceAnalysis
3
+ # Generates bootstrap samples to assess the replicability of a Dominance Analysis. See Azen & Budescu (2003) for more information.
4
+ # References:
5
+ # * Azen, R. & Budescu, D.V. (2003). The dominance analysis approach for comparing predictors in multiple regression. _Psychological Methods, 8_(2), 129-148.
6
6
  class Bootstrap
7
- include GetText
8
- include Writable
9
- bindtextdomain("statsample")
10
- attr_reader :samples_td,:samples_cd,:samples_gd,:samples_ga, :fields
11
- attr_writer :lr_class
12
- attr_accessor :ds
13
- def initialize(ds,y_var)
14
- @ds=ds
15
- @y_var=y_var
16
- @n=ds.cases
17
- @fields=ds.fields-[y_var]
18
- @samples_ga=@fields.inject({}){|a,v| a[v]=[];a}
19
- @n_samples=0
20
- @lr_class=Regression::Multiple::RubyEngine
21
- create_samples_pairs
22
- end
23
- def lr_class=(lr)
24
- @lr_class=lr
25
- end
26
- def da
27
- if @da.nil?
28
- @da=DominanceAnalysis.new(@ds,@y_var,@lr_class)
29
- end
30
- @da
31
- end
32
- # Creates re-samples from original dataset.
33
- # * number_samples: Number of new samples to add
34
- # * n: size of each new sample. If nil, equal to original sample size
35
- # * report: if true, echo number of current resample and total
36
- def bootstrap(number_samples,n=nil,report=false)
37
- number_samples.times{ |t|
38
- @n_samples+=1
39
- puts _("Bootstrap %d of %d") % [t+1, number_samples] if report
40
- ds_boot=@ds.bootstrap(n)
41
- da_1=DominanceAnalysis.new(ds_boot,@y_var,@lr_class)
42
- da_1.total_dominance.each{|k,v|
43
- @samples_td[k].push(v)
44
- }
45
- da_1.conditional_dominance.each{|k,v|
46
- @samples_cd[k].push(v)
47
- }
48
- da_1.general_dominance.each{|k,v|
49
- @samples_gd[k].push(v)
50
- }
51
- da_1.general_averages.each{|k,v|
52
- @samples_ga[k].push(v)
53
- }
7
+ include GetText
8
+ include Writable
9
+ bindtextdomain("statsample")
10
+ # Total Dominance results
11
+ attr_reader :samples_td
12
+ # Conditional Dominance results
13
+ attr_reader :samples_cd
14
+ # General Dominance results
15
+ attr_reader :samples_gd
16
+ # General average results
17
+ attr_reader :samples_ga
18
+ # Name of fields
19
+ attr_reader :fields
20
+ # Regression class used for analysis
21
+ attr_accessor :regression_class
22
+ # Dataset
23
+ attr_accessor :ds
24
+ # Name of analysis
25
+ attr_accessor :name
26
+ # Confidence level used to compute the t value (set to 0.95 in initialize)
27
+ attr_accessor :alpha
28
+ # Create a new Dominance Analysis Bootstrap Object
29
+ #
30
+ # * ds: A Dataset object
31
+ # * y_var: Name of dependent variable
32
+ # * opts: Any other attribute of the class
33
+ def initialize(ds,y_var, opts=Hash.new)
34
+ @ds=ds
35
+ @y_var=y_var
36
+ @n=ds.cases
37
+ @fields=ds.fields-[y_var]
38
+ @samples_ga=@fields.inject({}){|a,v| a[v]=[];a}
39
+ @n_samples=0
40
+ @alpha=0.95
41
+ @regression_class=Regression::Multiple::RubyEngine
42
+ @name=_("Bootstrap dominance Analysis: %s over %s") % [ ds.fields.join(",") , @y_var]
43
+ opts.each{|k,v|
44
+ self.send("#{k}=",v) if self.respond_to? k
45
+ }
46
+ create_samples_pairs
47
+ end
48
+ # lr_class deprecated
49
+ alias_method :lr_class, :regression_class
50
+ def da
51
+ if @da.nil?
52
+ @da=DominanceAnalysis.new(@ds,@y_var, :regression_class => @regression_class)
53
+ end
54
+ @da
55
+ end
56
+ # Creates number_samples re-samples from the original dataset and stores the result of
57
+ # each sample on @samples_td, @samples_cd, @samples_gd, @samples_ga
58
+ #
59
+ # * number_samples: Number of new samples to add
60
+ # * n: size of each new sample. If nil, equal to original sample size
61
+ # * report: if true, echo number of current resample and total
62
+ def bootstrap(number_samples,n=nil,report=false)
63
+ number_samples.times{ |t|
64
+ @n_samples+=1
65
+ puts _("Bootstrap %d of %d") % [t+1, number_samples] if report
66
+ ds_boot=@ds.bootstrap(n)
67
+ da_1=DominanceAnalysis.new(ds_boot, @y_var, :regression_class => @regression_class)
68
+ da_1.total_dominance.each{|k,v|
69
+ @samples_td[k].push(v)
70
+ }
71
+ da_1.conditional_dominance.each{|k,v|
72
+ @samples_cd[k].push(v)
73
+ }
74
+ da_1.general_dominance.each{|k,v|
75
+ @samples_gd[k].push(v)
76
+ }
77
+ da_1.general_averages.each{|k,v|
78
+ @samples_ga[k].push(v)
79
+ }
80
+ }
81
+ end
82
+ def create_samples_pairs
83
+ @samples_td={}
84
+ @samples_cd={}
85
+ @samples_gd={}
86
+ @pairs=[]
87
+ c=Statsample::Combination.new(2,@fields.size)
88
+ c.each do |data|
89
+ convert=data.collect {|i| @fields[i] }
90
+ @pairs.push(convert)
91
+ [@samples_td, @samples_cd, @samples_gd].each{|s|
92
+ s[convert]=[]
54
93
  }
55
94
  end
56
- def create_samples_pairs
57
- @samples_td={}
58
- @samples_cd={}
59
- @samples_gd={}
60
- @pairs=[]
61
- c=Statsample::Combination.new(2,@fields.size)
62
- c.each{|data|
63
- convert=data.collect {|i|
64
- @fields[i]
65
- }
66
- @pairs.push(convert)
67
- [@samples_td,@samples_cd,@samples_gd].each{|s|
68
- s[convert]=[]
69
- }
70
- }
71
- end
72
- def summary(report_type=ConsoleSummary)
73
- out =""
74
- raise "You should bootstrap first" if @n_samples==0
75
- alfa=0.95
76
- out.extend report_type
77
- out.add _("Summary for Bootstrap Dominance Analysis of %s on %s\n") % [@fields.join(", "), @y_var]
78
- out.add _("Sample size: %d\n") % @n_samples
79
- t=Distribution::T.p_value(1-((1-alfa) / 2), @n_samples - 1)
80
- out.add "t:#{t}\n"
81
- out.add "Linear Regression Engine: #{@lr_class.name}"
82
- out.nl
83
- table=ReportTable.new
84
- header=[_("pairs"),"sD","Dij",_("SE(Dij)"),"Pij","Pji","Pno",_("Reproducibility")]
85
- table.header=header
86
- table.add_row([_("Complete dominance")])
87
- table.add_horizontal_line
88
- @pairs.each{|pair|
89
- std=@samples_td[pair].to_vector(:scale)
90
- ttd=da.total_dominance_pairwise(pair[0],pair[1])
91
- table.add_row(summary_pairs(pair,std,ttd))
92
- }
93
- table.add_horizontal_line
94
- table.add_row([_("Conditional dominance")])
95
- table.add_horizontal_line
96
- @pairs.each{|pair|
97
- std=@samples_cd[pair].to_vector(:scale)
98
- ttd=da.conditional_dominance_pairwise(pair[0],pair[1])
99
- table.add_row(summary_pairs(pair,std,ttd))
100
-
101
- }
102
- table.add_horizontal_line
103
- table.add_row([_("General Dominance")])
104
- table.add_horizontal_line
105
- @pairs.each{|pair|
106
- std=@samples_gd[pair].to_vector(:scale)
107
- ttd=da.general_dominance_pairwise(pair[0],pair[1])
108
- table.add_row(summary_pairs(pair,std,ttd))
109
- }
110
- out.parse_table(table)
111
- out.add(_("General averages"))
112
- table=Statsample::ReportTable.new
113
- table.header=[_("var"),_("mean"),_("se"),_("p.5"),_("p.95")]
114
- @fields.each{|f|
115
- v=@samples_ga[f].to_vector(:scale)
116
- row=[@ds.label(f), sprintf("%0.3f",v.mean), sprintf("%0.3f",v.sd), sprintf("%0.3f",v.percentil(5)),sprintf("%0.3f",v.percentil(95))]
117
- table.add_row(row)
118
-
119
- }
120
- out.parse_table(table)
121
- out
122
- end
123
- def summary_pairs(pair,std,ttd)
124
- freqs=std.proportions
125
- [0, 0.5, 1].each{|n|
126
- freqs[n]=0 if freqs[n].nil?
127
- }
128
- name=@ds.label(pair[0])+" - "+@ds.label(pair[1])
129
- [name,f(ttd,1),f(std.mean,4),f(std.sd),f(freqs[1]), f(freqs[0]), f(freqs[0.5]), f(freqs[ttd])]
130
- end
131
- def f(v,n=3)
132
- prec="%0.#{n}f"
133
- sprintf(prec,v)
134
- end
95
+ end
96
+ def summary
97
+ rp=ReportBuilder.new()
98
+ rp.add(self)
99
+ rp.to_text
100
+ end
101
+ def t
102
+ Distribution::T.p_value(1-((1-@alpha) / 2), @n_samples - 1)
103
+ end
104
+ def to_reportbuilder(generator)
105
+ raise "You should bootstrap first" if @n_samples==0
106
+ anchor=generator.add_toc_entry(_("DAB: ")+@name)
107
+ generator.add_html "<div class='dominance-analysis-bootstrap'>#{@name}<a name='#{anchor}'></a>"
108
+
109
+ generator.add_text _("Sample size: %d\n") % @n_samples
110
+ generator.add_text "t: #{t}\n"
111
+ generator.add_text _("Linear Regression Engine: %s") % @regression_class.name
112
+
113
+ table=ReportBuilder::Table.new(:name=>"Bootstrap report", :header => [_("pairs"), "sD","Dij", _("SE(Dij)"), "Pij", "Pji", "Pno", _("Reproducibility")])
114
+ table.add_row([_("Complete dominance")])
115
+ table.add_horizontal_line
116
+ @pairs.each{|pair|
117
+ std=@samples_td[pair].to_vector(:scale)
118
+ ttd=da.total_dominance_pairwise(pair[0],pair[1])
119
+ table.add_row(summary_pairs(pair,std,ttd))
120
+ }
121
+ table.add_horizontal_line
122
+ table.add_row([_("Conditional dominance")])
123
+ table.add_horizontal_line
124
+ @pairs.each{|pair|
125
+ std=@samples_cd[pair].to_vector(:scale)
126
+ ttd=da.conditional_dominance_pairwise(pair[0],pair[1])
127
+ table.add_row(summary_pairs(pair,std,ttd))
128
+
129
+ }
130
+ table.add_horizontal_line
131
+ table.add_row([_("General Dominance")])
132
+ table.add_horizontal_line
133
+ @pairs.each{|pair|
134
+ std=@samples_gd[pair].to_vector(:scale)
135
+ ttd=da.general_dominance_pairwise(pair[0],pair[1])
136
+ table.add_row(summary_pairs(pair,std,ttd))
137
+ }
138
+ generator.parse_element(table)
139
+
140
+ table=ReportBuilder::Table.new(:name=>_("General averages"), :header=>[_("var"), _("mean"), _("se"), _("p.5"), _("p.95")])
141
+
142
+ @fields.each{|f|
143
+ v=@samples_ga[f].to_vector(:scale)
144
+ row=[@ds.label(f), sprintf("%0.3f",v.mean), sprintf("%0.3f",v.sd), sprintf("%0.3f",v.percentil(5)),sprintf("%0.3f",v.percentil(95))]
145
+ table.add_row(row)
146
+
147
+ }
148
+
149
+ generator.parse_element(table)
150
+ generator.add_html("</div>")
151
+ end
152
+ def summary_pairs(pair,std,ttd)
153
+ freqs=std.proportions
154
+ [0, 0.5, 1].each{|n|
155
+ freqs[n]=0 if freqs[n].nil?
156
+ }
157
+ name=@ds.label(pair[0])+" - "+@ds.label(pair[1])
158
+ [name,f(ttd,1),f(std.mean,4),f(std.sd),f(freqs[1]), f(freqs[0]), f(freqs[0.5]), f(freqs[ttd])]
159
+ end
160
+ def f(v,n=3)
161
+ prec="%0.#{n}f"
162
+ sprintf(prec,v)
163
+ end
135
164
  end
136
- end
165
+ end
137
166
  end