statsample 0.6.1 → 0.6.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (58) hide show
  1. data/History.txt +4 -0
  2. data/Manifest.txt +8 -19
  3. data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
  4. data/demo/dominance_analysis_bootstrap.rb +20 -0
  5. data/demo/dominanceanalysis.rb +11 -0
  6. data/demo/multiple_regression.rb +40 -0
  7. data/demo/polychoric.rb +13 -0
  8. data/demo/tetrachoric.rb +10 -0
  9. data/lib/distribution.rb +1 -0
  10. data/lib/distribution/normalbivariate.rb +100 -0
  11. data/lib/statsample.rb +4 -105
  12. data/lib/statsample/bivariate.rb +5 -1
  13. data/lib/statsample/bivariate/polychoric.rb +581 -0
  14. data/lib/statsample/bivariate/tetrachoric.rb +37 -5
  15. data/lib/statsample/converters.rb +11 -0
  16. data/lib/statsample/dominanceanalysis.rb +104 -90
  17. data/lib/statsample/dominanceanalysis/bootstrap.rb +160 -131
  18. data/lib/statsample/factor/pca.rb +1 -2
  19. data/lib/statsample/factor/principalaxis.rb +2 -2
  20. data/lib/statsample/graph/svghistogram.rb +170 -172
  21. data/lib/statsample/matrix.rb +79 -0
  22. data/lib/statsample/mle.rb +6 -4
  23. data/lib/statsample/mle/probit.rb +0 -1
  24. data/lib/statsample/regression/multiple/alglibengine.rb +23 -23
  25. data/lib/statsample/regression/multiple/baseengine.rb +112 -113
  26. data/lib/statsample/regression/multiple/gslengine.rb +91 -94
  27. data/lib/statsample/regression/multiple/rubyengine.rb +1 -1
  28. data/lib/statsample/srs.rb +1 -1
  29. data/lib/statsample/test.rb +0 -1
  30. data/lib/statsample/test/umannwhitney.rb +8 -5
  31. data/po/es/statsample.po +201 -39
  32. data/po/statsample.pot +184 -32
  33. data/test/test_bivariate.rb +21 -2
  34. data/test/test_distribution.rb +58 -40
  35. data/test/test_factor.rb +0 -1
  36. data/test/test_gsl.rb +13 -14
  37. data/test/test_regression.rb +1 -1
  38. data/test/test_statistics.rb +1 -4
  39. metadata +10 -21
  40. data/demo/benchmark.rb +0 -76
  41. data/demo/chi-square.rb +0 -44
  42. data/demo/crosstab.rb +0 -7
  43. data/demo/dice.rb +0 -13
  44. data/demo/distribution_t.rb +0 -95
  45. data/demo/graph.rb +0 -9
  46. data/demo/item_analysis.rb +0 -30
  47. data/demo/mean.rb +0 -81
  48. data/demo/nunnally_6.rb +0 -34
  49. data/demo/pca.rb +0 -29
  50. data/demo/proportion.rb +0 -57
  51. data/demo/regression.rb +0 -82
  52. data/demo/sample_test.csv +0 -113
  53. data/demo/spss_matrix.rb +0 -3
  54. data/demo/strata_proportion.rb +0 -152
  55. data/demo/stratum.rb +0 -141
  56. data/demo/t-student.rb +0 -17
  57. data/demo/umann.rb +0 -8
  58. data/lib/matrix_extension.rb +0 -92
@@ -27,7 +27,7 @@ module Statsample
27
27
  # See http://www.john-uebersax.com/stat/tetra.htm for extensive
28
28
  # documentation about tetrachoric correlation.
29
29
  #
30
- # This class uses Brown(1977) algorithm. You can see FORTRAN code on http://lib.stat.cmu.edu/apstat/116
30
+ # This class uses Brown (1977) algorithm. You can see FORTRAN code on http://lib.stat.cmu.edu/apstat/116
31
31
  #
32
32
  # == References:
33
33
  # * Brown, MB. (1977) Algorithm AS 116: the tetrachoric correlation and its standard error. _Applied Statistics, 26_, 343-351.
@@ -52,8 +52,10 @@ module Statsample
52
52
 
53
53
 
54
54
  class Tetrachoric
55
-
55
+ include GetText
56
+ bindtextdomain("statsample")
56
57
  attr_reader :r
58
+ attr_accessor :name
57
59
 
58
60
  TWOPI=Math::PI*2
59
61
  SQT2PI= 2.50662827
@@ -67,10 +69,14 @@ module Statsample
67
69
  NITER = 25
68
70
  X=[0,0.9972638618, 0.9856115115, 0.9647622556, 0.9349060759, 0.8963211558, 0.8493676137, 0.7944837960, 0.7321821187, 0.6630442669, 0.5877157572, 0.5068999089, 0.4213512761, 0.3318686023, 0.2392873623, 0.1444719616, 0.0483076657]
69
71
  W=[0, 0.0070186100, 0.0162743947, 0.0253920653, 0.0342738629, 0.0428358980, 0.0509980593, 0.0586840935, 0.0658222228, 0.0723457941, 0.0781938958, 0.0833119242, 0.0876520930, 0.0911738787, 0.0938443991, 0.0956387201, 0.0965400885]
72
+ # Creates a Tetrachoric object based on a 2x2 Matrix.
73
+ def self.new_with_matrix(m)
74
+ Tetrachoric.new(m[0,0], m[0,1], m[1,0],m[1,1])
75
+ end
70
76
  # Creates a Tetrachoric object based on two vectors.
71
77
  # Both vectors are dichotomized by this method before use.
72
78
  def self.new_with_vectors(v1,v2)
73
- v1a,v2a=Statsample.only_valid(v1,v2)
79
+ v1a, v2a=Statsample.only_valid(v1,v2)
74
80
  v1a=v1a.dichotomize
75
81
  v2a=v2a.dichotomize
76
82
  raise "v1 have only 0" if v1a.factors==[0]
@@ -90,17 +96,42 @@ module Statsample
90
96
  @sdr
91
97
  end
92
98
  # Threshold for variable x (rows)
99
+ # Point on gauss curve under X rater select cases
93
100
  def threshold_x
94
- @zac
101
+ @zab
95
102
  end
96
103
 
97
104
  # Threshold for variable y (columns)
105
+ # Point on gauss curve under Y rater select cases
106
+
98
107
  def threshold_y
99
- @zab
108
+ @zac
109
+ end
110
+ def summary
111
+ rp=ReportBuilder.new()
112
+ rp.add(self)
113
+ rp.to_text
114
+ end
115
+
116
+ def to_reportbuilder(generator)
117
+ section=ReportBuilder::Section.new(:name=>@name)
118
+ t=ReportBuilder::Table.new(:name=>_("Contingence Table"),:header=>["","Y=0","Y=1", "T"])
119
+ t.add_row(["X=0", @a,@b,@a+@b])
120
+ t.add_row(["X=1", @c,@d,@c+@d])
121
+ t.add_hr
122
+ t.add_row(["T", @a+@c,@b+@d,@a+@b+@c+@d])
123
+ section.add(t)
124
+ #generator.parse_element(t)
125
+ section.add(sprintf("r: %0.3f",r))
126
+ section.add(_("SE: %0.3f") % se)
127
+ section.add(_("Threshold X: %0.3f ") % [threshold_x] )
128
+ section.add(_("Threshold Y: %0.3f ") % [threshold_y] )
129
+ generator.parse_element(section)
100
130
  end
101
131
 
102
132
  def initialize(a,b,c,d)
103
133
  @a,@b,@c,@d=a,b,c,d
134
+ @name=_("Tetrachoric correlation")
104
135
  #
105
136
  # CHECK IF ANY CELL FREQUENCY IS NEGATIVE
106
137
  #
@@ -396,6 +427,7 @@ module Statsample
396
427
  pdf = Math::exp(-0.5 * (@zac ** 2 - 2 * @r * @zac * @zab + @zab ** 2) / rrsq ** 2) / (TWOPI * rrsq)
397
428
  @pac = Distribution::Normal.cdf((@zac - @r * @zab) / rrsq) - 0.5
398
429
  @pab = Distribution::Normal.cdf((@zab - @r * @zac) / rrsq) - 0.5
430
+
399
431
  @sdr = ((@aa+@dd) * (@bb + @cc)).quo(4) + @pab ** 2 * (@aa + @cc) * (@bb + @dd) + @pac ** 2 * (@aa + @bb) * (@cc + @dd) + 2.0 * @pab * @pac * (@aa * @dd - @bb * @cc) - @pab * (@aa * @bb - @cc * @dd) - @pac * (@aa * @cc - @bb * @dd)
400
432
  @sdr=0 if (@sdr<0)
401
433
  @sdr= Math::sqrt(@sdr) / (@tot * pdf * Math::sqrt(@tot))
@@ -76,6 +76,17 @@ module Statsample
76
76
  class SpreadsheetBase
77
77
  class << self
78
78
  def extract_fields(row)
79
+ =begin
80
+ fields=[]
81
+ row.to_a.collect {|c|
82
+ if c.nil?
83
+ break
84
+ else
85
+ fields.push(c)
86
+ end
87
+ }
88
+ =end
89
+ raise "Should'nt be empty headers: [#{row.to_a.join(",")}]" if row.to_a.find_all {|c| c.nil?}.count>0
79
90
  fields=row.to_a.collect{|c| c.downcase}
80
91
  fields.recode_repeated
81
92
  end
@@ -1,11 +1,13 @@
1
1
  require 'statsample/dominanceanalysis/bootstrap'
2
2
  module Statsample
3
- # Dominance Analysis is a procedure based on an examination of the R2 values
3
+ # Dominance Analysis is a procedure based on an examination of the R^2 values
4
4
  # for all possible subset models, to identify the relevance of one or more
5
5
  # predictors in the prediction of the criterion.
6
6
  #
7
7
  # See Budescu(1993) and Azen & Budescu (2003) for more information.
8
- # Use:
8
+ #
9
+ # Example:
10
+ #
9
11
  # a=1000.times.collect {rand}.to_scale
10
12
  # b=1000.times.collect {rand}.to_scale
11
13
  # c=1000.times.collect {rand}.to_scale
@@ -13,40 +15,42 @@ module Statsample
13
15
  # ds['y']=ds.collect{|row| row['a']*5+row['b']*3+row['c']*2+rand()}
14
16
  # da=Statsample::DominanceAnalysis.new(ds,'y')
15
17
  # puts da.summary
16
- # ==>
17
- # Resultado del Analisis de Dominancia de a, b, c en y
18
18
  #
19
- # ----------------------------------------------------------------
20
- # | | r2 | sign | a | b | c |
21
- # ----------------------------------------------------------------
22
- # | Modelo 0 | | | 0.637 | 0.260 | 0.115 |
23
- # ----------------------------------------------------------------
24
- # | a | 0.637 | 0.000 | -- | 0.239 | 0.109 |
25
- # | b | 0.260 | 0.000 | 0.617 | -- | 0.103 |
26
- # | c | 0.115 | 0.000 | 0.632 | 0.249 | -- |
27
- # ----------------------------------------------------------------
28
- # | k=1 Promedio | | | 0.624 | 0.244 | 0.106 |
29
- # ----------------------------------------------------------------
30
- # | a*b | 0.877 | 0.000 | -- | -- | 0.098 |
31
- # | a*c | 0.746 | 0.000 | -- | 0.229 | -- |
32
- # | b*c | 0.363 | 0.000 | 0.612 | -- | -- |
33
- # ----------------------------------------------------------------
34
- # | k=2 Promedio | | | 0.612 | 0.229 | 0.098 |
35
- # ----------------------------------------------------------------
36
- # | a*b*c | 0.975 | 0.000 | -- | -- | -- |
37
- # ----------------------------------------------------------------
38
- # | Promedios generales | | | 0.624 | 0.244 | 0.106 |
39
- # ----------------------------------------------------------------
40
- #
41
- # De a pares
19
+ # Output:
20
+ #
21
+ # Report: Report 2010-02-08 19:10:11 -0300
22
+ # Table: Dominance Analysis result
23
+ # ------------------------------------------------------------
24
+ # | | r2 | sign | a | b | c |
25
+ # ------------------------------------------------------------
26
+ # | Model 0 | | | 0.648 | 0.265 | 0.109 |
27
+ # ------------------------------------------------------------
28
+ # | a | 0.648 | 0.000 | -- | 0.229 | 0.104 |
29
+ # | b | 0.265 | 0.000 | 0.612 | -- | 0.104 |
30
+ # | c | 0.109 | 0.000 | 0.643 | 0.260 | -- |
31
+ # ------------------------------------------------------------
32
+ # | k=1 Average | | | 0.627 | 0.244 | 0.104 |
33
+ # ------------------------------------------------------------
34
+ # | a*b | 0.877 | 0.000 | -- | -- | 0.099 |
35
+ # | a*c | 0.752 | 0.000 | -- | 0.224 | -- |
36
+ # | b*c | 0.369 | 0.000 | 0.607 | -- | -- |
37
+ # ------------------------------------------------------------
38
+ # | k=2 Average | | | 0.607 | 0.224 | 0.099 |
39
+ # ------------------------------------------------------------
40
+ # | a*b*c | 0.976 | 0.000 | -- | -- | -- |
41
+ # ------------------------------------------------------------
42
+ # | Overall averages | | | 0.628 | 0.245 | 0.104 |
43
+ # ------------------------------------------------------------
42
44
  #
43
- # ----------------------------
44
- # | Pares | T | C | G |
45
- # ----------------------------
46
- # | a - b | 1.0 | 1.0 | 1.0 |
47
- # | a - c | 1.0 | 1.0 | 1.0 |
48
- # | b - c | 1.0 | 1.0 | 1.0 |
49
- # ----------------------------
45
+ # Table: Pairwise dominance
46
+ # -----------------------------------------
47
+ # | Pairs | Total | Conditional | General |
48
+ # -----------------------------------------
49
+ # | a - b | 1.0 | 1.0 | 1.0 |
50
+ # | a - c | 1.0 | 1.0 | 1.0 |
51
+ # | b - c | 1.0 | 1.0 | 1.0 |
52
+ # -----------------------------------------
53
+
50
54
  #
51
55
  # == References:
52
56
  # * Budescu, D. V. (1993). Dominance analysis: a new approach to the problem of relative importance of predictors in multiple regression. _Psychological Bulletin, 114_, 542-551.
@@ -54,22 +58,30 @@ module Statsample
54
58
  class DominanceAnalysis
55
59
  include GetText
56
60
  bindtextdomain("statsample")
61
+ # Class to generate the regressions. Defaults to Statsample::Regression::Multiple::RubyEngine
62
+ attr_accessor :regression_class
63
+ # Name of analysis
64
+ attr_accessor :name
65
+
57
66
  # Creates a new DominanceAnalysis object
58
67
  # Params:
59
68
  # * ds: A Dataset object
60
69
  # * y_var: Name of dependent variable
61
- # * r_class: Class to generate the regressions. Could be any subclass of
62
- # Statsample::Regression::Multiple::BaseEngine
70
+ # * opts: Any other attribute of the class
63
71
  #
64
- def initialize(ds,y_var, r_class = Regression::Multiple::RubyEngine)
65
- @y_var=y_var
66
- @dy=ds[@y_var]
67
- @ds=ds
68
- @r_class=r_class
69
- @ds_indep=ds.dup(ds.fields-[y_var])
70
- @fields=@ds_indep.fields
71
- create_models
72
- fill_models
72
+ def initialize(ds,y_var, opts=Hash.new)
73
+ @y_var=y_var
74
+ @dy=ds[@y_var]
75
+ @ds=ds
76
+ @ds_indep=ds.dup(ds.fields-[y_var])
77
+ @fields=@ds_indep.fields
78
+ @regression_class=Statsample::Regression::Multiple::RubyEngine
79
+ @name=_("Dominance Analysis: %s over %s") % [ ds.fields.join(",") , @y_var]
80
+ opts.each{|k,v|
81
+ self.send("#{k}=",v) if self.respond_to? k
82
+ }
83
+ create_models
84
+ fill_models
73
85
  end
74
86
  def fill_models
75
87
  @models.each do |m|
@@ -215,63 +227,65 @@ module Statsample
215
227
  convert=data.collect {|i1| @fields[i1] }
216
228
  @models.push(convert)
217
229
  ds_prev=@ds.dup(convert+[@y_var])
218
- modeldata=ModelData.new(convert,ds_prev, @y_var, @fields, @r_class)
230
+ modeldata=ModelData.new(convert,ds_prev, @y_var, @fields, @regression_class)
219
231
  @models_data[convert.sort]=modeldata
220
232
  end
221
233
  end
222
234
  end
223
- def summary(report_type=ConsoleSummary)
224
- out=""
225
- out.extend report_type
226
- out << _("Summary for Dominance Analysis of %s on %s\n") % [@fields.join(", "),@y_var]
227
- t=Statsample::ReportTable.new
228
- t.header=["","r2","sign"]+@fields
235
+ def summary
236
+ rp=ReportBuilder.new()
237
+ rp.add(self)
238
+ rp.to_text
239
+ end
240
+ def to_reportbuilder(generator)
241
+ anchor=generator.add_toc_entry(_("DA: ")+@name)
242
+ generator.add_html "<div class='dominance-analysis'>#{@name}<a name='#{anchor}'></a>"
243
+ t=ReportBuilder::Table.new(:name=>_("Dominance Analysis result"))
244
+ t.header=["","r2",_("sign")]+@fields
229
245
  row=[_("Model 0"),"",""]+@fields.collect{|f|
230
246
  sprintf("%0.3f", md([f]).r2)
231
247
  }
232
248
  t.add_row(row)
233
249
  t.add_horizontal_line
234
- for i in 1..@fields.size
235
- mk=md_k(i)
236
- mk.each{|m|
237
- t.add_row(m.add_table_row)
238
- }
239
- # Report averages
240
- a=average_k(i)
241
- if !a.nil?
242
- t.add_horizontal_line
243
- row=[_("k=%d Average") % i,"",""] + @fields.collect{|f|
244
- sprintf("%0.3f",a[f])
245
- }
246
- t.add_row(row)
247
- t.add_horizontal_line
248
-
250
+ for i in 1..@fields.size
251
+ mk=md_k(i)
252
+ mk.each{|m|
253
+ t.add_row(m.add_table_row)
254
+ }
255
+ # Report averages
256
+ a=average_k(i)
257
+ if !a.nil?
258
+ t.add_horizontal_line
259
+ row=[_("k=%d Average") % i,"",""] + @fields.collect{|f|
260
+ sprintf("%0.3f",a[f])
261
+ }
262
+ t.add_row(row)
263
+ t.add_horizontal_line
264
+
265
+ end
266
+
249
267
  end
250
268
 
251
- end
252
-
253
- g=general_averages
254
- t.add_horizontal_line
255
-
256
- row=[_("Overall averages"),"",""]+@fields.collect{|f|
257
- sprintf("%0.3f",g[f])
258
- }
259
- t.add_row(row)
260
- out.parse_table(t)
261
-
262
- out.nl
263
- out << _("Pairwise")+"\n"
264
- td=total_dominance
265
- cd=conditional_dominance
266
- gd=general_dominance
267
- t=Statsample::ReportTable.new([_("Pairs"),"T","C","G"])
268
- pairs.each{|p|
269
- name=p.join(" - ")
270
- row=[name, sprintf("%0.1f",td[p]), sprintf("%0.1f",cd[p]), sprintf("%0.1f",gd[p])]
269
+ g=general_averages
270
+ t.add_horizontal_line
271
+
272
+ row=[_("Overall averages"),"",""]+@fields.collect{|f|
273
+ sprintf("%0.3f",g[f])
274
+ }
271
275
  t.add_row(row)
272
- }
273
- out.parse_table(t)
274
- return out
276
+ generator.parse_element(t)
277
+
278
+ td=total_dominance
279
+ cd=conditional_dominance
280
+ gd=general_dominance
281
+ t=ReportBuilder::Table.new(:name=>_("Pairwise dominance"), :header=>[_("Pairs"),_("Total"),_("Conditional"),_("General")])
282
+ pairs.each{|p|
283
+ name=p.join(" - ")
284
+ row=[name, sprintf("%0.1f",td[p]), sprintf("%0.1f",cd[p]), sprintf("%0.1f",gd[p])]
285
+ t.add_row(row)
286
+ }
287
+ generator.parse_element(t)
288
+ generator.add_html("</div>")
275
289
  end
276
290
  class ModelData
277
291
  attr_reader :contributions
@@ -1,137 +1,166 @@
1
1
  module Statsample
2
- class DominanceAnalysis
3
- # Generates Bootstrap sample to identity the replicability of a Dominance Analysis. See Azen & Bodescu (2003) for more information.
4
- # References:
5
- # * Azen, R. & Budescu, D.V. (2003). The dominance analysis approach for comparing predictors in multiple regression. _Psychological Methods, 8_(2), 129-148.
2
+ class DominanceAnalysis
3
+ # Generates Bootstrap samples to identify the replicability of a Dominance Analysis. See Azen & Budescu (2003) for more information.
4
+ # References:
5
+ # * Azen, R. & Budescu, D.V. (2003). The dominance analysis approach for comparing predictors in multiple regression. _Psychological Methods, 8_(2), 129-148.
6
6
  class Bootstrap
7
- include GetText
8
- include Writable
9
- bindtextdomain("statsample")
10
- attr_reader :samples_td,:samples_cd,:samples_gd,:samples_ga, :fields
11
- attr_writer :lr_class
12
- attr_accessor :ds
13
- def initialize(ds,y_var)
14
- @ds=ds
15
- @y_var=y_var
16
- @n=ds.cases
17
- @fields=ds.fields-[y_var]
18
- @samples_ga=@fields.inject({}){|a,v| a[v]=[];a}
19
- @n_samples=0
20
- @lr_class=Regression::Multiple::RubyEngine
21
- create_samples_pairs
22
- end
23
- def lr_class=(lr)
24
- @lr_class=lr
25
- end
26
- def da
27
- if @da.nil?
28
- @da=DominanceAnalysis.new(@ds,@y_var,@lr_class)
29
- end
30
- @da
31
- end
32
- # Creates re-samples from original dataset.
33
- # * number_samples: Number of new samples to add
34
- # * n: size of each new sample. If nil, equal to original sample size
35
- # * report: if true, echo number of current resample and total
36
- def bootstrap(number_samples,n=nil,report=false)
37
- number_samples.times{ |t|
38
- @n_samples+=1
39
- puts _("Bootstrap %d of %d") % [t+1, number_samples] if report
40
- ds_boot=@ds.bootstrap(n)
41
- da_1=DominanceAnalysis.new(ds_boot,@y_var,@lr_class)
42
- da_1.total_dominance.each{|k,v|
43
- @samples_td[k].push(v)
44
- }
45
- da_1.conditional_dominance.each{|k,v|
46
- @samples_cd[k].push(v)
47
- }
48
- da_1.general_dominance.each{|k,v|
49
- @samples_gd[k].push(v)
50
- }
51
- da_1.general_averages.each{|k,v|
52
- @samples_ga[k].push(v)
53
- }
7
+ include GetText
8
+ include Writable
9
+ bindtextdomain("statsample")
10
+ # Total Dominance results
11
+ attr_reader :samples_td
12
+ # Conditional Dominance results
13
+ attr_reader :samples_cd
14
+ # General Dominance results
15
+ attr_reader :samples_gd
16
+ # General average results
17
+ attr_reader :samples_ga
18
+ # Name of fields
19
+ attr_reader :fields
20
+ # Regression class used for analysis
21
+ attr_accessor :regression_class
22
+ # Dataset
23
+ attr_accessor :ds
24
+ # Name of analysis
25
+ attr_accessor :name
26
+ # Confidence level used to compute the t value (defaults to 0.95)
27
+ attr_accessor :alpha
28
+ # Create a new Dominance Analysis Bootstrap Object
29
+ #
30
+ # * ds: A Dataset object
31
+ # * y_var: Name of dependent variable
32
+ # * opts: Any other attribute of the class
33
+ def initialize(ds,y_var, opts=Hash.new)
34
+ @ds=ds
35
+ @y_var=y_var
36
+ @n=ds.cases
37
+ @fields=ds.fields-[y_var]
38
+ @samples_ga=@fields.inject({}){|a,v| a[v]=[];a}
39
+ @n_samples=0
40
+ @alpha=0.95
41
+ @regression_class=Regression::Multiple::RubyEngine
42
+ @name=_("Bootstrap dominance Analysis: %s over %s") % [ ds.fields.join(",") , @y_var]
43
+ opts.each{|k,v|
44
+ self.send("#{k}=",v) if self.respond_to? k
45
+ }
46
+ create_samples_pairs
47
+ end
48
+ # Deprecated: lr_class is kept as an alias of regression_class
49
+ alias_method :lr_class, :regression_class
50
+ def da
51
+ if @da.nil?
52
+ @da=DominanceAnalysis.new(@ds,@y_var, :regression_class => @regression_class)
53
+ end
54
+ @da
55
+ end
56
+ # Creates number_samples re-samples from the original dataset and stores the result of
57
+ # each sample on @samples_td, @samples_cd, @samples_gd, @samples_ga
58
+ #
59
+ # * number_samples: Number of new samples to add
60
+ # * n: size of each new sample. If nil, equal to original sample size
61
+ # * report: if true, echo number of current resample and total
62
+ def bootstrap(number_samples,n=nil,report=false)
63
+ number_samples.times{ |t|
64
+ @n_samples+=1
65
+ puts _("Bootstrap %d of %d") % [t+1, number_samples] if report
66
+ ds_boot=@ds.bootstrap(n)
67
+ da_1=DominanceAnalysis.new(ds_boot, @y_var, :regression_class => @regression_class)
68
+ da_1.total_dominance.each{|k,v|
69
+ @samples_td[k].push(v)
70
+ }
71
+ da_1.conditional_dominance.each{|k,v|
72
+ @samples_cd[k].push(v)
73
+ }
74
+ da_1.general_dominance.each{|k,v|
75
+ @samples_gd[k].push(v)
76
+ }
77
+ da_1.general_averages.each{|k,v|
78
+ @samples_ga[k].push(v)
79
+ }
80
+ }
81
+ end
82
+ def create_samples_pairs
83
+ @samples_td={}
84
+ @samples_cd={}
85
+ @samples_gd={}
86
+ @pairs=[]
87
+ c=Statsample::Combination.new(2,@fields.size)
88
+ c.each do |data|
89
+ convert=data.collect {|i| @fields[i] }
90
+ @pairs.push(convert)
91
+ [@samples_td, @samples_cd, @samples_gd].each{|s|
92
+ s[convert]=[]
54
93
  }
55
94
  end
56
- def create_samples_pairs
57
- @samples_td={}
58
- @samples_cd={}
59
- @samples_gd={}
60
- @pairs=[]
61
- c=Statsample::Combination.new(2,@fields.size)
62
- c.each{|data|
63
- convert=data.collect {|i|
64
- @fields[i]
65
- }
66
- @pairs.push(convert)
67
- [@samples_td,@samples_cd,@samples_gd].each{|s|
68
- s[convert]=[]
69
- }
70
- }
71
- end
72
- def summary(report_type=ConsoleSummary)
73
- out =""
74
- raise "You should bootstrap first" if @n_samples==0
75
- alfa=0.95
76
- out.extend report_type
77
- out.add _("Summary for Bootstrap Dominance Analysis of %s on %s\n") % [@fields.join(", "), @y_var]
78
- out.add _("Sample size: %d\n") % @n_samples
79
- t=Distribution::T.p_value(1-((1-alfa) / 2), @n_samples - 1)
80
- out.add "t:#{t}\n"
81
- out.add "Linear Regression Engine: #{@lr_class.name}"
82
- out.nl
83
- table=ReportTable.new
84
- header=[_("pairs"),"sD","Dij",_("SE(Dij)"),"Pij","Pji","Pno",_("Reproducibility")]
85
- table.header=header
86
- table.add_row([_("Complete dominance")])
87
- table.add_horizontal_line
88
- @pairs.each{|pair|
89
- std=@samples_td[pair].to_vector(:scale)
90
- ttd=da.total_dominance_pairwise(pair[0],pair[1])
91
- table.add_row(summary_pairs(pair,std,ttd))
92
- }
93
- table.add_horizontal_line
94
- table.add_row([_("Conditional dominance")])
95
- table.add_horizontal_line
96
- @pairs.each{|pair|
97
- std=@samples_cd[pair].to_vector(:scale)
98
- ttd=da.conditional_dominance_pairwise(pair[0],pair[1])
99
- table.add_row(summary_pairs(pair,std,ttd))
100
-
101
- }
102
- table.add_horizontal_line
103
- table.add_row([_("General Dominance")])
104
- table.add_horizontal_line
105
- @pairs.each{|pair|
106
- std=@samples_gd[pair].to_vector(:scale)
107
- ttd=da.general_dominance_pairwise(pair[0],pair[1])
108
- table.add_row(summary_pairs(pair,std,ttd))
109
- }
110
- out.parse_table(table)
111
- out.add(_("General averages"))
112
- table=Statsample::ReportTable.new
113
- table.header=[_("var"),_("mean"),_("se"),_("p.5"),_("p.95")]
114
- @fields.each{|f|
115
- v=@samples_ga[f].to_vector(:scale)
116
- row=[@ds.label(f), sprintf("%0.3f",v.mean), sprintf("%0.3f",v.sd), sprintf("%0.3f",v.percentil(5)),sprintf("%0.3f",v.percentil(95))]
117
- table.add_row(row)
118
-
119
- }
120
- out.parse_table(table)
121
- out
122
- end
123
- def summary_pairs(pair,std,ttd)
124
- freqs=std.proportions
125
- [0, 0.5, 1].each{|n|
126
- freqs[n]=0 if freqs[n].nil?
127
- }
128
- name=@ds.label(pair[0])+" - "+@ds.label(pair[1])
129
- [name,f(ttd,1),f(std.mean,4),f(std.sd),f(freqs[1]), f(freqs[0]), f(freqs[0.5]), f(freqs[ttd])]
130
- end
131
- def f(v,n=3)
132
- prec="%0.#{n}f"
133
- sprintf(prec,v)
134
- end
95
+ end
96
+ def summary
97
+ rp=ReportBuilder.new()
98
+ rp.add(self)
99
+ rp.to_text
100
+ end
101
+ def t
102
+ Distribution::T.p_value(1-((1-@alpha) / 2), @n_samples - 1)
103
+ end
104
+ def to_reportbuilder(generator)
105
+ raise "You should bootstrap first" if @n_samples==0
106
+ anchor=generator.add_toc_entry(_("DAB: ")+@name)
107
+ generator.add_html "<div class='dominance-analysis-bootstrap'>#{@name}<a name='#{anchor}'></a>"
108
+
109
+ generator.add_text _("Sample size: %d\n") % @n_samples
110
+ generator.add_text "t: #{t}\n"
111
+ generator.add_text _("Linear Regression Engine: %s") % @regression_class.name
112
+
113
+ table=ReportBuilder::Table.new(:name=>"Bootstrap report", :header => [_("pairs"), "sD","Dij", _("SE(Dij)"), "Pij", "Pji", "Pno", _("Reproducibility")])
114
+ table.add_row([_("Complete dominance")])
115
+ table.add_horizontal_line
116
+ @pairs.each{|pair|
117
+ std=@samples_td[pair].to_vector(:scale)
118
+ ttd=da.total_dominance_pairwise(pair[0],pair[1])
119
+ table.add_row(summary_pairs(pair,std,ttd))
120
+ }
121
+ table.add_horizontal_line
122
+ table.add_row([_("Conditional dominance")])
123
+ table.add_horizontal_line
124
+ @pairs.each{|pair|
125
+ std=@samples_cd[pair].to_vector(:scale)
126
+ ttd=da.conditional_dominance_pairwise(pair[0],pair[1])
127
+ table.add_row(summary_pairs(pair,std,ttd))
128
+
129
+ }
130
+ table.add_horizontal_line
131
+ table.add_row([_("General Dominance")])
132
+ table.add_horizontal_line
133
+ @pairs.each{|pair|
134
+ std=@samples_gd[pair].to_vector(:scale)
135
+ ttd=da.general_dominance_pairwise(pair[0],pair[1])
136
+ table.add_row(summary_pairs(pair,std,ttd))
137
+ }
138
+ generator.parse_element(table)
139
+
140
+ table=ReportBuilder::Table.new(:name=>_("General averages"), :header=>[_("var"), _("mean"), _("se"), _("p.5"), _("p.95")])
141
+
142
+ @fields.each{|f|
143
+ v=@samples_ga[f].to_vector(:scale)
144
+ row=[@ds.label(f), sprintf("%0.3f",v.mean), sprintf("%0.3f",v.sd), sprintf("%0.3f",v.percentil(5)),sprintf("%0.3f",v.percentil(95))]
145
+ table.add_row(row)
146
+
147
+ }
148
+
149
+ generator.parse_element(table)
150
+ generator.add_html("</div>")
151
+ end
152
+ def summary_pairs(pair,std,ttd)
153
+ freqs=std.proportions
154
+ [0, 0.5, 1].each{|n|
155
+ freqs[n]=0 if freqs[n].nil?
156
+ }
157
+ name=@ds.label(pair[0])+" - "+@ds.label(pair[1])
158
+ [name,f(ttd,1),f(std.mean,4),f(std.sd),f(freqs[1]), f(freqs[0]), f(freqs[0.5]), f(freqs[ttd])]
159
+ end
160
+ def f(v,n=3)
161
+ prec="%0.#{n}f"
162
+ sprintf(prec,v)
163
+ end
135
164
  end
136
- end
165
+ end
137
166
  end