statsample 0.9.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. data.tar.gz.sig +0 -0
  2. data/History.txt +20 -1
  3. data/Manifest.txt +8 -1
  4. data/README.txt +11 -7
  5. data/Rakefile +2 -2
  6. data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
  7. data/examples/dataset.rb +8 -0
  8. data/examples/multiple_regression.rb +1 -1
  9. data/examples/parallel_analysis.rb +29 -0
  10. data/examples/parallel_analysis_tetrachoric.rb +30 -0
  11. data/examples/vector.rb +6 -0
  12. data/lib/distribution.rb +16 -6
  13. data/lib/distribution/normal.rb +27 -20
  14. data/lib/distribution/normalbivariate.rb +1 -1
  15. data/lib/statsample.rb +19 -2
  16. data/lib/statsample/anova.rb +118 -16
  17. data/lib/statsample/bivariate.rb +27 -13
  18. data/lib/statsample/bivariate/polychoric.rb +18 -5
  19. data/lib/statsample/crosstab.rb +66 -74
  20. data/lib/statsample/dataset.rb +52 -45
  21. data/lib/statsample/dominanceanalysis.rb +2 -5
  22. data/lib/statsample/factor.rb +1 -1
  23. data/lib/statsample/factor/parallelanalysis.rb +122 -0
  24. data/lib/statsample/factor/pca.rb +23 -28
  25. data/lib/statsample/factor/principalaxis.rb +8 -3
  26. data/lib/statsample/matrix.rb +27 -24
  27. data/lib/statsample/mle.rb +11 -11
  28. data/lib/statsample/permutation.rb +2 -1
  29. data/lib/statsample/regression.rb +10 -8
  30. data/lib/statsample/regression/multiple/baseengine.rb +36 -25
  31. data/lib/statsample/regression/multiple/gslengine.rb +14 -0
  32. data/lib/statsample/regression/multiple/matrixengine.rb +4 -32
  33. data/lib/statsample/regression/multiple/rubyengine.rb +2 -6
  34. data/lib/statsample/regression/simple.rb +1 -1
  35. data/lib/statsample/reliability.rb +42 -54
  36. data/lib/statsample/test.rb +10 -6
  37. data/lib/statsample/test/f.rb +16 -26
  38. data/lib/statsample/test/levene.rb +4 -8
  39. data/lib/statsample/test/t.rb +30 -24
  40. data/lib/statsample/test/umannwhitney.rb +13 -6
  41. data/lib/statsample/vector.rb +86 -76
  42. data/po/es/statsample.mo +0 -0
  43. data/po/es/statsample.po +127 -94
  44. data/po/statsample.pot +114 -79
  45. data/test/test_anovaoneway.rb +27 -0
  46. data/test/test_anovawithvectors.rb +97 -0
  47. data/test/test_bivariate.rb +6 -57
  48. data/test/test_bivariate_polychoric.rb +65 -0
  49. data/test/test_crosstab.rb +6 -0
  50. data/test/test_dataset.rb +29 -1
  51. data/test/test_distribution.rb +6 -13
  52. data/test/test_dominance_analysis.rb +1 -1
  53. data/test/test_factor.rb +3 -3
  54. data/test/test_helpers.rb +18 -18
  55. data/test/test_matrix.rb +33 -20
  56. data/test/test_permutation.rb +36 -30
  57. data/test/test_regression.rb +26 -8
  58. data/test/test_reliability.rb +104 -14
  59. data/test/test_test_f.rb +11 -14
  60. data/test/test_test_t.rb +42 -35
  61. data/test/test_umannwhitney.rb +22 -10
  62. data/test/test_vector.rb +204 -102
  63. metadata +57 -81
  64. metadata.gz.sig +0 -0
  65. data/test/test_anova.rb +0 -24
@@ -3,34 +3,27 @@ module Statsample
3
3
  # From Wikipedia:
4
4
  # An F-test is any statistical test in which the test statistic has an F-distribution under the null hypothesis. It is most often used when comparing statistical models that have been fit to a data set, in order to identify the model that best fits the population from which the data were sampled.
5
5
  class F
6
- include GetText
7
- bindtextdomain("statsample")
8
-
9
6
  include Statsample::Test
10
7
 
11
- attr_reader :ss_num, :ss_den, :df_num, :df_den, :ss_total, :df_total
8
+ attr_reader :var_num, :var_den, :df_num, :df_den, :var_total, :df_total
12
9
  # Tails for probability (:both, :left or :right)
13
10
  attr_accessor :tails
14
11
  # Name of F analysis
15
12
  attr_accessor :name
16
- # Name of numerator
17
- attr_accessor :name_numerator
18
- # Name of denominator
19
- attr_accessor :name_denominator
20
13
 
21
14
  # Parameters:
22
- # * ss_num: explained variance / between group variance
23
- # * ss_den: unexplained variance / within group variance
24
- # * df_num: degrees of freedom for explained variance / k-1
25
- # * df_den: degrees of freedom for unexplained variance / n-k
26
- def initialize(ss_num, ss_den, df_num, df_den, opts=Hash.new)
27
- @ss_num=ss_num
28
- @ss_den=ss_den
15
+ # * var_num: variance numerator
16
+ # * var_den: variance denominator
17
+ # * df_num: degrees of freedom numerator
18
+ # * df_den: degrees of freedom denominator
19
+ def initialize(var_num, var_den, df_num, df_den, opts=Hash.new)
20
+ @var_num=var_num
21
+ @var_den=var_den
29
22
  @df_num=df_num
30
23
  @df_den=df_den
31
- @ss_total=ss_num+ss_den
24
+ @var_total=var_num+var_den
32
25
  @df_total=df_num+df_den
33
- opts_default={:tails=>:right, :name_numerator=>"Numerator", :name_denominator=>"Denominator", :name=>"F Test"}
26
+ opts_default={:tails=>:right, :name=>"F Test"}
34
27
  @opts=opts_default.merge(opts)
35
28
  raise "Tails should be right or left, not both" if @opts[:tails]==:both
36
29
  opts_default.keys.each {|k|
@@ -41,20 +34,17 @@ module Statsample
41
34
  ReportBuilder.new(:no_title=>true).add(self).to_text
42
35
  end
43
36
  def f
44
- (@ss_num.quo(@df_num)).quo(@ss_den.quo(@df_den))
37
+ @var_num.quo(@var_den)
38
+ end
39
+ def to_f
40
+ f
45
41
  end
46
42
  # probability
47
43
  def probability
48
44
  p_using_cdf(Distribution::F.cdf(f, @df_num, @df_den), tails)
49
45
  end
50
- def report_building(builder)#:nodoc:
51
- builder.section(:name=>@name) do |b|
52
- b.table(:name=>_("%s Table") % @name, :header=>%w{source ss df f p}.map {|v| _(v)}) do |t|
53
- t.row([@name_numerator, sprintf("%0.3f",@ss_num), @df_num, sprintf("%0.3f",f), sprintf("%0.3f", probability)])
54
- t.row([@name_denominator, sprintf("%0.3f",@ss_den), @df_den, "", ""])
55
- t.row([_("Total"), sprintf("%0.3f",@ss_total), @df_total,"",""])
56
- end
57
- end
46
+ def report_building(builder) #:nodoc:
47
+ builder.text "%s : F(%d, %d) = %0.4f , p = %0.4f" % [@name, @df_num, @df_den, f, probability]
58
48
  end
59
49
  end
60
50
  end
@@ -19,6 +19,7 @@ module Statsample
19
19
  # Reference:
20
20
  # * NIST/SEMATECH e-Handbook of Statistical Methods. Available on http://www.itl.nist.gov/div898/handbook/eda/section3/eda35a.htm
21
21
  class Levene
22
+ include Statsample::Test
22
23
  # Degrees of freedom 1 (k-1)
23
24
  attr_reader :d1
24
25
  # Degrees of freedom 2 (n-k)
@@ -42,18 +43,13 @@ module Statsample
42
43
  def f
43
44
  @w
44
45
  end
45
-
46
- def report_building(g) # :nodoc:
47
- g.text @name
48
- g.text "F: #{"%0.4f" % f}"
49
- g.text "p: #{"%0.4f" % probability}"
50
-
46
+ def report_building(builder) # :nodoc:
47
+ builder.text "%s : F(%d, %d) = %0.4f , p = %0.4f" % [@name, @d1, @d2, f, probability]
51
48
  end
52
49
  # Summary of results
53
50
  def summary
54
51
  ReportBuilder.new(:no_title=>true).add(self).to_text
55
52
  end
56
-
57
53
  def compute
58
54
  n=@vectors.inject(0) {|ac,v| ac+v.n_valid}
59
55
 
@@ -86,7 +82,7 @@ module Statsample
86
82
  # Probability.
87
83
  # With H_0 = Sum(s2)=0, probability of getting a test value greater than or equal to the one obtained on the sample
88
84
  def probability
89
- 1-Distribution::F.cdf(f, @d1, @d2)
85
+ p_using_cdf(Distribution::F.cdf(f, @d1, @d2), :right)
90
86
  end
91
87
 
92
88
  end
@@ -31,7 +31,7 @@ module Statsample
31
31
  end
32
32
  num.quo(den)
33
33
  end
34
- # Degrees of freedom for equal variance
34
+ # Degrees of freedom for equal variance on t test
35
35
  def df_equal_variance(n1,n2)
36
36
  n1+n2-2
37
37
  end
@@ -67,7 +67,6 @@ module Statsample
67
67
  class OneSample
68
68
  include Math
69
69
  include Statsample::Test
70
- include DirtyMemoize
71
70
  # Options
72
71
  attr_accessor :opts
73
72
  # Name of test
@@ -76,15 +75,9 @@ module Statsample
76
75
  attr_accessor :u
77
76
  # Degrees of freedom
78
77
  attr_reader :df
79
- # Value of t
80
- attr_reader :t
81
- # Probability
82
- attr_reader :probability
83
78
  # Tails for probability (:both, :left or :right)
84
79
  attr_accessor :tails
85
80
 
86
- dirty_writer :u, :tails
87
- dirty_memoize :t, :probability
88
81
  # Create a One Sample T Test
89
82
  # Options:
90
83
  # * :u = Mean to compare. Default= 0
@@ -100,14 +93,14 @@ module Statsample
100
93
  @df= @vector.n_valid-1
101
94
  @t=nil
102
95
  end
103
-
96
+ def t
97
+ T.one_sample(@vector.mean, @u, @vector.sd, @vector.n_valid)
98
+ end
104
99
 
105
- # Set t and probability for given u
106
- def compute
107
- @t = T.one_sample(@vector.mean, @u, @vector.sd, @vector.n_valid)
108
- @probability = p_using_cdf(Distribution::T.cdf(@t, @df), tails)
100
+ def probability
101
+ p_using_cdf(Distribution::T.cdf(t, @df), tails)
109
102
  end
110
- # Presents summary of analysis
103
+ # Summary of analysis
111
104
  #
112
105
  def summary
113
106
  ReportBuilder.new(:no_title=>true).add(self).to_text
@@ -154,6 +147,8 @@ module Statsample
154
147
  include Math
155
148
  include Statsample::Test
156
149
  include DirtyMemoize
150
+ include GetText
151
+ bindtextdomain("statsample")
157
152
  # Options
158
153
  attr_accessor :opts
159
154
  # Name of test
@@ -204,25 +199,36 @@ module Statsample
204
199
  @probability_not_equal_variance = p_using_cdf(Distribution::T.cdf(@t_not_equal_variance, @df_not_equal_variance), tails)
205
200
 
206
201
  end
202
+ # Cohen's d is a measure of effect size. It's defined as the difference between two means divided by a standard deviation for the data
203
+ def d
204
+ n1=@v1.n_valid
205
+ n2=@v2.n_valid
206
+ num=@v1.mean-@v2.mean
207
+ den=Math::sqrt( ((n1-1)*@v1.sd+(n2-1)*@v2.sd).quo(n1+n2))
208
+ num.quo(den)
209
+ end
210
+
207
211
  # Presents summary of analysis
208
- #
209
212
  def summary
210
213
  ReportBuilder.new(:no_title=>true).add(self).to_text
211
214
  end
215
+
212
216
  def report_building(b) # :nodoc:
213
217
  b.section(:name=>@name) {|g|
214
- g.table(:name=>"Mean and standard deviation", :header=>["Variable", "m", "sd","n"]) {|t|
215
- t.row([1,"%0.4f" % @v1.mean,"%0.4f" % @v1.sd,@v1.n_valid])
216
- t.row([2,"%0.4f" % @v2.mean,"%0.4f" % @v2.sd, @v2.n_valid])
217
- }
218
- g.section(:name=>"Levene Test") {|g1|
219
- g1.parse_element(Statsample::Test.levene([@v1,@v2]))
218
+ g.table(:name=>_("Mean and standard deviation"), :header=>["Variable", "m", "sd","n"]) {|t|
219
+ t.row([@v1.name,"%0.4f" % @v1.mean,"%0.4f" % @v1.sd,@v1.n_valid])
220
+ t.row([@v2.name,"%0.4f" % @v2.mean,"%0.4f" % @v2.sd, @v2.n_valid])
220
221
  }
222
+ g.parse_element(Statsample::Test.levene([@v1,@v2],:name=>_("Levene test for equality of variances")))
221
223
 
222
- g.table(:name=>"T statistics",:header=>["Type","t","df", "p (#{tails} tails)"]) {|t|
223
- t.row(["Equal variance", "%0.4f" % t_equal_variance, df_equal_variance, "%0.4f" % probability_equal_variance])
224
- t.row(["Non equal variance", "%0.4f" % t_not_equal_variance, "%0.4f" % df_not_equal_variance, "%0.4f" % probability_not_equal_variance])
224
+ g.table(:name=>_("T statistics"),:header=>["Type","t","df", "p (#{tails} tails)"].map{|v| _(v)}) {|t|
225
+ t.row([_("Equal variance"), "%0.4f" % t_equal_variance, df_equal_variance, "%0.4f" % probability_equal_variance])
226
+ t.row([_("Non equal variance"), "%0.4f" % t_not_equal_variance, "%0.4f" % df_not_equal_variance, "%0.4f" % probability_not_equal_variance])
225
227
  }
228
+ g.table(:name=>_("Effect size")) do |t|
229
+ t.row ['x1-x2', "%0.4f" % (@v1.mean-@v2.mean)]
230
+ t.row ['d', "%0.4f" % d]
231
+ end
226
232
  }
227
233
  end
228
234
  end
@@ -107,14 +107,15 @@ module Statsample
107
107
  attr_reader :u
108
108
  # Value of compensation for ties (useful for demonstration)
109
109
  attr_reader :t
110
+ # Name of test
111
+ attr_accessor :name
110
112
  #
111
113
  # Create a new U Mann-Whitney test
112
114
  # Params: Two Statsample::Vectors
113
115
  #
114
- def initialize(v1,v2)
116
+ def initialize(v1,v2, opts=Hash.new)
115
117
  @n1=v1.valid_data.size
116
118
  @n2=v2.valid_data.size
117
-
118
119
  data=(v1.valid_data+v2.valid_data).to_scale
119
120
  groups=(([0]*@n1)+([1]*@n2)).to_vector
120
121
  ds={'g'=>groups, 'data'=>data}.to_dataset
@@ -132,11 +133,17 @@ module Statsample
132
133
  @u1=r1-((@n1*(@n1+1)).quo(2))
133
134
  @u2=r2-((@n2*(@n2+1)).quo(2))
134
135
  @u=(u1<u2) ? u1 : u2
136
+ opts_default={:name=>"Mann-Whitney's U"}
137
+ @opts=opts_default.merge(opts)
138
+ opts_default.keys.each {|k|
139
+ send("#{k}=", @opts[k])
140
+ }
141
+
135
142
  end
136
143
  # Report results.
137
144
  def summary
138
145
  out=<<-HEREDOC
139
- Mann-Whitney U
146
+ @name
140
147
  Sum of ranks v1: #{@r1.to_f}
141
148
  Sum of ranks v1: #{@r2.to_f}
142
149
  U Value: #{@u.to_f}
@@ -152,7 +159,7 @@ Z: #{sprintf("%0.3f",z)} (p: #{sprintf("%0.3f",z_probability)})
152
159
  end
153
160
  # Exact probability of finding values of U lower or equal to sample on U distribution. Use with caution with m*n>100000.
154
161
  # Uses u_sampling_distribution_as62
155
- def exact_probability
162
+ def probability_exact
156
163
  dist=UMannWhitney.u_sampling_distribution_as62(@n1,@n2)
157
164
  sum=0
158
165
  (0..@u.to_i).each {|i|
@@ -190,9 +197,9 @@ Z: #{sprintf("%0.3f",z)} (p: #{sprintf("%0.3f",z_probability)})
190
197
  (@u-mu).quo(ou)
191
198
  end
192
199
  # Assuming H_0, the proportion of cdf with values of U lower
193
- # than the sample.
200
+ # than the sample, using normal approximation.
194
201
  # Use with more than 30 cases per group.
195
- def z_probability
202
+ def probability_z
196
203
  (1-Distribution::Normal.cdf(z.abs()))*2
197
204
  end
198
205
  end
@@ -7,7 +7,7 @@ class Array
7
7
  end
8
8
  # Creates a new Statsample::Vector object of type :scale
9
9
  def to_scale(*args)
10
- Statsample::Vector.new(self,:scale,*args)
10
+ Statsample::Vector.new(self, :scale,*args)
11
11
  end
12
12
  end
13
13
 
@@ -24,12 +24,7 @@ module Statsample
24
24
  class Vector
25
25
  include Enumerable
26
26
  include Writable
27
- # DEFAULT OPTIONS
28
- DEFAULT_OPTIONS={
29
- :missing_values=>[],
30
- :today_values=>['NOW','TODAY', :NOW, :TODAY],
31
- :labels=>{}
32
- }
27
+ include Summarizable
33
28
  # Level of measurement. Could be :nominal, :ordinal or :scale
34
29
  attr_reader :type
35
30
  # Original data.
@@ -50,23 +45,39 @@ module Statsample
50
45
  attr_reader :gsl
51
46
  # Change label for specific values
52
47
  attr_accessor :labels
48
+ # Name of vector. Should be used for output by many classes
49
+ attr_accessor :name
50
+
53
51
  #
54
52
  # Creates a new Vector object.
55
- # [data] Array of data.
56
- # [type] Level of meausurement. See Vector#type
57
- # [opts] Options
58
- # [:missing_values] Array of missing values. See Vector#missing_values
59
- # [:today_values] Array of 'today' values. See Vector#today_values
60
- # [:labels] Labels for data values
53
+ # * <tt>data</tt> Array of data.
54
+ # * <tt>type</tt> Level of measurement. See Vector#type
55
+ # * <tt>opts</tt> Hash of options
56
+ # * <tt>:missing_values</tt> Array of missing values. See Vector#missing_values
57
+ # * <tt>:today_values</tt> Array of 'today' values. See Vector#today_values
58
+ # * <tt>:labels</tt> Labels for data values
59
+ # * <tt>:name</tt> Name of vector
61
60
  #
62
61
  def initialize(data=[], type=:nominal, opts=Hash.new)
63
62
  raise "Data should be an array" unless data.is_a? Array
64
63
  @data=data
65
64
  @type=type
66
- opts=DEFAULT_OPTIONS.merge(opts)
67
- @missing_values=opts[:missing_values]
68
- @labels=opts[:labels]
69
- @today_values=opts[:today_values]
65
+ opts_default={
66
+ :missing_values=>[],
67
+ :today_values=>['NOW','TODAY', :NOW, :TODAY],
68
+ :labels=>{},
69
+ :name=>nil
70
+ }
71
+ @opts=opts_default.merge(opts)
72
+ if @opts[:name].nil?
73
+ @@n_table||=0
74
+ @@n_table+=1
75
+ @opts[:name]="Vector #{@@n_table}"
76
+ end
77
+ @missing_values=@opts[:missing_values]
78
+ @labels=@opts[:labels]
79
+ @today_values=@opts[:today_values]
80
+ @name=@opts[:name]
70
81
  @valid_data=[]
71
82
  @data_with_nils=[]
72
83
  @date_data_with_nils=[]
@@ -80,12 +91,12 @@ module Statsample
80
91
  # Note: data, missing_values and labels are duplicated, so
81
92
  # changes on the original vector don't propagate to copies.
82
93
  def dup
83
- Vector.new(@data.dup,@type, :missing_values => @missing_values.dup, :labels => @labels.dup)
94
+ Vector.new(@data.dup,@type, :missing_values => @missing_values.dup, :labels => @labels.dup, :name=>@name.dup)
84
95
  end
85
96
  # Returns an empty duplicate of the vector. Maintains the type,
86
97
  # missing values and labels.
87
98
  def dup_empty
88
- Vector.new([],@type, :missing_values => @missing_values.dup, :labels => @labels.dup)
99
+ Vector.new([],@type, :missing_values => @missing_values.dup, :labels => @labels.dup, :name=>@name.dup)
89
100
  end
90
101
  # Raises an exception if type of vector is inferior to t type
91
102
  def check_type(t)
@@ -128,8 +139,8 @@ module Statsample
128
139
  # Vector equality.
129
140
  # Two vectors are the same if their data, missing values, type and labels are equal
130
141
  def ==(v2)
131
- raise TypeError,"Argument should be a Vector" unless v2.instance_of? Statsample::Vector
132
- @data==v2.data and @missing_values==v2.missing_values and @type==v2.type and @labels=v2.labels
142
+ raise TypeError,"Argument should be a Vector" unless v2.instance_of? Statsample::Vector
143
+ @data==v2.data and @missing_values==v2.missing_values and @type==v2.type and @labels==v2.labels
133
144
  end
134
145
 
135
146
  def _dump(i) # :nodoc:
@@ -189,8 +200,8 @@ module Statsample
189
200
  # Vector.set_valid_data at the end of your insertion cycle
190
201
  #
191
202
  def add(v,update_valid=true)
192
- @data.push(v)
193
- set_valid_data if update_valid
203
+ @data.push(v)
204
+ set_valid_data if update_valid
194
205
  end
195
206
  # Update valid_data, missing_data, data_with_nils and gsl
196
207
  # at the end of an insertion.
@@ -208,14 +219,14 @@ module Statsample
208
219
  # v.valid_data
209
220
  # => [2,3]
210
221
  def set_valid_data
211
- @valid_data.clear
212
- @missing_data.clear
213
- @data_with_nils.clear
214
- @date_data_with_nils.clear
215
- @gsl=nil
216
- set_valid_data_intern
217
- set_scale_data if(@type==:scale)
218
- set_date_data if(@type==:date)
222
+ @valid_data.clear
223
+ @missing_data.clear
224
+ @data_with_nils.clear
225
+ @date_data_with_nils.clear
226
+ @gsl=nil
227
+ set_valid_data_intern
228
+ set_scale_data if(@type==:scale)
229
+ set_date_data if(@type==:date)
219
230
  end
220
231
 
221
232
  if Statsample::STATSAMPLE__.respond_to?(:set_valid_data_intern)
@@ -228,21 +239,21 @@ module Statsample
228
239
  end
229
240
  end
230
241
  def _set_valid_data_intern #:nodoc:
231
- @data.each do |n|
232
- if is_valid? n
233
- @valid_data.push(n)
234
- @data_with_nils.push(n)
235
- else
236
- @data_with_nils.push(nil)
237
- @missing_data.push(n)
242
+ @data.each do |n|
243
+ if is_valid? n
244
+ @valid_data.push(n)
245
+ @data_with_nils.push(n)
246
+ else
247
+ @data_with_nils.push(nil)
248
+ @missing_data.push(n)
249
+ end
238
250
  end
239
- end
240
- @has_missing_data=@missing_data.size>0
251
+ @has_missing_data=@missing_data.size>0
241
252
  end
242
253
 
243
254
  # Returns true if data has one or more missing values
244
255
  def has_missing_data?
245
- @has_missing_data
256
+ @has_missing_data
246
257
  end
247
258
  # Retrieves label for value x. Retrieves x if
248
259
  # no label defined.
@@ -251,14 +262,14 @@ module Statsample
251
262
  end
252
263
  # Returns a Vector with data with labels replaced by the label.
253
264
  def vector_labeled
254
- d=@data.collect{|x|
255
- if @labels.has_key? x
256
- @labels[x]
257
- else
258
- x
259
- end
260
- }
261
- Vector.new(d,@type)
265
+ d=@data.collect{|x|
266
+ if @labels.has_key? x
267
+ @labels[x]
268
+ else
269
+ x
270
+ end
271
+ }
272
+ Vector.new(d,@type)
262
273
  end
263
274
  # Size of total data
264
275
  def size
@@ -427,13 +438,13 @@ module Statsample
427
438
  # In all the trials, every item has the same probability
428
439
  # of being selected.
429
440
  def sample_with_replacement(sample=1)
430
- if(@type!=:scale or !Statsample.has_gsl?)
431
- vds=@valid_data.size
432
- (0...sample).collect{ @valid_data[rand(vds)] }
433
- else
434
- r = GSL::Rng.alloc(GSL::Rng::MT19937,rand(10000))
435
- r.sample(@gsl, sample).to_a
436
- end
441
+ if(@type!=:scale or !Statsample.has_gsl?)
442
+ vds=@valid_data.size
443
+ (0...sample).collect{ @valid_data[rand(vds)] }
444
+ else
445
+ r = GSL::Rng.alloc(GSL::Rng::MT19937,rand(10000))
446
+ r.sample(@gsl, sample).to_a
447
+ end
437
448
  end
438
449
  # Returns a random sample of size n, without replacement,
439
450
  # only with valid data.
@@ -597,24 +608,24 @@ module Statsample
597
608
  def proportion(v=1)
598
609
  frequencies[v].quo(@valid_data.size)
599
610
  end
600
- def summary(out="")
601
- out << sprintf("n valid:%d\n",n_valid)
602
- out << sprintf("factors:%s\n",factors.join(","))
603
- out << "mode:"+mode.to_s+"\n"
604
- out << "Distribution:\n"
605
- frequencies.sort.each{|k,v|
606
- key=labels.has_key?(k) ? labels[k]:k
607
- out << sprintf("%s : %s (%0.2f%%)\n",key,v, (v.quo(n_valid))*100)
608
- }
609
- if(@type==:ordinal)
610
- out << "median:"+median.to_s+"\n"
611
- end
612
- if(@type==:scale)
613
- out << "mean:"+mean.to_s+"\n"
614
- out << "sd:"+sd.to_s+"\n"
615
-
611
+ def report_building(b)
612
+ b.section(:name=>name) do |s|
613
+ s.text _("n :%d") % n
614
+ s.text _("n valid:%d") % n_valid
615
+ s.text _("factors:%s") % factors.join(",")
616
+ s.text _("mode: %s") % mode
617
+ s.table(:name=>_("Distribution")) do |t|
618
+ frequencies.sort.each do |k,v|
619
+ key=labels.has_key?(k) ? labels[k]:k
620
+ t.row [key,v, ("%0.2f%%" % (v.quo(n_valid)*100))]
621
+ end
622
+ end
623
+ s.text _("median: %s") % median.to_s if(@type==:ordinal)
624
+ if(@type==:scale)
625
+ s.text _("mean: %0.4f") % mean
626
+ s.text _("sd: %0.4f") % sd.to_s
627
+ end
616
628
  end
617
- out
618
629
  end
619
630
 
620
631
  # Variance of p, according to population size
@@ -817,8 +828,7 @@ module Statsample
817
828
  @gsl.mean
818
829
  end
819
830
  def variance_sample(m=nil) # :nodoc:
820
- check_type :scale
821
-
831
+ check_type :scale
822
832
  m||=mean
823
833
  @gsl.variance_m
824
834
  end
@@ -881,7 +891,7 @@ module Statsample
881
891
  alias_method :sdp, :standard_deviation_population
882
892
  alias_method :sds, :standard_deviation_sample
883
893
  alias_method :cov, :coefficient_of_variation
884
- alias_method :variance, :variance_sample
894
+ alias_method :variance, :variance_sample
885
895
  alias_method :sd, :standard_deviation_sample
886
896
  alias_method :ss, :sum_of_squares
887
897
  end