statsample 0.9.0 → 0.10.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (65) hide show
  1. data.tar.gz.sig +0 -0
  2. data/History.txt +20 -1
  3. data/Manifest.txt +8 -1
  4. data/README.txt +11 -7
  5. data/Rakefile +2 -2
  6. data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
  7. data/examples/dataset.rb +8 -0
  8. data/examples/multiple_regression.rb +1 -1
  9. data/examples/parallel_analysis.rb +29 -0
  10. data/examples/parallel_analysis_tetrachoric.rb +30 -0
  11. data/examples/vector.rb +6 -0
  12. data/lib/distribution.rb +16 -6
  13. data/lib/distribution/normal.rb +27 -20
  14. data/lib/distribution/normalbivariate.rb +1 -1
  15. data/lib/statsample.rb +19 -2
  16. data/lib/statsample/anova.rb +118 -16
  17. data/lib/statsample/bivariate.rb +27 -13
  18. data/lib/statsample/bivariate/polychoric.rb +18 -5
  19. data/lib/statsample/crosstab.rb +66 -74
  20. data/lib/statsample/dataset.rb +52 -45
  21. data/lib/statsample/dominanceanalysis.rb +2 -5
  22. data/lib/statsample/factor.rb +1 -1
  23. data/lib/statsample/factor/parallelanalysis.rb +122 -0
  24. data/lib/statsample/factor/pca.rb +23 -28
  25. data/lib/statsample/factor/principalaxis.rb +8 -3
  26. data/lib/statsample/matrix.rb +27 -24
  27. data/lib/statsample/mle.rb +11 -11
  28. data/lib/statsample/permutation.rb +2 -1
  29. data/lib/statsample/regression.rb +10 -8
  30. data/lib/statsample/regression/multiple/baseengine.rb +36 -25
  31. data/lib/statsample/regression/multiple/gslengine.rb +14 -0
  32. data/lib/statsample/regression/multiple/matrixengine.rb +4 -32
  33. data/lib/statsample/regression/multiple/rubyengine.rb +2 -6
  34. data/lib/statsample/regression/simple.rb +1 -1
  35. data/lib/statsample/reliability.rb +42 -54
  36. data/lib/statsample/test.rb +10 -6
  37. data/lib/statsample/test/f.rb +16 -26
  38. data/lib/statsample/test/levene.rb +4 -8
  39. data/lib/statsample/test/t.rb +30 -24
  40. data/lib/statsample/test/umannwhitney.rb +13 -6
  41. data/lib/statsample/vector.rb +86 -76
  42. data/po/es/statsample.mo +0 -0
  43. data/po/es/statsample.po +127 -94
  44. data/po/statsample.pot +114 -79
  45. data/test/test_anovaoneway.rb +27 -0
  46. data/test/test_anovawithvectors.rb +97 -0
  47. data/test/test_bivariate.rb +6 -57
  48. data/test/test_bivariate_polychoric.rb +65 -0
  49. data/test/test_crosstab.rb +6 -0
  50. data/test/test_dataset.rb +29 -1
  51. data/test/test_distribution.rb +6 -13
  52. data/test/test_dominance_analysis.rb +1 -1
  53. data/test/test_factor.rb +3 -3
  54. data/test/test_helpers.rb +18 -18
  55. data/test/test_matrix.rb +33 -20
  56. data/test/test_permutation.rb +36 -30
  57. data/test/test_regression.rb +26 -8
  58. data/test/test_reliability.rb +104 -14
  59. data/test/test_test_f.rb +11 -14
  60. data/test/test_test_t.rb +42 -35
  61. data/test/test_umannwhitney.rb +22 -10
  62. data/test/test_vector.rb +204 -102
  63. metadata +57 -81
  64. metadata.gz.sig +0 -0
  65. data/test/test_anova.rb +0 -24
@@ -3,34 +3,27 @@ module Statsample
3
3
  # From Wikipedia:
4
4
  # An F-test is any statistical test in which the test statistic has an F-distribution under the null hypothesis. It is most often used when comparing statistical models that have been fit to a data set, in order to identify the model that best fits the population from which the data were sampled.
5
5
  class F
6
- include GetText
7
- bindtextdomain("statsample")
8
-
9
6
  include Statsample::Test
10
7
 
11
- attr_reader :ss_num, :ss_den, :df_num, :df_den, :ss_total, :df_total
8
+ attr_reader :var_num, :var_den, :df_num, :df_den, :var_total, :df_total
12
9
  # Tails for probability (:both, :left or :right)
13
10
  attr_accessor :tails
14
11
  # Name of F analysis
15
12
  attr_accessor :name
16
- # Name of numerator
17
- attr_accessor :name_numerator
18
- # Name of denominator
19
- attr_accessor :name_denominator
20
13
 
21
14
  # Parameters:
22
- # * ss_num: explained variance / between group variance
23
- # * ss_den: unexplained variance / within group variance
24
- # * df_num: degrees of freedom for explained variance / k-1
25
- # * df_den: degrees of freedom for unexplained variance / n-k
26
- def initialize(ss_num, ss_den, df_num, df_den, opts=Hash.new)
27
- @ss_num=ss_num
28
- @ss_den=ss_den
15
+ # * var_num: variance numerator
16
+ # * var_den: variance denominator
17
+ # * df_num: degrees of freedom numerator
18
+ # * df_den: degrees of freedom denominator
19
+ def initialize(var_num, var_den, df_num, df_den, opts=Hash.new)
20
+ @var_num=var_num
21
+ @var_den=var_den
29
22
  @df_num=df_num
30
23
  @df_den=df_den
31
- @ss_total=ss_num+ss_den
24
+ @var_total=var_num+var_den
32
25
  @df_total=df_num+df_den
33
- opts_default={:tails=>:right, :name_numerator=>"Numerator", :name_denominator=>"Denominator", :name=>"F Test"}
26
+ opts_default={:tails=>:right, :name=>"F Test"}
34
27
  @opts=opts_default.merge(opts)
35
28
  raise "Tails should be right or left, not both" if @opts[:tails]==:both
36
29
  opts_default.keys.each {|k|
@@ -41,20 +34,17 @@ module Statsample
41
34
  ReportBuilder.new(:no_title=>true).add(self).to_text
42
35
  end
43
36
  def f
44
- (@ss_num.quo(@df_num)).quo(@ss_den.quo(@df_den))
37
+ @var_num.quo(@var_den)
38
+ end
39
+ def to_f
40
+ f
45
41
  end
46
42
  # probability
47
43
  def probability
48
44
  p_using_cdf(Distribution::F.cdf(f, @df_num, @df_den), tails)
49
45
  end
50
- def report_building(builder)#:nodoc:
51
- builder.section(:name=>@name) do |b|
52
- b.table(:name=>_("%s Table") % @name, :header=>%w{source ss df f p}.map {|v| _(v)}) do |t|
53
- t.row([@name_numerator, sprintf("%0.3f",@ss_num), @df_num, sprintf("%0.3f",f), sprintf("%0.3f", probability)])
54
- t.row([@name_denominator, sprintf("%0.3f",@ss_den), @df_den, "", ""])
55
- t.row([_("Total"), sprintf("%0.3f",@ss_total), @df_total,"",""])
56
- end
57
- end
46
+ def report_building(builder) #:nodoc:
47
+ builder.text "%s : F(%d, %d) = %0.4f , p = %0.4f" % [@name, @df_num, @df_den, f, probability]
58
48
  end
59
49
  end
60
50
  end
@@ -19,6 +19,7 @@ module Statsample
19
19
  # Reference:
20
20
  # * NIST/SEMATECH e-Handbook of Statistical Methods. Available on http://www.itl.nist.gov/div898/handbook/eda/section3/eda35a.htm
21
21
  class Levene
22
+ include Statsample::Test
22
23
  # Degrees of freedom 1 (k-1)
23
24
  attr_reader :d1
24
25
  # Degrees of freedom 2 (n-k)
@@ -42,18 +43,13 @@ module Statsample
42
43
  def f
43
44
  @w
44
45
  end
45
-
46
- def report_building(g) # :nodoc:
47
- g.text @name
48
- g.text "F: #{"%0.4f" % f}"
49
- g.text "p: #{"%0.4f" % probability}"
50
-
46
+ def report_building(builder) # :nodoc:
47
+ builder.text "%s : F(%d, %d) = %0.4f , p = %0.4f" % [@name, @d1, @d2, f, probability]
51
48
  end
52
49
  # Summary of results
53
50
  def summary
54
51
  ReportBuilder.new(:no_title=>true).add(self).to_text
55
52
  end
56
-
57
53
  def compute
58
54
  n=@vectors.inject(0) {|ac,v| ac+v.n_valid}
59
55
 
@@ -86,7 +82,7 @@ module Statsample
86
82
  # Probability.
87
83
  # With H_0 = Sum(s2)=0, probability of getting a test value greater than or equal to the one obtained from the sample
88
84
  def probability
89
- 1-Distribution::F.cdf(f, @d1, @d2)
85
+ p_using_cdf(Distribution::F.cdf(f, @d1, @d2), :right)
90
86
  end
91
87
 
92
88
  end
@@ -31,7 +31,7 @@ module Statsample
31
31
  end
32
32
  num.quo(den)
33
33
  end
34
- # Degrees of freedom for equal variance
34
+ # Degrees of freedom for equal variance on t test
35
35
  def df_equal_variance(n1,n2)
36
36
  n1+n2-2
37
37
  end
@@ -67,7 +67,6 @@ module Statsample
67
67
  class OneSample
68
68
  include Math
69
69
  include Statsample::Test
70
- include DirtyMemoize
71
70
  # Options
72
71
  attr_accessor :opts
73
72
  # Name of test
@@ -76,15 +75,9 @@ module Statsample
76
75
  attr_accessor :u
77
76
  # Degrees of freedom
78
77
  attr_reader :df
79
- # Value of t
80
- attr_reader :t
81
- # Probability
82
- attr_reader :probability
83
78
  # Tails for probability (:both, :left or :right)
84
79
  attr_accessor :tails
85
80
 
86
- dirty_writer :u, :tails
87
- dirty_memoize :t, :probability
88
81
  # Create a One Sample T Test
89
82
  # Options:
90
83
  # * :u = Mean to compare. Default= 0
@@ -100,14 +93,14 @@ module Statsample
100
93
  @df= @vector.n_valid-1
101
94
  @t=nil
102
95
  end
103
-
96
+ def t
97
+ T.one_sample(@vector.mean, @u, @vector.sd, @vector.n_valid)
98
+ end
104
99
 
105
- # Set t and probability for given u
106
- def compute
107
- @t = T.one_sample(@vector.mean, @u, @vector.sd, @vector.n_valid)
108
- @probability = p_using_cdf(Distribution::T.cdf(@t, @df), tails)
100
+ def probability
101
+ p_using_cdf(Distribution::T.cdf(t, @df), tails)
109
102
  end
110
- # Presents summary of analysis
103
+ # Summary of analysis
111
104
  #
112
105
  def summary
113
106
  ReportBuilder.new(:no_title=>true).add(self).to_text
@@ -154,6 +147,8 @@ module Statsample
154
147
  include Math
155
148
  include Statsample::Test
156
149
  include DirtyMemoize
150
+ include GetText
151
+ bindtextdomain("statsample")
157
152
  # Options
158
153
  attr_accessor :opts
159
154
  # Name of test
@@ -204,25 +199,36 @@ module Statsample
204
199
  @probability_not_equal_variance = p_using_cdf(Distribution::T.cdf(@t_not_equal_variance, @df_not_equal_variance), tails)
205
200
 
206
201
  end
202
+ # Cohen's d is a measure of effect size. It is defined as the difference between two means divided by a standard deviation for the data
203
+ def d
204
+ n1=@v1.n_valid
205
+ n2=@v2.n_valid
206
+ num=@v1.mean-@v2.mean
207
+ den=Math::sqrt( ((n1-1)*@v1.sd+(n2-1)*@v2.sd).quo(n1+n2))
208
+ num.quo(den)
209
+ end
210
+
207
211
  # Presents summary of analysis
208
- #
209
212
  def summary
210
213
  ReportBuilder.new(:no_title=>true).add(self).to_text
211
214
  end
215
+
212
216
  def report_building(b) # :nodoc:
213
217
  b.section(:name=>@name) {|g|
214
- g.table(:name=>"Mean and standard deviation", :header=>["Variable", "m", "sd","n"]) {|t|
215
- t.row([1,"%0.4f" % @v1.mean,"%0.4f" % @v1.sd,@v1.n_valid])
216
- t.row([2,"%0.4f" % @v2.mean,"%0.4f" % @v2.sd, @v2.n_valid])
217
- }
218
- g.section(:name=>"Levene Test") {|g1|
219
- g1.parse_element(Statsample::Test.levene([@v1,@v2]))
218
+ g.table(:name=>_("Mean and standard deviation"), :header=>["Variable", "m", "sd","n"]) {|t|
219
+ t.row([@v1.name,"%0.4f" % @v1.mean,"%0.4f" % @v1.sd,@v1.n_valid])
220
+ t.row([@v2.name,"%0.4f" % @v2.mean,"%0.4f" % @v2.sd, @v2.n_valid])
220
221
  }
222
+ g.parse_element(Statsample::Test.levene([@v1,@v2],:name=>_("Levene test for equality of variances")))
221
223
 
222
- g.table(:name=>"T statistics",:header=>["Type","t","df", "p (#{tails} tails)"]) {|t|
223
- t.row(["Equal variance", "%0.4f" % t_equal_variance, df_equal_variance, "%0.4f" % probability_equal_variance])
224
- t.row(["Non equal variance", "%0.4f" % t_not_equal_variance, "%0.4f" % df_not_equal_variance, "%0.4f" % probability_not_equal_variance])
224
+ g.table(:name=>_("T statistics"),:header=>["Type","t","df", "p (#{tails} tails)"].map{|v| _(v)}) {|t|
225
+ t.row([_("Equal variance"), "%0.4f" % t_equal_variance, df_equal_variance, "%0.4f" % probability_equal_variance])
226
+ t.row([_("Non equal variance"), "%0.4f" % t_not_equal_variance, "%0.4f" % df_not_equal_variance, "%0.4f" % probability_not_equal_variance])
225
227
  }
228
+ g.table(:name=>_("Effect size")) do |t|
229
+ t.row ['x1-x2', "%0.4f" % (@v1.mean-@v2.mean)]
230
+ t.row ['d', "%0.4f" % d]
231
+ end
226
232
  }
227
233
  end
228
234
  end
@@ -107,14 +107,15 @@ module Statsample
107
107
  attr_reader :u
108
108
  # Value of compensation for ties (useful for demonstration)
109
109
  attr_reader :t
110
+ # Name of test
111
+ attr_accessor :name
110
112
  #
111
113
  # Create a new U Mann-Whitney test
112
114
  # Params: Two Statsample::Vectors
113
115
  #
114
- def initialize(v1,v2)
116
+ def initialize(v1,v2, opts=Hash.new)
115
117
  @n1=v1.valid_data.size
116
118
  @n2=v2.valid_data.size
117
-
118
119
  data=(v1.valid_data+v2.valid_data).to_scale
119
120
  groups=(([0]*@n1)+([1]*@n2)).to_vector
120
121
  ds={'g'=>groups, 'data'=>data}.to_dataset
@@ -132,11 +133,17 @@ module Statsample
132
133
  @u1=r1-((@n1*(@n1+1)).quo(2))
133
134
  @u2=r2-((@n2*(@n2+1)).quo(2))
134
135
  @u=(u1<u2) ? u1 : u2
136
+ opts_default={:name=>"Mann-Whitney's U"}
137
+ @opts=opts_default.merge(opts)
138
+ opts_default.keys.each {|k|
139
+ send("#{k}=", @opts[k])
140
+ }
141
+
135
142
  end
136
143
  # Report results.
137
144
  def summary
138
145
  out=<<-HEREDOC
139
- Mann-Whitney U
146
+ @name
140
147
  Sum of ranks v1: #{@r1.to_f}
141
148
  Sum of ranks v1: #{@r2.to_f}
142
149
  U Value: #{@u.to_f}
@@ -152,7 +159,7 @@ Z: #{sprintf("%0.3f",z)} (p: #{sprintf("%0.3f",z_probability)})
152
159
  end
153
160
  # Exact probability of finding values of U lower or equal to sample on U distribution. Use with caution with m*n>100000.
154
161
  # Uses u_sampling_distribution_as62
155
- def exact_probability
162
+ def probability_exact
156
163
  dist=UMannWhitney.u_sampling_distribution_as62(@n1,@n2)
157
164
  sum=0
158
165
  (0..@u.to_i).each {|i|
@@ -190,9 +197,9 @@ Z: #{sprintf("%0.3f",z)} (p: #{sprintf("%0.3f",z_probability)})
190
197
  (@u-mu).quo(ou)
191
198
  end
192
199
  # Assuming H_0, the proportion of cdf with values of U lower
193
- # than the sample.
200
+ # than the sample, using normal approximation.
194
201
  # Use with more than 30 cases per group.
195
- def z_probability
202
+ def probability_z
196
203
  (1-Distribution::Normal.cdf(z.abs()))*2
197
204
  end
198
205
  end
@@ -7,7 +7,7 @@ class Array
7
7
  end
8
8
  # Creates a new Statsample::Vector object of type :scale
9
9
  def to_scale(*args)
10
- Statsample::Vector.new(self,:scale,*args)
10
+ Statsample::Vector.new(self, :scale,*args)
11
11
  end
12
12
  end
13
13
 
@@ -24,12 +24,7 @@ module Statsample
24
24
  class Vector
25
25
  include Enumerable
26
26
  include Writable
27
- # DEFAULT OPTIONS
28
- DEFAULT_OPTIONS={
29
- :missing_values=>[],
30
- :today_values=>['NOW','TODAY', :NOW, :TODAY],
31
- :labels=>{}
32
- }
27
+ include Summarizable
33
28
  # Level of measurement. Could be :nominal, :ordinal or :scale
34
29
  attr_reader :type
35
30
  # Original data.
@@ -50,23 +45,39 @@ module Statsample
50
45
  attr_reader :gsl
51
46
  # Change label for specific values
52
47
  attr_accessor :labels
48
+ # Name of vector. Should be used for output by many classes
49
+ attr_accessor :name
50
+
53
51
  #
54
52
  # Creates a new Vector object.
55
- # [data] Array of data.
56
- # [type] Level of meausurement. See Vector#type
57
- # [opts] Options
58
- # [:missing_values] Array of missing values. See Vector#missing_values
59
- # [:today_values] Array of 'today' values. See Vector#today_values
60
- # [:labels] Labels for data values
53
+ # * <tt>data</tt> Array of data.
54
+ # * <tt>type</tt> Level of measurement. See Vector#type
55
+ # * <tt>opts</tt> Hash of options
56
+ # * <tt>:missing_values</tt> Array of missing values. See Vector#missing_values
57
+ # * <tt>:today_values</tt> Array of 'today' values. See Vector#today_values
58
+ # * <tt>:labels</tt> Labels for data values
59
+ # * <tt>:name</tt> Name of vector
61
60
  #
62
61
  def initialize(data=[], type=:nominal, opts=Hash.new)
63
62
  raise "Data should be an array" unless data.is_a? Array
64
63
  @data=data
65
64
  @type=type
66
- opts=DEFAULT_OPTIONS.merge(opts)
67
- @missing_values=opts[:missing_values]
68
- @labels=opts[:labels]
69
- @today_values=opts[:today_values]
65
+ opts_default={
66
+ :missing_values=>[],
67
+ :today_values=>['NOW','TODAY', :NOW, :TODAY],
68
+ :labels=>{},
69
+ :name=>nil
70
+ }
71
+ @opts=opts_default.merge(opts)
72
+ if @opts[:name].nil?
73
+ @@n_table||=0
74
+ @@n_table+=1
75
+ @opts[:name]="Vector #{@@n_table}"
76
+ end
77
+ @missing_values=@opts[:missing_values]
78
+ @labels=@opts[:labels]
79
+ @today_values=@opts[:today_values]
80
+ @name=@opts[:name]
70
81
  @valid_data=[]
71
82
  @data_with_nils=[]
72
83
  @date_data_with_nils=[]
@@ -80,12 +91,12 @@ module Statsample
80
91
  # Note: data, missing_values and labels are duplicated, so
81
92
  # changes on the original vector don't propagate to copies.
82
93
  def dup
83
- Vector.new(@data.dup,@type, :missing_values => @missing_values.dup, :labels => @labels.dup)
94
+ Vector.new(@data.dup,@type, :missing_values => @missing_values.dup, :labels => @labels.dup, :name=>@name.dup)
84
95
  end
85
96
  # Returns an empty duplicate of the vector. Maintains the type,
86
97
  # missing values and labels.
87
98
  def dup_empty
88
- Vector.new([],@type, :missing_values => @missing_values.dup, :labels => @labels.dup)
99
+ Vector.new([],@type, :missing_values => @missing_values.dup, :labels => @labels.dup, :name=>@name.dup)
89
100
  end
90
101
  # Raises an exception if type of vector is inferior to t type
91
102
  def check_type(t)
@@ -128,8 +139,8 @@ module Statsample
128
139
  # Vector equality.
129
140
  # Two vectors are equal if their data, missing values, type and labels are equal
130
141
  def ==(v2)
131
- raise TypeError,"Argument should be a Vector" unless v2.instance_of? Statsample::Vector
132
- @data==v2.data and @missing_values==v2.missing_values and @type==v2.type and @labels=v2.labels
142
+ raise TypeError,"Argument should be a Vector" unless v2.instance_of? Statsample::Vector
143
+ @data==v2.data and @missing_values==v2.missing_values and @type==v2.type and @labels==v2.labels
133
144
  end
134
145
 
135
146
  def _dump(i) # :nodoc:
@@ -189,8 +200,8 @@ module Statsample
189
200
  # Vector.set_valid_data at the end of your insertion cycle
190
201
  #
191
202
  def add(v,update_valid=true)
192
- @data.push(v)
193
- set_valid_data if update_valid
203
+ @data.push(v)
204
+ set_valid_data if update_valid
194
205
  end
195
206
  # Update valid_data, missing_data, data_with_nils and gsl
196
207
  # at the end of an insertion.
@@ -208,14 +219,14 @@ module Statsample
208
219
  # v.valid_data
209
220
  # => [2,3]
210
221
  def set_valid_data
211
- @valid_data.clear
212
- @missing_data.clear
213
- @data_with_nils.clear
214
- @date_data_with_nils.clear
215
- @gsl=nil
216
- set_valid_data_intern
217
- set_scale_data if(@type==:scale)
218
- set_date_data if(@type==:date)
222
+ @valid_data.clear
223
+ @missing_data.clear
224
+ @data_with_nils.clear
225
+ @date_data_with_nils.clear
226
+ @gsl=nil
227
+ set_valid_data_intern
228
+ set_scale_data if(@type==:scale)
229
+ set_date_data if(@type==:date)
219
230
  end
220
231
 
221
232
  if Statsample::STATSAMPLE__.respond_to?(:set_valid_data_intern)
@@ -228,21 +239,21 @@ module Statsample
228
239
  end
229
240
  end
230
241
  def _set_valid_data_intern #:nodoc:
231
- @data.each do |n|
232
- if is_valid? n
233
- @valid_data.push(n)
234
- @data_with_nils.push(n)
235
- else
236
- @data_with_nils.push(nil)
237
- @missing_data.push(n)
242
+ @data.each do |n|
243
+ if is_valid? n
244
+ @valid_data.push(n)
245
+ @data_with_nils.push(n)
246
+ else
247
+ @data_with_nils.push(nil)
248
+ @missing_data.push(n)
249
+ end
238
250
  end
239
- end
240
- @has_missing_data=@missing_data.size>0
251
+ @has_missing_data=@missing_data.size>0
241
252
  end
242
253
 
243
254
  # Returns true if data has one or more missing values
244
255
  def has_missing_data?
245
- @has_missing_data
256
+ @has_missing_data
246
257
  end
247
258
  # Retrieves label for value x. Retrieves x if
248
259
  # no label defined.
@@ -251,14 +262,14 @@ module Statsample
251
262
  end
252
263
  # Returns a Vector with data with labels replaced by the label.
253
264
  def vector_labeled
254
- d=@data.collect{|x|
255
- if @labels.has_key? x
256
- @labels[x]
257
- else
258
- x
259
- end
260
- }
261
- Vector.new(d,@type)
265
+ d=@data.collect{|x|
266
+ if @labels.has_key? x
267
+ @labels[x]
268
+ else
269
+ x
270
+ end
271
+ }
272
+ Vector.new(d,@type)
262
273
  end
263
274
  # Size of total data
264
275
  def size
@@ -427,13 +438,13 @@ module Statsample
427
438
  # In all the trials, every item has the same probability
428
439
  # of being selected.
429
440
  def sample_with_replacement(sample=1)
430
- if(@type!=:scale or !Statsample.has_gsl?)
431
- vds=@valid_data.size
432
- (0...sample).collect{ @valid_data[rand(vds)] }
433
- else
434
- r = GSL::Rng.alloc(GSL::Rng::MT19937,rand(10000))
435
- r.sample(@gsl, sample).to_a
436
- end
441
+ if(@type!=:scale or !Statsample.has_gsl?)
442
+ vds=@valid_data.size
443
+ (0...sample).collect{ @valid_data[rand(vds)] }
444
+ else
445
+ r = GSL::Rng.alloc(GSL::Rng::MT19937,rand(10000))
446
+ r.sample(@gsl, sample).to_a
447
+ end
437
448
  end
438
449
  # Returns a random sample of size n, without replacement,
439
450
  # only with valid data.
@@ -597,24 +608,24 @@ module Statsample
597
608
  def proportion(v=1)
598
609
  frequencies[v].quo(@valid_data.size)
599
610
  end
600
- def summary(out="")
601
- out << sprintf("n valid:%d\n",n_valid)
602
- out << sprintf("factors:%s\n",factors.join(","))
603
- out << "mode:"+mode.to_s+"\n"
604
- out << "Distribution:\n"
605
- frequencies.sort.each{|k,v|
606
- key=labels.has_key?(k) ? labels[k]:k
607
- out << sprintf("%s : %s (%0.2f%%)\n",key,v, (v.quo(n_valid))*100)
608
- }
609
- if(@type==:ordinal)
610
- out << "median:"+median.to_s+"\n"
611
- end
612
- if(@type==:scale)
613
- out << "mean:"+mean.to_s+"\n"
614
- out << "sd:"+sd.to_s+"\n"
615
-
611
+ def report_building(b)
612
+ b.section(:name=>name) do |s|
613
+ s.text _("n :%d") % n
614
+ s.text _("n valid:%d") % n_valid
615
+ s.text _("factors:%s") % factors.join(",")
616
+ s.text _("mode: %s") % mode
617
+ s.table(:name=>_("Distribution")) do |t|
618
+ frequencies.sort.each do |k,v|
619
+ key=labels.has_key?(k) ? labels[k]:k
620
+ t.row [key,v, ("%0.2f%%" % (v.quo(n_valid)*100))]
621
+ end
622
+ end
623
+ s.text _("median: %s") % median.to_s if(@type==:ordinal)
624
+ if(@type==:scale)
625
+ s.text _("mean: %0.4f") % mean
626
+ s.text _("sd: %0.4f") % sd.to_s
627
+ end
616
628
  end
617
- out
618
629
  end
619
630
 
620
631
  # Variance of p, according to population size
@@ -817,8 +828,7 @@ module Statsample
817
828
  @gsl.mean
818
829
  end
819
830
  def variance_sample(m=nil) # :nodoc:
820
- check_type :scale
821
-
831
+ check_type :scale
822
832
  m||=mean
823
833
  @gsl.variance_m
824
834
  end
@@ -881,7 +891,7 @@ module Statsample
881
891
  alias_method :sdp, :standard_deviation_population
882
892
  alias_method :sds, :standard_deviation_sample
883
893
  alias_method :cov, :coefficient_of_variation
884
- alias_method :variance, :variance_sample
894
+ alias_method :variance, :variance_sample
885
895
  alias_method :sd, :standard_deviation_sample
886
896
  alias_method :ss, :sum_of_squares
887
897
  end