statsample 0.9.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. data.tar.gz.sig +0 -0
  2. data/History.txt +20 -1
  3. data/Manifest.txt +8 -1
  4. data/README.txt +11 -7
  5. data/Rakefile +2 -2
  6. data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
  7. data/examples/dataset.rb +8 -0
  8. data/examples/multiple_regression.rb +1 -1
  9. data/examples/parallel_analysis.rb +29 -0
  10. data/examples/parallel_analysis_tetrachoric.rb +30 -0
  11. data/examples/vector.rb +6 -0
  12. data/lib/distribution.rb +16 -6
  13. data/lib/distribution/normal.rb +27 -20
  14. data/lib/distribution/normalbivariate.rb +1 -1
  15. data/lib/statsample.rb +19 -2
  16. data/lib/statsample/anova.rb +118 -16
  17. data/lib/statsample/bivariate.rb +27 -13
  18. data/lib/statsample/bivariate/polychoric.rb +18 -5
  19. data/lib/statsample/crosstab.rb +66 -74
  20. data/lib/statsample/dataset.rb +52 -45
  21. data/lib/statsample/dominanceanalysis.rb +2 -5
  22. data/lib/statsample/factor.rb +1 -1
  23. data/lib/statsample/factor/parallelanalysis.rb +122 -0
  24. data/lib/statsample/factor/pca.rb +23 -28
  25. data/lib/statsample/factor/principalaxis.rb +8 -3
  26. data/lib/statsample/matrix.rb +27 -24
  27. data/lib/statsample/mle.rb +11 -11
  28. data/lib/statsample/permutation.rb +2 -1
  29. data/lib/statsample/regression.rb +10 -8
  30. data/lib/statsample/regression/multiple/baseengine.rb +36 -25
  31. data/lib/statsample/regression/multiple/gslengine.rb +14 -0
  32. data/lib/statsample/regression/multiple/matrixengine.rb +4 -32
  33. data/lib/statsample/regression/multiple/rubyengine.rb +2 -6
  34. data/lib/statsample/regression/simple.rb +1 -1
  35. data/lib/statsample/reliability.rb +42 -54
  36. data/lib/statsample/test.rb +10 -6
  37. data/lib/statsample/test/f.rb +16 -26
  38. data/lib/statsample/test/levene.rb +4 -8
  39. data/lib/statsample/test/t.rb +30 -24
  40. data/lib/statsample/test/umannwhitney.rb +13 -6
  41. data/lib/statsample/vector.rb +86 -76
  42. data/po/es/statsample.mo +0 -0
  43. data/po/es/statsample.po +127 -94
  44. data/po/statsample.pot +114 -79
  45. data/test/test_anovaoneway.rb +27 -0
  46. data/test/test_anovawithvectors.rb +97 -0
  47. data/test/test_bivariate.rb +6 -57
  48. data/test/test_bivariate_polychoric.rb +65 -0
  49. data/test/test_crosstab.rb +6 -0
  50. data/test/test_dataset.rb +29 -1
  51. data/test/test_distribution.rb +6 -13
  52. data/test/test_dominance_analysis.rb +1 -1
  53. data/test/test_factor.rb +3 -3
  54. data/test/test_helpers.rb +18 -18
  55. data/test/test_matrix.rb +33 -20
  56. data/test/test_permutation.rb +36 -30
  57. data/test/test_regression.rb +26 -8
  58. data/test/test_reliability.rb +104 -14
  59. data/test/test_test_f.rb +11 -14
  60. data/test/test_test_t.rb +42 -35
  61. data/test/test_umannwhitney.rb +22 -10
  62. data/test/test_vector.rb +204 -102
  63. metadata +57 -81
  64. metadata.gz.sig +0 -0
  65. data/test/test_anova.rb +0 -24
@@ -6,7 +6,7 @@ module Statsample
6
6
  class << self
7
7
  # Covariance between two vectors
8
8
  def covariance(v1,v2)
9
- v1a,v2a=Statsample.only_valid(v1,v2)
9
+ v1a,v2a=Statsample.only_valid_clone(v1,v2)
10
10
  return nil if v1a.size==0
11
11
  if Statsample.has_gsl?
12
12
  GSL::Stats::covariance(v1a.gsl, v2a.gsl)
@@ -16,7 +16,7 @@ module Statsample
16
16
  end
17
17
  # Estimate the ML between two dichotomic vectors
18
18
  def maximum_likehood_dichotomic(pred,real)
19
- preda,reala=Statsample.only_valid(pred,real)
19
+ preda,reala=Statsample.only_valid_clone(pred,real)
20
20
  sum=0
21
21
  pred.each_index{|i|
22
22
  sum+=(real[i]*Math::log(pred[i])) + ((1-real[i])*Math::log(1-pred[i]))
@@ -29,14 +29,14 @@ module Statsample
29
29
  sum_of_squares(v1a,v2a) / (v1a.size-1)
30
30
  end
31
31
  def sum_of_squares(v1,v2)
32
- v1a,v2a=Statsample.only_valid(v1,v2)
32
+ v1a,v2a=Statsample.only_valid_clone(v1,v2)
33
33
  m1=v1a.mean
34
34
  m2=v2a.mean
35
35
  (v1a.size).times.inject(0) {|ac,i| ac+(v1a[i]-m1)*(v2a[i]-m2)}
36
36
  end
37
37
  # Calculate Pearson correlation coefficient (r) between 2 vectors
38
38
  def pearson(v1,v2)
39
- v1a,v2a=Statsample.only_valid(v1,v2)
39
+ v1a,v2a=Statsample.only_valid_clone(v1,v2)
40
40
  return nil if v1a.size ==0
41
41
  if Statsample.has_gsl?
42
42
  GSL::Stats::correlation(v1a.gsl, v2a.gsl)
@@ -45,7 +45,7 @@ module Statsample
45
45
  end
46
46
  end
47
47
  def pearson_slow(v1,v2) # :nodoc:
48
- v1a,v2a=Statsample.only_valid(v1,v2)
48
+ v1a,v2a=Statsample.only_valid_clone(v1,v2)
49
49
  # Calculate sum of squares
50
50
  ss=sum_of_squares(v1a,v2a)
51
51
  ss.quo(Math::sqrt(v1a.sum_of_squares) * Math::sqrt(v2a.sum_of_squares))
@@ -60,7 +60,7 @@ module Statsample
60
60
  # Retrieves the value for t test for a pearson correlation
61
61
  # between two vectors to test the null hipothesis of r=0
62
62
  def t_pearson(v1,v2)
63
- v1a,v2a=Statsample.only_valid(v1,v2)
63
+ v1a,v2a=Statsample.only_valid_clone(v1,v2)
64
64
  r=pearson(v1a,v2a)
65
65
  if(r==1.0)
66
66
  0
@@ -117,7 +117,7 @@ module Statsample
117
117
  # Correlation between v1 and v2, controling the effect of
118
118
  # control on both.
119
119
  def partial_correlation(v1,v2,control)
120
- v1a,v2a,cona=Statsample.only_valid(v1,v2,control)
120
+ v1a,v2a,cona=Statsample.only_valid_clone(v1,v2,control)
121
121
  rv1v2=pearson(v1a,v2a)
122
122
  rv1con=pearson(v1a,cona)
123
123
  rv2con=pearson(v2a,cona)
@@ -129,13 +129,20 @@ module Statsample
129
129
  # Order of rows and columns depends on Dataset#fields order
130
130
 
131
131
  def covariance_matrix(ds)
132
+ cache={}
132
133
  matrix=ds.collect_matrix do |row,col|
133
134
  if (ds[row].type!=:scale or ds[col].type!=:scale)
134
135
  nil
135
136
  elsif row==col
136
137
  ds[row].variance
137
138
  else
138
- covariance(ds[row], ds[col])
139
+ if cache[[col,row]].nil?
140
+ cov=covariance(ds[row],ds[col])
141
+ cache[[row,col]]=cov
142
+ cov
143
+ else
144
+ cache[[col,row]]
145
+ end
139
146
  end
140
147
  end
141
148
  matrix.extend CovariateMatrix
@@ -147,13 +154,20 @@ module Statsample
147
154
  # Order of rows and columns depends on Dataset#fields order
148
155
 
149
156
  def correlation_matrix(ds)
157
+ cache={}
150
158
  cm=ds.collect_matrix do |row,col|
151
159
  if row==col
152
160
  1.0
153
161
  elsif (ds[row].type!=:scale or ds[col].type!=:scale)
154
162
  nil
155
163
  else
156
- pearson(ds[row],ds[col])
164
+ if cache[[col,row]].nil?
165
+ r=pearson(ds[row],ds[col])
166
+ cache[[row,col]]=r
167
+ r
168
+ else
169
+ cache[[col,row]]
170
+ end
157
171
  end
158
172
  end
159
173
  cm.extend(Statsample::CovariateMatrix)
@@ -167,7 +181,7 @@ module Statsample
167
181
  if row==col
168
182
  ds[row].valid_data.size
169
183
  else
170
- rowa,rowb=Statsample.only_valid(ds[row],ds[col])
184
+ rowa,rowb=Statsample.only_valid_clone(ds[row],ds[col])
171
185
  rowa.size
172
186
  end
173
187
  end
@@ -179,7 +193,7 @@ module Statsample
179
193
  def correlation_probability_matrix(ds, tails=:both)
180
194
  rows=ds.fields.collect do |row|
181
195
  ds.fields.collect do |col|
182
- v1a,v2a=Statsample.only_valid(ds[row],ds[col])
196
+ v1a,v2a=Statsample.only_valid_clone(ds[row],ds[col])
183
197
  (row==col or ds[row].type!=:scale or ds[col].type!=:scale) ? nil : prop_pearson(t_pearson(ds[row],ds[col]), v1a.size, tails)
184
198
  end
185
199
  end
@@ -188,7 +202,7 @@ module Statsample
188
202
 
189
203
  # Spearman ranked correlation coefficient (rho) between 2 vectors
190
204
  def spearman(v1,v2)
191
- v1a,v2a=Statsample.only_valid(v1,v2)
205
+ v1a,v2a=Statsample.only_valid_clone(v1,v2)
192
206
  v1r,v2r=v1a.ranked(:scale),v2a.ranked(:scale)
193
207
  pearson(v1r,v2r)
194
208
  end
@@ -206,7 +220,7 @@ module Statsample
206
220
  # Kendall Rank Correlation Coefficient.
207
221
  # Based on Hervé Adbi article
208
222
  def tau_a(v1,v2)
209
- v1a,v2a=Statsample.only_valid(v1,v2)
223
+ v1a,v2a=Statsample.only_valid_clone(v1,v2)
210
224
  n=v1.size
211
225
  v1r,v2r=v1a.ranked(:scale),v2a.ranked(:scale)
212
226
  o1=ordered_pairs(v1r)
@@ -10,18 +10,29 @@ module Statsample
10
10
  # Polychoric correlation matrix.
11
11
  # Order of rows and columns depends on Dataset#fields order
12
12
  def self.polychoric_correlation_matrix(ds)
13
- ds.collect_matrix do |row,col|
13
+ cache={}
14
+ matrix=ds.collect_matrix do |row,col|
14
15
  if row==col
15
16
  1.0
16
17
  else
17
18
  begin
18
- polychoric(ds[row],ds[col])
19
+ if cache[[col,row]].nil?
20
+ poly=polychoric(ds[row],ds[col])
21
+ cache[[row,col]]=poly
22
+ poly
23
+ else
24
+ cache[[col,row]]
25
+ end
19
26
  rescue RuntimeError
20
27
  nil
21
28
  end
22
29
  end
23
30
  end
31
+ matrix.extend CovariateMatrix
32
+ matrix.fields=ds.fields
33
+ matrix
24
34
  end
35
+
25
36
  # = Polychoric correlation.
26
37
  #
27
38
  # The <em>polychoric</em> correlation is a measure of
@@ -83,6 +94,7 @@ module Statsample
83
94
 
84
95
 
85
96
  # Method of calculation of polychoric series.
97
+ # <tt>:two_step</tt> used by default.
86
98
  #
87
99
  # :two_step:: two-step ML, based on code by Gegenfurtner(1992).
88
100
  # :polychoric_series:: polychoric series estimate, using
@@ -107,7 +119,7 @@ module Statsample
107
119
  EPSILON=1e-6
108
120
  MINIMIZER_TYPE_TWO_STEP="brent"
109
121
  MINIMIZER_TYPE_JOINT="nmsimplex"
110
- def new_with_vectors(v1,v2)
122
+ def self.new_with_vectors(v1,v2)
111
123
  Polychoric.new(Crosstab.new(v1,v2).to_matrix)
112
124
  end
113
125
  # Params:
@@ -249,6 +261,7 @@ module Statsample
249
261
  b=(j==@nc-1) ? 100: beta[j]
250
262
  #puts "a:#{a} b:#{b}"
251
263
  pd[i][j]=Distribution::NormalBivariate.cdf(a, b, rho)
264
+
252
265
  end
253
266
  pc[i][j] = pd[i][j]
254
267
  pd[i][j] = pd[i][j] - pc[i-1][j] if i>0
@@ -256,7 +269,7 @@ module Statsample
256
269
  pd[i][j] = pd[i][j] + pc[i-1][j-1] if (i>0 and j>0)
257
270
  res= pd[i][j]
258
271
  #puts "i:#{i} | j:#{j} | ac: #{sprintf("%0.4f", pc[i][j])} | pd: #{sprintf("%0.4f", pd[i][j])} | res:#{sprintf("%0.4f", res)}"
259
- if (res==0)
272
+ if (res<=0)
260
273
  # puts "Correccion"
261
274
  res=1e-16
262
275
  end
@@ -328,7 +341,7 @@ module Statsample
328
341
  min.epsilon=@epsilon
329
342
  min.expected=0
330
343
  min.iterate
331
- @log+=min.log
344
+ @log+=min.log.to_table.to_s
332
345
  @r=min.x_minimum
333
346
  @loglike_model=-min.f_minimum
334
347
  puts @log if @debug
@@ -4,50 +4,44 @@ module Statsample
4
4
  # The first vector will be at rows and the second will the the columns
5
5
  #
6
6
  class Crosstab
7
- include GetText
8
- bindtextdomain("statsample")
7
+ include Summarizable
9
8
  attr_reader :v_rows, :v_cols
10
9
  attr_accessor :row_label, :column_label, :name, :percentage_row, :percentage_column, :percentage_total
11
10
  def initialize(v1, v2, opts=Hash.new)
12
- raise ArgumentError, "Both arguments should be Vectors" unless v1.is_a? Statsample::Vector and v2.is_a? Statsample::Vector
13
- raise ArgumentError, "Vectors should be the same size" unless v1.size==v2.size
14
- @v_rows, @v_cols=Statsample.only_valid(v1,v2)
15
- @cases=@v_rows.size
16
- @row_label=nil
17
- @column_label=nil
18
- @name=nil
19
- @percentage_row=@percentage_column=@percentage_total=false
20
- opts.each{|k,v|
21
- self.send("#{k}=",v) if self.respond_to? k
22
- }
23
- if(@name.nil?)
24
- if (!@row_label.nil? and !@column_label.nil?)
25
- @name=_("Crosstab %s - %s") % [@row_label, @column_label]
26
- else
27
- @name=_("Crosstab")
28
- end
29
- end
11
+ raise ArgumentError, "Both arguments should be Vectors" unless v1.is_a? Statsample::Vector and v2.is_a? Statsample::Vector
12
+ raise ArgumentError, "Vectors should be the same size" unless v1.size==v2.size
13
+ @v_rows, @v_cols=Statsample.only_valid_clone(v1,v2)
14
+ @cases=@v_rows.size
15
+ @row_label=v1.name
16
+ @column_label=v2.name
17
+ @name=nil
18
+ @percentage_row=@percentage_column=@percentage_total=false
19
+ opts.each{|k,v|
20
+ self.send("#{k}=",v) if self.respond_to? k
21
+ }
22
+ @name||=_("Crosstab %s - %s") % [@row_label, @column_label]
30
23
  end
31
24
  def rows_names
32
- @v_rows.factors.sort
25
+ @v_rows.factors.sort
33
26
  end
34
27
  def cols_names
35
- @v_cols.factors.sort
28
+ @v_cols.factors.sort
36
29
  end
37
30
  def rows_total
38
- @v_rows.frequencies
31
+ @v_rows.frequencies
39
32
  end
40
33
  def cols_total
41
- @v_cols.frequencies
34
+ @v_cols.frequencies
42
35
  end
36
+
43
37
  def frequencies
44
- base=rows_names.inject([]){|s,row|
45
- s+=cols_names.collect{|col| [row,col]}
46
- }.inject({}) {|s,par|
47
- s[par]=0
48
- s
49
- }
50
- base.update(Statsample::vector_cols_matrix(@v_rows,@v_cols).to_a.to_vector.frequencies)
38
+ base=rows_names.inject([]){|s,row|
39
+ s+=cols_names.collect{|col| [row,col]}
40
+ }.inject({}) {|s,par|
41
+ s[par]=0
42
+ s
43
+ }
44
+ base.update(Statsample::vector_cols_matrix(@v_rows,@v_cols).to_a.to_vector.frequencies)
51
45
  end
52
46
  def to_matrix
53
47
  f=frequencies
@@ -93,52 +87,50 @@ module Statsample
93
87
  def cols_empty_hash
94
88
  cols_names.inject({}) {|a,x| a[x]=0;a}
95
89
  end
96
- def report_building(generator)
97
- anchor=generator.toc_entry(_("Crosstab: ")+name)
98
- generator.html "<div class='crosstab'>"+_("Crosstab")+" #{@name}<a name='#{anchor}'></a>"
99
- fq=frequencies
100
- rn=rows_names
101
- cn=cols_names
102
- total=0
103
- total_cols=cols_empty_hash
104
- generator.text "Chi Square: #{chi_square}"
105
- generator.text(_("Rows: %s") % @row_label) unless @row_label.nil?
106
- generator.text(_("Columns: %s") % @column_label) unless @column_label.nil?
107
-
108
- t=ReportBuilder::Table.new(:name=>@name+" - "+_("Raw"), :header=>[""]+cols_names.collect {|c| @v_cols.labeling(c)}+[_("Total")])
109
- rn.each do |row|
110
- total_row=0
111
- t_row=[@v_rows.labeling(row)]
112
- cn.each do |col|
113
- data=fq[[row,col]]
114
- total_row+=fq[[row,col]]
115
- total+=fq[[row,col]]
116
- total_cols[col]+=fq[[row,col]]
117
- t_row.push(data)
90
+ def report_building(builder)
91
+ builder.section(:name=>@name) do |generator|
92
+ fq=frequencies
93
+ rn=rows_names
94
+ cn=cols_names
95
+ total=0
96
+ total_cols=cols_empty_hash
97
+ generator.text "Chi Square: #{chi_square}"
98
+ generator.text(_("Rows: %s") % @row_label) unless @row_label.nil?
99
+ generator.text(_("Columns: %s") % @column_label) unless @column_label.nil?
100
+
101
+ t=ReportBuilder::Table.new(:name=>@name+" - "+_("Raw"), :header=>[""]+cols_names.collect {|c| @v_cols.labeling(c)}+[_("Total")])
102
+ rn.each do |row|
103
+ total_row=0
104
+ t_row=[@v_rows.labeling(row)]
105
+ cn.each do |col|
106
+ data=fq[[row,col]]
107
+ total_row+=fq[[row,col]]
108
+ total+=fq[[row,col]]
109
+ total_cols[col]+=fq[[row,col]]
110
+ t_row.push(data)
111
+ end
112
+ t_row.push(total_row)
113
+ t.row(t_row)
114
+ end
115
+ t.hr
116
+ t_row=[_("Total")]
117
+ cn.each do |v|
118
+ t_row.push(total_cols[v])
118
119
  end
119
- t_row.push(total_row)
120
+ t_row.push(total)
120
121
  t.row(t_row)
122
+ generator.parse_element(t)
123
+
124
+ if(@percentage_row)
125
+ table_percentage(generator,:row)
126
+ end
127
+ if(@percentage_column)
128
+ table_percentage(generator,:column)
129
+ end
130
+ if(@percentage_total)
131
+ table_percentage(generator,:total)
132
+ end
121
133
  end
122
- t.hr
123
- t_row=[_("Total")]
124
- cn.each do |v|
125
- t_row.push(total_cols[v])
126
- end
127
- t_row.push(total)
128
- t.row(t_row)
129
- generator.parse_element(t)
130
-
131
- if(@percentage_row)
132
- table_percentage(generator,:row)
133
- end
134
- if(@percentage_column)
135
- table_percentage(generator,:column)
136
- end
137
- if(@percentage_total)
138
- table_percentage(generator,:total)
139
- end
140
-
141
- generator.html("</div>")
142
134
  end
143
135
 
144
136
 
@@ -56,16 +56,17 @@ module Statsample
56
56
 
57
57
  class Dataset
58
58
  include Writable
59
+ include Summarizable
59
60
  # Hash of Statsample::Vector
60
61
  attr_reader :vectors
61
62
  # Ordered names of vectors
62
63
  attr_reader :fields
64
+ # Name of dataset
65
+ attr_accessor:name
63
66
  # Number of cases
64
67
  attr_reader :cases
65
68
  # Location of pointer on enumerations methods (like #each)
66
69
  attr_reader :i
67
- # Deprecated: Label of vectors
68
- attr_accessor :labels
69
70
 
70
71
  # Generates a new dataset, using three vectors
71
72
  # - Rows
@@ -122,10 +123,12 @@ module Statsample
122
123
  # [fields] Array of names for vectors. Is only used for set the
123
124
  # order of variables. If empty, vectors keys on alfabethic order as
124
125
  # used as fields
125
- # [labels] Hash to set names for fields.
126
126
 
127
127
  #
128
- def initialize(vectors={}, fields=[], labels={})
128
+ def initialize(vectors={}, fields=[])
129
+ @@n_dataset||=0
130
+ @@n_dataset+=1
131
+ @name=_("Dataset %d") % @@n_dataset
129
132
  if vectors.instance_of? Array
130
133
  @fields=vectors.dup
131
134
  @vectors=vectors.inject({}){|a,x| a[x]=Statsample::Vector.new(); a}
@@ -137,7 +140,6 @@ module Statsample
137
140
  check_length
138
141
  end
139
142
  @i=nil
140
- @labels=labels
141
143
  end
142
144
  def to_gsl_matrix
143
145
  matrix=GSL::Matrix.alloc(cases,@vectors.size)
@@ -146,11 +148,7 @@ module Statsample
146
148
  end
147
149
  matrix
148
150
  end
149
- # Retrieves label for a vector, giving a field name.
150
- def label(v_id)
151
- raise "Vector #{v} doesn't exists" unless @fields.include? v_id
152
- @labels[v_id].nil? ? v_id : @labels[v_id]
153
- end
151
+
154
152
  # Creates a copy of the given dataset, deleting all the cases with
155
153
  # missing data on one of the vectors
156
154
  def dup_only_valid
@@ -172,7 +170,8 @@ module Statsample
172
170
  @fields.slice(@fields.index(from)..@fields.index(to))
173
171
  end
174
172
  # Returns a duplicate of the Database
175
- # If fields given, only include those vectors
173
+ # If fields given, only include those vectors.
174
+ # Every vector will be dup
176
175
  def dup(*fields_to_include)
177
176
  if fields_to_include.size==1 and fields_to_include[0].is_a? Array
178
177
  fields_to_include=fields_to_include[0]
@@ -180,14 +179,27 @@ module Statsample
180
179
  fields_to_include=@fields if fields_to_include.size==0
181
180
  vectors={}
182
181
  fields=[]
183
- new_labels={}
184
182
  fields_to_include.each{|f|
185
183
  raise "Vector #{f} doesn't exists" unless @vectors.has_key? f
186
184
  vectors[f]=@vectors[f].dup
187
- new_labels[f]=@labels[f]
188
185
  fields.push(f)
189
186
  }
190
- Dataset.new(vectors,fields,new_labels)
187
+ Dataset.new(vectors,fields)
188
+ end
189
+ # Returns a shallow copy of Dataset.
190
+ # Object id will be distinct, but @vectors will be the same.
191
+ def clone(*fields_to_include)
192
+ if fields_to_include.size==1 and fields_to_include[0].is_a? Array
193
+ fields_to_include=fields_to_include[0]
194
+ end
195
+ fields_to_include=@fields.dup if fields_to_include.size==0
196
+ ds=Dataset.new
197
+ fields_to_include.each{|f|
198
+ raise "Vector #{f} doesn't exists" unless @vectors.has_key? f
199
+ ds[f]=@vectors[f]
200
+ }
201
+ ds.fields=fields_to_include
202
+ ds
191
203
  end
192
204
  # Creates a copy of the given dataset, without data on vectors
193
205
  def dup_empty
@@ -195,7 +207,7 @@ module Statsample
195
207
  a[v[0]]=v[1].dup_empty
196
208
  a
197
209
  }
198
- Dataset.new(vectors,@fields.dup,@labels.dup)
210
+ Dataset.new(vectors,@fields.dup)
199
211
  end
200
212
  # Merge vectors from two datasets
201
213
  # In case of name collition, the vectors names are changed to
@@ -216,14 +228,14 @@ module Statsample
216
228
  ds_new.update_valid_data
217
229
  ds_new
218
230
  end
219
- # Returns a dataset with standarized data
220
- def standarize
221
- ds=dup()
222
- ds.fields.each {|f|
223
- ds[f]=ds[f].vector_standarized
224
- }
225
- ds
226
- end
231
+ # Returns a dataset with standarized data
232
+ def standarize
233
+ ds=dup()
234
+ ds.fields.each do |f|
235
+ ds[f]=ds[f].vector_standarized
236
+ end
237
+ ds
238
+ end
227
239
  # Generate a matrix, based on fields of dataset
228
240
  def collect_matrix
229
241
  rows=@fields.collect{|row|
@@ -233,7 +245,7 @@ module Statsample
233
245
  }
234
246
  Matrix.rows(rows)
235
247
  end
236
- # We have the same datasets if the labels and vectors are the same
248
+ # We have the same datasets if vectors and fields are the same
237
249
  def ==(d2)
238
250
  @vectors==d2.vectors and @fields==d2.fields
239
251
  end
@@ -305,12 +317,12 @@ module Statsample
305
317
  @vectors.delete(name)
306
318
  end
307
319
 
308
- def add_vectors_by_split_recode(name,join='-',sep=Statsample::SPLIT_TOKEN)
309
- split=@vectors[name].split_by_separator(sep)
320
+ def add_vectors_by_split_recode(name_,join='-',sep=Statsample::SPLIT_TOKEN)
321
+ split=@vectors[name_].split_by_separator(sep)
310
322
  i=1
311
323
  split.each{|k,v|
312
- new_field=name+join+i.to_s
313
- @labels[new_field]=name+":"+k
324
+ new_field=name_+join+i.to_s
325
+ v.name=name_+":"+k
314
326
  add_vector(new_field,v)
315
327
  i+=1
316
328
  }
@@ -505,15 +517,13 @@ module Statsample
505
517
  end
506
518
  # Returns the vector named i
507
519
  def[](i)
508
- if i.is_a? String
509
- raise Exception,"Vector '#{i}' doesn't exists on dataset" unless @vectors.has_key?(i)
510
- @vectors[i]
511
- elsif i.is_a? Range
520
+ if i.is_a? Range
512
521
  fields=from_to(i.begin,i.end)
513
522
  vectors=fields.inject({}) {|a,v| a[v]=@vectors[v];a}
514
523
  ds=Dataset.new(vectors,fields)
515
524
  else
516
- raise ArgumentError, "You need a String or a Range"
525
+ raise Exception,"Vector '#{i}' doesn't exists on dataset" unless @vectors.has_key?(i)
526
+ @vectors[i]
517
527
  end
518
528
  end
519
529
  # Retrieves a Statsample::Vector, based on the result
@@ -702,7 +712,7 @@ module Statsample
702
712
  vr
703
713
  end
704
714
  def to_s
705
- "#<"+self.class.to_s+":"+self.object_id.to_s+" @fields=["+@fields.join(",")+"] labels="+@labels.inspect+" cases="+@vectors[@fields[0]].size.to_s
715
+ "#<"+self.class.to_s+":"+self.object_id.to_s+" @fields=["+@fields.join(",")+"] cases="+@vectors[@fields[0]].size.to_s
706
716
  end
707
717
  def inspect
708
718
  self.to_s
@@ -779,17 +789,14 @@ module Statsample
779
789
  ds
780
790
  end
781
791
 
782
- def summary
783
- out=""
784
- out << "Summary for dataset\n"
785
- @vectors.each{|k,v|
786
- out << "###############\n"
787
- out << "Vector #{k}:\n"
788
- out << v.summary
789
- out << "###############\n"
790
-
791
- }
792
- out
792
+ def report_building(b)
793
+ b.section(:name=>@name) do |g|
794
+ g.text _"Cases: %d" % cases
795
+
796
+ @fields.each do |f|
797
+ g.parse_element(@vectors[f])
798
+ end
799
+ end
793
800
  end
794
801
  def as_r
795
802
  require 'rsruby/dataframe'