statsample 0.9.0 → 0.10.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (65) hide show
  1. data.tar.gz.sig +0 -0
  2. data/History.txt +20 -1
  3. data/Manifest.txt +8 -1
  4. data/README.txt +11 -7
  5. data/Rakefile +2 -2
  6. data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
  7. data/examples/dataset.rb +8 -0
  8. data/examples/multiple_regression.rb +1 -1
  9. data/examples/parallel_analysis.rb +29 -0
  10. data/examples/parallel_analysis_tetrachoric.rb +30 -0
  11. data/examples/vector.rb +6 -0
  12. data/lib/distribution.rb +16 -6
  13. data/lib/distribution/normal.rb +27 -20
  14. data/lib/distribution/normalbivariate.rb +1 -1
  15. data/lib/statsample.rb +19 -2
  16. data/lib/statsample/anova.rb +118 -16
  17. data/lib/statsample/bivariate.rb +27 -13
  18. data/lib/statsample/bivariate/polychoric.rb +18 -5
  19. data/lib/statsample/crosstab.rb +66 -74
  20. data/lib/statsample/dataset.rb +52 -45
  21. data/lib/statsample/dominanceanalysis.rb +2 -5
  22. data/lib/statsample/factor.rb +1 -1
  23. data/lib/statsample/factor/parallelanalysis.rb +122 -0
  24. data/lib/statsample/factor/pca.rb +23 -28
  25. data/lib/statsample/factor/principalaxis.rb +8 -3
  26. data/lib/statsample/matrix.rb +27 -24
  27. data/lib/statsample/mle.rb +11 -11
  28. data/lib/statsample/permutation.rb +2 -1
  29. data/lib/statsample/regression.rb +10 -8
  30. data/lib/statsample/regression/multiple/baseengine.rb +36 -25
  31. data/lib/statsample/regression/multiple/gslengine.rb +14 -0
  32. data/lib/statsample/regression/multiple/matrixengine.rb +4 -32
  33. data/lib/statsample/regression/multiple/rubyengine.rb +2 -6
  34. data/lib/statsample/regression/simple.rb +1 -1
  35. data/lib/statsample/reliability.rb +42 -54
  36. data/lib/statsample/test.rb +10 -6
  37. data/lib/statsample/test/f.rb +16 -26
  38. data/lib/statsample/test/levene.rb +4 -8
  39. data/lib/statsample/test/t.rb +30 -24
  40. data/lib/statsample/test/umannwhitney.rb +13 -6
  41. data/lib/statsample/vector.rb +86 -76
  42. data/po/es/statsample.mo +0 -0
  43. data/po/es/statsample.po +127 -94
  44. data/po/statsample.pot +114 -79
  45. data/test/test_anovaoneway.rb +27 -0
  46. data/test/test_anovawithvectors.rb +97 -0
  47. data/test/test_bivariate.rb +6 -57
  48. data/test/test_bivariate_polychoric.rb +65 -0
  49. data/test/test_crosstab.rb +6 -0
  50. data/test/test_dataset.rb +29 -1
  51. data/test/test_distribution.rb +6 -13
  52. data/test/test_dominance_analysis.rb +1 -1
  53. data/test/test_factor.rb +3 -3
  54. data/test/test_helpers.rb +18 -18
  55. data/test/test_matrix.rb +33 -20
  56. data/test/test_permutation.rb +36 -30
  57. data/test/test_regression.rb +26 -8
  58. data/test/test_reliability.rb +104 -14
  59. data/test/test_test_f.rb +11 -14
  60. data/test/test_test_t.rb +42 -35
  61. data/test/test_umannwhitney.rb +22 -10
  62. data/test/test_vector.rb +204 -102
  63. metadata +57 -81
  64. metadata.gz.sig +0 -0
  65. data/test/test_anova.rb +0 -24
@@ -6,7 +6,7 @@ module Statsample
6
6
  class << self
7
7
  # Covariance between two vectors
8
8
  def covariance(v1,v2)
9
- v1a,v2a=Statsample.only_valid(v1,v2)
9
+ v1a,v2a=Statsample.only_valid_clone(v1,v2)
10
10
  return nil if v1a.size==0
11
11
  if Statsample.has_gsl?
12
12
  GSL::Stats::covariance(v1a.gsl, v2a.gsl)
@@ -16,7 +16,7 @@ module Statsample
16
16
  end
17
17
  # Estimate the ML between two dichotomic vectors
18
18
  def maximum_likehood_dichotomic(pred,real)
19
- preda,reala=Statsample.only_valid(pred,real)
19
+ preda,reala=Statsample.only_valid_clone(pred,real)
20
20
  sum=0
21
21
  pred.each_index{|i|
22
22
  sum+=(real[i]*Math::log(pred[i])) + ((1-real[i])*Math::log(1-pred[i]))
@@ -29,14 +29,14 @@ module Statsample
29
29
  sum_of_squares(v1a,v2a) / (v1a.size-1)
30
30
  end
31
31
  def sum_of_squares(v1,v2)
32
- v1a,v2a=Statsample.only_valid(v1,v2)
32
+ v1a,v2a=Statsample.only_valid_clone(v1,v2)
33
33
  m1=v1a.mean
34
34
  m2=v2a.mean
35
35
  (v1a.size).times.inject(0) {|ac,i| ac+(v1a[i]-m1)*(v2a[i]-m2)}
36
36
  end
37
37
  # Calculate Pearson correlation coefficient (r) between 2 vectors
38
38
  def pearson(v1,v2)
39
- v1a,v2a=Statsample.only_valid(v1,v2)
39
+ v1a,v2a=Statsample.only_valid_clone(v1,v2)
40
40
  return nil if v1a.size ==0
41
41
  if Statsample.has_gsl?
42
42
  GSL::Stats::correlation(v1a.gsl, v2a.gsl)
@@ -45,7 +45,7 @@ module Statsample
45
45
  end
46
46
  end
47
47
  def pearson_slow(v1,v2) # :nodoc:
48
- v1a,v2a=Statsample.only_valid(v1,v2)
48
+ v1a,v2a=Statsample.only_valid_clone(v1,v2)
49
49
  # Calculate sum of squares
50
50
  ss=sum_of_squares(v1a,v2a)
51
51
  ss.quo(Math::sqrt(v1a.sum_of_squares) * Math::sqrt(v2a.sum_of_squares))
@@ -60,7 +60,7 @@ module Statsample
60
60
  # Retrieves the value for t test for a pearson correlation
61
61
  # between two vectors to test the null hypothesis of r=0
62
62
  def t_pearson(v1,v2)
63
- v1a,v2a=Statsample.only_valid(v1,v2)
63
+ v1a,v2a=Statsample.only_valid_clone(v1,v2)
64
64
  r=pearson(v1a,v2a)
65
65
  if(r==1.0)
66
66
  0
@@ -117,7 +117,7 @@ module Statsample
117
117
  # Correlation between v1 and v2, controlling the effect of
118
118
  # control on both.
119
119
  def partial_correlation(v1,v2,control)
120
- v1a,v2a,cona=Statsample.only_valid(v1,v2,control)
120
+ v1a,v2a,cona=Statsample.only_valid_clone(v1,v2,control)
121
121
  rv1v2=pearson(v1a,v2a)
122
122
  rv1con=pearson(v1a,cona)
123
123
  rv2con=pearson(v2a,cona)
@@ -129,13 +129,20 @@ module Statsample
129
129
  # Order of rows and columns depends on Dataset#fields order
130
130
 
131
131
  def covariance_matrix(ds)
132
+ cache={}
132
133
  matrix=ds.collect_matrix do |row,col|
133
134
  if (ds[row].type!=:scale or ds[col].type!=:scale)
134
135
  nil
135
136
  elsif row==col
136
137
  ds[row].variance
137
138
  else
138
- covariance(ds[row], ds[col])
139
+ if cache[[col,row]].nil?
140
+ cov=covariance(ds[row],ds[col])
141
+ cache[[row,col]]=cov
142
+ cov
143
+ else
144
+ cache[[col,row]]
145
+ end
139
146
  end
140
147
  end
141
148
  matrix.extend CovariateMatrix
@@ -147,13 +154,20 @@ module Statsample
147
154
  # Order of rows and columns depends on Dataset#fields order
148
155
 
149
156
  def correlation_matrix(ds)
157
+ cache={}
150
158
  cm=ds.collect_matrix do |row,col|
151
159
  if row==col
152
160
  1.0
153
161
  elsif (ds[row].type!=:scale or ds[col].type!=:scale)
154
162
  nil
155
163
  else
156
- pearson(ds[row],ds[col])
164
+ if cache[[col,row]].nil?
165
+ r=pearson(ds[row],ds[col])
166
+ cache[[row,col]]=r
167
+ r
168
+ else
169
+ cache[[col,row]]
170
+ end
157
171
  end
158
172
  end
159
173
  cm.extend(Statsample::CovariateMatrix)
@@ -167,7 +181,7 @@ module Statsample
167
181
  if row==col
168
182
  ds[row].valid_data.size
169
183
  else
170
- rowa,rowb=Statsample.only_valid(ds[row],ds[col])
184
+ rowa,rowb=Statsample.only_valid_clone(ds[row],ds[col])
171
185
  rowa.size
172
186
  end
173
187
  end
@@ -179,7 +193,7 @@ module Statsample
179
193
  def correlation_probability_matrix(ds, tails=:both)
180
194
  rows=ds.fields.collect do |row|
181
195
  ds.fields.collect do |col|
182
- v1a,v2a=Statsample.only_valid(ds[row],ds[col])
196
+ v1a,v2a=Statsample.only_valid_clone(ds[row],ds[col])
183
197
  (row==col or ds[row].type!=:scale or ds[col].type!=:scale) ? nil : prop_pearson(t_pearson(ds[row],ds[col]), v1a.size, tails)
184
198
  end
185
199
  end
@@ -188,7 +202,7 @@ module Statsample
188
202
 
189
203
  # Spearman ranked correlation coefficient (rho) between 2 vectors
190
204
  def spearman(v1,v2)
191
- v1a,v2a=Statsample.only_valid(v1,v2)
205
+ v1a,v2a=Statsample.only_valid_clone(v1,v2)
192
206
  v1r,v2r=v1a.ranked(:scale),v2a.ranked(:scale)
193
207
  pearson(v1r,v2r)
194
208
  end
@@ -206,7 +220,7 @@ module Statsample
206
220
  # Kendall Rank Correlation Coefficient.
207
221
  # Based on Hervé Abdi's article
208
222
  def tau_a(v1,v2)
209
- v1a,v2a=Statsample.only_valid(v1,v2)
223
+ v1a,v2a=Statsample.only_valid_clone(v1,v2)
210
224
  n=v1.size
211
225
  v1r,v2r=v1a.ranked(:scale),v2a.ranked(:scale)
212
226
  o1=ordered_pairs(v1r)
@@ -10,18 +10,29 @@ module Statsample
10
10
  # Polychoric correlation matrix.
11
11
  # Order of rows and columns depends on Dataset#fields order
12
12
  def self.polychoric_correlation_matrix(ds)
13
- ds.collect_matrix do |row,col|
13
+ cache={}
14
+ matrix=ds.collect_matrix do |row,col|
14
15
  if row==col
15
16
  1.0
16
17
  else
17
18
  begin
18
- polychoric(ds[row],ds[col])
19
+ if cache[[col,row]].nil?
20
+ poly=polychoric(ds[row],ds[col])
21
+ cache[[row,col]]=poly
22
+ poly
23
+ else
24
+ cache[[col,row]]
25
+ end
19
26
  rescue RuntimeError
20
27
  nil
21
28
  end
22
29
  end
23
30
  end
31
+ matrix.extend CovariateMatrix
32
+ matrix.fields=ds.fields
33
+ matrix
24
34
  end
35
+
25
36
  # = Polychoric correlation.
26
37
  #
27
38
  # The <em>polychoric</em> correlation is a measure of
@@ -83,6 +94,7 @@ module Statsample
83
94
 
84
95
 
85
96
  # Method of calculation of polychoric series.
97
+ # <tt>:two_step</tt> used by default.
86
98
  #
87
99
  # :two_step:: two-step ML, based on code by Gegenfurtner(1992).
88
100
  # :polychoric_series:: polychoric series estimate, using
@@ -107,7 +119,7 @@ module Statsample
107
119
  EPSILON=1e-6
108
120
  MINIMIZER_TYPE_TWO_STEP="brent"
109
121
  MINIMIZER_TYPE_JOINT="nmsimplex"
110
- def new_with_vectors(v1,v2)
122
+ def self.new_with_vectors(v1,v2)
111
123
  Polychoric.new(Crosstab.new(v1,v2).to_matrix)
112
124
  end
113
125
  # Params:
@@ -249,6 +261,7 @@ module Statsample
249
261
  b=(j==@nc-1) ? 100: beta[j]
250
262
  #puts "a:#{a} b:#{b}"
251
263
  pd[i][j]=Distribution::NormalBivariate.cdf(a, b, rho)
264
+
252
265
  end
253
266
  pc[i][j] = pd[i][j]
254
267
  pd[i][j] = pd[i][j] - pc[i-1][j] if i>0
@@ -256,7 +269,7 @@ module Statsample
256
269
  pd[i][j] = pd[i][j] + pc[i-1][j-1] if (i>0 and j>0)
257
270
  res= pd[i][j]
258
271
  #puts "i:#{i} | j:#{j} | ac: #{sprintf("%0.4f", pc[i][j])} | pd: #{sprintf("%0.4f", pd[i][j])} | res:#{sprintf("%0.4f", res)}"
259
- if (res==0)
272
+ if (res<=0)
260
273
  # puts "Correccion"
261
274
  res=1e-16
262
275
  end
@@ -328,7 +341,7 @@ module Statsample
328
341
  min.epsilon=@epsilon
329
342
  min.expected=0
330
343
  min.iterate
331
- @log+=min.log
344
+ @log+=min.log.to_table.to_s
332
345
  @r=min.x_minimum
333
346
  @loglike_model=-min.f_minimum
334
347
  puts @log if @debug
@@ -4,50 +4,44 @@ module Statsample
4
4
  # The first vector will be at rows and the second will be the columns
5
5
  #
6
6
  class Crosstab
7
- include GetText
8
- bindtextdomain("statsample")
7
+ include Summarizable
9
8
  attr_reader :v_rows, :v_cols
10
9
  attr_accessor :row_label, :column_label, :name, :percentage_row, :percentage_column, :percentage_total
11
10
  def initialize(v1, v2, opts=Hash.new)
12
- raise ArgumentError, "Both arguments should be Vectors" unless v1.is_a? Statsample::Vector and v2.is_a? Statsample::Vector
13
- raise ArgumentError, "Vectors should be the same size" unless v1.size==v2.size
14
- @v_rows, @v_cols=Statsample.only_valid(v1,v2)
15
- @cases=@v_rows.size
16
- @row_label=nil
17
- @column_label=nil
18
- @name=nil
19
- @percentage_row=@percentage_column=@percentage_total=false
20
- opts.each{|k,v|
21
- self.send("#{k}=",v) if self.respond_to? k
22
- }
23
- if(@name.nil?)
24
- if (!@row_label.nil? and !@column_label.nil?)
25
- @name=_("Crosstab %s - %s") % [@row_label, @column_label]
26
- else
27
- @name=_("Crosstab")
28
- end
29
- end
11
+ raise ArgumentError, "Both arguments should be Vectors" unless v1.is_a? Statsample::Vector and v2.is_a? Statsample::Vector
12
+ raise ArgumentError, "Vectors should be the same size" unless v1.size==v2.size
13
+ @v_rows, @v_cols=Statsample.only_valid_clone(v1,v2)
14
+ @cases=@v_rows.size
15
+ @row_label=v1.name
16
+ @column_label=v2.name
17
+ @name=nil
18
+ @percentage_row=@percentage_column=@percentage_total=false
19
+ opts.each{|k,v|
20
+ self.send("#{k}=",v) if self.respond_to? k
21
+ }
22
+ @name||=_("Crosstab %s - %s") % [@row_label, @column_label]
30
23
  end
31
24
  def rows_names
32
- @v_rows.factors.sort
25
+ @v_rows.factors.sort
33
26
  end
34
27
  def cols_names
35
- @v_cols.factors.sort
28
+ @v_cols.factors.sort
36
29
  end
37
30
  def rows_total
38
- @v_rows.frequencies
31
+ @v_rows.frequencies
39
32
  end
40
33
  def cols_total
41
- @v_cols.frequencies
34
+ @v_cols.frequencies
42
35
  end
36
+
43
37
  def frequencies
44
- base=rows_names.inject([]){|s,row|
45
- s+=cols_names.collect{|col| [row,col]}
46
- }.inject({}) {|s,par|
47
- s[par]=0
48
- s
49
- }
50
- base.update(Statsample::vector_cols_matrix(@v_rows,@v_cols).to_a.to_vector.frequencies)
38
+ base=rows_names.inject([]){|s,row|
39
+ s+=cols_names.collect{|col| [row,col]}
40
+ }.inject({}) {|s,par|
41
+ s[par]=0
42
+ s
43
+ }
44
+ base.update(Statsample::vector_cols_matrix(@v_rows,@v_cols).to_a.to_vector.frequencies)
51
45
  end
52
46
  def to_matrix
53
47
  f=frequencies
@@ -93,52 +87,50 @@ module Statsample
93
87
  def cols_empty_hash
94
88
  cols_names.inject({}) {|a,x| a[x]=0;a}
95
89
  end
96
- def report_building(generator)
97
- anchor=generator.toc_entry(_("Crosstab: ")+name)
98
- generator.html "<div class='crosstab'>"+_("Crosstab")+" #{@name}<a name='#{anchor}'></a>"
99
- fq=frequencies
100
- rn=rows_names
101
- cn=cols_names
102
- total=0
103
- total_cols=cols_empty_hash
104
- generator.text "Chi Square: #{chi_square}"
105
- generator.text(_("Rows: %s") % @row_label) unless @row_label.nil?
106
- generator.text(_("Columns: %s") % @column_label) unless @column_label.nil?
107
-
108
- t=ReportBuilder::Table.new(:name=>@name+" - "+_("Raw"), :header=>[""]+cols_names.collect {|c| @v_cols.labeling(c)}+[_("Total")])
109
- rn.each do |row|
110
- total_row=0
111
- t_row=[@v_rows.labeling(row)]
112
- cn.each do |col|
113
- data=fq[[row,col]]
114
- total_row+=fq[[row,col]]
115
- total+=fq[[row,col]]
116
- total_cols[col]+=fq[[row,col]]
117
- t_row.push(data)
90
+ def report_building(builder)
91
+ builder.section(:name=>@name) do |generator|
92
+ fq=frequencies
93
+ rn=rows_names
94
+ cn=cols_names
95
+ total=0
96
+ total_cols=cols_empty_hash
97
+ generator.text "Chi Square: #{chi_square}"
98
+ generator.text(_("Rows: %s") % @row_label) unless @row_label.nil?
99
+ generator.text(_("Columns: %s") % @column_label) unless @column_label.nil?
100
+
101
+ t=ReportBuilder::Table.new(:name=>@name+" - "+_("Raw"), :header=>[""]+cols_names.collect {|c| @v_cols.labeling(c)}+[_("Total")])
102
+ rn.each do |row|
103
+ total_row=0
104
+ t_row=[@v_rows.labeling(row)]
105
+ cn.each do |col|
106
+ data=fq[[row,col]]
107
+ total_row+=fq[[row,col]]
108
+ total+=fq[[row,col]]
109
+ total_cols[col]+=fq[[row,col]]
110
+ t_row.push(data)
111
+ end
112
+ t_row.push(total_row)
113
+ t.row(t_row)
114
+ end
115
+ t.hr
116
+ t_row=[_("Total")]
117
+ cn.each do |v|
118
+ t_row.push(total_cols[v])
118
119
  end
119
- t_row.push(total_row)
120
+ t_row.push(total)
120
121
  t.row(t_row)
122
+ generator.parse_element(t)
123
+
124
+ if(@percentage_row)
125
+ table_percentage(generator,:row)
126
+ end
127
+ if(@percentage_column)
128
+ table_percentage(generator,:column)
129
+ end
130
+ if(@percentage_total)
131
+ table_percentage(generator,:total)
132
+ end
121
133
  end
122
- t.hr
123
- t_row=[_("Total")]
124
- cn.each do |v|
125
- t_row.push(total_cols[v])
126
- end
127
- t_row.push(total)
128
- t.row(t_row)
129
- generator.parse_element(t)
130
-
131
- if(@percentage_row)
132
- table_percentage(generator,:row)
133
- end
134
- if(@percentage_column)
135
- table_percentage(generator,:column)
136
- end
137
- if(@percentage_total)
138
- table_percentage(generator,:total)
139
- end
140
-
141
- generator.html("</div>")
142
134
  end
143
135
 
144
136
 
@@ -56,16 +56,17 @@ module Statsample
56
56
 
57
57
  class Dataset
58
58
  include Writable
59
+ include Summarizable
59
60
  # Hash of Statsample::Vector
60
61
  attr_reader :vectors
61
62
  # Ordered names of vectors
62
63
  attr_reader :fields
64
+ # Name of dataset
65
+ attr_accessor:name
63
66
  # Number of cases
64
67
  attr_reader :cases
65
68
  # Location of pointer on enumerations methods (like #each)
66
69
  attr_reader :i
67
- # Deprecated: Label of vectors
68
- attr_accessor :labels
69
70
 
70
71
  # Generates a new dataset, using three vectors
71
72
  # - Rows
@@ -122,10 +123,12 @@ module Statsample
122
123
  # [fields] Array of names for vectors. Is only used for set the
123
124
  # order of variables. If empty, vector keys in alphabetical order are
124
125
  # used as fields
125
- # [labels] Hash to set names for fields.
126
126
 
127
127
  #
128
- def initialize(vectors={}, fields=[], labels={})
128
+ def initialize(vectors={}, fields=[])
129
+ @@n_dataset||=0
130
+ @@n_dataset+=1
131
+ @name=_("Dataset %d") % @@n_dataset
129
132
  if vectors.instance_of? Array
130
133
  @fields=vectors.dup
131
134
  @vectors=vectors.inject({}){|a,x| a[x]=Statsample::Vector.new(); a}
@@ -137,7 +140,6 @@ module Statsample
137
140
  check_length
138
141
  end
139
142
  @i=nil
140
- @labels=labels
141
143
  end
142
144
  def to_gsl_matrix
143
145
  matrix=GSL::Matrix.alloc(cases,@vectors.size)
@@ -146,11 +148,7 @@ module Statsample
146
148
  end
147
149
  matrix
148
150
  end
149
- # Retrieves label for a vector, giving a field name.
150
- def label(v_id)
151
- raise "Vector #{v} doesn't exists" unless @fields.include? v_id
152
- @labels[v_id].nil? ? v_id : @labels[v_id]
153
- end
151
+
154
152
  # Creates a copy of the given dataset, deleting all the cases with
155
153
  # missing data on one of the vectors
156
154
  def dup_only_valid
@@ -172,7 +170,8 @@ module Statsample
172
170
  @fields.slice(@fields.index(from)..@fields.index(to))
173
171
  end
174
172
  # Returns a duplicate of the Dataset
175
- # If fields given, only include those vectors
173
+ # If fields given, only include those vectors.
174
+ # Every vector will be dup
176
175
  def dup(*fields_to_include)
177
176
  if fields_to_include.size==1 and fields_to_include[0].is_a? Array
178
177
  fields_to_include=fields_to_include[0]
@@ -180,14 +179,27 @@ module Statsample
180
179
  fields_to_include=@fields if fields_to_include.size==0
181
180
  vectors={}
182
181
  fields=[]
183
- new_labels={}
184
182
  fields_to_include.each{|f|
185
183
  raise "Vector #{f} doesn't exists" unless @vectors.has_key? f
186
184
  vectors[f]=@vectors[f].dup
187
- new_labels[f]=@labels[f]
188
185
  fields.push(f)
189
186
  }
190
- Dataset.new(vectors,fields,new_labels)
187
+ Dataset.new(vectors,fields)
188
+ end
189
+ # Returns a shallow copy of Dataset.
190
+ # Object id will be distinct, but @vectors will be the same.
191
+ def clone(*fields_to_include)
192
+ if fields_to_include.size==1 and fields_to_include[0].is_a? Array
193
+ fields_to_include=fields_to_include[0]
194
+ end
195
+ fields_to_include=@fields.dup if fields_to_include.size==0
196
+ ds=Dataset.new
197
+ fields_to_include.each{|f|
198
+ raise "Vector #{f} doesn't exists" unless @vectors.has_key? f
199
+ ds[f]=@vectors[f]
200
+ }
201
+ ds.fields=fields_to_include
202
+ ds
191
203
  end
192
204
  # Creates a copy of the given dataset, without data on vectors
193
205
  def dup_empty
@@ -195,7 +207,7 @@ module Statsample
195
207
  a[v[0]]=v[1].dup_empty
196
208
  a
197
209
  }
198
- Dataset.new(vectors,@fields.dup,@labels.dup)
210
+ Dataset.new(vectors,@fields.dup)
199
211
  end
200
212
  # Merge vectors from two datasets
201
213
  # In case of name collision, the vectors names are changed to
@@ -216,14 +228,14 @@ module Statsample
216
228
  ds_new.update_valid_data
217
229
  ds_new
218
230
  end
219
- # Returns a dataset with standarized data
220
- def standarize
221
- ds=dup()
222
- ds.fields.each {|f|
223
- ds[f]=ds[f].vector_standarized
224
- }
225
- ds
226
- end
231
+ # Returns a dataset with standarized data
232
+ def standarize
233
+ ds=dup()
234
+ ds.fields.each do |f|
235
+ ds[f]=ds[f].vector_standarized
236
+ end
237
+ ds
238
+ end
227
239
  # Generate a matrix, based on fields of dataset
228
240
  def collect_matrix
229
241
  rows=@fields.collect{|row|
@@ -233,7 +245,7 @@ module Statsample
233
245
  }
234
246
  Matrix.rows(rows)
235
247
  end
236
- # We have the same datasets if the labels and vectors are the same
248
+ # We have the same datasets if vectors and fields are the same
237
249
  def ==(d2)
238
250
  @vectors==d2.vectors and @fields==d2.fields
239
251
  end
@@ -305,12 +317,12 @@ module Statsample
305
317
  @vectors.delete(name)
306
318
  end
307
319
 
308
- def add_vectors_by_split_recode(name,join='-',sep=Statsample::SPLIT_TOKEN)
309
- split=@vectors[name].split_by_separator(sep)
320
+ def add_vectors_by_split_recode(name_,join='-',sep=Statsample::SPLIT_TOKEN)
321
+ split=@vectors[name_].split_by_separator(sep)
310
322
  i=1
311
323
  split.each{|k,v|
312
- new_field=name+join+i.to_s
313
- @labels[new_field]=name+":"+k
324
+ new_field=name_+join+i.to_s
325
+ v.name=name_+":"+k
314
326
  add_vector(new_field,v)
315
327
  i+=1
316
328
  }
@@ -505,15 +517,13 @@ module Statsample
505
517
  end
506
518
  # Returns the vector named i
507
519
  def[](i)
508
- if i.is_a? String
509
- raise Exception,"Vector '#{i}' doesn't exists on dataset" unless @vectors.has_key?(i)
510
- @vectors[i]
511
- elsif i.is_a? Range
520
+ if i.is_a? Range
512
521
  fields=from_to(i.begin,i.end)
513
522
  vectors=fields.inject({}) {|a,v| a[v]=@vectors[v];a}
514
523
  ds=Dataset.new(vectors,fields)
515
524
  else
516
- raise ArgumentError, "You need a String or a Range"
525
+ raise Exception,"Vector '#{i}' doesn't exists on dataset" unless @vectors.has_key?(i)
526
+ @vectors[i]
517
527
  end
518
528
  end
519
529
  # Retrieves a Statsample::Vector, based on the result
@@ -702,7 +712,7 @@ module Statsample
702
712
  vr
703
713
  end
704
714
  def to_s
705
- "#<"+self.class.to_s+":"+self.object_id.to_s+" @fields=["+@fields.join(",")+"] labels="+@labels.inspect+" cases="+@vectors[@fields[0]].size.to_s
715
+ "#<"+self.class.to_s+":"+self.object_id.to_s+" @fields=["+@fields.join(",")+"] cases="+@vectors[@fields[0]].size.to_s
706
716
  end
707
717
  def inspect
708
718
  self.to_s
@@ -779,17 +789,14 @@ module Statsample
779
789
  ds
780
790
  end
781
791
 
782
- def summary
783
- out=""
784
- out << "Summary for dataset\n"
785
- @vectors.each{|k,v|
786
- out << "###############\n"
787
- out << "Vector #{k}:\n"
788
- out << v.summary
789
- out << "###############\n"
790
-
791
- }
792
- out
792
+ def report_building(b)
793
+ b.section(:name=>@name) do |g|
794
+ g.text _"Cases: %d" % cases
795
+
796
+ @fields.each do |f|
797
+ g.parse_element(@vectors[f])
798
+ end
799
+ end
793
800
  end
794
801
  def as_r
795
802
  require 'rsruby/dataframe'