statsample 0.5.0 → 0.5.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -114,6 +114,11 @@ module Statsample
114
114
  }
115
115
  out
116
116
  end
117
+ def get_averages(averages)
118
+ out={}
119
+ averages.each{|key,val| out[key]=val.to_vector(:scale).mean }
120
+ out
121
+ end
117
122
  def average_k(k)
118
123
  return nil if k==@fields.size
119
124
  models=md_k(k)
@@ -123,11 +128,7 @@ module Statsample
123
128
  averages[f].push(m.contributions[f]) unless m.contributions[f].nil?
124
129
  }
125
130
  }
126
- out={}
127
- averages.each{|key,val|
128
- out[key]=val.to_vector(:scale).mean
129
- }
130
- out
131
+ get_averages(averages)
131
132
  end
132
133
  def general_averages
133
134
  if @general_averages.nil?
@@ -138,11 +139,7 @@ module Statsample
138
139
  averages[f].push(ak[f])
139
140
  }
140
141
  end
141
- out={}
142
- averages.each{|key,val|
143
- out[key]=val.to_vector(:scale).mean
144
- }
145
- @general_averages=out
142
+ @general_averages=get_averages(averages)
146
143
  end
147
144
  @general_averages
148
145
  end
@@ -27,6 +27,29 @@ module Statsample
27
27
  @uniq_file+=1
28
28
  "#{prepend}_#{@uniq_file}_#{Time.now.to_i}"
29
29
  end
30
+
31
+ def add_tetrachoric_correlation_matrix(ds)
32
+ add_anchor("Tetrachoric correlation Matrix")
33
+ html="<h2>Tetrachoric Correlation Matrix</h2> <table><thead><th>-</th><th>"+ds.fields.join("</th><th>")+"</th> </thead> <tbody>"
34
+ matrix=Statsample::Bivariate.tetrachoric_correlation_matrix(ds)
35
+
36
+
37
+ (0...(matrix.row_size)).each {|row|
38
+ html+="<tr><td>"+ds.fields[row]+"</td>"
39
+ (0...(matrix.column_size)).each {|col|
40
+ if matrix[row,col].nil?
41
+ html+="<td>--</td>"
42
+ else
43
+ html+="<td><strong>#{sprintf("%0.2f",matrix[row,col])}</td>"
44
+ end
45
+ }
46
+ html+="</tr>"
47
+ }
48
+ html+="</tbody></table>"
49
+ @partials.push(html)
50
+ end
51
+
52
+
30
53
  def add_correlation_matrix(ds)
31
54
  add_anchor("Correlation Matrix")
32
55
  html="<h2>Correlation Matrix</h2> <table><thead><th>-</th><th>"+ds.fields.join("</th><th>")+"</th> </thead> <tbody>"
@@ -8,7 +8,7 @@ module Statsample
8
8
  @y_var=y_var
9
9
  @r2=nil
10
10
  end
11
-
11
+
12
12
  # Retrieves a vector with predicted values for y
13
13
  def predicted
14
14
  (0...@ds.cases).collect { |i|
@@ -52,13 +52,13 @@ module Statsample
52
52
  # Sum of squares (Error)
53
53
  def sse
54
54
  sst - ssr
55
- end
55
+ end
56
56
  # T values for coeffs
57
57
  def coeffs_t
58
58
  out={}
59
59
  se=coeffs_se
60
60
  coeffs.each{|k,v|
61
- out[k]=v / se[k]
61
+ out[k]=v / se[k]
62
62
  }
63
63
  out
64
64
  end
@@ -69,7 +69,7 @@ module Statsample
69
69
  # Mean Square Error
70
70
  def mse
71
71
  sse.quo(df_e)
72
- end
72
+ end
73
73
  # Degrees of freedom for regression
74
74
  def df_r
75
75
  @dep_columns.size
@@ -113,7 +113,7 @@ module Statsample
113
113
  out
114
114
  end
115
115
  # Estimated Variance-Covariance Matrix
116
- # Used for calculation of se of constant
116
+ # Used for calculation of se of constant
117
117
  def estimated_variance_covariance_matrix
118
118
  mse_p=mse
119
119
  columns=[]
@@ -129,7 +129,7 @@ module Statsample
129
129
  end
130
130
  # T for constant
131
131
  def constant_t
132
- constant.to_f/constant_se
132
+ constant.to_f/constant_se
133
133
  end
134
134
  # Standard error for constant
135
135
  def constant_se
@@ -140,27 +140,27 @@ module Statsample
140
140
  c=coeffs
141
141
  out=""
142
142
  out.extend report_type
143
- out.add <<HEREDOC
144
- Summary for regression of #{@fields.join(',')} over #{@y_var}
145
- *************************************************************
146
- Engine: #{self.class}
147
- Cases(listwise)=#{@ds.cases}(#{@ds_valid.cases})
148
- r=#{sprintf("%0.3f",r)}
149
- r2=#{sprintf("%0.3f",r2)}
150
- Equation=#{sprintf("%0.3f",constant)}+#{@fields.collect {|k| sprintf("%0.3f%s",c[k],k)}.join(' + ')}
143
+ out.add <<-HEREDOC
144
+ Summary for regression of #{@fields.join(',')} over #{@y_var}
145
+ *************************************************************
146
+ Engine: #{self.class}
147
+ Cases(listwise)=#{@ds.cases}(#{@ds_valid.cases})
148
+ r=#{sprintf("%0.3f",r)}
149
+ r2=#{sprintf("%0.3f",r2)}
150
+ Equation=#{sprintf("%0.3f",constant)}+#{@fields.collect {|k| sprintf("%0.3f%s",c[k],k)}.join(' + ')}
151
151
  HEREDOC
152
-
152
+
153
153
  out.add_line
154
154
  out.add "ANOVA TABLE"
155
-
155
+
156
156
  t=Statsample::ReportTable.new(%w{source ss df ms f s})
157
157
  t.add_row(["Regression", sprintf("%0.3f",ssr), df_r, sprintf("%0.3f",msr), sprintf("%0.3f",f), sprintf("%0.3f", significance)])
158
158
  t.add_row(["Error", sprintf("%0.3f",sse), df_e, sprintf("%0.3f",mse)])
159
-
159
+
160
160
  t.add_row(["Total", sprintf("%0.3f",sst), df_r+df_e])
161
-
161
+
162
162
  out.parse_table(t)
163
-
163
+
164
164
  begin
165
165
  out.add "Beta coefficientes"
166
166
  sc=standarized_coeffs
@@ -171,63 +171,63 @@ HEREDOC
171
171
  t.add_row([f, sprintf("%0.3f", c[f]), sprintf("%0.3f", sc[f]), sprintf("%0.3f", cse[f]), sprintf("%0.3f", c[f].quo(cse[f]))])
172
172
  }
173
173
  out.parse_table(t)
174
-
174
+
175
175
  rescue
176
176
  end
177
177
  out
178
178
  end
179
179
  def assign_names(c)
180
- a={}
181
- @fields.each_index {|i|
182
- a[@fields[i]]=c[i]
183
- }
184
- a
185
- end
186
-
187
-
180
+ a={}
181
+ @fields.each_index {|i|
182
+ a[@fields[i]]=c[i]
183
+ }
184
+ a
185
+ end
186
+
187
+
188
188
  # Deprecated
189
189
  # Sum of squares of error (manual calculation)
190
190
  # using the predicted value minus the y_i value
191
191
  def sse_manual
192
- pr=predicted
193
- cases=0
194
- sse=(0...@ds.cases).inject(0) {|a,i|
195
- if !@dy.data_with_nils[i].nil? and !pr[i].nil?
196
- cases+=1
197
- a+((pr[i]-@dy[i])**2)
198
- else
199
- a
200
- end
201
- }
202
- sse*(min_n_valid-1.0).quo(cases-1)
192
+ pr=predicted
193
+ cases=0
194
+ sse=(0...@ds.cases).inject(0) {|a,i|
195
+ if !@dy.data_with_nils[i].nil? and !pr[i].nil?
196
+ cases+=1
197
+ a+((pr[i]-@dy[i])**2)
198
+ else
199
+ a
200
+ end
201
+ }
202
+ sse*(min_n_valid-1.0).quo(cases-1)
203
203
  end
204
204
  # Sum of squares of regression
205
205
  # using the predicted value minus y mean
206
206
  def ssr_direct
207
- mean=@dy.mean
208
- cases=0
209
- ssr=(0...@ds.cases).inject(0) {|a,i|
210
- invalid=false
211
- v=@dep_columns.collect{|c| invalid=true if c[i].nil?; c[i]}
212
- if !invalid
213
- cases+=1
214
- a+((process(v)-mean)**2)
215
- else
216
- a
217
- end
218
- }
219
- ssr
207
+ mean=@dy.mean
208
+ cases=0
209
+ ssr=(0...@ds.cases).inject(0) {|a,i|
210
+ invalid=false
211
+ v=@dep_columns.collect{|c| invalid=true if c[i].nil?; c[i]}
212
+ if !invalid
213
+ cases+=1
214
+ a+((process(v)-mean)**2)
215
+ else
216
+ a
217
+ end
218
+ }
219
+ ssr
220
220
  end
221
221
  def sse_direct
222
- sst-ssr
222
+ sst-ssr
223
223
  end
224
224
  def process(v)
225
- c=coeffs
226
- total=constant
227
- @fields.each_index{|i|
228
- total+=c[@fields[i]]*v[i]
229
- }
230
- total
225
+ c=coeffs
226
+ total=constant
227
+ @fields.each_index{|i|
228
+ total+=c[@fields[i]]*v[i]
229
+ }
230
+ total
231
231
  end
232
232
  end
233
233
  end
@@ -1,4 +1,4 @@
1
- if HAS_ALGIB
1
+ if HAS_GSL
2
2
  module Statsample
3
3
  module Regression
4
4
  module Multiple
@@ -1,160 +1,158 @@
1
1
  module Statsample
2
- module Reliability
3
- class << self
4
- # Calculate Chonbach's alpha for a given dataset.
5
- # only uses tuples without missing data
2
+ module Reliability
3
+ class << self
4
+ # Calculate Cronbach's alpha for a given dataset.
5
+ # only uses tuples without missing data
6
6
  def cronbach_alpha(ods)
7
7
  ds=ods.dup_only_valid
8
8
  n_items=ds.fields.size
9
9
  sum_var_items=ds.vectors.inject(0) {|ac,v|
10
- ac+v[1].variance_sample
11
- }
10
+ ac+v[1].variance_sample }
12
11
  total=ds.vector_sum
13
12
  (n_items / (n_items-1).to_f) * (1-(sum_var_items/ total.variance_sample))
14
13
  end
15
- # Calculate Chonbach's alpha for a given dataset
16
- # using standarized values for every vector.
17
- # Only uses tuples without missing data
18
-
19
- def cronbach_alpha_standarized(ods)
20
- ds=ods.fields.inject({}){|a,f|
21
- a[f]=ods[f].vector_standarized
22
- a
23
- }.to_dataset
24
- cronbach_alpha(ds)
25
- end
26
- end
27
-
14
+ # Calculate Cronbach's alpha for a given dataset
15
+ # using standardized values for every vector.
16
+ # Only uses tuples without missing data
17
+
18
+ def cronbach_alpha_standarized(ods)
19
+ ds=ods.dup_only_valid.fields.inject({}){|a,f|
20
+ a[f]=ods[f].vector_standarized; a
21
+ }.to_dataset
22
+ cronbach_alpha(ds)
23
+ end
24
+ end
28
25
  class ItemCharacteristicCurve
29
- attr_reader :totals, :counts,:vector_total
30
- def initialize (ds, vector_total=nil)
31
- vector_total||=ds.vector_sum
32
- raise "Total size != Dataset size" if vector_total.size!=ds.cases
33
- @vector_total=vector_total
34
- @ds=ds
35
- @totals={}
36
- @counts=@ds.fields.inject({}) {|a,v| a[v]={};a}
37
- process
38
- end
39
- def process
40
- i=0
41
- @ds.each{|row|
42
- tot=@vector_total[i]
43
- @totals[tot]||=0
44
- @totals[tot]+=1
45
- @ds.fields.each {|f|
46
- item=row[f].to_s
47
- @counts[f][tot]||={}
48
- @counts[f][tot][item]||=0
49
- @counts[f][tot][item] += 1
50
- }
26
+ attr_reader :totals, :counts,:vector_total
27
+ def initialize (ds, vector_total=nil)
28
+ vector_total||=ds.vector_sum
29
+ raise "Total size != Dataset size" if vector_total.size!=ds.cases
30
+ @vector_total=vector_total
31
+ @ds=ds
32
+ @totals={}
33
+ @counts=@ds.fields.inject({}) {|a,v| a[v]={};a}
34
+ process
35
+ end
36
+ def process
37
+ i=0
38
+ @ds.each do |row|
39
+ tot=@vector_total[i]
40
+ @totals[tot]||=0
41
+ @totals[tot]+=1
42
+ @ds.fields.each do |f|
43
+ item=row[f].to_s
44
+ @counts[f][tot]||={}
45
+ @counts[f][tot][item]||=0
46
+ @counts[f][tot][item] += 1
47
+ end
51
48
  i+=1
52
- }
53
- end
54
- def curve_field(field, item)
55
- out={}
56
- item=item.to_s
57
- @totals.each{|value,n|
58
- count_value= @counts[field][value][item].nil? ? 0 : @counts[field][value][item]
59
- out[value]=count_value.to_f/n.to_f
60
- }
61
- out
62
- end
63
49
  end
50
+ end
51
+ def curve_field(field, item)
52
+ out={}
53
+ item=item.to_s
54
+ @totals.each{|value,n|
55
+ count_value= @counts[field][value][item].nil? ? 0 : @counts[field][value][item]
56
+ out[value]=count_value.to_f/n.to_f
57
+ }
58
+ out
59
+ end
60
+ end
64
61
  class ItemAnalysis
65
- attr_reader :mean, :sd,:valid_n, :alpha , :alpha_standarized
62
+ attr_reader :mean, :sd,:valid_n, :alpha , :alpha_standarized
66
63
  def initialize(ds)
67
- @ds=ds.dup_only_valid
68
- @total=@ds.vector_sum
69
- @mean=@total.mean
70
- @median=@total.median
71
- @skew=@total.skew
72
- @kurtosis=@total.kurtosis
73
- @sd=@total.sdp
74
- @valid_n=@total.size
75
- begin
76
- @alpha=Statsample::Reliability.cronbach_alpha(ds)
77
- @alpha_standarized=Statsample::Reliability.cronbach_alpha_standarized(ds)
78
- rescue => e
79
- raise DatasetException.new(@ds,e), "Problem on calculate alpha"
80
- end
64
+ @ds=ds.dup_only_valid
65
+ @total=@ds.vector_sum
66
+ @item_mean=@ds.vector_mean.mean
67
+ @mean=@total.mean
68
+ @median=@total.median
69
+ @skew=@total.skew
70
+ @kurtosis=@total.kurtosis
71
+ @sd = @total.sd
72
+ @valid_n = @total.size
73
+ begin
74
+ @alpha = Statsample::Reliability.cronbach_alpha(ds)
75
+ @alpha_standarized = Statsample::Reliability.cronbach_alpha_standarized(ds)
76
+ rescue => e
77
+ raise DatasetException.new(@ds,e), "Problem on calculate alpha"
78
+ end
81
79
  end
82
80
  # Returns a hash with structure
83
81
  def item_characteristic_curve
84
- i=0
85
- out={}
86
- total={}
87
- @ds.each{|row|
88
- tot=@total[i]
89
- @ds.fields.each {|f|
90
- out[f]||= {}
91
- total[f]||={}
92
- out[f][tot]||= 0
93
- total[f][tot]||=0
94
- out[f][tot]+= row[f]
95
- total[f][tot]+=1
96
- }
97
- i+=1
98
- }
99
- total.each{|f,var|
100
- var.each{|tot,v|
101
- out[f][tot]=out[f][tot].to_f / total[f][tot]
102
- }
103
- }
104
- out
82
+ i=0
83
+ out={}
84
+ total={}
85
+ @ds.each do |row|
86
+ tot=@total[i]
87
+ @ds.fields.each do |f|
88
+ out[f]||= {}
89
+ total[f]||={}
90
+ out[f][tot]||= 0
91
+ total[f][tot]||=0
92
+ out[f][tot]+= row[f]
93
+ total[f][tot]+=1
94
+ end
95
+ i+=1
96
+ end
97
+ total.each do |f,var|
98
+ var.each do |tot,v|
99
+ out[f][tot]=out[f][tot].to_f / total[f][tot]
100
+ end
101
+ end
102
+ out
105
103
  end
106
- def gnuplot_item_characteristic_curve(directory, base="crd",options={})
107
- require 'gnuplot'
108
-
109
- crd=item_characteristic_curve
110
- @ds.fields.each {|f|
111
- x=[]
112
- y=[]
113
- Gnuplot.open do |gp|
114
- Gnuplot::Plot.new( gp ) do |plot|
115
- crd[f].sort.each{|tot,prop|
116
- x.push(tot)
117
- y.push((prop*100).to_i.to_f/100)
118
- }
119
- plot.data << Gnuplot::DataSet.new( [x, y] ) do |ds|
120
- ds.with = "linespoints"
121
- ds.notitle
122
- end
123
-
124
- end
125
- end
126
- }
127
-
128
- end
129
- def svggraph_item_characteristic_curve(directory, base="icc",options={})
130
- require 'statsample/graph/svggraph'
131
- crd=ItemCharacteristicCurve.new(@ds)
132
- @ds.fields.each {|f|
133
- factors=@ds[f].factors.sort
134
- options={
135
- :height=>500,
136
- :width=>800,
137
- :key=>true
138
- }.update(options)
139
- graph = ::SVG::Graph::Plot.new(options)
140
- factors.each{|factor|
141
- factor=factor.to_s
142
- dataset=[]
143
- crd.curve_field(f, factor).each{|tot,prop|
144
- dataset.push(tot)
145
- dataset.push((prop*100).to_i.to_f/100)
146
- }
147
- graph.add_data({
148
- :title=>"#{factor}",
149
- :data=>dataset
150
- })
151
- }
152
- File.open(directory+"/"+base+"_#{f}.svg","w") {|fp|
153
- fp.puts(graph.burn())
154
- }
155
- }
156
-
157
- end
104
+ def gnuplot_item_characteristic_curve(directory, base="crd",options={})
105
+ require 'gnuplot'
106
+
107
+ crd=item_characteristic_curve
108
+ @ds.fields.each {|f|
109
+ x=[]
110
+ y=[]
111
+ Gnuplot.open do |gp|
112
+ Gnuplot::Plot.new( gp ) do |plot|
113
+ crd[f].sort.each{|tot,prop|
114
+ x.push(tot)
115
+ y.push((prop*100).to_i.to_f/100)
116
+ }
117
+ plot.data << Gnuplot::DataSet.new( [x, y] ) do |ds|
118
+ ds.with = "linespoints"
119
+ ds.notitle
120
+ end
121
+
122
+ end
123
+ end
124
+ }
125
+
126
+ end
127
+ def svggraph_item_characteristic_curve(directory, base="icc",options={})
128
+ require 'statsample/graph/svggraph'
129
+ crd=ItemCharacteristicCurve.new(@ds)
130
+ @ds.fields.each {|f|
131
+ factors=@ds[f].factors.sort
132
+ options={
133
+ :height=>500,
134
+ :width=>800,
135
+ :key=>true
136
+ }.update(options)
137
+ graph = ::SVG::Graph::Plot.new(options)
138
+ factors.each{|factor|
139
+ factor=factor.to_s
140
+ dataset=[]
141
+ crd.curve_field(f, factor).each{|tot,prop|
142
+ dataset.push(tot)
143
+ dataset.push((prop*100).to_i.to_f/100)
144
+ }
145
+ graph.add_data({
146
+ :title=>"#{factor}",
147
+ :data=>dataset
148
+ })
149
+ }
150
+ File.open(directory+"/"+base+"_#{f}.svg","w") {|fp|
151
+ fp.puts(graph.burn())
152
+ }
153
+ }
154
+
155
+ end
158
156
  def item_total_correlation
159
157
  @ds.fields.inject({}) do |a,v|
160
158
  vector=@ds[v].dup
@@ -163,7 +161,7 @@ module Statsample
163
161
  total=ds2.vector_sum
164
162
  a[v]=Statsample::Bivariate.pearson(vector,total)
165
163
  a
166
- end
164
+ end
167
165
  end
168
166
  def item_statistics
169
167
  @ds.fields.inject({}) do |a,v|
@@ -171,9 +169,29 @@ module Statsample
171
169
  a
172
170
  end
173
171
  end
174
-
172
+ # Returns a dataset with cases ordered by score
173
+ # and variables ordered by difficulty
174
+
175
+ def item_difficulty_analysis
176
+ dif={}
177
+ @ds.fields.each{|f| dif[f]=@ds[f].mean }
178
+ dif_sort=dif.sort{|a,b| -(a[1]<=>b[1])}
179
+ scores_sort={}
180
+ scores=@ds.vector_mean
181
+ scores.each_index{|i| scores_sort[i]=scores[i] }
182
+ scores_sort=scores_sort.sort{|a,b| a[1]<=>b[1]}
183
+ ds_new=Statsample::Dataset.new(['case','score'] + dif_sort.collect{|a,b| a})
184
+ scores_sort.each do |i,score|
185
+ row=[i, score]
186
+ case_row=@ds.case_as_hash(i)
187
+ dif_sort.each{|variable,dif_value| row.push(case_row[variable]) }
188
+ ds_new.add_case_array(row)
189
+ end
190
+ ds_new.update_valid_data
191
+ ds_new
192
+ end
175
193
  def stats_if_deleted
176
- @ds.fields.inject({}){|a,v|
194
+ @ds.fields.inject({}) do |a,v|
177
195
  ds2=@ds.dup
178
196
  ds2.delete_vector(v)
179
197
  total=ds2.vector_sum
@@ -183,13 +201,15 @@ module Statsample
183
201
  a[v][:variance_sample]=total.variance_sample
184
202
  a[v][:alpha]=Statsample::Reliability.cronbach_alpha(ds2)
185
203
  a
186
- }
204
+ end
187
205
  end
188
206
  def html_summary
189
207
  html = <<EOF
190
208
  <p><strong>Summary for scale:</strong></p>
191
209
  <ul>
192
- <li>Mean=#{@mean}</li>
210
+ <li>Items=#{@ds.fields.size}</li>
211
+ <li>Total Mean=#{@mean}</li>
212
+ <li>Item Mean=#{@item_mean}</li>
193
213
  <li>Std.Dv.=#{@sd}</li>
194
214
  <li>Median=#{@median}</li>
195
215
  <li>Skewness=#{sprintf("%0.3f",@skew)}</li>