statsample 0.5.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -114,6 +114,11 @@ module Statsample
  }
  out
  end
+ def get_averages(averages)
+ out={}
+ averages.each{|key,val| out[key]=val.to_vector(:scale).mean }
+ out
+ end
  def average_k(k)
  return nil if k==@fields.size
  models=md_k(k)
@@ -123,11 +128,7 @@ module Statsample
  averages[f].push(m.contributions[f]) unless m.contributions[f].nil?
  }
  }
- out={}
- averages.each{|key,val|
- out[key]=val.to_vector(:scale).mean
- }
- out
+ get_averages(averages)
  end
  def general_averages
  if @general_averages.nil?
@@ -138,11 +139,7 @@ module Statsample
  averages[f].push(ak[f])
  }
  end
- out={}
- averages.each{|key,val|
- out[key]=val.to_vector(:scale).mean
- }
- @general_averages=out
+ @general_averages=get_averages(averages)
  end
  @general_averages
  end
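The three hunks above extract duplicated averaging code into the new `get_averages` helper, now shared by `average_k` and `general_averages`. A minimal sketch of what the helper computes, assuming statsample's `Array#to_vector` extension is loaded; the input hash is made up:

    require 'statsample'

    # Hypothetical contributions: one array of values per field.
    averages = { 'a' => [2, 3, 4], 'b' => [1, 3] }
    out = {}
    averages.each { |key, val| out[key] = val.to_vector(:scale).mean }
    out # => {"a"=>3.0, "b"=>2.0}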
@@ -27,6 +27,29 @@ module Statsample
  @uniq_file+=1
  "#{prepend}_#{@uniq_file}_#{Time.now.to_i}"
  end
+
+ def add_tetrachoric_correlation_matrix(ds)
+ add_anchor("Tetrachoric Correlation Matrix")
+ html="<h2>Tetrachoric Correlation Matrix</h2> <table><thead><th>-</th><th>"+ds.fields.join("</th><th>")+"</th> </thead> <tbody>"
+ matrix=Statsample::Bivariate.tetrachoric_correlation_matrix(ds)
+
+
+ (0...(matrix.row_size)).each {|row|
+ html+="<tr><td>"+ds.fields[row]+"</td>"
+ (0...(matrix.column_size)).each {|col|
+ if matrix[row,col].nil?
+ html+="<td>--</td>"
+ else
+ html+="<td><strong>#{sprintf("%0.2f",matrix[row,col])}</strong></td>"
+ end
+ }
+ html+="</tr>"
+ }
+ html+="</tbody></table>"
+ @partials.push(html)
+ end
+
+
  def add_correlation_matrix(ds)
  add_anchor("Correlation Matrix")
  html="<h2>Correlation Matrix</h2> <table><thead><th>-</th><th>"+ds.fields.join("</th><th>")+"</th> </thead> <tbody>"
@@ -8,7 +8,7 @@ module Statsample
  @y_var=y_var
  @r2=nil
  end
-
+
  # Retrieves a vector with predicted values for y
  def predicted
  (0...@ds.cases).collect { |i|
@@ -52,13 +52,13 @@ module Statsample
  # Sum of squares (Error)
  def sse
  sst - ssr
- end
+ end
  # T values for coeffs
  def coeffs_t
  out={}
  se=coeffs_se
  coeffs.each{|k,v|
- out[k]=v / se[k]
+ out[k]=v / se[k]
  }
  out
  end
@@ -69,7 +69,7 @@ module Statsample
  # Mean Square Error
  def mse
  sse.quo(df_e)
- end
+ end
  # Degrees of freedom for regression
  def df_r
  @dep_columns.size
@@ -113,7 +113,7 @@ module Statsample
  out
  end
  # Estimated Variance-Covariance Matrix
- # Used for calculation of se of constant
+ # Used for calculation of se of constant
  def estimated_variance_covariance_matrix
  mse_p=mse
  columns=[]
@@ -129,7 +129,7 @@ module Statsample
  end
  # T for constant
  def constant_t
- constant.to_f/constant_se
+ constant.to_f/constant_se
  end
  # Standard error for constant
  def constant_se
@@ -140,27 +140,27 @@ module Statsample
  c=coeffs
  out=""
  out.extend report_type
- out.add <<HEREDOC
- Summary for regression of #{@fields.join(',')} over #{@y_var}
- *************************************************************
- Engine: #{self.class}
- Cases(listwise)=#{@ds.cases}(#{@ds_valid.cases})
- r=#{sprintf("%0.3f",r)}
- r2=#{sprintf("%0.3f",r2)}
- Equation=#{sprintf("%0.3f",constant)}+#{@fields.collect {|k| sprintf("%0.3f%s",c[k],k)}.join(' + ')}
+ out.add <<-HEREDOC
+ Summary for regression of #{@fields.join(',')} over #{@y_var}
+ *************************************************************
+ Engine: #{self.class}
+ Cases(listwise)=#{@ds.cases}(#{@ds_valid.cases})
+ r=#{sprintf("%0.3f",r)}
+ r2=#{sprintf("%0.3f",r2)}
+ Equation=#{sprintf("%0.3f",constant)}+#{@fields.collect {|k| sprintf("%0.3f%s",c[k],k)}.join(' + ')}
  HEREDOC
-
+
  out.add_line
  out.add "ANOVA TABLE"
-
+
  t=Statsample::ReportTable.new(%w{source ss df ms f s})
  t.add_row(["Regression", sprintf("%0.3f",ssr), df_r, sprintf("%0.3f",msr), sprintf("%0.3f",f), sprintf("%0.3f", significance)])
  t.add_row(["Error", sprintf("%0.3f",sse), df_e, sprintf("%0.3f",mse)])
-
+
  t.add_row(["Total", sprintf("%0.3f",sst), df_r+df_e])
-
+
  out.parse_table(t)
-
+
  begin
  out.add "Beta coefficientes"
  sc=standarized_coeffs
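The substantive change in this hunk is `<<HEREDOC` becoming `<<-HEREDOC`: with the `-` form, Ruby allows the terminator to be indented along with the method body, while a plain heredoc requires the terminator at column 0. A minimal sketch of the difference:

    # Plain heredoc: terminator must sit at column 0.
    def plain
      text = <<HEREDOC
    line
    HEREDOC
      text
    end

    # <<- heredoc: terminator may be indented (body indentation is kept as-is).
    def dashed
      text = <<-HEREDOC
    line
      HEREDOC
      text
    end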
@@ -171,63 +171,63 @@ HEREDOC
  t.add_row([f, sprintf("%0.3f", c[f]), sprintf("%0.3f", sc[f]), sprintf("%0.3f", cse[f]), sprintf("%0.3f", c[f].quo(cse[f]))])
  }
  out.parse_table(t)
-
+
  rescue
  end
  out
  end
  def assign_names(c)
- a={}
- @fields.each_index {|i|
- a[@fields[i]]=c[i]
- }
- a
- end
-
-
+ a={}
+ @fields.each_index {|i|
+ a[@fields[i]]=c[i]
+ }
+ a
+ end
+
+
  # Deprecated
  # Sum of squares of error (manual calculation)
  # using the predicted value minus the y_i value
  def sse_manual
- pr=predicted
- cases=0
- sse=(0...@ds.cases).inject(0) {|a,i|
- if !@dy.data_with_nils[i].nil? and !pr[i].nil?
- cases+=1
- a+((pr[i]-@dy[i])**2)
- else
- a
- end
- }
- sse*(min_n_valid-1.0).quo(cases-1)
+ pr=predicted
+ cases=0
+ sse=(0...@ds.cases).inject(0) {|a,i|
+ if !@dy.data_with_nils[i].nil? and !pr[i].nil?
+ cases+=1
+ a+((pr[i]-@dy[i])**2)
+ else
+ a
+ end
+ }
+ sse*(min_n_valid-1.0).quo(cases-1)
  end
  # Sum of squares of regression
  # using the predicted value minus y mean
  def ssr_direct
- mean=@dy.mean
- cases=0
- ssr=(0...@ds.cases).inject(0) {|a,i|
- invalid=false
- v=@dep_columns.collect{|c| invalid=true if c[i].nil?; c[i]}
- if !invalid
- cases+=1
- a+((process(v)-mean)**2)
- else
- a
- end
- }
- ssr
+ mean=@dy.mean
+ cases=0
+ ssr=(0...@ds.cases).inject(0) {|a,i|
+ invalid=false
+ v=@dep_columns.collect{|c| invalid=true if c[i].nil?; c[i]}
+ if !invalid
+ cases+=1
+ a+((process(v)-mean)**2)
+ else
+ a
+ end
+ }
+ ssr
  end
  def sse_direct
- sst-ssr
+ sst-ssr
  end
  def process(v)
- c=coeffs
- total=constant
- @fields.each_index{|i|
- total+=c[@fields[i]]*v[i]
- }
- total
+ c=coeffs
+ total=constant
+ @fields.each_index{|i|
+ total+=c[@fields[i]]*v[i]
+ }
+ total
  end
  end
  end
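This hunk only re-indents; the logic is unchanged. For reference, `process(v)` evaluates the fitted equation for one case: prediction = constant + sum of coefficient times predictor value. A standalone arithmetic sketch with made-up coefficients:

    fields   = ['x1', 'x2']
    coeffs   = { 'x1' => 0.5, 'x2' => -0.2 } # hypothetical fitted coefficients
    constant = 1.0
    v        = [2.0, 3.0]                    # one case's predictor values

    total = constant
    fields.each_index { |i| total += coeffs[fields[i]] * v[i] }
    total # => 1.0 + 0.5*2.0 + (-0.2)*3.0 = 1.4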
@@ -1,4 +1,4 @@
- if HAS_ALGIB
+ if HAS_GSL
  module Statsample
  module Regression
  module Multiple
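This file's guard now checks for the Ruby/GSL bindings rather than Alglib. Such `HAS_*` flags are conventionally set at require time by probing for the optional dependency; a sketch of the usual pattern (statsample's actual definition lives elsewhere and may differ):

    begin
      require 'gsl'   # Ruby/GSL bindings (optional dependency)
      HAS_GSL = true
    rescue LoadError
      HAS_GSL = false # code guarded by HAS_GSL is skipped
    end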
@@ -1,160 +1,158 @@
  module Statsample
- module Reliability
- class << self
- # Calculate Cronbach's alpha for a given dataset.
- # only uses tuples without missing data
+ module Reliability
+ class << self
+ # Calculate Cronbach's alpha for a given dataset.
+ # only uses tuples without missing data
  def cronbach_alpha(ods)
  ds=ods.dup_only_valid
  n_items=ds.fields.size
  sum_var_items=ds.vectors.inject(0) {|ac,v|
- ac+v[1].variance_sample
- }
+ ac+v[1].variance_sample }
  total=ds.vector_sum
  (n_items / (n_items-1).to_f) * (1-(sum_var_items/ total.variance_sample))
  end
- # Calculate Cronbach's alpha for a given dataset
- # using standarized values for every vector.
- # Only uses tuples without missing data
-
- def cronbach_alpha_standarized(ods)
- ds=ods.fields.inject({}){|a,f|
- a[f]=ods[f].vector_standarized
- a
- }.to_dataset
- cronbach_alpha(ds)
- end
- end
-
+ # Calculate Cronbach's alpha for a given dataset
+ # using standarized values for every vector.
+ # Only uses tuples without missing data
+
+ def cronbach_alpha_standarized(ods)
+ ds=ods.dup_only_valid.fields.inject({}){|a,f|
+ a[f]=ods[f].vector_standarized; a
+ }.to_dataset
+ cronbach_alpha(ds)
+ end
+ end
  class ItemCharacteristicCurve
- attr_reader :totals, :counts,:vector_total
- def initialize (ds, vector_total=nil)
- vector_total||=ds.vector_sum
- raise "Total size != Dataset size" if vector_total.size!=ds.cases
- @vector_total=vector_total
- @ds=ds
- @totals={}
- @counts=@ds.fields.inject({}) {|a,v| a[v]={};a}
- process
- end
- def process
- i=0
- @ds.each{|row|
- tot=@vector_total[i]
- @totals[tot]||=0
- @totals[tot]+=1
- @ds.fields.each {|f|
- item=row[f].to_s
- @counts[f][tot]||={}
- @counts[f][tot][item]||=0
- @counts[f][tot][item] += 1
- }
+ attr_reader :totals, :counts,:vector_total
+ def initialize (ds, vector_total=nil)
+ vector_total||=ds.vector_sum
+ raise "Total size != Dataset size" if vector_total.size!=ds.cases
+ @vector_total=vector_total
+ @ds=ds
+ @totals={}
+ @counts=@ds.fields.inject({}) {|a,v| a[v]={};a}
+ process
+ end
+ def process
+ i=0
+ @ds.each do |row|
+ tot=@vector_total[i]
+ @totals[tot]||=0
+ @totals[tot]+=1
+ @ds.fields.each do |f|
+ item=row[f].to_s
+ @counts[f][tot]||={}
+ @counts[f][tot][item]||=0
+ @counts[f][tot][item] += 1
+ end
  i+=1
- }
- end
- def curve_field(field, item)
- out={}
- item=item.to_s
- @totals.each{|value,n|
- count_value= @counts[field][value][item].nil? ? 0 : @counts[field][value][item]
- out[value]=count_value.to_f/n.to_f
- }
- out
- end
  end
+ end
+ def curve_field(field, item)
+ out={}
+ item=item.to_s
+ @totals.each{|value,n|
+ count_value= @counts[field][value][item].nil? ? 0 : @counts[field][value][item]
+ out[value]=count_value.to_f/n.to_f
+ }
+ out
+ end
+ end
  class ItemAnalysis
- attr_reader :mean, :sd,:valid_n, :alpha , :alpha_standarized
+ attr_reader :mean, :sd,:valid_n, :alpha , :alpha_standarized
  def initialize(ds)
- @ds=ds.dup_only_valid
- @total=@ds.vector_sum
- @mean=@total.mean
- @median=@total.median
- @skew=@total.skew
- @kurtosis=@total.kurtosis
- @sd=@total.sdp
- @valid_n=@total.size
- begin
- @alpha=Statsample::Reliability.cronbach_alpha(ds)
- @alpha_standarized=Statsample::Reliability.cronbach_alpha_standarized(ds)
- rescue => e
- raise DatasetException.new(@ds,e), "Problem on calculate alpha"
- end
+ @ds=ds.dup_only_valid
+ @total=@ds.vector_sum
+ @item_mean=@ds.vector_mean.mean
+ @mean=@total.mean
+ @median=@total.median
+ @skew=@total.skew
+ @kurtosis=@total.kurtosis
+ @sd = @total.sd
+ @valid_n = @total.size
+ begin
+ @alpha = Statsample::Reliability.cronbach_alpha(ds)
+ @alpha_standarized = Statsample::Reliability.cronbach_alpha_standarized(ds)
+ rescue => e
+ raise DatasetException.new(@ds,e), "Problem on calculate alpha"
+ end
  end
  # Returns a hash with structure
  def item_characteristic_curve
- i=0
- out={}
- total={}
- @ds.each{|row|
- tot=@total[i]
- @ds.fields.each {|f|
- out[f]||= {}
- total[f]||={}
- out[f][tot]||= 0
- total[f][tot]||=0
- out[f][tot]+= row[f]
- total[f][tot]+=1
- }
- i+=1
- }
- total.each{|f,var|
- var.each{|tot,v|
- out[f][tot]=out[f][tot].to_f / total[f][tot]
- }
- }
- out
+ i=0
+ out={}
+ total={}
+ @ds.each do |row|
+ tot=@total[i]
+ @ds.fields.each do |f|
+ out[f]||= {}
+ total[f]||={}
+ out[f][tot]||= 0
+ total[f][tot]||=0
+ out[f][tot]+= row[f]
+ total[f][tot]+=1
+ end
+ i+=1
+ end
+ total.each do |f,var|
+ var.each do |tot,v|
+ out[f][tot]=out[f][tot].to_f / total[f][tot]
+ end
+ end
+ out
  end
- def gnuplot_item_characteristic_curve(directory, base="crd",options={})
- require 'gnuplot'
-
- crd=item_characteristic_curve
- @ds.fields.each {|f|
- x=[]
- y=[]
- Gnuplot.open do |gp|
- Gnuplot::Plot.new( gp ) do |plot|
- crd[f].sort.each{|tot,prop|
- x.push(tot)
- y.push((prop*100).to_i.to_f/100)
- }
- plot.data << Gnuplot::DataSet.new( [x, y] ) do |ds|
- ds.with = "linespoints"
- ds.notitle
- end
-
- end
- end
- }
-
- end
- def svggraph_item_characteristic_curve(directory, base="icc",options={})
- require 'statsample/graph/svggraph'
- crd=ItemCharacteristicCurve.new(@ds)
- @ds.fields.each {|f|
- factors=@ds[f].factors.sort
- options={
- :height=>500,
- :width=>800,
- :key=>true
- }.update(options)
- graph = ::SVG::Graph::Plot.new(options)
- factors.each{|factor|
- factor=factor.to_s
- dataset=[]
- crd.curve_field(f, factor).each{|tot,prop|
- dataset.push(tot)
- dataset.push((prop*100).to_i.to_f/100)
- }
- graph.add_data({
- :title=>"#{factor}",
- :data=>dataset
- })
- }
- File.open(directory+"/"+base+"_#{f}.svg","w") {|fp|
- fp.puts(graph.burn())
- }
- }
-
- end
+ def gnuplot_item_characteristic_curve(directory, base="crd",options={})
+ require 'gnuplot'
+
+ crd=item_characteristic_curve
+ @ds.fields.each {|f|
+ x=[]
+ y=[]
+ Gnuplot.open do |gp|
+ Gnuplot::Plot.new( gp ) do |plot|
+ crd[f].sort.each{|tot,prop|
+ x.push(tot)
+ y.push((prop*100).to_i.to_f/100)
+ }
+ plot.data << Gnuplot::DataSet.new( [x, y] ) do |ds|
+ ds.with = "linespoints"
+ ds.notitle
+ end
+
+ end
+ end
+ }
+
+ end
+ def svggraph_item_characteristic_curve(directory, base="icc",options={})
+ require 'statsample/graph/svggraph'
+ crd=ItemCharacteristicCurve.new(@ds)
+ @ds.fields.each {|f|
+ factors=@ds[f].factors.sort
+ options={
+ :height=>500,
+ :width=>800,
+ :key=>true
+ }.update(options)
+ graph = ::SVG::Graph::Plot.new(options)
+ factors.each{|factor|
+ factor=factor.to_s
+ dataset=[]
+ crd.curve_field(f, factor).each{|tot,prop|
+ dataset.push(tot)
+ dataset.push((prop*100).to_i.to_f/100)
+ }
+ graph.add_data({
+ :title=>"#{factor}",
+ :data=>dataset
+ })
+ }
+ File.open(directory+"/"+base+"_#{f}.svg","w") {|fp|
+ fp.puts(graph.burn())
+ }
+ }
+
+ end
  def item_total_correlation
  @ds.fields.inject({}) do |a,v|
  vector=@ds[v].dup
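Beyond the re-indentation, this hunk fixes `cronbach_alpha_standarized` to call `dup_only_valid` before standardizing, so both variants now use complete cases only. `cronbach_alpha` itself implements alpha = (k/(k-1)) * (1 - sum of item variances / variance of the total), with k the number of items. A plain-Ruby check of that arithmetic, with made-up variances:

    k         = 4                    # number of items (hypothetical)
    item_vars = [1.2, 0.9, 1.1, 1.0] # per-item sample variances
    total_var = 12.5                 # sample variance of the summed scale

    alpha = (k / (k - 1).to_f) * (1 - item_vars.inject(:+) / total_var)
    alpha # => (4/3) * (1 - 4.2/12.5) ≈ 0.885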
@@ -163,7 +161,7 @@ module Statsample
  total=ds2.vector_sum
  a[v]=Statsample::Bivariate.pearson(vector,total)
  a
- end
+ end
  end
  def item_statistics
  @ds.fields.inject({}) do |a,v|
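The `item_total_correlation` fragment above computes a Pearson item-total correlation for each field. A usage sketch, assuming `ds` is a dataset of item scores:

    require 'statsample'

    ia = Statsample::Reliability::ItemAnalysis.new(ds)
    ia.item_total_correlation.each do |item, r|
      puts "#{item}: r=#{sprintf('%0.3f', r)}"
    end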
@@ -171,9 +169,29 @@ module Statsample
  a
  end
  end
-
+ # Returns a dataset with cases ordered by score
+ # and variables ordered by difficulty
+
+ def item_difficulty_analysis
+ dif={}
+ @ds.fields.each{|f| dif[f]=@ds[f].mean }
+ dif_sort=dif.sort{|a,b| -(a[1]<=>b[1])}
+ scores_sort={}
+ scores=@ds.vector_mean
+ scores.each_index{|i| scores_sort[i]=scores[i] }
+ scores_sort=scores_sort.sort{|a,b| a[1]<=>b[1]}
+ ds_new=Statsample::Dataset.new(['case','score'] + dif_sort.collect{|a,b| a})
+ scores_sort.each do |i,score|
+ row=[i, score]
+ case_row=@ds.case_as_hash(i)
+ dif_sort.each{|variable,dif_value| row.push(case_row[variable]) }
+ ds_new.add_case_array(row)
+ end
+ ds_new.update_valid_data
+ ds_new
+ end
  def stats_if_deleted
- @ds.fields.inject({}){|a,v|
+ @ds.fields.inject({}) do |a,v|
  ds2=@ds.dup
  ds2.delete_vector(v)
  total=ds2.vector_sum
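`item_difficulty_analysis` is new in 0.5.1: it returns a scalogram-style dataset whose rows are cases sorted by mean score and whose columns are 'case', 'score', then the items ordered from highest to lowest item mean. A usage sketch, assuming `ds` holds item scores:

    require 'statsample'

    ia = Statsample::Reliability::ItemAnalysis.new(ds)
    sorted = ia.item_difficulty_analysis
    sorted.fields # => ['case', 'score', <items, highest mean first>]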
@@ -183,13 +201,15 @@ module Statsample
  a[v][:variance_sample]=total.variance_sample
  a[v][:alpha]=Statsample::Reliability.cronbach_alpha(ds2)
  a
- }
+ end
  end
  def html_summary
  html = <<EOF
  <p><strong>Summary for scale:</strong></p>
  <ul>
- <li>Mean=#{@mean}</li>
+ <li>Items=#{@ds.fields.size}</li>
+ <li>Total Mean=#{@mean}</li>
+ <li>Item Mean=#{@item_mean}</li>
  <li>Std.Dv.=#{@sd}</li>
  <li>Median=#{@median}</li>
  <li>Skewness=#{sprintf("%0.3f",@skew)}</li>
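`html_summary` now opens with the item count, the total-score mean, and the new `@item_mean` (the mean of the per-case item means, set in the constructor hunk above). A hedged end-to-end sketch; the items are made up:

    require 'statsample'

    ds = {
      'i1' => [3, 4, 5, 4].to_vector(:scale),
      'i2' => [2, 4, 4, 5].to_vector(:scale),
      'i3' => [3, 3, 5, 5].to_vector(:scale)
    }.to_dataset
    ia = Statsample::Reliability::ItemAnalysis.new(ds)
    puts ia.html_summary # includes the Items / Total Mean / Item Mean lines shown above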