statsample 0.16.0 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. data.tar.gz.sig +0 -0
  2. data/History.txt +7 -0
  3. data/Manifest.txt +6 -4
  4. data/README.txt +5 -1
  5. data/Rakefile +1 -1
  6. data/examples/boxplot.rb +17 -0
  7. data/examples/dominance_analysis_bootstrap.rb +5 -0
  8. data/examples/histogram.rb +14 -0
  9. data/examples/scatterplot.rb +4 -3
  10. data/lib/distribution/normalbivariate.rb +1 -1
  11. data/lib/statsample.rb +16 -3
  12. data/lib/statsample/bivariate.rb +4 -2
  13. data/lib/statsample/converter/csv.rb +0 -2
  14. data/lib/statsample/converters.rb +13 -1
  15. data/lib/statsample/dataset.rb +23 -15
  16. data/lib/statsample/dominanceanalysis.rb +3 -2
  17. data/lib/statsample/dominanceanalysis/bootstrap.rb +2 -1
  18. data/lib/statsample/factor/parallelanalysis.rb +1 -1
  19. data/lib/statsample/factor/principalaxis.rb +1 -1
  20. data/lib/statsample/graph.rb +2 -0
  21. data/lib/statsample/graph/boxplot.rb +234 -0
  22. data/lib/statsample/graph/histogram.rb +133 -0
  23. data/lib/statsample/graph/scatterplot.rb +1 -9
  24. data/lib/statsample/histogram.rb +47 -11
  25. data/lib/statsample/mle.rb +4 -4
  26. data/lib/statsample/mle/normal.rb +3 -3
  27. data/lib/statsample/regression/multiple/baseengine.rb +1 -1
  28. data/lib/statsample/regression/multiple/gslengine.rb +0 -1
  29. data/lib/statsample/regression/multiple/matrixengine.rb +1 -1
  30. data/lib/statsample/reliability.rb +1 -0
  31. data/lib/statsample/reliability/scaleanalysis.rb +3 -51
  32. data/lib/statsample/reliability/skillscaleanalysis.rb +93 -0
  33. data/lib/statsample/srs.rb +1 -1
  34. data/lib/statsample/test/umannwhitney.rb +1 -1
  35. data/lib/statsample/vector.rb +13 -36
  36. data/test/test_factor.rb +1 -1
  37. data/test/test_ggobi.rb +0 -5
  38. data/test/test_histogram.rb +75 -18
  39. data/test/test_mle.rb +0 -44
  40. data/test/test_reliability_skillscale.rb +41 -0
  41. data/test/test_statistics.rb +3 -3
  42. data/test/test_stest.rb +2 -2
  43. data/test/test_vector.rb +13 -8
  44. metadata +36 -18
  45. metadata.gz.sig +0 -0
  46. data/lib/statsample/combination.rb +0 -114
  47. data/lib/statsample/permutation.rb +0 -98
  48. data/test/test_combination.rb +0 -37
  49. data/test/test_permutation.rb +0 -42
@@ -0,0 +1,133 @@
1
+ require 'rubyvis'
2
+ module Statsample
3
+ module Graph
4
+ class Histogram
5
+ include Summarizable
6
+ attr_accessor :name
7
+ # Total width of the histogram
8
+ attr_accessor :width
9
+ # Total height of the histogram
10
+ attr_accessor :height
11
+ # Top margin
12
+ attr_accessor :margin_top
13
+ # Bottom margin
14
+ attr_accessor :margin_bottom
15
+ # Left margin
16
+ attr_accessor :margin_left
17
+ # Right margin
18
+ attr_accessor :margin_right
19
+ attr_reader :hist
20
+ # Could be an array of ranges or number of bins
21
+ attr_accessor :bins
22
+ # data could be a vector or a histogram
23
+ def initialize(data,opts=Hash.new) # data: a vector or a histogram; opts override the defaults below
24
+ prov_name=data.respond_to?(:name) ? data.name : "" # parentheses required: unparenthesized, the whole ternary is parsed as the argument of respond_to?
25
+ opts_default={
26
+ :name=>_("Histograma (%s)") % prov_name,
27
+ :width=>400,
28
+ :height=>300,
29
+ :margin_top=>10,
30
+ :margin_bottom=>20,
31
+ :margin_left=>20,
32
+ :margin_right=>20,
33
+ :bins=>nil
34
+ }
35
+ @opts=opts_default.merge(opts)
36
+ opts_default.keys.each {|k| send("#{k}=", @opts[k]) } # assign every known option through its attr_accessor
37
+ @data=data
38
+ end
39
+ def pre_vis
40
+ if @data.is_a? Statsample::Histogram
41
+ @hist=@data
42
+ elsif @data.is_a? Statsample::Vector
43
+ @bins||=Math::sqrt(@data.size).floor
44
+ @hist=@data.histogram(@bins)
45
+ end
46
+ end
47
+ # Returns a Rubyvis panel with the histogram
48
+ def rubyvis_panel # :nodoc:
49
+ pre_vis
50
+ that=self
51
+
52
+ max_bin = @hist.max_val
53
+
54
+ margin_hor=margin_left + margin_right
55
+ margin_vert=margin_top + margin_bottom
56
+
57
+ x_scale = pv.Scale.linear(@hist.min, @hist.max).range(0,width-margin_hor)
58
+
59
+ y_scale=Rubyvis::Scale.linear(0,max_bin).range(0, height-margin_vert)
60
+
61
+ y_scale.nice
62
+ max_range=@hist.max
63
+ bins=@hist.bins.times.map {|i|
64
+ {
65
+ :low =>@hist.get_range(i)[0],
66
+ :high=>@hist.get_range(i)[1],
67
+ :value=>@hist.bin[i]
68
+ }
69
+ }
70
+ # cache data
71
+ vis=Rubyvis::Panel.new do |pan|
72
+ pan.width width - margin_hor
73
+ pan.height height - margin_vert
74
+ pan.bottom margin_bottom
75
+ pan.left margin_left
76
+ pan.right margin_right
77
+ pan.top margin_top
78
+ # Y axis
79
+ pan.rule do
80
+ data y_scale.ticks
81
+ bottom y_scale
82
+ stroke_style {|d| d!=0 ? "#eee" : "#000"}
83
+ label(:anchor=>'left') do
84
+ text y_scale.tick_format
85
+ end
86
+ end
87
+ # X axis
88
+ pan.rule do
89
+ data x_scale.ticks
90
+ left x_scale
91
+ stroke_style "black"
92
+ height 5
93
+ bottom -5
94
+ label(:anchor=>'bottom') do
95
+ text x_scale.tick_format
96
+ end
97
+ end
98
+
99
+ pan.bar do |bar|
100
+ bar.data(bins)
101
+ bar.left {|v| x_scale.scale(v[:low])}
102
+ bar.width {|v| x_scale.scale(v[:high]) - x_scale.scale(v[:low])}
103
+ bar.bottom 0
104
+ bar.height {|v| y_scale.scale(v[:value])}
105
+ bar.stroke_style "black"
106
+ bar.line_width 1
107
+ end
108
+ end
109
+ end
110
+ # Returns SVG with the histogram
111
+ def to_svg
112
+ rp=rubyvis_panel
113
+ rp.render
114
+ rp.to_svg
115
+ end
116
+ def report_building(builder) # :nodoc:
117
+ builder.section(:name=>name) do |b|
118
+ b.image(to_svg, :type=>'svg', :width=>width, :height=>height)
119
+ end
120
+ end
121
+ def report_building_text(generator)
122
+ pre_vis
123
+ #anchor=generator.toc_entry(_("Histogram %s") % [@name])
124
+ step= @hist.max_val > 40 ? ( @hist.max_val / 40).ceil : 1
125
+
126
+ @hist.range.each_with_index do |r,i|
127
+ next if i==@hist.bins
128
+ generator.text(sprintf("%5.2f : %s", r, "*" * (@hist.bin[i] / step).floor ))
129
+ end
130
+ end
131
+ end
132
+ end
133
+ end
@@ -1,4 +1,3 @@
1
- require 'tmpdir'
2
1
  require 'rubyvis'
3
2
  module Statsample
4
3
  module Graph
@@ -152,15 +151,8 @@ module Statsample
152
151
  rp.to_svg
153
152
  end
154
153
  def report_building(builder) # :nodoc:
155
- img_svg=to_svg
156
154
  builder.section(:name=>name) do |b|
157
- Dir.mktmpdir {|dir|
158
- time=Time.new.to_f
159
- File.open("#{dir}/image_#{time}.svg","w") {|fp|
160
- fp.write img_svg
161
- }
162
- b.image("#{dir}/image_#{time}.svg", :width=>width, :height=>height)
163
- }
155
+ b.image(to_svg, :type=>'svg', :width=>width, :height=>height)
164
156
  end
165
157
 
166
158
  end
@@ -39,10 +39,24 @@ module Statsample
39
39
 
40
40
  class Histogram
41
41
  class << self
42
+ # Alloc +n_bins+, using +range+ as ranges of bins
42
43
  def alloc(n_bins, range=nil, opts=Hash.new)
43
- Histogram.new(n_bins, range)
44
+ Histogram.new(n_bins, range, opts)
44
45
 
45
46
  end
47
+ # Alloc +n_bins+ bins, using +p1+ as minimum and +p2+
48
+ # as maximum
49
+ def alloc_uniform(n_bins, p1=nil,p2=nil)
50
+ if p1.is_a? Array
51
+ min,max=p1
52
+ else
53
+ min,max=p1,p2
54
+ end
55
+ range=max - min
56
+ step=range / n_bins.to_f
57
+ range=(n_bins+1).times.map {|i| min + (step*i)}
58
+ Histogram.new(range)
59
+ end
46
60
  end
47
61
  attr_accessor :name
48
62
  attr_reader :bin
@@ -53,25 +67,29 @@ module Statsample
53
67
 
54
68
  if p1.is_a? Array
55
69
  range=p1
56
- n_bins=p1.size-1
70
+ @n_bins=p1.size-1
57
71
  elsif p1.is_a? Integer
58
- n_bins=p1
72
+ @n_bins=p1
59
73
  end
60
74
 
61
- @bin=[0.0]*(n_bins)
75
+ @bin=[0.0]*(@n_bins)
62
76
  if(min_max)
63
77
  min, max=min_max[0], min_max[1]
64
- range=Array.new(n_bins+1)
65
- (n_bins+1).times {|i| range[i]=min+(i*(max-min).quo(n_bins)) }
78
+ range=Array.new(@n_bins+1)
79
+ (@n_bins+1).times {|i| range[i]=min+(i*(max-min).quo(@n_bins)) }
66
80
  end
67
- range||=[0.0]*(n_bins+1)
81
+ range||=[0.0]*(@n_bins+1)
68
82
  set_ranges(range)
69
83
  @name=""
70
84
  opts.each{|k,v|
71
85
  self.send("#{k}=",v) if self.respond_to? k
72
86
  }
73
87
  end
74
-
88
+ # Number of bins
89
+ def bins
90
+ @n_bins
91
+ end
92
+ #
75
93
  def increment(x, w=1)
76
94
  if x.is_a? Array
77
95
  x.each{|y| increment(y,w) }
@@ -88,11 +106,29 @@ module Statsample
88
106
  raise "Range size should be bin+1" if range.size!=@bin.size+1
89
107
  @range=range
90
108
  end
109
+ def get_range(i)
110
+ [@range[i],@range[i+1]]
111
+ end
112
+ def max
113
+ @range.last
114
+ end
115
+ def min
116
+ @range.first
117
+ end
118
+ def max_val
119
+ @bin.max
120
+ end
121
+ def min_val
122
+ @bin.min
123
+ end
124
+ def report_building(generator)
125
+ hg=Statsample::Graph::Histogram.new(self)
126
+ generator.parse_element(hg)
127
+ end
91
128
  def report_building_text(generator)
92
- anchor=generator.toc_entry(_("Histogram %s") % [@name])
93
- range.each_with_index do |r,i|
129
+ @range.each_with_index do |r,i|
94
130
  next if i==@bin.size
95
- generator.text(sprintf("%4.2f : %d", r, @bin[i]))
131
+ generator.text(sprintf("%5.2f : %d", r, @bin[i]))
96
132
  end
97
133
  end
98
134
  end
@@ -41,7 +41,7 @@ module Statsample
41
41
  x.row_size.times{|i|
42
42
  xi=Matrix.rows([x.row(i).to_a.collect{|v| v.to_f}])
43
43
  y_val=y[i,0].to_f
44
- fbx=f(b,x)
44
+ #fbx=f(b,x)
45
45
  prod=prod*likehood_i(xi, y_val ,b)
46
46
  }
47
47
  prod
@@ -62,7 +62,7 @@ module Statsample
62
62
  def set_default_parameters(x)
63
63
  fd=[0.0]*x.column_size
64
64
  fd.push(0.1) if self.is_a? Statsample::MLE::Normal
65
- parameters = Matrix.columns([fd])
65
+ Matrix.columns([fd])
66
66
  end
67
67
 
68
68
  # Newton Raphson with automatic stopping criteria.
@@ -80,8 +80,8 @@ module Statsample
80
80
  parameters = start_values.dup
81
81
  end
82
82
  k=parameters.row_size
83
- cv=Matrix.rows([([1.0]*k)])
84
- last_diff=nil
83
+ #cv=Matrix.rows([([1.0]*k)])
84
+ #last_diff=nil
85
85
  raise "n on y != n on x" if x.row_size!=y.row_size
86
86
  h=nil
87
87
  fd=nil
@@ -39,7 +39,7 @@ module Statsample
39
39
  rows = Array.new(k+1)
40
40
  k.times{|i| rows[i] = [xte[i,0] / sigma2]}
41
41
  rows[k] = [ete[0,0] / (2*sigma4) - n / (2*sigma2)]
42
- fd = Matrix.rows(rows, true)
42
+ Matrix.rows(rows, true)
43
43
  end
44
44
 
45
45
  # second derivative for normal model
@@ -48,7 +48,7 @@ module Statsample
48
48
  raise "x.rows!=y.rows" if x.row_size!=y.row_size
49
49
  raise "x.columns+1!=p.rows" if x.column_size+1!=p.row_size
50
50
 
51
- n = x.row_size
51
+ #n = x.row_size
52
52
  k = x.column_size
53
53
  b = Array.new(k)
54
54
  k.times{|i| b[i]=[p[i,0]]}
@@ -76,7 +76,7 @@ module Statsample
76
76
  end
77
77
  last_row[k] = 2*sigma4 - ete[0,0] / sigma6
78
78
  rows[k] = last_row
79
- sd = Matrix.rows(rows, true)
79
+ Matrix.rows(rows, true)
80
80
  end
81
81
  end
82
82
  end
@@ -150,7 +150,7 @@ module Statsample
150
150
  # Estimated Variance-Covariance Matrix
151
151
  # Used for calculation of se of constant
152
152
  def estimated_variance_covariance_matrix
153
- mse_p=mse
153
+ #mse_p=mse
154
154
  columns=[]
155
155
  @ds_valid.fields.each{|k|
156
156
  v=@ds_valid[k]
@@ -66,7 +66,6 @@ class GslEngine < BaseEngine
66
66
  # Coefficients using a constant
67
67
  # Based on http://www.xycoon.com/ols1.htm
68
68
  def matrix_resolution
69
- mse_p=mse
70
69
  columns=@dep_columns.dup.map {|xi| xi.map{|i| i.to_f}}
71
70
  columns.unshift([1.0]*@ds.cases)
72
71
  y=Matrix.columns([@dy.data.map {|i| i.to_f}])
@@ -157,7 +157,7 @@ class MatrixEngine < BaseEngine
157
157
  #
158
158
  def coeffs_se
159
159
  out={}
160
- mse=sse.quo(df_e)
160
+ #mse=sse.quo(df_e)
161
161
  coeffs.each {|k,v|
162
162
  out[k]=@y_sd.quo(@x_sd[k])*Math::sqrt( 1.quo(tolerance(k)))*Math::sqrt((1-r2).quo(df_e))
163
163
  }
@@ -141,4 +141,5 @@ end # Statsample
141
141
 
142
142
  require 'statsample/reliability/icc.rb'
143
143
  require 'statsample/reliability/scaleanalysis.rb'
144
+ require 'statsample/reliability/skillscaleanalysis.rb'
144
145
  require 'statsample/reliability/multiscaleanalysis.rb'
@@ -40,7 +40,7 @@ module Statsample
40
40
  @sd = @total.sd
41
41
  @variance=@total.variance
42
42
  @valid_n = @total.size
43
- opts_default={:name=>"Reliability Analisis"}
43
+ opts_default={:name=>_("Reliability Analisis")}
44
44
  @opts=opts_default.merge(opts)
45
45
  @name=@opts[:name]
46
46
  @cov_m=Statsample::Bivariate.covariance_matrix(@ds)
@@ -79,56 +79,6 @@ module Statsample
79
79
  end
80
80
  out
81
81
  end
82
- def gnuplot_item_characteristic_curve(directory, base="crd",options={})
83
- require 'gnuplot'
84
-
85
- crd=item_characteristic_curve
86
- @ds.fields.each do |f|
87
- x=[]
88
- y=[]
89
- Gnuplot.open do |gp|
90
- Gnuplot::Plot.new( gp ) do |plot|
91
- crd[f].sort.each do |tot,prop|
92
- x.push(tot)
93
- y.push((prop*100).to_i.to_f/100)
94
- end
95
- plot.data << Gnuplot::DataSet.new( [x, y] ) do |ds|
96
- ds.with = "linespoints"
97
- ds.notitle
98
- end
99
-
100
- end
101
- end
102
- end
103
- end
104
- def svggraph_item_characteristic_curve(directory, base="icc",options={})
105
- require 'statsample/graph/svggraph'
106
- crd=ItemCharacteristicCurve.new(@ds)
107
- @ds.fields.each do |f|
108
- factors=@ds[f].factors.sort
109
- options={
110
- :height=>500,
111
- :width=>800,
112
- :key=>true
113
- }.update(options)
114
- graph = ::SVG::Graph::Plot.new(options)
115
- factors.each do |factor|
116
- factor=factor.to_s
117
- dataset=[]
118
- crd.curve_field(f, factor).each do |tot,prop|
119
- dataset.push(tot)
120
- dataset.push((prop*100).to_i.to_f/100)
121
- end
122
- graph.add_data({
123
- :title=>"#{factor}",
124
- :data=>dataset
125
- })
126
- end
127
- File.open(directory+"/"+base+"_#{f}.svg","w") {|fp|
128
- fp.puts(graph.burn())
129
- }
130
- end
131
- end
132
82
  # =Adjusted R.P.B. for each item
133
83
  # Adjusted RPB(Point biserial-correlation) for each item
134
84
  #
@@ -172,9 +122,11 @@ module Statsample
172
122
  ds_new.update_valid_data
173
123
  ds_new
174
124
  end
125
+
175
126
  def stats_if_deleted
176
127
  @sif||=stats_if_deleted_intern
177
128
  end
129
+
178
130
  def stats_if_deleted_intern # :nodoc:
179
131
  return Hash.new if @ds.fields.size==1
180
132
  @ds.fields.inject({}) do |a,v|
@@ -0,0 +1,93 @@
1
+ module Statsample
2
+ module Reliability
3
+ # Analysis of a Skill Scale
4
+ # Given a dataset with results and a correct answers hash,
5
+ # generates a ScaleAnalysis
6
+ # == Usage
7
+ # x1=%w{a b b c}.to_vector
8
+ # x2=%w{b a b c}.to_vector
9
+ # x3=%w{a c b a}.to_vector
10
+ # ds={'x1'=>x1,'x2'=>x2,'x3'=>x3}.to_dataset
11
+ # key={'x1'=>'a','x2'=>'b','x3'=>'a'}
12
+ # ssa=Statsample::Reliability::SkillScaleAnalysis.new(ds,key)
13
+ # puts ssa.summary
14
+ class SkillScaleAnalysis
15
+ include Summarizable
16
+ attr_accessor :name
17
+ attr_accessor :summary_minimal_item_correlation
18
+ attr_accessor :summary_show_problematic_items
19
+ def initialize(ds,key,opts=Hash.new)
20
+ opts_default={
21
+ :name=>_("Skill Scale Reliability Analysis"),
22
+ :summary_minimal_item_correlation=>0.10,
23
+ :summary_show_problematic_items=>true
24
+ }
25
+ @ds=ds
26
+ @key=key
27
+ @opts=opts_default.merge(opts)
28
+ @opts.each{|k,v| self.send("#{k}=",v) if self.respond_to? k }
29
+ @cds=nil
30
+ end
31
+ def corrected_dataset_minimal
32
+ cds=corrected_dataset
33
+ @key.keys.inject({}) {|ac,v| ac[v]=cds[v];ac}.to_dataset
34
+ end
35
+ def vector_sum
36
+ corrected_dataset_minimal.vector_sum
37
+ end
38
+ def vector_mean
39
+ corrected_dataset_minimal.vector_mean
40
+ end
41
+ def scale_analysis
42
+ sa=ScaleAnalysis.new(corrected_dataset_minimal)
43
+ sa.name=_("%s (Scale Analysis)") % @name
44
+ sa
45
+ end
46
+ def corrected_dataset
47
+ if @cds.nil?
48
+ @cds=@ds.dup_empty
49
+ @key.keys.each {|k| @cds[k].type=:scale; @cds[k].name=@ds[k].name}
50
+ @ds.each do |row|
51
+ out={}
52
+ row.each do |k,v|
53
+ if @key.keys.include? k
54
+ if @ds[k].is_valid? v
55
+ out[k]= @key[k]==v ? 1 : 0
56
+ else
57
+ out[k]=nil
58
+ end
59
+ else
60
+ out[k]=v
61
+ end
62
+ end
63
+ @cds.add_case(out,false)
64
+ end
65
+ @cds.update_valid_data
66
+ end
67
+ @cds
68
+ end
69
+ def report_building(builder)
70
+ builder.section(:name=>@name) do |s|
71
+ sa=scale_analysis
72
+ s.parse_element(sa)
73
+ if summary_show_problematic_items
74
+ s.section(:name=>_("Problematic Items")) do |spi|
75
+ count=0
76
+ sa.item_total_correlation.each do |k,v|
77
+ if v<summary_minimal_item_correlation
78
+ count+=1
79
+ spi.section(:name=>_("Item: %s") % @ds[k].name) do |spii|
80
+ spii.text _("Correct answer: %s") % @key[k]
81
+ spii.parse_element(@ds[k])
82
+ end
83
+ end
84
+ end
85
+ spi.text _("No problematic items") if count==0
86
+ end
87
+ end
88
+
89
+ end
90
+ end
91
+ end
92
+ end
93
+ end