statsample 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. data/History.txt +79 -0
  2. data/Manifest.txt +56 -0
  3. data/README.txt +77 -0
  4. data/Rakefile +22 -0
  5. data/bin/statsample +2 -0
  6. data/demo/benchmark.rb +52 -0
  7. data/demo/chi-square.rb +44 -0
  8. data/demo/dice.rb +13 -0
  9. data/demo/distribution_t.rb +95 -0
  10. data/demo/graph.rb +9 -0
  11. data/demo/item_analysis.rb +30 -0
  12. data/demo/mean.rb +81 -0
  13. data/demo/proportion.rb +57 -0
  14. data/demo/sample_test.csv +113 -0
  15. data/demo/strata_proportion.rb +152 -0
  16. data/demo/stratum.rb +141 -0
  17. data/lib/spss.rb +131 -0
  18. data/lib/statsample.rb +216 -0
  19. data/lib/statsample/anova.rb +74 -0
  20. data/lib/statsample/bivariate.rb +255 -0
  21. data/lib/statsample/chidistribution.rb +39 -0
  22. data/lib/statsample/codification.rb +120 -0
  23. data/lib/statsample/converters.rb +338 -0
  24. data/lib/statsample/crosstab.rb +122 -0
  25. data/lib/statsample/dataset.rb +526 -0
  26. data/lib/statsample/dominanceanalysis.rb +259 -0
  27. data/lib/statsample/dominanceanalysis/bootstrap.rb +126 -0
  28. data/lib/statsample/graph/gdchart.rb +45 -0
  29. data/lib/statsample/graph/svgboxplot.rb +108 -0
  30. data/lib/statsample/graph/svggraph.rb +181 -0
  31. data/lib/statsample/graph/svghistogram.rb +208 -0
  32. data/lib/statsample/graph/svgscatterplot.rb +111 -0
  33. data/lib/statsample/htmlreport.rb +232 -0
  34. data/lib/statsample/multiset.rb +281 -0
  35. data/lib/statsample/regression.rb +522 -0
  36. data/lib/statsample/reliability.rb +235 -0
  37. data/lib/statsample/resample.rb +20 -0
  38. data/lib/statsample/srs.rb +159 -0
  39. data/lib/statsample/test.rb +25 -0
  40. data/lib/statsample/vector.rb +759 -0
  41. data/test/_test_chart.rb +58 -0
  42. data/test/test_anova.rb +31 -0
  43. data/test/test_codification.rb +59 -0
  44. data/test/test_crosstab.rb +55 -0
  45. data/test/test_csv.csv +7 -0
  46. data/test/test_csv.rb +27 -0
  47. data/test/test_dataset.rb +293 -0
  48. data/test/test_ggobi.rb +42 -0
  49. data/test/test_multiset.rb +98 -0
  50. data/test/test_regression.rb +108 -0
  51. data/test/test_reliability.rb +32 -0
  52. data/test/test_resample.rb +23 -0
  53. data/test/test_srs.rb +14 -0
  54. data/test/test_statistics.rb +152 -0
  55. data/test/test_stratified.rb +19 -0
  56. data/test/test_svg_graph.rb +63 -0
  57. data/test/test_vector.rb +265 -0
  58. data/test/test_xls.rb +32 -0
  59. metadata +158 -0
@@ -0,0 +1,181 @@
1
+ require 'SVG/Graph/Bar'
2
+ require 'SVG/Graph/BarHorizontal'
3
+ require 'SVG/Graph/Pie'
4
+ require 'SVG/Graph/Line'
5
+ require 'SVG/Graph/Plot'
6
+ require 'statsample/graph/svghistogram'
7
+
8
+ module Statsample
9
+ class Nominal
10
+ # Creates a barchart of the frequencies using SVG::Graph and writes it to file
11
+ def svggraph_frequencies(file, width=600, height=300, chart_type=SVG::Graph::BarNoOp, options={})
12
+ labels,data=[],[]
13
+ self.frequencies.sort.each{|k,v|
14
+ labels.push(k.to_s)
15
+ data.push(v)
16
+ }
17
+ options[:height]=height
18
+ options[:width]=width
19
+ options[:fields]=labels
20
+ graph = chart_type.new(options)
21
+ graph.add_data(
22
+ :data => data,
23
+ :title => "Frequencies"
24
+ )
25
+ File.open(file,"w") {|f|
26
+ f.puts(graph.burn)
27
+ }
28
+ end
29
+ end
30
+ class Scale < Ordinal
31
+ def svggraph_histogram(bins, options={})
32
+ options={:graph_title=>"Histogram", :show_graph_title=>true,:show_normal=>true, :mean=>self.mean, :sigma=>sdp }.merge! options
33
+ graph = Statsample::Graph::SvgHistogram.new(options)
34
+ graph.histogram=histogram(bins)
35
+ graph
36
+ end
37
+ # Returns a Run-Sequence Plot
38
+ # Reference: http://www.itl.nist.gov/div898/handbook/eda/section3/runseqpl.htm
39
+ def svggraph_runsequence_plot(options={})
40
+ options={:graph_title=>"Run-Sequence Plot", :show_graph_title=>true, :scale_x_integers => true, :add_popups=>true }.merge! options
41
+ vx=(1..@data.size).to_a.to_vector(:scale)
42
+ vy=@data.to_vector(:scale)
43
+ ds={'index'=>vx,'value'=>vy}.to_dataset
44
+ graph = Statsample::Graph::SvgScatterplot.new(ds,options)
45
+ graph.set_x('index')
46
+ graph.parse
47
+ graph
48
+ end
49
+ def svggraph_boxplot(options={})
50
+ options={:graph_title=>"Boxplot", :fields=>['vector'], :show_graph_title=>true}.merge! options
51
+ vx=@data.to_a.to_vector(:scale)
52
+ graph = Statsample::Graph::SvgBoxplot.new(options)
53
+ graph.add_data(:title=>"vector", :data=>@data.to_a)
54
+ graph
55
+ end
56
+
57
+ def svggraph_lag_plot(options={})
58
+ options={:graph_title=>"Lag Plot", :show_graph_title=>true}.merge! options
59
+ vx=@data[0...(@data.size-1)].to_vector(:scale)
60
+ vy=@data[1...@data.size].to_vector(:scale)
61
+ ds={'x_minus_1'=>vx,'x'=>vy}.to_dataset
62
+ graph = Statsample::Graph::SvgScatterplot.new(ds,options)
63
+ graph.set_x('x_minus_1')
64
+ graph.parse
65
+ graph
66
+ end
67
+
68
+ # Returns a Normal Probability Plot
69
+ # Reference: http://www.itl.nist.gov/div898/handbook/eda/section3/normprpl.htm
70
+ def svggraph_normalprobability_plot(options={})
71
+ extend Statsample::Util
72
+
73
+ options={:graph_title=>"Normal Probability Plot", :show_graph_title=>true}.merge! options
74
+ n=@data.size
75
+ vx=(1..@data.size).to_a.collect{|i|
76
+ GSL::Cdf.gaussian_Pinv(normal_order_statistic_medians(i,n))
77
+ }.to_vector(:scale)
78
+ vy=@data.sort.to_vector(:scale)
79
+ ds={'normal_order_statistics_medians'=>vx, 'ordered_response'=>vy}.to_dataset
80
+ graph = Statsample::Graph::SvgScatterplot.new(ds,options)
81
+ graph.set_x('normal_order_statistics_medians')
82
+ graph.parse
83
+ graph
84
+ end
85
+ end
86
+ end
87
+
88
+ # Replaces all key and fill classes with similar ones that set no opacity.
89
+ # This allows rendering of SVG and PNG on rox and gqview without problems.
90
+ module SVG
91
+ module Graph
92
+ class BarNoOp < Bar
93
+ def get_css; SVG::Graph.get_css_standard; end
94
+ end
95
+ class BarHorizontalNoOp < BarHorizontal
96
+ def get_css; SVG::Graph.get_css_standard; end
97
+ end
98
+
99
+ class LineNoOp < Line
100
+ def get_css; SVG::Graph.get_css_standard; end
101
+
102
+ end
103
+ class PlotNoOp < Plot
104
+ def get_css; SVG::Graph.get_css_standard; end
105
+ end
106
+ class PieNoOp < Pie
107
+ def get_css; SVG::Graph.get_css_standard; end
108
+
109
+ end
110
+ class << self
111
+ def get_css_standard
112
+ return <<EOL
113
+ /* default fill styles for multiple datasets (probably only use a single dataset on this graph though) */
114
+ .key1,.fill1{
115
+ fill: #ff0000;
116
+ stroke: none;
117
+ stroke-width: 0.5px;
118
+ }
119
+ .key2,.fill2{
120
+ fill: #0000ff;
121
+ stroke: none;
122
+ stroke-width: 1px;
123
+ }
124
+ .key3,.fill3{
125
+ fill: #00ff00;
126
+ stroke: none;
127
+ stroke-width: 1px;
128
+ }
129
+ .key4,.fill4{
130
+ fill: #ffcc00;
131
+ stroke: none;
132
+ stroke-width: 1px;
133
+ }
134
+ .key5,.fill5{
135
+ fill: #00ccff;
136
+ stroke: none;
137
+ stroke-width: 1px;
138
+ }
139
+ .key6,.fill6{
140
+ fill: #ff00ff;
141
+ stroke: none;
142
+ stroke-width: 1px;
143
+ }
144
+ .key7,.fill7{
145
+ fill: #00ffff;
146
+ stroke: none;
147
+ stroke-width: 1px;
148
+ }
149
+ .key8,.fill8{
150
+ fill: #ffff00;
151
+ stroke: none;
152
+ stroke-width: 1px;
153
+ }
154
+ .key9,.fill9{
155
+ fill: #cc6666;
156
+ stroke: none;
157
+ stroke-width: 1px;
158
+ }
159
+ .key10,.fill10{
160
+ fill: #663399;
161
+ stroke: none;
162
+ stroke-width: 1px;
163
+ }
164
+ .key11,.fill11{
165
+ fill: #339900;
166
+ stroke: none;
167
+ stroke-width: 1px;
168
+ }
169
+ .key12,.fill12{
170
+ fill: #9966FF;
171
+ stroke: none;
172
+ stroke-width: 1px;
173
+ }
174
+ EOL
175
+ end
176
+ end
177
+ end
178
+ end
179
+
180
+ require 'statsample/graph/svgscatterplot'
181
+ require 'statsample/graph/svgboxplot'
@@ -0,0 +1,208 @@
1
+ module Statsample
2
+ module Graph
3
+ class SvgHistogram < SVG::Graph::BarBase
4
+ attr_accessor :inner_margin, :mean, :sigma, :show_normal
5
+ def initialize(config)
6
+ config[:fields]=[:dummy]
7
+ super(config)
8
+
9
+ @histogram=nil
10
+ end
11
+ include REXML
12
+
13
+ # In addition to the defaults set in Graph::initialize, sets
14
+ # [inner_margin] 16
15
+ # [key] false
16
+ def set_defaults
17
+ super
18
+ self.top_align = self.top_font = 0
19
+ init_with({
20
+ :inner_margin=>16,
21
+ :key=>false,
22
+ :show_normal=>false
23
+ })
24
+
25
+ end
26
+
27
+ def histogram=(h)
28
+ @histogram=h
29
+ @data=[{:data=>(0...@histogram.bins).to_a.collect {|i|
30
+ @histogram[i]
31
+ }}]
32
+ end
33
+ def get_x_labels
34
+ [""]
35
+ end
36
+
37
+ def get_y_labels
38
+ maxvalue = max_value
39
+ minvalue = min_value
40
+ range = maxvalue - minvalue
41
+
42
+ top_pad = range == 0 ? 10 : range / 20.0
43
+ scale_range = (maxvalue + top_pad) - minvalue
44
+
45
+ scale_division = scale_divisions || (scale_range / 10.0)
46
+
47
+ if scale_integers
48
+ scale_division = scale_division < 1 ? 1 : scale_division.round
49
+ end
50
+
51
+ rv = []
52
+ maxvalue = maxvalue%scale_division == 0 ?
53
+ maxvalue : maxvalue + scale_division
54
+ minvalue.step( maxvalue, scale_division ) {|v| rv << v}
55
+ return rv
56
+ end
57
+
58
+ def unit_width
59
+ (@graph_width-(@inner_margin*2)) / (@histogram.max-@histogram.min).to_f
60
+ end
61
+ def draw_x_label(v)
62
+ left = (v - @histogram.min)*unit_width
63
+ x=@inner_margin+left
64
+ text = @graph.add_element( "text" )
65
+ text.attributes["class"] = "xAxisLabels"
66
+ text.text = sprintf("%0.2f",v)
67
+ y = @graph_height + x_label_font_size + 3
68
+ text.attributes["x"] = x.to_s
69
+ text.attributes["y"] = y.to_s
70
+
71
+ end
72
+ def draw_x_labels
73
+ if show_x_labels
74
+ (0...@histogram.bins).each { |i|
75
+ value = @histogram[i]
76
+ range = @histogram.get_range(i)
77
+ draw_x_label(range[0])
78
+ if(i==(@histogram.bins)-1)
79
+ draw_x_label(range[1])
80
+ end
81
+ }
82
+ end
83
+ end
84
+ def draw_data
85
+ minvalue = min_value
86
+ fieldwidth = field_width
87
+
88
+ unit_size = (@graph_height.to_f - font_size*2*top_font) /
89
+ (get_y_labels.max - get_y_labels.min)
90
+ bottom = @graph_height
91
+ field_count = 0
92
+ hist_min=@histogram.min
93
+ hist_max=@histogram.max
94
+ range_hist=hist_max-hist_min
95
+ total=0
96
+ (0...@histogram.bins).each { |i|
97
+ dataset_count = 0
98
+ value = @histogram[i]
99
+ total=total+value
100
+ range = @histogram.get_range(i)
101
+ left = (range[0] - hist_min)*unit_width
102
+ bar_width = (range[1] - hist_min)*unit_width - left
103
+ length = (value.abs - (minvalue > 0 ? minvalue : 0)) * unit_size
104
+ # a negative value is treated as 0 when computing the top of the bar
105
+ top = bottom - (((value < 0 ? 0 : value) - minvalue) * unit_size)
106
+ @graph.add_element( "rect", {
107
+ "x" => (@inner_margin+left).to_s,
108
+ "y" => top.to_s,
109
+ "width" => bar_width.to_s,
110
+ "height" => length.to_s,
111
+ "class" => "fill#{dataset_count+1}"
112
+ })
113
+ make_datapoint_text(left + @inner_margin+ (bar_width/2), top - 6, value.to_s)
114
+ field_count += 1
115
+ }
116
+ if(show_normal)
117
+ divs=30
118
+ path=""
119
+ 0.upto(divs) {|i|
120
+ x_abs=hist_min+(range_hist/divs)*i
121
+ y=GSL::Ran::gaussian_pdf((x_abs-mean) / sigma)*total
122
+ xg=@inner_margin+((x_abs-hist_min)*unit_width)
123
+ yg=bottom-(y-minvalue)*unit_size
124
+ if i==0
125
+ path="M#{xg} #{yg} "
126
+ else
127
+ path+="L#{xg} #{yg} "
128
+ end
129
+ }
130
+ @graph.add_element("path", {
131
+ "d"=>path,
132
+ "style"=>"stroke:black;fill:none"
133
+ }
134
+ )
135
+ end
136
+ end
137
+
138
+
139
+ def get_css
140
+ return <<EOL
141
+ /* default fill styles for multiple datasets (probably only use a single dataset on this graph though) */
142
+
143
+ .key1,.fill1{
144
+ fill: #ff0000;
145
+ stroke: black;
146
+ stroke-width: 1px;
147
+ }
148
+ .key2,.fill2{
149
+ fill: #0000ff;
150
+ stroke: black;
151
+ stroke-width: 1px;
152
+ }
153
+ .key3,.fill3{
154
+ fill: #00ff00;
155
+ stroke: none;
156
+ stroke-width: 1px;
157
+ }
158
+ .key4,.fill4{
159
+ fill: #ffcc00;
160
+ stroke: none;
161
+ stroke-width: 1px;
162
+ }
163
+ .key5,.fill5{
164
+ fill: #00ccff;
165
+ stroke: none;
166
+ stroke-width: 1px;
167
+ }
168
+ .key6,.fill6{
169
+ fill: #ff00ff;
170
+ stroke: none;
171
+ stroke-width: 1px;
172
+ }
173
+ .key7,.fill7{
174
+ fill: #00ffff;
175
+ stroke: none;
176
+ stroke-width: 1px;
177
+ }
178
+ .key8,.fill8{
179
+ fill: #ffff00;
180
+ stroke: none;
181
+ stroke-width: 1px;
182
+ }
183
+ .key9,.fill9{
184
+ fill: #cc6666;
185
+ stroke: none;
186
+ stroke-width: 1px;
187
+ }
188
+ .key10,.fill10{
189
+ fill: #663399;
190
+ stroke: none;
191
+ stroke-width: 1px;
192
+ }
193
+ .key11,.fill11{
194
+ fill: #339900;
195
+ stroke: none;
196
+ stroke-width: 1px;
197
+ }
198
+ .key12,.fill12{
199
+ fill: #9966FF;
200
+ stroke: none;
201
+ stroke-width: 1px;
202
+ }
203
+ EOL
204
+ end
205
+
206
+ end
207
+ end
208
+ end
@@ -0,0 +1,111 @@
1
+ module Statsample
2
+ module Graph
3
+ class SvgScatterplot < SVG::Graph::Plot
4
+ attr_accessor :draw_path
5
+ def initialize(ds,config={})
6
+ super(config)
7
+ @ds=ds
8
+ set_x(@ds.fields[0])
9
+ end
10
+ def set_defaults
11
+ super
12
+ init_with(
13
+ :show_data_values => false,
14
+ :draw_path => false
15
+ )
16
+ end
17
+ def set_x(x)
18
+ @x=x
19
+ @y=@ds.fields - [x]
20
+ end
21
+ def parse
22
+ data=@y.inject({}){|a,v| a[v]=[];a}
23
+ @ds.each{|row|
24
+ @y.each{|y|
25
+ data[y]+=[row[@x],row[y]] unless row[@x].nil? or row[y].nil?
26
+ }
27
+ }
28
+ data.each{|y,d|
29
+ add_data({
30
+ :data=>d, :title=>@ds.vector_label(y)
31
+ })
32
+ }
33
+ end
34
+ def get_x_labels
35
+ values=super
36
+ values.collect{|x|
37
+ if x.is_a? Integer
38
+ x
39
+ else
40
+ sprintf("%0.2f",x).to_f
41
+ end
42
+ }
43
+ end
44
+ def get_y_labels
45
+ values=super
46
+ values.collect{|x|
47
+ if x.is_a? Integer
48
+ x
49
+ else
50
+ sprintf("%0.2f",x).to_f
51
+ end
52
+ }
53
+ end
54
+ def draw_data
55
+ line = 1
56
+
57
+ x_min, x_max, x_div = x_range
58
+ y_min, y_max, y_div = y_range
59
+ x_step = (@graph_width.to_f - font_size*2) / (x_max-x_min)
60
+ y_step = (@graph_height.to_f - font_size*2) / (y_max-y_min)
61
+
62
+ for data in @data
63
+ x_points = data[:data][X]
64
+ y_points = data[:data][Y]
65
+
66
+ lpath = "L"
67
+ x_start = 0
68
+ y_start = 0
69
+ x_points.each_index { |idx|
70
+ x = (x_points[idx] - x_min) * x_step
71
+ y = @graph_height - (y_points[idx] - y_min) * y_step
72
+ x_start, y_start = x,y if idx == 0
73
+ lpath << "#{x} #{y} "
74
+ }
75
+
76
+ if area_fill
77
+ @graph.add_element( "path", {
78
+ "d" => "M#{x_start} #@graph_height #{lpath} V#@graph_height Z",
79
+ "class" => "fill#{line}"
80
+ })
81
+ end
82
+ if draw_path
83
+ @graph.add_element( "path", {
84
+ "d" => "M#{x_start} #{y_start} #{lpath}",
85
+ "class" => "line#{line}"
86
+ })
87
+ end
88
+ if show_data_points || show_data_values
89
+ x_points.each_index { |idx|
90
+ x = (x_points[idx] - x_min) * x_step
91
+ y = @graph_height - (y_points[idx] - y_min) * y_step
92
+ if show_data_points
93
+ @graph.add_element( "circle", {
94
+ "cx" => x.to_s,
95
+ "cy" => y.to_s,
96
+ "r" => "2.5",
97
+ "class" => "dataPoint#{line}"
98
+ })
99
+ add_popup(x, y, format( x_points[idx], y_points[idx] )) if add_popups
100
+ end
101
+ make_datapoint_text( x, y-6, y_points[idx] ) if show_data_values
102
+ }
103
+ end
104
+ line += 1
105
+ end
106
+ end
107
+
108
+
109
+ end
110
+ end
111
+ end