statsample 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59) hide show
  1. data/History.txt +79 -0
  2. data/Manifest.txt +56 -0
  3. data/README.txt +77 -0
  4. data/Rakefile +22 -0
  5. data/bin/statsample +2 -0
  6. data/demo/benchmark.rb +52 -0
  7. data/demo/chi-square.rb +44 -0
  8. data/demo/dice.rb +13 -0
  9. data/demo/distribution_t.rb +95 -0
  10. data/demo/graph.rb +9 -0
  11. data/demo/item_analysis.rb +30 -0
  12. data/demo/mean.rb +81 -0
  13. data/demo/proportion.rb +57 -0
  14. data/demo/sample_test.csv +113 -0
  15. data/demo/strata_proportion.rb +152 -0
  16. data/demo/stratum.rb +141 -0
  17. data/lib/spss.rb +131 -0
  18. data/lib/statsample.rb +216 -0
  19. data/lib/statsample/anova.rb +74 -0
  20. data/lib/statsample/bivariate.rb +255 -0
  21. data/lib/statsample/chidistribution.rb +39 -0
  22. data/lib/statsample/codification.rb +120 -0
  23. data/lib/statsample/converters.rb +338 -0
  24. data/lib/statsample/crosstab.rb +122 -0
  25. data/lib/statsample/dataset.rb +526 -0
  26. data/lib/statsample/dominanceanalysis.rb +259 -0
  27. data/lib/statsample/dominanceanalysis/bootstrap.rb +126 -0
  28. data/lib/statsample/graph/gdchart.rb +45 -0
  29. data/lib/statsample/graph/svgboxplot.rb +108 -0
  30. data/lib/statsample/graph/svggraph.rb +181 -0
  31. data/lib/statsample/graph/svghistogram.rb +208 -0
  32. data/lib/statsample/graph/svgscatterplot.rb +111 -0
  33. data/lib/statsample/htmlreport.rb +232 -0
  34. data/lib/statsample/multiset.rb +281 -0
  35. data/lib/statsample/regression.rb +522 -0
  36. data/lib/statsample/reliability.rb +235 -0
  37. data/lib/statsample/resample.rb +20 -0
  38. data/lib/statsample/srs.rb +159 -0
  39. data/lib/statsample/test.rb +25 -0
  40. data/lib/statsample/vector.rb +759 -0
  41. data/test/_test_chart.rb +58 -0
  42. data/test/test_anova.rb +31 -0
  43. data/test/test_codification.rb +59 -0
  44. data/test/test_crosstab.rb +55 -0
  45. data/test/test_csv.csv +7 -0
  46. data/test/test_csv.rb +27 -0
  47. data/test/test_dataset.rb +293 -0
  48. data/test/test_ggobi.rb +42 -0
  49. data/test/test_multiset.rb +98 -0
  50. data/test/test_regression.rb +108 -0
  51. data/test/test_reliability.rb +32 -0
  52. data/test/test_resample.rb +23 -0
  53. data/test/test_srs.rb +14 -0
  54. data/test/test_statistics.rb +152 -0
  55. data/test/test_stratified.rb +19 -0
  56. data/test/test_svg_graph.rb +63 -0
  57. data/test/test_vector.rb +265 -0
  58. data/test/test_xls.rb +32 -0
  59. metadata +158 -0
@@ -0,0 +1,181 @@
1
+ require 'SVG/Graph/Bar'
2
+ require 'SVG/Graph/BarHorizontal'
3
+ require 'SVG/Graph/Pie'
4
+ require 'SVG/Graph/Line'
5
+ require 'SVG/Graph/Plot'
6
+ require 'statsample/graph/svghistogram'
7
+
8
+ module Statsample
9
class Nominal
  # Writes a chart of this vector's frequencies to +file+ as SVG.
  #
  # The pairs returned by +frequencies+ are sorted; each key (as a string)
  # becomes a field label and each count a data point of a single series
  # titled "Frequencies".
  #
  # file::       path of the file to write (opened with mode "w")
  # width::      stored under :width in the chart options
  # height::     stored under :height in the chart options
  # chart_type:: class instantiated with the options hash; the instance must
  #              respond to +add_data+ and +burn+
  # options::    extra settings handed to chart_type.new. :height, :width and
  #              :fields are always overridden. The caller's hash is left
  #              untouched.
  def svggraph_frequencies(file, width=600, height=300, chart_type=SVG::Graph::BarNoOp, options={})
    labels, data = [], []
    self.frequencies.sort.each do |category, count|
      labels << category.to_s
      data << count
    end
    # Merge into a copy so the hash passed by the caller is not modified.
    chart_options = options.merge(:height => height, :width => width, :fields => labels)
    graph = chart_type.new(chart_options)
    graph.add_data(:data => data, :title => "Frequencies")
    File.open(file, "w") { |f| f.puts(graph.burn) }
  end
end
30
class Scale < Ordinal
  # Builds an SVG histogram of this vector.
  #
  # bins::    passed to +histogram+ to produce the binned data
  # options:: settings for Statsample::Graph::SvgHistogram; they override
  #           the defaults set here (title "Histogram", title shown, normal
  #           curve shown, :mean taken from +mean+, :sigma taken from +sdp+)
  #
  # Returns the graph object; nothing is written to disk here.
  def svggraph_histogram(bins, options={})
    defaults = {:graph_title=>"Histogram", :show_graph_title=>true, :show_normal=>true, :mean=>self.mean, :sigma=>sdp}
    graph = Statsample::Graph::SvgHistogram.new(defaults.merge(options))
    graph.histogram = histogram(bins)
    graph
  end

  # Returns a Run-Sequence Plot: each value plotted against its 1-based
  # position in the data.
  # Reference: http://www.itl.nist.gov/div898/handbook/eda/section3/runseqpl.htm
  #
  # options:: settings for Statsample::Graph::SvgScatterplot, overriding the
  #           defaults set here
  def svggraph_runsequence_plot(options={})
    options = {:graph_title=>"Run-Sequence Plot", :show_graph_title=>true, :scale_x_integers => true, :add_popups=>true}.merge(options)
    vx = (1..@data.size).to_a.to_vector(:scale)
    vy = @data.to_vector(:scale)
    ds = {'index'=>vx, 'value'=>vy}.to_dataset
    graph = Statsample::Graph::SvgScatterplot.new(ds, options)
    graph.set_x('index')
    graph.parse
    graph
  end

  # Returns a boxplot graph with a single series named "vector" holding
  # this vector's data.
  #
  # options:: settings for Statsample::Graph::SvgBoxplot, overriding the
  #           defaults set here
  def svggraph_boxplot(options={})
    options = {:graph_title=>"Boxplot", :fields=>['vector'], :show_graph_title=>true}.merge(options)
    graph = Statsample::Graph::SvgBoxplot.new(options)
    graph.add_data(:title=>"vector", :data=>@data.to_a)
    graph
  end

  # Returns a Lag Plot: each value (y axis, 'x') plotted against the value
  # that precedes it in the data (x axis, 'x_minus_1').
  #
  # options:: settings for Statsample::Graph::SvgScatterplot, overriding the
  #           defaults set here
  def svggraph_lag_plot(options={})
    options = {:graph_title=>"Lag Plot", :show_graph_title=>true}.merge(options)
    vx = @data[0...(@data.size-1)].to_vector(:scale)
    vy = @data[1...@data.size].to_vector(:scale)
    ds = {'x_minus_1'=>vx, 'x'=>vy}.to_dataset
    graph = Statsample::Graph::SvgScatterplot.new(ds, options)
    graph.set_x('x_minus_1')
    graph.parse
    graph
  end

  # Returns a Normal Probability Plot: the sorted data (y axis) against
  # GSL::Cdf.gaussian_Pinv of the normal order statistic medians (x axis).
  # Reference: http://www.itl.nist.gov/div898/handbook/eda/section3/normprpl.htm
  #
  # options:: settings for Statsample::Graph::SvgScatterplot, overriding the
  #           defaults set here
  def svggraph_normalprobability_plot(options={})
    # Presumably provides normal_order_statistic_medians, which is called
    # unqualified below -- confirm against Statsample::Util.
    extend Statsample::Util

    options = {:graph_title=>"Normal Probability Plot", :show_graph_title=>true}.merge(options)
    n = @data.size
    vx = (1..n).map { |i|
      GSL::Cdf.gaussian_Pinv(normal_order_statistic_medians(i, n))
    }.to_vector(:scale)
    vy = @data.sort.to_vector(:scale)
    ds = {'normal_order_statistics_medians'=>vx, 'ordered_response'=>vy}.to_dataset
    graph = Statsample::Graph::SvgScatterplot.new(ds, options)
    graph.set_x('normal_order_statistics_medians')
    graph.parse
    graph
  end
end
86
+ end
87
+
88
+ # Replaces all key and fill classes with similar ones that do not use opacity.
89
+ # This allows SVG and PNG output to be rendered on ROX and GQview without problems.
90
module SVG
  module Graph
    # Bar chart whose stylesheet is the shared standard CSS.
    class BarNoOp < Bar
      def get_css
        SVG::Graph.get_css_standard
      end
    end

    # Horizontal bar chart whose stylesheet is the shared standard CSS.
    class BarHorizontalNoOp < BarHorizontal
      def get_css
        SVG::Graph.get_css_standard
      end
    end

    # Line chart whose stylesheet is the shared standard CSS.
    class LineNoOp < Line
      def get_css
        SVG::Graph.get_css_standard
      end
    end

    # Plot whose stylesheet is the shared standard CSS.
    class PlotNoOp < Plot
      def get_css
        SVG::Graph.get_css_standard
      end
    end

    # Pie chart whose stylesheet is the shared standard CSS.
    class PieNoOp < Pie
      def get_css
        SVG::Graph.get_css_standard
      end
    end

    # Stylesheet shared by the *NoOp chart classes: twelve key/fill classes
    # with solid colours and no stroke.
    def self.get_css_standard
      <<EOL
/* default fill styles for multiple datasets (probably only use a single dataset on this graph though) */
.key1,.fill1{
fill: #ff0000;
stroke: none;
stroke-width: 0.5px;
}
.key2,.fill2{
fill: #0000ff;
stroke: none;
stroke-width: 1px;
}
.key3,.fill3{
fill: #00ff00;
stroke: none;
stroke-width: 1px;
}
.key4,.fill4{
fill: #ffcc00;
stroke: none;
stroke-width: 1px;
}
.key5,.fill5{
fill: #00ccff;
stroke: none;
stroke-width: 1px;
}
.key6,.fill6{
fill: #ff00ff;
stroke: none;
stroke-width: 1px;
}
.key7,.fill7{
fill: #00ffff;
stroke: none;
stroke-width: 1px;
}
.key8,.fill8{
fill: #ffff00;
stroke: none;
stroke-width: 1px;
}
.key9,.fill9{
fill: #cc6666;
stroke: none;
stroke-width: 1px;
}
.key10,.fill10{
fill: #663399;
stroke: none;
stroke-width: 1px;
}
.key11,.fill11{
fill: #339900;
stroke: none;
stroke-width: 1px;
}
.key12,.fill12{
fill: #9966FF;
stroke: none;
stroke-width: 1px;
}
EOL
    end
  end
end
179
+
180
+ require 'statsample/graph/svgscatterplot'
181
+ require 'statsample/graph/svgboxplot'
@@ -0,0 +1,208 @@
1
module Statsample
  module Graph
    # Histogram drawn as an SVG bar graph, one bar per bin, optionally
    # overlaid with a normal curve built from +mean+ and +sigma+.
    class SvgHistogram < SVG::Graph::BarBase
      include REXML

      attr_accessor :inner_margin, :mean, :sigma, :show_normal

      # config:: settings hash handed to the superclass. :fields is always
      #          replaced by a single dummy field. The caller's hash is not
      #          modified.
      def initialize(config)
        super(config.merge(:fields => [:dummy]))

        @histogram = nil
      end

      # In addition to the defaults set in Graph::initialize, sets
      # [inner_margin] 16
      # [key] false
      # [show_normal] false
      def set_defaults
        super
        self.top_align = self.top_font = 0
        init_with({
          :inner_margin => 16,
          :key => false,
          :show_normal => false
        })
      end

      # Stores the histogram +h+ and exposes its bin counts as the single
      # data series of the graph. +h+ must respond to +bins+ and +[]+.
      def histogram=(h)
        @histogram = h
        @data = [{:data => (0...h.bins).map { |i| h[i] }}]
      end

      # A single empty label: the x axis labels are drawn by draw_x_labels.
      def get_x_labels
        [""]
      end

      # Returns the y axis label values, stepping from min_value up to (at
      # least) max_value by the configured or derived scale division.
      def get_y_labels
        maxvalue = max_value
        minvalue = min_value
        range = maxvalue - minvalue

        top_pad = range == 0 ? 10 : range / 20.0
        scale_range = (maxvalue + top_pad) - minvalue

        scale_division = scale_divisions || (scale_range / 10.0)

        if scale_integers
          scale_division = scale_division < 1 ? 1 : scale_division.round
        end

        # When max_value is not a multiple of the division, extend the upper
        # bound by one division.
        unless maxvalue % scale_division == 0
          maxvalue += scale_division
        end
        labels = []
        minvalue.step(maxvalue, scale_division) { |v| labels << v }
        labels
      end

      # Horizontal pixels per unit of the histogram's value range, leaving
      # +inner_margin+ free on both sides.
      def unit_width
        (@graph_width - (@inner_margin * 2)) / (@histogram.max - @histogram.min).to_f
      end

      # Draws the x axis label for value +v+, formatted with two decimals.
      def draw_x_label(v)
        left = (v - @histogram.min) * unit_width
        x = @inner_margin + left
        text = @graph.add_element("text")
        text.attributes["class"] = "xAxisLabels"
        text.text = sprintf("%0.2f", v)
        y = @graph_height + x_label_font_size + 3
        text.attributes["x"] = x.to_s
        text.attributes["y"] = y.to_s
      end

      # Draws one label at the lower bound of every bin, plus one at the
      # upper bound of the last bin.
      def draw_x_labels
        return unless show_x_labels
        last_bin = @histogram.bins - 1
        (0...@histogram.bins).each do |i|
          range = @histogram.get_range(i)
          draw_x_label(range[0])
          draw_x_label(range[1]) if i == last_bin
        end
      end

      # Draws one rectangle per bin and, when +show_normal+ is set, a path
      # approximating a normal curve over the histogram's range.
      def draw_data
        minvalue = min_value
        y_labels = get_y_labels
        unit_size = (@graph_height.to_f - font_size * 2 * top_font) /
          (y_labels.max - y_labels.min)
        bottom = @graph_height
        hist_min = @histogram.min
        hist_max = @histogram.max
        range_hist = hist_max - hist_min
        x_unit = unit_width
        total = 0

        (0...@histogram.bins).each do |i|
          value = @histogram[i]
          total += value
          range = @histogram.get_range(i)
          left = (range[0] - hist_min) * x_unit
          bar_width = (range[1] - hist_min) * x_unit - left
          length = (value.abs - (minvalue > 0 ? minvalue : 0)) * unit_size
          # top is 0 if value is negative
          top = bottom - (((value < 0 ? 0 : value) - minvalue) * unit_size)
          @graph.add_element("rect", {
            "x" => (@inner_margin + left).to_s,
            "y" => top.to_s,
            "width" => bar_width.to_s,
            "height" => length.to_s,
            "class" => "fill1"
          })
          make_datapoint_text(left + @inner_margin + (bar_width / 2), top - 6, value.to_s)
        end

        if show_normal
          divs = 30
          # Float step: an integer division here would truncate when the
          # histogram bounds are integers and the curve would stop short.
          step = range_hist.to_f / divs
          path = ""
          0.upto(divs) do |i|
            x_abs = hist_min + step * i
            # NOTE(review): the density of the standardized value is scaled
            # only by the total count -- confirm this is the intended scale.
            y = GSL::Ran::gaussian_pdf((x_abs - mean) / sigma) * total
            xg = @inner_margin + ((x_abs - hist_min) * x_unit)
            yg = bottom - (y - minvalue) * unit_size
            path << (i == 0 ? "M" : "L") << "#{xg} #{yg} "
          end
          @graph.add_element("path", {
            "d" => path,
            "style" => "stroke:black;fill:none"
          })
        end
      end

      # Stylesheet for the histogram: twelve key/fill classes; the first two
      # are outlined in black.
      def get_css
        <<EOL
/* default fill styles for multiple datasets (probably only use a single dataset on this graph though) */

.key1,.fill1{
fill: #ff0000;
stroke: black;
stroke-width: 1px;
}
.key2,.fill2{
fill: #0000ff;
stroke: black;
stroke-width: 1px;
}
.key3,.fill3{
fill: #00ff00;
stroke: none;
stroke-width: 1px;
}
.key4,.fill4{
fill: #ffcc00;
stroke: none;
stroke-width: 1px;
}
.key5,.fill5{
fill: #00ccff;
stroke: none;
stroke-width: 1px;
}
.key6,.fill6{
fill: #ff00ff;
stroke: none;
stroke-width: 1px;
}
.key7,.fill7{
fill: #00ffff;
stroke: none;
stroke-width: 1px;
}
.key8,.fill8{
fill: #ffff00;
stroke: none;
stroke-width: 1px;
}
.key9,.fill9{
fill: #cc6666;
stroke: none;
stroke-width: 1px;
}
.key10,.fill10{
fill: #663399;
stroke: none;
stroke-width: 1px;
}
.key11,.fill11{
fill: #339900;
stroke: none;
stroke-width: 1px;
}
.key12,.fill12{
fill: #9966FF;
stroke: none;
stroke-width: 1px;
}
EOL
      end
    end
  end
end
@@ -0,0 +1,111 @@
1
module Statsample
  module Graph
    # Scatterplot of one dataset field (x) against every other field of the
    # dataset, each drawn as its own series.
    class SvgScatterplot < SVG::Graph::Plot
      attr_accessor :draw_path

      # ds::     dataset to plot; its first field is the initial x axis
      # config:: settings hash handed to the superclass
      def initialize(ds, config={})
        super(config)
        @ds = ds
        set_x(@ds.fields[0])
      end

      # In addition to the superclass defaults, sets
      # [show_data_values] false
      # [draw_path] false
      def set_defaults
        super
        init_with(
          :show_data_values => false,
          :draw_path => false
        )
      end

      # Selects field +x+ as the x axis; all remaining fields become y series.
      def set_x(x)
        @x = x
        @y = @ds.fields - [x]
      end

      # Reads the dataset and adds one series per y field through +add_data+.
      # Each series is a flat array [x1, y1, x2, y2, ...]; rows where either
      # the x or the y value is nil are skipped for that series.
      def parse
        series = {}
        @y.each { |field| series[field] = [] }
        @ds.each do |row|
          x_value = row[@x]
          next if x_value.nil?
          @y.each do |field|
            y_value = row[field]
            # Append in place instead of rebuilding the array for every row.
            series[field].push(x_value, y_value) unless y_value.nil?
          end
        end
        series.each do |field, points|
          add_data({
            :data => points, :title => @ds.vector_label(field)
          })
        end
      end

      # Superclass labels, with non-integer values rounded to two decimals.
      def get_x_labels
        round_labels(super)
      end

      # Superclass labels, with non-integer values rounded to two decimals.
      def get_y_labels
        round_labels(super)
      end

      # Draws every series: the optional filled area, the optional connecting
      # path, and the data points with their optional popups and value texts.
      def draw_data
        x_min, x_max = x_range
        y_min, y_max = y_range
        x_step = (@graph_width.to_f - font_size * 2) / (x_max - x_min)
        y_step = (@graph_height.to_f - font_size * 2) / (y_max - y_min)

        @data.each_with_index do |series, index|
          line = index + 1
          x_points = series[:data][X]
          y_points = series[:data][Y]

          # Pixel coordinates of every point, computed once and reused for
          # the paths and for the point markers.
          coords = []
          x_points.each_index do |idx|
            coords << [(x_points[idx] - x_min) * x_step,
                       @graph_height - (y_points[idx] - y_min) * y_step]
          end
          x_start, y_start = coords.first || [0, 0]
          lpath = "L"
          coords.each { |x, y| lpath << "#{x} #{y} " }

          if area_fill
            @graph.add_element("path", {
              "d" => "M#{x_start} #{@graph_height} #{lpath} V#{@graph_height} Z",
              "class" => "fill#{line}"
            })
          end
          if draw_path
            @graph.add_element("path", {
              "d" => "M#{x_start} #{y_start} #{lpath}",
              "class" => "line#{line}"
            })
          end
          if show_data_points || show_data_values
            coords.each_with_index do |(x, y), idx|
              if show_data_points
                @graph.add_element("circle", {
                  "cx" => x.to_s,
                  "cy" => y.to_s,
                  "r" => "2.5",
                  "class" => "dataPoint#{line}"
                })
                add_popup(x, y, format(x_points[idx], y_points[idx])) if add_popups
              end
              make_datapoint_text(x, y - 6, y_points[idx]) if show_data_values
            end
          end
        end
      end

      private

      # Keeps Integer labels as they are and rounds the rest to two decimals.
      def round_labels(values)
        values.collect do |value|
          value.is_a?(Integer) ? value : sprintf("%0.2f", value).to_f
        end
      end
    end
  end
end