statsample-ekatena 2.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (156) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +15 -0
  3. data/.travis.yml +23 -0
  4. data/CONTRIBUTING.md +17 -0
  5. data/Gemfile +2 -0
  6. data/History.txt +457 -0
  7. data/LICENSE.txt +12 -0
  8. data/README.md +175 -0
  9. data/Rakefile +44 -0
  10. data/benchmarks/correlation_matrix_15_variables.rb +32 -0
  11. data/benchmarks/correlation_matrix_5_variables.rb +33 -0
  12. data/benchmarks/correlation_matrix_methods/correlation_matrix.ds +0 -0
  13. data/benchmarks/correlation_matrix_methods/correlation_matrix.html +93 -0
  14. data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +71 -0
  15. data/benchmarks/correlation_matrix_methods/correlation_matrix.xls +0 -0
  16. data/benchmarks/correlation_matrix_methods/correlation_matrix_gsl_ruby.ods +0 -0
  17. data/benchmarks/correlation_matrix_methods/correlation_matrix_with_graphics.ods +0 -0
  18. data/benchmarks/correlation_matrix_methods/results.ds +0 -0
  19. data/benchmarks/factor_map.rb +37 -0
  20. data/benchmarks/helpers_benchmark.rb +5 -0
  21. data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
  22. data/doc_latex/manual/equations.tex +78 -0
  23. data/examples/boxplot.rb +28 -0
  24. data/examples/chisquare_test.rb +23 -0
  25. data/examples/correlation_matrix.rb +32 -0
  26. data/examples/dataset.rb +30 -0
  27. data/examples/dominance_analysis.rb +33 -0
  28. data/examples/dominance_analysis_bootstrap.rb +32 -0
  29. data/examples/histogram.rb +26 -0
  30. data/examples/icc.rb +24 -0
  31. data/examples/levene.rb +29 -0
  32. data/examples/multiple_regression.rb +20 -0
  33. data/examples/multivariate_correlation.rb +33 -0
  34. data/examples/parallel_analysis.rb +40 -0
  35. data/examples/polychoric.rb +40 -0
  36. data/examples/principal_axis.rb +26 -0
  37. data/examples/reliability.rb +31 -0
  38. data/examples/scatterplot.rb +25 -0
  39. data/examples/t_test.rb +27 -0
  40. data/examples/tetrachoric.rb +17 -0
  41. data/examples/u_test.rb +24 -0
  42. data/examples/vector.rb +20 -0
  43. data/examples/velicer_map_test.rb +46 -0
  44. data/grab_references.rb +29 -0
  45. data/lib/spss.rb +134 -0
  46. data/lib/statsample-ekatena/analysis.rb +100 -0
  47. data/lib/statsample-ekatena/analysis/suite.rb +89 -0
  48. data/lib/statsample-ekatena/analysis/suitereportbuilder.rb +44 -0
  49. data/lib/statsample-ekatena/anova.rb +24 -0
  50. data/lib/statsample-ekatena/anova/contrast.rb +79 -0
  51. data/lib/statsample-ekatena/anova/oneway.rb +187 -0
  52. data/lib/statsample-ekatena/anova/twoway.rb +207 -0
  53. data/lib/statsample-ekatena/bivariate.rb +406 -0
  54. data/lib/statsample-ekatena/bivariate/pearson.rb +54 -0
  55. data/lib/statsample-ekatena/codification.rb +182 -0
  56. data/lib/statsample-ekatena/converter/csv.rb +28 -0
  57. data/lib/statsample-ekatena/converter/spss.rb +48 -0
  58. data/lib/statsample-ekatena/converters.rb +211 -0
  59. data/lib/statsample-ekatena/crosstab.rb +188 -0
  60. data/lib/statsample-ekatena/daru.rb +115 -0
  61. data/lib/statsample-ekatena/dataset.rb +10 -0
  62. data/lib/statsample-ekatena/dominanceanalysis.rb +425 -0
  63. data/lib/statsample-ekatena/dominanceanalysis/bootstrap.rb +232 -0
  64. data/lib/statsample-ekatena/factor.rb +104 -0
  65. data/lib/statsample-ekatena/factor/map.rb +124 -0
  66. data/lib/statsample-ekatena/factor/parallelanalysis.rb +166 -0
  67. data/lib/statsample-ekatena/factor/pca.rb +242 -0
  68. data/lib/statsample-ekatena/factor/principalaxis.rb +243 -0
  69. data/lib/statsample-ekatena/factor/rotation.rb +198 -0
  70. data/lib/statsample-ekatena/formula/fit_model.rb +46 -0
  71. data/lib/statsample-ekatena/formula/formula.rb +306 -0
  72. data/lib/statsample-ekatena/graph.rb +11 -0
  73. data/lib/statsample-ekatena/graph/boxplot.rb +236 -0
  74. data/lib/statsample-ekatena/graph/histogram.rb +198 -0
  75. data/lib/statsample-ekatena/graph/scatterplot.rb +213 -0
  76. data/lib/statsample-ekatena/histogram.rb +180 -0
  77. data/lib/statsample-ekatena/matrix.rb +329 -0
  78. data/lib/statsample-ekatena/multiset.rb +310 -0
  79. data/lib/statsample-ekatena/regression.rb +65 -0
  80. data/lib/statsample-ekatena/regression/multiple.rb +89 -0
  81. data/lib/statsample-ekatena/regression/multiple/alglibengine.rb +128 -0
  82. data/lib/statsample-ekatena/regression/multiple/baseengine.rb +251 -0
  83. data/lib/statsample-ekatena/regression/multiple/gslengine.rb +129 -0
  84. data/lib/statsample-ekatena/regression/multiple/matrixengine.rb +205 -0
  85. data/lib/statsample-ekatena/regression/multiple/rubyengine.rb +86 -0
  86. data/lib/statsample-ekatena/regression/simple.rb +121 -0
  87. data/lib/statsample-ekatena/reliability.rb +150 -0
  88. data/lib/statsample-ekatena/reliability/icc.rb +415 -0
  89. data/lib/statsample-ekatena/reliability/multiscaleanalysis.rb +181 -0
  90. data/lib/statsample-ekatena/reliability/scaleanalysis.rb +233 -0
  91. data/lib/statsample-ekatena/reliability/skillscaleanalysis.rb +114 -0
  92. data/lib/statsample-ekatena/resample.rb +15 -0
  93. data/lib/statsample-ekatena/shorthand.rb +125 -0
  94. data/lib/statsample-ekatena/srs.rb +169 -0
  95. data/lib/statsample-ekatena/test.rb +82 -0
  96. data/lib/statsample-ekatena/test/bartlettsphericity.rb +45 -0
  97. data/lib/statsample-ekatena/test/chisquare.rb +73 -0
  98. data/lib/statsample-ekatena/test/f.rb +52 -0
  99. data/lib/statsample-ekatena/test/kolmogorovsmirnov.rb +63 -0
  100. data/lib/statsample-ekatena/test/levene.rb +88 -0
  101. data/lib/statsample-ekatena/test/t.rb +309 -0
  102. data/lib/statsample-ekatena/test/umannwhitney.rb +208 -0
  103. data/lib/statsample-ekatena/test/wilcoxonsignedrank.rb +90 -0
  104. data/lib/statsample-ekatena/vector.rb +19 -0
  105. data/lib/statsample-ekatena/version.rb +3 -0
  106. data/lib/statsample.rb +282 -0
  107. data/po/es/statsample.mo +0 -0
  108. data/po/es/statsample.po +959 -0
  109. data/po/statsample.pot +947 -0
  110. data/references.txt +24 -0
  111. data/statsample-ekatena.gemspec +49 -0
  112. data/test/fixtures/bank2.dat +200 -0
  113. data/test/fixtures/correlation_matrix.rb +17 -0
  114. data/test/fixtures/df.csv +15 -0
  115. data/test/fixtures/hartman_23.matrix +9 -0
  116. data/test/fixtures/stock_data.csv +500 -0
  117. data/test/fixtures/tetmat_matrix.txt +5 -0
  118. data/test/fixtures/tetmat_test.txt +1001 -0
  119. data/test/helpers_tests.rb +83 -0
  120. data/test/test_analysis.rb +176 -0
  121. data/test/test_anova_contrast.rb +36 -0
  122. data/test/test_anovaoneway.rb +26 -0
  123. data/test/test_anovatwoway.rb +37 -0
  124. data/test/test_anovatwowaywithdataset.rb +47 -0
  125. data/test/test_anovawithvectors.rb +102 -0
  126. data/test/test_awesome_print_bug.rb +16 -0
  127. data/test/test_bartlettsphericity.rb +25 -0
  128. data/test/test_bivariate.rb +164 -0
  129. data/test/test_codification.rb +78 -0
  130. data/test/test_crosstab.rb +67 -0
  131. data/test/test_dominance_analysis.rb +39 -0
  132. data/test/test_factor.rb +228 -0
  133. data/test/test_factor_map.rb +38 -0
  134. data/test/test_factor_pa.rb +56 -0
  135. data/test/test_fit_model.rb +88 -0
  136. data/test/test_ggobi.rb +35 -0
  137. data/test/test_gsl.rb +15 -0
  138. data/test/test_histogram.rb +109 -0
  139. data/test/test_matrix.rb +48 -0
  140. data/test/test_multiset.rb +176 -0
  141. data/test/test_regression.rb +231 -0
  142. data/test/test_reliability.rb +223 -0
  143. data/test/test_reliability_icc.rb +198 -0
  144. data/test/test_reliability_skillscale.rb +57 -0
  145. data/test/test_resample.rb +24 -0
  146. data/test/test_srs.rb +9 -0
  147. data/test/test_statistics.rb +69 -0
  148. data/test/test_stest.rb +69 -0
  149. data/test/test_stratified.rb +17 -0
  150. data/test/test_test_f.rb +33 -0
  151. data/test/test_test_kolmogorovsmirnov.rb +34 -0
  152. data/test/test_test_t.rb +62 -0
  153. data/test/test_umannwhitney.rb +27 -0
  154. data/test/test_vector.rb +12 -0
  155. data/test/test_wilcoxonsignedrank.rb +64 -0
  156. metadata +570 -0
@@ -0,0 +1,11 @@
1
+ require 'statsample/graph/scatterplot'
2
+ require 'statsample/graph/boxplot'
3
+ require 'statsample/graph/histogram'
4
+ module Statsample
5
+ # Several graphs, based on Rubyvis
6
+ # * Statsample::Graph::Boxplot
7
+ # * Statsample::Graph::Histogram
8
+ # * Statsample::Graph::Scatterplot
9
+ module Graph
10
+ end
11
+ end
@@ -0,0 +1,236 @@
1
+ require 'rubyvis'
2
+ module Statsample
3
+ module Graph
4
+ # = Boxplot
5
+ #
6
+ # From Wikipedia:
7
+ # In descriptive statistics, a box plot or boxplot (also known as a box-and-whisker diagram or plot) is a convenient way of graphically depicting groups of numerical data through their five-number summaries: the smallest observation (sample minimum), lower quartile (Q1), median (Q2), upper quartile (Q3), and largest observation (sample maximum). A boxplot may also indicate which observations, if any, might be considered outliers.
8
+ #
9
+ # == Usage
10
+ # === Svg output
11
+ # a = Daru::Vector.new([1,2,3,4])
12
+ # b = Daru::Vector.new([3,4,5,6])
13
+ # puts Statsample::Graph::Boxplot.new(:vectors=>[a,b]).to_svg
14
+ # === Using ReportBuilder
15
+ # a = Daru::Vector.new([1,2,3,4])
16
+ # b = Daru::Vector.new([3,4,5,6])
17
+ # rb=ReportBuilder.new
18
+ # rb.add(Statsample::Graph::Boxplot.new(:vectors=>[a,b]))
19
+ # rb.save_html('boxplot.html')
20
+
21
+ class Boxplot
22
+ include Summarizable
23
+ attr_accessor :name
24
+ # Total width of Boxplot
25
+ attr_accessor :width
26
+ # Total height of Boxplot
27
+ attr_accessor :height
28
+ # Top margin
29
+ attr_accessor :margin_top
30
+ # Bottom margin
31
+ attr_accessor :margin_bottom
32
+ # Left margin
33
+ attr_accessor :margin_left
34
+ # Right margin
35
+ attr_accessor :margin_right
36
+ # Array assigning each vector to a color group
37
+ # For example, for four vectors,
38
+ # boxplot.groups=[1,2,1,3]
39
+ # Assign same color to first and third element, and different to
40
+ # second and fourth
41
+ attr_accessor :groups
42
+ # Minimum value on y-axis. Automatically defined from data
43
+ attr_accessor :minimum
44
+ # Maximum value on y-axis. Automatically defined from data
45
+ attr_accessor :maximum
46
+ # Vectors to be box-plotted
47
+ attr_accessor :vectors
48
+ # The rotation angle, in radians. Text is rotated clockwise relative
49
+ # to the anchor location. For example, with the default left alignment,
50
+ # an angle of Math::PI / 2 causes text to proceed downwards. The default angle is zero.
51
+ attr_accessor :label_angle
52
+ attr_reader :x_scale, :y_scale
53
+ # Create a new Boxplot.
54
+ # Parameters: Hash of options
55
+ # * :vectors: Array of vectors
56
+ # * :groups: Array of same size as :vectors:, with name of groups
57
+ # to colorize vectors
58
+ def initialize(opts=Hash.new)
59
+ @vectors=opts.delete :vectors
60
+ raise "You should define vectors" if @vectors.nil?
61
+
62
+ opts_default={
63
+ :name=>_("Boxplot"),
64
+ :groups=>nil,
65
+ :width=>400,
66
+ :height=>300,
67
+ :margin_top=>10,
68
+ :margin_bottom=>20,
69
+ :margin_left=>20,
70
+ :margin_right=>20,
71
+ :minimum=>nil,
72
+ :maximum=>nil,
73
+ :label_angle=>0
74
+ }
75
+ @opts=opts_default.merge(opts)
76
+ opts_default.keys.each {|k| send("#{k}=", @opts[k]) }
77
+ end
78
+
79
+ # Returns a Rubyvis panel with boxplot
80
+ def rubyvis_panel # :nodoc:
81
+ that=self
82
+
83
+ min,max=@minimum, @maximum
84
+
85
+ min||=@vectors.map {|v| v.min}.min
86
+ max||=@vectors.map {|v| v.max}.max
87
+
88
+ margin_hor=margin_left + margin_right
89
+ margin_vert=margin_top + margin_bottom
90
+ x_scale = pv.Scale.ordinal(@vectors.size.times.map.to_a).split_banded(0, width-margin_hor, 4.0/5)
91
+ y_scale=Rubyvis::Scale.linear(min,max).range(0,height-margin_vert)
92
+ y_scale.nice
93
+ # cache data
94
+
95
+ colors=Rubyvis::Colors.category10
96
+
97
+ data=@vectors.map {|v|
98
+ out={:percentil_25=>v.percentil(25), :median=>v.median, :percentil_75=>v.percentil(75), :name=>v.name}
99
+ out[:iqr]=out[:percentil_75] - out[:percentil_25]
100
+
101
+ irq_max=out[:percentil_75] + out[:iqr]
102
+ irq_min=out[:percentil_25] - out[:iqr]
103
+
104
+ # Find the last data inside the margin
105
+ min = out[:percentil_25]
106
+ max = out[:percentil_75]
107
+
108
+ v.each {|d|
109
+ min=d if d < min and d > irq_min
110
+ max=d if d > max and d < irq_max
111
+ }
112
+ # Whiskers!
113
+ out[:low_whisker]=min
114
+ out[:high_whisker]=max
115
+ # And now, data outside whiskers
116
+ out[:outliers]=v.to_a.find_all {|d| d < min or d > max }
117
+ out
118
+ }
119
+
120
+ vis=Rubyvis::Panel.new do |pan|
121
+ pan.width width - margin_hor
122
+ pan.height height - margin_vert
123
+ pan.bottom margin_bottom
124
+ pan.left margin_left
125
+ pan.right margin_right
126
+ pan.top margin_top
127
+ # Y axis
128
+ pan.rule do
129
+ data y_scale.ticks
130
+ bottom y_scale
131
+ stroke_style {|d| d!=0 ? "#eee" : "#000"}
132
+ label(:anchor=>'left') do
133
+ text y_scale.tick_format
134
+ end
135
+ end
136
+ pan.rule do
137
+ bottom 0
138
+ stroke_style 'black'
139
+ end
140
+
141
+ # Labels
142
+
143
+ pan.label do |l|
144
+ l.data data
145
+ l.text_angle that.label_angle
146
+ l.left {|v| x_scale[index] }
147
+ l.bottom(-15)
148
+ l.text {|v,x| v[:name]}
149
+ end
150
+
151
+ pan.panel do |bp|
152
+ bp.data data
153
+ bp.left {|v| x_scale[index]}
154
+ bp.width x_scale.range_band
155
+
156
+ # Bar
157
+ bp.bar do |b|
158
+ b.bottom {|v| y_scale[v[:percentil_25]]}
159
+ b.height {|v| y_scale[v[:percentil_75]] - y_scale[v[:percentil_25]] }
160
+ b.line_width 1
161
+ b.stroke_style {|v|
162
+ if that.groups
163
+ colors.scale(that.groups[parent.index]).darker
164
+ else
165
+ colors.scale(index).darker
166
+ end
167
+ }
168
+ b.fill_style {|v|
169
+ if that.groups
170
+ colors.scale(that.groups[parent.index])
171
+ else
172
+ colors.scale(index)
173
+ end
174
+ }
175
+ end
176
+ # Median
177
+ bp.rule do |r|
178
+ r.bottom {|v| y_scale[v[:median]]}
179
+ r.width x_scale.range_band
180
+ r.line_width 2
181
+ end
182
+ ##
183
+ # Whiskers
184
+ ##
185
+ # Low whisker
186
+ bp.rule do |r|
187
+ r.visible {|v| v[:percentil_25] > v[:low_whisker]}
188
+ r.bottom {|v| y_scale[v[:low_whisker]]}
189
+ end
190
+
191
+ bp.rule do |r|
192
+ r.visible {|v| v[:percentil_25] > v[:low_whisker]}
193
+ r.bottom {|v| y_scale[v[:low_whisker]]}
194
+ r.left {|v| x_scale.range_band / 2.0}
195
+ r.height {|v| y_scale.scale(v[:percentil_25]) - y_scale.scale(v[:low_whisker])}
196
+ end
197
+ # High whisker
198
+
199
+ bp.rule do |r|
200
+ r.visible {|v| v[:percentil_75] < v[:high_whisker]}
201
+ r.bottom {|v| y_scale.scale(v[:high_whisker])}
202
+ end
203
+
204
+ bp.rule do |r|
205
+ r.visible {|v| v[:percentil_75] < v[:high_whisker]}
206
+ r.bottom {|v| y_scale.scale(v[:percentil_75])}
207
+ r.left {|v| x_scale.range_band / 2.0}
208
+ r.height {|v| y_scale.scale(v[:high_whisker]) - y_scale.scale(v[:percentil_75])}
209
+ end
210
+ # Outliers
211
+ bp.dot do |dot|
212
+ dot.shape_size 4
213
+ dot.data {|v| v[:outliers]}
214
+ dot.left {|v| x_scale.range_band / 2.0}
215
+ dot.bottom {|v| y_scale.scale(v)}
216
+ dot.title {|v| v}
217
+ end
218
+ end
219
+ end
220
+ vis
221
+ end
222
+
223
+ # Returns SVG with boxplot
224
+ def to_svg
225
+ rp=rubyvis_panel
226
+ rp.render
227
+ rp.to_svg
228
+ end
229
+ def report_building(builder) # :nodoc:
230
+ builder.section(:name=>name) do |b|
231
+ b.image(to_svg, :type=>'svg', :width=>width, :height=>height)
232
+ end
233
+ end
234
+ end
235
+ end
236
+ end
@@ -0,0 +1,198 @@
1
+ require 'rubyvis'
2
+ module Statsample
3
+ module Graph
4
+
5
+ # In statistics, a histogram is a graphical representation, showing a visual impression of the distribution of experimental data. It is an estimate of the probability distribution of a continuous variable and was first introduced by Karl Pearson [1]. A histogram consists of tabular frequencies, shown as adjacent rectangles, erected over discrete intervals (bins), with an area equal to the frequency of the observations in the interval. The height of a rectangle is also equal to the frequency density of the interval, i.e., the frequency divided by the width of the interval. The total area of the histogram is equal to the number of data.
6
+ #
7
+ # == Usage
8
+ # === Svg output
9
+ # a = Daru::Vector.new([1,2,3,4])
10
+ # puts Statsample::Graph::Histogram.new(a).to_svg
11
+ # === Using ReportBuilder
12
+ # a = Daru::Vector.new([1,2,3,4])
13
+ # rb=ReportBuilder.new
14
+ # rb.add(Statsample::Graph::Histogram.new(a))
15
+ # rb.save_html('histogram.html')
16
+
17
+ class Histogram
18
+ include Summarizable
19
+ # Histogram name
20
+ attr_accessor :name
21
+ # Total width
22
+ attr_accessor :width
23
+ # Total height
24
+ attr_accessor :height
25
+ # Top margin
26
+ attr_accessor :margin_top
27
+ # Bottom margin
28
+ attr_accessor :margin_bottom
29
+ # Left margin
30
+ attr_accessor :margin_left
31
+ # Right margin
32
+ attr_accessor :margin_right
33
+ attr_reader :hist
34
+ # Could be an array of ranges or number of bins
35
+ attr_accessor :bins
36
+ # Minimum value on x axis. Calculated automatically from data if not set
37
+ attr_accessor :minimum_x
38
+ # Maximum value on x axis. Calculated automatically from data if not set
39
+ attr_accessor :maximum_x
40
+ # Minimum value on y axis. Set to 0 if not set
41
+ attr_accessor :minimum_y
42
+ # Maximum value on y axis. Calculated automatically from data if not set.
43
+ attr_accessor :maximum_y
44
+ # Add a line showing normal distribution
45
+ attr_accessor :line_normal_distribution
46
+ # data could be a vector or a histogram
47
+ def initialize(data, opts=Hash.new)
48
+ prov_name=(data.respond_to?(:name)) ? data.name : ""
49
+ opts_default={
50
+ :name=>_("Histograma (%s)") % prov_name,
51
+ :width=>400,
52
+ :height=>300,
53
+ :margin_top=>10,
54
+ :margin_bottom=>20,
55
+ :margin_left=>30,
56
+ :margin_right=>20,
57
+ :minimum_x=>nil,
58
+ :maximum_x=>nil,
59
+ :minimum_y=>nil,
60
+ :maximum_y=>nil,
61
+ :bins=>nil,
62
+ :line_normal_distribution=>false
63
+ }
64
+ @opts=opts_default.merge(opts)
65
+ opts_default.keys.each {|k| send("#{k}=", @opts[k]) }
66
+ @data=data
67
+ end
68
+ def pre_vis # :nodoc:
69
+ if @data.is_a? Statsample::Histogram
70
+ @hist=@data
71
+ @mean=@hist.estimated_mean
72
+ @sd=@hist.estimated_standard_deviation
73
+ elsif @data.is_a? Daru::Vector
74
+ @mean=@data.mean
75
+ @sd=@data.sd
76
+ @bins||=Math::sqrt(@data.size).floor
77
+ @hist=@data.histogram(@bins)
78
+ end
79
+ end
80
+ def rubyvis_normal_distribution(pan)
81
+ x_scale=@x_scale
82
+ y_scale=@y_scale
83
+
84
+ wob = @hist.get_range(0)[1] - @hist.get_range(0)[0]
85
+
86
+ nob = ((@maximum_x-@minimum_x) / wob.to_f).floor
87
+ sum=@hist.sum
88
+
89
+ data=nob.times.map {|i|
90
+ l=@minimum_x+i*wob
91
+ r=@minimum_x+(i+1)*wob
92
+ middle=(l+r) / 2.0
93
+ pi=Distribution::Normal.cdf((r-@mean) / @sd) - Distribution::Normal.cdf((l-@mean) / @sd)
94
+ {:x=>middle, :y=>pi*sum}
95
+ }
96
+ pan.line do |l|
97
+ l.data data
98
+ l.interpolate "cardinal"
99
+ l.stroke_style "black"
100
+ l.bottom {|d| y_scale[d[:y]]}
101
+ l.left {|d| x_scale[d[:x]]}
102
+ end
103
+
104
+ end
105
+ # Returns a Rubyvis panel with histogram
106
+ def rubyvis_panel # :nodoc:
107
+ pre_vis
108
+ #that=self
109
+
110
+ @minimum_x||=@hist.min
111
+ @maximum_x||=@hist.max
112
+ @minimum_y||=0
113
+ @maximum_y||=@hist.max_val
114
+
115
+ margin_hor=margin_left + margin_right
116
+ margin_vert=margin_top + margin_bottom
117
+
118
+ x_scale = pv.Scale.linear(@minimum_x, @maximum_x).range(0, width - margin_hor)
119
+
120
+ y_scale=Rubyvis::Scale.linear(@minimum_y, @maximum_y).range(0, height - margin_vert)
121
+
122
+ y_scale.nice
123
+
124
+ bins=@hist.bins.times.map {|i|
125
+ {
126
+ :low =>@hist.get_range(i)[0],
127
+ :high=>@hist.get_range(i)[1],
128
+ :value=>@hist.bin[i]
129
+ }
130
+ }
131
+ @x_scale=x_scale
132
+ @y_scale=y_scale
133
+ # cache data
134
+ vis=Rubyvis::Panel.new do |pan|
135
+ pan.width width - margin_hor
136
+ pan.height height - margin_vert
137
+ pan.bottom margin_bottom
138
+ pan.left margin_left
139
+ pan.right margin_right
140
+ pan.top margin_top
141
+ # Y axis
142
+ pan.rule do
143
+ data y_scale.ticks
144
+ bottom y_scale
145
+ stroke_style {|d| d!=0 ? "#eee" : "#000"}
146
+ label(:anchor=>'left') do
147
+ text y_scale.tick_format
148
+ end
149
+ end
150
+ # X axis
151
+ pan.rule do
152
+ data x_scale.ticks
153
+ left x_scale
154
+ stroke_style "black"
155
+ height 5
156
+ bottom(-5)
157
+ label(:anchor=>'bottom') do
158
+ text x_scale.tick_format
159
+ end
160
+ end
161
+
162
+ pan.bar do |bar|
163
+ bar.data(bins)
164
+ bar.left {|v| x_scale[v[:low]]}
165
+ bar.width {|v| x_scale[v[:high]] - x_scale[v[:low]]}
166
+ bar.bottom 0
167
+ bar.height {|v| y_scale[v[:value]]}
168
+ bar.stroke_style "black"
169
+ bar.line_width 1
170
+ end
171
+ rubyvis_normal_distribution(pan) if @line_normal_distribution
172
+ end
173
+ vis
174
+ end
175
+ # Returns SVG with histogram
176
+ def to_svg
177
+ rp=rubyvis_panel
178
+ rp.render
179
+ rp.to_svg
180
+ end
181
+ def report_building(builder) # :nodoc:
182
+ builder.section(:name=>name) do |b|
183
+ b.image(to_svg, :type=>'svg', :width=>width, :height=>height)
184
+ end
185
+ end
186
+ def report_building_text(generator)
187
+ pre_vis
188
+ #anchor=generator.toc_entry(_("Histogram %s") % [@name])
189
+ step= @hist.max_val > 40 ? ( @hist.max_val / 40).ceil : 1
190
+
191
+ @hist.range.each_with_index do |r,i|
192
+ next if i==@hist.bins
193
+ generator.text(sprintf("%5.2f : %s", r, "*" * (@hist.bin[i] / step).floor ))
194
+ end
195
+ end
196
+ end
197
+ end
198
+ end
@@ -0,0 +1,213 @@
1
+ require 'rubyvis'
2
+ module Statsample
3
+ module Graph
4
+ # = Scatterplot
5
+ #
6
+ # From Wikipedia:
7
+ # A scatter plot or scattergraph is a type of mathematical diagram using
8
+ # Cartesian coordinates to display values for two variables for a set of data.
9
+ #
10
+ # The data is displayed as a collection of points, each having the value of one variable determining the position on the horizontal axis and the value of the other variable determining the position on the vertical axis.[2] This kind of plot is also called a scatter chart, scatter diagram and scatter graph.
11
+ # == Usage
12
+ # === Svg output
13
+ # a = Daru::Vector.new([1,2,3,4])
14
+ # b = Daru::Vector.new([3,4,5,6])
15
+ # puts Statsample::Graph::Scatterplot.new(a,b).to_svg
16
+ # === Using ReportBuilder
17
+ # a = Daru::Vector.new([1,2,3,4])
18
+ # b = Daru::Vector.new([3,4,5,6])
19
+ # rb=ReportBuilder.new
20
+ # rb.add(Statsample::Graph::Scatterplot.new(a,b))
21
+ # rb.save_html('scatter.html')
22
+
23
+ class Scatterplot
24
+ include Summarizable
25
+ attr_accessor :name
26
+ # Total width of Scatterplot
27
+ attr_accessor :width
28
+ # Total height of Scatterplot
29
+ attr_accessor :height
30
+ attr_accessor :dot_alpha
31
+ # Add a line on median of x and y axis
32
+ attr_accessor :line_median
33
+ # Top margin
34
+ attr_accessor :margin_top
35
+ # Bottom margin
36
+ attr_accessor :margin_bottom
37
+ # Left margin
38
+ attr_accessor :margin_left
39
+ # Right margin
40
+ attr_accessor :margin_right
41
+
42
+ attr_reader :data
43
+ attr_reader :v1,:v2
44
+
45
+ # Array with assignation to groups of bars
46
+ # For example, for four vectors,
47
+ # boxplot.groups=[1,2,1,3]
48
+ # Assign same color to first and third element, and different to
49
+ # second and fourth
50
+ attr_accessor :groups
51
+
52
+
53
+ attr_reader :x_scale, :y_scale
54
+ # Minimum value on x axis. Calculated automatically from data if not set
55
+ attr_accessor :minimum_x
56
+ # Maximum value on x axis. Calculated automatically from data if not set
57
+ attr_accessor :maximum_x
58
+ # Minimum value on y axis. Calculated automatically from data if not set
59
+ attr_accessor :minimum_y
60
+ # Maximum value on y axis. Calculated automatically from data if not set.
61
+ attr_accessor :maximum_y
62
+
63
+ # Create a new Scatterplot.
64
+ # Params:
65
+ # * v1: Vector on X axis
66
+ # * v2: Vector on Y axis
67
+ # * opts: Hash of options. See attributes of Scatterplot
68
+ def initialize(v1,v2,opts=Hash.new)
69
+ @v1_name,@v2_name = v1.name,v2.name
70
+ @v1,@v2 = Statsample.only_valid_clone(v1,v2)
71
+ opts_default={
72
+ :name=>_("Scatterplot (%s - %s)") % [@v1_name, @v2_name],
73
+ :width=>400,
74
+ :height=>300,
75
+ :dot_alpha=>0.5,
76
+ :line_median=>false,
77
+ :margin_top=>10,
78
+ :margin_bottom=>20,
79
+ :margin_left=>20,
80
+ :margin_right=>20,
81
+ :minimum_x=>nil,
82
+ :maximum_x=>nil,
83
+ :minimum_y=>nil,
84
+ :maximum_y=>nil,
85
+ :groups=>nil
86
+ }
87
+ @opts=opts_default.merge(opts)
88
+ opts_default.keys.each {|k| send("#{k}=", @opts[k]) }
89
+ @data=[]
90
+ @v1.each_with_index {|d1,i|
91
+ @data.push({:x=>d1, :y=>@v2[i]})
92
+ }
93
+ end
94
+ # Add a rule on median of X and Y axis
95
+ def add_line_median(vis) # :nodoc:
96
+ that=self
97
+ x=@x_scale
98
+ y=@y_scale
99
+ vis.execute {
100
+ rule do
101
+ data [that.v1.median]
102
+ left x
103
+ stroke_style Rubyvis.color("#933").alpha(0.5)
104
+ label(:anchor=>"top") do
105
+ text x.tick_format
106
+ end
107
+ end
108
+ rule do
109
+ data [that.v2.median]
110
+ bottom y
111
+ stroke_style Rubyvis.color("#933").alpha(0.5)
112
+ label(:anchor=>"right") do
113
+ text y.tick_format
114
+ end
115
+ end
116
+ }
117
+
118
+ end
119
+ # Returns a Rubyvis panel with scatterplot
120
+ def rubyvis_panel # :nodoc:
121
+ that=self
122
+ #p @v1.map {|v| v}
123
+
124
+ @minimum_x||=@v1.min
125
+ @maximum_x||=@v1.max
126
+ @minimum_y||=@v2.min
127
+ @maximum_y||=@v2.max
128
+
129
+ colors=Rubyvis::Colors.category10
130
+
131
+ margin_hor=margin_left + margin_right
132
+ margin_vert=margin_top + margin_bottom
133
+
134
+ x=Rubyvis::Scale.linear(@minimum_x, @maximum_x).range(0, width - margin_hor)
135
+ y=Rubyvis::Scale.linear(@minimum_y, @maximum_y).range(0, height - margin_vert)
136
+ @x_scale=x
137
+ @y_scale=y
138
+ vis=Rubyvis::Panel.new do |pan|
139
+ pan.width width - margin_hor
140
+ pan.height height - margin_vert
141
+ pan.bottom margin_bottom
142
+ pan.left margin_left
143
+ pan.right margin_right
144
+ pan.top margin_top
145
+ # Y axis
146
+ pan.rule do
147
+ data y.ticks
148
+ bottom y
149
+ stroke_style {|d| d!=0 ? "#eee" : "#000"}
150
+ label(:anchor=>'left') do
151
+ visible {|d| d!=0 and d < that.width}
152
+ text y.tick_format
153
+ end
154
+ end
155
+
156
+ # X axis
157
+ pan.rule do
158
+ data x.ticks
159
+ left x
160
+ stroke_style {|d| d!=0 ? "#eee" : "#000"}
161
+ label(:anchor=>'bottom') do
162
+ visible {|d| d>0 and d < that.height}
163
+ text x.tick_format
164
+ end
165
+ end
166
+ # Add lines on median
167
+ add_line_median(pan) if line_median
168
+
169
+ pan.panel do
170
+ data(that.data)
171
+ dot do
172
+ left {|d| x[d[:x]]}
173
+ bottom {|d| y[d[:y]]}
174
+
175
+ fill_style {|v|
176
+ alpha=(that.dot_alpha-0.3<=0) ? 0.1 : that.dot_alpha-0.3
177
+ if that.groups
178
+
179
+ colors.scale(that.groups[index]).alpha(alpha)
180
+ else
181
+ colors.scale(0).alpha(alpha)
182
+ end
183
+ }
184
+
185
+ stroke_style {|v|
186
+ if that.groups
187
+ colors.scale(that.groups[parent.index]).alpha(that.dot_alpha)
188
+ else
189
+ colors.scale(0).alpha(that.dot_alpha)
190
+ end
191
+ }
192
+ shape_radius 2
193
+ end
194
+ end
195
+ end
196
+ vis
197
+ end
198
+
199
+ # Returns SVG with scatterplot
200
+ def to_svg
201
+ rp = rubyvis_panel
202
+ rp.render
203
+ rp.to_svg
204
+ end
205
+
206
+ def report_building(builder) # :nodoc:
207
+ builder.section(:name=>name) do |b|
208
+ b.image(to_svg, :type=>'svg', :width=>width, :height=>height)
209
+ end
210
+ end
211
+ end
212
+ end
213
+ end