statsample-ekatena 2.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/.travis.yml +23 -0
- data/CONTRIBUTING.md +17 -0
- data/Gemfile +2 -0
- data/History.txt +457 -0
- data/LICENSE.txt +12 -0
- data/README.md +175 -0
- data/Rakefile +44 -0
- data/benchmarks/correlation_matrix_15_variables.rb +32 -0
- data/benchmarks/correlation_matrix_5_variables.rb +33 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.ds +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.html +93 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +71 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.xls +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix_gsl_ruby.ods +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix_with_graphics.ods +0 -0
- data/benchmarks/correlation_matrix_methods/results.ds +0 -0
- data/benchmarks/factor_map.rb +37 -0
- data/benchmarks/helpers_benchmark.rb +5 -0
- data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
- data/doc_latex/manual/equations.tex +78 -0
- data/examples/boxplot.rb +28 -0
- data/examples/chisquare_test.rb +23 -0
- data/examples/correlation_matrix.rb +32 -0
- data/examples/dataset.rb +30 -0
- data/examples/dominance_analysis.rb +33 -0
- data/examples/dominance_analysis_bootstrap.rb +32 -0
- data/examples/histogram.rb +26 -0
- data/examples/icc.rb +24 -0
- data/examples/levene.rb +29 -0
- data/examples/multiple_regression.rb +20 -0
- data/examples/multivariate_correlation.rb +33 -0
- data/examples/parallel_analysis.rb +40 -0
- data/examples/polychoric.rb +40 -0
- data/examples/principal_axis.rb +26 -0
- data/examples/reliability.rb +31 -0
- data/examples/scatterplot.rb +25 -0
- data/examples/t_test.rb +27 -0
- data/examples/tetrachoric.rb +17 -0
- data/examples/u_test.rb +24 -0
- data/examples/vector.rb +20 -0
- data/examples/velicer_map_test.rb +46 -0
- data/grab_references.rb +29 -0
- data/lib/spss.rb +134 -0
- data/lib/statsample-ekatena/analysis.rb +100 -0
- data/lib/statsample-ekatena/analysis/suite.rb +89 -0
- data/lib/statsample-ekatena/analysis/suitereportbuilder.rb +44 -0
- data/lib/statsample-ekatena/anova.rb +24 -0
- data/lib/statsample-ekatena/anova/contrast.rb +79 -0
- data/lib/statsample-ekatena/anova/oneway.rb +187 -0
- data/lib/statsample-ekatena/anova/twoway.rb +207 -0
- data/lib/statsample-ekatena/bivariate.rb +406 -0
- data/lib/statsample-ekatena/bivariate/pearson.rb +54 -0
- data/lib/statsample-ekatena/codification.rb +182 -0
- data/lib/statsample-ekatena/converter/csv.rb +28 -0
- data/lib/statsample-ekatena/converter/spss.rb +48 -0
- data/lib/statsample-ekatena/converters.rb +211 -0
- data/lib/statsample-ekatena/crosstab.rb +188 -0
- data/lib/statsample-ekatena/daru.rb +115 -0
- data/lib/statsample-ekatena/dataset.rb +10 -0
- data/lib/statsample-ekatena/dominanceanalysis.rb +425 -0
- data/lib/statsample-ekatena/dominanceanalysis/bootstrap.rb +232 -0
- data/lib/statsample-ekatena/factor.rb +104 -0
- data/lib/statsample-ekatena/factor/map.rb +124 -0
- data/lib/statsample-ekatena/factor/parallelanalysis.rb +166 -0
- data/lib/statsample-ekatena/factor/pca.rb +242 -0
- data/lib/statsample-ekatena/factor/principalaxis.rb +243 -0
- data/lib/statsample-ekatena/factor/rotation.rb +198 -0
- data/lib/statsample-ekatena/formula/fit_model.rb +46 -0
- data/lib/statsample-ekatena/formula/formula.rb +306 -0
- data/lib/statsample-ekatena/graph.rb +11 -0
- data/lib/statsample-ekatena/graph/boxplot.rb +236 -0
- data/lib/statsample-ekatena/graph/histogram.rb +198 -0
- data/lib/statsample-ekatena/graph/scatterplot.rb +213 -0
- data/lib/statsample-ekatena/histogram.rb +180 -0
- data/lib/statsample-ekatena/matrix.rb +329 -0
- data/lib/statsample-ekatena/multiset.rb +310 -0
- data/lib/statsample-ekatena/regression.rb +65 -0
- data/lib/statsample-ekatena/regression/multiple.rb +89 -0
- data/lib/statsample-ekatena/regression/multiple/alglibengine.rb +128 -0
- data/lib/statsample-ekatena/regression/multiple/baseengine.rb +251 -0
- data/lib/statsample-ekatena/regression/multiple/gslengine.rb +129 -0
- data/lib/statsample-ekatena/regression/multiple/matrixengine.rb +205 -0
- data/lib/statsample-ekatena/regression/multiple/rubyengine.rb +86 -0
- data/lib/statsample-ekatena/regression/simple.rb +121 -0
- data/lib/statsample-ekatena/reliability.rb +150 -0
- data/lib/statsample-ekatena/reliability/icc.rb +415 -0
- data/lib/statsample-ekatena/reliability/multiscaleanalysis.rb +181 -0
- data/lib/statsample-ekatena/reliability/scaleanalysis.rb +233 -0
- data/lib/statsample-ekatena/reliability/skillscaleanalysis.rb +114 -0
- data/lib/statsample-ekatena/resample.rb +15 -0
- data/lib/statsample-ekatena/shorthand.rb +125 -0
- data/lib/statsample-ekatena/srs.rb +169 -0
- data/lib/statsample-ekatena/test.rb +82 -0
- data/lib/statsample-ekatena/test/bartlettsphericity.rb +45 -0
- data/lib/statsample-ekatena/test/chisquare.rb +73 -0
- data/lib/statsample-ekatena/test/f.rb +52 -0
- data/lib/statsample-ekatena/test/kolmogorovsmirnov.rb +63 -0
- data/lib/statsample-ekatena/test/levene.rb +88 -0
- data/lib/statsample-ekatena/test/t.rb +309 -0
- data/lib/statsample-ekatena/test/umannwhitney.rb +208 -0
- data/lib/statsample-ekatena/test/wilcoxonsignedrank.rb +90 -0
- data/lib/statsample-ekatena/vector.rb +19 -0
- data/lib/statsample-ekatena/version.rb +3 -0
- data/lib/statsample.rb +282 -0
- data/po/es/statsample.mo +0 -0
- data/po/es/statsample.po +959 -0
- data/po/statsample.pot +947 -0
- data/references.txt +24 -0
- data/statsample-ekatena.gemspec +49 -0
- data/test/fixtures/bank2.dat +200 -0
- data/test/fixtures/correlation_matrix.rb +17 -0
- data/test/fixtures/df.csv +15 -0
- data/test/fixtures/hartman_23.matrix +9 -0
- data/test/fixtures/stock_data.csv +500 -0
- data/test/fixtures/tetmat_matrix.txt +5 -0
- data/test/fixtures/tetmat_test.txt +1001 -0
- data/test/helpers_tests.rb +83 -0
- data/test/test_analysis.rb +176 -0
- data/test/test_anova_contrast.rb +36 -0
- data/test/test_anovaoneway.rb +26 -0
- data/test/test_anovatwoway.rb +37 -0
- data/test/test_anovatwowaywithdataset.rb +47 -0
- data/test/test_anovawithvectors.rb +102 -0
- data/test/test_awesome_print_bug.rb +16 -0
- data/test/test_bartlettsphericity.rb +25 -0
- data/test/test_bivariate.rb +164 -0
- data/test/test_codification.rb +78 -0
- data/test/test_crosstab.rb +67 -0
- data/test/test_dominance_analysis.rb +39 -0
- data/test/test_factor.rb +228 -0
- data/test/test_factor_map.rb +38 -0
- data/test/test_factor_pa.rb +56 -0
- data/test/test_fit_model.rb +88 -0
- data/test/test_ggobi.rb +35 -0
- data/test/test_gsl.rb +15 -0
- data/test/test_histogram.rb +109 -0
- data/test/test_matrix.rb +48 -0
- data/test/test_multiset.rb +176 -0
- data/test/test_regression.rb +231 -0
- data/test/test_reliability.rb +223 -0
- data/test/test_reliability_icc.rb +198 -0
- data/test/test_reliability_skillscale.rb +57 -0
- data/test/test_resample.rb +24 -0
- data/test/test_srs.rb +9 -0
- data/test/test_statistics.rb +69 -0
- data/test/test_stest.rb +69 -0
- data/test/test_stratified.rb +17 -0
- data/test/test_test_f.rb +33 -0
- data/test/test_test_kolmogorovsmirnov.rb +34 -0
- data/test/test_test_t.rb +62 -0
- data/test/test_umannwhitney.rb +27 -0
- data/test/test_vector.rb +12 -0
- data/test/test_wilcoxonsignedrank.rb +64 -0
- metadata +570 -0
@@ -0,0 +1,11 @@
|
|
1
|
+
require 'statsample/graph/scatterplot'
|
2
|
+
require 'statsample/graph/boxplot'
|
3
|
+
require 'statsample/graph/histogram'
|
4
|
+
module Statsample
|
5
|
+
# Graph classes, all rendered through Rubyvis:
|
6
|
+
# * Statsample::Graph::Boxplot
|
7
|
+
# * Statsample::Graph::Histogram
|
8
|
+
# * Statsample::Graph::Scatterplot
|
9
|
+
module Graph
|
10
|
+
end
|
11
|
+
end
|
@@ -0,0 +1,236 @@
|
|
1
|
+
require 'rubyvis'
|
2
|
+
module Statsample
|
3
|
+
module Graph
|
4
|
+
# = Boxplot
|
5
|
+
#
|
6
|
+
# From Wikipedia:
|
7
|
+
# In descriptive statistics, a box plot or boxplot (also known as a box-and-whisker diagram or plot) is a convenient way of graphically depicting groups of numerical data through their five-number summaries: the smallest observation (sample minimum), lower quartile (Q1), median (Q2), upper quartile (Q3), and largest observation (sample maximum). A boxplot may also indicate which observations, if any, might be considered outliers.
|
8
|
+
#
|
9
|
+
# == Usage
|
10
|
+
# === Svg output
|
11
|
+
# a = Daru::Vector.new([1,2,3,4])
|
12
|
+
# b = Daru::Vector.new([3,4,5,6])
|
13
|
+
# puts Statsample::Graph::Boxplot.new(:vectors=>[a,b]).to_svg
|
14
|
+
# === Using ReportBuilder
|
15
|
+
# a = Daru::Vector.new([1,2,3,4])
|
16
|
+
# b = Daru::Vector.new([3,4,5,6])
|
17
|
+
# rb=ReportBuilder.new
|
18
|
+
# rb.add(Statsample::Graph::Boxplot.new(:vectors=>[a,b]))
|
19
|
+
# rb.save_html('boxplot.html')
|
20
|
+
|
21
|
+
class Boxplot
|
22
|
+
include Summarizable
|
23
|
+
attr_accessor :name
|
24
|
+
# Total width of Boxplot
|
25
|
+
attr_accessor :width
|
26
|
+
# Total height of Boxplot
|
27
|
+
attr_accessor :height
|
28
|
+
# Top margin
|
29
|
+
attr_accessor :margin_top
|
30
|
+
# Bottom margin
|
31
|
+
attr_accessor :margin_bottom
|
32
|
+
# Left margin
|
33
|
+
attr_accessor :margin_left
|
34
|
+
# Right margin
|
35
|
+
attr_accessor :margin_right
|
36
|
+
# Array assigning each vector to a group
|
37
|
+
# For example, for four vectors,
|
38
|
+
# boxplot.groups=[1,2,1,3]
|
39
|
+
# Assign same color to first and third element, and different to
|
40
|
+
# second and fourth
|
41
|
+
attr_accessor :groups
|
42
|
+
# Minimum value on y-axis. Defined automatically from data if not set
|
43
|
+
attr_accessor :minimum
|
44
|
+
# Maximum value on y-axis. Defined automatically from data if not set
|
45
|
+
attr_accessor :maximum
|
46
|
+
# Vectors to plot, one box per vector
|
47
|
+
attr_accessor :vectors
|
48
|
+
# The rotation angle, in radians. Text is rotated clockwise relative
|
49
|
+
# to the anchor location. For example, with the default left alignment,
|
50
|
+
# an angle of Math::PI / 2 causes text to proceed downwards. The default angle is zero.
|
51
|
+
attr_accessor :label_angle
|
52
|
+
attr_reader :x_scale, :y_scale
|
53
|
+
# Create a new Boxplot.
#
# Parameters: Hash of options
# * :vectors: Array of vectors to plot (required)
# * :groups: Array of the same size as :vectors, with the group of each
#   vector; used to colorize vectors
# * :name, :width, :height, :margin_top, :margin_bottom, :margin_left,
#   :margin_right, :minimum, :maximum, :label_angle: initial values for
#   the attributes of the same name
#
# Raises RuntimeError when :vectors is missing.
# The hash passed by the caller is left untouched.
def initialize(opts=Hash.new)
  # Work on a copy: deleting :vectors from the caller's hash would break
  # callers that reuse one options hash for several plots.
  opts = opts.dup
  @vectors = opts.delete(:vectors)
  raise "You should define vectors" if @vectors.nil?

  opts_default = {
    :name => _("Boxplot"),
    :groups => nil,
    :width => 400,
    :height => 300,
    :margin_top => 10,
    :margin_bottom => 20,
    :margin_left => 20,
    :margin_right => 20,
    :minimum => nil,
    :maximum => nil,
    :label_angle => 0
  }
  @opts = opts_default.merge(opts)
  # Only the known options are applied, each one through its writer.
  opts_default.each_key { |k| send("#{k}=", @opts[k]) }
end
|
78
|
+
|
79
|
+
# Returns a Rubyvis panel with the boxplot.
#
# One box is drawn per vector in +vectors+. The y scale spans
# +minimum+..+maximum+ when they are set; otherwise it spans the smallest
# and largest value found across all vectors. The scales built here are
# stored, so +x_scale+ and +y_scale+ return them after this call.
def rubyvis_panel # :nodoc:
  that = self

  y_min = @minimum || @vectors.map { |v| v.min }.min
  y_max = @maximum || @vectors.map { |v| v.max }.max

  margin_hor = margin_left + margin_right
  margin_vert = margin_top + margin_bottom
  x_scale = pv.Scale.ordinal(@vectors.size.times.to_a).split_banded(0, width - margin_hor, 4.0 / 5)
  y_scale = Rubyvis::Scale.linear(y_min, y_max).range(0, height - margin_vert)
  y_scale.nice
  # Keep the scales so the x_scale / y_scale readers are not always nil.
  @x_scale = x_scale
  @y_scale = y_scale

  colors = Rubyvis::Colors.category10

  # Pre-compute, for every vector, the values the marks below read.
  summaries = @vectors.map do |v|
    out = {:percentil_25 => v.percentil(25), :median => v.median, :percentil_75 => v.percentil(75), :name => v.name}
    out[:iqr] = out[:percentil_75] - out[:percentil_25]

    # Fences sit one interquartile range beyond each quartile.
    fence_high = out[:percentil_75] + out[:iqr]
    fence_low = out[:percentil_25] - out[:iqr]

    # Each whisker ends on the most extreme observation strictly inside
    # its fence; it stays on the quartile when there is none.
    low = out[:percentil_25]
    high = out[:percentil_75]
    v.each do |d|
      low = d if d < low && d > fence_low
      high = d if d > high && d < fence_high
    end
    out[:low_whisker] = low
    out[:high_whisker] = high
    # Everything beyond the whiskers is drawn as an individual dot.
    out[:outliers] = v.to_a.find_all { |d| d < low || d > high }
    out
  end

  Rubyvis::Panel.new do |pan|
    pan.width(width - margin_hor)
    pan.height(height - margin_vert)
    pan.bottom(margin_bottom)
    pan.left(margin_left)
    pan.right(margin_right)
    pan.top(margin_top)

    # Y axis: one horizontal rule per tick, black for the tick at zero
    pan.rule do
      data(y_scale.ticks)
      bottom(y_scale)
      stroke_style { |d| d != 0 ? "#eee" : "#000" }
      label(:anchor => 'left') do
        text(y_scale.tick_format)
      end
    end
    # Baseline at the bottom of the plot area
    pan.rule do
      bottom(0)
      stroke_style('black')
    end

    # Labels: the name of each vector, below its box
    pan.label do |l|
      l.data(summaries)
      l.text_angle(that.label_angle)
      l.left { |v| x_scale[index] }
      l.bottom(-15)
      l.text { |v, x| v[:name] }
    end

    # One sub-panel per vector, one band wide
    pan.panel do |bp|
      bp.data(summaries)
      bp.left { |v| x_scale[index] }
      bp.width(x_scale.range_band)

      # Box from the first to the third quartile
      bp.bar do |b|
        b.bottom { |v| y_scale[v[:percentil_25]] }
        b.height { |v| y_scale[v[:percentil_75]] - y_scale[v[:percentil_25]] }
        b.line_width(1)
        b.stroke_style { |v|
          if that.groups
            colors.scale(that.groups[parent.index]).darker
          else
            colors.scale(index).darker
          end
        }
        b.fill_style { |v|
          if that.groups
            colors.scale(that.groups[parent.index])
          else
            colors.scale(index)
          end
        }
      end

      # Median
      bp.rule do |r|
        r.bottom { |v| y_scale[v[:median]] }
        r.width(x_scale.range_band)
        r.line_width(2)
      end

      # Low whisker: cap, then the stem up to the first quartile.
      # Both are hidden when the whisker coincides with the quartile.
      bp.rule do |r|
        r.visible { |v| v[:percentil_25] > v[:low_whisker] }
        r.bottom { |v| y_scale[v[:low_whisker]] }
      end

      bp.rule do |r|
        r.visible { |v| v[:percentil_25] > v[:low_whisker] }
        r.bottom { |v| y_scale[v[:low_whisker]] }
        r.left { |v| x_scale.range_band / 2.0 }
        r.height { |v| y_scale.scale(v[:percentil_25]) - y_scale.scale(v[:low_whisker]) }
      end

      # High whisker: cap, then the stem down to the third quartile
      bp.rule do |r|
        r.visible { |v| v[:percentil_75] < v[:high_whisker] }
        r.bottom { |v| y_scale.scale(v[:high_whisker]) }
      end

      bp.rule do |r|
        r.visible { |v| v[:percentil_75] < v[:high_whisker] }
        r.bottom { |v| y_scale.scale(v[:percentil_75]) }
        r.left { |v| x_scale.range_band / 2.0 }
        r.height { |v| y_scale.scale(v[:high_whisker]) - y_scale.scale(v[:percentil_75]) }
      end

      # Outliers, centered on the box
      bp.dot do |dot|
        dot.shape_size(4)
        dot.data { |v| v[:outliers] }
        dot.left { |v| x_scale.range_band / 2.0 }
        dot.bottom { |v| y_scale.scale(v) }
        dot.title { |v| v }
      end
    end
  end
end
|
222
|
+
|
223
|
+
# Returns the boxplot as SVG: the panel is built, rendered, and then
# serialized.
def to_svg
  rubyvis_panel.tap(&:render).to_svg
end
|
229
|
+
# Adds a section named after the plot to +builder+ and puts the SVG
# output in it as an image of +width+ x +height+.
def report_building(builder) # :nodoc:
  builder.section(:name => name) { |section|
    section.image(to_svg, :type => 'svg', :width => width, :height => height)
  }
end
|
234
|
+
end
|
235
|
+
end
|
236
|
+
end
|
@@ -0,0 +1,198 @@
|
|
1
|
+
require 'rubyvis'
|
2
|
+
module Statsample
|
3
|
+
module Graph
|
4
|
+
|
5
|
+
# In statistics, a histogram is a graphical representation, showing a visual impression of the distribution of experimental data. It is an estimate of the probability distribution of a continuous variable and was first introduced by Karl Pearson [1]. A histogram consists of tabular frequencies, shown as adjacent rectangles, erected over discrete intervals (bins), with an area equal to the frequency of the observations in the interval. The height of a rectangle is also equal to the frequency density of the interval, i.e., the frequency divided by the width of the interval. The total area of the histogram is equal to the number of data.
|
6
|
+
#
|
7
|
+
# == Usage
|
8
|
+
# === Svg output
|
9
|
+
# a = Daru::Vector.new([1,2,3,4])
|
10
|
+
# puts Statsample::Graph::Histogram.new(a).to_svg
|
11
|
+
# === Using ReportBuilder
|
12
|
+
# a = Daru::Vector.new([1,2,3,4])
|
13
|
+
# rb=ReportBuilder.new
|
14
|
+
# rb.add(Statsample::Graph::Histogram.new(a))
|
15
|
+
# rb.save_html('histogram.html')
|
16
|
+
|
17
|
+
class Histogram
|
18
|
+
include Summarizable
|
19
|
+
# Histogram name
|
20
|
+
attr_accessor :name
|
21
|
+
# Total width
|
22
|
+
attr_accessor :width
|
23
|
+
# Total height
|
24
|
+
attr_accessor :height
|
25
|
+
# Top margin
|
26
|
+
attr_accessor :margin_top
|
27
|
+
# Bottom margin
|
28
|
+
attr_accessor :margin_bottom
|
29
|
+
# Left margin
|
30
|
+
attr_accessor :margin_left
|
31
|
+
# Right margin
|
32
|
+
attr_accessor :margin_right
|
33
|
+
attr_reader :hist
|
34
|
+
# Could be an array of ranges or number of bins
|
35
|
+
attr_accessor :bins
|
36
|
+
# Minimum value on x axis. Calculated automatically from data if not set
|
37
|
+
attr_accessor :minimum_x
|
38
|
+
# Maximum value on x axis. Calculated automatically from data if not set
|
39
|
+
attr_accessor :maximum_x
|
40
|
+
# Minimum value on y axis. Set to 0 if not set
|
41
|
+
attr_accessor :minimum_y
|
42
|
+
# Maximum value on y axis. Calculated automatically from data if not set.
|
43
|
+
attr_accessor :maximum_y
|
44
|
+
# Add a line showing normal distribution
|
45
|
+
attr_accessor :line_normal_distribution
|
46
|
+
# Create a new Histogram.
#
# * data: a vector or a histogram. When it responds to +name+, that name
#   is used in the default title.
# * opts: Hash of options; each key below sets the attribute of the same
#   name.
def initialize(data, opts=Hash.new)
  label = if data.respond_to?(:name)
            data.name
          else
            ""
          end
  defaults = {
    :name => _("Histograma (%s)") % label,
    :width => 400,
    :height => 300,
    :margin_top => 10,
    :margin_bottom => 20,
    :margin_left => 30,
    :margin_right => 20,
    :minimum_x => nil,
    :maximum_x => nil,
    :minimum_y => nil,
    :maximum_y => nil,
    :bins => nil,
    :line_normal_distribution => false
  }
  @opts = defaults.merge(opts)
  # Only known options are applied, each through its writer.
  defaults.each_key do |key|
    send("#{key}=", @opts[key])
  end
  @data = data
end
|
68
|
+
# Prepares @hist, @mean and @sd from the data given to the constructor.
#
# A Statsample::Histogram is used as is, with its estimated mean and
# standard deviation. A Daru::Vector is binned into @bins bins (the floor
# of the square root of its size when @bins is not set). Any other kind
# of data leaves the instance untouched.
def pre_vis # :nodoc:
  case @data
  when Statsample::Histogram
    @hist = @data
    @mean = @hist.estimated_mean
    @sd = @hist.estimated_standard_deviation
  when Daru::Vector
    @mean = @data.mean
    @sd = @data.sd
    @bins ||= Math.sqrt(@data.size).floor
    @hist = @data.histogram(@bins)
  end
end
|
80
|
+
# Adds to +pan+ a line with the counts expected under a normal
# distribution with mean @mean and standard deviation @sd.
#
# The x range @minimum_x..@maximum_x is cut into slices as wide as the
# first bin of @hist. For each slice, the normal probability of falling
# inside it is multiplied by the histogram total and plotted at the
# middle of the slice. Reads @x_scale and @y_scale, so it must run after
# they have been set.
def rubyvis_normal_distribution(pan)
  x_scale = @x_scale
  y_scale = @y_scale

  first_low, first_high = @hist.get_range(0)
  bin_width = first_high - first_low

  bin_count = ((@maximum_x - @minimum_x) / bin_width.to_f).floor
  total = @hist.sum

  points = (0...bin_count).map do |i|
    left_edge = @minimum_x + i * bin_width
    right_edge = @minimum_x + (i + 1) * bin_width
    share = Distribution::Normal.cdf((right_edge - @mean) / @sd) - Distribution::Normal.cdf((left_edge - @mean) / @sd)
    {:x => (left_edge + right_edge) / 2.0, :y => share * total}
  end

  pan.line do |curve|
    curve.data(points)
    curve.interpolate("cardinal")
    curve.stroke_style("black")
    curve.bottom { |d| y_scale[d[:y]] }
    curve.left { |d| x_scale[d[:x]] }
  end
end
|
105
|
+
# Returns a Rubyvis panel with the histogram.
#
# Axis limits that were not set fall back to the histogram's own range
# (x), to 0 (minimum y) and to the largest bin value (maximum y). The
# scales are stored in @x_scale / @y_scale before the panel is built.
def rubyvis_panel # :nodoc:
  pre_vis

  @minimum_x ||= @hist.min
  @maximum_x ||= @hist.max
  @minimum_y ||= 0
  @maximum_y ||= @hist.max_val

  horizontal_margin = margin_left + margin_right
  vertical_margin = margin_top + margin_bottom

  x_scale = pv.Scale.linear(@minimum_x, @maximum_x).range(0, width - horizontal_margin)
  y_scale = Rubyvis::Scale.linear(@minimum_y, @maximum_y).range(0, height - vertical_margin)
  y_scale.nice

  # One entry per bin: its edges and its value
  bars = (0...@hist.bins).map do |i|
    {
      :low => @hist.get_range(i)[0],
      :high => @hist.get_range(i)[1],
      :value => @hist.bin[i]
    }
  end
  @x_scale = x_scale
  @y_scale = y_scale

  Rubyvis::Panel.new do |pan|
    pan.width(width - horizontal_margin)
    pan.height(height - vertical_margin)
    pan.bottom(margin_bottom)
    pan.left(margin_left)
    pan.right(margin_right)
    pan.top(margin_top)

    # Y axis: one horizontal rule per tick, black for the tick at zero
    pan.rule do
      data(y_scale.ticks)
      bottom(y_scale)
      stroke_style { |d| d != 0 ? "#eee" : "#000" }
      label(:anchor => 'left') do
        text(y_scale.tick_format)
      end
    end

    # X axis: short tick marks hanging below the plot area
    pan.rule do
      data(x_scale.ticks)
      left(x_scale)
      stroke_style("black")
      height(5)
      bottom(-5)
      label(:anchor => 'bottom') do
        text(x_scale.tick_format)
      end
    end

    # One bar per bin, spanning the bin's edges
    pan.bar do |bar|
      bar.data(bars)
      bar.left { |v| x_scale[v[:low]] }
      bar.width { |v| x_scale[v[:high]] - x_scale[v[:low]] }
      bar.bottom(0)
      bar.height { |v| y_scale[v[:value]] }
      bar.stroke_style("black")
      bar.line_width(1)
    end

    rubyvis_normal_distribution(pan) if @line_normal_distribution
  end
end
|
175
|
+
# Returns the histogram as SVG: the panel is built, rendered, and then
# serialized.
def to_svg
  rubyvis_panel.tap { |panel| panel.render }.to_svg
end
|
181
|
+
# Adds a section named after the histogram to +builder+ and puts the SVG
# output in it as an image of +width+ x +height+.
def report_building(builder) # :nodoc:
  builder.section(:name => name) { |section|
    section.image(to_svg, :type => 'svg', :width => width, :height => height)
  }
end
|
186
|
+
# Writes a plain-text histogram to +generator+: one line per bin, with
# the bin's range value followed by a bar of "*".
#
# Each "*" stands for +step+ observations. +step+ is chosen so that the
# tallest bin is drawn with at most 40 characters.
def report_building_text(generator)
  pre_vis
  tallest = @hist.max_val
  # Divide by a Float: with Integer counts, integer division would
  # truncate before ceil (41 / 40 => 1) and the cap would be missed.
  step = tallest > 40 ? (tallest / 40.0).ceil : 1

  @hist.range.each_with_index do |r, i|
    # Skip the entry past the last bin (presumably the upper edge of the
    # last bin, which has no bin of its own).
    next if i == @hist.bins
    generator.text(sprintf("%5.2f : %s", r, "*" * (@hist.bin[i] / step).floor))
  end
end
|
196
|
+
end
|
197
|
+
end
|
198
|
+
end
|
@@ -0,0 +1,213 @@
|
|
1
|
+
require 'rubyvis'
|
2
|
+
module Statsample
|
3
|
+
module Graph
|
4
|
+
# = Scatterplot
|
5
|
+
#
|
6
|
+
# From Wikipedia:
|
7
|
+
# A scatter plot or scattergraph is a type of mathematical diagram using
|
8
|
+
# Cartesian coordinates to display values for two variables for a set of data.
|
9
|
+
#
|
10
|
+
# The data is displayed as a collection of points, each having the value of one variable determining the position on the horizontal axis and the value of the other variable determining the position on the vertical axis.[2] This kind of plot is also called a scatter chart, scatter diagram and scatter graph.
|
11
|
+
# == Usage
|
12
|
+
# === Svg output
|
13
|
+
# a = Daru::Vector.new([1,2,3,4])
|
14
|
+
# b = Daru::Vector.new([3,4,5,6])
|
15
|
+
# puts Statsample::Graph::Scatterplot.new(a,b).to_svg
|
16
|
+
# === Using ReportBuilder
|
17
|
+
# a = Daru::Vector.new([1,2,3,4])
|
18
|
+
# b = Daru::Vector.new([3,4,5,6])
|
19
|
+
# rb=ReportBuilder.new
|
20
|
+
# rb.add(Statsample::Graph::Scatterplot.new(a,b))
|
21
|
+
# rb.save_html('scatter.html')
|
22
|
+
|
23
|
+
class Scatterplot
|
24
|
+
include Summarizable
|
25
|
+
attr_accessor :name
|
26
|
+
# Total width of Scatterplot
|
27
|
+
attr_accessor :width
|
28
|
+
# Total height of Scatterplot
|
29
|
+
attr_accessor :height
|
30
|
+
attr_accessor :dot_alpha
|
31
|
+
# Add a line on median of x and y axis
|
32
|
+
attr_accessor :line_median
|
33
|
+
# Top margin
|
34
|
+
attr_accessor :margin_top
|
35
|
+
# Bottom margin
|
36
|
+
attr_accessor :margin_bottom
|
37
|
+
# Left margin
|
38
|
+
attr_accessor :margin_left
|
39
|
+
# Right margin
|
40
|
+
attr_accessor :margin_right
|
41
|
+
|
42
|
+
attr_reader :data
|
43
|
+
attr_reader :v1,:v2
|
44
|
+
|
45
|
+
# Array assigning each data point to a group
|
46
|
+
# For example, for four points,
|
47
|
+
# scatterplot.groups=[1,2,1,3]
|
48
|
+
# Assign same color to first and third element, and different to
|
49
|
+
# second and fourth
|
50
|
+
attr_accessor :groups
|
51
|
+
|
52
|
+
|
53
|
+
attr_reader :x_scale, :y_scale
|
54
|
+
# Minimum value on x axis. Calculated automatically from data if not set
|
55
|
+
attr_accessor :minimum_x
|
56
|
+
# Maximum value on x axis. Calculated automatically from data if not set
|
57
|
+
attr_accessor :maximum_x
|
58
|
+
# Minimum value on y axis. Calculated automatically from data if not set
|
59
|
+
attr_accessor :minimum_y
|
60
|
+
# Maximum value on y axis. Calculated automatically from data if not set.
|
61
|
+
attr_accessor :maximum_y
|
62
|
+
|
63
|
+
# Create a new Scatterplot.
# Params:
# * v1: Vector on X axis
# * v2: Vector on Y axis
# * opts: Hash of options. See attributes of Scatterplot
#
# The vectors are passed through Statsample.only_valid_clone before use.
# The points to draw are stored in +data+ as hashes with :x and :y keys.
def initialize(v1,v2,opts=Hash.new)
  @v1_name = v1.name
  @v2_name = v2.name
  @v1, @v2 = Statsample.only_valid_clone(v1, v2)

  defaults = {
    :name => _("Scatterplot (%s - %s)") % [@v1_name, @v2_name],
    :width => 400,
    :height => 300,
    :dot_alpha => 0.5,
    :line_median => false,
    :margin_top => 10,
    :margin_bottom => 20,
    :margin_left => 20,
    :margin_right => 20,
    :minimum_x => nil,
    :maximum_x => nil,
    :minimum_y => nil,
    :maximum_y => nil,
    :groups => nil
  }
  @opts = defaults.merge(opts)
  # Only known options are applied, each through its writer.
  defaults.each_key do |key|
    send("#{key}=", @opts[key])
  end

  # Pair the i-th element of @v1 with @v2[i].
  @data = []
  @v1.each_with_index do |x_value, i|
    @data << {:x => x_value, :y => @v2[i]}
  end
end
|
94
|
+
# Adds two rules to +vis+: one positioned with the x scale at the median
# of v1 and one positioned with the y scale at the median of v2, each
# labelled with the scale's tick format. Reads @x_scale and @y_scale, so
# it must run after they have been set.
def add_line_median(vis) # :nodoc:
  plot = self
  x_scale = @x_scale
  y_scale = @y_scale
  vis.execute do
    rule do
      data([plot.v1.median])
      left(x_scale)
      stroke_style(Rubyvis.color("#933").alpha(0.5))
      label(:anchor => "top") { text(x_scale.tick_format) }
    end
    rule do
      data([plot.v2.median])
      bottom(y_scale)
      stroke_style(Rubyvis.color("#933").alpha(0.5))
      label(:anchor => "right") { text(y_scale.tick_format) }
    end
  end
end
|
119
|
+
# Returns a Rubyvis panel with the scatterplot.
#
# Axis limits that were not set fall back to the minimum and maximum of
# v1 (x) and v2 (y). The scales are stored in @x_scale / @y_scale before
# the panel is built. When +groups+ is set, the fill and the stroke of
# every dot both take the color of the group of its data point.
def rubyvis_panel # :nodoc:
  that = self

  @minimum_x ||= @v1.min
  @maximum_x ||= @v1.max
  @minimum_y ||= @v2.min
  @maximum_y ||= @v2.max

  colors = Rubyvis::Colors.category10

  margin_hor = margin_left + margin_right
  margin_vert = margin_top + margin_bottom

  x = Rubyvis::Scale.linear(@minimum_x, @maximum_x).range(0, width - margin_hor)
  y = Rubyvis::Scale.linear(@minimum_y, @maximum_y).range(0, height - margin_vert)
  @x_scale = x
  @y_scale = y

  Rubyvis::Panel.new do |pan|
    pan.width(width - margin_hor)
    pan.height(height - margin_vert)
    pan.bottom(margin_bottom)
    pan.left(margin_left)
    pan.right(margin_right)
    pan.top(margin_top)

    # Y axis: one horizontal rule per y tick, black for the tick at zero
    pan.rule do
      data(y.ticks)
      bottom(y)
      stroke_style { |d| d != 0 ? "#eee" : "#000" }
      label(:anchor => 'left') do
        visible { |d| d != 0 && d < that.width }
        text(y.tick_format)
      end
    end

    # X axis: one vertical rule per x tick, black for the tick at zero
    pan.rule do
      data(x.ticks)
      left(x)
      stroke_style { |d| d != 0 ? "#eee" : "#000" }
      label(:anchor => 'bottom') do
        visible { |d| d > 0 && d < that.height }
        text(x.tick_format)
      end
    end

    # Add lines on median
    add_line_median(pan) if line_median

    # One sub-panel per data point, holding a single dot
    pan.panel do
      data(that.data)
      dot do
        left { |d| x[d[:x]] }
        bottom { |d| y[d[:y]] }

        fill_style { |v|
          # The fill is 0.3 more transparent than the stroke, never
          # below 0.1.
          alpha = (that.dot_alpha - 0.3 <= 0) ? 0.1 : that.dot_alpha - 0.3
          if that.groups
            # Use the index of the enclosing panel (the position of the
            # data point), as stroke_style does. The dot's own index
            # does not identify the point here, so fill and stroke
            # could disagree.
            colors.scale(that.groups[parent.index]).alpha(alpha)
          else
            colors.scale(0).alpha(alpha)
          end
        }

        stroke_style { |v|
          if that.groups
            colors.scale(that.groups[parent.index]).alpha(that.dot_alpha)
          else
            colors.scale(0).alpha(that.dot_alpha)
          end
        }
        shape_radius(2)
      end
    end
  end
end
|
198
|
+
|
199
|
+
# Returns the scatterplot as SVG: the panel is built, rendered, and then
# serialized.
def to_svg
  rubyvis_panel.tap(&:render).to_svg
end
|
205
|
+
|
206
|
+
# Adds a section named after the scatterplot to +builder+ and puts the
# SVG output in it as an image of +width+ x +height+.
def report_building(builder) # :nodoc:
  builder.section(:name => name) { |section|
    section.image(to_svg, :type => 'svg', :width => width, :height => height)
  }
end
|
211
|
+
end
|
212
|
+
end
|
213
|
+
end
|