statsample 0.16.0 → 0.17.0
Sign up to get free protection for your applications and to get access to all the features.
- data.tar.gz.sig +0 -0
- data/History.txt +7 -0
- data/Manifest.txt +6 -4
- data/README.txt +5 -1
- data/Rakefile +1 -1
- data/examples/boxplot.rb +17 -0
- data/examples/dominance_analysis_bootstrap.rb +5 -0
- data/examples/histogram.rb +14 -0
- data/examples/scatterplot.rb +4 -3
- data/lib/distribution/normalbivariate.rb +1 -1
- data/lib/statsample.rb +16 -3
- data/lib/statsample/bivariate.rb +4 -2
- data/lib/statsample/converter/csv.rb +0 -2
- data/lib/statsample/converters.rb +13 -1
- data/lib/statsample/dataset.rb +23 -15
- data/lib/statsample/dominanceanalysis.rb +3 -2
- data/lib/statsample/dominanceanalysis/bootstrap.rb +2 -1
- data/lib/statsample/factor/parallelanalysis.rb +1 -1
- data/lib/statsample/factor/principalaxis.rb +1 -1
- data/lib/statsample/graph.rb +2 -0
- data/lib/statsample/graph/boxplot.rb +234 -0
- data/lib/statsample/graph/histogram.rb +133 -0
- data/lib/statsample/graph/scatterplot.rb +1 -9
- data/lib/statsample/histogram.rb +47 -11
- data/lib/statsample/mle.rb +4 -4
- data/lib/statsample/mle/normal.rb +3 -3
- data/lib/statsample/regression/multiple/baseengine.rb +1 -1
- data/lib/statsample/regression/multiple/gslengine.rb +0 -1
- data/lib/statsample/regression/multiple/matrixengine.rb +1 -1
- data/lib/statsample/reliability.rb +1 -0
- data/lib/statsample/reliability/scaleanalysis.rb +3 -51
- data/lib/statsample/reliability/skillscaleanalysis.rb +93 -0
- data/lib/statsample/srs.rb +1 -1
- data/lib/statsample/test/umannwhitney.rb +1 -1
- data/lib/statsample/vector.rb +13 -36
- data/test/test_factor.rb +1 -1
- data/test/test_ggobi.rb +0 -5
- data/test/test_histogram.rb +75 -18
- data/test/test_mle.rb +0 -44
- data/test/test_reliability_skillscale.rb +41 -0
- data/test/test_statistics.rb +3 -3
- data/test/test_stest.rb +2 -2
- data/test/test_vector.rb +13 -8
- metadata +36 -18
- metadata.gz.sig +0 -0
- data/lib/statsample/combination.rb +0 -114
- data/lib/statsample/permutation.rb +0 -98
- data/test/test_combination.rb +0 -37
- data/test/test_permutation.rb +0 -42
data.tar.gz.sig
CHANGED
Binary file
|
data/History.txt
CHANGED
@@ -1,3 +1,9 @@
|
|
1
|
+
=== 0.17.0 / 2010-12-09
|
2
|
+
* Added Statsample::Graph::Histogram and Statsample::Graph::Boxplot
|
3
|
+
* Added Statsample::Reliability::SkillScaleAnalysis for analysis of skill based scales.
|
4
|
+
* Deleted combination and permutation classes. A backport for ruby 1.8.7 is widely available
|
5
|
+
* Deleted unused variables (thanks, ruby-head)
|
6
|
+
|
1
7
|
=== 0.16.0 / 2010-11-13
|
2
8
|
* Works on ruby 1.9.2 and HEAD. Updated Rakefile and manifest
|
3
9
|
* Removed all graph based on Svg::Graph.
|
@@ -6,6 +12,7 @@
|
|
6
12
|
* Added reference on references.txt
|
7
13
|
* Ruby-based random gaussian distribution generator when gsl not available
|
8
14
|
* Added population average deviation [Al Chou]
|
15
|
+
|
9
16
|
=== 0.15.1 / 2010-10-20
|
10
17
|
* Statsample::Excel and Statsample::PlainText add name to vectors equal to field name
|
11
18
|
* Statsample::Dataset.delete_vector accept multiple fields.
|
data/Manifest.txt
CHANGED
@@ -12,10 +12,12 @@ data/test_binomial.csv
|
|
12
12
|
data/tetmat_matrix.txt
|
13
13
|
data/tetmat_test.txt
|
14
14
|
doc_latex/manual/equations.tex
|
15
|
+
examples/boxplot.rb
|
15
16
|
examples/correlation_matrix.rb
|
16
17
|
examples/dataset.rb
|
17
18
|
examples/dominance_analysis.rb
|
18
19
|
examples/dominance_analysis_bootstrap.rb
|
20
|
+
examples/histogram.rb
|
19
21
|
examples/icc.rb
|
20
22
|
examples/levene.rb
|
21
23
|
examples/multiple_regression.rb
|
@@ -47,7 +49,6 @@ lib/statsample/anova/twoway.rb
|
|
47
49
|
lib/statsample/bivariate.rb
|
48
50
|
lib/statsample/bivariate/pearson.rb
|
49
51
|
lib/statsample/codification.rb
|
50
|
-
lib/statsample/combination.rb
|
51
52
|
lib/statsample/converter/csv.rb
|
52
53
|
lib/statsample/converter/spss.rb
|
53
54
|
lib/statsample/converters.rb
|
@@ -62,6 +63,8 @@ lib/statsample/factor/pca.rb
|
|
62
63
|
lib/statsample/factor/principalaxis.rb
|
63
64
|
lib/statsample/factor/rotation.rb
|
64
65
|
lib/statsample/graph.rb
|
66
|
+
lib/statsample/graph/boxplot.rb
|
67
|
+
lib/statsample/graph/histogram.rb
|
65
68
|
lib/statsample/graph/scatterplot.rb
|
66
69
|
lib/statsample/histogram.rb
|
67
70
|
lib/statsample/matrix.rb
|
@@ -70,7 +73,6 @@ lib/statsample/mle/logit.rb
|
|
70
73
|
lib/statsample/mle/normal.rb
|
71
74
|
lib/statsample/mle/probit.rb
|
72
75
|
lib/statsample/multiset.rb
|
73
|
-
lib/statsample/permutation.rb
|
74
76
|
lib/statsample/regression.rb
|
75
77
|
lib/statsample/regression/binomial.rb
|
76
78
|
lib/statsample/regression/binomial/logit.rb
|
@@ -86,6 +88,7 @@ lib/statsample/reliability.rb
|
|
86
88
|
lib/statsample/reliability/icc.rb
|
87
89
|
lib/statsample/reliability/multiscaleanalysis.rb
|
88
90
|
lib/statsample/reliability/scaleanalysis.rb
|
91
|
+
lib/statsample/reliability/skillscaleanalysis.rb
|
89
92
|
lib/statsample/resample.rb
|
90
93
|
lib/statsample/rserve_extension.rb
|
91
94
|
lib/statsample/srs.rb
|
@@ -111,7 +114,6 @@ test/test_anovawithvectors.rb
|
|
111
114
|
test/test_bartlettsphericity.rb
|
112
115
|
test/test_bivariate.rb
|
113
116
|
test/test_codification.rb
|
114
|
-
test/test_combination.rb
|
115
117
|
test/test_crosstab.rb
|
116
118
|
test/test_csv.csv
|
117
119
|
test/test_csv.rb
|
@@ -126,10 +128,10 @@ test/test_logit.rb
|
|
126
128
|
test/test_matrix.rb
|
127
129
|
test/test_mle.rb
|
128
130
|
test/test_multiset.rb
|
129
|
-
test/test_permutation.rb
|
130
131
|
test/test_regression.rb
|
131
132
|
test/test_reliability.rb
|
132
133
|
test/test_reliability_icc.rb
|
134
|
+
test/test_reliability_skillscale.rb
|
133
135
|
test/test_resample.rb
|
134
136
|
test/test_rserve_extension.rb
|
135
137
|
test/test_srs.rb
|
data/README.txt
CHANGED
@@ -21,6 +21,7 @@ Include:
|
|
21
21
|
* Sample calculation related formulas
|
22
22
|
* Structural Equation Modeling (SEM), using R libraries +sem+ and +OpenMx+
|
23
23
|
* Creates reports on text, html and rtf, using ReportBuilder gem
|
24
|
+
* Graphics: Histogram, Boxplot and Scatterplot
|
24
25
|
|
25
26
|
== FEATURES:
|
26
27
|
|
@@ -69,8 +70,11 @@ Include:
|
|
69
70
|
* Statsample::Test::UMannWhitney
|
70
71
|
* Statsample::Test::T
|
71
72
|
* Statsample::Test::F
|
73
|
+
* Module Graph provides several classes to create beautiful graphs using rubyvis
|
74
|
+
* Statsample::Graph::Boxplot
|
75
|
+
* Statsample::Graph::Histogram
|
76
|
+
* Statsample::Graph::Scatterplot
|
72
77
|
* Gem +statsample-sem+ provides a DSL to R libraries +sem+ and +OpenMx+
|
73
|
-
* Interfaces to gdchart, gnuplot and SVG::Graph (experimental)
|
74
78
|
* Close integration with gem <tt>reportbuilder</tt>, to easily create reports on text, html and rtf formats.
|
75
79
|
|
76
80
|
== Examples of use:
|
data/Rakefile
CHANGED
@@ -41,7 +41,7 @@ h=Hoe.spec('statsample') do
|
|
41
41
|
#self.testlib=:minitest
|
42
42
|
self.rubyforge_name = "ruby-statsample"
|
43
43
|
self.developer('Claudio Bustos', 'clbustos@gmail.com')
|
44
|
-
self.extra_deps << ["spreadsheet","~>0.6.0"] << ["reportbuilder", "~>1.
|
44
|
+
self.extra_deps << ["spreadsheet","~>0.6.0"] << ["reportbuilder", "~>1.4"] << ["minimization", "~>0.2.0"] << ["fastercsv", ">0"] << ["dirty-memoize", "~>0.0"] << ["extendmatrix","~>0.3.1"] << ["statsample-bivariate-extension", ">0"] << ["rserve-client", "~>0.2.5"] << ["rubyvis", "~>0.3.3"]
|
45
45
|
|
46
46
|
self.extra_dev_deps << ["shoulda"] << ["minitest", "~>2.0"]
|
47
47
|
self.clean_globs << "test/images/*" << "demo/item_analysis/*" << "demo/Regression"
|
data/examples/boxplot.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
$:.unshift(File.dirname(__FILE__)+'/../lib/')
|
3
|
+
$:.unshift('/home/cdx/dev/reportbuilder/lib/')
|
4
|
+
|
5
|
+
require 'benchmark'
|
6
|
+
require 'statsample'
|
7
|
+
n=100
|
8
|
+
a=(n-1).times.map {|i| rand()*20+50}
|
9
|
+
b=n.times.map {|i| rand()*10+50}.to_scale
|
10
|
+
c=n.times.map {|i| rand()*5+50}.to_scale
|
11
|
+
|
12
|
+
a.push(30)
|
13
|
+
a=a.to_scale
|
14
|
+
sp=Statsample::Graph::Boxplot.new(:vectors=>[a,b,c],:width=>300, :height=>300, :groups=>%w{first first second}, :minimum=>0)
|
15
|
+
rb=ReportBuilder.new
|
16
|
+
rb.add(sp)
|
17
|
+
puts rb.to_text
|
@@ -8,6 +8,11 @@ b=100.times.collect {rand}.to_scale
|
|
8
8
|
c=100.times.collect {rand}.to_scale
|
9
9
|
d=100.times.collect {rand}.to_scale
|
10
10
|
|
11
|
+
a.name="a"
|
12
|
+
b.name="b"
|
13
|
+
c.name="c"
|
14
|
+
d.name="d"
|
15
|
+
|
11
16
|
ds={'a'=>a,'b'=>b,'c'=>c,'d'=>d}.to_dataset
|
12
17
|
|
13
18
|
ds['y1']=ds.collect{|row| row['a']*5+row['b']*2+row['c']*2+row['d']*2+10*rand()}
|
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
$:.unshift(File.dirname(__FILE__)+'/../lib/')
|
3
|
+
$:.unshift('/home/cdx/dev/reportbuilder/lib/')
|
4
|
+
|
5
|
+
require 'benchmark'
|
6
|
+
require 'statsample'
|
7
|
+
n=1000
|
8
|
+
a=n.times.map {|i| rand()*20}.to_scale
|
9
|
+
hg=Statsample::Graph::Histogram.new(a, :bins=>15)
|
10
|
+
|
11
|
+
rb=ReportBuilder.new
|
12
|
+
rb.add(a.histogram)
|
13
|
+
rb.add(hg)
|
14
|
+
puts rb.to_text
|
data/examples/scatterplot.rb
CHANGED
@@ -1,12 +1,13 @@
|
|
1
1
|
#!/usr/bin/ruby
|
2
2
|
$:.unshift(File.dirname(__FILE__)+'/../lib/')
|
3
|
+
$:.unshift('/home/cdx/dev/reportbuilder/lib/')
|
4
|
+
|
3
5
|
require 'benchmark'
|
4
6
|
require 'statsample'
|
5
7
|
n=100
|
6
8
|
a=n.times.map {|i| rand(10)+i}.to_scale
|
7
9
|
b=n.times.map {|i| rand(10)+i}.to_scale
|
8
10
|
sp=Statsample::Graph::Scatterplot.new(a,b, :width=>200, :height=>200)
|
9
|
-
rb=ReportBuilder.new
|
10
|
-
|
11
|
-
end
|
11
|
+
rb=ReportBuilder.new
|
12
|
+
rb.add(sp)
|
12
13
|
puts rb.to_text
|
@@ -220,7 +220,7 @@ module Distribution
|
|
220
220
|
asr = Math::asin(r)
|
221
221
|
(1..lg).each do |i|
|
222
222
|
[-1,1].each do |is|
|
223
|
-
sn = Math::sin(
|
223
|
+
sn = Math::sin(asr*(is* x[i][ng]+1).quo(2) )
|
224
224
|
bvn = bvn + w[i][ng] * Math::exp( ( sn*hk-hs ).quo( 1-sn*sn ) )
|
225
225
|
end # do
|
226
226
|
end # do
|
data/lib/statsample.rb
CHANGED
@@ -118,12 +118,10 @@ module Statsample
|
|
118
118
|
@@has_gsl
|
119
119
|
end
|
120
120
|
|
121
|
-
VERSION = '0.
|
121
|
+
VERSION = '0.17.0'
|
122
122
|
SPLIT_TOKEN = ","
|
123
123
|
autoload(:Database, 'statsample/converters')
|
124
124
|
autoload(:Anova, 'statsample/anova')
|
125
|
-
autoload(:Combination, 'statsample/combination')
|
126
|
-
autoload(:Permutation, 'statsample/permutation')
|
127
125
|
autoload(:CSV, 'statsample/converters')
|
128
126
|
autoload(:PlainText, 'statsample/converters')
|
129
127
|
autoload(:Excel, 'statsample/converters')
|
@@ -219,6 +217,21 @@ module Statsample
|
|
219
217
|
end
|
220
218
|
u
|
221
219
|
end
|
220
|
+
|
221
|
+
def self.nice(s,e) # :nodoc:
|
222
|
+
reverse = e<s
|
223
|
+
min = reverse ? e : s
|
224
|
+
max = reverse ? s : e
|
225
|
+
span=max-min
|
226
|
+
return [s, e] if (!span or (span.respond_to? :infinite? and span.infinite?))
|
227
|
+
|
228
|
+
step=10**((Math::log(span).quo(Math::log(10))).round - 1).to_f
|
229
|
+
out=[(min.quo(step)).floor * step, (max.quo(step)).ceil * step]
|
230
|
+
out.reverse! if reverse
|
231
|
+
out
|
232
|
+
end
|
233
|
+
|
234
|
+
|
222
235
|
end
|
223
236
|
|
224
237
|
|
data/lib/statsample/bivariate.rb
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
require 'statsample/bivariate/pearson'
|
2
2
|
|
3
|
+
|
4
|
+
|
3
5
|
module Statsample
|
4
6
|
# Diverse methods and classes to calculate bivariate relations
|
5
7
|
# Specific classes:
|
@@ -7,6 +9,8 @@ module Statsample
|
|
7
9
|
# * Statsample::Bivariate::Tetrachoric : Tetrachoric correlation
|
8
10
|
# * Statsample::Bivariate::Polychoric : Polychoric correlation (using joint, two-step and polychoric series)
|
9
11
|
module Bivariate
|
12
|
+
autoload(:Polychoric, 'statsample/bivariate/polychoric')
|
13
|
+
autoload(:Tetrachoric, 'statsample/bivariate/tetrachoric')
|
10
14
|
|
11
15
|
class << self
|
12
16
|
# Covariance between two vectors
|
@@ -335,6 +339,4 @@ module Statsample
|
|
335
339
|
end
|
336
340
|
end
|
337
341
|
|
338
|
-
require 'statsample/bivariate/polychoric'
|
339
|
-
require 'statsample/bivariate/tetrachoric'
|
340
342
|
|
@@ -181,8 +181,20 @@ module Statsample
|
|
181
181
|
# USE:
|
182
182
|
# ds = Statsample::Excel.read("test.xls")
|
183
183
|
#
|
184
|
-
def read(filename,
|
184
|
+
def read(filename, opts=Hash.new)
|
185
185
|
require 'spreadsheet'
|
186
|
+
opts_default={
|
187
|
+
:worksheet_id=>0,
|
188
|
+
:ignore_lines=>0,
|
189
|
+
:empty=>['']
|
190
|
+
}
|
191
|
+
|
192
|
+
opts=opts_default.merge opts
|
193
|
+
|
194
|
+
worksheet_id=opts[:worksheet_id]
|
195
|
+
ignore_lines=opts[:ignore_lines]
|
196
|
+
empty=opts[:empty]
|
197
|
+
|
186
198
|
first_row=true
|
187
199
|
fields=[]
|
188
200
|
fields_data={}
|
data/lib/statsample/dataset.rb
CHANGED
@@ -331,7 +331,7 @@ module Statsample
|
|
331
331
|
def bootstrap(n=nil)
|
332
332
|
n||=@cases
|
333
333
|
ds_boot=dup_empty
|
334
|
-
|
334
|
+
n.times do
|
335
335
|
ds_boot.add_case_array(case_as_array(rand(n)))
|
336
336
|
end
|
337
337
|
ds_boot.update_valid_data
|
@@ -418,7 +418,6 @@ module Statsample
|
|
418
418
|
# Returns a vector with the sum of fields
|
419
419
|
# if fields parameter is empty, sum all fields
|
420
420
|
def vector_sum(fields=nil)
|
421
|
-
a=[]
|
422
421
|
fields||=@fields
|
423
422
|
collect_with_index do |row, i|
|
424
423
|
if(fields.find{|f| !@vectors[f].data_with_nils[i]})
|
@@ -557,6 +556,7 @@ module Statsample
|
|
557
556
|
raise DatasetException.new(self, e)
|
558
557
|
end
|
559
558
|
end
|
559
|
+
|
560
560
|
# Returns each case as an array, coding missing values as nils
|
561
561
|
def each_array_with_nils
|
562
562
|
m=fields.size
|
@@ -586,8 +586,9 @@ module Statsample
|
|
586
586
|
@fields=f
|
587
587
|
check_order
|
588
588
|
end
|
589
|
-
|
590
|
-
|
589
|
+
# Check congruence between +fields+ attribute
|
590
|
+
# and keys on +vectors+
|
591
|
+
def check_order #:nodoc:
|
591
592
|
if(@vectors.keys.sort!=@fields.sort)
|
592
593
|
@fields=@fields&@vectors.keys
|
593
594
|
@fields+=@vectors.keys.sort-@fields
|
@@ -598,7 +599,7 @@ module Statsample
|
|
598
599
|
if i.is_a? Range
|
599
600
|
fields=from_to(i.begin,i.end)
|
600
601
|
vectors=fields.inject({}) {|a,v| a[v]=@vectors[v];a}
|
601
|
-
|
602
|
+
Dataset.new(vectors,fields)
|
602
603
|
else
|
603
604
|
raise Exception,"Vector '#{i}' doesn't exists on dataset" unless @vectors.has_key?(i)
|
604
605
|
@vectors[i]
|
@@ -613,7 +614,7 @@ module Statsample
|
|
613
614
|
}
|
614
615
|
Statsample::Vector.new(data,type)
|
615
616
|
end
|
616
|
-
# Same as
|
617
|
+
# Same as Statsample::Vector.collect, but giving case index as second parameter on yield.
|
617
618
|
def collect_with_index(type=:scale)
|
618
619
|
data=[]
|
619
620
|
each_with_index {|row, i|
|
@@ -661,14 +662,7 @@ module Statsample
|
|
661
662
|
end
|
662
663
|
end
|
663
664
|
|
664
|
-
|
665
|
-
require 'statsample/multiset'
|
666
|
-
if fields.size==1
|
667
|
-
to_multiset_by_split_one_field(fields[0])
|
668
|
-
else
|
669
|
-
to_multiset_by_split_multiple_fields(*fields)
|
670
|
-
end
|
671
|
-
end
|
665
|
+
|
672
666
|
|
673
667
|
# Create a new dataset with all cases which the block returns true
|
674
668
|
def filter
|
@@ -689,6 +683,20 @@ module Statsample
|
|
689
683
|
a.to_vector(@vectors[field].type)
|
690
684
|
end
|
691
685
|
|
686
|
+
# Creates a Statsample::Multiset, using one or more fields
|
687
|
+
# to split the dataset.
|
688
|
+
|
689
|
+
|
690
|
+
def to_multiset_by_split(*fields)
|
691
|
+
require 'statsample/multiset'
|
692
|
+
if fields.size==1
|
693
|
+
to_multiset_by_split_one_field(fields[0])
|
694
|
+
else
|
695
|
+
to_multiset_by_split_multiple_fields(*fields)
|
696
|
+
end
|
697
|
+
end
|
698
|
+
# Creates a Statsample::Multiset, using one field
|
699
|
+
|
692
700
|
def to_multiset_by_split_one_field(field)
|
693
701
|
raise ArgumentError,"Should use a correct field name" if !@fields.include? field
|
694
702
|
factors=@vectors[field].factors
|
@@ -831,7 +839,7 @@ module Statsample
|
|
831
839
|
# ]
|
832
840
|
#
|
833
841
|
def one_to_many(parent_fields, pattern)
|
834
|
-
base_pattern=pattern.gsub(/%v|%n/,"")
|
842
|
+
#base_pattern=pattern.gsub(/%v|%n/,"")
|
835
843
|
re=Regexp.new pattern.gsub("%v","(.+?)").gsub("%n","(\\d+?)")
|
836
844
|
ds_vars=parent_fields
|
837
845
|
vars=[]
|
@@ -156,8 +156,9 @@ module Statsample
|
|
156
156
|
@models=[]
|
157
157
|
@models_data={}
|
158
158
|
for i in 1..@predictors.size
|
159
|
-
c=
|
159
|
+
c=(0...@predictors.size).to_a.combination(i)
|
160
160
|
c.each do |data|
|
161
|
+
|
161
162
|
independent=data.collect {|i1| @predictors[i1] }
|
162
163
|
@models.push(independent)
|
163
164
|
if (@build_from_dataset)
|
@@ -268,7 +269,7 @@ module Statsample
|
|
268
269
|
# Get all model of size k
|
269
270
|
def md_k(k)
|
270
271
|
out=[]
|
271
|
-
models
|
272
|
+
@models.each{|m| out.push(md(m)) if m.size==k }
|
272
273
|
out
|
273
274
|
end
|
274
275
|
|
@@ -158,8 +158,9 @@ module Statsample
|
|
158
158
|
@samples_cd={}
|
159
159
|
@samples_gd={}
|
160
160
|
@pairs=[]
|
161
|
-
c=
|
161
|
+
c=(0...@fields.size).to_a.combination(2)
|
162
162
|
c.each do |data|
|
163
|
+
p data
|
163
164
|
convert=data.collect {|i| @fields[i] }
|
164
165
|
@pairs.push(convert)
|
165
166
|
[@samples_td, @samples_cd, @samples_gd].each{|s|
|
@@ -132,7 +132,7 @@ module Factor
|
|
132
132
|
@communalities=pca.communalities(m)
|
133
133
|
@eigenvalues=pca.eigenvalues
|
134
134
|
com_sum = @communalities.inject(0) {|ac,v| ac+v}
|
135
|
-
jump=true
|
135
|
+
#jump=true
|
136
136
|
|
137
137
|
break if (com_sum-prev_sum).abs < @delta
|
138
138
|
@communalities.each_with_index do |v2,i2|
|
data/lib/statsample/graph.rb
CHANGED
@@ -0,0 +1,234 @@
|
|
1
|
+
require 'rubyvis'
|
2
|
+
module Statsample
|
3
|
+
module Graph
|
4
|
+
# = Boxplot
|
5
|
+
#
|
6
|
+
# From Wikipedia:
|
7
|
+
# In descriptive statistics, a box plot or boxplot (also known as a box-and-whisker diagram or plot) is a convenient way of graphically depicting groups of numerical data through their five-number summaries: the smallest observation (sample minimum), lower quartile (Q1), median (Q2), upper quartile (Q3), and largest observation (sample maximum). A boxplot may also indicate which observations, if any, might be considered outliers.
|
8
|
+
#
|
9
|
+
# == Usage
|
10
|
+
# === Svg output
|
11
|
+
# a=[1,2,3,4].to_scale
|
12
|
+
# b=[3,4,5,6].to_scale
|
13
|
+
# puts Statsample::Graph::Boxplot.new(:vectors=>[a,b]).to_svg
|
14
|
+
# === Using ReportBuilder
|
15
|
+
# a=[1,2,3,4].to_scale
|
16
|
+
# b=[3,4,5,6].to_scale
|
17
|
+
# rb=ReportBuilder.new
|
18
|
+
# rb.add(Statsample::Graph::Boxplot.new(:vectors=>[a,b]))
|
19
|
+
# rb.save_html('boxplot.html')
|
20
|
+
|
21
|
+
class Boxplot
|
22
|
+
include Summarizable
|
23
|
+
attr_accessor :name
|
24
|
+
# Total width of Boxplot
|
25
|
+
attr_accessor :width
|
26
|
+
# Total height of Boxplot
|
27
|
+
attr_accessor :height
|
28
|
+
# Top margin
|
29
|
+
attr_accessor :margin_top
|
30
|
+
# Bottom margin
|
31
|
+
attr_accessor :margin_bottom
|
32
|
+
# Left margin
|
33
|
+
attr_accessor :margin_left
|
34
|
+
# Right margin
|
35
|
+
attr_accessor :margin_right
|
36
|
+
# Array with assignation to groups of bars
|
37
|
+
# For example, for four vectors,
|
38
|
+
# boxplot.groups=[1,2,1,3]
|
39
|
+
# Assign same color to first and third element, and different to
|
40
|
+
# second and fourth
|
41
|
+
attr_accessor :groups
|
42
|
+
# Minimum value on y-axis. Automatically defined from data
|
43
|
+
attr_accessor :minimum
|
44
|
+
# Maximum value on y-axis. Automatically defined from data
|
45
|
+
attr_accessor :maximum
|
46
|
+
# Vectors to plot as boxplots
|
47
|
+
attr_accessor :vectors
|
48
|
+
|
49
|
+
attr_reader :x_scale, :y_scale
|
50
|
+
# Create a new Boxplot.
|
51
|
+
# Parameters: Hash of options
|
52
|
+
# * :vectors: Array of vectors
|
53
|
+
# * :groups: Array of same size as :vectors:, with name of groups
|
54
|
+
# to colorize vectors
|
55
|
+
def initialize(opts=Hash.new)
|
56
|
+
@vectors=opts.delete :vectors
|
57
|
+
raise "You should define vectors" if @vectors.nil?
|
58
|
+
|
59
|
+
opts_default={
|
60
|
+
:name=>_("Boxplot"),
|
61
|
+
:groups=>nil,
|
62
|
+
:width=>400,
|
63
|
+
:height=>300,
|
64
|
+
:margin_top=>10,
|
65
|
+
:margin_bottom=>20,
|
66
|
+
:margin_left=>20,
|
67
|
+
:margin_right=>20,
|
68
|
+
:minimum=>nil,
|
69
|
+
:maximum=>nil
|
70
|
+
}
|
71
|
+
@opts=opts_default.merge(opts)
|
72
|
+
opts_default.keys.each {|k| send("#{k}=", @opts[k]) }
|
73
|
+
end
|
74
|
+
|
75
|
+
# Returns a Rubyvis panel with boxplot
|
76
|
+
def rubyvis_panel # :nodoc:
|
77
|
+
that=self
|
78
|
+
|
79
|
+
min,max=@minimum, @maximum
|
80
|
+
|
81
|
+
min||=@vectors.map {|v| v.min}.min
|
82
|
+
max||=@vectors.map {|v| v.max}.max
|
83
|
+
|
84
|
+
|
85
|
+
|
86
|
+
margin_hor=margin_left + margin_right
|
87
|
+
margin_vert=margin_top + margin_bottom
|
88
|
+
x_scale = pv.Scale.ordinal(@vectors.size.times.map.to_a).split_banded(0, width-margin_hor, 4.0/5)
|
89
|
+
y_scale=Rubyvis::Scale.linear(min,max).range(0,height-margin_vert)
|
90
|
+
y_scale.nice
|
91
|
+
# cache data
|
92
|
+
|
93
|
+
colors=Rubyvis::Colors.category10
|
94
|
+
|
95
|
+
data=@vectors.map {|v|
|
96
|
+
out={:percentil_25=>v.percentil(25), :median=>v.median, :percentil_75=>v.percentil(75), :name=>v.name}
|
97
|
+
out[:iqr]=out[:percentil_75]-out[:percentil_25]
|
98
|
+
|
99
|
+
irq_max=out[:percentil_75]+out[:iqr]
|
100
|
+
irq_min=out[:percentil_25]-out[:iqr]
|
101
|
+
|
102
|
+
# Find the last data inside the margin
|
103
|
+
min=out[:percentil_25]
|
104
|
+
max=out[:percentil_75]
|
105
|
+
|
106
|
+
v.each {|d|
|
107
|
+
min=d if d<min and d>irq_min
|
108
|
+
max=d if d>max and d<irq_max
|
109
|
+
}
|
110
|
+
# Whiskers!
|
111
|
+
out[:low_whisker]=min
|
112
|
+
out[:high_whisker]=max
|
113
|
+
# And now, data outside whiskers
|
114
|
+
out[:outliers]=v.data_with_nils.find_all {|d|
|
115
|
+
d<min or d>max
|
116
|
+
}
|
117
|
+
out
|
118
|
+
}
|
119
|
+
|
120
|
+
|
121
|
+
|
122
|
+
vis=Rubyvis::Panel.new do |pan|
|
123
|
+
pan.width width - margin_hor
|
124
|
+
pan.height height - margin_vert
|
125
|
+
pan.bottom margin_bottom
|
126
|
+
pan.left margin_left
|
127
|
+
pan.right margin_right
|
128
|
+
pan.top margin_top
|
129
|
+
# Y axis
|
130
|
+
pan.rule do
|
131
|
+
data y_scale.ticks
|
132
|
+
bottom y_scale
|
133
|
+
stroke_style {|d| d!=0 ? "#eee" : "#000"}
|
134
|
+
label(:anchor=>'left') do
|
135
|
+
visible {|d| true}
|
136
|
+
text y_scale.tick_format
|
137
|
+
end
|
138
|
+
end
|
139
|
+
pan.rule do
|
140
|
+
bottom 0
|
141
|
+
stroke_style 'black'
|
142
|
+
end
|
143
|
+
pan.label do |l|
|
144
|
+
l.data data
|
145
|
+
l.left {|v| x_scale.scale(index)}
|
146
|
+
l.bottom -15
|
147
|
+
l.text {|v,x| v[:name]}
|
148
|
+
end
|
149
|
+
|
150
|
+
pan.panel do |bp|
|
151
|
+
bp.data data
|
152
|
+
bp.left {|v| x_scale.scale(index)}
|
153
|
+
bp.width x_scale.range_band
|
154
|
+
|
155
|
+
|
156
|
+
# Bar
|
157
|
+
bp.bar do |b|
|
158
|
+
b.bottom {|v| y_scale.scale(v[:percentil_25])}
|
159
|
+
b.height {|v| y_scale.scale(v[:percentil_75]) - y_scale.scale(v[:percentil_25]) }
|
160
|
+
b.line_width 1
|
161
|
+
b.stroke_style {|v|
|
162
|
+
if that.groups
|
163
|
+
colors.scale(that.groups[parent.index]).darker
|
164
|
+
else
|
165
|
+
colors.scale(index).darker
|
166
|
+
end
|
167
|
+
|
168
|
+
|
169
|
+
}
|
170
|
+
b.fill_style {|v|
|
171
|
+
if that.groups
|
172
|
+
colors.scale(that.groups[parent.index])
|
173
|
+
else
|
174
|
+
colors.scale(index)
|
175
|
+
end
|
176
|
+
}
|
177
|
+
end
|
178
|
+
# Median
|
179
|
+
bp.rule do |r|
|
180
|
+
r.bottom {|v| y_scale.scale(v[:median])}
|
181
|
+
r.width x_scale.range_band
|
182
|
+
r.line_width 2
|
183
|
+
end
|
184
|
+
|
185
|
+
# Whiskers
|
186
|
+
bp.rule do |r|
|
187
|
+
r.visible {|v| v[:percentil_25]>v[:low_whisker]}
|
188
|
+
r.bottom {|v| y_scale.scale(v[:low_whisker])}
|
189
|
+
end
|
190
|
+
bp.rule do |r|
|
191
|
+
r.visible {|v| v[:percentil_25]>v[:low_whisker]}
|
192
|
+
r.bottom {|v| y_scale.scale(v[:low_whisker])}
|
193
|
+
r.left {|v| x_scale.range_band / 2.0}
|
194
|
+
r.height {|v| y_scale.scale(v[:percentil_25])-y_scale.scale(v[:low_whisker])}
|
195
|
+
end
|
196
|
+
bp.rule do |r|
|
197
|
+
r.visible {|v| v[:percentil_75]<v[:high_whisker]}
|
198
|
+
r.bottom {|v| y_scale.scale(v[:high_whisker])}
|
199
|
+
end
|
200
|
+
|
201
|
+
bp.rule do |r|
|
202
|
+
r.visible {|v| v[:percentil_75]<v[:high_whisker]}
|
203
|
+
r.bottom {|v| y_scale.scale(v[:percentil_75])}
|
204
|
+
r.left {|v| x_scale.range_band / 2.0}
|
205
|
+
r.height {|v| y_scale.scale(v[:high_whisker])-y_scale.scale(v[:percentil_75])}
|
206
|
+
end
|
207
|
+
|
208
|
+
bp.dot do |dot|
|
209
|
+
dot.shape_size 4
|
210
|
+
dot.data {|v| v[:outliers]}
|
211
|
+
dot.left {|v| x_scale.range_band / 2.0}
|
212
|
+
dot.bottom {|v| y_scale.scale(v)}
|
213
|
+
dot.title {|v| v}
|
214
|
+
end
|
215
|
+
|
216
|
+
|
217
|
+
end
|
218
|
+
end
|
219
|
+
end
|
220
|
+
# Returns SVG with boxplot
|
221
|
+
def to_svg
|
222
|
+
rp=rubyvis_panel
|
223
|
+
rp.render
|
224
|
+
rp.to_svg
|
225
|
+
end
|
226
|
+
def report_building(builder) # :nodoc:
|
227
|
+
builder.section(:name=>name) do |b|
|
228
|
+
b.image(to_svg, :type=>'svg', :width=>width, :height=>height)
|
229
|
+
end
|
230
|
+
|
231
|
+
end
|
232
|
+
end
|
233
|
+
end
|
234
|
+
end
|