statsample 0.16.0 → 0.17.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49) hide show
  1. data.tar.gz.sig +0 -0
  2. data/History.txt +7 -0
  3. data/Manifest.txt +6 -4
  4. data/README.txt +5 -1
  5. data/Rakefile +1 -1
  6. data/examples/boxplot.rb +17 -0
  7. data/examples/dominance_analysis_bootstrap.rb +5 -0
  8. data/examples/histogram.rb +14 -0
  9. data/examples/scatterplot.rb +4 -3
  10. data/lib/distribution/normalbivariate.rb +1 -1
  11. data/lib/statsample.rb +16 -3
  12. data/lib/statsample/bivariate.rb +4 -2
  13. data/lib/statsample/converter/csv.rb +0 -2
  14. data/lib/statsample/converters.rb +13 -1
  15. data/lib/statsample/dataset.rb +23 -15
  16. data/lib/statsample/dominanceanalysis.rb +3 -2
  17. data/lib/statsample/dominanceanalysis/bootstrap.rb +2 -1
  18. data/lib/statsample/factor/parallelanalysis.rb +1 -1
  19. data/lib/statsample/factor/principalaxis.rb +1 -1
  20. data/lib/statsample/graph.rb +2 -0
  21. data/lib/statsample/graph/boxplot.rb +234 -0
  22. data/lib/statsample/graph/histogram.rb +133 -0
  23. data/lib/statsample/graph/scatterplot.rb +1 -9
  24. data/lib/statsample/histogram.rb +47 -11
  25. data/lib/statsample/mle.rb +4 -4
  26. data/lib/statsample/mle/normal.rb +3 -3
  27. data/lib/statsample/regression/multiple/baseengine.rb +1 -1
  28. data/lib/statsample/regression/multiple/gslengine.rb +0 -1
  29. data/lib/statsample/regression/multiple/matrixengine.rb +1 -1
  30. data/lib/statsample/reliability.rb +1 -0
  31. data/lib/statsample/reliability/scaleanalysis.rb +3 -51
  32. data/lib/statsample/reliability/skillscaleanalysis.rb +93 -0
  33. data/lib/statsample/srs.rb +1 -1
  34. data/lib/statsample/test/umannwhitney.rb +1 -1
  35. data/lib/statsample/vector.rb +13 -36
  36. data/test/test_factor.rb +1 -1
  37. data/test/test_ggobi.rb +0 -5
  38. data/test/test_histogram.rb +75 -18
  39. data/test/test_mle.rb +0 -44
  40. data/test/test_reliability_skillscale.rb +41 -0
  41. data/test/test_statistics.rb +3 -3
  42. data/test/test_stest.rb +2 -2
  43. data/test/test_vector.rb +13 -8
  44. metadata +36 -18
  45. metadata.gz.sig +0 -0
  46. data/lib/statsample/combination.rb +0 -114
  47. data/lib/statsample/permutation.rb +0 -98
  48. data/test/test_combination.rb +0 -37
  49. data/test/test_permutation.rb +0 -42
data.tar.gz.sig CHANGED
Binary file
data/History.txt CHANGED
@@ -1,3 +1,9 @@
1
+ === 0.17.0 / 2010-12-09
2
+ * Added Statsample::Graph::Histogram and Statsample::Graph::Boxplot
3
+ * Added Statsample::Reliability::SkillScaleAnalysis for analysis of skill based scales.
4
+ * Deleted combination and permutation classes. Backports for ruby 1.8.7 are widely available
5
+ * Deleted unused variables (thanks, ruby-head)
6
+
1
7
  === 0.16.0 / 2010-11-13
2
8
  * Works on ruby 1.9.2 and HEAD. Updated Rakefile and manifest
3
9
  * Removed all graph based on Svg::Graph.
@@ -6,6 +12,7 @@
6
12
  * Added reference on references.txt
7
13
  * Ruby-based random gaussian distribution generator when gsl not available
8
14
  * Added population average deviation [Al Chou]
15
+
9
16
  === 0.15.1 / 2010-10-20
10
17
  * Statsample::Excel and Statsample::PlainText add name to vectors equal to field name
11
18
  * Statsample::Dataset.delete_vector accept multiple fields.
data/Manifest.txt CHANGED
@@ -12,10 +12,12 @@ data/test_binomial.csv
12
12
  data/tetmat_matrix.txt
13
13
  data/tetmat_test.txt
14
14
  doc_latex/manual/equations.tex
15
+ examples/boxplot.rb
15
16
  examples/correlation_matrix.rb
16
17
  examples/dataset.rb
17
18
  examples/dominance_analysis.rb
18
19
  examples/dominance_analysis_bootstrap.rb
20
+ examples/histogram.rb
19
21
  examples/icc.rb
20
22
  examples/levene.rb
21
23
  examples/multiple_regression.rb
@@ -47,7 +49,6 @@ lib/statsample/anova/twoway.rb
47
49
  lib/statsample/bivariate.rb
48
50
  lib/statsample/bivariate/pearson.rb
49
51
  lib/statsample/codification.rb
50
- lib/statsample/combination.rb
51
52
  lib/statsample/converter/csv.rb
52
53
  lib/statsample/converter/spss.rb
53
54
  lib/statsample/converters.rb
@@ -62,6 +63,8 @@ lib/statsample/factor/pca.rb
62
63
  lib/statsample/factor/principalaxis.rb
63
64
  lib/statsample/factor/rotation.rb
64
65
  lib/statsample/graph.rb
66
+ lib/statsample/graph/boxplot.rb
67
+ lib/statsample/graph/histogram.rb
65
68
  lib/statsample/graph/scatterplot.rb
66
69
  lib/statsample/histogram.rb
67
70
  lib/statsample/matrix.rb
@@ -70,7 +73,6 @@ lib/statsample/mle/logit.rb
70
73
  lib/statsample/mle/normal.rb
71
74
  lib/statsample/mle/probit.rb
72
75
  lib/statsample/multiset.rb
73
- lib/statsample/permutation.rb
74
76
  lib/statsample/regression.rb
75
77
  lib/statsample/regression/binomial.rb
76
78
  lib/statsample/regression/binomial/logit.rb
@@ -86,6 +88,7 @@ lib/statsample/reliability.rb
86
88
  lib/statsample/reliability/icc.rb
87
89
  lib/statsample/reliability/multiscaleanalysis.rb
88
90
  lib/statsample/reliability/scaleanalysis.rb
91
+ lib/statsample/reliability/skillscaleanalysis.rb
89
92
  lib/statsample/resample.rb
90
93
  lib/statsample/rserve_extension.rb
91
94
  lib/statsample/srs.rb
@@ -111,7 +114,6 @@ test/test_anovawithvectors.rb
111
114
  test/test_bartlettsphericity.rb
112
115
  test/test_bivariate.rb
113
116
  test/test_codification.rb
114
- test/test_combination.rb
115
117
  test/test_crosstab.rb
116
118
  test/test_csv.csv
117
119
  test/test_csv.rb
@@ -126,10 +128,10 @@ test/test_logit.rb
126
128
  test/test_matrix.rb
127
129
  test/test_mle.rb
128
130
  test/test_multiset.rb
129
- test/test_permutation.rb
130
131
  test/test_regression.rb
131
132
  test/test_reliability.rb
132
133
  test/test_reliability_icc.rb
134
+ test/test_reliability_skillscale.rb
133
135
  test/test_resample.rb
134
136
  test/test_rserve_extension.rb
135
137
  test/test_srs.rb
data/README.txt CHANGED
@@ -21,6 +21,7 @@ Include:
21
21
  * Sample calculation related formulas
22
22
  * Structural Equation Modeling (SEM), using R libraries +sem+ and +OpenMx+
23
23
  * Creates reports on text, html and rtf, using ReportBuilder gem
24
+ * Graphics: Histogram, Boxplot and Scatterplot
24
25
 
25
26
  == FEATURES:
26
27
 
@@ -69,8 +70,11 @@ Include:
69
70
  * Statsample::Test::UMannWhitney
70
71
  * Statsample::Test::T
71
72
  * Statsample::Test::F
73
+ * Module Graph provides several classes to create beautiful graphs using rubyvis
74
+ * Statsample::Graph::Boxplot
75
+ * Statsample::Graph::Histogram
76
+ * Statsample::Graph::Scatterplot
72
77
  * Gem +statsample-sem+ provides a DSL to R libraries +sem+ and +OpenMx+
73
- * Interfaces to gdchart, gnuplot and SVG::Graph (experimental)
74
78
  * Close integration with gem <tt>reportbuilder</tt>, to easily create reports on text, html and rtf formats.
75
79
 
76
80
  == Examples of use:
data/Rakefile CHANGED
@@ -41,7 +41,7 @@ h=Hoe.spec('statsample') do
41
41
  #self.testlib=:minitest
42
42
  self.rubyforge_name = "ruby-statsample"
43
43
  self.developer('Claudio Bustos', 'clbustos@gmail.com')
44
- self.extra_deps << ["spreadsheet","~>0.6.0"] << ["reportbuilder", "~>1.0"] << ["minimization", "~>0.2.0"] << ["fastercsv"] << ["dirty-memoize", "~>0.0"] << ["extendmatrix","~>0.3.1"] << ["statsample-bivariate-extension", ">0"] << ["rserve-client", "~>0.2.5"] << ["rubyvis", "~>0.2.2"]
44
+ self.extra_deps << ["spreadsheet","~>0.6.0"] << ["reportbuilder", "~>1.4"] << ["minimization", "~>0.2.0"] << ["fastercsv", ">0"] << ["dirty-memoize", "~>0.0"] << ["extendmatrix","~>0.3.1"] << ["statsample-bivariate-extension", ">0"] << ["rserve-client", "~>0.2.5"] << ["rubyvis", "~>0.3.3"]
45
45
 
46
46
  self.extra_dev_deps << ["shoulda"] << ["minitest", "~>2.0"]
47
47
  self.clean_globs << "test/images/*" << "demo/item_analysis/*" << "demo/Regression"
@@ -0,0 +1,17 @@
1
+ #!/usr/bin/ruby
2
+ $:.unshift(File.dirname(__FILE__)+'/../lib/')
3
+ $:.unshift('/home/cdx/dev/reportbuilder/lib/')
4
+
5
+ require 'benchmark'
6
+ require 'statsample'
7
+ n=100
8
+ a=(n-1).times.map {|i| rand()*20+50}
9
+ b=n.times.map {|i| rand()*10+50}.to_scale
10
+ c=n.times.map {|i| rand()*5+50}.to_scale
11
+
12
+ a.push(30)
13
+ a=a.to_scale
14
+ sp=Statsample::Graph::Boxplot.new(:vectors=>[a,b,c],:width=>300, :height=>300, :groups=>%w{first first second}, :minimum=>0)
15
+ rb=ReportBuilder.new
16
+ rb.add(sp)
17
+ puts rb.to_text
@@ -8,6 +8,11 @@ b=100.times.collect {rand}.to_scale
8
8
  c=100.times.collect {rand}.to_scale
9
9
  d=100.times.collect {rand}.to_scale
10
10
 
11
+ a.name="a"
12
+ b.name="b"
13
+ c.name="c"
14
+ d.name="d"
15
+
11
16
  ds={'a'=>a,'b'=>b,'c'=>c,'d'=>d}.to_dataset
12
17
 
13
18
  ds['y1']=ds.collect{|row| row['a']*5+row['b']*2+row['c']*2+row['d']*2+10*rand()}
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/ruby
2
+ $:.unshift(File.dirname(__FILE__)+'/../lib/')
3
+ $:.unshift('/home/cdx/dev/reportbuilder/lib/')
4
+
5
+ require 'benchmark'
6
+ require 'statsample'
7
+ n=1000
8
+ a=n.times.map {|i| rand()*20}.to_scale
9
+ hg=Statsample::Graph::Histogram.new(a, :bins=>15)
10
+
11
+ rb=ReportBuilder.new
12
+ rb.add(a.histogram)
13
+ rb.add(hg)
14
+ puts rb.to_text
@@ -1,12 +1,13 @@
1
1
  #!/usr/bin/ruby
2
2
  $:.unshift(File.dirname(__FILE__)+'/../lib/')
3
+ $:.unshift('/home/cdx/dev/reportbuilder/lib/')
4
+
3
5
  require 'benchmark'
4
6
  require 'statsample'
5
7
  n=100
6
8
  a=n.times.map {|i| rand(10)+i}.to_scale
7
9
  b=n.times.map {|i| rand(10)+i}.to_scale
8
10
  sp=Statsample::Graph::Scatterplot.new(a,b, :width=>200, :height=>200)
9
- rb=ReportBuilder.new do |b|
10
- b.parse_element(sp)
11
- end
11
+ rb=ReportBuilder.new
12
+ rb.add(sp)
12
13
  puts rb.to_text
@@ -220,7 +220,7 @@ module Distribution
220
220
  asr = Math::asin(r)
221
221
  (1..lg).each do |i|
222
222
  [-1,1].each do |is|
223
- sn = Math::sin( asr *( is * x[i][ng] + 1 ).quo(2) )
223
+ sn = Math::sin(asr*(is* x[i][ng]+1).quo(2) )
224
224
  bvn = bvn + w[i][ng] * Math::exp( ( sn*hk-hs ).quo( 1-sn*sn ) )
225
225
  end # do
226
226
  end # do
data/lib/statsample.rb CHANGED
@@ -118,12 +118,10 @@ module Statsample
118
118
  @@has_gsl
119
119
  end
120
120
 
121
- VERSION = '0.16.0'
121
+ VERSION = '0.17.0'
122
122
  SPLIT_TOKEN = ","
123
123
  autoload(:Database, 'statsample/converters')
124
124
  autoload(:Anova, 'statsample/anova')
125
- autoload(:Combination, 'statsample/combination')
126
- autoload(:Permutation, 'statsample/permutation')
127
125
  autoload(:CSV, 'statsample/converters')
128
126
  autoload(:PlainText, 'statsample/converters')
129
127
  autoload(:Excel, 'statsample/converters')
@@ -219,6 +217,21 @@ module Statsample
219
217
  end
220
218
  u
221
219
  end
220
+
221
+ def self.nice(s,e) # :nodoc:
222
+ reverse = e<s
223
+ min = reverse ? e : s
224
+ max = reverse ? s : e
225
+ span=max-min
226
+ return [s, e] if (!span or (span.respond_to? :infinite? and span.infinite?))
227
+
228
+ step=10**((Math::log(span).quo(Math::log(10))).round - 1).to_f
229
+ out=[(min.quo(step)).floor * step, (max.quo(step)).ceil * step]
230
+ out.reverse! if reverse
231
+ out
232
+ end
233
+
234
+
222
235
  end
223
236
 
224
237
 
@@ -1,5 +1,7 @@
1
1
  require 'statsample/bivariate/pearson'
2
2
 
3
+
4
+
3
5
  module Statsample
4
6
  # Diverse methods and classes to calculate bivariate relations
5
7
  # Specific classes:
@@ -7,6 +9,8 @@ module Statsample
7
9
  # * Statsample::Bivariate::Tetrachoric : Tetrachoric correlation
8
10
  # * Statsample::Bivariate::Polychoric : Polychoric correlation (using joint, two-step and polychoric series)
9
11
  module Bivariate
12
+ autoload(:Polychoric, 'statsample/bivariate/polychoric')
13
+ autoload(:Tetrachoric, 'statsample/bivariate/tetrachoric')
10
14
 
11
15
  class << self
12
16
  # Covariance between two vectors
@@ -335,6 +339,4 @@ module Statsample
335
339
  end
336
340
  end
337
341
 
338
- require 'statsample/bivariate/polychoric'
339
- require 'statsample/bivariate/tetrachoric'
340
342
 
@@ -1,5 +1,3 @@
1
-
2
-
3
1
  module Statsample
4
2
  class CSV < SpreadsheetBase
5
3
  if RUBY_VERSION<"1.9"
@@ -181,8 +181,20 @@ module Statsample
181
181
  # USE:
182
182
  # ds = Statsample::Excel.read("test.xls")
183
183
  #
184
- def read(filename, worksheet_id=0, ignore_lines=0, empty=[''])
184
+ def read(filename, opts=Hash.new)
185
185
  require 'spreadsheet'
186
+ opts_default={
187
+ :worksheet_id=>0,
188
+ :ignore_lines=>0,
189
+ :empty=>['']
190
+ }
191
+
192
+ opts=opts_default.merge opts
193
+
194
+ worksheet_id=opts[:worksheet_id]
195
+ ignore_lines=opts[:ignore_lines]
196
+ empty=opts[:empty]
197
+
186
198
  first_row=true
187
199
  fields=[]
188
200
  fields_data={}
@@ -331,7 +331,7 @@ module Statsample
331
331
  def bootstrap(n=nil)
332
332
  n||=@cases
333
333
  ds_boot=dup_empty
334
- for i in 1..n
334
+ n.times do
335
335
  ds_boot.add_case_array(case_as_array(rand(n)))
336
336
  end
337
337
  ds_boot.update_valid_data
@@ -418,7 +418,6 @@ module Statsample
418
418
  # Returns a vector with the sum of fields
419
419
  # if fields parameter is empty, sum all fields
420
420
  def vector_sum(fields=nil)
421
- a=[]
422
421
  fields||=@fields
423
422
  collect_with_index do |row, i|
424
423
  if(fields.find{|f| !@vectors[f].data_with_nils[i]})
@@ -557,6 +556,7 @@ module Statsample
557
556
  raise DatasetException.new(self, e)
558
557
  end
559
558
  end
559
+
560
560
  # Returns each case as an array, coding missing values as nils
561
561
  def each_array_with_nils
562
562
  m=fields.size
@@ -586,8 +586,9 @@ module Statsample
586
586
  @fields=f
587
587
  check_order
588
588
  end
589
-
590
- def check_order
589
+ # Check congruence between +fields+ attribute
590
+ # and keys on +vectors+
591
+ def check_order #:nodoc:
591
592
  if(@vectors.keys.sort!=@fields.sort)
592
593
  @fields=@fields&@vectors.keys
593
594
  @fields+=@vectors.keys.sort-@fields
@@ -598,7 +599,7 @@ module Statsample
598
599
  if i.is_a? Range
599
600
  fields=from_to(i.begin,i.end)
600
601
  vectors=fields.inject({}) {|a,v| a[v]=@vectors[v];a}
601
- ds=Dataset.new(vectors,fields)
602
+ Dataset.new(vectors,fields)
602
603
  else
603
604
  raise Exception,"Vector '#{i}' doesn't exists on dataset" unless @vectors.has_key?(i)
604
605
  @vectors[i]
@@ -613,7 +614,7 @@ module Statsample
613
614
  }
614
615
  Statsample::Vector.new(data,type)
615
616
  end
616
- # Same as #collect, but giving case index as second parameter on yield.
617
+ # Same as Statsample::Vector.collect, but giving case index as second parameter on yield.
617
618
  def collect_with_index(type=:scale)
618
619
  data=[]
619
620
  each_with_index {|row, i|
@@ -661,14 +662,7 @@ module Statsample
661
662
  end
662
663
  end
663
664
 
664
- def to_multiset_by_split(*fields)
665
- require 'statsample/multiset'
666
- if fields.size==1
667
- to_multiset_by_split_one_field(fields[0])
668
- else
669
- to_multiset_by_split_multiple_fields(*fields)
670
- end
671
- end
665
+
672
666
 
673
667
  # Create a new dataset with all cases which the block returns true
674
668
  def filter
@@ -689,6 +683,20 @@ module Statsample
689
683
  a.to_vector(@vectors[field].type)
690
684
  end
691
685
 
686
+ # Creates a Statsample::Multiset, using one or more fields
687
+ # to split the dataset.
688
+
689
+
690
+ def to_multiset_by_split(*fields)
691
+ require 'statsample/multiset'
692
+ if fields.size==1
693
+ to_multiset_by_split_one_field(fields[0])
694
+ else
695
+ to_multiset_by_split_multiple_fields(*fields)
696
+ end
697
+ end
698
+ # Creates a Statsample::Multiset, using one field
699
+
692
700
  def to_multiset_by_split_one_field(field)
693
701
  raise ArgumentError,"Should use a correct field name" if !@fields.include? field
694
702
  factors=@vectors[field].factors
@@ -831,7 +839,7 @@ module Statsample
831
839
  # ]
832
840
  #
833
841
  def one_to_many(parent_fields, pattern)
834
- base_pattern=pattern.gsub(/%v|%n/,"")
842
+ #base_pattern=pattern.gsub(/%v|%n/,"")
835
843
  re=Regexp.new pattern.gsub("%v","(.+?)").gsub("%n","(\\d+?)")
836
844
  ds_vars=parent_fields
837
845
  vars=[]
@@ -156,8 +156,9 @@ module Statsample
156
156
  @models=[]
157
157
  @models_data={}
158
158
  for i in 1..@predictors.size
159
- c=Statsample::Combination.new(i,@predictors.size)
159
+ c=(0...@predictors.size).to_a.combination(i)
160
160
  c.each do |data|
161
+
161
162
  independent=data.collect {|i1| @predictors[i1] }
162
163
  @models.push(independent)
163
164
  if (@build_from_dataset)
@@ -268,7 +269,7 @@ module Statsample
268
269
  # Get all model of size k
269
270
  def md_k(k)
270
271
  out=[]
271
- models=@models.each{|m| out.push(md(m)) if m.size==k }
272
+ @models.each{|m| out.push(md(m)) if m.size==k }
272
273
  out
273
274
  end
274
275
 
@@ -158,8 +158,9 @@ module Statsample
158
158
  @samples_cd={}
159
159
  @samples_gd={}
160
160
  @pairs=[]
161
- c=Statsample::Combination.new(2,@fields.size)
161
+ c=(0...@fields.size).to_a.combination(2)
162
162
  c.each do |data|
163
+ p data
163
164
  convert=data.collect {|i| @fields[i] }
164
165
  @pairs.push(convert)
165
166
  [@samples_td, @samples_cd, @samples_gd].each{|s|
@@ -27,7 +27,7 @@ module Statsample
27
27
  ds.fields=vars.times.map {|i| "v#{i+1}"}
28
28
  ds.cases=cases
29
29
  opts=opts.merge({:bootstrap_method=> :random, :no_data=>true})
30
- pa=new(ds, opts)
30
+ new(ds, opts)
31
31
  end
32
32
  include DirtyMemoize
33
33
  include Summarizable
@@ -132,7 +132,7 @@ module Factor
132
132
  @communalities=pca.communalities(m)
133
133
  @eigenvalues=pca.eigenvalues
134
134
  com_sum = @communalities.inject(0) {|ac,v| ac+v}
135
- jump=true
135
+ #jump=true
136
136
 
137
137
  break if (com_sum-prev_sum).abs < @delta
138
138
  @communalities.each_with_index do |v2,i2|
@@ -1,4 +1,6 @@
1
1
  require 'statsample/graph/scatterplot'
2
+ require 'statsample/graph/boxplot'
3
+ require 'statsample/graph/histogram'
2
4
  module Statsample
3
5
  module Graph
4
6
  end
@@ -0,0 +1,234 @@
1
+ require 'rubyvis'
2
+ module Statsample
3
+ module Graph
4
+ # = Boxplot
5
+ #
6
+ # From Wikipedia:
7
+ # In descriptive statistics, a box plot or boxplot (also known as a box-and-whisker diagram or plot) is a convenient way of graphically depicting groups of numerical data through their five-number summaries: the smallest observation (sample minimum), lower quartile (Q1), median (Q2), upper quartile (Q3), and largest observation (sample maximum). A boxplot may also indicate which observations, if any, might be considered outliers.
8
+ #
9
+ # == Usage
10
+ # === Svg output
11
+ # a=[1,2,3,4].to_scale
12
+ # b=[3,4,5,6].to_scale
13
+ # puts Statsample::Graph::Boxplot.new(:vectors=>[a,b]).to_svg
14
+ # === Using ReportBuilder
15
+ # a=[1,2,3,4].to_scale
16
+ # b=[3,4,5,6].to_scale
17
+ # rb=ReportBuilder.new
18
+ # rb.add(Statsample::Graph::Boxplot.new(:vectors=>[a,b]))
19
+ # rb.save_html('boxplot.html')
20
+
21
+ class Boxplot
22
+ include Summarizable
23
+ attr_accessor :name
24
+ # Total width of Boxplot
25
+ attr_accessor :width
26
+ # Total height of Boxplot
27
+ attr_accessor :height
28
+ # Top margin
29
+ attr_accessor :margin_top
30
+ # Bottom margin
31
+ attr_accessor :margin_bottom
32
+ # Left margin
33
+ attr_accessor :margin_left
34
+ # Right margin
35
+ attr_accessor :margin_right
36
+ # Array with assignation to groups of bars
37
+ # For example, for four vectors,
38
+ # boxplot.groups=[1,2,1,3]
39
+ # Assign same color to first and third element, and different to
40
+ # second and fourth
41
+ attr_accessor :groups
42
+ # Minimum value on y-axis. Automatically defined from data
43
+ attr_accessor :minimum
44
+ # Maximum value on y-axis. Automatically defined from data
45
+ attr_accessor :maximum
46
+ # Vectors to plot
47
+ attr_accessor :vectors
48
+
49
+ attr_reader :x_scale, :y_scale
50
+ # Create a new Boxplot.
51
+ # Parameters: Hash of options
52
+ # * :vectors: Array of vectors
53
+ # * :groups: Array of same size as :vectors:, with name of groups
54
+ # to colorize vectors
55
+ def initialize(opts=Hash.new)
56
+ @vectors=opts.delete :vectors
57
+ raise "You should define vectors" if @vectors.nil?
58
+
59
+ opts_default={
60
+ :name=>_("Boxplot"),
61
+ :groups=>nil,
62
+ :width=>400,
63
+ :height=>300,
64
+ :margin_top=>10,
65
+ :margin_bottom=>20,
66
+ :margin_left=>20,
67
+ :margin_right=>20,
68
+ :minimum=>nil,
69
+ :maximum=>nil
70
+ }
71
+ @opts=opts_default.merge(opts)
72
+ opts_default.keys.each {|k| send("#{k}=", @opts[k]) }
73
+ end
74
+
75
+ # Returns a Rubyvis panel with boxplot
76
+ def rubyvis_panel # :nodoc:
77
+ that=self
78
+
79
+ min,max=@minimum, @maximum
80
+
81
+ min||=@vectors.map {|v| v.min}.min
82
+ max||=@vectors.map {|v| v.max}.max
83
+
84
+
85
+
86
+ margin_hor=margin_left + margin_right
87
+ margin_vert=margin_top + margin_bottom
88
+ x_scale = pv.Scale.ordinal(@vectors.size.times.map.to_a).split_banded(0, width-margin_hor, 4.0/5)
89
+ y_scale=Rubyvis::Scale.linear(min,max).range(0,height-margin_vert)
90
+ y_scale.nice
91
+ # cache data
92
+
93
+ colors=Rubyvis::Colors.category10
94
+
95
+ data=@vectors.map {|v|
96
+ out={:percentil_25=>v.percentil(25), :median=>v.median, :percentil_75=>v.percentil(75), :name=>v.name}
97
+ out[:iqr]=out[:percentil_75]-out[:percentil_25]
98
+
99
+ irq_max=out[:percentil_75]+out[:iqr]
100
+ irq_min=out[:percentil_25]-out[:iqr]
101
+
102
+ # Find the last data inside the margin
103
+ min=out[:percentil_25]
104
+ max=out[:percentil_75]
105
+
106
+ v.each {|d|
107
+ min=d if d<min and d>irq_min
108
+ max=d if d>max and d<irq_max
109
+ }
110
+ # Whiskers!
111
+ out[:low_whisker]=min
112
+ out[:high_whisker]=max
113
+ # And now, data outside whiskers
114
+ out[:outliers]=v.data_with_nils.find_all {|d|
115
+ d<min or d>max
116
+ }
117
+ out
118
+ }
119
+
120
+
121
+
122
+ vis=Rubyvis::Panel.new do |pan|
123
+ pan.width width - margin_hor
124
+ pan.height height - margin_vert
125
+ pan.bottom margin_bottom
126
+ pan.left margin_left
127
+ pan.right margin_right
128
+ pan.top margin_top
129
+ # Y axis
130
+ pan.rule do
131
+ data y_scale.ticks
132
+ bottom y_scale
133
+ stroke_style {|d| d!=0 ? "#eee" : "#000"}
134
+ label(:anchor=>'left') do
135
+ visible {|d| true}
136
+ text y_scale.tick_format
137
+ end
138
+ end
139
+ pan.rule do
140
+ bottom 0
141
+ stroke_style 'black'
142
+ end
143
+ pan.label do |l|
144
+ l.data data
145
+ l.left {|v| x_scale.scale(index)}
146
+ l.bottom -15
147
+ l.text {|v,x| v[:name]}
148
+ end
149
+
150
+ pan.panel do |bp|
151
+ bp.data data
152
+ bp.left {|v| x_scale.scale(index)}
153
+ bp.width x_scale.range_band
154
+
155
+
156
+ # Bar
157
+ bp.bar do |b|
158
+ b.bottom {|v| y_scale.scale(v[:percentil_25])}
159
+ b.height {|v| y_scale.scale(v[:percentil_75]) - y_scale.scale(v[:percentil_25]) }
160
+ b.line_width 1
161
+ b.stroke_style {|v|
162
+ if that.groups
163
+ colors.scale(that.groups[parent.index]).darker
164
+ else
165
+ colors.scale(index).darker
166
+ end
167
+
168
+
169
+ }
170
+ b.fill_style {|v|
171
+ if that.groups
172
+ colors.scale(that.groups[parent.index])
173
+ else
174
+ colors.scale(index)
175
+ end
176
+ }
177
+ end
178
+ # Median
179
+ bp.rule do |r|
180
+ r.bottom {|v| y_scale.scale(v[:median])}
181
+ r.width x_scale.range_band
182
+ r.line_width 2
183
+ end
184
+
185
+ # Whiskers
186
+ bp.rule do |r|
187
+ r.visible {|v| v[:percentil_25]>v[:low_whisker]}
188
+ r.bottom {|v| y_scale.scale(v[:low_whisker])}
189
+ end
190
+ bp.rule do |r|
191
+ r.visible {|v| v[:percentil_25]>v[:low_whisker]}
192
+ r.bottom {|v| y_scale.scale(v[:low_whisker])}
193
+ r.left {|v| x_scale.range_band / 2.0}
194
+ r.height {|v| y_scale.scale(v[:percentil_25])-y_scale.scale(v[:low_whisker])}
195
+ end
196
+ bp.rule do |r|
197
+ r.visible {|v| v[:percentil_75]<v[:high_whisker]}
198
+ r.bottom {|v| y_scale.scale(v[:high_whisker])}
199
+ end
200
+
201
+ bp.rule do |r|
202
+ r.visible {|v| v[:percentil_75]<v[:high_whisker]}
203
+ r.bottom {|v| y_scale.scale(v[:percentil_75])}
204
+ r.left {|v| x_scale.range_band / 2.0}
205
+ r.height {|v| y_scale.scale(v[:high_whisker])-y_scale.scale(v[:percentil_75])}
206
+ end
207
+
208
+ bp.dot do |dot|
209
+ dot.shape_size 4
210
+ dot.data {|v| v[:outliers]}
211
+ dot.left {|v| x_scale.range_band / 2.0}
212
+ dot.bottom {|v| y_scale.scale(v)}
213
+ dot.title {|v| v}
214
+ end
215
+
216
+
217
+ end
218
+ end
219
+ end
220
+ # Returns SVG with boxplot
221
+ def to_svg
222
+ rp=rubyvis_panel
223
+ rp.render
224
+ rp.to_svg
225
+ end
226
+ def report_building(builder) # :nodoc:
227
+ builder.section(:name=>name) do |b|
228
+ b.image(to_svg, :type=>'svg', :width=>width, :height=>height)
229
+ end
230
+
231
+ end
232
+ end
233
+ end
234
+ end