statsample 0.16.0 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. data.tar.gz.sig +0 -0
  2. data/History.txt +7 -0
  3. data/Manifest.txt +6 -4
  4. data/README.txt +5 -1
  5. data/Rakefile +1 -1
  6. data/examples/boxplot.rb +17 -0
  7. data/examples/dominance_analysis_bootstrap.rb +5 -0
  8. data/examples/histogram.rb +14 -0
  9. data/examples/scatterplot.rb +4 -3
  10. data/lib/distribution/normalbivariate.rb +1 -1
  11. data/lib/statsample.rb +16 -3
  12. data/lib/statsample/bivariate.rb +4 -2
  13. data/lib/statsample/converter/csv.rb +0 -2
  14. data/lib/statsample/converters.rb +13 -1
  15. data/lib/statsample/dataset.rb +23 -15
  16. data/lib/statsample/dominanceanalysis.rb +3 -2
  17. data/lib/statsample/dominanceanalysis/bootstrap.rb +2 -1
  18. data/lib/statsample/factor/parallelanalysis.rb +1 -1
  19. data/lib/statsample/factor/principalaxis.rb +1 -1
  20. data/lib/statsample/graph.rb +2 -0
  21. data/lib/statsample/graph/boxplot.rb +234 -0
  22. data/lib/statsample/graph/histogram.rb +133 -0
  23. data/lib/statsample/graph/scatterplot.rb +1 -9
  24. data/lib/statsample/histogram.rb +47 -11
  25. data/lib/statsample/mle.rb +4 -4
  26. data/lib/statsample/mle/normal.rb +3 -3
  27. data/lib/statsample/regression/multiple/baseengine.rb +1 -1
  28. data/lib/statsample/regression/multiple/gslengine.rb +0 -1
  29. data/lib/statsample/regression/multiple/matrixengine.rb +1 -1
  30. data/lib/statsample/reliability.rb +1 -0
  31. data/lib/statsample/reliability/scaleanalysis.rb +3 -51
  32. data/lib/statsample/reliability/skillscaleanalysis.rb +93 -0
  33. data/lib/statsample/srs.rb +1 -1
  34. data/lib/statsample/test/umannwhitney.rb +1 -1
  35. data/lib/statsample/vector.rb +13 -36
  36. data/test/test_factor.rb +1 -1
  37. data/test/test_ggobi.rb +0 -5
  38. data/test/test_histogram.rb +75 -18
  39. data/test/test_mle.rb +0 -44
  40. data/test/test_reliability_skillscale.rb +41 -0
  41. data/test/test_statistics.rb +3 -3
  42. data/test/test_stest.rb +2 -2
  43. data/test/test_vector.rb +13 -8
  44. metadata +36 -18
  45. metadata.gz.sig +0 -0
  46. data/lib/statsample/combination.rb +0 -114
  47. data/lib/statsample/permutation.rb +0 -98
  48. data/test/test_combination.rb +0 -37
  49. data/test/test_permutation.rb +0 -42
data.tar.gz.sig CHANGED
Binary file
data/History.txt CHANGED
@@ -1,3 +1,9 @@
1
+ === 0.17.0 / 2010-12-09
2
+ * Added Statsample::Graph::Histogram and Statsample::Graph::Boxplot
3
+ * Added Statsample::Reliability::SkillScaleAnalysis for analysis of skill based scales.
4
+ * Deleted combination and permutation classes. Backport for ruby 1.8.7 widely available
5
+ * Deleted unused variables (thanks, ruby-head)
6
+
1
7
  === 0.16.0 / 2010-11-13
2
8
  * Works on ruby 1.9.2 and HEAD. Updated Rakefile and manifest
3
9
  * Removed all graph based on Svg::Graph.
@@ -6,6 +12,7 @@
6
12
  * Added reference on references.txt
7
13
  * Ruby-based random gaussian distribution generator when gsl not available
8
14
  * Added population average deviation [Al Chou]
15
+
9
16
  === 0.15.1 / 2010-10-20
10
17
  * Statsample::Excel and Statsample::PlainText add name to vectors equal to field name
11
18
  * Statsample::Dataset.delete_vector accept multiple fields.
data/Manifest.txt CHANGED
@@ -12,10 +12,12 @@ data/test_binomial.csv
12
12
  data/tetmat_matrix.txt
13
13
  data/tetmat_test.txt
14
14
  doc_latex/manual/equations.tex
15
+ examples/boxplot.rb
15
16
  examples/correlation_matrix.rb
16
17
  examples/dataset.rb
17
18
  examples/dominance_analysis.rb
18
19
  examples/dominance_analysis_bootstrap.rb
20
+ examples/histogram.rb
19
21
  examples/icc.rb
20
22
  examples/levene.rb
21
23
  examples/multiple_regression.rb
@@ -47,7 +49,6 @@ lib/statsample/anova/twoway.rb
47
49
  lib/statsample/bivariate.rb
48
50
  lib/statsample/bivariate/pearson.rb
49
51
  lib/statsample/codification.rb
50
- lib/statsample/combination.rb
51
52
  lib/statsample/converter/csv.rb
52
53
  lib/statsample/converter/spss.rb
53
54
  lib/statsample/converters.rb
@@ -62,6 +63,8 @@ lib/statsample/factor/pca.rb
62
63
  lib/statsample/factor/principalaxis.rb
63
64
  lib/statsample/factor/rotation.rb
64
65
  lib/statsample/graph.rb
66
+ lib/statsample/graph/boxplot.rb
67
+ lib/statsample/graph/histogram.rb
65
68
  lib/statsample/graph/scatterplot.rb
66
69
  lib/statsample/histogram.rb
67
70
  lib/statsample/matrix.rb
@@ -70,7 +73,6 @@ lib/statsample/mle/logit.rb
70
73
  lib/statsample/mle/normal.rb
71
74
  lib/statsample/mle/probit.rb
72
75
  lib/statsample/multiset.rb
73
- lib/statsample/permutation.rb
74
76
  lib/statsample/regression.rb
75
77
  lib/statsample/regression/binomial.rb
76
78
  lib/statsample/regression/binomial/logit.rb
@@ -86,6 +88,7 @@ lib/statsample/reliability.rb
86
88
  lib/statsample/reliability/icc.rb
87
89
  lib/statsample/reliability/multiscaleanalysis.rb
88
90
  lib/statsample/reliability/scaleanalysis.rb
91
+ lib/statsample/reliability/skillscaleanalysis.rb
89
92
  lib/statsample/resample.rb
90
93
  lib/statsample/rserve_extension.rb
91
94
  lib/statsample/srs.rb
@@ -111,7 +114,6 @@ test/test_anovawithvectors.rb
111
114
  test/test_bartlettsphericity.rb
112
115
  test/test_bivariate.rb
113
116
  test/test_codification.rb
114
- test/test_combination.rb
115
117
  test/test_crosstab.rb
116
118
  test/test_csv.csv
117
119
  test/test_csv.rb
@@ -126,10 +128,10 @@ test/test_logit.rb
126
128
  test/test_matrix.rb
127
129
  test/test_mle.rb
128
130
  test/test_multiset.rb
129
- test/test_permutation.rb
130
131
  test/test_regression.rb
131
132
  test/test_reliability.rb
132
133
  test/test_reliability_icc.rb
134
+ test/test_reliability_skillscale.rb
133
135
  test/test_resample.rb
134
136
  test/test_rserve_extension.rb
135
137
  test/test_srs.rb
data/README.txt CHANGED
@@ -21,6 +21,7 @@ Include:
21
21
  * Sample calculation related formulas
22
22
  * Structural Equation Modeling (SEM), using R libraries +sem+ and +OpenMx+
23
23
  * Creates reports on text, html and rtf, using ReportBuilder gem
24
+ * Graphics: Histogram, Boxplot and Scatterplot
24
25
 
25
26
  == FEATURES:
26
27
 
@@ -69,8 +70,11 @@ Include:
69
70
  * Statsample::Test::UMannWhitney
70
71
  * Statsample::Test::T
71
72
  * Statsample::Test::F
73
+ * Module Graph provides several classes to create beautiful graphs using rubyvis
74
+ * Statsample::Graph::Boxplot
75
+ * Statsample::Graph::Histogram
76
+ * Statsample::Graph::Scatterplot
72
77
  * Gem +statsample-sem+ provides a DSL to R libraries +sem+ and +OpenMx+
73
- * Interfaces to gdchart, gnuplot and SVG::Graph (experimental)
74
78
  * Close integration with gem <tt>reportbuilder</tt>, to easily create reports on text, html and rtf formats.
75
79
 
76
80
  == Examples of use:
data/Rakefile CHANGED
@@ -41,7 +41,7 @@ h=Hoe.spec('statsample') do
41
41
  #self.testlib=:minitest
42
42
  self.rubyforge_name = "ruby-statsample"
43
43
  self.developer('Claudio Bustos', 'clbustos@gmail.com')
44
- self.extra_deps << ["spreadsheet","~>0.6.0"] << ["reportbuilder", "~>1.0"] << ["minimization", "~>0.2.0"] << ["fastercsv"] << ["dirty-memoize", "~>0.0"] << ["extendmatrix","~>0.3.1"] << ["statsample-bivariate-extension", ">0"] << ["rserve-client", "~>0.2.5"] << ["rubyvis", "~>0.2.2"]
44
+ self.extra_deps << ["spreadsheet","~>0.6.0"] << ["reportbuilder", "~>1.4"] << ["minimization", "~>0.2.0"] << ["fastercsv", ">0"] << ["dirty-memoize", "~>0.0"] << ["extendmatrix","~>0.3.1"] << ["statsample-bivariate-extension", ">0"] << ["rserve-client", "~>0.2.5"] << ["rubyvis", "~>0.3.3"]
45
45
 
46
46
  self.extra_dev_deps << ["shoulda"] << ["minitest", "~>2.0"]
47
47
  self.clean_globs << "test/images/*" << "demo/item_analysis/*" << "demo/Regression"
@@ -0,0 +1,17 @@
1
+ #!/usr/bin/ruby
2
+ $:.unshift(File.dirname(__FILE__)+'/../lib/')
3
+ $:.unshift('/home/cdx/dev/reportbuilder/lib/')
4
+
5
+ require 'benchmark'
6
+ require 'statsample'
7
+ n=100
8
+ a=(n-1).times.map {|i| rand()*20+50}
9
+ b=n.times.map {|i| rand()*10+50}.to_scale
10
+ c=n.times.map {|i| rand()*5+50}.to_scale
11
+
12
+ a.push(30)
13
+ a=a.to_scale
14
+ sp=Statsample::Graph::Boxplot.new(:vectors=>[a,b,c],:width=>300, :height=>300, :groups=>%w{first first second}, :minimum=>0)
15
+ rb=ReportBuilder.new
16
+ rb.add(sp)
17
+ puts rb.to_text
@@ -8,6 +8,11 @@ b=100.times.collect {rand}.to_scale
8
8
  c=100.times.collect {rand}.to_scale
9
9
  d=100.times.collect {rand}.to_scale
10
10
 
11
+ a.name="a"
12
+ b.name="b"
13
+ c.name="c"
14
+ d.name="d"
15
+
11
16
  ds={'a'=>a,'b'=>b,'c'=>c,'d'=>d}.to_dataset
12
17
 
13
18
  ds['y1']=ds.collect{|row| row['a']*5+row['b']*2+row['c']*2+row['d']*2+10*rand()}
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/ruby
2
+ $:.unshift(File.dirname(__FILE__)+'/../lib/')
3
+ $:.unshift('/home/cdx/dev/reportbuilder/lib/')
4
+
5
+ require 'benchmark'
6
+ require 'statsample'
7
+ n=1000
8
+ a=n.times.map {|i| rand()*20}.to_scale
9
+ hg=Statsample::Graph::Histogram.new(a, :bins=>15)
10
+
11
+ rb=ReportBuilder.new
12
+ rb.add(a.histogram)
13
+ rb.add(hg)
14
+ puts rb.to_text
@@ -1,12 +1,13 @@
1
1
  #!/usr/bin/ruby
2
2
  $:.unshift(File.dirname(__FILE__)+'/../lib/')
3
+ $:.unshift('/home/cdx/dev/reportbuilder/lib/')
4
+
3
5
  require 'benchmark'
4
6
  require 'statsample'
5
7
  n=100
6
8
  a=n.times.map {|i| rand(10)+i}.to_scale
7
9
  b=n.times.map {|i| rand(10)+i}.to_scale
8
10
  sp=Statsample::Graph::Scatterplot.new(a,b, :width=>200, :height=>200)
9
- rb=ReportBuilder.new do |b|
10
- b.parse_element(sp)
11
- end
11
+ rb=ReportBuilder.new
12
+ rb.add(sp)
12
13
  puts rb.to_text
@@ -220,7 +220,7 @@ module Distribution
220
220
  asr = Math::asin(r)
221
221
  (1..lg).each do |i|
222
222
  [-1,1].each do |is|
223
- sn = Math::sin( asr *( is * x[i][ng] + 1 ).quo(2) )
223
+ sn = Math::sin(asr*(is* x[i][ng]+1).quo(2) )
224
224
  bvn = bvn + w[i][ng] * Math::exp( ( sn*hk-hs ).quo( 1-sn*sn ) )
225
225
  end # do
226
226
  end # do
data/lib/statsample.rb CHANGED
@@ -118,12 +118,10 @@ module Statsample
118
118
  @@has_gsl
119
119
  end
120
120
 
121
- VERSION = '0.16.0'
121
+ VERSION = '0.17.0'
122
122
  SPLIT_TOKEN = ","
123
123
  autoload(:Database, 'statsample/converters')
124
124
  autoload(:Anova, 'statsample/anova')
125
- autoload(:Combination, 'statsample/combination')
126
- autoload(:Permutation, 'statsample/permutation')
127
125
  autoload(:CSV, 'statsample/converters')
128
126
  autoload(:PlainText, 'statsample/converters')
129
127
  autoload(:Excel, 'statsample/converters')
@@ -219,6 +217,21 @@ module Statsample
219
217
  end
220
218
  u
221
219
  end
220
+
221
+ def self.nice(s,e) # :nodoc:
222
+ reverse = e<s
223
+ min = reverse ? e : s
224
+ max = reverse ? s : e
225
+ span=max-min
226
+ return [s, e] if (!span or (span.respond_to? :infinite? and span.infinite?))
227
+
228
+ step=10**((Math::log(span).quo(Math::log(10))).round - 1).to_f
229
+ out=[(min.quo(step)).floor * step, (max.quo(step)).ceil * step]
230
+ out.reverse! if reverse
231
+ out
232
+ end
233
+
234
+
222
235
  end
223
236
 
224
237
 
@@ -1,5 +1,7 @@
1
1
  require 'statsample/bivariate/pearson'
2
2
 
3
+
4
+
3
5
  module Statsample
4
6
  # Diverse methods and classes to calculate bivariate relations
5
7
  # Specific classes:
@@ -7,6 +9,8 @@ module Statsample
7
9
  # * Statsample::Bivariate::Tetrachoric : Tetrachoric correlation
8
10
  # * Statsample::Bivariate::Polychoric : Polychoric correlation (using joint, two-step and polychoric series)
9
11
  module Bivariate
12
+ autoload(:Polychoric, 'statsample/bivariate/polychoric')
13
+ autoload(:Tetrachoric, 'statsample/bivariate/tetrachoric')
10
14
 
11
15
  class << self
12
16
  # Covariance between two vectors
@@ -335,6 +339,4 @@ module Statsample
335
339
  end
336
340
  end
337
341
 
338
- require 'statsample/bivariate/polychoric'
339
- require 'statsample/bivariate/tetrachoric'
340
342
 
@@ -1,5 +1,3 @@
1
-
2
-
3
1
  module Statsample
4
2
  class CSV < SpreadsheetBase
5
3
  if RUBY_VERSION<"1.9"
@@ -181,8 +181,20 @@ module Statsample
181
181
  # USE:
182
182
  # ds = Statsample::Excel.read("test.xls")
183
183
  #
184
- def read(filename, worksheet_id=0, ignore_lines=0, empty=[''])
184
+ def read(filename, opts=Hash.new)
185
185
  require 'spreadsheet'
186
+ opts_default={
187
+ :worksheet_id=>0,
188
+ :ignore_lines=>0,
189
+ :empty=>['']
190
+ }
191
+
192
+ opts=opts_default.merge opts
193
+
194
+ worksheet_id=opts[:worksheet_id]
195
+ ignore_lines=opts[:ignore_lines]
196
+ empty=opts[:empty]
197
+
186
198
  first_row=true
187
199
  fields=[]
188
200
  fields_data={}
@@ -331,7 +331,7 @@ module Statsample
331
331
  def bootstrap(n=nil)
332
332
  n||=@cases
333
333
  ds_boot=dup_empty
334
- for i in 1..n
334
+ n.times do
335
335
  ds_boot.add_case_array(case_as_array(rand(n)))
336
336
  end
337
337
  ds_boot.update_valid_data
@@ -418,7 +418,6 @@ module Statsample
418
418
  # Returns a vector with sumatory of fields
419
419
  # if fields parameter is empty, sum all fields
420
420
  def vector_sum(fields=nil)
421
- a=[]
422
421
  fields||=@fields
423
422
  collect_with_index do |row, i|
424
423
  if(fields.find{|f| !@vectors[f].data_with_nils[i]})
@@ -557,6 +556,7 @@ module Statsample
557
556
  raise DatasetException.new(self, e)
558
557
  end
559
558
  end
559
+
560
560
  # Returns each case as an array, coding missing values as nils
561
561
  def each_array_with_nils
562
562
  m=fields.size
@@ -586,8 +586,9 @@ module Statsample
586
586
  @fields=f
587
587
  check_order
588
588
  end
589
-
590
- def check_order
589
+ # Check congruence between +fields+ attribute
590
+ # and keys on +vectors+
591
+ def check_order #:nodoc:
591
592
  if(@vectors.keys.sort!=@fields.sort)
592
593
  @fields=@fields&@vectors.keys
593
594
  @fields+=@vectors.keys.sort-@fields
@@ -598,7 +599,7 @@ module Statsample
598
599
  if i.is_a? Range
599
600
  fields=from_to(i.begin,i.end)
600
601
  vectors=fields.inject({}) {|a,v| a[v]=@vectors[v];a}
601
- ds=Dataset.new(vectors,fields)
602
+ Dataset.new(vectors,fields)
602
603
  else
603
604
  raise Exception,"Vector '#{i}' doesn't exists on dataset" unless @vectors.has_key?(i)
604
605
  @vectors[i]
@@ -613,7 +614,7 @@ module Statsample
613
614
  }
614
615
  Statsample::Vector.new(data,type)
615
616
  end
616
- # Same as #collect, but giving case index as second parameter on yield.
617
+ # Same as Statsample::Vector.collect, but giving case index as second parameter on yield.
617
618
  def collect_with_index(type=:scale)
618
619
  data=[]
619
620
  each_with_index {|row, i|
@@ -661,14 +662,7 @@ module Statsample
661
662
  end
662
663
  end
663
664
 
664
- def to_multiset_by_split(*fields)
665
- require 'statsample/multiset'
666
- if fields.size==1
667
- to_multiset_by_split_one_field(fields[0])
668
- else
669
- to_multiset_by_split_multiple_fields(*fields)
670
- end
671
- end
665
+
672
666
 
673
667
  # Create a new dataset with all cases which the block returns true
674
668
  def filter
@@ -689,6 +683,20 @@ module Statsample
689
683
  a.to_vector(@vectors[field].type)
690
684
  end
691
685
 
686
+ # Creates a Statsample::Multiset, using one or more fields
687
+ # to split the dataset.
688
+
689
+
690
+ def to_multiset_by_split(*fields)
691
+ require 'statsample/multiset'
692
+ if fields.size==1
693
+ to_multiset_by_split_one_field(fields[0])
694
+ else
695
+ to_multiset_by_split_multiple_fields(*fields)
696
+ end
697
+ end
698
+ # Creates a Statsample::Multiset, using one field
699
+
692
700
  def to_multiset_by_split_one_field(field)
693
701
  raise ArgumentError,"Should use a correct field name" if !@fields.include? field
694
702
  factors=@vectors[field].factors
@@ -831,7 +839,7 @@ module Statsample
831
839
  # ]
832
840
  #
833
841
  def one_to_many(parent_fields, pattern)
834
- base_pattern=pattern.gsub(/%v|%n/,"")
842
+ #base_pattern=pattern.gsub(/%v|%n/,"")
835
843
  re=Regexp.new pattern.gsub("%v","(.+?)").gsub("%n","(\\d+?)")
836
844
  ds_vars=parent_fields
837
845
  vars=[]
@@ -156,8 +156,9 @@ module Statsample
156
156
  @models=[]
157
157
  @models_data={}
158
158
  for i in 1..@predictors.size
159
- c=Statsample::Combination.new(i,@predictors.size)
159
+ c=(0...@predictors.size).to_a.combination(i)
160
160
  c.each do |data|
161
+
161
162
  independent=data.collect {|i1| @predictors[i1] }
162
163
  @models.push(independent)
163
164
  if (@build_from_dataset)
@@ -268,7 +269,7 @@ module Statsample
268
269
  # Get all model of size k
269
270
  def md_k(k)
270
271
  out=[]
271
- models=@models.each{|m| out.push(md(m)) if m.size==k }
272
+ @models.each{|m| out.push(md(m)) if m.size==k }
272
273
  out
273
274
  end
274
275
 
@@ -158,8 +158,9 @@ module Statsample
158
158
  @samples_cd={}
159
159
  @samples_gd={}
160
160
  @pairs=[]
161
- c=Statsample::Combination.new(2,@fields.size)
161
+ c=(0...@fields.size).to_a.combination(2)
162
162
  c.each do |data|
163
+ p data
163
164
  convert=data.collect {|i| @fields[i] }
164
165
  @pairs.push(convert)
165
166
  [@samples_td, @samples_cd, @samples_gd].each{|s|
@@ -27,7 +27,7 @@ module Statsample
27
27
  ds.fields=vars.times.map {|i| "v#{i+1}"}
28
28
  ds.cases=cases
29
29
  opts=opts.merge({:bootstrap_method=> :random, :no_data=>true})
30
- pa=new(ds, opts)
30
+ new(ds, opts)
31
31
  end
32
32
  include DirtyMemoize
33
33
  include Summarizable
@@ -132,7 +132,7 @@ module Factor
132
132
  @communalities=pca.communalities(m)
133
133
  @eigenvalues=pca.eigenvalues
134
134
  com_sum = @communalities.inject(0) {|ac,v| ac+v}
135
- jump=true
135
+ #jump=true
136
136
 
137
137
  break if (com_sum-prev_sum).abs < @delta
138
138
  @communalities.each_with_index do |v2,i2|
@@ -1,4 +1,6 @@
1
1
  require 'statsample/graph/scatterplot'
2
+ require 'statsample/graph/boxplot'
3
+ require 'statsample/graph/histogram'
2
4
  module Statsample
3
5
  module Graph
4
6
  end
@@ -0,0 +1,234 @@
1
+ require 'rubyvis'
2
+ module Statsample
3
+ module Graph
4
+ # = Boxplot
5
+ #
6
+ # From Wikipedia:
7
+ # In descriptive statistics, a box plot or boxplot (also known as a box-and-whisker diagram or plot) is a convenient way of graphically depicting groups of numerical data through their five-number summaries: the smallest observation (sample minimum), lower quartile (Q1), median (Q2), upper quartile (Q3), and largest observation (sample maximum). A boxplot may also indicate which observations, if any, might be considered outliers.
8
+ #
9
+ # == Usage
10
+ # === Svg output
11
+ # a=[1,2,3,4].to_scale
12
+ # b=[3,4,5,6].to_scale
13
+ # puts Statsample::Graph::Boxplot.new(:vectors=>[a,b]).to_svg
14
+ # === Using ReportBuilder
15
+ # a=[1,2,3,4].to_scale
16
+ # b=[3,4,5,6].to_scale
17
+ # rb=ReportBuilder.new
18
+ # rb.add(Statsample::Graph::Boxplot.new(:vectors=>[a,b]))
19
+ # rb.save_html('boxplot.html')
20
+
21
+ class Boxplot
22
+ include Summarizable
23
+ attr_accessor :name
24
+ # Total width of Boxplot
25
+ attr_accessor :width
26
+ # Total height of Boxplot
27
+ attr_accessor :height
28
+ # Top margin
29
+ attr_accessor :margin_top
30
+ # Bottom margin
31
+ attr_accessor :margin_bottom
32
+ # Left margin
33
+ attr_accessor :margin_left
34
+ # Right margin
35
+ attr_accessor :margin_right
36
+ # Array with assignation to groups of bars
37
+ # For example, for four vectors,
38
+ # boxplot.groups=[1,2,1,3]
39
+ # Assign same color to first and third element, and different to
40
+ # second and fourth
41
+ attr_accessor :groups
42
+ # Minimum value on y-axis. Automatically defined from data
43
+ attr_accessor :minimum
44
+ # Maximum value on y-axis. Automatically defined from data
45
+ attr_accessor :maximum
46
+ # Vectors to be box-plotted
47
+ attr_accessor :vectors
48
+
49
+ attr_reader :x_scale, :y_scale
50
+ # Create a new Boxplot.
51
+ # Parameters: Hash of options
52
+ # * :vectors: Array of vectors
53
+ # * :groups: Array of same size as :vectors:, with name of groups
54
+ # to colorize vectors
55
+ def initialize(opts=Hash.new)
56
+ @vectors=opts.delete :vectors
57
+ raise "You should define vectors" if @vectors.nil?
58
+
59
+ opts_default={
60
+ :name=>_("Boxplot"),
61
+ :groups=>nil,
62
+ :width=>400,
63
+ :height=>300,
64
+ :margin_top=>10,
65
+ :margin_bottom=>20,
66
+ :margin_left=>20,
67
+ :margin_right=>20,
68
+ :minimum=>nil,
69
+ :maximum=>nil
70
+ }
71
+ @opts=opts_default.merge(opts)
72
+ opts_default.keys.each {|k| send("#{k}=", @opts[k]) }
73
+ end
74
+
75
+ # Returns a Rubyvis panel with boxplot
76
+ def rubyvis_panel # :nodoc:
77
+ that=self
78
+
79
+ min,max=@minimum, @maximum
80
+
81
+ min||=@vectors.map {|v| v.min}.min
82
+ max||=@vectors.map {|v| v.max}.max
83
+
84
+
85
+
86
+ margin_hor=margin_left + margin_right
87
+ margin_vert=margin_top + margin_bottom
88
+ x_scale = pv.Scale.ordinal(@vectors.size.times.map.to_a).split_banded(0, width-margin_hor, 4.0/5)
89
+ y_scale=Rubyvis::Scale.linear(min,max).range(0,height-margin_vert)
90
+ y_scale.nice
91
+ # cache data
92
+
93
+ colors=Rubyvis::Colors.category10
94
+
95
+ data=@vectors.map {|v|
96
+ out={:percentil_25=>v.percentil(25), :median=>v.median, :percentil_75=>v.percentil(75), :name=>v.name}
97
+ out[:iqr]=out[:percentil_75]-out[:percentil_25]
98
+
99
+ irq_max=out[:percentil_75]+out[:iqr]
100
+ irq_min=out[:percentil_25]-out[:iqr]
101
+
102
+ # Find the last data inside the margin
103
+ min=out[:percentil_25]
104
+ max=out[:percentil_75]
105
+
106
+ v.each {|d|
107
+ min=d if d<min and d>irq_min
108
+ max=d if d>max and d<irq_max
109
+ }
110
+ # Whiskers!
111
+ out[:low_whisker]=min
112
+ out[:high_whisker]=max
113
+ # And now, data outside whiskers
114
+ out[:outliers]=v.data_with_nils.find_all {|d|
115
+ d<min or d>max
116
+ }
117
+ out
118
+ }
119
+
120
+
121
+
122
+ vis=Rubyvis::Panel.new do |pan|
123
+ pan.width width - margin_hor
124
+ pan.height height - margin_vert
125
+ pan.bottom margin_bottom
126
+ pan.left margin_left
127
+ pan.right margin_right
128
+ pan.top margin_top
129
+ # Y axis
130
+ pan.rule do
131
+ data y_scale.ticks
132
+ bottom y_scale
133
+ stroke_style {|d| d!=0 ? "#eee" : "#000"}
134
+ label(:anchor=>'left') do
135
+ visible {|d| true}
136
+ text y_scale.tick_format
137
+ end
138
+ end
139
+ pan.rule do
140
+ bottom 0
141
+ stroke_style 'black'
142
+ end
143
+ pan.label do |l|
144
+ l.data data
145
+ l.left {|v| x_scale.scale(index)}
146
+ l.bottom -15
147
+ l.text {|v,x| v[:name]}
148
+ end
149
+
150
+ pan.panel do |bp|
151
+ bp.data data
152
+ bp.left {|v| x_scale.scale(index)}
153
+ bp.width x_scale.range_band
154
+
155
+
156
+ # Bar
157
+ bp.bar do |b|
158
+ b.bottom {|v| y_scale.scale(v[:percentil_25])}
159
+ b.height {|v| y_scale.scale(v[:percentil_75]) - y_scale.scale(v[:percentil_25]) }
160
+ b.line_width 1
161
+ b.stroke_style {|v|
162
+ if that.groups
163
+ colors.scale(that.groups[parent.index]).darker
164
+ else
165
+ colors.scale(index).darker
166
+ end
167
+
168
+
169
+ }
170
+ b.fill_style {|v|
171
+ if that.groups
172
+ colors.scale(that.groups[parent.index])
173
+ else
174
+ colors.scale(index)
175
+ end
176
+ }
177
+ end
178
+ # Median
179
+ bp.rule do |r|
180
+ r.bottom {|v| y_scale.scale(v[:median])}
181
+ r.width x_scale.range_band
182
+ r.line_width 2
183
+ end
184
+
185
+ # Whiskers
186
+ bp.rule do |r|
187
+ r.visible {|v| v[:percentil_25]>v[:low_whisker]}
188
+ r.bottom {|v| y_scale.scale(v[:low_whisker])}
189
+ end
190
+ bp.rule do |r|
191
+ r.visible {|v| v[:percentil_25]>v[:low_whisker]}
192
+ r.bottom {|v| y_scale.scale(v[:low_whisker])}
193
+ r.left {|v| x_scale.range_band / 2.0}
194
+ r.height {|v| y_scale.scale(v[:percentil_25])-y_scale.scale(v[:low_whisker])}
195
+ end
196
+ bp.rule do |r|
197
+ r.visible {|v| v[:percentil_75]<v[:high_whisker]}
198
+ r.bottom {|v| y_scale.scale(v[:high_whisker])}
199
+ end
200
+
201
+ bp.rule do |r|
202
+ r.visible {|v| v[:percentil_75]<v[:high_whisker]}
203
+ r.bottom {|v| y_scale.scale(v[:percentil_75])}
204
+ r.left {|v| x_scale.range_band / 2.0}
205
+ r.height {|v| y_scale.scale(v[:high_whisker])-y_scale.scale(v[:percentil_75])}
206
+ end
207
+
208
+ bp.dot do |dot|
209
+ dot.shape_size 4
210
+ dot.data {|v| v[:outliers]}
211
+ dot.left {|v| x_scale.range_band / 2.0}
212
+ dot.bottom {|v| y_scale.scale(v)}
213
+ dot.title {|v| v}
214
+ end
215
+
216
+
217
+ end
218
+ end
219
+ end
220
+ # Returns SVG with boxplot
221
+ def to_svg
222
+ rp=rubyvis_panel
223
+ rp.render
224
+ rp.to_svg
225
+ end
226
+ def report_building(builder) # :nodoc:
227
+ builder.section(:name=>name) do |b|
228
+ b.image(to_svg, :type=>'svg', :width=>width, :height=>height)
229
+ end
230
+
231
+ end
232
+ end
233
+ end
234
+ end