statsample 0.15.1 → 0.16.0
- data.tar.gz.sig +0 -0
- data/History.txt +8 -0
- data/Manifest.txt +2 -6
- data/Rakefile +2 -2
- data/examples/dominance_analysis.rb +1 -2
- data/examples/multivariate_correlation.rb +1 -0
- data/examples/parallel_analysis.rb +7 -6
- data/examples/scatterplot.rb +12 -0
- data/lib/distribution/normal.rb +29 -1
- data/lib/statsample.rb +5 -2
- data/lib/statsample/bivariate.rb +3 -2
- data/lib/statsample/converters.rb +1 -1
- data/lib/statsample/dataset.rb +42 -14
- data/lib/statsample/factor/parallelanalysis.rb +5 -5
- data/lib/statsample/graph.rb +1 -4
- data/lib/statsample/graph/scatterplot.rb +169 -0
- data/lib/statsample/matrix.rb +5 -5
- data/lib/statsample/mle/logit.rb +5 -4
- data/lib/statsample/multiset.rb +4 -1
- data/lib/statsample/vector.rb +9 -2
- data/references.txt +1 -0
- data/test/test_anovaoneway.rb +1 -1
- data/test/test_factor.rb +29 -24
- data/test/test_reliability_icc.rb +0 -2
- data/test/test_vector.rb +4 -0
- metadata +78 -42
- metadata.gz.sig +0 -0
- data/lib/statsample/graph/gdchart.rb +0 -45
- data/lib/statsample/graph/svgboxplot.rb +0 -108
- data/lib/statsample/graph/svggraph.rb +0 -184
- data/lib/statsample/graph/svghistogram.rb +0 -206
- data/lib/statsample/graph/svgscatterplot.rb +0 -118
- data/test/test_svg_graph.rb +0 -54
data.tar.gz.sig
CHANGED
Binary file
data/History.txt
CHANGED
@@ -1,3 +1,11 @@
+=== 0.16.0 / 2010-11-13
+* Works on ruby 1.9.2 and HEAD. Updated Rakefile and manifest
+* Removed all graph based on Svg::Graph.
+* First operative version of Graph with Rubyvis
+* Corrected bug on Distribution::Normal.cdf.
+* Added reference on references.txt
+* Ruby-based random gaussian distribution generator when gsl not available
+* Added population average deviation [Al Chou]
 === 0.15.1 / 2010-10-20
 * Statsample::Excel and Statsample::PlainText add name to vectors equal to field name
 * Statsample::Dataset.delete_vector accept multiple fields.
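The cdf fix listed above is easy to sanity-check, since the standard normal CDF is 0.5 at 0 and about 0.975 at 1.96. A minimal check (not part of the diff), assuming statsample 0.16.0 is installed:

  require 'statsample'

  # Sanity check for the corrected Distribution::Normal.cdf
  puts Distribution::Normal.cdf(0)    # => 0.5
  puts Distribution::Normal.cdf(1.96) # => ~0.975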
data/Manifest.txt
CHANGED
@@ -25,6 +25,7 @@ examples/parallel_analysis_tetrachoric.rb
 examples/polychoric.rb
 examples/principal_axis.rb
 examples/reliability.rb
+examples/scatterplot.rb
 examples/t_test.rb
 examples/tetrachoric.rb
 examples/u_test.rb
@@ -61,11 +62,7 @@ lib/statsample/factor/pca.rb
 lib/statsample/factor/principalaxis.rb
 lib/statsample/factor/rotation.rb
 lib/statsample/graph.rb
-lib/statsample/graph/
-lib/statsample/graph/svgboxplot.rb
-lib/statsample/graph/svggraph.rb
-lib/statsample/graph/svghistogram.rb
-lib/statsample/graph/svgscatterplot.rb
+lib/statsample/graph/scatterplot.rb
 lib/statsample/histogram.rb
 lib/statsample/matrix.rb
 lib/statsample/mle.rb
@@ -139,7 +136,6 @@ test/test_srs.rb
 test/test_statistics.rb
 test/test_stest.rb
 test/test_stratified.rb
-test/test_svg_graph.rb
 test/test_test_f.rb
 test/test_test_t.rb
 test/test_umannwhitney.rb
data/Rakefile
CHANGED
@@ -41,9 +41,9 @@ h=Hoe.spec('statsample') do
 #self.testlib=:minitest
 self.rubyforge_name = "ruby-statsample"
 self.developer('Claudio Bustos', 'clbustos@gmail.com')
-self.extra_deps << ["spreadsheet","~>0.6.0"] <<
+self.extra_deps << ["spreadsheet","~>0.6.0"] << ["reportbuilder", "~>1.0"] << ["minimization", "~>0.2.0"] << ["fastercsv"] << ["dirty-memoize", "~>0.0"] << ["extendmatrix","~>0.3.1"] << ["statsample-bivariate-extension", ">0"] << ["rserve-client", "~>0.2.5"] << ["rubyvis", "~>0.2.2"]
 
-self.extra_dev_deps << ["shoulda"]
+self.extra_dev_deps << ["shoulda"] << ["minitest", "~>2.0"]
 self.clean_globs << "test/images/*" << "demo/item_analysis/*" << "demo/Regression"
 self.post_install_message = <<-EOF
 ***************************************************
data/examples/dominance_analysis.rb
CHANGED
@@ -9,7 +9,7 @@ c=sample.times.collect {rand}.to_scale
 d=sample.times.collect {rand}.to_scale
 
 ds={'a'=>a,'b'=>b,'c'=>c,'d'=>d}.to_dataset
-ds['y']=ds.collect{|row| row['a']*5+row['b']*3+row['c']*2+row['d']+rand()}
+ds['y']=ds.collect{|row| row['a']*5 + row['b']*3 + row['c']*2 + row['d'] + rand()}
 rb=ReportBuilder.new(:name=>"Dominance Analysis")
 
 cm=Statsample::Bivariate.correlation_matrix(ds)
@@ -23,5 +23,4 @@ rb.add(lr)
 da=Statsample::DominanceAnalysis.new(ds,'y',:name=>"Dominance Analysis using group of predictors", :predictors=>['a', 'b', %w{c d}])
 rb.add(da)
 
-
 puts rb.to_text
data/examples/multivariate_correlation.rb
CHANGED
@@ -16,6 +16,7 @@ require 'mathn'
 [0.02,0.02,0.03,0,0.06,0.22,1,-0.01,-0.05],
 [-0.12,-0.1,-0.06,-0.02,0.18,-0.07,-0.01,1,-0.03],
 [0.08,0.15,0.12,-0.02,0.02,0.36,-0.05,-0.03,1]]
+
 complete.extend Statsample::CovariateMatrix
 complete.fields=%w{adhd cd odd sex age monly mwork mage poverty}
 
data/examples/parallel_analysis.rb
CHANGED
@@ -14,17 +14,18 @@ vectors={}
 
 variables.times do |i|
   vectors["v#{i}"]=samples.times.collect {|nv| f1[nv]*i+(f2[nv]*(15-i))+((f3[nv]*(30-i))*1.5)*rng.ugaussian()}.to_scale
+  vectors["v#{i}"].name="Vector #{i}"
 end
 ds=vectors.to_dataset
 
 pa=Statsample::Factor::ParallelAnalysis.new(ds, :iterations=>iterations, :debug=>true)
 pca=Statsample::Factor::PCA.new(Statsample::Bivariate.correlation_matrix(ds))
-rb=ReportBuilder.new(:name=>"Parallel Analysis with simulation") do
-
-
-
-
-
+rb=ReportBuilder.new(:name=>"Parallel Analysis with simulation") do
+  text "There are 3 real factors on data"
+  parse_element pca
+  text "Traditional Kaiser criterion (k>1) returns #{pca.m} factors"
+  parse_element pa
+  text "Parallel Analysis returns #{pa.number_of_factors} factors to preserve"
 end
 
 puts rb.to_text
data/examples/scatterplot.rb
ADDED
@@ -0,0 +1,12 @@
+#!/usr/bin/ruby
+$:.unshift(File.dirname(__FILE__)+'/../lib/')
+require 'benchmark'
+require 'statsample'
+n=100
+a=n.times.map {|i| rand(10)+i}.to_scale
+b=n.times.map {|i| rand(10)+i}.to_scale
+sp=Statsample::Graph::Scatterplot.new(a,b, :width=>200, :height=>200)
+rb=ReportBuilder.new do |b|
+  b.parse_element(sp)
+end
+puts rb.to_text
data/lib/distribution/normal.rb
CHANGED
@@ -3,6 +3,34 @@ module Distribution
   # Uses Statistics2 module
   module Normal
     class << self
+      # Return a proc which return a random number within a gaussian distribution -> N(0,1)
+      # == Reference:
+      # * http://www.taygeta.com/random/gaussian.html
+      def rng_ugaussian
+        if Distribution.has_gsl?
+          rng=GSL::Rng.alloc()
+          lambda { rng.ugaussian()}
+        else
+          returned,y1,y2=0,0,0
+          lambda {
+            if returned==0
+              begin
+                x1 = 2.0 * rand - 1.0
+                x2 = 2.0 * rand - 1.0
+                w = x1 * x1 + x2 * x2
+              end while ( w >= 1.0 )
+              w = Math::sqrt( (-2.0 * Math::log( w ) ) / w )
+              y1 = x1 * w
+              y2 = x2 * w
+              returned=1
+              y1
+            else
+              returned=0
+              y2
+            end
+          }
+        end
+      end
       # Return the P-value of the corresponding integral
       def p_value(pr)
         Statistics2.pnormaldist(pr)
@@ -16,7 +44,7 @@ module Distribution
         Statistics2.normaldist(x)
       end
 
-      if
+      if Distribution.has_gsl?
         alias :cdf_ruby :cdf
         def cdf(x) # :nodoc:
           GSL::Cdf::gaussian_P(x)
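Usage note (illustrative, not from the gem's own docs): rng_ugaussian returns a proc, so callers draw deviates with call, which is how Statsample::Factor::ParallelAnalysis uses it further down. A minimal sketch:

  require 'statsample'

  # With GSL the proc wraps GSL::Rng#ugaussian; otherwise the pure-Ruby
  # polar Box-Muller fallback shown above is used.
  rng   = Distribution::Normal.rng_ugaussian
  draws = 10_000.times.map { rng.call }

  mean = draws.inject(:+) / draws.size
  var  = draws.inject(0.0) { |s, x| s + (x - mean)**2 } / draws.size
  puts "mean ~ %.3f, variance ~ %.3f" % [mean, var]  # ~0.0 and ~1.0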
data/lib/statsample.rb
CHANGED
@@ -20,10 +20,13 @@
 
 #$:.unshift(File.dirname(__FILE__))
 require 'matrix'
+require 'extendmatrix'
 require 'distribution'
 require 'dirty-memoize'
 require 'reportbuilder'
 
+
+
 class Numeric
   def square ; self * self ; end
 end
@@ -115,7 +118,7 @@ module Statsample
     @@has_gsl
   end
 
-  VERSION = '0.15.1'
+  VERSION = '0.16.0'
   SPLIT_TOKEN = ","
   autoload(:Database, 'statsample/converters')
   autoload(:Anova, 'statsample/anova')
@@ -253,4 +256,4 @@ end
 require 'statsample/vector'
 require 'statsample/dataset'
 require 'statsample/crosstab'
-require 'statsample/matrix'
+require 'statsample/matrix'
data/lib/statsample/bivariate.rb
CHANGED
@@ -7,8 +7,6 @@ module Statsample
   # * Statsample::Bivariate::Tetrachoric : Tetrachoric correlation
   # * Statsample::Bivariate::Polychoric : Polychoric correlation (using joint, two-step and polychoric series)
   module Bivariate
-    autoload(:Polychoric, "statsample/bivariate/polychoric")
-    autoload(:Tetrachoric, "statsample/bivariate/tetrachoric")
 
     class << self
       # Covariance between two vectors
@@ -337,3 +335,6 @@ module Statsample
   end
 end
 
+require 'statsample/bivariate/polychoric'
+require 'statsample/bivariate/tetrachoric'
+
data/lib/statsample/dataset.rb
CHANGED
@@ -89,8 +89,6 @@ module Statsample
     # b 1 0
     #
     # Useful to process outputs from databases
-    #
-
     def self.crosstab_by_asignation(rows,columns,values)
       raise "Three vectors should be equal size" if rows.size!=columns.size or rows.size!=values.size
       cols_values=columns.factors
@@ -125,8 +123,7 @@ module Statsample
     # a variable of the Dataset named as its key
     # [fields] Array of names for vectors. Is only used for set the
     # order of variables. If empty, vectors keys on alfabethic order as
-    # used as fields
-    #
+    # used as fields.
     def initialize(vectors={}, fields=[])
       @@n_dataset||=0
       @@n_dataset+=1
@@ -154,10 +151,12 @@ module Statsample
       end
       matrix
     end
-
+    #
     # Creates a copy of the given dataset, deleting all the cases with
-    # missing data on one of the vectors
+    # missing data on one of the vectors.
+    #
     # @param array of fields to include. No value include all fields
+    #
     def dup_only_valid(*fields_to_include)
       if fields_to_include.size==1 and fields_to_include[0].is_a? Array
         fields_to_include=fields_to_include[0]
@@ -177,10 +176,14 @@ module Statsample
       end
       ds
     end
-
-    # Returns a duplicate of the
-    #
+    #
+    # Returns a duplicate of the Dataset.
+    # All vectors are copied, so any modification on new
+    # dataset doesn't affect original dataset's vectors.
+    # If fields given as parameter, only include those vectors.
+    #
     # @param array of fields to include. No value include all fields
+    # @return {Statsample::Dataset}
     def dup(*fields_to_include)
       if fields_to_include.size==1 and fields_to_include[0].is_a? Array
         fields_to_include=fields_to_include[0]
@@ -206,9 +209,10 @@ module Statsample
 
     # Returns (when possible) a cheap copy of dataset.
     # If no vector have missing values, returns original vectors.
-    # If missing values presents, uses Dataset.dup_only_valid
+    # If missing values presents, uses Dataset.dup_only_valid.
+    #
     # @param array of fields to include. No value include all fields
-
+    # @return {Statsample::Dataset}
     def clone_only_valid(*fields_to_include)
       if fields_to_include.size==1 and fields_to_include[0].is_a? Array
         fields_to_include=fields_to_include[0]
@@ -223,6 +227,7 @@ module Statsample
     # Returns a shallow copy of Dataset.
     # Object id will be distinct, but @vectors will be the same.
     # @param array of fields to include. No value include all fields
+    # @return {Statsample::Dataset}
     def clone(*fields_to_include)
       if fields_to_include.size==1 and fields_to_include[0].is_a? Array
         fields_to_include=fields_to_include[0]
@@ -238,6 +243,8 @@ module Statsample
       ds
     end
     # Creates a copy of the given dataset, without data on vectors
+    #
+    # @return {Statsample::Dataset}
     def dup_empty
       vectors=@vectors.inject({}) {|a,v|
         a[v[0]]=v[1].dup_empty
@@ -248,6 +255,8 @@ module Statsample
     # Merge vectors from two datasets
     # In case of name collition, the vectors names are changed to
     # x_1, x_2 ....
+    #
+    # @return {Statsample::Dataset}
     def merge(other_ds)
       raise "Cases should be equal (this:#{@cases}; other:#{other_ds.cases}" unless @cases==other_ds.cases
       types = @fields.collect{|f| @vectors[f].type} + other_ds.fields.collect{|f| other_ds[f].type}
@@ -264,7 +273,9 @@ module Statsample
       ds_new.update_valid_data
       ds_new
     end
-    # Returns a dataset with standarized data
+    # Returns a dataset with standarized data.
+    #
+    # @return {Statsample::Dataset}
     def standarize
       ds=dup()
       ds.fields.each do |f|
@@ -273,6 +284,9 @@ module Statsample
       ds
     end
     # Generate a matrix, based on fields of dataset
+    #
+    # @return {::Matrix}
+
     def collect_matrix
       rows=@fields.collect{|row|
         @fields.collect{|col|
@@ -281,27 +295,39 @@ module Statsample
       }
       Matrix.rows(rows)
     end
+
     # We have the same datasets if +vectors+ and +fields+ are the same
+    #
+    # @return {Boolean}
     def ==(d2)
       @vectors==d2.vectors and @fields==d2.fields
     end
     # Returns vector <tt>c</tt>
+    #
+    # @return {Statsample::Vector}
    def col(c)
      @vectors[c]
    end
    alias_method :vector, :col
    # Equal to Dataset[<tt>name</tt>]=<tt>vector</tt>
+    #
+    # @return self
    def add_vector(name, vector)
      raise ArgumentError, "Vector have different size" if vector.size!=@cases
      @vectors[name]=vector
      check_order
+      self
    end
-    # Returns true if dataset have vector <tt>v</tt
+    # Returns true if dataset have vector <tt>v</tt>.
+    #
+    # @return {Boolean}
    def has_vector? (v)
      return @vectors.has_key?(v)
    end
    # Creates a dataset with the random data, of a n size
-    # If n not given, uses original number of cases
+    # If n not given, uses original number of cases.
+    #
+    # @return {Statsample::Dataset}
    def bootstrap(n=nil)
      n||=@cases
      ds_boot=dup_empty
@@ -314,6 +340,8 @@ module Statsample
    # Fast version of #add_case.
    # Can only add one case and no error check if performed
    # You SHOULD use #update_valid_data at the end of insertion cycle
+    #
+    #
    def add_case_array(v)
      v.each_index {|i| d=@vectors[@fields[i]].data; d.push(v[i])}
    end
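A quick illustration of the copy semantics documented above (a sketch with made-up data, not part of the diff):

  require 'statsample'

  ds = {
    'a' => [1, 2, nil, 4].to_vector(:scale),
    'b' => [10, nil, 30, 40].to_vector(:scale)
  }.to_dataset

  # dup_only_valid keeps only the cases complete on every vector,
  # while clone is shallow: it reuses the very same vector objects.
  puts ds.dup_only_valid.cases          # => 2
  puts ds.clone['a'].equal?(ds['a'])    # => true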
data/lib/statsample/factor/parallelanalysis.rb
CHANGED
@@ -10,12 +10,12 @@ module Statsample
    # == Usage
    # *With real dataset*
    # # ds should be any valid dataset
-    # pa=Statsample::Factor::ParallelAnalysis.new(ds, :iterations=>100, :bootstrap_method=>:
+    # pa=Statsample::Factor::ParallelAnalysis.new(ds, :iterations=>100, :bootstrap_method=>:data)
    #
    # *With number of cases and variables*
    # pa=Statsample::Factor::ParallelAnalysis.with_random_data(100,8)
    #
-    # == Reference
+    # == Reference
    # * Hayton, J., Allen, D. & Scarpello, V.(2004). Factor Retention Decisions in Exploratory Factor Analysis: a Tutorial on Parallel Analysis. <i>Organizational Research Methods, 7</i> (2), 191-205.
    # * O'Connor, B. (2000). SPSS and SAS programs for determining the number of components using parallel analysis and Velicer's MAP test. Behavior Research Methods, Instruments, & Computers, 32(3), 396-402.
    # * Liu, O., & Rijmen, F. (2008). A modified procedure for parallel analysis of ordered categorical data. Behavior Research Methods, 40(2), 556-562.
@@ -127,7 +127,7 @@ module Statsample
      @ds_eigenvalues=Statsample::Dataset.new((1..@n_variables).map{|v| "ev_%05d" % v})
      @ds_eigenvalues.fields.each {|f| @ds_eigenvalues[f].type=:scale}
      if bootstrap_method==:parameter or bootstrap_method==:random
-        rng =
+        rng = Distribution::Normal.rng_ugaussian
      end
 
      @iterations.times do |i|
@@ -137,7 +137,7 @@ module Statsample
        ds_bootstrap=Statsample::Dataset.new(@ds.fields)
        @fields.each do |f|
          if bootstrap_method==:random
-            ds_bootstrap[f]=@n_cases.times.map {|c| rng.
+            ds_bootstrap[f]=@n_cases.times.map {|c| rng.call}.to_scale
          elsif bootstrap_method==:data
            ds_bootstrap[f]=ds[f].sample_with_replacement(@n_cases).to_scale
          else
@@ -153,7 +153,7 @@ module Statsample
          end
          ev=matrix.eigenvalues
          @ds_eigenvalues.add_case_array(ev)
-        rescue Tetrachoric::RequerimentNotMeet => e
+        rescue Statsample::Bivariate::Tetrachoric::RequerimentNotMeet => e
          puts "Error: #{e}" if $DEBUG
          redo
        end
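A rough sketch of the :bootstrap_method=>:random path this change completes (hypothetical data; option names are taken from the doc comment above, and the result depends on the random draws):

  require 'statsample'

  # Hypothetical dataset: three noisy scale vectors sharing one latent factor.
  samples = 200
  latent  = samples.times.map { rand }
  ds = (1..3).inject({}) { |h, i|
    h["v#{i}"] = samples.times.map { |c| latent[c] + rand * 0.5 }.to_scale
    h
  }.to_dataset

  # :random draws the baseline eigenvalues from N(0,1) data; it now relies on
  # Distribution::Normal.rng_ugaussian, so GSL is optional.
  pa = Statsample::Factor::ParallelAnalysis.new(ds,
         :iterations => 100, :bootstrap_method => :random)
  puts pa.number_of_factors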
data/lib/statsample/graph.rb
CHANGED

data/lib/statsample/graph/scatterplot.rb
ADDED
@@ -0,0 +1,169 @@
+require 'tmpdir'
+require 'rubyvis'
+module Statsample
+  module Graph
+    # = Scatterplot
+    #
+    # From Wikipedia:
+    # A scatter plot or scattergraph is a type of mathematical diagram using
+    # Cartesian coordinates to display values for two variables for a set of data.
+    #
+    # The data is displayed as a collection of points, each having the value of one variable determining the position on the horizontal axis and the value of the other variable determining the position on the vertical axis.[2] This kind of plot is also called a scatter chart, scatter diagram and scatter graph.
+    # == Usage
+    # === Svg output
+    # a=[1,2,3,4].to_scale
+    # b=[3,4,5,6].to_scale
+    # puts Statsample::Graph::Scatterplot.new(a,b).to_svg
+    # === Using ReportBuilder
+    # a=[1,2,3,4].to_scale
+    # b=[3,4,5,6].to_scale
+    # rb=ReportBuilder.new
+    # rb.add(Statsample::Graph::Scatterplot.new(a,b))
+    # rb.save_html('scatter.html')
+
+    class Scatterplot
+      include Summarizable
+      attr_accessor :name
+      # Total width of Scatterplot
+      attr_accessor :width
+      # Total height of Scatterplot
+      attr_accessor :height
+      attr_accessor :dot_alpha
+      # Add a line on median of x and y axis
+      attr_accessor :line_median
+      # Top margin
+      attr_accessor :margin_top
+      # Bottom margin
+      attr_accessor :margin_bottom
+      # Left margin
+      attr_accessor :margin_left
+      # Right margin
+      attr_accessor :margin_right
+
+      attr_reader :data
+      attr_reader :v1,:v2
+      attr_reader :x_scale, :y_scale
+      # Create a new Scatterplot.
+      # Params:
+      # * v1: Vector on X axis
+      # * v2: Vector on Y axis
+      # * opts: Hash of options. See attributes of Scatterplot
+      def initialize(v1,v2,opts=Hash.new)
+        @v1_name,@v2_name = v1.name,v2.name
+        @v1,@v2 = Statsample.only_valid_clone(v1,v2)
+        opts_default={
+          :name=>_("Scatterplot (%s - %s)") % [@v1_name, @v2_name],
+          :width=>400,
+          :height=>300,
+          :dot_alpha=>0.5,
+          :line_median=>false,
+          :margin_top=>10,
+          :margin_bottom=>20,
+          :margin_left=>20,
+          :margin_right=>20
+
+        }
+        @opts=opts_default.merge(opts)
+        opts_default.keys.each {|k| send("#{k}=", @opts[k]) }
+        @data=[]
+        @v1.each_with_index {|d1,i|
+          @data.push({:x=>d1,:y=>@v2[i]})
+        }
+      end
+      # Add a rule on median of X and Y axis
+      def add_line_median(vis) # :nodoc:
+        that=self
+        x=@x_scale
+        y=@y_scale
+        vis.execute {
+          rule do
+            data [that.v1.median]
+            left x
+            stroke_style Rubyvis.color("#933").alpha(0.5)
+            label(:anchor=>"top") do
+              text x.tick_format
+            end
+          end
+          rule do
+            data [that.v2.median]
+            bottom y
+            stroke_style Rubyvis.color("#933").alpha(0.5)
+            label(:anchor=>"right") do
+              text y.tick_format
+            end
+          end
+        }
+
+      end
+      # Returns a Rubyvis panel with scatterplot
+      def rubyvis_panel # :nodoc:
+        that=self
+        #p @v1.map {|v| v}
+        x=Rubyvis::Scale.linear(@v1.to_a).range(0,width)
+        y=Rubyvis::Scale.linear(@v2.to_a).range(0,height)
+        @x_scale=x
+        @y_scale=y
+        vis=Rubyvis::Panel.new do |pan|
+          pan.width width - (margin_left + margin_right)
+          pan.height height - (margin_top + margin_bottom)
+          pan.bottom margin_bottom
+          pan.left margin_left
+          pan.right margin_right
+          pan.top margin_top
+          # X axis
+          pan.rule do
+            data y.ticks
+            bottom y
+            stroke_style {|d| d!=0 ? "#eee" : "#000"}
+            label(:anchor=>'left') do
+              visible {|d| d>0 and d<that.width}
+              text y.tick_format
+            end
+          end
+
+          # Y axis
+          pan.rule do
+            data x.ticks
+            left x
+            stroke_style {|d| d!=0 ? "#eee" : "#000"}
+            label(:anchor=>'bottom') do
+              visible {|d| d>0 and d < that.height}
+              text x.tick_format
+            end
+          end
+          # Add lines on median
+          add_line_median(pan) if line_median
+
+          pan.panel do
+            data(that.data)
+            dot do
+              left {|d| x.scale(d[:x])}
+              bottom {|d| y.scale(d[:y])}
+              stroke_style Rubyvis.color("red").alpha(that.dot_alpha)
+              shape_radius 2
+            end
+          end
+        end
+      end
+      # Returns SVG with scatterplot
+      def to_svg
+        rp=rubyvis_panel
+        rp.render
+        rp.to_svg
+      end
+      def report_building(builder) # :nodoc:
+        img_svg=to_svg
+        builder.section(:name=>name) do |b|
+          Dir.mktmpdir {|dir|
+            time=Time.new.to_f
+            File.open("#{dir}/image_#{time}.svg","w") {|fp|
+              fp.write img_svg
+            }
+            b.image("#{dir}/image_#{time}.svg", :width=>width, :height=>height)
+          }
+        end
+
+      end
+    end
+  end
+end
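As a closing usage note (a sketch, not part of the diff): the keys of opts_default above can be passed straight to the constructor, and to_svg returns a plain string:

  require 'statsample'

  a = [1, 2, 3, 4, 5].to_scale
  b = [2, 3, 5, 8, 11].to_scale

  # :line_median toggles the median rules drawn by add_line_median;
  # width, height and the margins are the attr_accessors listed above.
  svg = Statsample::Graph::Scatterplot.new(a, b,
          :width => 300, :height => 300, :line_median => true).to_svg

  File.open('scatter.svg', 'w') { |f| f.write(svg) }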