statsample 0.18.0 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (121) hide show
  1. data.tar.gz.sig +0 -0
  2. data/History.txt +23 -0
  3. data/Manifest.txt +28 -17
  4. data/Rakefile +3 -2
  5. data/benchmarks/correlation_matrix_15_variables.rb +31 -0
  6. data/benchmarks/correlation_matrix_5_variables.rb +32 -0
  7. data/benchmarks/correlation_matrix_methods/correlation_matrix.ds +0 -0
  8. data/benchmarks/correlation_matrix_methods/correlation_matrix.html +93 -0
  9. data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +75 -0
  10. data/benchmarks/correlation_matrix_methods/correlation_matrix.xls +0 -0
  11. data/benchmarks/correlation_matrix_methods/correlation_matrix_gsl_ruby.ods +0 -0
  12. data/benchmarks/correlation_matrix_methods/correlation_matrix_with_graphics.ods +0 -0
  13. data/benchmarks/correlation_matrix_methods/results.ds +0 -0
  14. data/benchmarks/factor_map.rb +37 -0
  15. data/benchmarks/helpers_benchmark.rb +5 -0
  16. data/examples/boxplot.rb +13 -14
  17. data/examples/correlation_matrix.rb +16 -8
  18. data/examples/dataset.rb +13 -4
  19. data/examples/dominance_analysis.rb +23 -17
  20. data/examples/dominance_analysis_bootstrap.rb +28 -22
  21. data/examples/histogram.rb +8 -9
  22. data/examples/icc.rb +20 -21
  23. data/examples/levene.rb +10 -4
  24. data/examples/multiple_regression.rb +9 -28
  25. data/examples/multivariate_correlation.rb +9 -3
  26. data/examples/parallel_analysis.rb +20 -16
  27. data/examples/polychoric.rb +15 -9
  28. data/examples/principal_axis.rb +18 -6
  29. data/examples/reliability.rb +26 -13
  30. data/examples/scatterplot.rb +10 -6
  31. data/examples/t_test.rb +15 -6
  32. data/examples/tetrachoric.rb +9 -2
  33. data/examples/u_test.rb +12 -4
  34. data/examples/vector.rb +13 -2
  35. data/examples/velicer_map_test.rb +33 -26
  36. data/lib/statsample.rb +32 -12
  37. data/lib/statsample/analysis.rb +79 -0
  38. data/lib/statsample/analysis/suite.rb +72 -0
  39. data/lib/statsample/analysis/suitereportbuilder.rb +38 -0
  40. data/lib/statsample/bivariate.rb +70 -16
  41. data/lib/statsample/dataset.rb +25 -19
  42. data/lib/statsample/dominanceanalysis.rb +2 -2
  43. data/lib/statsample/factor.rb +2 -0
  44. data/lib/statsample/factor/map.rb +16 -10
  45. data/lib/statsample/factor/parallelanalysis.rb +9 -3
  46. data/lib/statsample/factor/pca.rb +28 -32
  47. data/lib/statsample/factor/rotation.rb +15 -8
  48. data/lib/statsample/graph/boxplot.rb +3 -4
  49. data/lib/statsample/graph/histogram.rb +2 -1
  50. data/lib/statsample/graph/scatterplot.rb +1 -0
  51. data/lib/statsample/matrix.rb +106 -16
  52. data/lib/statsample/regression.rb +4 -1
  53. data/lib/statsample/regression/binomial.rb +1 -1
  54. data/lib/statsample/regression/multiple/baseengine.rb +19 -9
  55. data/lib/statsample/regression/multiple/gslengine.rb +127 -126
  56. data/lib/statsample/regression/multiple/matrixengine.rb +8 -5
  57. data/lib/statsample/regression/multiple/rubyengine.rb +1 -1
  58. data/lib/statsample/regression/simple.rb +31 -6
  59. data/lib/statsample/reliability.rb +11 -3
  60. data/lib/statsample/reliability/scaleanalysis.rb +4 -4
  61. data/lib/statsample/shorthand.rb +81 -0
  62. data/lib/statsample/test/chisquare.rb +1 -1
  63. data/lib/statsample/vector.rb +163 -163
  64. data/lib/statsample/vector/gsl.rb +106 -0
  65. data/references.txt +2 -2
  66. data/{data → test/fixtures}/crime.txt +0 -0
  67. data/{data → test/fixtures}/hartman_23.matrix +0 -0
  68. data/{data → test/fixtures}/repeated_fields.csv +0 -0
  69. data/{data → test/fixtures}/test_binomial.csv +0 -0
  70. data/test/{test_csv.csv → fixtures/test_csv.csv} +0 -0
  71. data/test/{test_xls.xls → fixtures/test_xls.xls} +0 -0
  72. data/{data → test/fixtures}/tetmat_matrix.txt +0 -0
  73. data/{data → test/fixtures}/tetmat_test.txt +0 -0
  74. data/test/helpers_tests.rb +18 -2
  75. data/test/test_analysis.rb +118 -0
  76. data/test/test_anovatwoway.rb +1 -1
  77. data/test/test_anovatwowaywithdataset.rb +1 -1
  78. data/test/test_anovawithvectors.rb +1 -2
  79. data/test/test_bartlettsphericity.rb +1 -2
  80. data/test/test_bivariate.rb +64 -22
  81. data/test/test_codification.rb +1 -2
  82. data/test/test_crosstab.rb +1 -2
  83. data/test/test_csv.rb +3 -4
  84. data/test/test_dataset.rb +24 -3
  85. data/test/test_dominance_analysis.rb +1 -2
  86. data/test/test_factor.rb +8 -69
  87. data/test/test_factor_map.rb +43 -0
  88. data/test/test_factor_pa.rb +54 -0
  89. data/test/test_ggobi.rb +1 -1
  90. data/test/test_gsl.rb +12 -18
  91. data/test/test_histogram.rb +1 -2
  92. data/test/test_logit.rb +62 -18
  93. data/test/test_matrix.rb +4 -5
  94. data/test/test_mle.rb +3 -4
  95. data/test/test_regression.rb +21 -2
  96. data/test/test_reliability.rb +3 -3
  97. data/test/test_reliability_icc.rb +1 -1
  98. data/test/test_reliability_skillscale.rb +20 -4
  99. data/test/test_resample.rb +1 -2
  100. data/test/test_rserve_extension.rb +1 -2
  101. data/test/test_srs.rb +1 -2
  102. data/test/test_statistics.rb +1 -2
  103. data/test/test_stest.rb +1 -2
  104. data/test/test_stratified.rb +1 -2
  105. data/test/test_test_f.rb +1 -2
  106. data/test/test_test_t.rb +1 -2
  107. data/test/test_umannwhitney.rb +1 -2
  108. data/test/test_vector.rb +117 -18
  109. data/test/test_xls.rb +2 -3
  110. data/web/Rakefile +39 -0
  111. metadata +109 -29
  112. metadata.gz.sig +0 -0
  113. data/examples/parallel_analysis_tetrachoric.rb +0 -31
  114. data/lib/distribution.rb +0 -25
  115. data/lib/distribution/chisquare.rb +0 -23
  116. data/lib/distribution/f.rb +0 -35
  117. data/lib/distribution/normal.rb +0 -60
  118. data/lib/distribution/normalbivariate.rb +0 -284
  119. data/lib/distribution/normalmultivariate.rb +0 -73
  120. data/lib/distribution/t.rb +0 -55
  121. data/test/test_distribution.rb +0 -73
@@ -5,9 +5,13 @@ $:.unshift('/home/cdx/dev/reportbuilder/lib/')
5
5
  require 'benchmark'
6
6
  require 'statsample'
7
7
  n=100
8
- a=n.times.map {|i| rand(10)+i}.to_scale
9
- b=n.times.map {|i| rand(10)+i}.to_scale
10
- sp=Statsample::Graph::Scatterplot.new(a,b, :width=>200, :height=>200)
11
- rb=ReportBuilder.new
12
- rb.add(sp)
13
- puts rb.to_text
8
+
9
+ Statsample::Analysis.store(Statsample::Graph::Scatterplot) do
10
+ x=rnorm(n)
11
+ y=x+rnorm(n,0.5,0.2)
12
+ scatterplot(x,y)
13
+ end
14
+
15
+ if __FILE__==$0
16
+ Statsample::Analysis.run
17
+ end
data/examples/t_test.rb CHANGED
@@ -1,11 +1,20 @@
1
1
  #!/usr/bin/ruby
2
2
  $:.unshift(File.dirname(__FILE__)+'/../lib')
3
3
  require 'statsample'
4
- a=10.times.map {rand(100)}.to_scale
5
- t_1=Statsample::Test.t_one_sample(a,{:u=>50})
6
- puts t_1.summary
7
4
 
8
- b=20.times.map {(rand(20))**2+50}.to_scale
5
+ Statsample::Analysis.store(Statsample::Test::T) do
6
+
7
+
8
+ a=rnorm(10)
9
+ t_1=Statsample::Test.t_one_sample(a,{:u=>50})
10
+ summary t_1
11
+
12
+ b=rnorm(10,2)
13
+
14
+ t_2=Statsample::Test.t_two_samples_independent(a,b)
15
+ summary t_2
16
+ end
9
17
 
10
- t_2=Statsample::Test.t_two_samples_independent(a,b)
11
- puts t_2.summary
18
+ if __FILE__==$0
19
+ Statsample::Analysis.run_batch
20
+ end
@@ -2,9 +2,16 @@
2
2
  $:.unshift(File.dirname(__FILE__)+'/../lib/')
3
3
 
4
4
  require 'statsample'
5
+
6
+ Statsample::Analysis.store(Statsample::Bivariate::Tetrachoric) do
7
+
5
8
  a=40
6
9
  b=10
7
10
  c=20
8
11
  d=30
9
- tetra=Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
10
- puts tetra.summary
12
+ summary tetrachoric(a,b,c,d)
13
+ end
14
+
15
+ if __FILE__==$0
16
+ Statsample::Analysis.run_batch
17
+ end
data/examples/u_test.rb CHANGED
@@ -1,8 +1,16 @@
1
1
  #!/usr/bin/ruby
2
2
  $:.unshift(File.dirname(__FILE__)+'/../lib')
3
3
  require 'statsample'
4
- a=10.times.map {rand(100)}.to_scale
5
- b=20.times.map {(rand(20))**2+50}.to_scale
6
4
 
7
- u=Statsample::Test::UMannWhitney.new(a,b)
8
- puts u.summary
5
+ Statsample::Analysis.store(Statsample::Test::UMannWhitney) do
6
+
7
+ a=10.times.map {rand(100)}.to_scale
8
+ b=20.times.map {(rand(20))**2+50}.to_scale
9
+
10
+ u=Statsample::Test::UMannWhitney.new(a,b)
11
+ summary u
12
+ end
13
+
14
+ if __FILE__==$0
15
+ Statsample::Analysis.run_batch
16
+ end
data/examples/vector.rb CHANGED
@@ -2,5 +2,16 @@
2
2
  $:.unshift(File.dirname(__FILE__)+'/../lib/')
3
3
 
4
4
  require 'statsample'
5
- a=1000.times.collect {r=rand(5); r==4 ? nil: r;}.to_scale
6
- puts a.summary
5
+
6
+ Statsample::Analysis.store(Statsample::Vector) do
7
+
8
+ a=Statsample::Vector.new_scale(1000) {r=rand(5); r==4 ? nil: r;}
9
+ summary a
10
+ b=c(1,2,3,4,6..10)
11
+ summary b
12
+
13
+ end
14
+
15
+ if __FILE__==$0
16
+ Statsample::Analysis.run_batch
17
+ end
@@ -2,34 +2,41 @@
2
2
  $:.unshift(File.dirname(__FILE__)+'/../lib/')
3
3
 
4
4
  require 'statsample'
5
- samples=100
6
- variables=10
7
- rng = GSL::Rng.alloc()
8
- f1=samples.times.collect {rng.ugaussian()}.to_scale
9
- f2=samples.times.collect {rng.ugaussian()}.to_scale
10
5
 
11
- vectors={}
12
-
13
- variables.times do |i|
6
+ Statsample::Analysis.store(Statsample::Factor::MAP) do
7
+
8
+ rng=Distribution::Normal.rng
9
+ samples=100
10
+ variables=10
11
+
12
+ f1=rnorm(samples)
13
+ f2=rnorm(samples)
14
+
15
+ vectors={}
16
+
17
+ variables.times do |i|
14
18
  vectors["v#{i}"]=samples.times.collect {|nv|
15
- if i<5
16
- f1[nv]*5 + f2[nv] *2 +rng.ugaussian()
17
- else
18
- f1[nv]*2 + f2[nv] *3 +rng.ugaussian()
19
- end
19
+ if i<5
20
+ f1[nv]*5 + f2[nv] *2 +rng.call
21
+ else
22
+ f1[nv]*2 + f2[nv] *3 +rng.call
23
+ end
20
24
  }.to_scale
25
+ end
26
+
27
+
28
+ ds=vectors.to_dataset
29
+ cor=cor(ds)
30
+ pca=pca(cor)
31
+
32
+ map=Statsample::Factor::MAP.new(cor)
33
+
34
+ echo ("There are 2 real factors on data")
35
+ summary(pca)
36
+ echo("Traditional Kaiser criterion (k>1) returns #{pca.m} factors")
37
+ summary(map)
38
+ echo("Velicer's MAP Test returns #{map.number_of_factors} factors to preserve")
21
39
  end
22
- ds=vectors.to_dataset
23
- cor=Statsample::Bivariate.correlation_matrix(ds)
24
- map=Statsample::Factor::MAP.new(cor)
25
- pca=Statsample::Factor::PCA.new(cor)
26
-
27
- rb=ReportBuilder.new(:name=>"Velicer's MAP test") do |g|
28
- g.text("There are 2 real factors on data")
29
- g.parse_element(pca)
30
- g.text("Traditional Kaiser criterion (k>1) returns #{pca.m} factors")
31
- g.parse_element(map)
32
- g.text("Velicer's MAP Test returns #{map.number_of_factors} factors to preserve")
40
+ if __FILE__==$0
41
+ Statsample::Analysis.run_batch
33
42
  end
34
-
35
- puts rb.to_text
data/lib/statsample.rb CHANGED
@@ -41,6 +41,17 @@ class String
41
41
  end
42
42
  end
43
43
 
44
class Module
  # Includes module +m+ in the receiver, first moving out of the way any
  # instance method that would otherwise hide the module's implementation.
  #
  # For every public/protected instance method +f+ of +m+ that the receiver
  # already responds to, an alias named "<f>_<suffix>" is created so the
  # previous implementation stays reachable. If +f+ is defined directly on
  # the receiver it is then removed, so that after the include the lookup
  # resolves to the version provided by +m+.
  #
  # m::      module to include
  # suffix:: suffix appended to the alias of each replaced method
  #          (default "ruby", giving e.g. +mean_ruby+)
  #
  # Returns whatever +include+ returns.
  def include_aliasing(m, suffix="ruby")
    m.instance_methods.each do |f|
      next unless method_defined?(f)
      alias_method("#{f}_#{suffix}", f)
      # remove_method raises NameError for methods that are only inherited
      # (from a superclass or a previously included module). Those do not
      # need removing anyway: the freshly included module is searched
      # before any ancestor.
      remove_method(f) if instance_methods(false).include?(f)
    end
    include m
  end
end
44
55
 
45
56
  class Array
46
57
  # Recode repeated values on an array, adding the number of repetition
@@ -105,21 +116,27 @@ end
105
116
  # * Interfaces to gdchart, gnuplot and SVG::Graph
106
117
  #
107
118
  module Statsample
108
- @@has_gsl=nil
109
- def self.has_gsl?
110
- if @@has_gsl.nil?
111
- begin
112
- require 'rbgsl'
113
- @@has_gsl=true
114
- rescue LoadError
115
- @@has_gsl=false
119
+
120
+ def self.create_has_library(library)
121
+ define_singleton_method("has_#{library}?") do
122
+ cv="@@#{library}"
123
+ if !class_variable_defined? cv
124
+ begin
125
+ require library.to_s
126
+ class_variable_set(cv,true)
127
+ rescue LoadError
128
+ class_variable_set(cv,false)
129
+ end
116
130
  end
131
+ class_variable_get(cv)
117
132
  end
118
- @@has_gsl
119
133
  end
120
134
 
121
- VERSION = '0.18.0'
135
+ create_has_library :gsl
136
+
137
+ VERSION = '1.0.0'
122
138
  SPLIT_TOKEN = ","
139
+ autoload(:Analysis, 'statsample/analysis')
123
140
  autoload(:Database, 'statsample/converters')
124
141
  autoload(:Anova, 'statsample/anova')
125
142
  autoload(:CSV, 'statsample/converters')
@@ -214,11 +231,12 @@ module Statsample
214
231
  ds=Statsample::Dataset.new(h).dup_only_valid
215
232
  ds.vectors.values
216
233
  end
234
+
217
235
  # Cheap version of #only_valid.
218
236
  # If any vectors have missing_values, return only valid.
219
- # If not, return the vectors it self
237
+ # If not, return the vectors itself
220
238
  def only_valid_clone(*vs)
221
- if vs.any? {|v| v.has_missing_data?}
239
+ if vs.any? {|v| v.flawed?}
222
240
  only_valid(*vs)
223
241
  else
224
242
  vs
@@ -294,3 +312,5 @@ require 'statsample/vector'
294
312
  require 'statsample/dataset'
295
313
  require 'statsample/crosstab'
296
314
  require 'statsample/matrix'
315
+ require 'statsample/shorthand'
316
+
@@ -0,0 +1,79 @@
1
+ require 'statsample/analysis/suite'
2
+ require 'statsample/analysis/suitereportbuilder'
3
+
4
module Statsample
  # DSL to create analyses without hassle.
  # * Shortcut methods that avoid spelling out complete namespaces, many
  #   of them modelled on R
  # * Attach/detach vectors to the workspace, like R
  # == Example
  #  an1=Statsample::Analysis.store(:first) do
  #    # Load excel file with x,y,z vectors
  #    ds=excel('data.xls')
  #    # See variables on ds dataset
  #    names(ds)
  #    # Attach the vectors to workspace, like R
  #    attach(ds)
  #    # vector 'x' is attached to workspace like a method,
  #    # so you can use it like any variable
  #    mean,sd=x.mean, x.sd
  #    # Shameless R robbery
  #    a=c(1..10)
  #    b=c(21..30)
  #    summary(cor(ds)) # Call summary method on correlation matrix
  #  end
  #  # You can run the analysis by its name
  #  Statsample::Analysis.run(:first)
  #  # or using the returned object
  #  an1.run
  #  # You can also generate a report using ReportBuilder.
  #  # In that mode the output of echo and summary is added
  #  # to the report instead of being sent to the screen.
  #  an1.generate("report.html")
  module Analysis
    # Registry of every stored suite, keyed by the name given to .store
    @@stored_analysis={}
    # Name of the most recently stored suite; default target of the
    # run/save/to_text helpers
    @@last_analysis=nil

    # Hash of all stored analyses (name => Suite)
    def self.stored_analysis
      @@stored_analysis
    end

    # Most recently stored analysis, or nil if none was stored
    def self.last
      @@stored_analysis[@@last_analysis]
    end

    # Store the analysis defined by +block+ under +name+ and
    # remember it as the latest one. Returns the created Suite.
    # Raises if no block is given.
    def self.store(name,opts=Hash.new,&block)
      raise "You should provide a block" unless block
      @@last_analysis=name
      @@stored_analysis[name]=Suite.new(name,opts,&block)
    end

    # Run analysis +name+.
    # Without arguments, run the latest analysis.
    # Only 'echo' output is sent to the screen
    def self.run(name=nil)
      find_suite(name).run
    end

    # Run analysis and print to screen the text report
    # built from all echo and summary calls
    def self.run_batch(name=nil)
      puts find_suite(name).to_text
    end

    # Run analysis and save its report on +filename+.
    # The value returned by the suite's generate is printed.
    def self.save(filename, name=nil)
      puts find_suite(name).generate(filename)
    end

    # Run analysis and return the text report as a string
    def self.to_text(name=nil)
      find_suite(name).to_text
    end

    # Look up the suite stored as +name+ (the latest one when +name+
    # is nil), raising if there is no such analysis.
    def self.find_suite(name)
      key=name || @@last_analysis
      suite=stored_analysis[key]
      raise "Analysis #{key} doesn't exists" unless suite
      suite
    end
    private_class_method :find_suite
  end
end
@@ -0,0 +1,72 @@
1
module Statsample
  module Analysis
    # A named analysis: a block of code evaluated with the
    # Statsample::Shorthand helpers in scope, plus a stack of attached
    # datasets whose fields can be read as if they were methods.
    class Suite
      include Statsample::Shorthand
      # IO-like object that receives the text written by #echo
      attr_accessor :output
      # Name given to the analysis
      attr_accessor :name
      # Block that holds the analysis code
      attr_reader :block

      # name::  identifier of the analysis
      # opts::  options hash; :output sets the object used by #echo
      #         (default ::STDOUT)
      # block:: analysis code, executed by #run
      def initialize(name,opts=Hash.new(),&block)
        @name=name
        @block=block
        @attached=[]
        @output=opts[:output] || ::STDOUT
      end

      # Run the analysis. A block without parameters is evaluated in the
      # context of the suite (instance_eval); otherwise the suite is
      # passed to the block as its argument.
      def run
        @block.arity<1 ? instance_eval(&@block) : @block.call(self)
      end

      # Write +args+ to #output using puts
      def echo(*args)
        @output.puts(*args)
      end

      # Return the result of calling +summary+ on +obj+
      def summary(obj)
        obj.summary
      end

      # Run the block inside a SuiteReportBuilder and save the
      # resulting report on +filename+
      def generate(filename)
        ar=SuiteReportBuilder.new(name,&block)
        ar.generate(filename)
      end

      # Run the block inside a SuiteReportBuilder and return the
      # report as text
      def to_text
        ar=SuiteReportBuilder.new(name, &block)
        ar.to_text
      end

      # Attach +ds+ to the workspace: its fields become readable
      # as methods of the suite (see #method_missing)
      def attach(ds)
        @attached.push(ds)
      end

      # Detach +ds+ from the workspace. Without argument, detach the
      # most recently attached dataset.
      def detach(ds=nil)
        if ds.nil?
          @attached.pop
        else
          @attached.delete(ds)
        end
      end

      # Keep the graph builders reachable under an old_ prefix before
      # overriding them below.
      # NOTE(review): presumably these three come from
      # Statsample::Shorthand - confirm.
      alias :old_boxplot :boxplot
      alias :old_histogram :histogram
      alias :old_scatterplot :scatterplot

      # Write +svg+ to a timestamped file in the temporary directory
      # and open it with xdg-open.
      # NOTE(review): the file name is interpolated into a shell
      # command; a temporary directory containing a single quote
      # would break the quoting.
      def show_svg(svg)
        require 'tmpdir'
        fn=Dir.tmpdir+"/image_#{Time.now.to_f}.svg"
        File.open(fn,"w") {|fp| fp.write svg}
        `xdg-open '#{fn}'`
      end

      # Build a boxplot and show it as SVG
      def boxplot(*args)
        show_svg(old_boxplot(*args).to_svg)
      end

      # Build a histogram and show it as SVG
      def histogram(*args)
        show_svg(old_histogram(*args).to_svg)
      end

      # Build a scatterplot and show it as SVG
      def scatterplot(*args)
        show_svg(old_scatterplot(*args).to_svg)
      end

      # Resolve unknown method names against the attached datasets,
      # newest first, returning the field with that name.
      # Raises if no attached dataset has such a field.
      def method_missing(name, *args,&block)
        field=name.to_s
        @attached.reverse_each do |ds|
          return ds[field] if ds.fields.include?(field)
        end
        raise "Method #{name} doesn't exists"
      end

      # Keep respond_to? consistent with #method_missing: the suite
      # responds to the name of any field of an attached dataset.
      def respond_to_missing?(name, include_private=false)
        field=name.to_s
        @attached.any? {|ds| ds.fields.include?(field)} || super
      end
    end
  end
end
@@ -0,0 +1,38 @@
1
module Statsample
  module Analysis
    # Suite that collects its output into a ReportBuilder instead of
    # sending it to the screen: echo, summary and the graph helpers all
    # add their argument/result to the report.
    class SuiteReportBuilder < Suite
      # ReportBuilder instance that accumulates the report
      attr_accessor :rb

      # name::  name of the analysis, also used as the report name
      # block:: analysis code
      def initialize(name,&block)
        super(name,&block)
        @rb=ReportBuilder.new(:name=>name)
      end

      # Run the analysis (if it has a block) and save the report
      # on +filename+
      def generate(filename)
        run if @block
        @rb.save(filename)
      end

      # Run the analysis (if it has a block) and return the report
      # as text
      def to_text
        run if @block
        @rb.to_text
      end

      # Add +o+ to the report
      def summary(o)
        @rb.add(o)
      end

      # Add every argument to the report
      def echo(*args)
        args.each do |a|
          @rb.add(a)
        end
      end

      # Add a boxplot to the report
      def boxplot(*args)
        @rb.add(old_boxplot(*args))
      end

      # Add a histogram to the report
      def histogram(*args)
        @rb.add(old_histogram(*args))
      end

      # Add a scatterplot to the report. Without this override the call
      # would reach Suite#scatterplot, which opens the image in an
      # external viewer and leaves the report without the graph.
      def scatterplot(*args)
        @rb.add(old_scatterplot(*args))
      end
    end
  end
end