statsample 0.18.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (121)
  1. data.tar.gz.sig +0 -0
  2. data/History.txt +23 -0
  3. data/Manifest.txt +28 -17
  4. data/Rakefile +3 -2
  5. data/benchmarks/correlation_matrix_15_variables.rb +31 -0
  6. data/benchmarks/correlation_matrix_5_variables.rb +32 -0
  7. data/benchmarks/correlation_matrix_methods/correlation_matrix.ds +0 -0
  8. data/benchmarks/correlation_matrix_methods/correlation_matrix.html +93 -0
  9. data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +75 -0
  10. data/benchmarks/correlation_matrix_methods/correlation_matrix.xls +0 -0
  11. data/benchmarks/correlation_matrix_methods/correlation_matrix_gsl_ruby.ods +0 -0
  12. data/benchmarks/correlation_matrix_methods/correlation_matrix_with_graphics.ods +0 -0
  13. data/benchmarks/correlation_matrix_methods/results.ds +0 -0
  14. data/benchmarks/factor_map.rb +37 -0
  15. data/benchmarks/helpers_benchmark.rb +5 -0
  16. data/examples/boxplot.rb +13 -14
  17. data/examples/correlation_matrix.rb +16 -8
  18. data/examples/dataset.rb +13 -4
  19. data/examples/dominance_analysis.rb +23 -17
  20. data/examples/dominance_analysis_bootstrap.rb +28 -22
  21. data/examples/histogram.rb +8 -9
  22. data/examples/icc.rb +20 -21
  23. data/examples/levene.rb +10 -4
  24. data/examples/multiple_regression.rb +9 -28
  25. data/examples/multivariate_correlation.rb +9 -3
  26. data/examples/parallel_analysis.rb +20 -16
  27. data/examples/polychoric.rb +15 -9
  28. data/examples/principal_axis.rb +18 -6
  29. data/examples/reliability.rb +26 -13
  30. data/examples/scatterplot.rb +10 -6
  31. data/examples/t_test.rb +15 -6
  32. data/examples/tetrachoric.rb +9 -2
  33. data/examples/u_test.rb +12 -4
  34. data/examples/vector.rb +13 -2
  35. data/examples/velicer_map_test.rb +33 -26
  36. data/lib/statsample.rb +32 -12
  37. data/lib/statsample/analysis.rb +79 -0
  38. data/lib/statsample/analysis/suite.rb +72 -0
  39. data/lib/statsample/analysis/suitereportbuilder.rb +38 -0
  40. data/lib/statsample/bivariate.rb +70 -16
  41. data/lib/statsample/dataset.rb +25 -19
  42. data/lib/statsample/dominanceanalysis.rb +2 -2
  43. data/lib/statsample/factor.rb +2 -0
  44. data/lib/statsample/factor/map.rb +16 -10
  45. data/lib/statsample/factor/parallelanalysis.rb +9 -3
  46. data/lib/statsample/factor/pca.rb +28 -32
  47. data/lib/statsample/factor/rotation.rb +15 -8
  48. data/lib/statsample/graph/boxplot.rb +3 -4
  49. data/lib/statsample/graph/histogram.rb +2 -1
  50. data/lib/statsample/graph/scatterplot.rb +1 -0
  51. data/lib/statsample/matrix.rb +106 -16
  52. data/lib/statsample/regression.rb +4 -1
  53. data/lib/statsample/regression/binomial.rb +1 -1
  54. data/lib/statsample/regression/multiple/baseengine.rb +19 -9
  55. data/lib/statsample/regression/multiple/gslengine.rb +127 -126
  56. data/lib/statsample/regression/multiple/matrixengine.rb +8 -5
  57. data/lib/statsample/regression/multiple/rubyengine.rb +1 -1
  58. data/lib/statsample/regression/simple.rb +31 -6
  59. data/lib/statsample/reliability.rb +11 -3
  60. data/lib/statsample/reliability/scaleanalysis.rb +4 -4
  61. data/lib/statsample/shorthand.rb +81 -0
  62. data/lib/statsample/test/chisquare.rb +1 -1
  63. data/lib/statsample/vector.rb +163 -163
  64. data/lib/statsample/vector/gsl.rb +106 -0
  65. data/references.txt +2 -2
  66. data/{data → test/fixtures}/crime.txt +0 -0
  67. data/{data → test/fixtures}/hartman_23.matrix +0 -0
  68. data/{data → test/fixtures}/repeated_fields.csv +0 -0
  69. data/{data → test/fixtures}/test_binomial.csv +0 -0
  70. data/test/{test_csv.csv → fixtures/test_csv.csv} +0 -0
  71. data/test/{test_xls.xls → fixtures/test_xls.xls} +0 -0
  72. data/{data → test/fixtures}/tetmat_matrix.txt +0 -0
  73. data/{data → test/fixtures}/tetmat_test.txt +0 -0
  74. data/test/helpers_tests.rb +18 -2
  75. data/test/test_analysis.rb +118 -0
  76. data/test/test_anovatwoway.rb +1 -1
  77. data/test/test_anovatwowaywithdataset.rb +1 -1
  78. data/test/test_anovawithvectors.rb +1 -2
  79. data/test/test_bartlettsphericity.rb +1 -2
  80. data/test/test_bivariate.rb +64 -22
  81. data/test/test_codification.rb +1 -2
  82. data/test/test_crosstab.rb +1 -2
  83. data/test/test_csv.rb +3 -4
  84. data/test/test_dataset.rb +24 -3
  85. data/test/test_dominance_analysis.rb +1 -2
  86. data/test/test_factor.rb +8 -69
  87. data/test/test_factor_map.rb +43 -0
  88. data/test/test_factor_pa.rb +54 -0
  89. data/test/test_ggobi.rb +1 -1
  90. data/test/test_gsl.rb +12 -18
  91. data/test/test_histogram.rb +1 -2
  92. data/test/test_logit.rb +62 -18
  93. data/test/test_matrix.rb +4 -5
  94. data/test/test_mle.rb +3 -4
  95. data/test/test_regression.rb +21 -2
  96. data/test/test_reliability.rb +3 -3
  97. data/test/test_reliability_icc.rb +1 -1
  98. data/test/test_reliability_skillscale.rb +20 -4
  99. data/test/test_resample.rb +1 -2
  100. data/test/test_rserve_extension.rb +1 -2
  101. data/test/test_srs.rb +1 -2
  102. data/test/test_statistics.rb +1 -2
  103. data/test/test_stest.rb +1 -2
  104. data/test/test_stratified.rb +1 -2
  105. data/test/test_test_f.rb +1 -2
  106. data/test/test_test_t.rb +1 -2
  107. data/test/test_umannwhitney.rb +1 -2
  108. data/test/test_vector.rb +117 -18
  109. data/test/test_xls.rb +2 -3
  110. data/web/Rakefile +39 -0
  111. metadata +109 -29
  112. metadata.gz.sig +0 -0
  113. data/examples/parallel_analysis_tetrachoric.rb +0 -31
  114. data/lib/distribution.rb +0 -25
  115. data/lib/distribution/chisquare.rb +0 -23
  116. data/lib/distribution/f.rb +0 -35
  117. data/lib/distribution/normal.rb +0 -60
  118. data/lib/distribution/normalbivariate.rb +0 -284
  119. data/lib/distribution/normalmultivariate.rb +0 -73
  120. data/lib/distribution/t.rb +0 -55
  121. data/test/test_distribution.rb +0 -73
@@ -5,9 +5,13 @@ $:.unshift('/home/cdx/dev/reportbuilder/lib/')
5
5
  require 'benchmark'
6
6
  require 'statsample'
7
7
  n=100
8
- a=n.times.map {|i| rand(10)+i}.to_scale
9
- b=n.times.map {|i| rand(10)+i}.to_scale
10
- sp=Statsample::Graph::Scatterplot.new(a,b, :width=>200, :height=>200)
11
- rb=ReportBuilder.new
12
- rb.add(sp)
13
- puts rb.to_text
8
+
9
+ Statsample::Analysis.store(Statsample::Graph::Scatterplot) do
10
+ x=rnorm(n)
11
+ y=x+rnorm(n,0.5,0.2)
12
+ scatterplot(x,y)
13
+ end
14
+
15
+ if __FILE__==$0
16
+ Statsample::Analysis.run
17
+ end
data/examples/t_test.rb CHANGED
@@ -1,11 +1,20 @@
1
1
  #!/usr/bin/ruby
2
2
  $:.unshift(File.dirname(__FILE__)+'/../lib')
3
3
  require 'statsample'
4
- a=10.times.map {rand(100)}.to_scale
5
- t_1=Statsample::Test.t_one_sample(a,{:u=>50})
6
- puts t_1.summary
7
4
 
8
- b=20.times.map {(rand(20))**2+50}.to_scale
5
+ Statsample::Analysis.store(Statsample::Test::T) do
6
+
7
+
8
+ a=rnorm(10)
9
+ t_1=Statsample::Test.t_one_sample(a,{:u=>50})
10
+ summary t_1
11
+
12
+ b=rnorm(10,2)
13
+
14
+ t_2=Statsample::Test.t_two_samples_independent(a,b)
15
+ summary t_2
16
+ end
9
17
 
10
- t_2=Statsample::Test.t_two_samples_independent(a,b)
11
- puts t_2.summary
18
+ if __FILE__==$0
19
+ Statsample::Analysis.run_batch
20
+ end
@@ -2,9 +2,16 @@
2
2
  $:.unshift(File.dirname(__FILE__)+'/../lib/')
3
3
 
4
4
  require 'statsample'
5
+
6
+ Statsample::Analysis.store(Statsample::Bivariate::Tetrachoric) do
7
+
5
8
  a=40
6
9
  b=10
7
10
  c=20
8
11
  d=30
9
- tetra=Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
10
- puts tetra.summary
12
+ summary tetrachoric(a,b,c,d)
13
+ end
14
+
15
+ if __FILE__==$0
16
+ Statsample::Analysis.run_batch
17
+ end
data/examples/u_test.rb CHANGED
@@ -1,8 +1,16 @@
1
1
  #!/usr/bin/ruby
2
2
  $:.unshift(File.dirname(__FILE__)+'/../lib')
3
3
  require 'statsample'
4
- a=10.times.map {rand(100)}.to_scale
5
- b=20.times.map {(rand(20))**2+50}.to_scale
6
4
 
7
- u=Statsample::Test::UMannWhitney.new(a,b)
8
- puts u.summary
5
+ Statsample::Analysis.store(Statsample::Test::UMannWhitney) do
6
+
7
+ a=10.times.map {rand(100)}.to_scale
8
+ b=20.times.map {(rand(20))**2+50}.to_scale
9
+
10
+ u=Statsample::Test::UMannWhitney.new(a,b)
11
+ summary u
12
+ end
13
+
14
+ if __FILE__==$0
15
+ Statsample::Analysis.run_batch
16
+ end
data/examples/vector.rb CHANGED
@@ -2,5 +2,16 @@
2
2
  $:.unshift(File.dirname(__FILE__)+'/../lib/')
3
3
 
4
4
  require 'statsample'
5
- a=1000.times.collect {r=rand(5); r==4 ? nil: r;}.to_scale
6
- puts a.summary
5
+
6
+ Statsample::Analysis.store(Statsample::Vector) do
7
+
8
+ a=Statsample::Vector.new_scale(1000) {r=rand(5); r==4 ? nil: r;}
9
+ summary a
10
+ b=c(1,2,3,4,6..10)
11
+ summary b
12
+
13
+ end
14
+
15
+ if __FILE__==$0
16
+ Statsample::Analysis.run_batch
17
+ end
@@ -2,34 +2,41 @@
2
2
  $:.unshift(File.dirname(__FILE__)+'/../lib/')
3
3
 
4
4
  require 'statsample'
5
- samples=100
6
- variables=10
7
- rng = GSL::Rng.alloc()
8
- f1=samples.times.collect {rng.ugaussian()}.to_scale
9
- f2=samples.times.collect {rng.ugaussian()}.to_scale
10
5
 
11
- vectors={}
12
-
13
- variables.times do |i|
6
+ Statsample::Analysis.store(Statsample::Factor::MAP) do
7
+
8
+ rng=Distribution::Normal.rng
9
+ samples=100
10
+ variables=10
11
+
12
+ f1=rnorm(samples)
13
+ f2=rnorm(samples)
14
+
15
+ vectors={}
16
+
17
+ variables.times do |i|
14
18
  vectors["v#{i}"]=samples.times.collect {|nv|
15
- if i<5
16
- f1[nv]*5 + f2[nv] *2 +rng.ugaussian()
17
- else
18
- f1[nv]*2 + f2[nv] *3 +rng.ugaussian()
19
- end
19
+ if i<5
20
+ f1[nv]*5 + f2[nv] *2 +rng.call
21
+ else
22
+ f1[nv]*2 + f2[nv] *3 +rng.call
23
+ end
20
24
  }.to_scale
25
+ end
26
+
27
+
28
+ ds=vectors.to_dataset
29
+ cor=cor(ds)
30
+ pca=pca(cor)
31
+
32
+ map=Statsample::Factor::MAP.new(cor)
33
+
34
+ echo ("There are 2 real factors on data")
35
+ summary(pca)
36
+ echo("Traditional Kaiser criterion (k>1) returns #{pca.m} factors")
37
+ summary(map)
38
+ echo("Velicer's MAP Test returns #{map.number_of_factors} factors to preserve")
21
39
  end
22
- ds=vectors.to_dataset
23
- cor=Statsample::Bivariate.correlation_matrix(ds)
24
- map=Statsample::Factor::MAP.new(cor)
25
- pca=Statsample::Factor::PCA.new(cor)
26
-
27
- rb=ReportBuilder.new(:name=>"Velicer's MAP test") do |g|
28
- g.text("There are 2 real factors on data")
29
- g.parse_element(pca)
30
- g.text("Traditional Kaiser criterion (k>1) returns #{pca.m} factors")
31
- g.parse_element(map)
32
- g.text("Velicer's MAP Test returns #{map.number_of_factors} factors to preserve")
40
+ if __FILE__==$0
41
+ Statsample::Analysis.run_batch
33
42
  end
34
-
35
- puts rb.to_text
data/lib/statsample.rb CHANGED
@@ -41,6 +41,17 @@ class String
41
41
  end
42
42
  end
43
43
 
44
+ class Module
45
+ def include_aliasing(m, suffix="ruby")
46
+ m.instance_methods.each do |f|
47
+ if instance_methods.include? f
48
+ alias_method("#{f}_#{suffix}",f)
49
+ remove_method f
50
+ end
51
+ end
52
+ include m
53
+ end
54
+ end
44
55
 
45
56
  class Array
46
57
  # Recode repeated values on an array, adding the number of repetition
@@ -105,21 +116,27 @@ end
105
116
  # * Interfaces to gdchart, gnuplot and SVG::Graph
106
117
  #
107
118
  module Statsample
108
- @@has_gsl=nil
109
- def self.has_gsl?
110
- if @@has_gsl.nil?
111
- begin
112
- require 'rbgsl'
113
- @@has_gsl=true
114
- rescue LoadError
115
- @@has_gsl=false
119
+
120
+ def self.create_has_library(library)
121
+ define_singleton_method("has_#{library}?") do
122
+ cv="@@#{library}"
123
+ if !class_variable_defined? cv
124
+ begin
125
+ require library.to_s
126
+ class_variable_set(cv,true)
127
+ rescue LoadError
128
+ class_variable_set(cv,false)
129
+ end
116
130
  end
131
+ class_variable_get(cv)
117
132
  end
118
- @@has_gsl
119
133
  end
120
134
 
121
- VERSION = '0.18.0'
135
+ create_has_library :gsl
136
+
137
+ VERSION = '1.0.0'
122
138
  SPLIT_TOKEN = ","
139
+ autoload(:Analysis, 'statsample/analysis')
123
140
  autoload(:Database, 'statsample/converters')
124
141
  autoload(:Anova, 'statsample/anova')
125
142
  autoload(:CSV, 'statsample/converters')
@@ -214,11 +231,12 @@ module Statsample
214
231
  ds=Statsample::Dataset.new(h).dup_only_valid
215
232
  ds.vectors.values
216
233
  end
234
+
217
235
  # Cheap version of #only_valid.
218
236
  # If any vectors have missing_values, return only valid.
219
- # If not, return the vectors it self
237
+ # If not, return the vectors itself
220
238
  def only_valid_clone(*vs)
221
- if vs.any? {|v| v.has_missing_data?}
239
+ if vs.any? {|v| v.flawed?}
222
240
  only_valid(*vs)
223
241
  else
224
242
  vs
@@ -294,3 +312,5 @@ require 'statsample/vector'
294
312
  require 'statsample/dataset'
295
313
  require 'statsample/crosstab'
296
314
  require 'statsample/matrix'
315
+ require 'statsample/shorthand'
316
+
@@ -0,0 +1,79 @@
1
+ require 'statsample/analysis/suite'
2
+ require 'statsample/analysis/suitereportbuilder'
3
+
4
+ module Statsample
5
+ # DSL to create analysis without hassle.
6
+ # * Shortcut methods that avoid writing out complete namespaces, many based on R
7
+ # * Attach/detach vectors to workspace, like R
8
+ # == Example
9
+ # an1=Statsample::Analysis.store(:first) do
10
+ # # Load excel file with x,y,z vectors
11
+ # ds=excel('data.xls')
12
+ # # See variables on ds dataset
13
+ # names(ds)
14
+ # # Attach the vectors to workspace, like R
15
+ # attach(ds)
16
+ # # vector 'x' is attached to workspace like a method,
17
+ # # so you can use like any variable
18
+ # mean,sd=x.mean, x.sd
19
+ # # Shameless R robbery
20
+ # a=c( 1:10)
21
+ # b=c(21:30)
22
+ # summary(cor(ds)) # Call summary method on correlation matrix
23
+ # end
24
+ # # You can run the analysis by its name
25
+ # Statsample::Analysis.run(:first)
26
+ # # or using the returned variables
27
+ # an1.run
28
+ # # You can also generate a report using ReportBuilder.
29
+ # # puts and pp are overloaded, so its output will be
30
+ # # redirected to report.
31
+ # # Summary method call 'report_building' on the object,
32
+ # # instead of calling summary
33
+ # an1.generate("report.html")
34
+ module Analysis
35
+ @@stored_analysis={}
36
+ @@last_analysis=nil
37
+ def self.stored_analysis
38
+ @@stored_analysis
39
+ end
40
+ def self.last
41
+ @@stored_analysis[@@last_analysis]
42
+ end
43
+ def self.store(name,opts=Hash.new,&block)
44
+ raise "You should provide a block" if !block
45
+ @@last_analysis=name
46
+ @@stored_analysis[name]=Suite.new(name,opts,&block)
47
+ end
48
+ # Run analysis +name+
49
+ # Without arguments, run the latest analysis
50
+ # Only 'echo' will be returned to screen
51
+ def self.run(name=nil)
52
+ name||=@@last_analysis
53
+ raise "Analysis #{name} doesn't exists" unless stored_analysis[name]
54
+ stored_analysis[name].run
55
+ end
56
+ # Run analysis and print to screen the output of all
57
+ # echo and summary calls
58
+ def self.run_batch(name=nil)
59
+ name||=@@last_analysis
60
+ raise "Analysis #{name} doesn't exists" unless stored_analysis[name]
61
+ puts stored_analysis[name].to_text
62
+ end
63
+ def self.save(filename, name=nil)
64
+ name||=@@last_analysis
65
+ raise "Analysis #{name} doesn't exists" unless stored_analysis[name]
66
+ puts stored_analysis[name].generate(filename)
67
+ end
68
+
69
+
70
+ # Run analysis and return as string
71
+ # output of echo callings
72
+ def self.to_text(name=nil)
73
+ name||=@@last_analysis
74
+ raise "Analysis #{name} doesn't exists" unless stored_analysis[name]
75
+ stored_analysis[name].to_text
76
+
77
+ end
78
+ end
79
+ end
@@ -0,0 +1,72 @@
1
+ module Statsample
2
+ module Analysis
3
+ class Suite
4
+ include Statsample::Shorthand
5
+ attr_accessor :output
6
+ attr_accessor :name
7
+ attr_reader :block
8
+ def initialize(name,opts=Hash.new(),&block)
9
+ @name=name
10
+ @block=block
11
+ @attached=[]
12
+ @output=opts[:output] || ::STDOUT
13
+
14
+ end
15
+ # Run the analysis, sending echo output to @output
16
+ def run
17
+ @block.arity<1 ? instance_eval(&@block) : @block.call(self)
18
+ end
19
+ def echo(*args)
20
+ @output.puts(*args)
21
+ end
22
+ def summary(obj)
23
+ obj.summary
24
+ end
25
+ def generate(filename)
26
+ ar=SuiteReportBuilder.new(name,&block)
27
+ ar.generate(filename)
28
+ end
29
+ def to_text
30
+ ar=SuiteReportBuilder.new(name, &block)
31
+ ar.to_text
32
+ end
33
+
34
+ def attach(ds)
35
+ @attached.push(ds)
36
+ end
37
+ def detach(ds=nil)
38
+ if ds.nil?
39
+ @attached.pop
40
+ else
41
+ @attached.delete(ds)
42
+ end
43
+ end
44
+ alias :old_boxplot :boxplot
45
+ alias :old_histogram :histogram
46
+ alias :old_scatterplot :scatterplot
47
+
48
+ def show_svg(svg)
49
+ require 'tmpdir'
50
+ fn=Dir.tmpdir+"/image_#{Time.now.to_f}.svg"
51
+ File.open(fn,"w") {|fp| fp.write svg}
52
+ `xdg-open '#{fn}'`
53
+ end
54
+ def boxplot(*args)
55
+ show_svg(old_boxplot(*args).to_svg)
56
+ end
57
+ def histogram(*args)
58
+ show_svg(old_histogram(*args).to_svg)
59
+ end
60
+ def scatterplot(*args)
61
+ show_svg(old_scatterplot(*args).to_svg)
62
+ end
63
+
64
+ def method_missing(name, *args,&block)
65
+ @attached.reverse.each do |ds|
66
+ return ds[name.to_s] if ds.fields.include? (name.to_s)
67
+ end
68
+ raise "Method #{name} doesn't exists"
69
+ end
70
+ end
71
+ end
72
+ end
@@ -0,0 +1,38 @@
1
+ module Statsample
2
+ module Analysis
3
+ class SuiteReportBuilder < Suite
4
+ attr_accessor :rb
5
+ def initialize(name,&block)
6
+ super(name,&block)
7
+ @rb=ReportBuilder.new(:name=>name)
8
+ end
9
+ def generate(filename)
10
+ run if @block
11
+ @rb.save(filename)
12
+ end
13
+ def to_text
14
+ run if @block
15
+ @rb.to_text
16
+ end
17
+ def summary(o)
18
+ @rb.add(o)
19
+ end
20
+ def echo(*args)
21
+ args.each do |a|
22
+ @rb.add(a)
23
+ end
24
+ end
25
+
26
+ def boxplot(*args)
27
+ @rb.add(old_boxplot(*args))
28
+ end
29
+ def histogram(*args)
30
+ @rb.add(old_histogram(*args))
31
+ end
32
+ def boxplot(*args)
33
+ @rb.add(old_boxplot(*args))
34
+ end
35
+
36
+ end
37
+ end
38
+ end