statsample 0.18.0 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (121) hide show
  1. data.tar.gz.sig +0 -0
  2. data/History.txt +23 -0
  3. data/Manifest.txt +28 -17
  4. data/Rakefile +3 -2
  5. data/benchmarks/correlation_matrix_15_variables.rb +31 -0
  6. data/benchmarks/correlation_matrix_5_variables.rb +32 -0
  7. data/benchmarks/correlation_matrix_methods/correlation_matrix.ds +0 -0
  8. data/benchmarks/correlation_matrix_methods/correlation_matrix.html +93 -0
  9. data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +75 -0
  10. data/benchmarks/correlation_matrix_methods/correlation_matrix.xls +0 -0
  11. data/benchmarks/correlation_matrix_methods/correlation_matrix_gsl_ruby.ods +0 -0
  12. data/benchmarks/correlation_matrix_methods/correlation_matrix_with_graphics.ods +0 -0
  13. data/benchmarks/correlation_matrix_methods/results.ds +0 -0
  14. data/benchmarks/factor_map.rb +37 -0
  15. data/benchmarks/helpers_benchmark.rb +5 -0
  16. data/examples/boxplot.rb +13 -14
  17. data/examples/correlation_matrix.rb +16 -8
  18. data/examples/dataset.rb +13 -4
  19. data/examples/dominance_analysis.rb +23 -17
  20. data/examples/dominance_analysis_bootstrap.rb +28 -22
  21. data/examples/histogram.rb +8 -9
  22. data/examples/icc.rb +20 -21
  23. data/examples/levene.rb +10 -4
  24. data/examples/multiple_regression.rb +9 -28
  25. data/examples/multivariate_correlation.rb +9 -3
  26. data/examples/parallel_analysis.rb +20 -16
  27. data/examples/polychoric.rb +15 -9
  28. data/examples/principal_axis.rb +18 -6
  29. data/examples/reliability.rb +26 -13
  30. data/examples/scatterplot.rb +10 -6
  31. data/examples/t_test.rb +15 -6
  32. data/examples/tetrachoric.rb +9 -2
  33. data/examples/u_test.rb +12 -4
  34. data/examples/vector.rb +13 -2
  35. data/examples/velicer_map_test.rb +33 -26
  36. data/lib/statsample.rb +32 -12
  37. data/lib/statsample/analysis.rb +79 -0
  38. data/lib/statsample/analysis/suite.rb +72 -0
  39. data/lib/statsample/analysis/suitereportbuilder.rb +38 -0
  40. data/lib/statsample/bivariate.rb +70 -16
  41. data/lib/statsample/dataset.rb +25 -19
  42. data/lib/statsample/dominanceanalysis.rb +2 -2
  43. data/lib/statsample/factor.rb +2 -0
  44. data/lib/statsample/factor/map.rb +16 -10
  45. data/lib/statsample/factor/parallelanalysis.rb +9 -3
  46. data/lib/statsample/factor/pca.rb +28 -32
  47. data/lib/statsample/factor/rotation.rb +15 -8
  48. data/lib/statsample/graph/boxplot.rb +3 -4
  49. data/lib/statsample/graph/histogram.rb +2 -1
  50. data/lib/statsample/graph/scatterplot.rb +1 -0
  51. data/lib/statsample/matrix.rb +106 -16
  52. data/lib/statsample/regression.rb +4 -1
  53. data/lib/statsample/regression/binomial.rb +1 -1
  54. data/lib/statsample/regression/multiple/baseengine.rb +19 -9
  55. data/lib/statsample/regression/multiple/gslengine.rb +127 -126
  56. data/lib/statsample/regression/multiple/matrixengine.rb +8 -5
  57. data/lib/statsample/regression/multiple/rubyengine.rb +1 -1
  58. data/lib/statsample/regression/simple.rb +31 -6
  59. data/lib/statsample/reliability.rb +11 -3
  60. data/lib/statsample/reliability/scaleanalysis.rb +4 -4
  61. data/lib/statsample/shorthand.rb +81 -0
  62. data/lib/statsample/test/chisquare.rb +1 -1
  63. data/lib/statsample/vector.rb +163 -163
  64. data/lib/statsample/vector/gsl.rb +106 -0
  65. data/references.txt +2 -2
  66. data/{data → test/fixtures}/crime.txt +0 -0
  67. data/{data → test/fixtures}/hartman_23.matrix +0 -0
  68. data/{data → test/fixtures}/repeated_fields.csv +0 -0
  69. data/{data → test/fixtures}/test_binomial.csv +0 -0
  70. data/test/{test_csv.csv → fixtures/test_csv.csv} +0 -0
  71. data/test/{test_xls.xls → fixtures/test_xls.xls} +0 -0
  72. data/{data → test/fixtures}/tetmat_matrix.txt +0 -0
  73. data/{data → test/fixtures}/tetmat_test.txt +0 -0
  74. data/test/helpers_tests.rb +18 -2
  75. data/test/test_analysis.rb +118 -0
  76. data/test/test_anovatwoway.rb +1 -1
  77. data/test/test_anovatwowaywithdataset.rb +1 -1
  78. data/test/test_anovawithvectors.rb +1 -2
  79. data/test/test_bartlettsphericity.rb +1 -2
  80. data/test/test_bivariate.rb +64 -22
  81. data/test/test_codification.rb +1 -2
  82. data/test/test_crosstab.rb +1 -2
  83. data/test/test_csv.rb +3 -4
  84. data/test/test_dataset.rb +24 -3
  85. data/test/test_dominance_analysis.rb +1 -2
  86. data/test/test_factor.rb +8 -69
  87. data/test/test_factor_map.rb +43 -0
  88. data/test/test_factor_pa.rb +54 -0
  89. data/test/test_ggobi.rb +1 -1
  90. data/test/test_gsl.rb +12 -18
  91. data/test/test_histogram.rb +1 -2
  92. data/test/test_logit.rb +62 -18
  93. data/test/test_matrix.rb +4 -5
  94. data/test/test_mle.rb +3 -4
  95. data/test/test_regression.rb +21 -2
  96. data/test/test_reliability.rb +3 -3
  97. data/test/test_reliability_icc.rb +1 -1
  98. data/test/test_reliability_skillscale.rb +20 -4
  99. data/test/test_resample.rb +1 -2
  100. data/test/test_rserve_extension.rb +1 -2
  101. data/test/test_srs.rb +1 -2
  102. data/test/test_statistics.rb +1 -2
  103. data/test/test_stest.rb +1 -2
  104. data/test/test_stratified.rb +1 -2
  105. data/test/test_test_f.rb +1 -2
  106. data/test/test_test_t.rb +1 -2
  107. data/test/test_umannwhitney.rb +1 -2
  108. data/test/test_vector.rb +117 -18
  109. data/test/test_xls.rb +2 -3
  110. data/web/Rakefile +39 -0
  111. metadata +109 -29
  112. metadata.gz.sig +0 -0
  113. data/examples/parallel_analysis_tetrachoric.rb +0 -31
  114. data/lib/distribution.rb +0 -25
  115. data/lib/distribution/chisquare.rb +0 -23
  116. data/lib/distribution/f.rb +0 -35
  117. data/lib/distribution/normal.rb +0 -60
  118. data/lib/distribution/normalbivariate.rb +0 -284
  119. data/lib/distribution/normalmultivariate.rb +0 -73
  120. data/lib/distribution/t.rb +0 -55
  121. data/test/test_distribution.rb +0 -73
data.tar.gz.sig CHANGED
Binary file
data/History.txt CHANGED
@@ -1,3 +1,26 @@
1
+ === 1.0.0 / 2011-01-27
2
+
3
+ * Added Statsample::Analysis, a beautiful DSL to perform fast statistical analysis using statsample. See directory /examples
4
+ * Created benchmarks directory
5
+ * Removed Distribution module from statsample and moved it to a gem. Changed code to reflect the new API
6
+ * Optimized simple regression. Better library detection
7
+ * New 'should_with_gsl' to test methods with gsl. Refactored Factor::MAP
8
+ * Almost complete GSL cleanup on Vector
9
+ * Updated some doc on Vector
10
+ * Used GSL::Matrix on Factor classes when available
11
+ * SkillScaleAnalysis doesn't crash with one or more vectors with 0 variance
12
+ * Modified examples using Statsample::Analysis
13
+ * Simplified eigen calculations
14
+ * Updated some examples. Added correlation matrix speed suite
15
+ * Correlation matrix optimized. Better specs
16
+ * Optimized correlation matrix. Use gsl matrix algebra or pairwise correlations depending on empirically calculated equations. See benchmarks/correlation_matrix.rb for the implementation of the calculation
17
+ * Moved test fixtures from data to test/fixtures
18
+ * Fixed some errors on tests
19
+ * Bug fix: constant_se on binomial regression had an error
20
+ * All tests should work on ruby 1.9.3
21
+ * New Vector.[] and Vector.new_scale
22
+ * Detect linearly dependent predictors on OLS.
23
+
1
24
  === 0.18.0 / 2011-01-07
2
25
  * New Statsample.load_excel
3
26
  * New Statsample.load_csv
data/Manifest.txt CHANGED
@@ -3,14 +3,19 @@ LICENSE.txt
3
3
  Manifest.txt
4
4
  README.txt
5
5
  Rakefile
6
+ benchmarks/correlation_matrix_15_variables.rb
7
+ benchmarks/correlation_matrix_5_variables.rb
8
+ benchmarks/correlation_matrix_methods/correlation_matrix.ds
9
+ benchmarks/correlation_matrix_methods/correlation_matrix.html
10
+ benchmarks/correlation_matrix_methods/correlation_matrix.rb
11
+ benchmarks/correlation_matrix_methods/correlation_matrix.xls
12
+ benchmarks/correlation_matrix_methods/correlation_matrix_gsl_ruby.ods
13
+ benchmarks/correlation_matrix_methods/correlation_matrix_with_graphics.ods
14
+ benchmarks/correlation_matrix_methods/results.ds
15
+ benchmarks/factor_map.rb
16
+ benchmarks/helpers_benchmark.rb
6
17
  bin/statsample
7
- data/crime.txt
8
- data/hartman_23.matrix
9
18
  data/locale/es/LC_MESSAGES/statsample.mo
10
- data/repeated_fields.csv
11
- data/test_binomial.csv
12
- data/tetmat_matrix.txt
13
- data/tetmat_test.txt
14
19
  doc_latex/manual/equations.tex
15
20
  examples/boxplot.rb
16
21
  examples/correlation_matrix.rb
@@ -23,7 +28,6 @@ examples/levene.rb
23
28
  examples/multiple_regression.rb
24
29
  examples/multivariate_correlation.rb
25
30
  examples/parallel_analysis.rb
26
- examples/parallel_analysis_tetrachoric.rb
27
31
  examples/polychoric.rb
28
32
  examples/principal_axis.rb
29
33
  examples/reliability.rb
@@ -34,15 +38,11 @@ examples/u_test.rb
34
38
  examples/vector.rb
35
39
  examples/velicer_map_test.rb
36
40
  grab_references.rb
37
- lib/distribution.rb
38
- lib/distribution/chisquare.rb
39
- lib/distribution/f.rb
40
- lib/distribution/normal.rb
41
- lib/distribution/normalbivariate.rb
42
- lib/distribution/normalmultivariate.rb
43
- lib/distribution/t.rb
44
41
  lib/spss.rb
45
42
  lib/statsample.rb
43
+ lib/statsample/analysis.rb
44
+ lib/statsample/analysis/suite.rb
45
+ lib/statsample/analysis/suitereportbuilder.rb
46
46
  lib/statsample/anova.rb
47
47
  lib/statsample/anova/oneway.rb
48
48
  lib/statsample/anova/twoway.rb
@@ -91,6 +91,7 @@ lib/statsample/reliability/scaleanalysis.rb
91
91
  lib/statsample/reliability/skillscaleanalysis.rb
92
92
  lib/statsample/resample.rb
93
93
  lib/statsample/rserve_extension.rb
94
+ lib/statsample/shorthand.rb
94
95
  lib/statsample/srs.rb
95
96
  lib/statsample/test.rb
96
97
  lib/statsample/test/bartlettsphericity.rb
@@ -100,6 +101,7 @@ lib/statsample/test/levene.rb
100
101
  lib/statsample/test/t.rb
101
102
  lib/statsample/test/umannwhitney.rb
102
103
  lib/statsample/vector.rb
104
+ lib/statsample/vector/gsl.rb
103
105
  po/es/statsample.mo
104
106
  po/es/statsample.po
105
107
  po/statsample.pot
@@ -107,7 +109,16 @@ references.txt
107
109
  setup.rb
108
110
  test/fixtures/bank2.dat
109
111
  test/fixtures/correlation_matrix.rb
112
+ test/fixtures/crime.txt
113
+ test/fixtures/hartman_23.matrix
114
+ test/fixtures/repeated_fields.csv
115
+ test/fixtures/test_binomial.csv
116
+ test/fixtures/test_csv.csv
117
+ test/fixtures/test_xls.xls
118
+ test/fixtures/tetmat_matrix.txt
119
+ test/fixtures/tetmat_test.txt
110
120
  test/helpers_tests.rb
121
+ test/test_analysis.rb
111
122
  test/test_anovaoneway.rb
112
123
  test/test_anovatwoway.rb
113
124
  test/test_anovatwowaywithdataset.rb
@@ -116,12 +127,12 @@ test/test_bartlettsphericity.rb
116
127
  test/test_bivariate.rb
117
128
  test/test_codification.rb
118
129
  test/test_crosstab.rb
119
- test/test_csv.csv
120
130
  test/test_csv.rb
121
131
  test/test_dataset.rb
122
- test/test_distribution.rb
123
132
  test/test_dominance_analysis.rb
124
133
  test/test_factor.rb
134
+ test/test_factor_map.rb
135
+ test/test_factor_pa.rb
125
136
  test/test_ggobi.rb
126
137
  test/test_gsl.rb
127
138
  test/test_histogram.rb
@@ -144,4 +155,4 @@ test/test_test_t.rb
144
155
  test/test_umannwhitney.rb
145
156
  test/test_vector.rb
146
157
  test/test_xls.rb
147
- test/test_xls.xls
158
+ web/Rakefile
data/Rakefile CHANGED
@@ -40,9 +40,10 @@ h=Hoe.spec('statsample') do
40
40
  #self.testlib=:minitest
41
41
  self.rubyforge_name = "ruby-statsample"
42
42
  self.developer('Claudio Bustos', 'clbustos@gmail.com')
43
- self.extra_deps << ["spreadsheet","~>0.6.5"] << ["reportbuilder", "~>1.4"] << ["minimization", "~>0.2.0"] << ["fastercsv", ">0"] << ["dirty-memoize", "~>0.0"] << ["extendmatrix","~>0.3.1"] << ["statsample-bivariate-extension", ">0"] << ["rserve-client", "~>0.2.5"] << ["rubyvis", "~>0.4.0"]
43
+ self.extra_deps << ["spreadsheet","~>0.6.5"] << ["reportbuilder", "~>1.4"] << ["minimization", "~>0.2.0"] << ["fastercsv", ">0"] << ["dirty-memoize", "~>0.0"] << ["extendmatrix","~>0.3.1"] << ["statsample-bivariate-extension", ">0"] << ["rserve-client", "~>0.2.5"] << ["rubyvis", "~>0.4.0"] << ["distribution", "~>0.2.0"]
44
+
45
+ self.extra_dev_deps << ["hoe","~>0"] << ["shoulda","~>0"] << ["minitest", "~>2.0"] << ["rserve-client", "~>0"] << ["gettext", "~>0"] << ["mocha", "~>0"] << ["hoe-git", "~>0"]
44
46
 
45
- self.extra_dev_deps << ["hoe","~>0"] << ["shoulda","~>0"] << ["minitest", "~>2.0"]
46
47
  self.clean_globs << "test/images/*" << "demo/item_analysis/*" << "demo/Regression"
47
48
  self.post_install_message = <<-EOF
48
49
  ***************************************************
@@ -0,0 +1,31 @@
1
+ require(File.expand_path(File.dirname(__FILE__)+'/helpers_benchmark.rb'))
2
+
3
+ extend BenchPress
4
+ cases=250
5
+ vars=20
6
+
7
+
8
+ name "gsl matrix based vs. manual ruby correlation matrix (#{vars} vars, #{cases} cases)"
9
+ author 'Clbustos'
10
+ date '2011-01-18'
11
+ summary "
12
+ A correlation matrix could be constructed using matrix algebra or
13
+ mannualy, calculating covariances, means and sd for each pair of vectors.
14
+ In this test, we test the calculation using #{vars} variables with
15
+ #{cases} cases on each vector
16
+ "
17
+
18
+ reps 200 #number of repetitions
19
+
20
+ ds=vars.times.inject({}) {|ac,v|
21
+ ac["x#{v}"]=Statsample::Vector.new_scale(cases) {rand()}
22
+ ac
23
+ }.to_dataset
24
+
25
+ measure "Statsample::Bivariate.correlation_matrix_optimized" do
26
+ Statsample::Bivariate.correlation_matrix_optimized(ds)
27
+ end
28
+
29
+ measure "Statsample::Bivariate.correlation_matrix_pairwise" do
30
+ Statsample::Bivariate.correlation_matrix_pairwise(ds)
31
+ end
@@ -0,0 +1,32 @@
1
+ require(File.expand_path(File.dirname(__FILE__)+'/helpers_benchmark.rb'))
2
+
3
+ extend BenchPress
4
+ cases=500
5
+ vars=5
6
+
7
+
8
+ name "gsl matrix based vs. manual ruby correlation matrix (#{vars} vars, #{cases} cases)"
9
+ author 'Clbustos'
10
+ date '2011-01-18'
11
+ summary "
12
+ A correlation matrix could be constructed using matrix algebra or
13
+ mannualy, calculating covariances, means and sd for each pair of vectors.
14
+ In this test, we test the calculation using #{vars} variables with
15
+ #{cases} cases on each vector
16
+ "
17
+
18
+ reps 200 #number of repetitions
19
+
20
+
21
+ ds=vars.times.inject({}) {|ac,v|
22
+ ac["x#{v}"]=Statsample::Vector.new_scale(cases) {rand()}
23
+ ac
24
+ }.to_dataset
25
+
26
+ measure "Statsample::Bivariate.correlation_matrix_optimized" do
27
+ Statsample::Bivariate.correlation_matrix_optimized(ds)
28
+ end
29
+
30
+ measure "Statsample::Bivariate.correlation_matrix_pairwise" do
31
+ Statsample::Bivariate.correlation_matrix_pairwise(ds)
32
+ end
@@ -0,0 +1,93 @@
1
+ <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
2
+ <html>
3
+ <head>
4
+ <meta http-equiv="Content-Type" content="text/html;charset=utf-8" >
5
+ <title>Correlation matrix analysis</title>
6
+ <style>
7
+ body {
8
+ margin:0;
9
+ padding:1em;
10
+ }
11
+ table {
12
+ border-collapse: collapse;
13
+
14
+ }
15
+ table td {
16
+ border: 1px solid black;
17
+ }
18
+ .section {
19
+ margin:0.5em;
20
+ }
21
+ </style>
22
+
23
+ </head><body>
24
+ <h1>Correlation matrix analysis</h1><div id='toc'><div class='title'>List of contents</div>
25
+ <ul>
26
+ <li><a href='#toc_1'>Multiple reggresion of cases,vars,c_v on time_optimized</a></li>
27
+ <ul>
28
+ <li><a href='#toc_2'>ANOVA</a></li>
29
+ </ul>
30
+ <li><a href='#toc_3'>Multiple reggresion of cases,vars,c_v on time_pairwise</a></li>
31
+ <ul>
32
+ <li><a href='#toc_4'>ANOVA</a></li>
33
+ </ul>
34
+ </ul>
35
+ </div>
36
+ <div class='tot'><div class='title'>List of tables</div><ul><li><a href='#table_1'>ANOVA Table</a></li><li><a href='#table_2'>Beta coefficients</a></li><li><a href='#table_3'>ANOVA Table</a></li><li><a href='#table_4'>Beta coefficients</a></li></ul></div>
37
+ <div class='section'><h2>Multiple reggresion of cases,vars,c_v on time_optimized</h2><a name='toc_1'></a>
38
+ <p>Engine: Statsample::Regression::Multiple::RubyEngine</p>
39
+ <p>Cases(listwise)=63(63)</p>
40
+ <p>R=0.978844</p>
41
+ <p>R^2=0.958137</p>
42
+ <p>R^2 Adj=0.956008</p>
43
+ <p>Std.Error R=3.092024</p>
44
+ <p>Equation=4.031667 + 0.018039cases + 0.244790vars + 0.001197c_v</p>
45
+ <div class='section'><h3>ANOVA</h3><a name='toc_2'></a>
46
+ <a name='table_1'></a><table><caption>ANOVA Table</caption><thead><th>source</th><th>ss</th><th>df</th><th>ms</th><th>f</th><th>p</th></thead>
47
+ <tbody>
48
+ <tr><td>Regression</td><td>12910.098</td><td>3</td><td>4303.366</td><td>450.114</td><td>0.000</td></tr>
49
+ <tr><td>Error</td><td>564.076</td><td>59</td><td>9.561</td><td></td><td></td></tr>
50
+ <tr><td>Total</td><td>13474.174</td><td>62</td><td>4312.927</td><td></td><td></td></tr>
51
+ </tbody>
52
+ </table>
53
+
54
+ </div>
55
+ <a name='table_2'></a><table><caption>Beta coefficients</caption><thead><th>coeff</th><th>b</th><th>beta</th><th>se</th><th>t</th></thead>
56
+ <tbody>
57
+ <tr><td>Constant</td><td>4.031667</td><td>-</td><td>0.752604</td><td>5.356953</td></tr>
58
+ <tr><td>cases</td><td>0.018039</td><td>0.381587</td><td>0.001961</td><td>9.200093</td></tr>
59
+ <tr><td>vars</td><td>0.244790</td><td>0.224390</td><td>0.036055</td><td>6.789335</td></tr>
60
+ <tr><td>c_v</td><td>0.001197</td><td>0.584174</td><td>0.000094</td><td>12.738410</td></tr>
61
+ </tbody>
62
+ </table>
63
+
64
+ </div>
65
+ <div class='section'><h2>Multiple reggresion of cases,vars,c_v on time_pairwise</h2><a name='toc_3'></a>
66
+ <p>Engine: Statsample::Regression::Multiple::RubyEngine</p>
67
+ <p>Cases(listwise)=63(63)</p>
68
+ <p>R=0.999637</p>
69
+ <p>R^2=0.999275</p>
70
+ <p>R^2 Adj=0.999238</p>
71
+ <p>Std.Error R=0.538365</p>
72
+ <p>Equation=-0.520303 + -0.000708cases + 1.234451vars + 0.000735c_v</p>
73
+ <div class='section'><h3>ANOVA</h3><a name='toc_4'></a>
74
+ <a name='table_3'></a><table><caption>ANOVA Table</caption><thead><th>source</th><th>ss</th><th>df</th><th>ms</th><th>f</th><th>p</th></thead>
75
+ <tbody>
76
+ <tr><td>Regression</td><td>23554.271</td><td>3</td><td>7851.424</td><td>27089.134</td><td>0.000</td></tr>
77
+ <tr><td>Error</td><td>17.100</td><td>59</td><td>0.290</td><td></td><td></td></tr>
78
+ <tr><td>Total</td><td>23571.372</td><td>62</td><td>7851.714</td><td></td><td></td></tr>
79
+ </tbody>
80
+ </table>
81
+
82
+ </div>
83
+ <a name='table_4'></a><table><caption>Beta coefficients</caption><thead><th>coeff</th><th>b</th><th>beta</th><th>se</th><th>t</th></thead>
84
+ <tbody>
85
+ <tr><td>Constant</td><td>-0.520303</td><td>-</td><td>0.131039</td><td>-3.970594</td></tr>
86
+ <tr><td>cases</td><td>-0.000708</td><td>-0.011324</td><td>0.000341</td><td>-2.074007</td></tr>
87
+ <tr><td>vars</td><td>1.234451</td><td>0.855546</td><td>0.006278</td><td>196.641087</td></tr>
88
+ <tr><td>c_v</td><td>0.000735</td><td>0.271138</td><td>0.000016</td><td>44.912972</td></tr>
89
+ </tbody>
90
+ </table>
91
+
92
+ </div>
93
+ </body></html>
@@ -0,0 +1,75 @@
1
+ # This test creates a database to adjust the best algorithm
2
+ # to use on correlation matrix
3
+ require(File.expand_path(File.dirname(__FILE__)+'/../helpers_benchmark.rb'))
4
+ require 'statsample'
5
+ require 'benchmark'
6
+
7
+ def create_dataset(vars,cases)
8
+ ran=Distribution::Normal.rng
9
+ ds=vars.times.inject({}) {|ac,v|
10
+ ac["x#{v}"]=Statsample::Vector.new_scale(cases) {ran.call}
11
+ ac
12
+ }.to_dataset
13
+ end
14
+
15
+ def prediction_pairwise(vars,cases)
16
+ Statsample::Bivariate.prediction_pairwise(vars,cases) / 10
17
+ end
18
+ def prediction_optimized(vars,cases)
19
+ Statsample::Bivariate.prediction_optimized(vars,cases) / 10
20
+ end
21
+
22
+
23
+
24
+ if !File.exists?("correlation_matrix.ds") or File.mtime(__FILE__) > File.mtime("correlation_matrix.ds")
25
+ reps=100 #number of repetitions
26
+ ds_sizes=[5,10,30,50,100,150,200,500,1000]
27
+ ds_vars=[3,4,5,10,20,30,40]
28
+ #ds_sizes=[5,10]
29
+ #ds_vars=[3,5,20]
30
+ rs=Statsample::Dataset.new(%w{cases vars time_optimized time_pairwise})
31
+
32
+ ds_sizes.each do |cases|
33
+ ds_vars.each do |vars|
34
+ ds=create_dataset(vars,cases)
35
+ time_optimized= Benchmark.realtime do
36
+ reps.times {
37
+ Statsample::Bivariate.correlation_matrix_optimized(ds)
38
+ ds.clear_gsl
39
+ }
40
+ end
41
+
42
+ time_pairwise= Benchmark.realtime do
43
+ reps.times {
44
+ Statsample::Bivariate.correlation_matrix_pairwise(ds)
45
+ }
46
+ end
47
+
48
+ puts "Cases:#{cases}, vars:#{vars} -> opt:%0.3f (%0.3f) | pair: %0.3f (%0.3f)" % [time_optimized, prediction_optimized(vars,cases), time_pairwise, prediction_pairwise(vars,cases)]
49
+
50
+ rs.add_case({'cases'=>cases,'vars'=>vars,'time_optimized'=>Math.sqrt(time_optimized*1000),'time_pairwise'=>Math.sqrt(time_pairwise*1000)})
51
+ end
52
+ end
53
+
54
+ else
55
+ rs=Statsample.load("correlation_matrix.ds")
56
+ end
57
+
58
+
59
+ rs.fields.each {|f| rs[f].type=:scale}
60
+
61
+ rs['c_v']=rs.collect {|row| row['cases']*row['vars']}
62
+
63
+ rs.update_valid_data
64
+ rs.save("correlation_matrix.ds")
65
+ Statsample::Excel.write(rs,"correlation_matrix.xls")
66
+
67
+
68
+
69
+ rb=ReportBuilder.new(:name=>"Correlation matrix analysis")
70
+
71
+ rb.add(Statsample::Regression.multiple(rs[['cases','vars','time_optimized','c_v']],'time_optimized', :digits=>6))
72
+ rb.add(Statsample::Regression.multiple(rs[['cases','vars','time_pairwise','c_v']],'time_pairwise', :digits=>6))
73
+
74
+
75
+ rb.save_html("correlation_matrix.html")
@@ -0,0 +1,37 @@
1
+ require(File.expand_path(File.dirname(__FILE__)+'/helpers_benchmark.rb'))
2
+
3
+ extend BenchPress
4
+
5
+
6
+ name "Statsample::Factor::Map with and without GSL"
7
+ author 'Clbustos'
8
+ date '2011-01-18'
9
+ summary "Velicer's MAP uses a lot of Matrix algebra. How much we can improve the timing using GSL?
10
+ "
11
+
12
+ reps 20 #number of repetitions
13
+
14
+ m=Matrix[
15
+ [ 1, 0.846, 0.805, 0.859, 0.473, 0.398, 0.301, 0.382],
16
+ [ 0.846, 1, 0.881, 0.826, 0.376, 0.326, 0.277, 0.415],
17
+ [ 0.805, 0.881, 1, 0.801, 0.38, 0.319, 0.237, 0.345],
18
+ [ 0.859, 0.826, 0.801, 1, 0.436, 0.329, 0.327, 0.365],
19
+ [ 0.473, 0.376, 0.38, 0.436, 1, 0.762, 0.73, 0.629],
20
+ [ 0.398, 0.326, 0.319, 0.329, 0.762, 1, 0.583, 0.577],
21
+ [ 0.301, 0.277, 0.237, 0.327, 0.73, 0.583, 1, 0.539],
22
+ [ 0.382, 0.415, 0.345, 0.365, 0.629, 0.577, 0.539, 1]
23
+ ]
24
+
25
+ map=Statsample::Factor::MAP.new(m)
26
+
27
+
28
+ measure "Statsample::Factor::MAP without GSL" do
29
+ map.use_gsl=false
30
+ map.compute
31
+ end
32
+
33
+ measure "Statsample::Factor::MAP with GSL" do
34
+ map.use_gsl=true
35
+ map.compute
36
+ end
37
+
@@ -0,0 +1,5 @@
1
+ $:.unshift(File.expand_path(File.dirname(__FILE__)+'/../lib/'))
2
+ $:.unshift(File.expand_path(File.dirname(__FILE__)+'/'))
3
+
4
+ require 'statsample'
5
+ require 'bench_press'
data/examples/boxplot.rb CHANGED
@@ -1,17 +1,16 @@
1
1
  #!/usr/bin/ruby
2
2
  $:.unshift(File.dirname(__FILE__)+'/../lib/')
3
- $:.unshift('/home/cdx/dev/reportbuilder/lib/')
4
-
5
- require 'benchmark'
6
3
  require 'statsample'
7
- n=100
8
- a=(n-1).times.map {|i| rand()*20+50}
9
- b=n.times.map {|i| rand()*10+50}.to_scale
10
- c=n.times.map {|i| rand()*5+50}.to_scale
11
-
12
- a.push(30)
13
- a=a.to_scale
14
- sp=Statsample::Graph::Boxplot.new(:vectors=>[a,b,c],:width=>300, :height=>300, :groups=>%w{first first second}, :minimum=>0)
15
- rb=ReportBuilder.new
16
- rb.add(sp)
17
- puts rb.to_text
4
+ Statsample::Analysis.store(Statsample::Graph::Boxplot) do
5
+ n=30
6
+ a=rnorm(n-1,50,10)
7
+ b=rnorm(n, 30,5)
8
+ c=rnorm(n,5,1)
9
+ a.push(2)
10
+ boxplot(:vectors=>[a,b,c],:width=>300, :height=>300, :groups=>%w{first first second}, :minimum=>0)
11
+
12
+ end
13
+
14
+ if __FILE__==$0
15
+ Statsample::Analysis.run
16
+ end