statsample 0.18.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (121) hide show
  1. data.tar.gz.sig +0 -0
  2. data/History.txt +23 -0
  3. data/Manifest.txt +28 -17
  4. data/Rakefile +3 -2
  5. data/benchmarks/correlation_matrix_15_variables.rb +31 -0
  6. data/benchmarks/correlation_matrix_5_variables.rb +32 -0
  7. data/benchmarks/correlation_matrix_methods/correlation_matrix.ds +0 -0
  8. data/benchmarks/correlation_matrix_methods/correlation_matrix.html +93 -0
  9. data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +75 -0
  10. data/benchmarks/correlation_matrix_methods/correlation_matrix.xls +0 -0
  11. data/benchmarks/correlation_matrix_methods/correlation_matrix_gsl_ruby.ods +0 -0
  12. data/benchmarks/correlation_matrix_methods/correlation_matrix_with_graphics.ods +0 -0
  13. data/benchmarks/correlation_matrix_methods/results.ds +0 -0
  14. data/benchmarks/factor_map.rb +37 -0
  15. data/benchmarks/helpers_benchmark.rb +5 -0
  16. data/examples/boxplot.rb +13 -14
  17. data/examples/correlation_matrix.rb +16 -8
  18. data/examples/dataset.rb +13 -4
  19. data/examples/dominance_analysis.rb +23 -17
  20. data/examples/dominance_analysis_bootstrap.rb +28 -22
  21. data/examples/histogram.rb +8 -9
  22. data/examples/icc.rb +20 -21
  23. data/examples/levene.rb +10 -4
  24. data/examples/multiple_regression.rb +9 -28
  25. data/examples/multivariate_correlation.rb +9 -3
  26. data/examples/parallel_analysis.rb +20 -16
  27. data/examples/polychoric.rb +15 -9
  28. data/examples/principal_axis.rb +18 -6
  29. data/examples/reliability.rb +26 -13
  30. data/examples/scatterplot.rb +10 -6
  31. data/examples/t_test.rb +15 -6
  32. data/examples/tetrachoric.rb +9 -2
  33. data/examples/u_test.rb +12 -4
  34. data/examples/vector.rb +13 -2
  35. data/examples/velicer_map_test.rb +33 -26
  36. data/lib/statsample.rb +32 -12
  37. data/lib/statsample/analysis.rb +79 -0
  38. data/lib/statsample/analysis/suite.rb +72 -0
  39. data/lib/statsample/analysis/suitereportbuilder.rb +38 -0
  40. data/lib/statsample/bivariate.rb +70 -16
  41. data/lib/statsample/dataset.rb +25 -19
  42. data/lib/statsample/dominanceanalysis.rb +2 -2
  43. data/lib/statsample/factor.rb +2 -0
  44. data/lib/statsample/factor/map.rb +16 -10
  45. data/lib/statsample/factor/parallelanalysis.rb +9 -3
  46. data/lib/statsample/factor/pca.rb +28 -32
  47. data/lib/statsample/factor/rotation.rb +15 -8
  48. data/lib/statsample/graph/boxplot.rb +3 -4
  49. data/lib/statsample/graph/histogram.rb +2 -1
  50. data/lib/statsample/graph/scatterplot.rb +1 -0
  51. data/lib/statsample/matrix.rb +106 -16
  52. data/lib/statsample/regression.rb +4 -1
  53. data/lib/statsample/regression/binomial.rb +1 -1
  54. data/lib/statsample/regression/multiple/baseengine.rb +19 -9
  55. data/lib/statsample/regression/multiple/gslengine.rb +127 -126
  56. data/lib/statsample/regression/multiple/matrixengine.rb +8 -5
  57. data/lib/statsample/regression/multiple/rubyengine.rb +1 -1
  58. data/lib/statsample/regression/simple.rb +31 -6
  59. data/lib/statsample/reliability.rb +11 -3
  60. data/lib/statsample/reliability/scaleanalysis.rb +4 -4
  61. data/lib/statsample/shorthand.rb +81 -0
  62. data/lib/statsample/test/chisquare.rb +1 -1
  63. data/lib/statsample/vector.rb +163 -163
  64. data/lib/statsample/vector/gsl.rb +106 -0
  65. data/references.txt +2 -2
  66. data/{data → test/fixtures}/crime.txt +0 -0
  67. data/{data → test/fixtures}/hartman_23.matrix +0 -0
  68. data/{data → test/fixtures}/repeated_fields.csv +0 -0
  69. data/{data → test/fixtures}/test_binomial.csv +0 -0
  70. data/test/{test_csv.csv → fixtures/test_csv.csv} +0 -0
  71. data/test/{test_xls.xls → fixtures/test_xls.xls} +0 -0
  72. data/{data → test/fixtures}/tetmat_matrix.txt +0 -0
  73. data/{data → test/fixtures}/tetmat_test.txt +0 -0
  74. data/test/helpers_tests.rb +18 -2
  75. data/test/test_analysis.rb +118 -0
  76. data/test/test_anovatwoway.rb +1 -1
  77. data/test/test_anovatwowaywithdataset.rb +1 -1
  78. data/test/test_anovawithvectors.rb +1 -2
  79. data/test/test_bartlettsphericity.rb +1 -2
  80. data/test/test_bivariate.rb +64 -22
  81. data/test/test_codification.rb +1 -2
  82. data/test/test_crosstab.rb +1 -2
  83. data/test/test_csv.rb +3 -4
  84. data/test/test_dataset.rb +24 -3
  85. data/test/test_dominance_analysis.rb +1 -2
  86. data/test/test_factor.rb +8 -69
  87. data/test/test_factor_map.rb +43 -0
  88. data/test/test_factor_pa.rb +54 -0
  89. data/test/test_ggobi.rb +1 -1
  90. data/test/test_gsl.rb +12 -18
  91. data/test/test_histogram.rb +1 -2
  92. data/test/test_logit.rb +62 -18
  93. data/test/test_matrix.rb +4 -5
  94. data/test/test_mle.rb +3 -4
  95. data/test/test_regression.rb +21 -2
  96. data/test/test_reliability.rb +3 -3
  97. data/test/test_reliability_icc.rb +1 -1
  98. data/test/test_reliability_skillscale.rb +20 -4
  99. data/test/test_resample.rb +1 -2
  100. data/test/test_rserve_extension.rb +1 -2
  101. data/test/test_srs.rb +1 -2
  102. data/test/test_statistics.rb +1 -2
  103. data/test/test_stest.rb +1 -2
  104. data/test/test_stratified.rb +1 -2
  105. data/test/test_test_f.rb +1 -2
  106. data/test/test_test_t.rb +1 -2
  107. data/test/test_umannwhitney.rb +1 -2
  108. data/test/test_vector.rb +117 -18
  109. data/test/test_xls.rb +2 -3
  110. data/web/Rakefile +39 -0
  111. metadata +109 -29
  112. metadata.gz.sig +0 -0
  113. data/examples/parallel_analysis_tetrachoric.rb +0 -31
  114. data/lib/distribution.rb +0 -25
  115. data/lib/distribution/chisquare.rb +0 -23
  116. data/lib/distribution/f.rb +0 -35
  117. data/lib/distribution/normal.rb +0 -60
  118. data/lib/distribution/normalbivariate.rb +0 -284
  119. data/lib/distribution/normalmultivariate.rb +0 -73
  120. data/lib/distribution/t.rb +0 -55
  121. data/test/test_distribution.rb +0 -73
data.tar.gz.sig CHANGED
Binary file
data/History.txt CHANGED
@@ -1,3 +1,26 @@
1
+ === 1.0.0 / 2011-01-27
2
+
3
+ * Added Statsample::Analysis, a beautiful DSL to perform fast statistical analysis using statsample. See directory /examples
4
+ * Created benchmarks directory
5
+ * Removed Distribution module from statsample and moved to a gem. Changes on code to reflect new API
6
+ * Optimized simple regression. Better library detection
7
+ * New 'should_with_gsl' to test methods with gsl. Refactored Factor::MAP
8
+ * Almost complete GSL cleanup on Vector
9
+ * Updated some doc on Vector
10
+ * Used GSL::Matrix on Factor classes when available
11
+ * SkillScaleAnalysis doesn't crash with one or more vectors with 0 variance
12
+ * Modified examples using Statsample::Analysis
13
+ * Simplified eigen calculations
14
+ * Updated some examples. Added correlation matrix speed suite
15
+ * Correlation matrix optimized. Better specs
16
+ * Optimized correlation matrix. Use gsl matrix algebra or pairwise correlations depending on empirically calculated equations. See benchmarks/correlation_matrix.rb to see implementation of calculation
17
+ * Moved test fixtures from data to test/fixtures
18
+ * Fixed some errors on tests
19
+ * Bug fix: constant_se on binomial regression had an error
20
+ * All tests should work on ruby 1.9.3
21
+ * New Vector.[] and Vector.new_scale
22
+ * Detect linearly dependent predictors on OLS.
23
+
1
24
  === 0.18.0 / 2011-01-07
2
25
  * New Statsample.load_excel
3
26
  * New Statsample.load_csv
data/Manifest.txt CHANGED
@@ -3,14 +3,19 @@ LICENSE.txt
3
3
  Manifest.txt
4
4
  README.txt
5
5
  Rakefile
6
+ benchmarks/correlation_matrix_15_variables.rb
7
+ benchmarks/correlation_matrix_5_variables.rb
8
+ benchmarks/correlation_matrix_methods/correlation_matrix.ds
9
+ benchmarks/correlation_matrix_methods/correlation_matrix.html
10
+ benchmarks/correlation_matrix_methods/correlation_matrix.rb
11
+ benchmarks/correlation_matrix_methods/correlation_matrix.xls
12
+ benchmarks/correlation_matrix_methods/correlation_matrix_gsl_ruby.ods
13
+ benchmarks/correlation_matrix_methods/correlation_matrix_with_graphics.ods
14
+ benchmarks/correlation_matrix_methods/results.ds
15
+ benchmarks/factor_map.rb
16
+ benchmarks/helpers_benchmark.rb
6
17
  bin/statsample
7
- data/crime.txt
8
- data/hartman_23.matrix
9
18
  data/locale/es/LC_MESSAGES/statsample.mo
10
- data/repeated_fields.csv
11
- data/test_binomial.csv
12
- data/tetmat_matrix.txt
13
- data/tetmat_test.txt
14
19
  doc_latex/manual/equations.tex
15
20
  examples/boxplot.rb
16
21
  examples/correlation_matrix.rb
@@ -23,7 +28,6 @@ examples/levene.rb
23
28
  examples/multiple_regression.rb
24
29
  examples/multivariate_correlation.rb
25
30
  examples/parallel_analysis.rb
26
- examples/parallel_analysis_tetrachoric.rb
27
31
  examples/polychoric.rb
28
32
  examples/principal_axis.rb
29
33
  examples/reliability.rb
@@ -34,15 +38,11 @@ examples/u_test.rb
34
38
  examples/vector.rb
35
39
  examples/velicer_map_test.rb
36
40
  grab_references.rb
37
- lib/distribution.rb
38
- lib/distribution/chisquare.rb
39
- lib/distribution/f.rb
40
- lib/distribution/normal.rb
41
- lib/distribution/normalbivariate.rb
42
- lib/distribution/normalmultivariate.rb
43
- lib/distribution/t.rb
44
41
  lib/spss.rb
45
42
  lib/statsample.rb
43
+ lib/statsample/analysis.rb
44
+ lib/statsample/analysis/suite.rb
45
+ lib/statsample/analysis/suitereportbuilder.rb
46
46
  lib/statsample/anova.rb
47
47
  lib/statsample/anova/oneway.rb
48
48
  lib/statsample/anova/twoway.rb
@@ -91,6 +91,7 @@ lib/statsample/reliability/scaleanalysis.rb
91
91
  lib/statsample/reliability/skillscaleanalysis.rb
92
92
  lib/statsample/resample.rb
93
93
  lib/statsample/rserve_extension.rb
94
+ lib/statsample/shorthand.rb
94
95
  lib/statsample/srs.rb
95
96
  lib/statsample/test.rb
96
97
  lib/statsample/test/bartlettsphericity.rb
@@ -100,6 +101,7 @@ lib/statsample/test/levene.rb
100
101
  lib/statsample/test/t.rb
101
102
  lib/statsample/test/umannwhitney.rb
102
103
  lib/statsample/vector.rb
104
+ lib/statsample/vector/gsl.rb
103
105
  po/es/statsample.mo
104
106
  po/es/statsample.po
105
107
  po/statsample.pot
@@ -107,7 +109,16 @@ references.txt
107
109
  setup.rb
108
110
  test/fixtures/bank2.dat
109
111
  test/fixtures/correlation_matrix.rb
112
+ test/fixtures/crime.txt
113
+ test/fixtures/hartman_23.matrix
114
+ test/fixtures/repeated_fields.csv
115
+ test/fixtures/test_binomial.csv
116
+ test/fixtures/test_csv.csv
117
+ test/fixtures/test_xls.xls
118
+ test/fixtures/tetmat_matrix.txt
119
+ test/fixtures/tetmat_test.txt
110
120
  test/helpers_tests.rb
121
+ test/test_analysis.rb
111
122
  test/test_anovaoneway.rb
112
123
  test/test_anovatwoway.rb
113
124
  test/test_anovatwowaywithdataset.rb
@@ -116,12 +127,12 @@ test/test_bartlettsphericity.rb
116
127
  test/test_bivariate.rb
117
128
  test/test_codification.rb
118
129
  test/test_crosstab.rb
119
- test/test_csv.csv
120
130
  test/test_csv.rb
121
131
  test/test_dataset.rb
122
- test/test_distribution.rb
123
132
  test/test_dominance_analysis.rb
124
133
  test/test_factor.rb
134
+ test/test_factor_map.rb
135
+ test/test_factor_pa.rb
125
136
  test/test_ggobi.rb
126
137
  test/test_gsl.rb
127
138
  test/test_histogram.rb
@@ -144,4 +155,4 @@ test/test_test_t.rb
144
155
  test/test_umannwhitney.rb
145
156
  test/test_vector.rb
146
157
  test/test_xls.rb
147
- test/test_xls.xls
158
+ web/Rakefile
data/Rakefile CHANGED
@@ -40,9 +40,10 @@ h=Hoe.spec('statsample') do
40
40
  #self.testlib=:minitest
41
41
  self.rubyforge_name = "ruby-statsample"
42
42
  self.developer('Claudio Bustos', 'clbustos@gmail.com')
43
- self.extra_deps << ["spreadsheet","~>0.6.5"] << ["reportbuilder", "~>1.4"] << ["minimization", "~>0.2.0"] << ["fastercsv", ">0"] << ["dirty-memoize", "~>0.0"] << ["extendmatrix","~>0.3.1"] << ["statsample-bivariate-extension", ">0"] << ["rserve-client", "~>0.2.5"] << ["rubyvis", "~>0.4.0"]
43
+ self.extra_deps << ["spreadsheet","~>0.6.5"] << ["reportbuilder", "~>1.4"] << ["minimization", "~>0.2.0"] << ["fastercsv", ">0"] << ["dirty-memoize", "~>0.0"] << ["extendmatrix","~>0.3.1"] << ["statsample-bivariate-extension", ">0"] << ["rserve-client", "~>0.2.5"] << ["rubyvis", "~>0.4.0"] << ["distribution", "~>0.2.0"]
44
+
45
+ self.extra_dev_deps << ["hoe","~>0"] << ["shoulda","~>0"] << ["minitest", "~>2.0"] << ["rserve-client", "~>0"] << ["gettext", "~>0"] << ["mocha", "~>0"] << ["hoe-git", "~>0"]
44
46
 
45
- self.extra_dev_deps << ["hoe","~>0"] << ["shoulda","~>0"] << ["minitest", "~>2.0"]
46
47
  self.clean_globs << "test/images/*" << "demo/item_analysis/*" << "demo/Regression"
47
48
  self.post_install_message = <<-EOF
48
49
  ***************************************************
@@ -0,0 +1,31 @@
1
+ require(File.expand_path(File.dirname(__FILE__)+'/helpers_benchmark.rb'))
2
+
3
+ extend BenchPress
4
+ cases=250
5
+ vars=20
6
+
7
+
8
+ name "gsl matrix based vs. manual ruby correlation matrix (#{vars} vars, #{cases} cases)"
9
+ author 'Clbustos'
10
+ date '2011-01-18'
11
+ summary "
12
+ A correlation matrix could be constructed using matrix algebra or
13
+ mannualy, calculating covariances, means and sd for each pair of vectors.
14
+ In this test, we test the calculation using #{vars} variables with
15
+ #{cases} cases on each vector
16
+ "
17
+
18
+ reps 200 #number of repetitions
19
+
20
+ ds=vars.times.inject({}) {|ac,v|
21
+ ac["x#{v}"]=Statsample::Vector.new_scale(cases) {rand()}
22
+ ac
23
+ }.to_dataset
24
+
25
+ measure "Statsample::Bivariate.correlation_matrix_optimized" do
26
+ Statsample::Bivariate.correlation_matrix_optimized(ds)
27
+ end
28
+
29
+ measure "Statsample::Bivariate.correlation_matrix_pairwise" do
30
+ Statsample::Bivariate.correlation_matrix_pairwise(ds)
31
+ end
@@ -0,0 +1,32 @@
1
+ require(File.expand_path(File.dirname(__FILE__)+'/helpers_benchmark.rb'))
2
+
3
+ extend BenchPress
4
+ cases=500
5
+ vars=5
6
+
7
+
8
+ name "gsl matrix based vs. manual ruby correlation matrix (#{vars} vars, #{cases} cases)"
9
+ author 'Clbustos'
10
+ date '2011-01-18'
11
+ summary "
12
+ A correlation matrix could be constructed using matrix algebra or
13
+ mannualy, calculating covariances, means and sd for each pair of vectors.
14
+ In this test, we test the calculation using #{vars} variables with
15
+ #{cases} cases on each vector
16
+ "
17
+
18
+ reps 200 #number of repetitions
19
+
20
+
21
+ ds=vars.times.inject({}) {|ac,v|
22
+ ac["x#{v}"]=Statsample::Vector.new_scale(cases) {rand()}
23
+ ac
24
+ }.to_dataset
25
+
26
+ measure "Statsample::Bivariate.correlation_matrix_optimized" do
27
+ Statsample::Bivariate.correlation_matrix_optimized(ds)
28
+ end
29
+
30
+ measure "Statsample::Bivariate.correlation_matrix_pairwise" do
31
+ Statsample::Bivariate.correlation_matrix_pairwise(ds)
32
+ end
@@ -0,0 +1,93 @@
1
+ <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
2
+ <html>
3
+ <head>
4
+ <meta http-equiv="Content-Type" content="text/html;charset=utf-8" >
5
+ <title>Correlation matrix analysis</title>
6
+ <style>
7
+ body {
8
+ margin:0;
9
+ padding:1em;
10
+ }
11
+ table {
12
+ border-collapse: collapse;
13
+
14
+ }
15
+ table td {
16
+ border: 1px solid black;
17
+ }
18
+ .section {
19
+ margin:0.5em;
20
+ }
21
+ </style>
22
+
23
+ </head><body>
24
+ <h1>Correlation matrix analysis</h1><div id='toc'><div class='title'>List of contents</div>
25
+ <ul>
26
+ <li><a href='#toc_1'>Multiple reggresion of cases,vars,c_v on time_optimized</a></li>
27
+ <ul>
28
+ <li><a href='#toc_2'>ANOVA</a></li>
29
+ </ul>
30
+ <li><a href='#toc_3'>Multiple reggresion of cases,vars,c_v on time_pairwise</a></li>
31
+ <ul>
32
+ <li><a href='#toc_4'>ANOVA</a></li>
33
+ </ul>
34
+ </ul>
35
+ </div>
36
+ <div class='tot'><div class='title'>List of tables</div><ul><li><a href='#table_1'>ANOVA Table</a></li><li><a href='#table_2'>Beta coefficients</a></li><li><a href='#table_3'>ANOVA Table</a></li><li><a href='#table_4'>Beta coefficients</a></li></ul></div>
37
+ <div class='section'><h2>Multiple reggresion of cases,vars,c_v on time_optimized</h2><a name='toc_1'></a>
38
+ <p>Engine: Statsample::Regression::Multiple::RubyEngine</p>
39
+ <p>Cases(listwise)=63(63)</p>
40
+ <p>R=0.978844</p>
41
+ <p>R^2=0.958137</p>
42
+ <p>R^2 Adj=0.956008</p>
43
+ <p>Std.Error R=3.092024</p>
44
+ <p>Equation=4.031667 + 0.018039cases + 0.244790vars + 0.001197c_v</p>
45
+ <div class='section'><h3>ANOVA</h3><a name='toc_2'></a>
46
+ <a name='table_1'></a><table><caption>ANOVA Table</caption><thead><th>source</th><th>ss</th><th>df</th><th>ms</th><th>f</th><th>p</th></thead>
47
+ <tbody>
48
+ <tr><td>Regression</td><td>12910.098</td><td>3</td><td>4303.366</td><td>450.114</td><td>0.000</td></tr>
49
+ <tr><td>Error</td><td>564.076</td><td>59</td><td>9.561</td><td></td><td></td></tr>
50
+ <tr><td>Total</td><td>13474.174</td><td>62</td><td>4312.927</td><td></td><td></td></tr>
51
+ </tbody>
52
+ </table>
53
+
54
+ </div>
55
+ <a name='table_2'></a><table><caption>Beta coefficients</caption><thead><th>coeff</th><th>b</th><th>beta</th><th>se</th><th>t</th></thead>
56
+ <tbody>
57
+ <tr><td>Constant</td><td>4.031667</td><td>-</td><td>0.752604</td><td>5.356953</td></tr>
58
+ <tr><td>cases</td><td>0.018039</td><td>0.381587</td><td>0.001961</td><td>9.200093</td></tr>
59
+ <tr><td>vars</td><td>0.244790</td><td>0.224390</td><td>0.036055</td><td>6.789335</td></tr>
60
+ <tr><td>c_v</td><td>0.001197</td><td>0.584174</td><td>0.000094</td><td>12.738410</td></tr>
61
+ </tbody>
62
+ </table>
63
+
64
+ </div>
65
+ <div class='section'><h2>Multiple reggresion of cases,vars,c_v on time_pairwise</h2><a name='toc_3'></a>
66
+ <p>Engine: Statsample::Regression::Multiple::RubyEngine</p>
67
+ <p>Cases(listwise)=63(63)</p>
68
+ <p>R=0.999637</p>
69
+ <p>R^2=0.999275</p>
70
+ <p>R^2 Adj=0.999238</p>
71
+ <p>Std.Error R=0.538365</p>
72
+ <p>Equation=-0.520303 + -0.000708cases + 1.234451vars + 0.000735c_v</p>
73
+ <div class='section'><h3>ANOVA</h3><a name='toc_4'></a>
74
+ <a name='table_3'></a><table><caption>ANOVA Table</caption><thead><th>source</th><th>ss</th><th>df</th><th>ms</th><th>f</th><th>p</th></thead>
75
+ <tbody>
76
+ <tr><td>Regression</td><td>23554.271</td><td>3</td><td>7851.424</td><td>27089.134</td><td>0.000</td></tr>
77
+ <tr><td>Error</td><td>17.100</td><td>59</td><td>0.290</td><td></td><td></td></tr>
78
+ <tr><td>Total</td><td>23571.372</td><td>62</td><td>7851.714</td><td></td><td></td></tr>
79
+ </tbody>
80
+ </table>
81
+
82
+ </div>
83
+ <a name='table_4'></a><table><caption>Beta coefficients</caption><thead><th>coeff</th><th>b</th><th>beta</th><th>se</th><th>t</th></thead>
84
+ <tbody>
85
+ <tr><td>Constant</td><td>-0.520303</td><td>-</td><td>0.131039</td><td>-3.970594</td></tr>
86
+ <tr><td>cases</td><td>-0.000708</td><td>-0.011324</td><td>0.000341</td><td>-2.074007</td></tr>
87
+ <tr><td>vars</td><td>1.234451</td><td>0.855546</td><td>0.006278</td><td>196.641087</td></tr>
88
+ <tr><td>c_v</td><td>0.000735</td><td>0.271138</td><td>0.000016</td><td>44.912972</td></tr>
89
+ </tbody>
90
+ </table>
91
+
92
+ </div>
93
+ </body></html>
@@ -0,0 +1,75 @@
1
+ # This test creates a database to adjust the best algorithm
2
+ # to use on correlation matrix
3
+ require(File.expand_path(File.dirname(__FILE__)+'/../helpers_benchmark.rb'))
4
+ require 'statsample'
5
+ require 'benchmark'
6
+
7
+ def create_dataset(vars,cases)
8
+ ran=Distribution::Normal.rng
9
+ ds=vars.times.inject({}) {|ac,v|
10
+ ac["x#{v}"]=Statsample::Vector.new_scale(cases) {ran.call}
11
+ ac
12
+ }.to_dataset
13
+ end
14
+
15
+ def prediction_pairwise(vars,cases)
16
+ Statsample::Bivariate.prediction_pairwise(vars,cases) / 10
17
+ end
18
+ def prediction_optimized(vars,cases)
19
+ Statsample::Bivariate.prediction_optimized(vars,cases) / 10
20
+ end
21
+
22
+
23
+
24
+ if !File.exists?("correlation_matrix.ds") or File.mtime(__FILE__) > File.mtime("correlation_matrix.ds")
25
+ reps=100 #number of repetitions
26
+ ds_sizes=[5,10,30,50,100,150,200,500,1000]
27
+ ds_vars=[3,4,5,10,20,30,40]
28
+ #ds_sizes=[5,10]
29
+ #ds_vars=[3,5,20]
30
+ rs=Statsample::Dataset.new(%w{cases vars time_optimized time_pairwise})
31
+
32
+ ds_sizes.each do |cases|
33
+ ds_vars.each do |vars|
34
+ ds=create_dataset(vars,cases)
35
+ time_optimized= Benchmark.realtime do
36
+ reps.times {
37
+ Statsample::Bivariate.correlation_matrix_optimized(ds)
38
+ ds.clear_gsl
39
+ }
40
+ end
41
+
42
+ time_pairwise= Benchmark.realtime do
43
+ reps.times {
44
+ Statsample::Bivariate.correlation_matrix_pairwise(ds)
45
+ }
46
+ end
47
+
48
+ puts "Cases:#{cases}, vars:#{vars} -> opt:%0.3f (%0.3f) | pair: %0.3f (%0.3f)" % [time_optimized, prediction_optimized(vars,cases), time_pairwise, prediction_pairwise(vars,cases)]
49
+
50
+ rs.add_case({'cases'=>cases,'vars'=>vars,'time_optimized'=>Math.sqrt(time_optimized*1000),'time_pairwise'=>Math.sqrt(time_pairwise*1000)})
51
+ end
52
+ end
53
+
54
+ else
55
+ rs=Statsample.load("correlation_matrix.ds")
56
+ end
57
+
58
+
59
+ rs.fields.each {|f| rs[f].type=:scale}
60
+
61
+ rs['c_v']=rs.collect {|row| row['cases']*row['vars']}
62
+
63
+ rs.update_valid_data
64
+ rs.save("correlation_matrix.ds")
65
+ Statsample::Excel.write(rs,"correlation_matrix.xls")
66
+
67
+
68
+
69
+ rb=ReportBuilder.new(:name=>"Correlation matrix analysis")
70
+
71
+ rb.add(Statsample::Regression.multiple(rs[['cases','vars','time_optimized','c_v']],'time_optimized', :digits=>6))
72
+ rb.add(Statsample::Regression.multiple(rs[['cases','vars','time_pairwise','c_v']],'time_pairwise', :digits=>6))
73
+
74
+
75
+ rb.save_html("correlation_matrix.html")
@@ -0,0 +1,37 @@
1
+ require(File.expand_path(File.dirname(__FILE__)+'/helpers_benchmark.rb'))
2
+
3
+ extend BenchPress
4
+
5
+
6
+ name "Statsample::Factor::Map with and without GSL"
7
+ author 'Clbustos'
8
+ date '2011-01-18'
9
+ summary "Velicer's MAP uses a lot of Matrix algebra. How much we can improve the timing using GSL?
10
+ "
11
+
12
+ reps 20 #number of repetitions
13
+
14
+ m=Matrix[
15
+ [ 1, 0.846, 0.805, 0.859, 0.473, 0.398, 0.301, 0.382],
16
+ [ 0.846, 1, 0.881, 0.826, 0.376, 0.326, 0.277, 0.415],
17
+ [ 0.805, 0.881, 1, 0.801, 0.38, 0.319, 0.237, 0.345],
18
+ [ 0.859, 0.826, 0.801, 1, 0.436, 0.329, 0.327, 0.365],
19
+ [ 0.473, 0.376, 0.38, 0.436, 1, 0.762, 0.73, 0.629],
20
+ [ 0.398, 0.326, 0.319, 0.329, 0.762, 1, 0.583, 0.577],
21
+ [ 0.301, 0.277, 0.237, 0.327, 0.73, 0.583, 1, 0.539],
22
+ [ 0.382, 0.415, 0.345, 0.365, 0.629, 0.577, 0.539, 1]
23
+ ]
24
+
25
+ map=Statsample::Factor::MAP.new(m)
26
+
27
+
28
+ measure "Statsample::Factor::MAP without GSL" do
29
+ map.use_gsl=false
30
+ map.compute
31
+ end
32
+
33
+ measure "Statsample::Factor::MAP with GSL" do
34
+ map.use_gsl=true
35
+ map.compute
36
+ end
37
+
@@ -0,0 +1,5 @@
1
+ $:.unshift(File.expand_path(File.dirname(__FILE__)+'/../lib/'))
2
+ $:.unshift(File.expand_path(File.dirname(__FILE__)+'/'))
3
+
4
+ require 'statsample'
5
+ require 'bench_press'
data/examples/boxplot.rb CHANGED
@@ -1,17 +1,16 @@
1
1
  #!/usr/bin/ruby
2
2
  $:.unshift(File.dirname(__FILE__)+'/../lib/')
3
- $:.unshift('/home/cdx/dev/reportbuilder/lib/')
4
-
5
- require 'benchmark'
6
3
  require 'statsample'
7
- n=100
8
- a=(n-1).times.map {|i| rand()*20+50}
9
- b=n.times.map {|i| rand()*10+50}.to_scale
10
- c=n.times.map {|i| rand()*5+50}.to_scale
11
-
12
- a.push(30)
13
- a=a.to_scale
14
- sp=Statsample::Graph::Boxplot.new(:vectors=>[a,b,c],:width=>300, :height=>300, :groups=>%w{first first second}, :minimum=>0)
15
- rb=ReportBuilder.new
16
- rb.add(sp)
17
- puts rb.to_text
4
+ Statsample::Analysis.store(Statsample::Graph::Boxplot) do
5
+ n=30
6
+ a=rnorm(n-1,50,10)
7
+ b=rnorm(n, 30,5)
8
+ c=rnorm(n,5,1)
9
+ a.push(2)
10
+ boxplot(:vectors=>[a,b,c],:width=>300, :height=>300, :groups=>%w{first first second}, :minimum=>0)
11
+
12
+ end
13
+
14
+ if __FILE__==$0
15
+ Statsample::Analysis.run
16
+ end