statsample 0.18.0 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data.tar.gz.sig +0 -0
- data/History.txt +23 -0
- data/Manifest.txt +28 -17
- data/Rakefile +3 -2
- data/benchmarks/correlation_matrix_15_variables.rb +31 -0
- data/benchmarks/correlation_matrix_5_variables.rb +32 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.ds +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.html +93 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +75 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.xls +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix_gsl_ruby.ods +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix_with_graphics.ods +0 -0
- data/benchmarks/correlation_matrix_methods/results.ds +0 -0
- data/benchmarks/factor_map.rb +37 -0
- data/benchmarks/helpers_benchmark.rb +5 -0
- data/examples/boxplot.rb +13 -14
- data/examples/correlation_matrix.rb +16 -8
- data/examples/dataset.rb +13 -4
- data/examples/dominance_analysis.rb +23 -17
- data/examples/dominance_analysis_bootstrap.rb +28 -22
- data/examples/histogram.rb +8 -9
- data/examples/icc.rb +20 -21
- data/examples/levene.rb +10 -4
- data/examples/multiple_regression.rb +9 -28
- data/examples/multivariate_correlation.rb +9 -3
- data/examples/parallel_analysis.rb +20 -16
- data/examples/polychoric.rb +15 -9
- data/examples/principal_axis.rb +18 -6
- data/examples/reliability.rb +26 -13
- data/examples/scatterplot.rb +10 -6
- data/examples/t_test.rb +15 -6
- data/examples/tetrachoric.rb +9 -2
- data/examples/u_test.rb +12 -4
- data/examples/vector.rb +13 -2
- data/examples/velicer_map_test.rb +33 -26
- data/lib/statsample.rb +32 -12
- data/lib/statsample/analysis.rb +79 -0
- data/lib/statsample/analysis/suite.rb +72 -0
- data/lib/statsample/analysis/suitereportbuilder.rb +38 -0
- data/lib/statsample/bivariate.rb +70 -16
- data/lib/statsample/dataset.rb +25 -19
- data/lib/statsample/dominanceanalysis.rb +2 -2
- data/lib/statsample/factor.rb +2 -0
- data/lib/statsample/factor/map.rb +16 -10
- data/lib/statsample/factor/parallelanalysis.rb +9 -3
- data/lib/statsample/factor/pca.rb +28 -32
- data/lib/statsample/factor/rotation.rb +15 -8
- data/lib/statsample/graph/boxplot.rb +3 -4
- data/lib/statsample/graph/histogram.rb +2 -1
- data/lib/statsample/graph/scatterplot.rb +1 -0
- data/lib/statsample/matrix.rb +106 -16
- data/lib/statsample/regression.rb +4 -1
- data/lib/statsample/regression/binomial.rb +1 -1
- data/lib/statsample/regression/multiple/baseengine.rb +19 -9
- data/lib/statsample/regression/multiple/gslengine.rb +127 -126
- data/lib/statsample/regression/multiple/matrixengine.rb +8 -5
- data/lib/statsample/regression/multiple/rubyengine.rb +1 -1
- data/lib/statsample/regression/simple.rb +31 -6
- data/lib/statsample/reliability.rb +11 -3
- data/lib/statsample/reliability/scaleanalysis.rb +4 -4
- data/lib/statsample/shorthand.rb +81 -0
- data/lib/statsample/test/chisquare.rb +1 -1
- data/lib/statsample/vector.rb +163 -163
- data/lib/statsample/vector/gsl.rb +106 -0
- data/references.txt +2 -2
- data/{data → test/fixtures}/crime.txt +0 -0
- data/{data → test/fixtures}/hartman_23.matrix +0 -0
- data/{data → test/fixtures}/repeated_fields.csv +0 -0
- data/{data → test/fixtures}/test_binomial.csv +0 -0
- data/test/{test_csv.csv → fixtures/test_csv.csv} +0 -0
- data/test/{test_xls.xls → fixtures/test_xls.xls} +0 -0
- data/{data → test/fixtures}/tetmat_matrix.txt +0 -0
- data/{data → test/fixtures}/tetmat_test.txt +0 -0
- data/test/helpers_tests.rb +18 -2
- data/test/test_analysis.rb +118 -0
- data/test/test_anovatwoway.rb +1 -1
- data/test/test_anovatwowaywithdataset.rb +1 -1
- data/test/test_anovawithvectors.rb +1 -2
- data/test/test_bartlettsphericity.rb +1 -2
- data/test/test_bivariate.rb +64 -22
- data/test/test_codification.rb +1 -2
- data/test/test_crosstab.rb +1 -2
- data/test/test_csv.rb +3 -4
- data/test/test_dataset.rb +24 -3
- data/test/test_dominance_analysis.rb +1 -2
- data/test/test_factor.rb +8 -69
- data/test/test_factor_map.rb +43 -0
- data/test/test_factor_pa.rb +54 -0
- data/test/test_ggobi.rb +1 -1
- data/test/test_gsl.rb +12 -18
- data/test/test_histogram.rb +1 -2
- data/test/test_logit.rb +62 -18
- data/test/test_matrix.rb +4 -5
- data/test/test_mle.rb +3 -4
- data/test/test_regression.rb +21 -2
- data/test/test_reliability.rb +3 -3
- data/test/test_reliability_icc.rb +1 -1
- data/test/test_reliability_skillscale.rb +20 -4
- data/test/test_resample.rb +1 -2
- data/test/test_rserve_extension.rb +1 -2
- data/test/test_srs.rb +1 -2
- data/test/test_statistics.rb +1 -2
- data/test/test_stest.rb +1 -2
- data/test/test_stratified.rb +1 -2
- data/test/test_test_f.rb +1 -2
- data/test/test_test_t.rb +1 -2
- data/test/test_umannwhitney.rb +1 -2
- data/test/test_vector.rb +117 -18
- data/test/test_xls.rb +2 -3
- data/web/Rakefile +39 -0
- metadata +109 -29
- metadata.gz.sig +0 -0
- data/examples/parallel_analysis_tetrachoric.rb +0 -31
- data/lib/distribution.rb +0 -25
- data/lib/distribution/chisquare.rb +0 -23
- data/lib/distribution/f.rb +0 -35
- data/lib/distribution/normal.rb +0 -60
- data/lib/distribution/normalbivariate.rb +0 -284
- data/lib/distribution/normalmultivariate.rb +0 -73
- data/lib/distribution/t.rb +0 -55
- data/test/test_distribution.rb +0 -73
data.tar.gz.sig
CHANGED
Binary file
|
data/History.txt
CHANGED
@@ -1,3 +1,26 @@
|
|
1
|
+
=== 1.0.0 / 2011-01-27
|
2
|
+
|
3
|
+
* Added Statsample::Analysis, a beautiful DSL to perform fast statistical analysis using statsample. See directory /examples
|
4
|
+
* Created benchmarks directory
|
5
|
+
* Removed Distribution module from statsample and moved it to a separate gem. Changed code to reflect the new API
|
6
|
+
* Optimized simple regression. Better library detection
|
7
|
+
* New 'should_with_gsl' to test methods with gsl. Refactored Factor::MAP
|
8
|
+
* Almost complete GSL cleanup on Vector
|
9
|
+
* Updated some doc on Vector
|
10
|
+
* Used GSL::Matrix on Factor classes when available
|
11
|
+
* SkillScaleAnalysis doesn't crash with one or more vectors with 0 variance
|
12
|
+
* Modified examples using Statsample::Analysis
|
13
|
+
* Simplified eigen calculations
|
14
|
+
* Updated some examples. Added correlation matrix speed suite
|
15
|
+
* Correlation matrix optimized. Better specs
|
16
|
+
* Optimized correlation matrix. Uses gsl matrix algebra or pairwise correlations depending on empirically calculated equations. See benchmarks/correlation_matrix_methods/correlation_matrix.rb for the implementation of the calculation
|
17
|
+
* Moved test fixtures from data to test/fixtures
|
18
|
+
* Fixed some errors on tests
|
19
|
+
* Bug fix: constant_se on binomial regression had an error
|
20
|
+
* All tests should work on ruby 1.9.3
|
21
|
+
* New Vector.[] and Vector.new_scale
|
22
|
+
* Detect linearly dependent predictors on OLS.
|
23
|
+
|
1
24
|
=== 0.18.0 / 2011-01-07
|
2
25
|
* New Statsample.load_excel
|
3
26
|
* New Statsample.load_csv
|
data/Manifest.txt
CHANGED
@@ -3,14 +3,19 @@ LICENSE.txt
|
|
3
3
|
Manifest.txt
|
4
4
|
README.txt
|
5
5
|
Rakefile
|
6
|
+
benchmarks/correlation_matrix_15_variables.rb
|
7
|
+
benchmarks/correlation_matrix_5_variables.rb
|
8
|
+
benchmarks/correlation_matrix_methods/correlation_matrix.ds
|
9
|
+
benchmarks/correlation_matrix_methods/correlation_matrix.html
|
10
|
+
benchmarks/correlation_matrix_methods/correlation_matrix.rb
|
11
|
+
benchmarks/correlation_matrix_methods/correlation_matrix.xls
|
12
|
+
benchmarks/correlation_matrix_methods/correlation_matrix_gsl_ruby.ods
|
13
|
+
benchmarks/correlation_matrix_methods/correlation_matrix_with_graphics.ods
|
14
|
+
benchmarks/correlation_matrix_methods/results.ds
|
15
|
+
benchmarks/factor_map.rb
|
16
|
+
benchmarks/helpers_benchmark.rb
|
6
17
|
bin/statsample
|
7
|
-
data/crime.txt
|
8
|
-
data/hartman_23.matrix
|
9
18
|
data/locale/es/LC_MESSAGES/statsample.mo
|
10
|
-
data/repeated_fields.csv
|
11
|
-
data/test_binomial.csv
|
12
|
-
data/tetmat_matrix.txt
|
13
|
-
data/tetmat_test.txt
|
14
19
|
doc_latex/manual/equations.tex
|
15
20
|
examples/boxplot.rb
|
16
21
|
examples/correlation_matrix.rb
|
@@ -23,7 +28,6 @@ examples/levene.rb
|
|
23
28
|
examples/multiple_regression.rb
|
24
29
|
examples/multivariate_correlation.rb
|
25
30
|
examples/parallel_analysis.rb
|
26
|
-
examples/parallel_analysis_tetrachoric.rb
|
27
31
|
examples/polychoric.rb
|
28
32
|
examples/principal_axis.rb
|
29
33
|
examples/reliability.rb
|
@@ -34,15 +38,11 @@ examples/u_test.rb
|
|
34
38
|
examples/vector.rb
|
35
39
|
examples/velicer_map_test.rb
|
36
40
|
grab_references.rb
|
37
|
-
lib/distribution.rb
|
38
|
-
lib/distribution/chisquare.rb
|
39
|
-
lib/distribution/f.rb
|
40
|
-
lib/distribution/normal.rb
|
41
|
-
lib/distribution/normalbivariate.rb
|
42
|
-
lib/distribution/normalmultivariate.rb
|
43
|
-
lib/distribution/t.rb
|
44
41
|
lib/spss.rb
|
45
42
|
lib/statsample.rb
|
43
|
+
lib/statsample/analysis.rb
|
44
|
+
lib/statsample/analysis/suite.rb
|
45
|
+
lib/statsample/analysis/suitereportbuilder.rb
|
46
46
|
lib/statsample/anova.rb
|
47
47
|
lib/statsample/anova/oneway.rb
|
48
48
|
lib/statsample/anova/twoway.rb
|
@@ -91,6 +91,7 @@ lib/statsample/reliability/scaleanalysis.rb
|
|
91
91
|
lib/statsample/reliability/skillscaleanalysis.rb
|
92
92
|
lib/statsample/resample.rb
|
93
93
|
lib/statsample/rserve_extension.rb
|
94
|
+
lib/statsample/shorthand.rb
|
94
95
|
lib/statsample/srs.rb
|
95
96
|
lib/statsample/test.rb
|
96
97
|
lib/statsample/test/bartlettsphericity.rb
|
@@ -100,6 +101,7 @@ lib/statsample/test/levene.rb
|
|
100
101
|
lib/statsample/test/t.rb
|
101
102
|
lib/statsample/test/umannwhitney.rb
|
102
103
|
lib/statsample/vector.rb
|
104
|
+
lib/statsample/vector/gsl.rb
|
103
105
|
po/es/statsample.mo
|
104
106
|
po/es/statsample.po
|
105
107
|
po/statsample.pot
|
@@ -107,7 +109,16 @@ references.txt
|
|
107
109
|
setup.rb
|
108
110
|
test/fixtures/bank2.dat
|
109
111
|
test/fixtures/correlation_matrix.rb
|
112
|
+
test/fixtures/crime.txt
|
113
|
+
test/fixtures/hartman_23.matrix
|
114
|
+
test/fixtures/repeated_fields.csv
|
115
|
+
test/fixtures/test_binomial.csv
|
116
|
+
test/fixtures/test_csv.csv
|
117
|
+
test/fixtures/test_xls.xls
|
118
|
+
test/fixtures/tetmat_matrix.txt
|
119
|
+
test/fixtures/tetmat_test.txt
|
110
120
|
test/helpers_tests.rb
|
121
|
+
test/test_analysis.rb
|
111
122
|
test/test_anovaoneway.rb
|
112
123
|
test/test_anovatwoway.rb
|
113
124
|
test/test_anovatwowaywithdataset.rb
|
@@ -116,12 +127,12 @@ test/test_bartlettsphericity.rb
|
|
116
127
|
test/test_bivariate.rb
|
117
128
|
test/test_codification.rb
|
118
129
|
test/test_crosstab.rb
|
119
|
-
test/test_csv.csv
|
120
130
|
test/test_csv.rb
|
121
131
|
test/test_dataset.rb
|
122
|
-
test/test_distribution.rb
|
123
132
|
test/test_dominance_analysis.rb
|
124
133
|
test/test_factor.rb
|
134
|
+
test/test_factor_map.rb
|
135
|
+
test/test_factor_pa.rb
|
125
136
|
test/test_ggobi.rb
|
126
137
|
test/test_gsl.rb
|
127
138
|
test/test_histogram.rb
|
@@ -144,4 +155,4 @@ test/test_test_t.rb
|
|
144
155
|
test/test_umannwhitney.rb
|
145
156
|
test/test_vector.rb
|
146
157
|
test/test_xls.rb
|
147
|
-
|
158
|
+
web/Rakefile
|
data/Rakefile
CHANGED
@@ -40,9 +40,10 @@ h=Hoe.spec('statsample') do
|
|
40
40
|
#self.testlib=:minitest
|
41
41
|
self.rubyforge_name = "ruby-statsample"
|
42
42
|
self.developer('Claudio Bustos', 'clbustos@gmail.com')
|
43
|
-
self.extra_deps << ["spreadsheet","~>0.6.5"] << ["reportbuilder", "~>1.4"] << ["minimization", "~>0.2.0"] << ["fastercsv", ">0"] << ["dirty-memoize", "~>0.0"] << ["extendmatrix","~>0.3.1"] << ["statsample-bivariate-extension", ">0"] << ["rserve-client", "~>0.2.5"] << ["rubyvis", "~>0.4.0"]
|
43
|
+
self.extra_deps << ["spreadsheet","~>0.6.5"] << ["reportbuilder", "~>1.4"] << ["minimization", "~>0.2.0"] << ["fastercsv", ">0"] << ["dirty-memoize", "~>0.0"] << ["extendmatrix","~>0.3.1"] << ["statsample-bivariate-extension", ">0"] << ["rserve-client", "~>0.2.5"] << ["rubyvis", "~>0.4.0"] << ["distribution", "~>0.2.0"]
|
44
|
+
|
45
|
+
self.extra_dev_deps << ["hoe","~>0"] << ["shoulda","~>0"] << ["minitest", "~>2.0"] << ["rserve-client", "~>0"] << ["gettext", "~>0"] << ["mocha", "~>0"] << ["hoe-git", "~>0"]
|
44
46
|
|
45
|
-
self.extra_dev_deps << ["hoe","~>0"] << ["shoulda","~>0"] << ["minitest", "~>2.0"]
|
46
47
|
self.clean_globs << "test/images/*" << "demo/item_analysis/*" << "demo/Regression"
|
47
48
|
self.post_install_message = <<-EOF
|
48
49
|
***************************************************
|
@@ -0,0 +1,31 @@
|
|
1
|
+
require(File.expand_path(File.dirname(__FILE__)+'/helpers_benchmark.rb'))
|
2
|
+
|
3
|
+
extend BenchPress
|
4
|
+
cases=250
|
5
|
+
vars=20
|
6
|
+
|
7
|
+
|
8
|
+
name "gsl matrix based vs. manual ruby correlation matrix (#{vars} vars, #{cases} cases)"
|
9
|
+
author 'Clbustos'
|
10
|
+
date '2011-01-18'
|
11
|
+
summary "
|
12
|
+
A correlation matrix could be constructed using matrix algebra or
|
13
|
+
mannualy, calculating covariances, means and sd for each pair of vectors.
|
14
|
+
In this test, we test the calculation using #{vars} variables with
|
15
|
+
#{cases} cases on each vector
|
16
|
+
"
|
17
|
+
|
18
|
+
reps 200 #number of repetitions
|
19
|
+
|
20
|
+
ds=vars.times.inject({}) {|ac,v|
|
21
|
+
ac["x#{v}"]=Statsample::Vector.new_scale(cases) {rand()}
|
22
|
+
ac
|
23
|
+
}.to_dataset
|
24
|
+
|
25
|
+
measure "Statsample::Bivariate.correlation_matrix_optimized" do
|
26
|
+
Statsample::Bivariate.correlation_matrix_optimized(ds)
|
27
|
+
end
|
28
|
+
|
29
|
+
measure "Statsample::Bivariate.correlation_matrix_pairwise" do
|
30
|
+
Statsample::Bivariate.correlation_matrix_pairwise(ds)
|
31
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require(File.expand_path(File.dirname(__FILE__)+'/helpers_benchmark.rb'))
|
2
|
+
|
3
|
+
extend BenchPress
|
4
|
+
cases=500
|
5
|
+
vars=5
|
6
|
+
|
7
|
+
|
8
|
+
name "gsl matrix based vs. manual ruby correlation matrix (#{vars} vars, #{cases} cases)"
|
9
|
+
author 'Clbustos'
|
10
|
+
date '2011-01-18'
|
11
|
+
summary "
|
12
|
+
A correlation matrix could be constructed using matrix algebra or
|
13
|
+
mannualy, calculating covariances, means and sd for each pair of vectors.
|
14
|
+
In this test, we test the calculation using #{vars} variables with
|
15
|
+
#{cases} cases on each vector
|
16
|
+
"
|
17
|
+
|
18
|
+
reps 200 #number of repetitions
|
19
|
+
|
20
|
+
|
21
|
+
ds=vars.times.inject({}) {|ac,v|
|
22
|
+
ac["x#{v}"]=Statsample::Vector.new_scale(cases) {rand()}
|
23
|
+
ac
|
24
|
+
}.to_dataset
|
25
|
+
|
26
|
+
measure "Statsample::Bivariate.correlation_matrix_optimized" do
|
27
|
+
Statsample::Bivariate.correlation_matrix_optimized(ds)
|
28
|
+
end
|
29
|
+
|
30
|
+
measure "Statsample::Bivariate.correlation_matrix_pairwise" do
|
31
|
+
Statsample::Bivariate.correlation_matrix_pairwise(ds)
|
32
|
+
end
|
Binary file
|
@@ -0,0 +1,93 @@
|
|
1
|
+
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
|
2
|
+
<html>
|
3
|
+
<head>
|
4
|
+
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" >
|
5
|
+
<title>Correlation matrix analysis</title>
|
6
|
+
<style>
|
7
|
+
body {
|
8
|
+
margin:0;
|
9
|
+
padding:1em;
|
10
|
+
}
|
11
|
+
table {
|
12
|
+
border-collapse: collapse;
|
13
|
+
|
14
|
+
}
|
15
|
+
table td {
|
16
|
+
border: 1px solid black;
|
17
|
+
}
|
18
|
+
.section {
|
19
|
+
margin:0.5em;
|
20
|
+
}
|
21
|
+
</style>
|
22
|
+
|
23
|
+
</head><body>
|
24
|
+
<h1>Correlation matrix analysis</h1><div id='toc'><div class='title'>List of contents</div>
|
25
|
+
<ul>
|
26
|
+
<li><a href='#toc_1'>Multiple reggresion of cases,vars,c_v on time_optimized</a></li>
|
27
|
+
<ul>
|
28
|
+
<li><a href='#toc_2'>ANOVA</a></li>
|
29
|
+
</ul>
|
30
|
+
<li><a href='#toc_3'>Multiple reggresion of cases,vars,c_v on time_pairwise</a></li>
|
31
|
+
<ul>
|
32
|
+
<li><a href='#toc_4'>ANOVA</a></li>
|
33
|
+
</ul>
|
34
|
+
</ul>
|
35
|
+
</div>
|
36
|
+
<div class='tot'><div class='title'>List of tables</div><ul><li><a href='#table_1'>ANOVA Table</a></li><li><a href='#table_2'>Beta coefficients</a></li><li><a href='#table_3'>ANOVA Table</a></li><li><a href='#table_4'>Beta coefficients</a></li></ul></div>
|
37
|
+
<div class='section'><h2>Multiple reggresion of cases,vars,c_v on time_optimized</h2><a name='toc_1'></a>
|
38
|
+
<p>Engine: Statsample::Regression::Multiple::RubyEngine</p>
|
39
|
+
<p>Cases(listwise)=63(63)</p>
|
40
|
+
<p>R=0.978844</p>
|
41
|
+
<p>R^2=0.958137</p>
|
42
|
+
<p>R^2 Adj=0.956008</p>
|
43
|
+
<p>Std.Error R=3.092024</p>
|
44
|
+
<p>Equation=4.031667 + 0.018039cases + 0.244790vars + 0.001197c_v</p>
|
45
|
+
<div class='section'><h3>ANOVA</h3><a name='toc_2'></a>
|
46
|
+
<a name='table_1'></a><table><caption>ANOVA Table</caption><thead><th>source</th><th>ss</th><th>df</th><th>ms</th><th>f</th><th>p</th></thead>
|
47
|
+
<tbody>
|
48
|
+
<tr><td>Regression</td><td>12910.098</td><td>3</td><td>4303.366</td><td>450.114</td><td>0.000</td></tr>
|
49
|
+
<tr><td>Error</td><td>564.076</td><td>59</td><td>9.561</td><td></td><td></td></tr>
|
50
|
+
<tr><td>Total</td><td>13474.174</td><td>62</td><td>4312.927</td><td></td><td></td></tr>
|
51
|
+
</tbody>
|
52
|
+
</table>
|
53
|
+
|
54
|
+
</div>
|
55
|
+
<a name='table_2'></a><table><caption>Beta coefficients</caption><thead><th>coeff</th><th>b</th><th>beta</th><th>se</th><th>t</th></thead>
|
56
|
+
<tbody>
|
57
|
+
<tr><td>Constant</td><td>4.031667</td><td>-</td><td>0.752604</td><td>5.356953</td></tr>
|
58
|
+
<tr><td>cases</td><td>0.018039</td><td>0.381587</td><td>0.001961</td><td>9.200093</td></tr>
|
59
|
+
<tr><td>vars</td><td>0.244790</td><td>0.224390</td><td>0.036055</td><td>6.789335</td></tr>
|
60
|
+
<tr><td>c_v</td><td>0.001197</td><td>0.584174</td><td>0.000094</td><td>12.738410</td></tr>
|
61
|
+
</tbody>
|
62
|
+
</table>
|
63
|
+
|
64
|
+
</div>
|
65
|
+
<div class='section'><h2>Multiple reggresion of cases,vars,c_v on time_pairwise</h2><a name='toc_3'></a>
|
66
|
+
<p>Engine: Statsample::Regression::Multiple::RubyEngine</p>
|
67
|
+
<p>Cases(listwise)=63(63)</p>
|
68
|
+
<p>R=0.999637</p>
|
69
|
+
<p>R^2=0.999275</p>
|
70
|
+
<p>R^2 Adj=0.999238</p>
|
71
|
+
<p>Std.Error R=0.538365</p>
|
72
|
+
<p>Equation=-0.520303 + -0.000708cases + 1.234451vars + 0.000735c_v</p>
|
73
|
+
<div class='section'><h3>ANOVA</h3><a name='toc_4'></a>
|
74
|
+
<a name='table_3'></a><table><caption>ANOVA Table</caption><thead><th>source</th><th>ss</th><th>df</th><th>ms</th><th>f</th><th>p</th></thead>
|
75
|
+
<tbody>
|
76
|
+
<tr><td>Regression</td><td>23554.271</td><td>3</td><td>7851.424</td><td>27089.134</td><td>0.000</td></tr>
|
77
|
+
<tr><td>Error</td><td>17.100</td><td>59</td><td>0.290</td><td></td><td></td></tr>
|
78
|
+
<tr><td>Total</td><td>23571.372</td><td>62</td><td>7851.714</td><td></td><td></td></tr>
|
79
|
+
</tbody>
|
80
|
+
</table>
|
81
|
+
|
82
|
+
</div>
|
83
|
+
<a name='table_4'></a><table><caption>Beta coefficients</caption><thead><th>coeff</th><th>b</th><th>beta</th><th>se</th><th>t</th></thead>
|
84
|
+
<tbody>
|
85
|
+
<tr><td>Constant</td><td>-0.520303</td><td>-</td><td>0.131039</td><td>-3.970594</td></tr>
|
86
|
+
<tr><td>cases</td><td>-0.000708</td><td>-0.011324</td><td>0.000341</td><td>-2.074007</td></tr>
|
87
|
+
<tr><td>vars</td><td>1.234451</td><td>0.855546</td><td>0.006278</td><td>196.641087</td></tr>
|
88
|
+
<tr><td>c_v</td><td>0.000735</td><td>0.271138</td><td>0.000016</td><td>44.912972</td></tr>
|
89
|
+
</tbody>
|
90
|
+
</table>
|
91
|
+
|
92
|
+
</div>
|
93
|
+
</body></html>
|
@@ -0,0 +1,75 @@
|
|
1
|
+
# This test creates a database to adjust the best algorithm
|
2
|
+
# to use on correlation matrix
|
3
|
+
require(File.expand_path(File.dirname(__FILE__)+'/../helpers_benchmark.rb'))
|
4
|
+
require 'statsample'
|
5
|
+
require 'benchmark'
|
6
|
+
|
7
|
+
def create_dataset(vars,cases)
|
8
|
+
ran=Distribution::Normal.rng
|
9
|
+
ds=vars.times.inject({}) {|ac,v|
|
10
|
+
ac["x#{v}"]=Statsample::Vector.new_scale(cases) {ran.call}
|
11
|
+
ac
|
12
|
+
}.to_dataset
|
13
|
+
end
|
14
|
+
|
15
|
+
def prediction_pairwise(vars,cases)
|
16
|
+
Statsample::Bivariate.prediction_pairwise(vars,cases) / 10
|
17
|
+
end
|
18
|
+
def prediction_optimized(vars,cases)
|
19
|
+
Statsample::Bivariate.prediction_optimized(vars,cases) / 10
|
20
|
+
end
|
21
|
+
|
22
|
+
|
23
|
+
|
24
|
+
if !File.exists?("correlation_matrix.ds") or File.mtime(__FILE__) > File.mtime("correlation_matrix.ds")
|
25
|
+
reps=100 #number of repetitions
|
26
|
+
ds_sizes=[5,10,30,50,100,150,200,500,1000]
|
27
|
+
ds_vars=[3,4,5,10,20,30,40]
|
28
|
+
#ds_sizes=[5,10]
|
29
|
+
#ds_vars=[3,5,20]
|
30
|
+
rs=Statsample::Dataset.new(%w{cases vars time_optimized time_pairwise})
|
31
|
+
|
32
|
+
ds_sizes.each do |cases|
|
33
|
+
ds_vars.each do |vars|
|
34
|
+
ds=create_dataset(vars,cases)
|
35
|
+
time_optimized= Benchmark.realtime do
|
36
|
+
reps.times {
|
37
|
+
Statsample::Bivariate.correlation_matrix_optimized(ds)
|
38
|
+
ds.clear_gsl
|
39
|
+
}
|
40
|
+
end
|
41
|
+
|
42
|
+
time_pairwise= Benchmark.realtime do
|
43
|
+
reps.times {
|
44
|
+
Statsample::Bivariate.correlation_matrix_pairwise(ds)
|
45
|
+
}
|
46
|
+
end
|
47
|
+
|
48
|
+
puts "Cases:#{cases}, vars:#{vars} -> opt:%0.3f (%0.3f) | pair: %0.3f (%0.3f)" % [time_optimized, prediction_optimized(vars,cases), time_pairwise, prediction_pairwise(vars,cases)]
|
49
|
+
|
50
|
+
rs.add_case({'cases'=>cases,'vars'=>vars,'time_optimized'=>Math.sqrt(time_optimized*1000),'time_pairwise'=>Math.sqrt(time_pairwise*1000)})
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
else
|
55
|
+
rs=Statsample.load("correlation_matrix.ds")
|
56
|
+
end
|
57
|
+
|
58
|
+
|
59
|
+
rs.fields.each {|f| rs[f].type=:scale}
|
60
|
+
|
61
|
+
rs['c_v']=rs.collect {|row| row['cases']*row['vars']}
|
62
|
+
|
63
|
+
rs.update_valid_data
|
64
|
+
rs.save("correlation_matrix.ds")
|
65
|
+
Statsample::Excel.write(rs,"correlation_matrix.xls")
|
66
|
+
|
67
|
+
|
68
|
+
|
69
|
+
rb=ReportBuilder.new(:name=>"Correlation matrix analysis")
|
70
|
+
|
71
|
+
rb.add(Statsample::Regression.multiple(rs[['cases','vars','time_optimized','c_v']],'time_optimized', :digits=>6))
|
72
|
+
rb.add(Statsample::Regression.multiple(rs[['cases','vars','time_pairwise','c_v']],'time_pairwise', :digits=>6))
|
73
|
+
|
74
|
+
|
75
|
+
rb.save_html("correlation_matrix.html")
|
Binary file
|
Binary file
|
@@ -0,0 +1,37 @@
|
|
1
|
+
require(File.expand_path(File.dirname(__FILE__)+'/helpers_benchmark.rb'))
|
2
|
+
|
3
|
+
extend BenchPress
|
4
|
+
|
5
|
+
|
6
|
+
name "Statsample::Factor::Map with and without GSL"
|
7
|
+
author 'Clbustos'
|
8
|
+
date '2011-01-18'
|
9
|
+
summary "Velicer's MAP uses a lot of Matrix algebra. How much we can improve the timing using GSL?
|
10
|
+
"
|
11
|
+
|
12
|
+
reps 20 #number of repetitions
|
13
|
+
|
14
|
+
m=Matrix[
|
15
|
+
[ 1, 0.846, 0.805, 0.859, 0.473, 0.398, 0.301, 0.382],
|
16
|
+
[ 0.846, 1, 0.881, 0.826, 0.376, 0.326, 0.277, 0.415],
|
17
|
+
[ 0.805, 0.881, 1, 0.801, 0.38, 0.319, 0.237, 0.345],
|
18
|
+
[ 0.859, 0.826, 0.801, 1, 0.436, 0.329, 0.327, 0.365],
|
19
|
+
[ 0.473, 0.376, 0.38, 0.436, 1, 0.762, 0.73, 0.629],
|
20
|
+
[ 0.398, 0.326, 0.319, 0.329, 0.762, 1, 0.583, 0.577],
|
21
|
+
[ 0.301, 0.277, 0.237, 0.327, 0.73, 0.583, 1, 0.539],
|
22
|
+
[ 0.382, 0.415, 0.345, 0.365, 0.629, 0.577, 0.539, 1]
|
23
|
+
]
|
24
|
+
|
25
|
+
map=Statsample::Factor::MAP.new(m)
|
26
|
+
|
27
|
+
|
28
|
+
measure "Statsample::Factor::MAP without GSL" do
|
29
|
+
map.use_gsl=false
|
30
|
+
map.compute
|
31
|
+
end
|
32
|
+
|
33
|
+
measure "Statsample::Factor::MAP with GSL" do
|
34
|
+
map.use_gsl=true
|
35
|
+
map.compute
|
36
|
+
end
|
37
|
+
|
data/examples/boxplot.rb
CHANGED
@@ -1,17 +1,16 @@
|
|
1
1
|
#!/usr/bin/ruby
|
2
2
|
$:.unshift(File.dirname(__FILE__)+'/../lib/')
|
3
|
-
$:.unshift('/home/cdx/dev/reportbuilder/lib/')
|
4
|
-
|
5
|
-
require 'benchmark'
|
6
3
|
require 'statsample'
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
a.push(
|
13
|
-
a
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
4
|
+
Statsample::Analysis.store(Statsample::Graph::Boxplot) do
|
5
|
+
n=30
|
6
|
+
a=rnorm(n-1,50,10)
|
7
|
+
b=rnorm(n, 30,5)
|
8
|
+
c=rnorm(n,5,1)
|
9
|
+
a.push(2)
|
10
|
+
boxplot(:vectors=>[a,b,c],:width=>300, :height=>300, :groups=>%w{first first second}, :minimum=>0)
|
11
|
+
|
12
|
+
end
|
13
|
+
|
14
|
+
if __FILE__==$0
|
15
|
+
Statsample::Analysis.run
|
16
|
+
end
|