statsample 0.18.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data.tar.gz.sig +0 -0
- data/History.txt +23 -0
- data/Manifest.txt +28 -17
- data/Rakefile +3 -2
- data/benchmarks/correlation_matrix_15_variables.rb +31 -0
- data/benchmarks/correlation_matrix_5_variables.rb +32 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.ds +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.html +93 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +75 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.xls +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix_gsl_ruby.ods +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix_with_graphics.ods +0 -0
- data/benchmarks/correlation_matrix_methods/results.ds +0 -0
- data/benchmarks/factor_map.rb +37 -0
- data/benchmarks/helpers_benchmark.rb +5 -0
- data/examples/boxplot.rb +13 -14
- data/examples/correlation_matrix.rb +16 -8
- data/examples/dataset.rb +13 -4
- data/examples/dominance_analysis.rb +23 -17
- data/examples/dominance_analysis_bootstrap.rb +28 -22
- data/examples/histogram.rb +8 -9
- data/examples/icc.rb +20 -21
- data/examples/levene.rb +10 -4
- data/examples/multiple_regression.rb +9 -28
- data/examples/multivariate_correlation.rb +9 -3
- data/examples/parallel_analysis.rb +20 -16
- data/examples/polychoric.rb +15 -9
- data/examples/principal_axis.rb +18 -6
- data/examples/reliability.rb +26 -13
- data/examples/scatterplot.rb +10 -6
- data/examples/t_test.rb +15 -6
- data/examples/tetrachoric.rb +9 -2
- data/examples/u_test.rb +12 -4
- data/examples/vector.rb +13 -2
- data/examples/velicer_map_test.rb +33 -26
- data/lib/statsample.rb +32 -12
- data/lib/statsample/analysis.rb +79 -0
- data/lib/statsample/analysis/suite.rb +72 -0
- data/lib/statsample/analysis/suitereportbuilder.rb +38 -0
- data/lib/statsample/bivariate.rb +70 -16
- data/lib/statsample/dataset.rb +25 -19
- data/lib/statsample/dominanceanalysis.rb +2 -2
- data/lib/statsample/factor.rb +2 -0
- data/lib/statsample/factor/map.rb +16 -10
- data/lib/statsample/factor/parallelanalysis.rb +9 -3
- data/lib/statsample/factor/pca.rb +28 -32
- data/lib/statsample/factor/rotation.rb +15 -8
- data/lib/statsample/graph/boxplot.rb +3 -4
- data/lib/statsample/graph/histogram.rb +2 -1
- data/lib/statsample/graph/scatterplot.rb +1 -0
- data/lib/statsample/matrix.rb +106 -16
- data/lib/statsample/regression.rb +4 -1
- data/lib/statsample/regression/binomial.rb +1 -1
- data/lib/statsample/regression/multiple/baseengine.rb +19 -9
- data/lib/statsample/regression/multiple/gslengine.rb +127 -126
- data/lib/statsample/regression/multiple/matrixengine.rb +8 -5
- data/lib/statsample/regression/multiple/rubyengine.rb +1 -1
- data/lib/statsample/regression/simple.rb +31 -6
- data/lib/statsample/reliability.rb +11 -3
- data/lib/statsample/reliability/scaleanalysis.rb +4 -4
- data/lib/statsample/shorthand.rb +81 -0
- data/lib/statsample/test/chisquare.rb +1 -1
- data/lib/statsample/vector.rb +163 -163
- data/lib/statsample/vector/gsl.rb +106 -0
- data/references.txt +2 -2
- data/{data → test/fixtures}/crime.txt +0 -0
- data/{data → test/fixtures}/hartman_23.matrix +0 -0
- data/{data → test/fixtures}/repeated_fields.csv +0 -0
- data/{data → test/fixtures}/test_binomial.csv +0 -0
- data/test/{test_csv.csv → fixtures/test_csv.csv} +0 -0
- data/test/{test_xls.xls → fixtures/test_xls.xls} +0 -0
- data/{data → test/fixtures}/tetmat_matrix.txt +0 -0
- data/{data → test/fixtures}/tetmat_test.txt +0 -0
- data/test/helpers_tests.rb +18 -2
- data/test/test_analysis.rb +118 -0
- data/test/test_anovatwoway.rb +1 -1
- data/test/test_anovatwowaywithdataset.rb +1 -1
- data/test/test_anovawithvectors.rb +1 -2
- data/test/test_bartlettsphericity.rb +1 -2
- data/test/test_bivariate.rb +64 -22
- data/test/test_codification.rb +1 -2
- data/test/test_crosstab.rb +1 -2
- data/test/test_csv.rb +3 -4
- data/test/test_dataset.rb +24 -3
- data/test/test_dominance_analysis.rb +1 -2
- data/test/test_factor.rb +8 -69
- data/test/test_factor_map.rb +43 -0
- data/test/test_factor_pa.rb +54 -0
- data/test/test_ggobi.rb +1 -1
- data/test/test_gsl.rb +12 -18
- data/test/test_histogram.rb +1 -2
- data/test/test_logit.rb +62 -18
- data/test/test_matrix.rb +4 -5
- data/test/test_mle.rb +3 -4
- data/test/test_regression.rb +21 -2
- data/test/test_reliability.rb +3 -3
- data/test/test_reliability_icc.rb +1 -1
- data/test/test_reliability_skillscale.rb +20 -4
- data/test/test_resample.rb +1 -2
- data/test/test_rserve_extension.rb +1 -2
- data/test/test_srs.rb +1 -2
- data/test/test_statistics.rb +1 -2
- data/test/test_stest.rb +1 -2
- data/test/test_stratified.rb +1 -2
- data/test/test_test_f.rb +1 -2
- data/test/test_test_t.rb +1 -2
- data/test/test_umannwhitney.rb +1 -2
- data/test/test_vector.rb +117 -18
- data/test/test_xls.rb +2 -3
- data/web/Rakefile +39 -0
- metadata +109 -29
- metadata.gz.sig +0 -0
- data/examples/parallel_analysis_tetrachoric.rb +0 -31
- data/lib/distribution.rb +0 -25
- data/lib/distribution/chisquare.rb +0 -23
- data/lib/distribution/f.rb +0 -35
- data/lib/distribution/normal.rb +0 -60
- data/lib/distribution/normalbivariate.rb +0 -284
- data/lib/distribution/normalmultivariate.rb +0 -73
- data/lib/distribution/t.rb +0 -55
- data/test/test_distribution.rb +0 -73
data.tar.gz.sig
CHANGED
|
Binary file
|
data/History.txt
CHANGED
|
@@ -1,3 +1,26 @@
|
|
|
1
|
+
=== 1.0.0 / 2011-01-27
|
|
2
|
+
|
|
3
|
+
* Added Statsample::Analysis, a beautiful DSL to perform fast statistical analysis using statsample. See directory /examples
|
|
4
|
+
* Created benchmarks directory
|
|
5
|
+
* Removed Distribution module from statsample and moved to a gem. Changes on code to reflect new API
|
|
6
|
+
* Optimized simple regression. Better library detection
|
|
7
|
+
* New 'should_with_gsl' to test methods with gsl. Refactored Factor::MAP
|
|
8
|
+
* Almost complete GSL cleanup on Vector
|
|
9
|
+
* Updated some doc on Vector
|
|
10
|
+
* Used GSL::Matrix on Factor classes when available
|
|
11
|
+
* SkillScaleAnalysis doesn't crash with one or more vectors with 0 variance
|
|
12
|
+
* Modified examples using Statsample::Analysis
|
|
13
|
+
* Simplified eigen calculations
|
|
14
|
+
* Updated some examples. Added correlation matrix speed suite
|
|
15
|
+
* Correlation matrix optimized. Better specs
|
|
16
|
+
* Optimized correlation matrix. Uses GSL matrix algebra or pairwise correlations, depending on empirically calculated equations. See benchmarks/correlation_matrix_methods/correlation_matrix.rb for the implementation of the calculation
|
|
17
|
+
* Moved tests fixtures from data to test/fixtures
|
|
18
|
+
* Fixed some errors on tests
|
|
19
|
+
* Bug fix: constant_se on binomial regression had an error
|
|
20
|
+
* All tests should work on Ruby 1.9.3
|
|
21
|
+
* New Vector.[] and Vector.new_scale
|
|
22
|
+
* Detect linearly dependent predictors on OLS.
|
|
23
|
+
|
|
1
24
|
=== 0.18.0 / 2011-01-07
|
|
2
25
|
* New Statsample.load_excel
|
|
3
26
|
* New Statsample.load_csv
|
data/Manifest.txt
CHANGED
|
@@ -3,14 +3,19 @@ LICENSE.txt
|
|
|
3
3
|
Manifest.txt
|
|
4
4
|
README.txt
|
|
5
5
|
Rakefile
|
|
6
|
+
benchmarks/correlation_matrix_15_variables.rb
|
|
7
|
+
benchmarks/correlation_matrix_5_variables.rb
|
|
8
|
+
benchmarks/correlation_matrix_methods/correlation_matrix.ds
|
|
9
|
+
benchmarks/correlation_matrix_methods/correlation_matrix.html
|
|
10
|
+
benchmarks/correlation_matrix_methods/correlation_matrix.rb
|
|
11
|
+
benchmarks/correlation_matrix_methods/correlation_matrix.xls
|
|
12
|
+
benchmarks/correlation_matrix_methods/correlation_matrix_gsl_ruby.ods
|
|
13
|
+
benchmarks/correlation_matrix_methods/correlation_matrix_with_graphics.ods
|
|
14
|
+
benchmarks/correlation_matrix_methods/results.ds
|
|
15
|
+
benchmarks/factor_map.rb
|
|
16
|
+
benchmarks/helpers_benchmark.rb
|
|
6
17
|
bin/statsample
|
|
7
|
-
data/crime.txt
|
|
8
|
-
data/hartman_23.matrix
|
|
9
18
|
data/locale/es/LC_MESSAGES/statsample.mo
|
|
10
|
-
data/repeated_fields.csv
|
|
11
|
-
data/test_binomial.csv
|
|
12
|
-
data/tetmat_matrix.txt
|
|
13
|
-
data/tetmat_test.txt
|
|
14
19
|
doc_latex/manual/equations.tex
|
|
15
20
|
examples/boxplot.rb
|
|
16
21
|
examples/correlation_matrix.rb
|
|
@@ -23,7 +28,6 @@ examples/levene.rb
|
|
|
23
28
|
examples/multiple_regression.rb
|
|
24
29
|
examples/multivariate_correlation.rb
|
|
25
30
|
examples/parallel_analysis.rb
|
|
26
|
-
examples/parallel_analysis_tetrachoric.rb
|
|
27
31
|
examples/polychoric.rb
|
|
28
32
|
examples/principal_axis.rb
|
|
29
33
|
examples/reliability.rb
|
|
@@ -34,15 +38,11 @@ examples/u_test.rb
|
|
|
34
38
|
examples/vector.rb
|
|
35
39
|
examples/velicer_map_test.rb
|
|
36
40
|
grab_references.rb
|
|
37
|
-
lib/distribution.rb
|
|
38
|
-
lib/distribution/chisquare.rb
|
|
39
|
-
lib/distribution/f.rb
|
|
40
|
-
lib/distribution/normal.rb
|
|
41
|
-
lib/distribution/normalbivariate.rb
|
|
42
|
-
lib/distribution/normalmultivariate.rb
|
|
43
|
-
lib/distribution/t.rb
|
|
44
41
|
lib/spss.rb
|
|
45
42
|
lib/statsample.rb
|
|
43
|
+
lib/statsample/analysis.rb
|
|
44
|
+
lib/statsample/analysis/suite.rb
|
|
45
|
+
lib/statsample/analysis/suitereportbuilder.rb
|
|
46
46
|
lib/statsample/anova.rb
|
|
47
47
|
lib/statsample/anova/oneway.rb
|
|
48
48
|
lib/statsample/anova/twoway.rb
|
|
@@ -91,6 +91,7 @@ lib/statsample/reliability/scaleanalysis.rb
|
|
|
91
91
|
lib/statsample/reliability/skillscaleanalysis.rb
|
|
92
92
|
lib/statsample/resample.rb
|
|
93
93
|
lib/statsample/rserve_extension.rb
|
|
94
|
+
lib/statsample/shorthand.rb
|
|
94
95
|
lib/statsample/srs.rb
|
|
95
96
|
lib/statsample/test.rb
|
|
96
97
|
lib/statsample/test/bartlettsphericity.rb
|
|
@@ -100,6 +101,7 @@ lib/statsample/test/levene.rb
|
|
|
100
101
|
lib/statsample/test/t.rb
|
|
101
102
|
lib/statsample/test/umannwhitney.rb
|
|
102
103
|
lib/statsample/vector.rb
|
|
104
|
+
lib/statsample/vector/gsl.rb
|
|
103
105
|
po/es/statsample.mo
|
|
104
106
|
po/es/statsample.po
|
|
105
107
|
po/statsample.pot
|
|
@@ -107,7 +109,16 @@ references.txt
|
|
|
107
109
|
setup.rb
|
|
108
110
|
test/fixtures/bank2.dat
|
|
109
111
|
test/fixtures/correlation_matrix.rb
|
|
112
|
+
test/fixtures/crime.txt
|
|
113
|
+
test/fixtures/hartman_23.matrix
|
|
114
|
+
test/fixtures/repeated_fields.csv
|
|
115
|
+
test/fixtures/test_binomial.csv
|
|
116
|
+
test/fixtures/test_csv.csv
|
|
117
|
+
test/fixtures/test_xls.xls
|
|
118
|
+
test/fixtures/tetmat_matrix.txt
|
|
119
|
+
test/fixtures/tetmat_test.txt
|
|
110
120
|
test/helpers_tests.rb
|
|
121
|
+
test/test_analysis.rb
|
|
111
122
|
test/test_anovaoneway.rb
|
|
112
123
|
test/test_anovatwoway.rb
|
|
113
124
|
test/test_anovatwowaywithdataset.rb
|
|
@@ -116,12 +127,12 @@ test/test_bartlettsphericity.rb
|
|
|
116
127
|
test/test_bivariate.rb
|
|
117
128
|
test/test_codification.rb
|
|
118
129
|
test/test_crosstab.rb
|
|
119
|
-
test/test_csv.csv
|
|
120
130
|
test/test_csv.rb
|
|
121
131
|
test/test_dataset.rb
|
|
122
|
-
test/test_distribution.rb
|
|
123
132
|
test/test_dominance_analysis.rb
|
|
124
133
|
test/test_factor.rb
|
|
134
|
+
test/test_factor_map.rb
|
|
135
|
+
test/test_factor_pa.rb
|
|
125
136
|
test/test_ggobi.rb
|
|
126
137
|
test/test_gsl.rb
|
|
127
138
|
test/test_histogram.rb
|
|
@@ -144,4 +155,4 @@ test/test_test_t.rb
|
|
|
144
155
|
test/test_umannwhitney.rb
|
|
145
156
|
test/test_vector.rb
|
|
146
157
|
test/test_xls.rb
|
|
147
|
-
|
|
158
|
+
web/Rakefile
|
data/Rakefile
CHANGED
|
@@ -40,9 +40,10 @@ h=Hoe.spec('statsample') do
|
|
|
40
40
|
#self.testlib=:minitest
|
|
41
41
|
self.rubyforge_name = "ruby-statsample"
|
|
42
42
|
self.developer('Claudio Bustos', 'clbustos@gmail.com')
|
|
43
|
-
self.extra_deps << ["spreadsheet","~>0.6.5"] << ["reportbuilder", "~>1.4"] << ["minimization", "~>0.2.0"] << ["fastercsv", ">0"] << ["dirty-memoize", "~>0.0"] << ["extendmatrix","~>0.3.1"] << ["statsample-bivariate-extension", ">0"] << ["rserve-client", "~>0.2.5"] << ["rubyvis", "~>0.4.0"]
|
|
43
|
+
self.extra_deps << ["spreadsheet","~>0.6.5"] << ["reportbuilder", "~>1.4"] << ["minimization", "~>0.2.0"] << ["fastercsv", ">0"] << ["dirty-memoize", "~>0.0"] << ["extendmatrix","~>0.3.1"] << ["statsample-bivariate-extension", ">0"] << ["rserve-client", "~>0.2.5"] << ["rubyvis", "~>0.4.0"] << ["distribution", "~>0.2.0"]
|
|
44
|
+
|
|
45
|
+
self.extra_dev_deps << ["hoe","~>0"] << ["shoulda","~>0"] << ["minitest", "~>2.0"] << ["rserve-client", "~>0"] << ["gettext", "~>0"] << ["mocha", "~>0"] << ["hoe-git", "~>0"]
|
|
44
46
|
|
|
45
|
-
self.extra_dev_deps << ["hoe","~>0"] << ["shoulda","~>0"] << ["minitest", "~>2.0"]
|
|
46
47
|
self.clean_globs << "test/images/*" << "demo/item_analysis/*" << "demo/Regression"
|
|
47
48
|
self.post_install_message = <<-EOF
|
|
48
49
|
***************************************************
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
require(File.expand_path(File.dirname(__FILE__)+'/helpers_benchmark.rb'))
|
|
2
|
+
|
|
3
|
+
extend BenchPress
|
|
4
|
+
cases=250
|
|
5
|
+
vars=20
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
name "gsl matrix based vs. manual ruby correlation matrix (#{vars} vars, #{cases} cases)"
|
|
9
|
+
author 'Clbustos'
|
|
10
|
+
date '2011-01-18'
|
|
11
|
+
summary "
|
|
12
|
+
A correlation matrix could be constructed using matrix algebra or
|
|
13
|
+
mannualy, calculating covariances, means and sd for each pair of vectors.
|
|
14
|
+
In this test, we test the calculation using #{vars} variables with
|
|
15
|
+
#{cases} cases on each vector
|
|
16
|
+
"
|
|
17
|
+
|
|
18
|
+
reps 200 #number of repetitions
|
|
19
|
+
|
|
20
|
+
ds=vars.times.inject({}) {|ac,v|
|
|
21
|
+
ac["x#{v}"]=Statsample::Vector.new_scale(cases) {rand()}
|
|
22
|
+
ac
|
|
23
|
+
}.to_dataset
|
|
24
|
+
|
|
25
|
+
measure "Statsample::Bivariate.correlation_matrix_optimized" do
|
|
26
|
+
Statsample::Bivariate.correlation_matrix_optimized(ds)
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
measure "Statsample::Bivariate.correlation_matrix_pairwise" do
|
|
30
|
+
Statsample::Bivariate.correlation_matrix_pairwise(ds)
|
|
31
|
+
end
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
require(File.expand_path(File.dirname(__FILE__)+'/helpers_benchmark.rb'))
|
|
2
|
+
|
|
3
|
+
extend BenchPress
|
|
4
|
+
cases=500
|
|
5
|
+
vars=5
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
name "gsl matrix based vs. manual ruby correlation matrix (#{vars} vars, #{cases} cases)"
|
|
9
|
+
author 'Clbustos'
|
|
10
|
+
date '2011-01-18'
|
|
11
|
+
summary "
|
|
12
|
+
A correlation matrix could be constructed using matrix algebra or
|
|
13
|
+
mannualy, calculating covariances, means and sd for each pair of vectors.
|
|
14
|
+
In this test, we test the calculation using #{vars} variables with
|
|
15
|
+
#{cases} cases on each vector
|
|
16
|
+
"
|
|
17
|
+
|
|
18
|
+
reps 200 #number of repetitions
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
ds=vars.times.inject({}) {|ac,v|
|
|
22
|
+
ac["x#{v}"]=Statsample::Vector.new_scale(cases) {rand()}
|
|
23
|
+
ac
|
|
24
|
+
}.to_dataset
|
|
25
|
+
|
|
26
|
+
measure "Statsample::Bivariate.correlation_matrix_optimized" do
|
|
27
|
+
Statsample::Bivariate.correlation_matrix_optimized(ds)
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
measure "Statsample::Bivariate.correlation_matrix_pairwise" do
|
|
31
|
+
Statsample::Bivariate.correlation_matrix_pairwise(ds)
|
|
32
|
+
end
|
|
Binary file
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
|
|
2
|
+
<html>
|
|
3
|
+
<head>
|
|
4
|
+
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" >
|
|
5
|
+
<title>Correlation matrix analysis</title>
|
|
6
|
+
<style>
|
|
7
|
+
body {
|
|
8
|
+
margin:0;
|
|
9
|
+
padding:1em;
|
|
10
|
+
}
|
|
11
|
+
table {
|
|
12
|
+
border-collapse: collapse;
|
|
13
|
+
|
|
14
|
+
}
|
|
15
|
+
table td {
|
|
16
|
+
border: 1px solid black;
|
|
17
|
+
}
|
|
18
|
+
.section {
|
|
19
|
+
margin:0.5em;
|
|
20
|
+
}
|
|
21
|
+
</style>
|
|
22
|
+
|
|
23
|
+
</head><body>
|
|
24
|
+
<h1>Correlation matrix analysis</h1><div id='toc'><div class='title'>List of contents</div>
|
|
25
|
+
<ul>
|
|
26
|
+
<li><a href='#toc_1'>Multiple reggresion of cases,vars,c_v on time_optimized</a></li>
|
|
27
|
+
<ul>
|
|
28
|
+
<li><a href='#toc_2'>ANOVA</a></li>
|
|
29
|
+
</ul>
|
|
30
|
+
<li><a href='#toc_3'>Multiple reggresion of cases,vars,c_v on time_pairwise</a></li>
|
|
31
|
+
<ul>
|
|
32
|
+
<li><a href='#toc_4'>ANOVA</a></li>
|
|
33
|
+
</ul>
|
|
34
|
+
</ul>
|
|
35
|
+
</div>
|
|
36
|
+
<div class='tot'><div class='title'>List of tables</div><ul><li><a href='#table_1'>ANOVA Table</a></li><li><a href='#table_2'>Beta coefficients</a></li><li><a href='#table_3'>ANOVA Table</a></li><li><a href='#table_4'>Beta coefficients</a></li></ul></div>
|
|
37
|
+
<div class='section'><h2>Multiple reggresion of cases,vars,c_v on time_optimized</h2><a name='toc_1'></a>
|
|
38
|
+
<p>Engine: Statsample::Regression::Multiple::RubyEngine</p>
|
|
39
|
+
<p>Cases(listwise)=63(63)</p>
|
|
40
|
+
<p>R=0.978844</p>
|
|
41
|
+
<p>R^2=0.958137</p>
|
|
42
|
+
<p>R^2 Adj=0.956008</p>
|
|
43
|
+
<p>Std.Error R=3.092024</p>
|
|
44
|
+
<p>Equation=4.031667 + 0.018039cases + 0.244790vars + 0.001197c_v</p>
|
|
45
|
+
<div class='section'><h3>ANOVA</h3><a name='toc_2'></a>
|
|
46
|
+
<a name='table_1'></a><table><caption>ANOVA Table</caption><thead><th>source</th><th>ss</th><th>df</th><th>ms</th><th>f</th><th>p</th></thead>
|
|
47
|
+
<tbody>
|
|
48
|
+
<tr><td>Regression</td><td>12910.098</td><td>3</td><td>4303.366</td><td>450.114</td><td>0.000</td></tr>
|
|
49
|
+
<tr><td>Error</td><td>564.076</td><td>59</td><td>9.561</td><td></td><td></td></tr>
|
|
50
|
+
<tr><td>Total</td><td>13474.174</td><td>62</td><td>4312.927</td><td></td><td></td></tr>
|
|
51
|
+
</tbody>
|
|
52
|
+
</table>
|
|
53
|
+
|
|
54
|
+
</div>
|
|
55
|
+
<a name='table_2'></a><table><caption>Beta coefficients</caption><thead><th>coeff</th><th>b</th><th>beta</th><th>se</th><th>t</th></thead>
|
|
56
|
+
<tbody>
|
|
57
|
+
<tr><td>Constant</td><td>4.031667</td><td>-</td><td>0.752604</td><td>5.356953</td></tr>
|
|
58
|
+
<tr><td>cases</td><td>0.018039</td><td>0.381587</td><td>0.001961</td><td>9.200093</td></tr>
|
|
59
|
+
<tr><td>vars</td><td>0.244790</td><td>0.224390</td><td>0.036055</td><td>6.789335</td></tr>
|
|
60
|
+
<tr><td>c_v</td><td>0.001197</td><td>0.584174</td><td>0.000094</td><td>12.738410</td></tr>
|
|
61
|
+
</tbody>
|
|
62
|
+
</table>
|
|
63
|
+
|
|
64
|
+
</div>
|
|
65
|
+
<div class='section'><h2>Multiple reggresion of cases,vars,c_v on time_pairwise</h2><a name='toc_3'></a>
|
|
66
|
+
<p>Engine: Statsample::Regression::Multiple::RubyEngine</p>
|
|
67
|
+
<p>Cases(listwise)=63(63)</p>
|
|
68
|
+
<p>R=0.999637</p>
|
|
69
|
+
<p>R^2=0.999275</p>
|
|
70
|
+
<p>R^2 Adj=0.999238</p>
|
|
71
|
+
<p>Std.Error R=0.538365</p>
|
|
72
|
+
<p>Equation=-0.520303 + -0.000708cases + 1.234451vars + 0.000735c_v</p>
|
|
73
|
+
<div class='section'><h3>ANOVA</h3><a name='toc_4'></a>
|
|
74
|
+
<a name='table_3'></a><table><caption>ANOVA Table</caption><thead><th>source</th><th>ss</th><th>df</th><th>ms</th><th>f</th><th>p</th></thead>
|
|
75
|
+
<tbody>
|
|
76
|
+
<tr><td>Regression</td><td>23554.271</td><td>3</td><td>7851.424</td><td>27089.134</td><td>0.000</td></tr>
|
|
77
|
+
<tr><td>Error</td><td>17.100</td><td>59</td><td>0.290</td><td></td><td></td></tr>
|
|
78
|
+
<tr><td>Total</td><td>23571.372</td><td>62</td><td>7851.714</td><td></td><td></td></tr>
|
|
79
|
+
</tbody>
|
|
80
|
+
</table>
|
|
81
|
+
|
|
82
|
+
</div>
|
|
83
|
+
<a name='table_4'></a><table><caption>Beta coefficients</caption><thead><th>coeff</th><th>b</th><th>beta</th><th>se</th><th>t</th></thead>
|
|
84
|
+
<tbody>
|
|
85
|
+
<tr><td>Constant</td><td>-0.520303</td><td>-</td><td>0.131039</td><td>-3.970594</td></tr>
|
|
86
|
+
<tr><td>cases</td><td>-0.000708</td><td>-0.011324</td><td>0.000341</td><td>-2.074007</td></tr>
|
|
87
|
+
<tr><td>vars</td><td>1.234451</td><td>0.855546</td><td>0.006278</td><td>196.641087</td></tr>
|
|
88
|
+
<tr><td>c_v</td><td>0.000735</td><td>0.271138</td><td>0.000016</td><td>44.912972</td></tr>
|
|
89
|
+
</tbody>
|
|
90
|
+
</table>
|
|
91
|
+
|
|
92
|
+
</div>
|
|
93
|
+
</body></html>
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# This test creates a dataset used to select the best algorithm
|
|
2
|
+
# to use on correlation matrix
|
|
3
|
+
require(File.expand_path(File.dirname(__FILE__)+'/../helpers_benchmark.rb'))
|
|
4
|
+
require 'statsample'
|
|
5
|
+
require 'benchmark'
|
|
6
|
+
|
|
7
|
+
def create_dataset(vars,cases)
|
|
8
|
+
ran=Distribution::Normal.rng
|
|
9
|
+
ds=vars.times.inject({}) {|ac,v|
|
|
10
|
+
ac["x#{v}"]=Statsample::Vector.new_scale(cases) {ran.call}
|
|
11
|
+
ac
|
|
12
|
+
}.to_dataset
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
def prediction_pairwise(vars,cases)
|
|
16
|
+
Statsample::Bivariate.prediction_pairwise(vars,cases) / 10
|
|
17
|
+
end
|
|
18
|
+
def prediction_optimized(vars,cases)
|
|
19
|
+
Statsample::Bivariate.prediction_optimized(vars,cases) / 10
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
if !File.exists?("correlation_matrix.ds") or File.mtime(__FILE__) > File.mtime("correlation_matrix.ds")
|
|
25
|
+
reps=100 #number of repetitions
|
|
26
|
+
ds_sizes=[5,10,30,50,100,150,200,500,1000]
|
|
27
|
+
ds_vars=[3,4,5,10,20,30,40]
|
|
28
|
+
#ds_sizes=[5,10]
|
|
29
|
+
#ds_vars=[3,5,20]
|
|
30
|
+
rs=Statsample::Dataset.new(%w{cases vars time_optimized time_pairwise})
|
|
31
|
+
|
|
32
|
+
ds_sizes.each do |cases|
|
|
33
|
+
ds_vars.each do |vars|
|
|
34
|
+
ds=create_dataset(vars,cases)
|
|
35
|
+
time_optimized= Benchmark.realtime do
|
|
36
|
+
reps.times {
|
|
37
|
+
Statsample::Bivariate.correlation_matrix_optimized(ds)
|
|
38
|
+
ds.clear_gsl
|
|
39
|
+
}
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
time_pairwise= Benchmark.realtime do
|
|
43
|
+
reps.times {
|
|
44
|
+
Statsample::Bivariate.correlation_matrix_pairwise(ds)
|
|
45
|
+
}
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
puts "Cases:#{cases}, vars:#{vars} -> opt:%0.3f (%0.3f) | pair: %0.3f (%0.3f)" % [time_optimized, prediction_optimized(vars,cases), time_pairwise, prediction_pairwise(vars,cases)]
|
|
49
|
+
|
|
50
|
+
rs.add_case({'cases'=>cases,'vars'=>vars,'time_optimized'=>Math.sqrt(time_optimized*1000),'time_pairwise'=>Math.sqrt(time_pairwise*1000)})
|
|
51
|
+
end
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
else
|
|
55
|
+
rs=Statsample.load("correlation_matrix.ds")
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
rs.fields.each {|f| rs[f].type=:scale}
|
|
60
|
+
|
|
61
|
+
rs['c_v']=rs.collect {|row| row['cases']*row['vars']}
|
|
62
|
+
|
|
63
|
+
rs.update_valid_data
|
|
64
|
+
rs.save("correlation_matrix.ds")
|
|
65
|
+
Statsample::Excel.write(rs,"correlation_matrix.xls")
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
rb=ReportBuilder.new(:name=>"Correlation matrix analysis")
|
|
70
|
+
|
|
71
|
+
rb.add(Statsample::Regression.multiple(rs[['cases','vars','time_optimized','c_v']],'time_optimized', :digits=>6))
|
|
72
|
+
rb.add(Statsample::Regression.multiple(rs[['cases','vars','time_pairwise','c_v']],'time_pairwise', :digits=>6))
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
rb.save_html("correlation_matrix.html")
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
require(File.expand_path(File.dirname(__FILE__)+'/helpers_benchmark.rb'))
|
|
2
|
+
|
|
3
|
+
extend BenchPress
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
name "Statsample::Factor::Map with and without GSL"
|
|
7
|
+
author 'Clbustos'
|
|
8
|
+
date '2011-01-18'
|
|
9
|
+
summary "Velicer's MAP uses a lot of Matrix algebra. How much we can improve the timing using GSL?
|
|
10
|
+
"
|
|
11
|
+
|
|
12
|
+
reps 20 #number of repetitions
|
|
13
|
+
|
|
14
|
+
m=Matrix[
|
|
15
|
+
[ 1, 0.846, 0.805, 0.859, 0.473, 0.398, 0.301, 0.382],
|
|
16
|
+
[ 0.846, 1, 0.881, 0.826, 0.376, 0.326, 0.277, 0.415],
|
|
17
|
+
[ 0.805, 0.881, 1, 0.801, 0.38, 0.319, 0.237, 0.345],
|
|
18
|
+
[ 0.859, 0.826, 0.801, 1, 0.436, 0.329, 0.327, 0.365],
|
|
19
|
+
[ 0.473, 0.376, 0.38, 0.436, 1, 0.762, 0.73, 0.629],
|
|
20
|
+
[ 0.398, 0.326, 0.319, 0.329, 0.762, 1, 0.583, 0.577],
|
|
21
|
+
[ 0.301, 0.277, 0.237, 0.327, 0.73, 0.583, 1, 0.539],
|
|
22
|
+
[ 0.382, 0.415, 0.345, 0.365, 0.629, 0.577, 0.539, 1]
|
|
23
|
+
]
|
|
24
|
+
|
|
25
|
+
map=Statsample::Factor::MAP.new(m)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
measure "Statsample::Factor::MAP without GSL" do
|
|
29
|
+
map.use_gsl=false
|
|
30
|
+
map.compute
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
measure "Statsample::Factor::MAP with GSL" do
|
|
34
|
+
map.use_gsl=true
|
|
35
|
+
map.compute
|
|
36
|
+
end
|
|
37
|
+
|
data/examples/boxplot.rb
CHANGED
|
@@ -1,17 +1,16 @@
|
|
|
1
1
|
#!/usr/bin/ruby
|
|
2
2
|
$:.unshift(File.dirname(__FILE__)+'/../lib/')
|
|
3
|
-
$:.unshift('/home/cdx/dev/reportbuilder/lib/')
|
|
4
|
-
|
|
5
|
-
require 'benchmark'
|
|
6
3
|
require 'statsample'
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
a.push(
|
|
13
|
-
a
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
4
|
+
Statsample::Analysis.store(Statsample::Graph::Boxplot) do
|
|
5
|
+
n=30
|
|
6
|
+
a=rnorm(n-1,50,10)
|
|
7
|
+
b=rnorm(n, 30,5)
|
|
8
|
+
c=rnorm(n,5,1)
|
|
9
|
+
a.push(2)
|
|
10
|
+
boxplot(:vectors=>[a,b,c],:width=>300, :height=>300, :groups=>%w{first first second}, :minimum=>0)
|
|
11
|
+
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
if __FILE__==$0
|
|
15
|
+
Statsample::Analysis.run
|
|
16
|
+
end
|