statsample 1.5.0 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.build.sh +15 -0
- data/.gitignore +1 -0
- data/.travis.yml +19 -7
- data/CONTRIBUTING.md +33 -0
- data/History.txt +5 -0
- data/README.md +41 -53
- data/benchmarks/correlation_matrix_15_variables.rb +6 -5
- data/benchmarks/correlation_matrix_5_variables.rb +6 -5
- data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +23 -26
- data/examples/boxplot.rb +17 -5
- data/examples/correlation_matrix.rb +36 -7
- data/examples/dataset.rb +25 -5
- data/examples/dominance_analysis.rb +8 -7
- data/examples/dominance_analysis_bootstrap.rb +16 -11
- data/examples/histogram.rb +16 -2
- data/examples/icc.rb +5 -6
- data/examples/levene.rb +17 -3
- data/examples/multiple_regression.rb +6 -3
- data/examples/parallel_analysis.rb +11 -6
- data/examples/polychoric.rb +26 -13
- data/examples/principal_axis.rb +8 -4
- data/examples/reliability.rb +10 -10
- data/examples/scatterplot.rb +8 -0
- data/examples/t_test.rb +7 -0
- data/examples/u_test.rb +10 -2
- data/examples/vector.rb +9 -6
- data/examples/velicer_map_test.rb +12 -8
- data/lib/statsample.rb +13 -47
- data/lib/statsample/analysis/suite.rb +1 -1
- data/lib/statsample/anova/oneway.rb +6 -6
- data/lib/statsample/anova/twoway.rb +26 -24
- data/lib/statsample/bivariate.rb +78 -61
- data/lib/statsample/bivariate/pearson.rb +2 -2
- data/lib/statsample/codification.rb +45 -32
- data/lib/statsample/converter/csv.rb +15 -53
- data/lib/statsample/converter/spss.rb +6 -5
- data/lib/statsample/converters.rb +50 -211
- data/lib/statsample/crosstab.rb +26 -25
- data/lib/statsample/daru.rb +117 -0
- data/lib/statsample/dataset.rb +70 -942
- data/lib/statsample/dominanceanalysis.rb +16 -17
- data/lib/statsample/dominanceanalysis/bootstrap.rb +26 -28
- data/lib/statsample/factor/parallelanalysis.rb +17 -19
- data/lib/statsample/factor/pca.rb +21 -20
- data/lib/statsample/factor/principalaxis.rb +3 -3
- data/lib/statsample/graph/boxplot.rb +8 -16
- data/lib/statsample/graph/histogram.rb +4 -4
- data/lib/statsample/graph/scatterplot.rb +8 -7
- data/lib/statsample/histogram.rb +128 -119
- data/lib/statsample/matrix.rb +20 -16
- data/lib/statsample/multiset.rb +39 -38
- data/lib/statsample/regression.rb +3 -3
- data/lib/statsample/regression/multiple.rb +8 -10
- data/lib/statsample/regression/multiple/alglibengine.rb +96 -89
- data/lib/statsample/regression/multiple/baseengine.rb +32 -32
- data/lib/statsample/regression/multiple/gslengine.rb +33 -36
- data/lib/statsample/regression/multiple/matrixengine.rb +7 -9
- data/lib/statsample/regression/multiple/rubyengine.rb +39 -41
- data/lib/statsample/reliability.rb +23 -25
- data/lib/statsample/reliability/icc.rb +8 -7
- data/lib/statsample/reliability/multiscaleanalysis.rb +14 -12
- data/lib/statsample/reliability/scaleanalysis.rb +58 -60
- data/lib/statsample/reliability/skillscaleanalysis.rb +34 -29
- data/lib/statsample/resample.rb +1 -1
- data/lib/statsample/shorthand.rb +29 -25
- data/lib/statsample/test/kolmogorovsmirnov.rb +5 -3
- data/lib/statsample/test/levene.rb +28 -27
- data/lib/statsample/test/t.rb +7 -9
- data/lib/statsample/test/umannwhitney.rb +28 -28
- data/lib/statsample/test/wilcoxonsignedrank.rb +45 -43
- data/lib/statsample/vector.rb +70 -1013
- data/lib/statsample/version.rb +1 -1
- data/statsample.gemspec +12 -16
- data/test/helpers_tests.rb +1 -1
- data/test/test_analysis.rb +17 -17
- data/test/test_anova_contrast.rb +6 -6
- data/test/test_anovatwowaywithdataset.rb +8 -8
- data/test/test_anovawithvectors.rb +8 -8
- data/test/test_awesome_print_bug.rb +1 -1
- data/test/test_bartlettsphericity.rb +4 -4
- data/test/test_bivariate.rb +48 -43
- data/test/test_codification.rb +33 -33
- data/test/test_crosstab.rb +9 -9
- data/test/test_dataset.rb +28 -458
- data/test/test_factor.rb +46 -38
- data/test/test_factor_pa.rb +22 -13
- data/test/test_ggobi.rb +4 -4
- data/test/test_gsl.rb +4 -4
- data/test/test_histogram.rb +3 -3
- data/test/test_matrix.rb +13 -13
- data/test/test_multiset.rb +103 -91
- data/test/test_regression.rb +57 -52
- data/test/test_reliability.rb +55 -45
- data/test/test_reliability_icc.rb +8 -8
- data/test/test_reliability_skillscale.rb +26 -24
- data/test/test_resample.rb +1 -1
- data/test/test_statistics.rb +3 -13
- data/test/test_stest.rb +9 -9
- data/test/test_stratified.rb +3 -3
- data/test/test_test_t.rb +12 -12
- data/test/test_umannwhitney.rb +2 -2
- data/test/test_vector.rb +76 -613
- data/test/test_wilcoxonsignedrank.rb +4 -4
- metadata +57 -28
- data/lib/statsample/rserve_extension.rb +0 -20
- data/lib/statsample/vector/gsl.rb +0 -106
- data/test/fixtures/repeated_fields.csv +0 -7
- data/test/fixtures/scientific_notation.csv +0 -4
- data/test/fixtures/test_csv.csv +0 -7
- data/test/fixtures/test_xls.xls +0 -0
- data/test/test_csv.rb +0 -63
- data/test/test_rserve_extension.rb +0 -42
- data/test/test_xls.rb +0 -52
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 244a3b640d29832affc12304a77aa447f4f2df0d
|
4
|
+
data.tar.gz: 08261176b4763367e65036c486e699491e209e7d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 125beac1e267c030c6d4b70b84543a7888f3b8fcb43e6475913b00f7332dd5ce01bb6330813c3c99f5d649add677a339dc8ef366b466f2710aeaaf4bf20c0cdf
|
7
|
+
data.tar.gz: 402c505e62785e2f1eacb28b8798fed26193ed54171f7dce2a106584800ffd3de657013559c55df9e20fe5827fe6309c53f526213fc9ec23bfa2a95bf086d38e
|
data/.build.sh
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
#!/bin/bash
|
2
|
+
|
3
|
+
git clone https://github.com/SciRuby/nmatrix.git
|
4
|
+
cd nmatrix
|
5
|
+
gem build nmatrix.gemspec
|
6
|
+
gem install nmatrix-0.1.0.gem
|
7
|
+
cd ..
|
8
|
+
rm -rf nmatrix
|
9
|
+
git clone https://github.com/v0dro/gsl-nmatrix
|
10
|
+
cd gsl-nmatrix
|
11
|
+
gem build gsl-nmatrix.gemspec
|
12
|
+
gem install gsl-nmatrix-1.17.gem
|
13
|
+
cd ..
|
14
|
+
rm -rf gsl-nmatrix
|
15
|
+
|
data/.gitignore
CHANGED
data/.travis.yml
CHANGED
@@ -1,16 +1,28 @@
|
|
1
1
|
language:
|
2
2
|
ruby
|
3
3
|
|
4
|
+
env:
|
5
|
+
- CPLUS_INCLUDE_PATH=/usr/include/atlas C_INCLUDE_PATH=/usr/include/atlas
|
6
|
+
|
4
7
|
rvm:
|
5
|
-
- 1.9.3
|
6
|
-
- 2.0
|
7
|
-
- 2.1
|
8
|
-
- 2.2
|
8
|
+
- '1.9.3'
|
9
|
+
- '2.0'
|
10
|
+
- '2.1'
|
11
|
+
- '2.2'
|
12
|
+
|
13
|
+
matrix:
|
14
|
+
fast_finish:
|
15
|
+
true
|
16
|
+
|
17
|
+
script: "bundle exec rake test"
|
18
|
+
|
19
|
+
install:
|
20
|
+
- gem install bundler
|
21
|
+
- ./.build.sh
|
22
|
+
- bundle install
|
9
23
|
|
10
|
-
script:
|
11
|
-
bundle exec rake test
|
12
|
-
|
13
24
|
before_install:
|
14
25
|
- sudo apt-get update -qq
|
26
|
+
- sudo apt-get install -qq libatlas-base-dev
|
15
27
|
- sudo apt-get install -y libgsl0-dev r-base r-base-dev
|
16
28
|
- sudo Rscript -e "install.packages(c('Rserve','irr'),,'http://cran.us.r-project.org')"
|
data/CONTRIBUTING.md
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
# Contributing guide
|
2
|
+
|
3
|
+
## Installing statsample development dependencies
|
4
|
+
|
5
|
+
If you want to run the full test suite, you will need the latest unreleased nmatrix and gsl-nmatrix ruby gems. They will be released upstream soon but please follow this procedure for now.
|
6
|
+
|
7
|
+
Keep in mind that neither nmatrix NOR gsl-nmatrix is NECESSARY for using statsample. They are only required for an optional speed up.
|
8
|
+
|
9
|
+
Statsample also works with [rb-gsl](https://github.com/blackwinter/rb-gsl), though installing that will cause a problem if you have any nmatrix dependent code because narray and nmatrix have a namespace problem.
|
10
|
+
|
11
|
+
To install dependencies, execute the following commands:
|
12
|
+
|
13
|
+
`export CPLUS_INCLUDE_PATH=/usr/include/atlas`
|
14
|
+
`export C_INCLUDE_PATH=/usr/include/atlas`
|
15
|
+
`sudo apt-get update -qq`
|
16
|
+
`sudo apt-get install -qq libatlas-base-dev`
|
17
|
+
`sudo apt-get --purge remove liblapack-dev liblapack3 liblapack3gf`
|
18
|
+
`sudo apt-get install -y libgsl0-dev r-base r-base-dev`
|
19
|
+
`sudo Rscript -e "install.packages(c('Rserve','irr'),,'http://cran.us.r-project.org')"`
|
20
|
+
|
21
|
+
Then execute the .build.sh script to clone and install the latest nmatrix and gsl-nmatrix on your system:
|
22
|
+
|
23
|
+
`./.build.sh`
|
24
|
+
|
25
|
+
Then finally install remaining dependencies:
|
26
|
+
|
27
|
+
`bundle install`
|
28
|
+
|
29
|
+
And run the test suite (should be all green):
|
30
|
+
|
31
|
+
`bundle exec rake test`
|
32
|
+
|
33
|
+
If you have problems installing nmatrix, please consult the [nmatrix installation wiki](https://github.com/SciRuby/nmatrix/wiki/Installation) or the [mailing list](https://groups.google.com/forum/#!forum/sciruby-dev).
|
data/History.txt
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
=== 2.0.0 / 2015-06-20
|
2
|
+
* Added dependency on daru and replaced Statsample::Vector and Dataset with
|
3
|
+
Daru::Vector and Daru::DataFrame.
|
4
|
+
* NMatrix and gsl-nmatrix are used as development dependencies.
|
5
|
+
|
1
6
|
=== 1.5.0 / 2015-06-11
|
2
7
|
* Made sure all methods work properly with and without GSL.
|
3
8
|
* Statsample works with either rb-gsl or gsl-nmatrix.
|
data/README.md
CHANGED
@@ -32,18 +32,56 @@ If you need to work on Structural Equation Modeling, you could see +statsample-s
|
|
32
32
|
```bash
|
33
33
|
$ [sudo] gem install statsample-sem
|
34
34
|
```
|
35
|
+
# Testing
|
36
|
+
|
37
|
+
See CONTRIBUTING for information on testing and contributing to statsample.
|
35
38
|
|
36
39
|
# Documentation
|
37
40
|
|
38
41
|
You can see the latest documentation in [rubydoc.info](http://www.rubydoc.info/github/sciruby/statsample/master).
|
39
42
|
|
43
|
+
# Usage
|
44
|
+
|
45
|
+
## Notebooks
|
46
|
+
|
47
|
+
You can see some iruby notebooks here:
|
48
|
+
|
49
|
+
### Statistics
|
50
|
+
|
51
|
+
* [Correlation Matrix with daru and statsample](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Statistics/Correlation%20Matrix%20with%20daru%20and%20statsample.ipynb)
|
52
|
+
* [Dominance Analysis with statsample](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Statistics/Dominance%20Analysis%20with%20statsample.ipynb)
|
53
|
+
* [Reliability ICC](http://nbviewer.ipython.org/github/v0dro/sciruby-notebooks/blob/master/Statistics/Reliability%20ICC%20with%20statsample.ipynb)
|
54
|
+
* [Levene Test](http://nbviewer.ipython.org/github/v0dro/sciruby-notebooks/blob/master/Statistics/Levene%20Test.ipynb)
|
55
|
+
* [Multiple Regression](http://nbviewer.ipython.org/github/v0dro/sciruby-notebooks/blob/master/Statistics/Multiple%20Regression.ipynb)
|
56
|
+
* [Parallel Analysis on PCA](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Statistics/Parallel%20Analysis%20on%20PCA.ipynb)
|
57
|
+
* [Polychoric Analysis](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Statistics/Polychoric%20Correlation.ipynb)
|
58
|
+
* [Reliability Scale and Multiscale Analysis](https://github.com/SciRuby/sciruby-notebooks/blob/master/Statistics/Reliability%20Scale%20Analysis.ipynb)
|
59
|
+
* [Velicer MAP Test](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Statistics/Velicer%20MAP%20test.ipynb)
|
60
|
+
|
61
|
+
### Visualizations
|
62
|
+
|
63
|
+
* [Creating Boxplots with daru and statsample](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Visualization/Boxplot%20with%20daru%20and%20statsample.ipynb)
|
64
|
+
* [Creating A Histogram](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Visualization/Creating%20a%20Histogram.ipynb)
|
65
|
+
* [Creating a Scatterplot](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Visualization/Scatterplot%20with%20statsample.ipynb)
|
66
|
+
|
67
|
+
### Working with DataFrame and Vector
|
68
|
+
|
69
|
+
* [Creating Vectors and DataFrames with daru](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Creation%20of%20Vector%20and%20DataFrame.ipynb)
|
70
|
+
* [Detailed Usage of Daru::Vector](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Usage%20of%20Vector.ipynb)
|
71
|
+
* [Detailed Usage of Daru::DataFrame](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Usage%20of%20DataFrame.ipynb)
|
72
|
+
* [Visualizing Data with Daru::DataFrame](http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Visualization/Visualizing%20data%20with%20daru%20DataFrame.ipynb)
|
73
|
+
|
74
|
+
## Examples
|
75
|
+
|
76
|
+
See the /examples directory for some use cases. The notebooks listed above have mostly
|
77
|
+
the same examples, and they look better so you might want to see that first.
|
78
|
+
|
40
79
|
# Description
|
41
80
|
|
42
81
|
A suite for basic and advanced statistics on Ruby. Tested on CRuby 1.9.3, 2.0.0 and 2.1.1. See `.travis.yml` for more information.
|
43
82
|
|
44
83
|
Include:
|
45
84
|
- Descriptive statistics: frequencies, median, mean, standard error, skew, kurtosis (and many others).
|
46
|
-
- Imports and exports datasets from and to Excel, CSV and plain text files.
|
47
85
|
- Correlations: Pearson's r, Spearman's rank correlation (rho), point biserial, tau a, tau b and gamma. Tetrachoric and Polychoric correlations are provided by the +statsample-bivariate-extension+ gem.
|
48
86
|
- Intra-class correlation
|
49
87
|
- Anova: generic and vector-based One-way ANOVA and Two-way ANOVA, with contrasts for One-way ANOVA.
|
@@ -75,8 +113,7 @@ Include:
|
|
75
113
|
# Features
|
76
114
|
|
77
115
|
- Classes for manipulation and storage of data:
|
78
|
-
-
|
79
|
-
- Statsample::Dataset: a group of Statsample::Vector, analog to a excel spreadsheet or a dataframe on R. The base of almost all operations on statsample.
|
116
|
+
- Uses [daru](https://github.com/v0dro/daru) for storing data and basic statistics.
|
80
117
|
- Statsample::Multiset: multiple datasets with same fields and type of vectors
|
81
118
|
- Anova module provides generic Statsample::Anova::OneWay and vector based Statsample::Anova::OneWayWithVectors. Also you can create contrast using Statsample::Anova::Contrast
|
82
119
|
- Module Statsample::Bivariate provides covariance and pearson, spearman, point biserial, tau a, tau b, gamma, tetrachoric (see Bivariate::Tetrachoric) and polychoric (see Bivariate::Polychoric) correlations. Include methods to create correlation and covariance matrices
|
@@ -100,10 +137,7 @@ Include:
|
|
100
137
|
- Statsample::DominanceAnalysis class can report dominance analysis for a sample, using uni or multivariate dependent variables
|
101
138
|
- Statsample::DominanceAnalysis::Bootstrap can execute bootstrap analysis to determine dominance stability, as recommended by Azen & Budescu (2003) link[http://psycnet.apa.org/journals/met/8/2/129/].
|
102
139
|
- Module Statsample::Codification, to help to codify open questions
|
103
|
-
- Converters to
|
104
|
-
- Statsample::Database : Can create sql to create tables, read and insert data
|
105
|
-
- Statsample::CSV : Read and write CSV files
|
106
|
-
- Statsample::Excel : Read and write Excel files
|
140
|
+
- Converters to export data:
|
107
141
|
- Statsample::Mx : Write Mx Files
|
108
142
|
- Statsample::GGobi : Write Ggobi files
|
109
143
|
- Module Statsample::Crosstab provides function to create crosstab for categorical data
|
@@ -130,52 +164,6 @@ Include:
|
|
130
164
|
- Gem <tt>statsample-glm</tt> provides you with GLM methods to work with Logistic, Poisson and Gaussian regression, using ML or IRWLS.
|
131
165
|
- Close integration with gem <tt>reportbuilder</tt>, to easily create reports on text, html and rtf formats.
|
132
166
|
|
133
|
-
# Usage
|
134
|
-
|
135
|
-
See the [examples folder](https://github.com/clbustos/statsample/tree/master/examples/) too.
|
136
|
-
|
137
|
-
## Boxplot
|
138
|
-
|
139
|
-
```ruby
|
140
|
-
require 'statsample'
|
141
|
-
|
142
|
-
ss_analysis(Statsample::Graph::Boxplot) do
|
143
|
-
n = 30
|
144
|
-
a = rnorm(n-1, 50, 10)
|
145
|
-
b = rnorm(n, 30, 5)
|
146
|
-
c = rnorm(n, 5, 1)
|
147
|
-
a.push(2)
|
148
|
-
boxplot(vectors: [a, b, c],
|
149
|
-
width: 300,
|
150
|
-
height: 300,
|
151
|
-
groups: %w{first first second},
|
152
|
-
minimum: 0)
|
153
|
-
end
|
154
|
-
|
155
|
-
Statsample::Analysis.run # Open svg file on *nix application defined
|
156
|
-
```
|
157
|
-
|
158
|
-
## Correlation matrix
|
159
|
-
|
160
|
-
```ruby
|
161
|
-
require 'statsample'
|
162
|
-
# Note R like generation of random gaussian variable
|
163
|
-
# and correlation matrix
|
164
|
-
|
165
|
-
ss_analysis("Statsample::Bivariate.correlation_matrix") do
|
166
|
-
samples = 1000
|
167
|
-
ds = data_frame(
|
168
|
-
'a' => rnorm(samples),
|
169
|
-
'b' => rnorm(samples),
|
170
|
-
'c' => rnorm(samples),
|
171
|
-
'd' => rnorm(samples))
|
172
|
-
cm = cor(ds)
|
173
|
-
summary(cm)
|
174
|
-
end
|
175
|
-
|
176
|
-
Statsample::Analysis.run_batch # Echo output to console
|
177
|
-
```
|
178
|
-
|
179
167
|
# Resources
|
180
168
|
|
181
169
|
- Source code on github :: http://github.com/sciruby/statsample
|
@@ -4,7 +4,6 @@ extend BenchPress
|
|
4
4
|
cases=250
|
5
5
|
vars=20
|
6
6
|
|
7
|
-
|
8
7
|
name "gsl matrix based vs. manual ruby correlation matrix (#{vars} vars, #{cases} cases)"
|
9
8
|
author 'Clbustos'
|
10
9
|
date '2011-01-18'
|
@@ -17,10 +16,12 @@ In this test, we test the calculation using #{vars} variables with
|
|
17
16
|
|
18
17
|
reps 200 #number of repetitions
|
19
18
|
|
20
|
-
ds=
|
21
|
-
|
22
|
-
ac
|
23
|
-
|
19
|
+
ds = Daru::DataFrame.new(
|
20
|
+
vars.times.inject({}) do |ac,v|
|
21
|
+
ac["x#{v}".to_sym]=Daru::Vector.new_with_size(cases) {rand()}
|
22
|
+
ac
|
23
|
+
end
|
24
|
+
)
|
24
25
|
|
25
26
|
measure "Statsample::Bivariate.correlation_matrix_optimized" do
|
26
27
|
Statsample::Bivariate.correlation_matrix_optimized(ds)
|
@@ -17,11 +17,12 @@ In this test, we test the calculation using #{vars} variables with
|
|
17
17
|
|
18
18
|
reps 200 #number of repetitions
|
19
19
|
|
20
|
-
|
21
|
-
|
22
|
-
ac["x#{v}"]=
|
23
|
-
ac
|
24
|
-
|
20
|
+
ds = Daru::DataFrame.new(
|
21
|
+
vars.times.inject({}) do |ac,v|
|
22
|
+
ac["x#{v}".to_sym]=Daru::Vector.new_with_size(cases) {rand()}
|
23
|
+
ac
|
24
|
+
end
|
25
|
+
)
|
25
26
|
|
26
27
|
measure "Statsample::Bivariate.correlation_matrix_optimized" do
|
27
28
|
Statsample::Bivariate.correlation_matrix_optimized(ds)
|
@@ -5,11 +5,13 @@ require 'statsample'
|
|
5
5
|
require 'benchmark'
|
6
6
|
|
7
7
|
def create_dataset(vars,cases)
|
8
|
-
ran=Distribution::Normal.rng
|
9
|
-
ds=
|
10
|
-
|
11
|
-
|
12
|
-
|
8
|
+
ran = Distribution::Normal.rng
|
9
|
+
ds = Daru::DataFrame.new(
|
10
|
+
vars.times.inject({}) do |ac,v|
|
11
|
+
ac["x#{v}".to_sym] = Daru::Vector.new_with_size(cases) {ran.call}
|
12
|
+
ac
|
13
|
+
end
|
14
|
+
)
|
13
15
|
end
|
14
16
|
|
15
17
|
def prediction_pairwise(vars,cases)
|
@@ -19,19 +21,17 @@ def prediction_optimized(vars,cases)
|
|
19
21
|
Statsample::Bivariate.prediction_optimized(vars,cases) / 10
|
20
22
|
end
|
21
23
|
|
22
|
-
|
23
|
-
|
24
24
|
if !File.exists?("correlation_matrix.ds") or File.mtime(__FILE__) > File.mtime("correlation_matrix.ds")
|
25
25
|
reps=100 #number of repetitions
|
26
26
|
ds_sizes=[5,10,30,50,100,150,200,500,1000]
|
27
27
|
ds_vars=[3,4,5,10,20,30,40]
|
28
28
|
#ds_sizes=[5,10]
|
29
29
|
#ds_vars=[3,5,20]
|
30
|
-
rs=
|
30
|
+
rs = Daru::DataFrame.new({}, order: [:cases, :vars, :time_optimized, :time_pairwise])
|
31
31
|
|
32
32
|
ds_sizes.each do |cases|
|
33
33
|
ds_vars.each do |vars|
|
34
|
-
ds=create_dataset(vars,cases)
|
34
|
+
ds = create_dataset(vars,cases)
|
35
35
|
time_optimized= Benchmark.realtime do
|
36
36
|
reps.times {
|
37
37
|
Statsample::Bivariate.correlation_matrix_optimized(ds)
|
@@ -40,36 +40,33 @@ ds_sizes.each do |cases|
|
|
40
40
|
end
|
41
41
|
|
42
42
|
time_pairwise= Benchmark.realtime do
|
43
|
-
reps.times {
|
44
|
-
Statsample::Bivariate.correlation_matrix_pairwise(ds)
|
45
|
-
}
|
43
|
+
reps.times { Statsample::Bivariate.correlation_matrix_pairwise(ds) }
|
46
44
|
end
|
47
45
|
|
48
46
|
puts "Cases:#{cases}, vars:#{vars} -> opt:%0.3f (%0.3f) | pair: %0.3f (%0.3f)" % [time_optimized, prediction_optimized(vars,cases), time_pairwise, prediction_pairwise(vars,cases)]
|
49
47
|
|
50
|
-
rs.
|
48
|
+
rs.add_row(Daru::Vector.new({
|
49
|
+
:cases => cases,
|
50
|
+
:vars => vars,
|
51
|
+
:time_optimized => Math.sqrt(time_optimized*1000),
|
52
|
+
:time_pairwise =>Math.sqrt(time_pairwise*1000)
|
53
|
+
})
|
54
|
+
)
|
51
55
|
end
|
52
|
-
end
|
53
|
-
|
56
|
+
end
|
54
57
|
else
|
55
58
|
rs=Statsample.load("correlation_matrix.ds")
|
56
59
|
end
|
57
60
|
|
61
|
+
rs[:c_v] = rs.collect {|row| row[:cases]*row[:vars]}
|
58
62
|
|
59
|
-
rs.
|
60
|
-
|
61
|
-
rs['c_v']=rs.collect {|row| row['cases']*row['vars']}
|
62
|
-
|
63
|
-
rs.update_valid_data
|
63
|
+
rs.update
|
64
64
|
rs.save("correlation_matrix.ds")
|
65
65
|
Statsample::Excel.write(rs,"correlation_matrix.xls")
|
66
66
|
|
67
|
+
rb = ReportBuilder.new(:name=>"Correlation matrix analysis")
|
67
68
|
|
68
|
-
|
69
|
-
rb
|
70
|
-
|
71
|
-
rb.add(Statsample::Regression.multiple(rs[['cases','vars','time_optimized','c_v']],'time_optimized', :digits=>6))
|
72
|
-
rb.add(Statsample::Regression.multiple(rs[['cases','vars','time_pairwise','c_v']],'time_pairwise', :digits=>6))
|
73
|
-
|
69
|
+
rb.add(Statsample::Regression.multiple(rs[:cases,:vars,:time_optimized,:c_v],:time_optimized, :digits=>6))
|
70
|
+
rb.add(Statsample::Regression.multiple(rs[:cases,:vars,:time_pairwise,:c_v],:time_pairwise, :digits=>6))
|
74
71
|
|
75
72
|
rb.save_html("correlation_matrix.html")
|
data/examples/boxplot.rb
CHANGED
@@ -1,14 +1,26 @@
|
|
1
1
|
#!/usr/bin/ruby
|
2
|
+
# == Description
|
3
|
+
#
|
4
|
+
# This example illustrates how daru, combined with Statsample::Graph::Boxplot
|
5
|
+
# can be used for generating box plots of a normally distributed set of data.
|
6
|
+
#
|
7
|
+
# The 'rnorm' function, defined in statsample/shorthands generates a Daru::Vector
|
8
|
+
# object which contains the specified number of random variables in a normal distribution.
|
9
|
+
# It uses the 'distribution' gem for this purpose.
|
10
|
+
#
|
11
|
+
# Create a boxplot of the data by specifying the vectors a, b and c and providing
|
12
|
+
# necessary options to Statsample::Graph::Boxplot. The 'boxplot' function is shorthand
|
13
|
+
# for calling Statsample::Graph::Boxplot.
|
2
14
|
$:.unshift(File.dirname(__FILE__)+'/../lib/')
|
3
15
|
require 'statsample'
|
4
16
|
Statsample::Analysis.store(Statsample::Graph::Boxplot) do
|
5
|
-
n=30
|
6
|
-
a=rnorm(n-1,50,10)
|
7
|
-
b=rnorm(n, 30,5)
|
8
|
-
c=rnorm(n,5,1)
|
17
|
+
n = 30
|
18
|
+
a = rnorm(n-1,50,10)
|
19
|
+
b = rnorm(n, 30,5)
|
20
|
+
c = rnorm(n,5,1)
|
9
21
|
a.push(2)
|
22
|
+
|
10
23
|
boxplot(:vectors=>[a,b,c],:width=>300, :height=>300, :groups=>%w{first first second}, :minimum=>0)
|
11
|
-
|
12
24
|
end
|
13
25
|
|
14
26
|
if __FILE__==$0
|
@@ -1,16 +1,45 @@
|
|
1
1
|
#!/usr/bin/ruby
|
2
|
+
|
3
|
+
# == Description
|
4
|
+
#
|
5
|
+
# Creating and summarizing a correlation matrix with daru and statsample
|
2
6
|
$:.unshift(File.dirname(__FILE__)+'/../lib/')
|
3
|
-
require 'statsample'
|
4
7
|
|
8
|
+
require 'statsample'
|
5
9
|
Statsample::Analysis.store("Statsample::Bivariate.correlation_matrix") do
|
10
|
+
# It so happens that Daru::Vector and Daru::DataFrame must update metadata
|
11
|
+
# like positions of missing values every time they are created.
|
12
|
+
#
|
13
|
+
# Since we don't have any missing values in the data that we are creating,
|
14
|
+
# we set Daru.lazy_update = true so that missing data is not updated every
|
15
|
+
# time and things happen much faster.
|
16
|
+
#
|
17
|
+
# In case you do have missing data and lazy_update has been set to *true*,
|
18
|
+
# you _SHOULD_ call `#update` on the concerned Vector or DataFrame object
|
19
|
+
# every time an assignment or deletion cycle is complete.
|
20
|
+
Daru.lazy_update = true
|
21
|
+
|
22
|
+
# Create a Daru::DataFrame containing 4 vectors a, b, c and d.
|
23
|
+
#
|
24
|
+
# Notice that the `clone` option has been set to *false*. This tells Daru
|
25
|
+
# to not clone the Daru::Vectors being supplied by `rnorm`, since it would
|
26
|
+
# be unnecessarily counter productive to clone the vectors once they have
|
27
|
+
# been assigned to the dataframe.
|
6
28
|
samples=1000
|
7
|
-
ds=
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
29
|
+
ds = Daru::DataFrame.new({
|
30
|
+
:a => rnorm(samples),
|
31
|
+
:b => rnorm(samples),
|
32
|
+
:c => rnorm(samples),
|
33
|
+
:d => rnorm(samples)
|
34
|
+
}, clone: false)
|
35
|
+
|
36
|
+
# Calculate correlation matrix by calling the `cor` shorthand.
|
37
|
+
cm = cor(ds)
|
13
38
|
summary(cm)
|
39
|
+
|
40
|
+
# Set lazy_update to *false* once our job is done so that this analysis does
|
41
|
+
# not accidentally affect code elsewhere.
|
42
|
+
Daru.lazy_update = false
|
14
43
|
end
|
15
44
|
|
16
45
|
if __FILE__==$0
|