statsample-ekatena 2.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/.travis.yml +23 -0
- data/CONTRIBUTING.md +17 -0
- data/Gemfile +2 -0
- data/History.txt +457 -0
- data/LICENSE.txt +12 -0
- data/README.md +175 -0
- data/Rakefile +44 -0
- data/benchmarks/correlation_matrix_15_variables.rb +32 -0
- data/benchmarks/correlation_matrix_5_variables.rb +33 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.ds +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.html +93 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +71 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.xls +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix_gsl_ruby.ods +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix_with_graphics.ods +0 -0
- data/benchmarks/correlation_matrix_methods/results.ds +0 -0
- data/benchmarks/factor_map.rb +37 -0
- data/benchmarks/helpers_benchmark.rb +5 -0
- data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
- data/doc_latex/manual/equations.tex +78 -0
- data/examples/boxplot.rb +28 -0
- data/examples/chisquare_test.rb +23 -0
- data/examples/correlation_matrix.rb +32 -0
- data/examples/dataset.rb +30 -0
- data/examples/dominance_analysis.rb +33 -0
- data/examples/dominance_analysis_bootstrap.rb +32 -0
- data/examples/histogram.rb +26 -0
- data/examples/icc.rb +24 -0
- data/examples/levene.rb +29 -0
- data/examples/multiple_regression.rb +20 -0
- data/examples/multivariate_correlation.rb +33 -0
- data/examples/parallel_analysis.rb +40 -0
- data/examples/polychoric.rb +40 -0
- data/examples/principal_axis.rb +26 -0
- data/examples/reliability.rb +31 -0
- data/examples/scatterplot.rb +25 -0
- data/examples/t_test.rb +27 -0
- data/examples/tetrachoric.rb +17 -0
- data/examples/u_test.rb +24 -0
- data/examples/vector.rb +20 -0
- data/examples/velicer_map_test.rb +46 -0
- data/grab_references.rb +29 -0
- data/lib/spss.rb +134 -0
- data/lib/statsample-ekatena/analysis.rb +100 -0
- data/lib/statsample-ekatena/analysis/suite.rb +89 -0
- data/lib/statsample-ekatena/analysis/suitereportbuilder.rb +44 -0
- data/lib/statsample-ekatena/anova.rb +24 -0
- data/lib/statsample-ekatena/anova/contrast.rb +79 -0
- data/lib/statsample-ekatena/anova/oneway.rb +187 -0
- data/lib/statsample-ekatena/anova/twoway.rb +207 -0
- data/lib/statsample-ekatena/bivariate.rb +406 -0
- data/lib/statsample-ekatena/bivariate/pearson.rb +54 -0
- data/lib/statsample-ekatena/codification.rb +182 -0
- data/lib/statsample-ekatena/converter/csv.rb +28 -0
- data/lib/statsample-ekatena/converter/spss.rb +48 -0
- data/lib/statsample-ekatena/converters.rb +211 -0
- data/lib/statsample-ekatena/crosstab.rb +188 -0
- data/lib/statsample-ekatena/daru.rb +115 -0
- data/lib/statsample-ekatena/dataset.rb +10 -0
- data/lib/statsample-ekatena/dominanceanalysis.rb +425 -0
- data/lib/statsample-ekatena/dominanceanalysis/bootstrap.rb +232 -0
- data/lib/statsample-ekatena/factor.rb +104 -0
- data/lib/statsample-ekatena/factor/map.rb +124 -0
- data/lib/statsample-ekatena/factor/parallelanalysis.rb +166 -0
- data/lib/statsample-ekatena/factor/pca.rb +242 -0
- data/lib/statsample-ekatena/factor/principalaxis.rb +243 -0
- data/lib/statsample-ekatena/factor/rotation.rb +198 -0
- data/lib/statsample-ekatena/formula/fit_model.rb +46 -0
- data/lib/statsample-ekatena/formula/formula.rb +306 -0
- data/lib/statsample-ekatena/graph.rb +11 -0
- data/lib/statsample-ekatena/graph/boxplot.rb +236 -0
- data/lib/statsample-ekatena/graph/histogram.rb +198 -0
- data/lib/statsample-ekatena/graph/scatterplot.rb +213 -0
- data/lib/statsample-ekatena/histogram.rb +180 -0
- data/lib/statsample-ekatena/matrix.rb +329 -0
- data/lib/statsample-ekatena/multiset.rb +310 -0
- data/lib/statsample-ekatena/regression.rb +65 -0
- data/lib/statsample-ekatena/regression/multiple.rb +89 -0
- data/lib/statsample-ekatena/regression/multiple/alglibengine.rb +128 -0
- data/lib/statsample-ekatena/regression/multiple/baseengine.rb +251 -0
- data/lib/statsample-ekatena/regression/multiple/gslengine.rb +129 -0
- data/lib/statsample-ekatena/regression/multiple/matrixengine.rb +205 -0
- data/lib/statsample-ekatena/regression/multiple/rubyengine.rb +86 -0
- data/lib/statsample-ekatena/regression/simple.rb +121 -0
- data/lib/statsample-ekatena/reliability.rb +150 -0
- data/lib/statsample-ekatena/reliability/icc.rb +415 -0
- data/lib/statsample-ekatena/reliability/multiscaleanalysis.rb +181 -0
- data/lib/statsample-ekatena/reliability/scaleanalysis.rb +233 -0
- data/lib/statsample-ekatena/reliability/skillscaleanalysis.rb +114 -0
- data/lib/statsample-ekatena/resample.rb +15 -0
- data/lib/statsample-ekatena/shorthand.rb +125 -0
- data/lib/statsample-ekatena/srs.rb +169 -0
- data/lib/statsample-ekatena/test.rb +82 -0
- data/lib/statsample-ekatena/test/bartlettsphericity.rb +45 -0
- data/lib/statsample-ekatena/test/chisquare.rb +73 -0
- data/lib/statsample-ekatena/test/f.rb +52 -0
- data/lib/statsample-ekatena/test/kolmogorovsmirnov.rb +63 -0
- data/lib/statsample-ekatena/test/levene.rb +88 -0
- data/lib/statsample-ekatena/test/t.rb +309 -0
- data/lib/statsample-ekatena/test/umannwhitney.rb +208 -0
- data/lib/statsample-ekatena/test/wilcoxonsignedrank.rb +90 -0
- data/lib/statsample-ekatena/vector.rb +19 -0
- data/lib/statsample-ekatena/version.rb +3 -0
- data/lib/statsample.rb +282 -0
- data/po/es/statsample.mo +0 -0
- data/po/es/statsample.po +959 -0
- data/po/statsample.pot +947 -0
- data/references.txt +24 -0
- data/statsample-ekatena.gemspec +49 -0
- data/test/fixtures/bank2.dat +200 -0
- data/test/fixtures/correlation_matrix.rb +17 -0
- data/test/fixtures/df.csv +15 -0
- data/test/fixtures/hartman_23.matrix +9 -0
- data/test/fixtures/stock_data.csv +500 -0
- data/test/fixtures/tetmat_matrix.txt +5 -0
- data/test/fixtures/tetmat_test.txt +1001 -0
- data/test/helpers_tests.rb +83 -0
- data/test/test_analysis.rb +176 -0
- data/test/test_anova_contrast.rb +36 -0
- data/test/test_anovaoneway.rb +26 -0
- data/test/test_anovatwoway.rb +37 -0
- data/test/test_anovatwowaywithdataset.rb +47 -0
- data/test/test_anovawithvectors.rb +102 -0
- data/test/test_awesome_print_bug.rb +16 -0
- data/test/test_bartlettsphericity.rb +25 -0
- data/test/test_bivariate.rb +164 -0
- data/test/test_codification.rb +78 -0
- data/test/test_crosstab.rb +67 -0
- data/test/test_dominance_analysis.rb +39 -0
- data/test/test_factor.rb +228 -0
- data/test/test_factor_map.rb +38 -0
- data/test/test_factor_pa.rb +56 -0
- data/test/test_fit_model.rb +88 -0
- data/test/test_ggobi.rb +35 -0
- data/test/test_gsl.rb +15 -0
- data/test/test_histogram.rb +109 -0
- data/test/test_matrix.rb +48 -0
- data/test/test_multiset.rb +176 -0
- data/test/test_regression.rb +231 -0
- data/test/test_reliability.rb +223 -0
- data/test/test_reliability_icc.rb +198 -0
- data/test/test_reliability_skillscale.rb +57 -0
- data/test/test_resample.rb +24 -0
- data/test/test_srs.rb +9 -0
- data/test/test_statistics.rb +69 -0
- data/test/test_stest.rb +69 -0
- data/test/test_stratified.rb +17 -0
- data/test/test_test_f.rb +33 -0
- data/test/test_test_kolmogorovsmirnov.rb +34 -0
- data/test/test_test_t.rb +62 -0
- data/test/test_umannwhitney.rb +27 -0
- data/test/test_vector.rb +12 -0
- data/test/test_wilcoxonsignedrank.rb +64 -0
- metadata +570 -0
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
require(File.expand_path(File.dirname(__FILE__)+'/helpers_benchmark.rb'))
|
|
2
|
+
|
|
3
|
+
extend BenchPress
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
name "Statsample::Factor::Map with and without GSL"
|
|
7
|
+
author 'Clbustos'
|
|
8
|
+
date '2011-01-18'
|
|
9
|
+
summary "Velicer's MAP uses a lot of Matrix algebra. How much we can improve the timing using GSL?
|
|
10
|
+
"
|
|
11
|
+
|
|
12
|
+
reps 20 #number of repetitions
|
|
13
|
+
|
|
14
|
+
m=Matrix[
|
|
15
|
+
[ 1, 0.846, 0.805, 0.859, 0.473, 0.398, 0.301, 0.382],
|
|
16
|
+
[ 0.846, 1, 0.881, 0.826, 0.376, 0.326, 0.277, 0.415],
|
|
17
|
+
[ 0.805, 0.881, 1, 0.801, 0.38, 0.319, 0.237, 0.345],
|
|
18
|
+
[ 0.859, 0.826, 0.801, 1, 0.436, 0.329, 0.327, 0.365],
|
|
19
|
+
[ 0.473, 0.376, 0.38, 0.436, 1, 0.762, 0.73, 0.629],
|
|
20
|
+
[ 0.398, 0.326, 0.319, 0.329, 0.762, 1, 0.583, 0.577],
|
|
21
|
+
[ 0.301, 0.277, 0.237, 0.327, 0.73, 0.583, 1, 0.539],
|
|
22
|
+
[ 0.382, 0.415, 0.345, 0.365, 0.629, 0.577, 0.539, 1]
|
|
23
|
+
]
|
|
24
|
+
|
|
25
|
+
map=Statsample::Factor::MAP.new(m)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
measure "Statsample::Factor::MAP without GSL" do
|
|
29
|
+
map.use_gsl=false
|
|
30
|
+
map.compute
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
measure "Statsample::Factor::MAP with GSL" do
|
|
34
|
+
map.use_gsl=true
|
|
35
|
+
map.compute
|
|
36
|
+
end
|
|
37
|
+
|
|
Binary file
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
\part{Equations}
|
|
2
|
+
\section{Convention}
|
|
3
|
+
\begin{align*}
|
|
4
|
+
n &= \text{sample size}\\
|
|
5
|
+
N &= \text{population size}\\
|
|
6
|
+
p &= \text{proportion inside a sample}\\
|
|
7
|
+
P &= \text{proportion inside a population}
|
|
8
|
+
\end{align*}
|
|
9
|
+
\section{Ruby::Regression::Multiple}
|
|
10
|
+
|
|
11
|
+
To compute the standard error of coefficients, you obtain the estimated variance-covariance matrix of error.
|
|
12
|
+
|
|
13
|
+
Let $\mathbf{X}$ be the matrix of predictor data, including a constant column; $\mathbf{MSE}$ the mean square error; $SSE$ the sum of squares of errors; $n$ the number of cases; and $p$ the number of predictors.
|
|
14
|
+
|
|
15
|
+
\begin{equation}
|
|
16
|
+
\mathbf{MSE}=\frac{SSE}{n-p-1}
|
|
17
|
+
\end{equation}
|
|
18
|
+
|
|
19
|
+
\begin{equation}
|
|
20
|
+
\mathbf{E}=(\mathbf{X'}\mathbf{X})^{-1}\mathbf{MSE}
|
|
21
|
+
\end{equation}
|
|
22
|
+
|
|
23
|
+
The square roots of the diagonal elements of $\mathbf{E}$ are the standard errors of the coefficients.
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
\section{Ruby::SRS}
|
|
27
|
+
Finite population correction (FPC) is used in the standard error calculation for populations below 10,000. Function
|
|
28
|
+
\begin{verbatim}
|
|
29
|
+
fpc_var(sam,pop)
|
|
30
|
+
\end{verbatim}
|
|
31
|
+
calculates the FPC for variance with
|
|
32
|
+
\begin{equation}
|
|
33
|
+
fpc_{var} = \frac{N-n} {N-1}
|
|
34
|
+
\end{equation}
|
|
35
|
+
|
|
36
|
+
with $n$ as \texttt{sam} and $N$ as \texttt{pop}.
|
|
37
|
+
|
|
38
|
+
Function
|
|
39
|
+
\begin{verbatim}
|
|
40
|
+
fpc = fpc(sam,pop)
|
|
41
|
+
\end{verbatim}
|
|
42
|
+
|
|
43
|
+
calculates the FPC for standard deviation with
|
|
44
|
+
\begin{equation}
|
|
45
|
+
fpc_{sd} = \sqrt{\frac{N-n} {N-1}}
|
|
46
|
+
\label{fpc}
|
|
47
|
+
\end{equation}
|
|
48
|
+
with n as sample size and N as population size.
|
|
49
|
+
|
|
50
|
+
\subsection{Sample Size estimation for proportions}
|
|
51
|
+
|
|
52
|
+
For infinite populations, you should use the method
|
|
53
|
+
\begin{verbatim}
|
|
54
|
+
estimation_n0(d,prop,margin=0.95)
|
|
55
|
+
\end{verbatim}
|
|
56
|
+
which uses
|
|
57
|
+
\begin{equation}
|
|
58
|
+
n = \frac{t^2(pq)}{d^2}
|
|
59
|
+
\label{n_i}
|
|
60
|
+
\end{equation}
|
|
61
|
+
where
|
|
62
|
+
\begin{align*}
|
|
63
|
+
t &= \text{t value for given level of confidence ( 1.96 for 95\% )}\\
|
|
64
|
+
d &= \text{margin of error}
|
|
65
|
+
\end{align*}
|
|
66
|
+
|
|
67
|
+
For finite populations, you should use
|
|
68
|
+
\begin{verbatim}
|
|
69
|
+
estimation_n(d,prop,n_pobl, margin=0.95)
|
|
70
|
+
\end{verbatim}
|
|
71
|
+
which uses
|
|
72
|
+
\begin{equation}
|
|
73
|
+
n = \frac{n_i}{1+(\frac{n_i-1}{N})}
|
|
74
|
+
\end{equation}
|
|
75
|
+
|
|
76
|
+
Where $n_i$ is $n$ from equation \ref{n_i} and $N$ is the population size.
|
|
77
|
+
|
|
78
|
+
|
data/examples/boxplot.rb
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
#!/usr/bin/ruby
|
|
2
|
+
# == Description
|
|
3
|
+
#
|
|
4
|
+
# This example illustrates how daru, combined with Statsample::Graph::Boxplot
|
|
5
|
+
# can be used for generating box plots of a normally distributed set of data.
|
|
6
|
+
#
|
|
7
|
+
# The 'rnorm' function, defined in statsample/shorthand, generates a Daru::Vector
|
|
8
|
+
# object which contains the specified number of random variables in a normal distribution.
|
|
9
|
+
# It uses the 'distribution' gem for this purpose.
|
|
10
|
+
#
|
|
11
|
+
# Create a boxplot of the data by specifying the vectors a, b and c and providing
|
|
12
|
+
# necessary options to Statsample::Graph::Boxplot. The 'boxplot' function is shorthand
|
|
13
|
+
# for calling Statsample::Graph::Boxplot.
|
|
14
|
+
$:.unshift(File.dirname(__FILE__)+'/../lib/')
|
|
15
|
+
require 'statsample'
|
|
16
|
+
Statsample::Analysis.store(Statsample::Graph::Boxplot) do
|
|
17
|
+
n = 30
|
|
18
|
+
a = rnorm(n-1,50,10)
|
|
19
|
+
b = rnorm(n, 30,5)
|
|
20
|
+
c = rnorm(n,5,1)
|
|
21
|
+
a.push(2)
|
|
22
|
+
|
|
23
|
+
boxplot(:vectors=>[a,b,c],:width=>300, :height=>300, :groups=>%w{first first second}, :minimum=>0)
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
if __FILE__==$0
|
|
27
|
+
Statsample::Analysis.run
|
|
28
|
+
end
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
#!/usr/bin/ruby
|
|
2
|
+
$:.unshift(File.dirname(__FILE__)+'/../lib')
|
|
3
|
+
require 'statsample'
|
|
4
|
+
|
|
5
|
+
Statsample::Analysis.store(Statsample::Test::ChiSquare) do
|
|
6
|
+
# Collect the two vectors with the categorical data (raw number of occurrences) into one matrix. Here:
|
|
7
|
+
#--------------------------------------------
|
|
8
|
+
#| category | observation 1 | observation 2 |
|
|
9
|
+
#|------------------------------------------|
|
|
10
|
+
#| A | 100 | 20 |
|
|
11
|
+
#| B | 50 | 70 |
|
|
12
|
+
#| C | 30 | 100 |
|
|
13
|
+
#|------------------------------------------|
|
|
14
|
+
#
|
|
15
|
+
m=Matrix[[100, 50, 30],[20, 70, 100]]
|
|
16
|
+
x_2=Statsample::Test.chi_square(m)
|
|
17
|
+
# after the test is done, look at the p-value.
|
|
18
|
+
puts x_2.probability
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
if __FILE__==$0
|
|
22
|
+
Statsample::Analysis.run_batch
|
|
23
|
+
end
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
#!/usr/bin/ruby
|
|
2
|
+
|
|
3
|
+
# == Description
|
|
4
|
+
#
|
|
5
|
+
# Creating and summarizing a correlation matrix with daru and statsample
|
|
6
|
+
$:.unshift(File.dirname(__FILE__)+'/../lib/')
|
|
7
|
+
|
|
8
|
+
require 'statsample'
|
|
9
|
+
Statsample::Analysis.store("Statsample::Bivariate.correlation_matrix") do
|
|
10
|
+
# Create a Daru::DataFrame containing 4 vectors a, b, c and d.
|
|
11
|
+
#
|
|
12
|
+
# Notice that the `clone` option has been set to *false*. This tells Daru
|
|
13
|
+
# to not clone the Daru::Vectors being supplied by `rnorm`, since it would
|
|
14
|
+
# be unnecessarily counterproductive to clone the vectors once they have
|
|
15
|
+
# been assigned to the dataframe.
|
|
16
|
+
samples=1000
|
|
17
|
+
ds = Daru::DataFrame.new({
|
|
18
|
+
:a => rnorm(samples),
|
|
19
|
+
:b => rnorm(samples),
|
|
20
|
+
:c => rnorm(samples),
|
|
21
|
+
:d => rnorm(samples)
|
|
22
|
+
}, clone: false)
|
|
23
|
+
|
|
24
|
+
# Calculate correlation matrix by calling the `cor` shorthand.
|
|
25
|
+
cm = cor(ds)
|
|
26
|
+
summary(cm)
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
if __FILE__==$0
|
|
30
|
+
Statsample::Analysis.run_batch
|
|
31
|
+
end
|
|
32
|
+
|
data/examples/dataset.rb
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
#!/usr/bin/ruby
|
|
2
|
+
$:.unshift(File.dirname(__FILE__)+'/../lib/')
|
|
3
|
+
# == Description
|
|
4
|
+
#
|
|
5
|
+
# This example demonstrates creation of basic Vectors and DataFrames.
|
|
6
|
+
require 'statsample'
|
|
7
|
+
|
|
8
|
+
Statsample::Analysis.store(Daru::DataFrame) do
|
|
9
|
+
samples = 1000
|
|
10
|
+
|
|
11
|
+
# The 'new_with_size' function lets you specify the size of the
|
|
12
|
+
# vector as the argument and the block specifies how each element
|
|
13
|
+
# of the vector will be created.
|
|
14
|
+
a = Daru::Vector.new_with_size(samples) {r=rand(5); r==4 ? nil: r}
|
|
15
|
+
b = Daru::Vector.new_with_size(samples) {r=rand(5); r==4 ? nil: r}
|
|
16
|
+
|
|
17
|
+
# Pass the Daru::Vector objects in a Hash to the DataFrame constructor
|
|
18
|
+
# to make a DataFrame.
|
|
19
|
+
#
|
|
20
|
+
# The *order* option lets you specify the way the vectors in the Hash
|
|
21
|
+
# will be ordered. Not specifying this will order vectors in alphabetical
|
|
22
|
+
# order by default.
|
|
23
|
+
ds = Daru::DataFrame.new({:a=>a,:b=>b}, order: [:b, :a])
|
|
24
|
+
summary(ds)
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
if __FILE__==$0
|
|
28
|
+
Statsample::Analysis.run_batch
|
|
29
|
+
end
|
|
30
|
+
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
#!/usr/bin/ruby
|
|
2
|
+
$:.unshift(File.dirname(__FILE__)+'/../lib/')
|
|
3
|
+
|
|
4
|
+
# == Description
|
|
5
|
+
#
|
|
6
|
+
# Dominance Analysis with statsample
|
|
7
|
+
require 'statsample'
|
|
8
|
+
Statsample::Analysis.store(Statsample::DominanceAnalysis) do
|
|
9
|
+
sample=300
|
|
10
|
+
a=rnorm(sample)
|
|
11
|
+
b=rnorm(sample)
|
|
12
|
+
c=rnorm(sample)
|
|
13
|
+
d=rnorm(sample)
|
|
14
|
+
|
|
15
|
+
ds = Daru::DataFrame.new({:a => a,:b => b,:cc => c,:d => d}, clone: false)
|
|
16
|
+
attach(ds)
|
|
17
|
+
ds[:y]=a*5 + b*3 + cc*2 + d + rnorm(300)
|
|
18
|
+
cm=cor(ds)
|
|
19
|
+
summary(cm)
|
|
20
|
+
lr=lr(ds,:y)
|
|
21
|
+
summary(lr)
|
|
22
|
+
da=dominance_analysis(ds,:y)
|
|
23
|
+
summary(da)
|
|
24
|
+
|
|
25
|
+
da = dominance_analysis(ds,:y,:name=>"Dominance Analysis using group of predictors", :predictors=>[:a, :b, [:cc, :d]])
|
|
26
|
+
summary(da)
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
if __FILE__==$0
|
|
31
|
+
Statsample::Analysis.run_batch
|
|
32
|
+
end
|
|
33
|
+
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
#!/usr/bin/ruby
|
|
2
|
+
$:.unshift(File.dirname(__FILE__)+'/../lib/')
|
|
3
|
+
require 'statsample'
|
|
4
|
+
|
|
5
|
+
Statsample::Analysis.store(Statsample::DominanceAnalysis::Bootstrap) do
|
|
6
|
+
sample=300
|
|
7
|
+
a=rnorm(sample)
|
|
8
|
+
b=rnorm(sample)
|
|
9
|
+
c=rnorm(sample)
|
|
10
|
+
d=rnorm(sample)
|
|
11
|
+
a.rename :a
|
|
12
|
+
b.rename :b
|
|
13
|
+
c.rename :c
|
|
14
|
+
d.rename :d
|
|
15
|
+
|
|
16
|
+
ds = Daru::DataFrame.new({:a => a,:b => b,:cc => c,:d => d})
|
|
17
|
+
attach(ds)
|
|
18
|
+
ds[:y1] = a*5 + b*2 + cc*2 + d*2 + rnorm(sample,0,10)
|
|
19
|
+
ds[:y2] = a*10 + rnorm(sample)
|
|
20
|
+
|
|
21
|
+
dab=dominance_analysis_bootstrap(ds, [:y1,:y2], :debug=>true)
|
|
22
|
+
dab.bootstrap(100,nil)
|
|
23
|
+
summary(dab)
|
|
24
|
+
ds2=ds[:a..:y1]
|
|
25
|
+
dab2=dominance_analysis_bootstrap(ds2, :y1, :debug=>true)
|
|
26
|
+
dab2.bootstrap(100,nil)
|
|
27
|
+
summary(dab2)
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
if __FILE__==$0
|
|
31
|
+
Statsample::Analysis.run_batch
|
|
32
|
+
end
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
#!/usr/bin/ruby
|
|
2
|
+
$:.unshift(File.dirname(__FILE__)+'/../lib/')
|
|
3
|
+
|
|
4
|
+
# == Description
|
|
5
|
+
#
|
|
6
|
+
# This example demonstrates how a histogram can be created
|
|
7
|
+
# with statsample.
|
|
8
|
+
#
|
|
9
|
+
# The 'histogram' function creates a histogram by using the
|
|
10
|
+
# Statsample::Graph::Histogram class. This class accepts data
|
|
11
|
+
# in a Daru::Vector (as created by `rnorm`).
|
|
12
|
+
#
|
|
13
|
+
# A line showing normal distribution can be drawn by setting
|
|
14
|
+
# the `:line_normal_distribution` option to *true*.
|
|
15
|
+
#
|
|
16
|
+
# See this notebook for an illustration:
|
|
17
|
+
# http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/statistics/Creating%20a%20Histogram.ipynb
|
|
18
|
+
require 'statsample'
|
|
19
|
+
|
|
20
|
+
Statsample::Analysis.store(Statsample::Graph::Histogram) do
|
|
21
|
+
histogram(rnorm(3000,0,20), :line_normal_distribution => true)
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
if __FILE__==$0
|
|
25
|
+
Statsample::Analysis.run
|
|
26
|
+
end
|
data/examples/icc.rb
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
#!/usr/bin/ruby
|
|
2
|
+
$:.unshift(File.dirname(__FILE__)+'/../lib/')
|
|
3
|
+
|
|
4
|
+
require 'statsample'
|
|
5
|
+
|
|
6
|
+
Statsample::Analysis.store(Statsample::Reliability::ICC) do
|
|
7
|
+
|
|
8
|
+
size=1000
|
|
9
|
+
a = Daru::Vector.new_with_size(size) {rand(10)}
|
|
10
|
+
b = a.recode{|i|i+rand(4)-2}
|
|
11
|
+
c = a.recode{|i|i+rand(4)-2}
|
|
12
|
+
d = a.recode{|i|i+rand(4)-2}
|
|
13
|
+
@ds = Daru::DataFrame.new({:a => a,:b => b,:c => c,:d => d})
|
|
14
|
+
@icc=Statsample::Reliability::ICC.new(@ds)
|
|
15
|
+
summary(@icc)
|
|
16
|
+
@icc.type=:icc_3_1
|
|
17
|
+
summary(@icc)
|
|
18
|
+
@icc.type=:icc_a_k
|
|
19
|
+
summary(@icc)
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
if __FILE__==$0
|
|
23
|
+
Statsample::Analysis.run_batch
|
|
24
|
+
end
|
data/examples/levene.rb
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
#!/usr/bin/ruby
|
|
2
|
+
$:.unshift(File.dirname(__FILE__)+'/../lib/')
|
|
3
|
+
|
|
4
|
+
# == Description
|
|
5
|
+
#
|
|
6
|
+
# This example demonstrates how a levene test can be performed by
|
|
7
|
+
# using Daru::Vector and the Statsample::Test::Levene class.
|
|
8
|
+
#
|
|
9
|
+
# Levene's test is an inferential statistic used to assess the
|
|
10
|
+
# equality of variances for a variable calculated for two or more groups.
|
|
11
|
+
#
|
|
12
|
+
# == References
|
|
13
|
+
#
|
|
14
|
+
# http://en.wikipedia.org/wiki/Levene%27s_test
|
|
15
|
+
require 'statsample'
|
|
16
|
+
|
|
17
|
+
Statsample::Analysis.store(Statsample::Test::Levene) do
|
|
18
|
+
|
|
19
|
+
a = Daru::Vector.new([1,2,3,4,5,6,7,8,100,10])
|
|
20
|
+
b = Daru::Vector.new([30,40,50,60,70,80,90,100,110,120])
|
|
21
|
+
|
|
22
|
+
# The 'levene' function is used as a shorthand
|
|
23
|
+
# for creating a Statsample::Test::Levene object.
|
|
24
|
+
summary(levene([a,b]))
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
if __FILE__==$0
|
|
28
|
+
Statsample::Analysis.run_batch
|
|
29
|
+
end
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
#!/usr/bin/ruby
|
|
2
|
+
$:.unshift(File.dirname(__FILE__)+'/../lib/')
|
|
3
|
+
|
|
4
|
+
# == Description
|
|
5
|
+
#
|
|
6
|
+
# This example shows how multiple regression can be performed using statsample and daru.
|
|
7
|
+
require 'statsample'
|
|
8
|
+
|
|
9
|
+
Statsample::Analysis.store(Statsample::Regression::Multiple) do
|
|
10
|
+
|
|
11
|
+
samples=2000
|
|
12
|
+
ds=dataset(:a => rnorm(samples),:b => rnorm(samples),:cc => rnorm(samples),:d => rnorm(samples))
|
|
13
|
+
attach(ds)
|
|
14
|
+
ds[:y] = a*5+b*3+cc*2+d+rnorm(samples)
|
|
15
|
+
summary lr(ds,:y)
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
if __FILE__==$0
|
|
19
|
+
Statsample::Analysis.run_batch
|
|
20
|
+
end
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
#!/usr/bin/ruby
|
|
2
|
+
$:.unshift(File.dirname(__FILE__)+'/../lib/')
|
|
3
|
+
|
|
4
|
+
require 'statsample'
|
|
5
|
+
require 'mathn'
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
Statsample::Analysis.store(Statsample::Regression::Multiple::MultipleDependent) do
|
|
9
|
+
|
|
10
|
+
complete=Matrix[
|
|
11
|
+
[1,0.53,0.62,0.19,-0.09,0.08,0.02,-0.12,0.08],
|
|
12
|
+
[0.53,1,0.61,0.23,0.1,0.18,0.02,-0.1,0.15],
|
|
13
|
+
[0.62,0.61,1,0.03,0.1,0.12,0.03,-0.06,0.12],
|
|
14
|
+
[0.19,0.23,0.03,1,-0.02,0.02,0,-0.02,-0.02],
|
|
15
|
+
[-0.09,0.1,0.1,-0.02,1,0.05,0.06,0.18,0.02],
|
|
16
|
+
[0.08,0.18,0.12,0.02,0.05,1,0.22,-0.07,0.36],
|
|
17
|
+
[0.02,0.02,0.03,0,0.06,0.22,1,-0.01,-0.05],
|
|
18
|
+
[-0.12,-0.1,-0.06,-0.02,0.18,-0.07,-0.01,1,-0.03],
|
|
19
|
+
[0.08,0.15,0.12,-0.02,0.02,0.36,-0.05,-0.03,1]]
|
|
20
|
+
|
|
21
|
+
complete.extend Statsample::CovariateMatrix
|
|
22
|
+
complete.fields=%w{adhd cd odd sex age monly mwork mage poverty}
|
|
23
|
+
|
|
24
|
+
lr=Statsample::Regression::Multiple::MultipleDependent.new(complete, %w{adhd cd odd})
|
|
25
|
+
|
|
26
|
+
echo "R^2_yx #{lr.r2yx}"
|
|
27
|
+
echo "P^2_yx #{lr.p2yx}"
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
if __FILE__==$0
|
|
32
|
+
Statsample::Analysis.run_batch
|
|
33
|
+
end
|