statsample-ekatena 2.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/.travis.yml +23 -0
- data/CONTRIBUTING.md +17 -0
- data/Gemfile +2 -0
- data/History.txt +457 -0
- data/LICENSE.txt +12 -0
- data/README.md +175 -0
- data/Rakefile +44 -0
- data/benchmarks/correlation_matrix_15_variables.rb +32 -0
- data/benchmarks/correlation_matrix_5_variables.rb +33 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.ds +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.html +93 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +71 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.xls +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix_gsl_ruby.ods +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix_with_graphics.ods +0 -0
- data/benchmarks/correlation_matrix_methods/results.ds +0 -0
- data/benchmarks/factor_map.rb +37 -0
- data/benchmarks/helpers_benchmark.rb +5 -0
- data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
- data/doc_latex/manual/equations.tex +78 -0
- data/examples/boxplot.rb +28 -0
- data/examples/chisquare_test.rb +23 -0
- data/examples/correlation_matrix.rb +32 -0
- data/examples/dataset.rb +30 -0
- data/examples/dominance_analysis.rb +33 -0
- data/examples/dominance_analysis_bootstrap.rb +32 -0
- data/examples/histogram.rb +26 -0
- data/examples/icc.rb +24 -0
- data/examples/levene.rb +29 -0
- data/examples/multiple_regression.rb +20 -0
- data/examples/multivariate_correlation.rb +33 -0
- data/examples/parallel_analysis.rb +40 -0
- data/examples/polychoric.rb +40 -0
- data/examples/principal_axis.rb +26 -0
- data/examples/reliability.rb +31 -0
- data/examples/scatterplot.rb +25 -0
- data/examples/t_test.rb +27 -0
- data/examples/tetrachoric.rb +17 -0
- data/examples/u_test.rb +24 -0
- data/examples/vector.rb +20 -0
- data/examples/velicer_map_test.rb +46 -0
- data/grab_references.rb +29 -0
- data/lib/spss.rb +134 -0
- data/lib/statsample-ekatena/analysis.rb +100 -0
- data/lib/statsample-ekatena/analysis/suite.rb +89 -0
- data/lib/statsample-ekatena/analysis/suitereportbuilder.rb +44 -0
- data/lib/statsample-ekatena/anova.rb +24 -0
- data/lib/statsample-ekatena/anova/contrast.rb +79 -0
- data/lib/statsample-ekatena/anova/oneway.rb +187 -0
- data/lib/statsample-ekatena/anova/twoway.rb +207 -0
- data/lib/statsample-ekatena/bivariate.rb +406 -0
- data/lib/statsample-ekatena/bivariate/pearson.rb +54 -0
- data/lib/statsample-ekatena/codification.rb +182 -0
- data/lib/statsample-ekatena/converter/csv.rb +28 -0
- data/lib/statsample-ekatena/converter/spss.rb +48 -0
- data/lib/statsample-ekatena/converters.rb +211 -0
- data/lib/statsample-ekatena/crosstab.rb +188 -0
- data/lib/statsample-ekatena/daru.rb +115 -0
- data/lib/statsample-ekatena/dataset.rb +10 -0
- data/lib/statsample-ekatena/dominanceanalysis.rb +425 -0
- data/lib/statsample-ekatena/dominanceanalysis/bootstrap.rb +232 -0
- data/lib/statsample-ekatena/factor.rb +104 -0
- data/lib/statsample-ekatena/factor/map.rb +124 -0
- data/lib/statsample-ekatena/factor/parallelanalysis.rb +166 -0
- data/lib/statsample-ekatena/factor/pca.rb +242 -0
- data/lib/statsample-ekatena/factor/principalaxis.rb +243 -0
- data/lib/statsample-ekatena/factor/rotation.rb +198 -0
- data/lib/statsample-ekatena/formula/fit_model.rb +46 -0
- data/lib/statsample-ekatena/formula/formula.rb +306 -0
- data/lib/statsample-ekatena/graph.rb +11 -0
- data/lib/statsample-ekatena/graph/boxplot.rb +236 -0
- data/lib/statsample-ekatena/graph/histogram.rb +198 -0
- data/lib/statsample-ekatena/graph/scatterplot.rb +213 -0
- data/lib/statsample-ekatena/histogram.rb +180 -0
- data/lib/statsample-ekatena/matrix.rb +329 -0
- data/lib/statsample-ekatena/multiset.rb +310 -0
- data/lib/statsample-ekatena/regression.rb +65 -0
- data/lib/statsample-ekatena/regression/multiple.rb +89 -0
- data/lib/statsample-ekatena/regression/multiple/alglibengine.rb +128 -0
- data/lib/statsample-ekatena/regression/multiple/baseengine.rb +251 -0
- data/lib/statsample-ekatena/regression/multiple/gslengine.rb +129 -0
- data/lib/statsample-ekatena/regression/multiple/matrixengine.rb +205 -0
- data/lib/statsample-ekatena/regression/multiple/rubyengine.rb +86 -0
- data/lib/statsample-ekatena/regression/simple.rb +121 -0
- data/lib/statsample-ekatena/reliability.rb +150 -0
- data/lib/statsample-ekatena/reliability/icc.rb +415 -0
- data/lib/statsample-ekatena/reliability/multiscaleanalysis.rb +181 -0
- data/lib/statsample-ekatena/reliability/scaleanalysis.rb +233 -0
- data/lib/statsample-ekatena/reliability/skillscaleanalysis.rb +114 -0
- data/lib/statsample-ekatena/resample.rb +15 -0
- data/lib/statsample-ekatena/shorthand.rb +125 -0
- data/lib/statsample-ekatena/srs.rb +169 -0
- data/lib/statsample-ekatena/test.rb +82 -0
- data/lib/statsample-ekatena/test/bartlettsphericity.rb +45 -0
- data/lib/statsample-ekatena/test/chisquare.rb +73 -0
- data/lib/statsample-ekatena/test/f.rb +52 -0
- data/lib/statsample-ekatena/test/kolmogorovsmirnov.rb +63 -0
- data/lib/statsample-ekatena/test/levene.rb +88 -0
- data/lib/statsample-ekatena/test/t.rb +309 -0
- data/lib/statsample-ekatena/test/umannwhitney.rb +208 -0
- data/lib/statsample-ekatena/test/wilcoxonsignedrank.rb +90 -0
- data/lib/statsample-ekatena/vector.rb +19 -0
- data/lib/statsample-ekatena/version.rb +3 -0
- data/lib/statsample.rb +282 -0
- data/po/es/statsample.mo +0 -0
- data/po/es/statsample.po +959 -0
- data/po/statsample.pot +947 -0
- data/references.txt +24 -0
- data/statsample-ekatena.gemspec +49 -0
- data/test/fixtures/bank2.dat +200 -0
- data/test/fixtures/correlation_matrix.rb +17 -0
- data/test/fixtures/df.csv +15 -0
- data/test/fixtures/hartman_23.matrix +9 -0
- data/test/fixtures/stock_data.csv +500 -0
- data/test/fixtures/tetmat_matrix.txt +5 -0
- data/test/fixtures/tetmat_test.txt +1001 -0
- data/test/helpers_tests.rb +83 -0
- data/test/test_analysis.rb +176 -0
- data/test/test_anova_contrast.rb +36 -0
- data/test/test_anovaoneway.rb +26 -0
- data/test/test_anovatwoway.rb +37 -0
- data/test/test_anovatwowaywithdataset.rb +47 -0
- data/test/test_anovawithvectors.rb +102 -0
- data/test/test_awesome_print_bug.rb +16 -0
- data/test/test_bartlettsphericity.rb +25 -0
- data/test/test_bivariate.rb +164 -0
- data/test/test_codification.rb +78 -0
- data/test/test_crosstab.rb +67 -0
- data/test/test_dominance_analysis.rb +39 -0
- data/test/test_factor.rb +228 -0
- data/test/test_factor_map.rb +38 -0
- data/test/test_factor_pa.rb +56 -0
- data/test/test_fit_model.rb +88 -0
- data/test/test_ggobi.rb +35 -0
- data/test/test_gsl.rb +15 -0
- data/test/test_histogram.rb +109 -0
- data/test/test_matrix.rb +48 -0
- data/test/test_multiset.rb +176 -0
- data/test/test_regression.rb +231 -0
- data/test/test_reliability.rb +223 -0
- data/test/test_reliability_icc.rb +198 -0
- data/test/test_reliability_skillscale.rb +57 -0
- data/test/test_resample.rb +24 -0
- data/test/test_srs.rb +9 -0
- data/test/test_statistics.rb +69 -0
- data/test/test_stest.rb +69 -0
- data/test/test_stratified.rb +17 -0
- data/test/test_test_f.rb +33 -0
- data/test/test_test_kolmogorovsmirnov.rb +34 -0
- data/test/test_test_t.rb +62 -0
- data/test/test_umannwhitney.rb +27 -0
- data/test/test_vector.rb +12 -0
- data/test/test_wilcoxonsignedrank.rb +64 -0
- metadata +570 -0
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
#!/usr/bin/ruby
$LOAD_PATH.unshift(File.dirname(__FILE__)+'/../lib/')

# == Description
#
# Shows how to run a parallel analysis alongside a PCA. Parallel analysis
# helps decide how many components of the PCA should be retained.
require 'statsample'

samples    = 150
variables  = 30
iterations = 50

Statsample::Analysis.store(Statsample::Factor::ParallelAnalysis) do
  rng = Distribution::Normal.rng

  # Three underlying factors from which every observed variable is derived.
  f1 = rnorm(samples)
  f2 = rnorm(samples)
  f3 = rnorm(samples)

  vectors = {}

  variables.times do |i|
    key = :"v#{i}"
    values = samples.times.map do |nv|
      f1[nv]*i+(f2[nv]*(15-i))+((f3[nv]*(30-i))*1.5)*rng.call
    end
    vectors[key] = Daru::Vector.new(values)
    vectors[key].rename "Vector #{i}"
  end

  ds = Daru::DataFrame.new(vectors)

  parallel = Statsample::Factor::ParallelAnalysis.new(ds, iterations: iterations, debug: true)
  components = pca(cor(ds))

  echo "There are 3 real factors on data"
  summary components
  echo "Traditional Kaiser criterion (k>1) returns #{components.m} factors"
  summary parallel
  echo "Parallel Analysis returns #{parallel.number_of_factors} factors to preserve"
end

# Run the stored analysis only when this file is executed directly.
Statsample::Analysis.run_batch if __FILE__ == $0
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
#!/usr/bin/ruby
$LOAD_PATH.unshift(File.dirname(__FILE__)+'/../lib/')

# == Description
# Polychoric correlation using the two-step and joint methods.
#
# Polychoric correlation in statsample requires installation of
# the [statsample-bivariate-extension](https://rubygems.org/gems/statsample-bivariate-extension)
# gem. That gem extends the Statsample::Bivariate class with
# algorithms for polychoric and tetrachoric correlation.
#
# Statsample automatically detects the presence of the polychoric/tetrachoric
# algorithms, so there is no need to require the gem explicitly.
#
# This example shows how a polychoric correlation can be
# computed with statsample.
require 'statsample'

Statsample::Analysis.store(Statsample::Bivariate::Polychoric) do
  # Random 3x3 contingency table; rows are built in order so the sequence of
  # rand calls matches a single Matrix literal.
  row_1 = [rand(10)+50, rand(10)+50, rand(10)+1]
  row_2 = [rand(20)+5, rand(50)+4, rand(10)+1]
  row_3 = [rand(8)+1, rand(12)+1, rand(10)+1]
  ct = Matrix[row_1, row_2, row_3]

  # Estimation of polychoric correlation using two-step (default)
  two_step = polychoric(ct, name: "Polychoric with two-step", debug: false)
  summary two_step

  # Estimation of polychoric correlation using joint method (slow)
  joint = polychoric(ct, method: :joint, name: "Polychoric with joint")
  summary joint

  # Uses polychoric series (not recommended)
  series = polychoric(ct, method: :polychoric_series, name: "Polychoric with polychoric series")
  summary series
end

# Run the stored analysis only when this file is executed directly.
Statsample::Analysis.run_batch if __FILE__ == $0
|
|
40
|
+
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
#!/usr/bin/ruby
$LOAD_PATH.unshift(File.dirname(__FILE__)+'/../lib/')
# Principal Axis Analysis
#
# Here we use the Statsample::Factor::PrincipalAxis class
# for principal axis analysis of a correlation or covariance matrix.
require 'statsample'

Statsample::Analysis.store(Statsample::Factor::PrincipalAxis) do
  rows = [
    [1.0, 0.709501601093587, 0.877596585880047, 0.272219316266807],
    [0.709501601093587, 1.0, 0.291633797330304, 0.871141831433844],
    [0.877596585880047, 0.291633797330304, 1.0, -0.213373722977167],
    [0.272219316266807, 0.871141831433844, -0.213373722977167, 1.0]
  ]
  matrix = Matrix[*rows]

  # Mix the covariate-matrix behaviour into this particular Matrix instance.
  matrix.extend Statsample::CovariateMatrix
  fa = principal_axis(matrix, m: 1, smc: false)

  summary fa
end

# Run the stored analysis only when this file is executed directly.
Statsample::Analysis.run_batch if __FILE__ == $0
|
|
26
|
+
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
#!/usr/bin/ruby
$:.unshift(File.dirname(__FILE__)+'/../lib')

# == Description
#
# Reliability Scale Analysis with statsample
require 'statsample'
Statsample::Analysis.store(Statsample::Reliability) do
  samples=100
  a=rnorm(samples)

  ds = Daru::DataFrame.new({})

  # Twenty items, each equal to the shared vector `a` plus its own rnorm noise
  # (presumably mean 0 and sd 0.2 -- confirm against the rnorm shorthand).
  20.times do |i|
    ds["v#{i}".to_sym]= a + rnorm(samples,0,0.2)
  end

  rel=Statsample::Reliability::ScaleAnalysis.new(ds)
  summary rel

  # The report name previously read "Multi Scale analyss" (typo).
  ms=Statsample::Reliability::MultiScaleAnalysis.new(:name=>"Multi Scale analysis") do |m|
    m.scale "Scale 1", ds.clone([:v1, :v2, :v3, :v4, :v5, :v6, :v7, :v8, :v9, :v10])
    m.scale "Scale 2", ds.clone([:v11, :v12, :v13, :v14, :v15, :v16, :v17, :v18, :v19])
  end

  summary ms
end

# Run the stored analysis only when this file is executed directly.
if __FILE__==$0
  Statsample::Analysis.run_batch
end
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
#!/usr/bin/ruby
$:.unshift(File.dirname(__FILE__)+'/../lib/')
# NOTE: a hard-coded developer-machine path ('/home/cdx/dev/reportbuilder/lib/')
# used to be pushed onto the load path here. It does not exist on other
# machines, so reportbuilder is now resolved through the normal gem path.

# == Description
#
# Creating a scatterplot with statsample's Statsample::Graph::Scatterplot class.
#
# In this example we'll demonstrate how a normally distributed Daru::Vector can
# be created using the daru and distribution gems, and how the values generated
# can be plotted very easily using the 'scatterplot' shorthand and supplying X
# and Y co-ordinates.
require 'benchmark'
require 'statsample'
n=100

Statsample::Analysis.store(Statsample::Graph::Scatterplot) do
  x=rnorm(n)
  # y is x plus a second rnorm draw, so the two series are related.
  y=x+rnorm(n,0.5,0.2)
  scatterplot(x,y)
end

# Run the stored analysis only when this file is executed directly.
if __FILE__==$0
  Statsample::Analysis.run
end
|
data/examples/t_test.rb
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
#!/usr/bin/ruby
$LOAD_PATH.unshift(File.dirname(__FILE__)+'/../lib')
# == Description
#
# Illustrates how a T test can be computed and summarized with statsample.
#
# == References
#
# http://en.wikipedia.org/wiki/Student%27s_t-test
require 'statsample'

Statsample::Analysis.store(Statsample::Test::T) do
  # One-sample test of `a`, passing 50 as the :u option.
  a = rnorm(10)
  one_sample = Statsample::Test.t_one_sample(a, { u: 50 })
  summary one_sample

  # Independent two-sample test between `a` and a second sample.
  b = rnorm(10, 2)
  two_samples = Statsample::Test.t_two_samples_independent(a, b)
  summary two_samples
end

# Run the stored analysis only when this file is executed directly.
Statsample::Analysis.run_batch if __FILE__ == $0
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
#!/usr/bin/ruby
$LOAD_PATH.unshift(File.dirname(__FILE__)+'/../lib/')

require 'statsample'

Statsample::Analysis.store(Statsample::Bivariate::Tetrachoric) do
  # Four counts handed to the `tetrachoric` shorthand.
  a, b, c, d = 40, 10, 20, 30
  summary tetrachoric(a, b, c, d)
end

# Run the stored analysis only when this file is executed directly.
Statsample::Analysis.run_batch if __FILE__ == $0
|
data/examples/u_test.rb
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
#!/usr/bin/ruby
$LOAD_PATH.unshift(File.dirname(__FILE__)+'/../lib')

# == Description
#
# Example illustrating the Mann-Whitney U test with statsample.
#
# == References
#
# http://en.wikipedia.org/wiki/Mann%E2%80%93Whitney_U_test
require 'statsample'

Statsample::Analysis.store(Statsample::Test::UMannWhitney) do
  # Two random samples of different sizes and distributions.
  a = Daru::Vector.new(Array.new(10) { rand(100) })
  b = Daru::Vector.new(Array.new(20) { (rand(20))**2+50 })

  u = Statsample::Test::UMannWhitney.new(a, b)
  summary u
end

# Run the stored analysis only when this file is executed directly.
Statsample::Analysis.run_batch if __FILE__ == $0
|
data/examples/vector.rb
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
#!/usr/bin/ruby
$LOAD_PATH.unshift(File.dirname(__FILE__)+'/../lib/')
# == Description
#
# This example provides a small sneak peek into creating a Daru::Vector.
# For details on using Daru::Vector (with examples on math, statistics and plotting)
# see the notebook at this link:
# http://nbviewer.ipython.org/github/SciRuby/sciruby-notebooks/blob/master/Data%20Analysis/Usage%20of%20Vector.ipynb
require 'statsample'

Statsample::Analysis.store(Daru::Vector) do
  # 1000 random values in 0..3; a draw of 4 is stored as nil instead.
  a = Daru::Vector.new_with_size(1000) do
    r = rand(5)
    r == 4 ? nil : r
  end
  summary a

  b = Daru::Vector[1,2,3,4,6..10]
  summary b
end

# Run the stored analysis only when this file is executed directly.
Statsample::Analysis.run_batch if __FILE__ == $0
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
#!/usr/bin/ruby
$LOAD_PATH.unshift(File.dirname(__FILE__)+'/../lib/')
# == Description
#
# Velicer MAP test.

require 'statsample'

Statsample::Analysis.store(Statsample::Factor::MAP) do
  rng = Distribution::Normal.rng
  samples = 100
  variables = 10

  # Two underlying factors shared by every observed variable.
  f1 = rnorm(samples)
  f2 = rnorm(samples)

  vectors = {}

  variables.times do |i|
    # The first five variables weight the factors 5/2, the remaining ones 2/3.
    w1, w2 = i < 5 ? [5, 2] : [2, 3]
    values = samples.times.map { |nv| f1[nv]*w1 + f2[nv]*w2 + rng.call }
    vectors[:"v#{i}"] = Daru::Vector.new(values)
  end

  ds = Daru::DataFrame.new(vectors)
  cor_matrix = cor(ds)
  components = pca(cor_matrix)

  map_test = Statsample::Factor::MAP.new(cor_matrix)

  echo("There are 2 real factors on data")
  summary(components)
  echo("Traditional Kaiser criterion (k>1) returns #{components.m} factors")
  summary(map_test)
  echo("Velicer's MAP Test returns #{map_test.number_of_factors} factors to preserve")
end

# Run the stored analysis only when this file is executed directly.
Statsample::Analysis.run_batch if __FILE__ == $0
|
data/grab_references.rb
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
# Collects the bullet items that follow a "== Reference" heading in every Ruby
# file below the current directory and writes the sorted, de-duplicated list
# to references.txt.
require 'reportbuilder'

refs = []
Dir.glob("**/*.rb") do |f|
  # Skip anything whose path contains "pkg".
  next if f =~ /pkg/
  reference = false
  # File.foreach closes the file when iteration ends; the previous
  # File.open(f).each_line left one open handle per scanned file.
  File.foreach(f) do |l|
    if l =~ /== Reference/
      reference = true
    elsif reference
      if l =~ /\*\s+(.+)/
        refs.push($1)
      else
        # First line that is not a "* ..." item ends the reference section.
        reference = false
      end
    end
  end
end

rb = ReportBuilder.new(:name=>"References") do |g|
  refs.uniq.sort.each do |r|
    g.text "* #{r}"
  end
end

rb.save_text("references.txt")
|
data/lib/spss.rb
ADDED
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
# = spss.rb -
|
|
2
|
+
#
|
|
3
|
+
# Provides utilities for working with SPSS files
|
|
4
|
+
#
|
|
5
|
+
# Copyright (C) 2009 Claudio Bustos
|
|
6
|
+
#
|
|
7
|
+
# Claudio Bustos mailto:clbustos@gmail.com
|
|
8
|
+
|
|
9
|
+
module SPSS # :nodoc: all
  module Dictionary
    # Base class for dictionary nodes: stores the raw config hash and an
    # ordered list of child elements.
    class Element
      # config:: hash of attribute name => value, kept in @config.
      def initialize(config = {})
        @config = config
        @elements = []
      end

      # Appends a child element.
      def add(a)
        @elements.push(a)
      end

      # Calls +func+ on every child, prefixes each result with a space and
      # joins the results with newlines.
      def parse_elements(func = :to_s)
        @elements.collect { |e| " " + e.send(func) }.join("\n")
      end

      # Assigns every config entry for which this object exposes a public
      # writer; unknown keys are ignored.
      #
      # The previous guard, `methods.include? key.to_s`, compared a String
      # against the Symbols returned by Object#methods on Ruby >= 1.9, so it
      # was always false and no attribute was ever assigned.
      def init_with(config)
        config.each do |key, value|
          setter = "#{key}="
          send(setter, value) if respond_to?(setter)
        end
      end
    end

    # Root node of the dictionary.
    class Dictionary < Element
      attr_accessor :locale, :date_time, :row_count

      def initialize(config = {})
        super
        # Defaults first, then caller-supplied overrides.
        init_with({
          :locale => "en_US",
          :date_time => Time.new().strftime("%Y-%m-%dT%H:%M:%S"),
          :row_count => 1
        })
        init_with config
      end

      def to_xml
        "<dictionary locale='#{@locale}' creationDateTime='#{@date_time}' rowCount='#{@row_count}' xmlns='http://xml.spss.com/spss/data'>\n"+parse_elements(:to_xml)+"\n</dictionary>"
      end

      def to_spss
        parse_elements(:to_spss)
      end
    end

    # A missing-value declaration for a variable.
    class MissingValue < Element
      attr_accessor :data, :type, :from, :to

      # data:: the missing-value code.
      # type:: nil, "lowerBound" or "upperBound".
      #
      # Raises ArgumentError for any other type (previously a bare Exception;
      # ArgumentError is still an Exception, so broad rescues keep working).
      def initialize(data, type = nil)
        # Explicit empty argument list: Element#initialize takes only a config
        # hash. Without this call @elements was never set up.
        super()
        @data = data
        unless type.nil? || type == "lowerBound" || type == "upperBound"
          raise ArgumentError, "Incorrect value for type"
        end
        @type = type
      end

      def to_xml
        "<missingValue data='#{@data}' "+(type.nil? ? "":"type='#{type}'")+"/>"
      end
    end

    # Value labels for a variable, stored as a hash of value => label
    # (the usage in this file is {1=>"Si", 2=>"No"}).
    class LabelSet
      # The original bare `attr_accessor` (no arguments) defined nothing.
      attr_accessor :labels

      def initialize(labels)
        @labels = labels
      end

      # XML label set for the variable called +name+. Hash keys are emitted as
      # `value` and hash values as `label`, matching parse_spss; the two
      # attributes used to be swapped.
      def parse_xml(name)
        "<valueLabelSet>\n "+@labels.collect{|value,label| "<valueLabel label='#{label}' value='#{value}' />"}.join("\n ")+"\n <valueLabelVariable name='#{name}' />\n</valueLabelSet>"
      end

      # SPSS syntax fragment: one "value 'label'" pair per line.
      def parse_spss()
        @labels.collect{|value,label| "#{value} '#{label}'"}.join("\n ")
      end
    end

    # A single variable definition.
    class Variable < Element
      # NOTE: the accessor keeps its historical spelling `aligment` so existing
      # callers and config keys continue to work.
      attr_accessor :aligment, :display_width, :label, :measurement_level, :name, :type, :decimals, :width, :type_format, :labelset, :missing_values

      def initialize(config={})
        super
        # Class-wide counter used to generate default names and labels.
        @@var_number||=1
        # Set before init_with so a :missing_values config entry is not
        # overwritten afterwards.
        @missing_values=[]
        init_with({
          :aligment => "left",
          :display_width => 8,
          :label => "Variable #{@@var_number}",
          :measurement_level => "SCALE",
          :name => "var#{@@var_number}",
          :type => 0,
          :decimals => 2,
          :width => 10,
          :type_format => "F",
          :labelset => nil
        })
        init_with config
        @@var_number+=1
      end

      def to_xml
        labelset_s=(@labelset.nil?) ? "":"\n"+@labelset.parse_xml(@name)
        missing_values=(@missing_values.size>0) ? @missing_values.collect {|m| m.to_xml}.join("\n"):""
        "<variable aligment='#{@aligment}' displayWidth='#{@display_width}' label='#{@label}' measurementLevel='#{@measurement_level}' name='#{@name}' type='#{@type}'>\n<variableFormat decimals='#{@decimals}' width='#{@width}' type='#{@type_format}' />\n"+parse_elements(:to_xml)+missing_values+"</variable>"+labelset_s
      end

      # SPSS syntax describing this variable, one command per line, each
      # terminated by a newline so successive commands never share a line.
      def to_spss
        commands = [
          "VARIABLE LABELS #{@name} '#{label}' .",
          # The SPSS command is spelled ALIGNMENT; the old output used
          # "ALIGMENT".
          "VARIABLE ALIGNMENT #{@name} (#{@aligment.upcase}) .",
          "VARIABLE WIDTH #{@name} (#{@display_width}) .",
          "VARIABLE LEVEL #{@name} (#{@measurement_level.upcase}) ."
        ]
        unless @labelset.nil?
          commands << "VALUE LABELS #{@name} "+labelset.parse_spss()+" ."
        end
        if @missing_values.size>0
          commands << "MISSING VALUES #{@name} ("+@missing_values.collect{|m| m.data}.join(",")+") ."
        end
        commands.join("\n") + "\n"
      end
    end
  end
end
|
|
122
|
+
# Demo: builds a two-variable dictionary and writes its SPSS syntax to
# dic_spss.sps in the current directory. Guarded so it runs only when this
# file is executed directly; previously it ran (and wrote the file) every
# time the library was required.
if __FILE__ == $0
  n=SPSS::Dictionary::Dictionary.new
  ls=SPSS::Dictionary::LabelSet.new({1=>"Si",2=>"No"})
  var1=SPSS::Dictionary::Variable.new
  var1.labelset=ls
  mv1=SPSS::Dictionary::MissingValue.new("-99")
  var2=SPSS::Dictionary::Variable.new
  n.add(var1)
  n.add(var2)
  var2.missing_values=[mv1]

  File.open("dic_spss.sps","wb") {|f|
    f.puts n.to_spss
  }
end
|