statsample 0.18.0 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data.tar.gz.sig +0 -0
- data/History.txt +23 -0
- data/Manifest.txt +28 -17
- data/Rakefile +3 -2
- data/benchmarks/correlation_matrix_15_variables.rb +31 -0
- data/benchmarks/correlation_matrix_5_variables.rb +32 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.ds +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.html +93 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +75 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.xls +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix_gsl_ruby.ods +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix_with_graphics.ods +0 -0
- data/benchmarks/correlation_matrix_methods/results.ds +0 -0
- data/benchmarks/factor_map.rb +37 -0
- data/benchmarks/helpers_benchmark.rb +5 -0
- data/examples/boxplot.rb +13 -14
- data/examples/correlation_matrix.rb +16 -8
- data/examples/dataset.rb +13 -4
- data/examples/dominance_analysis.rb +23 -17
- data/examples/dominance_analysis_bootstrap.rb +28 -22
- data/examples/histogram.rb +8 -9
- data/examples/icc.rb +20 -21
- data/examples/levene.rb +10 -4
- data/examples/multiple_regression.rb +9 -28
- data/examples/multivariate_correlation.rb +9 -3
- data/examples/parallel_analysis.rb +20 -16
- data/examples/polychoric.rb +15 -9
- data/examples/principal_axis.rb +18 -6
- data/examples/reliability.rb +26 -13
- data/examples/scatterplot.rb +10 -6
- data/examples/t_test.rb +15 -6
- data/examples/tetrachoric.rb +9 -2
- data/examples/u_test.rb +12 -4
- data/examples/vector.rb +13 -2
- data/examples/velicer_map_test.rb +33 -26
- data/lib/statsample.rb +32 -12
- data/lib/statsample/analysis.rb +79 -0
- data/lib/statsample/analysis/suite.rb +72 -0
- data/lib/statsample/analysis/suitereportbuilder.rb +38 -0
- data/lib/statsample/bivariate.rb +70 -16
- data/lib/statsample/dataset.rb +25 -19
- data/lib/statsample/dominanceanalysis.rb +2 -2
- data/lib/statsample/factor.rb +2 -0
- data/lib/statsample/factor/map.rb +16 -10
- data/lib/statsample/factor/parallelanalysis.rb +9 -3
- data/lib/statsample/factor/pca.rb +28 -32
- data/lib/statsample/factor/rotation.rb +15 -8
- data/lib/statsample/graph/boxplot.rb +3 -4
- data/lib/statsample/graph/histogram.rb +2 -1
- data/lib/statsample/graph/scatterplot.rb +1 -0
- data/lib/statsample/matrix.rb +106 -16
- data/lib/statsample/regression.rb +4 -1
- data/lib/statsample/regression/binomial.rb +1 -1
- data/lib/statsample/regression/multiple/baseengine.rb +19 -9
- data/lib/statsample/regression/multiple/gslengine.rb +127 -126
- data/lib/statsample/regression/multiple/matrixengine.rb +8 -5
- data/lib/statsample/regression/multiple/rubyengine.rb +1 -1
- data/lib/statsample/regression/simple.rb +31 -6
- data/lib/statsample/reliability.rb +11 -3
- data/lib/statsample/reliability/scaleanalysis.rb +4 -4
- data/lib/statsample/shorthand.rb +81 -0
- data/lib/statsample/test/chisquare.rb +1 -1
- data/lib/statsample/vector.rb +163 -163
- data/lib/statsample/vector/gsl.rb +106 -0
- data/references.txt +2 -2
- data/{data → test/fixtures}/crime.txt +0 -0
- data/{data → test/fixtures}/hartman_23.matrix +0 -0
- data/{data → test/fixtures}/repeated_fields.csv +0 -0
- data/{data → test/fixtures}/test_binomial.csv +0 -0
- data/test/{test_csv.csv → fixtures/test_csv.csv} +0 -0
- data/test/{test_xls.xls → fixtures/test_xls.xls} +0 -0
- data/{data → test/fixtures}/tetmat_matrix.txt +0 -0
- data/{data → test/fixtures}/tetmat_test.txt +0 -0
- data/test/helpers_tests.rb +18 -2
- data/test/test_analysis.rb +118 -0
- data/test/test_anovatwoway.rb +1 -1
- data/test/test_anovatwowaywithdataset.rb +1 -1
- data/test/test_anovawithvectors.rb +1 -2
- data/test/test_bartlettsphericity.rb +1 -2
- data/test/test_bivariate.rb +64 -22
- data/test/test_codification.rb +1 -2
- data/test/test_crosstab.rb +1 -2
- data/test/test_csv.rb +3 -4
- data/test/test_dataset.rb +24 -3
- data/test/test_dominance_analysis.rb +1 -2
- data/test/test_factor.rb +8 -69
- data/test/test_factor_map.rb +43 -0
- data/test/test_factor_pa.rb +54 -0
- data/test/test_ggobi.rb +1 -1
- data/test/test_gsl.rb +12 -18
- data/test/test_histogram.rb +1 -2
- data/test/test_logit.rb +62 -18
- data/test/test_matrix.rb +4 -5
- data/test/test_mle.rb +3 -4
- data/test/test_regression.rb +21 -2
- data/test/test_reliability.rb +3 -3
- data/test/test_reliability_icc.rb +1 -1
- data/test/test_reliability_skillscale.rb +20 -4
- data/test/test_resample.rb +1 -2
- data/test/test_rserve_extension.rb +1 -2
- data/test/test_srs.rb +1 -2
- data/test/test_statistics.rb +1 -2
- data/test/test_stest.rb +1 -2
- data/test/test_stratified.rb +1 -2
- data/test/test_test_f.rb +1 -2
- data/test/test_test_t.rb +1 -2
- data/test/test_umannwhitney.rb +1 -2
- data/test/test_vector.rb +117 -18
- data/test/test_xls.rb +2 -3
- data/web/Rakefile +39 -0
- metadata +109 -29
- metadata.gz.sig +0 -0
- data/examples/parallel_analysis_tetrachoric.rb +0 -31
- data/lib/distribution.rb +0 -25
- data/lib/distribution/chisquare.rb +0 -23
- data/lib/distribution/f.rb +0 -35
- data/lib/distribution/normal.rb +0 -60
- data/lib/distribution/normalbivariate.rb +0 -284
- data/lib/distribution/normalmultivariate.rb +0 -73
- data/lib/distribution/t.rb +0 -55
- data/test/test_distribution.rb +0 -73
@@ -0,0 +1,106 @@
|
|
1
|
+
module Statsample
  class Vector
    # GSL-backed versions of several Statsample::Vector methods.
    #
    # Most methods delegate to a GSL::Vector built from @scale_data and fall
    # back to a matching *_ruby method otherwise.
    # NOTE(review): the *_ruby methods, check_type and flawed? are not defined
    # in this file -- presumably the including class provides them; confirm.
    module GSL_
      # Drops the memoized GSL::Vector so the next call to #gsl rebuilds it.
      def clear_gsl
        @gsl=nil
      end

      # Invalidates the cached GSL vector, then delegates to
      # set_valid_data_ruby.
      def set_valid_data
        clear_gsl
        set_valid_data_ruby
      end

      # Appends +v+ by delegating to push_ruby.
      def push(v)
        # If data is GSL::Vector, should be converted first to an Array
        if @data.is_a? GSL::Vector
          @data=@data.to_a
        end
        push_ruby(v)
      end

      # Returns a memoized GSL::Vector allocated from @scale_data, or nil
      # when @scale_data is empty.
      def gsl
        @gsl||=GSL::Vector.alloc(@scale_data) if @scale_data.size>0
      end

      alias :to_gsl :gsl

      # Returns a scale vector of (x - m) / sd for every element of #gsl.
      # Delegates to vector_standarized_compute_ruby when flawed? is true.
      def vector_standarized_compute(m,sd)
        if flawed?
          vector_standarized_compute_ruby(m,sd)
        else
          gsl.collect {|x| (x.to_f - m).quo(sd)}.to_scale
        end
      end

      # Returns a scale vector of (x - m) for every element of #gsl.
      # Delegates to vector_centered_compute_ruby when flawed? is true.
      def vector_centered_compute(m)
        if flawed?
          vector_centered_compute_ruby(m)
        else
          gsl.collect {|x| (x.to_f - m)}.to_scale
        end
      end

      # Draws +sample+ elements with replacement.
      # Scale vectors are sampled through a GSL random generator and returned
      # as a new scale Statsample::Vector; any other type is delegated to
      # sample_with_replacement_ruby.
      def sample_with_replacement(sample=1)
        if @type!=:scale
          sample_with_replacement_ruby(sample)
        else
          # The generator is seeded from Kernel#rand on every call.
          r = GSL::Rng.alloc(GSL::Rng::MT19937,rand(10000))
          Statsample::Vector.new(r.sample(gsl, sample).to_a,:scale)
        end
      end

      # Draws +sample+ elements without replacement.
      # Scale vectors are sampled through a GSL random generator and returned
      # as an Array; any other type is delegated to
      # sample_without_replacement_ruby.
      def sample_without_replacement(sample=1)
        if @type!=:scale
          sample_without_replacement_ruby(sample)
        else
          r = GSL::Rng.alloc(GSL::Rng::MT19937,rand(10000))
          r.choose(gsl, sample).to_a
        end
      end

      # Median of the vector. Scale vectors sort @scale_data and use
      # GSL::Stats::median_from_sorted_data; other types use median_ruby.
      def median
        if @type!=:scale
          median_ruby
        else
          sorted=GSL::Vector.alloc(@scale_data.sort)
          GSL::Stats::median_from_sorted_data(sorted)
        end
      end

      # Sum of the scale data, or nil when #gsl is nil.
      def sum
        check_type :scale
        gsl.nil? ? nil : gsl.sum
      end

      # Mean of the scale data, or nil when #gsl is nil.
      def mean
        check_type :scale
        gsl.nil? ? nil : gsl.mean
      end

      # Sample variance around +m+ (defaults to #mean), or nil when #gsl is
      # nil.
      def variance_sample(m=nil)
        check_type :scale
        m||=mean
        # Forward m: calling variance_m without it ignored a caller-supplied
        # mean, unlike standard_deviation_sample which passes it on.
        gsl.nil? ? nil : gsl.variance_m(m)
      end

      # Sample standard deviation around +m+ (defaults to #mean), or nil when
      # #gsl is nil.
      def standard_deviation_sample(m=nil)
        check_type :scale
        m||=mean
        gsl.nil? ? nil : gsl.sd(m)
      end

      # Variance computed with +m+ (defaults to #mean) as a fixed mean, or nil
      # when #gsl is nil.
      def variance_population(m=nil) # :nodoc:
        check_type :scale
        m||=mean
        gsl.nil? ? nil : gsl.variance_with_fixed_mean(m)
      end

      # Standard deviation computed with +m+ (defaults to #mean) as a fixed
      # mean, or nil when #gsl is nil.
      def standard_deviation_population(m=nil) # :nodoc:
        check_type :scale
        m||=mean
        gsl.nil? ? nil : gsl.sd_with_fixed_mean(m)
      end

      # Skewness as reported by GSL, or nil when #gsl is nil.
      def skew # :nodoc:
        check_type :scale
        gsl.nil? ? nil : gsl.skew
      end

      # Kurtosis as reported by GSL, or nil when #gsl is nil.
      def kurtosis # :nodoc:
        check_type :scale
        gsl.nil? ? nil : gsl.kurtosis
      end
    end
  end
end
|
data/references.txt
CHANGED
@@ -7,6 +7,7 @@ References
|
|
7
7
|
* Dinneen, L., & Blakesley, B. (1973). Algorithm AS 62: A Generator for the Sampling Distribution of the Mann-Whitney U Statistic. <em>Journal of the Royal Statistical Society, 22</em>(2), 269-273
|
8
8
|
* Dziuban, C., & Shirkey, E. (1974). When is a correlation matrix appropriate for factor analysis? Some decision rules. Psychological Bulletin, 81(6), 358-361.
|
9
9
|
* Hayton, J., Allen, D. & Scarpello, V. (2004). Factor Retention Decisions in Exploratory Factor Analysis: a Tutorial on Parallel Analysis. <i>Organizational Research Methods, 7</i> (2), 191-205.
|
10
|
+
* Härdle, W. & Simar, L. (2003). Applied Multivariate Statistical Analysis. Springer.
|
10
11
|
* Lin, J. (2007). VARIMAX_K58 [Source code]. [http://www.johnny-lin.com/idl_code/varimax_k58.pro]
|
11
12
|
* Liu, O., & Rijmen, F. (2008). A modified procedure for parallel analysis of ordered categorical data. Behavior Research Methods, 40(2), 556-562.
|
12
13
|
* McGraw, K. & Wong, S.P. (1996). Forming Inferences About Some Intraclass Correlation Coefficients. Psychological methods, 1(1), 30-46.
|
@@ -16,8 +17,7 @@ References
|
|
16
17
|
* Smith, L. (2002). A tutorial on Principal Component Analysis. Available on http://courses.eas.ualberta.ca/eas570/pca_tutorial.pdf
|
17
18
|
* http://en.wikipedia.org/wiki/Welch-Satterthwaite_equation
|
18
19
|
* http://europe.isixsigma.com/library/content/c080806a.asp
|
19
|
-
* http://snippets.dzone.com/posts/show/4666
|
20
20
|
* http://stattrek.com/Lesson6/SRS.aspx
|
21
|
-
* http://
|
21
|
+
* http://talkstats.com/showthread.php?t=5056
|
22
22
|
* http://www.gnu.org/software/gsl/manual/html_node/The-histogram-struct.html
|
23
23
|
* http://www.taygeta.com/random/gaussian.html
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
data/test/helpers_tests.rb
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
$:.unshift(File.expand_path(File.dirname(__FILE__)+'/../lib/'))
|
2
2
|
$:.unshift(File.expand_path(File.dirname(__FILE__)+'/'))
|
3
3
|
|
4
|
-
|
5
4
|
require 'minitest/unit'
|
6
5
|
require 'tempfile'
|
7
6
|
require 'tmpdir'
|
@@ -17,7 +16,18 @@ module MiniTest
|
|
17
16
|
include Shoulda::InstanceMethods
|
18
17
|
extend Shoulda::ClassMethods
|
19
18
|
include Shoulda::Assertions
|
20
|
-
|
19
|
+
|
20
|
+
# Declares a shoulda test called +name+. When Statsample.has_gsl? is true the
# given block is instance_eval'ed as the test body; otherwise the test calls
# skip("Requires GSL").
def self.should_with_gsl(name,&block)
  should(name) do
    Statsample.has_gsl? ? instance_eval(&block) : skip("Requires GSL")
  end
end
|
30
|
+
|
21
31
|
end
|
22
32
|
end
|
23
33
|
|
@@ -29,6 +39,12 @@ module MiniTest
|
|
29
39
|
assert_in_delta(v,obs[i],delta)
|
30
40
|
}
|
31
41
|
end
|
42
|
+
# Asserts that +exp+ and +obs+ have the same size and that the elements at
# each index differ by no more than +delta+. +msg+ is appended to every
# failure message.
def assert_equal_vector(exp,obs,delta=1e-10,msg=nil)
  assert_equal(exp.size, obs.size, "Different size.#{msg}")
  exp.size.times do |idx|
    expected_item = exp[idx]
    observed_item = obs[idx]
    failure = "Different element #{idx}. \nExpected:\n#{exp}\nObserved:\n#{obs}.#{msg}"
    assert_in_delta(expected_item, observed_item, delta, failure)
  end
end
|
32
48
|
def assert_equal_matrix(exp,obs,delta=1e-10,msg=nil)
|
33
49
|
assert_equal(exp.row_size, obs.row_size, "Different row size.#{msg}")
|
34
50
|
assert_equal(exp.column_size, obs.column_size, "Different column size.#{msg}")
|
@@ -0,0 +1,118 @@
|
|
1
|
+
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
|
2
|
+
require 'mocha'
|
3
|
+
# Tests for the Statsample::Analysis DSL: the suite registry (store / last),
# Suite (echo, summary, attach / detach, rnorm) and SuiteReportBuilder.
class StatsampleAnalysisTestCase < MiniTest::Unit::TestCase
  context(Statsample::Analysis) do
    should "store() should create and store Statsample::Analysis::Suite" do
      Statsample::Analysis.store(:first) { a = 1 }
      assert(Statsample::Analysis.stored_analysis[:first])
      assert(Statsample::Analysis.stored_analysis[:first].is_a?(Statsample::Analysis::Suite))
    end

    should "store last created analysis" do
      suite = Statsample::Analysis.store(:first) { a = 1 }
      assert_equal(suite, Statsample::Analysis.last)
    end

    context(Statsample::Analysis::Suite) do
      should "echo() uses output#puts with same arguments" do
        suite = Statsample::Analysis::Suite.new(:output)
        sink = mock()
        sink.expects(:puts).with(:first, :second).once
        suite.output = sink
        suite.echo(:first, :second)
      end

      should "summary() should call object.summary" do
        suite = Statsample::Analysis::Suite.new(:summary)
        summarizable = stub('summarizable', :summary => 'summary')
        assert_equal(summarizable.summary, suite.summary(summarizable))
      end

      should "attach() allows to call objects on objects which respond to fields" do
        suite = Statsample::Analysis::Suite.new(:summary)
        dataset = {'x' => stub(:mean => 10), 'y' => stub(:mean => 12)}
        dataset.expects(:fields).returns(%w[x y]).at_least_once
        suite.attach(dataset)
        assert_equal(10, suite.x.mean)
        assert_equal(12, suite.y.mean)
        # 'z' is not a field of the attached object
        assert_raise(RuntimeError) { suite.z }
      end

      should "attached objects should be called LIFO" do
        suite = Statsample::Analysis::Suite.new(:summary)
        older = {'x' => stub(:mean => 100), 'y' => stub(:mean => 120), 'z' => stub(:mean => 13)}
        older.expects(:fields).returns(%w[x y z]).at_least_once
        newer = {'x' => stub(:mean => 10), 'y' => stub(:mean => 12)}
        newer.expects(:fields).returns(%w[x y]).at_least_once
        suite.attach(older)
        suite.attach(newer)
        # x and y come from the most recently attached object, z from the older one
        assert_equal(10, suite.x.mean)
        assert_equal(12, suite.y.mean)
        assert_equal(13, suite.z.mean)
      end

      should "detach() without arguments drop latest object" do
        suite = Statsample::Analysis::Suite.new(:summary)
        older = {'x' => stub(:mean => 100), 'y' => stub(:mean => 120), 'z' => stub(:mean => 13)}
        older.expects(:fields).returns(%w[x y z]).at_least_once
        newer = {'x' => stub(:mean => 10), 'y' => stub(:mean => 12)}
        newer.expects(:fields).returns(%w[x y]).at_least_once
        suite.attach(older)
        suite.attach(newer)
        assert_equal(10, suite.x.mean)
        suite.detach
        assert_equal(100, suite.x.mean)
      end

      should "detach() with argument drop select object" do
        suite = Statsample::Analysis::Suite.new(:summary)
        only_x = {'x' => 1}
        only_x.expects(:fields).returns(%w[x]).at_least_once
        x_and_y = {'x' => 2, 'y' => 3}
        x_and_y.expects(:fields).returns(%w[x y]).at_least_once
        only_y = {'y' => 4}
        only_y.expects(:fields).returns(%w[y]).at_least_once

        suite.attach(only_y)
        suite.attach(x_and_y)
        suite.attach(only_x)
        assert_equal(1, suite.x)
        assert_equal(3, suite.y)
        suite.detach(x_and_y)
        assert_equal(4, suite.y)
      end

      should "perform a simple analysis" do
        sink = mock()
        sink.expects(:puts).with(5.5)
        suite = Statsample::Analysis.store(:simple, :output => sink) do
          frame = data_frame(:x => c(1..10), :y => c(1..10))
          attach(frame)
          echo(x.mean)
        end
        suite.run
      end

      should "rnorm returns a random normal distribution vector" do
        suite = Statsample::Analysis::Suite.new(:simple)
        # NOTE(review): the samples are random, so these tolerance checks are
        # probabilistic and could fail on an unlucky draw.
        sample = suite.rnorm(1000)
        assert_in_delta(0, sample.mean, 0.09)
        assert_in_delta(1, sample.sd, 0.09)
        sample = suite.rnorm(1000, 5, 10)
        assert_in_delta(5, sample.mean, 0.9)
        assert_in_delta(10, sample.sd, 0.9)
      end
    end

    context(Statsample::Analysis::SuiteReportBuilder) do
      should "echo() use add on rb object" do
        builder = Statsample::Analysis::SuiteReportBuilder.new(:puts_to_add)
        builder.rb.expects(:add).with(:first).twice
        builder.echo(:first, :first)
      end

      should "summary() uses add on rb object" do
        builder = Statsample::Analysis::SuiteReportBuilder.new(:summary_to_add)
        builder.rb.expects(:add).with(:first).once
        builder.summary(:first)
      end
    end

  end
end
|
data/test/test_anovatwoway.rb
CHANGED
@@ -1,5 +1,4 @@
|
|
1
|
-
require(File.dirname(__FILE__)+'/helpers_tests.rb')
|
2
|
-
|
1
|
+
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
|
3
2
|
class StatsampleAnovaOneWayWithVectorsTestCase < MiniTest::Unit::TestCase
|
4
3
|
context(Statsample::Anova::OneWayWithVectors) do
|
5
4
|
context("when initializing") do
|
data/test/test_bivariate.rb
CHANGED
@@ -1,31 +1,21 @@
|
|
1
|
-
require(File.dirname(__FILE__)+'/helpers_tests.rb')
|
2
|
-
|
1
|
+
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
|
3
2
|
class StatsampleBivariateTestCase < MiniTest::Unit::TestCase
|
4
3
|
# Checks Statsample::Bivariate.sum_of_squares against a fixed expected value.
should "method sum of squares should be correct" do
  first = [1,2,3,4,5,6].to_vector(:scale)
  second = [6,2,4,10,12,8].to_vector(:scale)
  observed = Statsample::Bivariate.sum_of_squares(first, second)
  assert_equal(23.0, observed)
end
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
assert_in_delta(Statsample::Bivariate.covariance(v1,v2), Statsample::Bivariate.covariance_slow(v1,v2), 0.001)
|
14
|
-
else
|
15
|
-
skip "Bivariate::covariance not tested (needs GSL)"
|
16
|
-
end
|
17
|
-
|
8
|
+
# Compares Bivariate.covariance with Bivariate.covariance_slow on two random
# 20-element scale vectors. Runs only when GSL is available.
should_with_gsl "return same covariance with ruby and gsl implementation" do
  v1=20.times.collect { rand }.to_scale
  v2=20.times.collect { rand }.to_scale
  assert_in_delta(Statsample::Bivariate.covariance(v1,v2), Statsample::Bivariate.covariance_slow(v1,v2), 0.001)
end
|
19
13
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
v2=20.times.collect {|a| rand()}.to_scale
|
14
|
+
# Compares GSL::Stats::correlation with Bivariate.pearson_slow on two random
# 20-element scale vectors. Runs only when GSL is available.
should_with_gsl "return same correlation with ruby and gsl implementation" do
  v1=20.times.collect { rand }.to_scale
  v2=20.times.collect { rand }.to_scale

  assert_in_delta(GSL::Stats::correlation(v1.gsl, v2.gsl), Statsample::Bivariate.pearson_slow(v1,v2), 1e-10)
end
|
30
20
|
should "return correct pearson correlation" do
|
31
21
|
v1=[6,5,4,7,8,4,3,2].to_vector(:scale)
|
@@ -49,7 +39,7 @@ class StatsampleBivariateTestCase < MiniTest::Unit::TestCase
|
|
49
39
|
assert_in_delta(Statsample::Bivariate.prop_pearson(r.t,8,:both), r.probability, 0.001)
|
50
40
|
assert(r.summary.size>0)
|
51
41
|
end
|
52
|
-
should "return correct correlation_matrix" do
|
42
|
+
should "return correct correlation_matrix with nils values" do
|
53
43
|
v1=[6,5,4,7,8,4,3,2].to_vector(:scale)
|
54
44
|
v2=[2,3,7,8,6,4,3,2].to_vector(:scale)
|
55
45
|
v3=[6,2, 1000,1000,5,4,7,8].to_vector(:scale)
|
@@ -69,6 +59,60 @@ class StatsampleBivariateTestCase < MiniTest::Unit::TestCase
|
|
69
59
|
end
|
70
60
|
#assert_equal(expected,obt)
|
71
61
|
end
|
62
|
+
# Builds a dataset of five random 100-case scale vectors and checks that
# covariance_matrix_optimized and covariance_matrix_pairwise agree.
should_with_gsl "return same values for optimized and pairwise covariance matrix" do
  cases = 100
  vectors = {}
  1.upto(5) do |k|
    vectors["v#{k}"] = Statsample::Vector.new_scale(cases) { rand }
  end
  ds = vectors.to_dataset

  cov_opt = Statsample::Bivariate.covariance_matrix_optimized(ds)
  cov_pw = Statsample::Bivariate.covariance_matrix_pairwise(ds)
  assert_equal_matrix(cov_opt, cov_pw, 1e-15)
end
|
77
|
+
# Builds a dataset of five random 100-case scale vectors and checks that
# correlation_matrix_optimized and correlation_matrix_pairwise agree.
should_with_gsl "return same values for optimized and pairwise correlation matrix" do
  cases = 100
  vectors = {}
  1.upto(5) do |k|
    vectors["v#{k}"] = Statsample::Vector.new_scale(cases) { rand }
  end
  ds = vectors.to_dataset

  cor_opt = Statsample::Bivariate.correlation_matrix_optimized(ds)
  cor_pw = Statsample::Bivariate.correlation_matrix_pairwise(ds)
  assert_equal_matrix(cor_opt, cor_pw, 1e-15)
end
|
94
|
+
# Checks every cell of Bivariate.correlation_matrix against the pairwise
# Bivariate.pearson value for four complete (no missing data) scale vectors.
should "return correct correlation_matrix without nils values" do
  v1=[6,5,4,7,8,4,3,2].to_vector(:scale)
  v2=[2,3,7,8,6,4,3,2].to_vector(:scale)
  v3=[6,2, 1000,1000,5,4,7,8].to_vector(:scale)
  v4=[2,4,6,7, 3,7,8,6].to_vector(:scale)
  ds={'v1'=>v1,'v2'=>v2,'v3'=>v3,'v4'=>v4}.to_dataset

  vectors = [v1, v2, v3, v4]
  pearson = lambda { |a, b| Statsample::Bivariate.pearson(a, b) }
  # One row per vector, one column per vector, in v1..v4 order
  expected = Matrix[*vectors.map { |row| vectors.map { |col| pearson.call(row, col) } }]
  obt = Statsample::Bivariate.correlation_matrix(ds)

  expected.row_size.times do |i|
    expected.column_size.times do |j|
      assert_in_delta(expected[i,j], obt[i,j], 0.0001, "#{expected[i,j].class}!=#{obt[i,j].class} ")
    end
  end
end
|
114
|
+
|
115
|
+
|
72
116
|
should "return correct value for prop pearson" do
|
73
117
|
assert_in_delta(0.42, Statsample::Bivariate.prop_pearson(Statsample::Bivariate.t_r(0.084,94), 94),0.01)
|
74
118
|
assert_in_delta(0.65, Statsample::Bivariate.prop_pearson(Statsample::Bivariate.t_r(0.046,95), 95),0.01)
|
@@ -115,7 +159,5 @@ class StatsampleBivariateTestCase < MiniTest::Unit::TestCase
|
|
115
159
|
assert_in_delta(0.636,Statsample::Bivariate.gamma(m),0.001)
|
116
160
|
m2=Matrix[[15,12,6,5],[12,8,10,8],[4,6,9,10]]
|
117
161
|
assert_in_delta(0.349,Statsample::Bivariate.gamma(m2),0.001)
|
118
|
-
|
119
|
-
|
120
162
|
end
|
121
163
|
end
|