statsample 0.18.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data.tar.gz.sig +0 -0
- data/History.txt +23 -0
- data/Manifest.txt +28 -17
- data/Rakefile +3 -2
- data/benchmarks/correlation_matrix_15_variables.rb +31 -0
- data/benchmarks/correlation_matrix_5_variables.rb +32 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.ds +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.html +93 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +75 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.xls +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix_gsl_ruby.ods +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix_with_graphics.ods +0 -0
- data/benchmarks/correlation_matrix_methods/results.ds +0 -0
- data/benchmarks/factor_map.rb +37 -0
- data/benchmarks/helpers_benchmark.rb +5 -0
- data/examples/boxplot.rb +13 -14
- data/examples/correlation_matrix.rb +16 -8
- data/examples/dataset.rb +13 -4
- data/examples/dominance_analysis.rb +23 -17
- data/examples/dominance_analysis_bootstrap.rb +28 -22
- data/examples/histogram.rb +8 -9
- data/examples/icc.rb +20 -21
- data/examples/levene.rb +10 -4
- data/examples/multiple_regression.rb +9 -28
- data/examples/multivariate_correlation.rb +9 -3
- data/examples/parallel_analysis.rb +20 -16
- data/examples/polychoric.rb +15 -9
- data/examples/principal_axis.rb +18 -6
- data/examples/reliability.rb +26 -13
- data/examples/scatterplot.rb +10 -6
- data/examples/t_test.rb +15 -6
- data/examples/tetrachoric.rb +9 -2
- data/examples/u_test.rb +12 -4
- data/examples/vector.rb +13 -2
- data/examples/velicer_map_test.rb +33 -26
- data/lib/statsample.rb +32 -12
- data/lib/statsample/analysis.rb +79 -0
- data/lib/statsample/analysis/suite.rb +72 -0
- data/lib/statsample/analysis/suitereportbuilder.rb +38 -0
- data/lib/statsample/bivariate.rb +70 -16
- data/lib/statsample/dataset.rb +25 -19
- data/lib/statsample/dominanceanalysis.rb +2 -2
- data/lib/statsample/factor.rb +2 -0
- data/lib/statsample/factor/map.rb +16 -10
- data/lib/statsample/factor/parallelanalysis.rb +9 -3
- data/lib/statsample/factor/pca.rb +28 -32
- data/lib/statsample/factor/rotation.rb +15 -8
- data/lib/statsample/graph/boxplot.rb +3 -4
- data/lib/statsample/graph/histogram.rb +2 -1
- data/lib/statsample/graph/scatterplot.rb +1 -0
- data/lib/statsample/matrix.rb +106 -16
- data/lib/statsample/regression.rb +4 -1
- data/lib/statsample/regression/binomial.rb +1 -1
- data/lib/statsample/regression/multiple/baseengine.rb +19 -9
- data/lib/statsample/regression/multiple/gslengine.rb +127 -126
- data/lib/statsample/regression/multiple/matrixengine.rb +8 -5
- data/lib/statsample/regression/multiple/rubyengine.rb +1 -1
- data/lib/statsample/regression/simple.rb +31 -6
- data/lib/statsample/reliability.rb +11 -3
- data/lib/statsample/reliability/scaleanalysis.rb +4 -4
- data/lib/statsample/shorthand.rb +81 -0
- data/lib/statsample/test/chisquare.rb +1 -1
- data/lib/statsample/vector.rb +163 -163
- data/lib/statsample/vector/gsl.rb +106 -0
- data/references.txt +2 -2
- data/{data → test/fixtures}/crime.txt +0 -0
- data/{data → test/fixtures}/hartman_23.matrix +0 -0
- data/{data → test/fixtures}/repeated_fields.csv +0 -0
- data/{data → test/fixtures}/test_binomial.csv +0 -0
- data/test/{test_csv.csv → fixtures/test_csv.csv} +0 -0
- data/test/{test_xls.xls → fixtures/test_xls.xls} +0 -0
- data/{data → test/fixtures}/tetmat_matrix.txt +0 -0
- data/{data → test/fixtures}/tetmat_test.txt +0 -0
- data/test/helpers_tests.rb +18 -2
- data/test/test_analysis.rb +118 -0
- data/test/test_anovatwoway.rb +1 -1
- data/test/test_anovatwowaywithdataset.rb +1 -1
- data/test/test_anovawithvectors.rb +1 -2
- data/test/test_bartlettsphericity.rb +1 -2
- data/test/test_bivariate.rb +64 -22
- data/test/test_codification.rb +1 -2
- data/test/test_crosstab.rb +1 -2
- data/test/test_csv.rb +3 -4
- data/test/test_dataset.rb +24 -3
- data/test/test_dominance_analysis.rb +1 -2
- data/test/test_factor.rb +8 -69
- data/test/test_factor_map.rb +43 -0
- data/test/test_factor_pa.rb +54 -0
- data/test/test_ggobi.rb +1 -1
- data/test/test_gsl.rb +12 -18
- data/test/test_histogram.rb +1 -2
- data/test/test_logit.rb +62 -18
- data/test/test_matrix.rb +4 -5
- data/test/test_mle.rb +3 -4
- data/test/test_regression.rb +21 -2
- data/test/test_reliability.rb +3 -3
- data/test/test_reliability_icc.rb +1 -1
- data/test/test_reliability_skillscale.rb +20 -4
- data/test/test_resample.rb +1 -2
- data/test/test_rserve_extension.rb +1 -2
- data/test/test_srs.rb +1 -2
- data/test/test_statistics.rb +1 -2
- data/test/test_stest.rb +1 -2
- data/test/test_stratified.rb +1 -2
- data/test/test_test_f.rb +1 -2
- data/test/test_test_t.rb +1 -2
- data/test/test_umannwhitney.rb +1 -2
- data/test/test_vector.rb +117 -18
- data/test/test_xls.rb +2 -3
- data/web/Rakefile +39 -0
- metadata +109 -29
- metadata.gz.sig +0 -0
- data/examples/parallel_analysis_tetrachoric.rb +0 -31
- data/lib/distribution.rb +0 -25
- data/lib/distribution/chisquare.rb +0 -23
- data/lib/distribution/f.rb +0 -35
- data/lib/distribution/normal.rb +0 -60
- data/lib/distribution/normalbivariate.rb +0 -284
- data/lib/distribution/normalmultivariate.rb +0 -73
- data/lib/distribution/t.rb +0 -55
- data/test/test_distribution.rb +0 -73
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
module Statsample
  class Vector
    # GSL-backed implementations of several Vector methods.
    #
    # Numeric work is delegated to a GSL::Vector built from @scale_data (see
    # #gsl); the non-GSL paths call the matching *_ruby methods. Those *_ruby
    # methods, as well as check_type and flawed?, are not defined in this
    # module and are expected from whatever it is mixed into.
    module GSL_
      # Drops the memoized GSL::Vector so the next #gsl call rebuilds it.
      def clear_gsl
        @gsl = nil
      end

      # Invalidates the cached GSL vector, then delegates to
      # set_valid_data_ruby.
      def set_valid_data
        clear_gsl
        set_valid_data_ruby
      end

      # Appends +v+ by delegating to push_ruby.
      def push(v)
        # If data is GSL::Vector, should be converted first to an Array
        @data = @data.to_a if @data.is_a? GSL::Vector
        push_ruby(v)
      end

      # Returns a GSL::Vector allocated from @scale_data, memoized in @gsl.
      # Returns nil whenever @scale_data is empty.
      def gsl
        @gsl ||= GSL::Vector.alloc(@scale_data) if @scale_data.size > 0
      end

      alias :to_gsl :gsl

      # Returns a scale vector with every element transformed to
      # (x - m) / sd.
      # m::  value subtracted from each element
      # sd:: divisor applied to each difference
      # When flawed? is true the work is delegated to
      # vector_standarized_compute_ruby instead of GSL.
      def vector_standarized_compute(m, sd)
        if flawed?
          vector_standarized_compute_ruby(m, sd)
        else
          gsl.collect { |x| (x.to_f - m).quo(sd) }.to_scale
        end
      end

      # Returns a scale vector with +m+ subtracted from every element.
      # When flawed? is true the work is delegated to
      # vector_centered_compute_ruby instead of GSL.
      def vector_centered_compute(m)
        if flawed?
          vector_centered_compute_ruby(m)
        else
          gsl.collect { |x| (x.to_f - m) }.to_scale
        end
      end

      # Draws +sample+ elements with replacement.
      # Non-scale vectors delegate to sample_with_replacement_ruby; scale
      # vectors use a GSL MT19937 generator and return a new
      # Statsample::Vector of type :scale.
      def sample_with_replacement(sample=1)
        if @type != :scale
          sample_with_replacement_ruby(sample)
        else
          # NOTE(review): the seed comes from rand(10000), so only 10,000
          # distinct generator states are reachable.
          r = GSL::Rng.alloc(GSL::Rng::MT19937, rand(10000))
          Statsample::Vector.new(r.sample(gsl, sample).to_a, :scale)
        end
      end

      # Draws +sample+ elements without replacement.
      # Non-scale vectors delegate to sample_without_replacement_ruby; scale
      # vectors use a GSL MT19937 generator and return a plain Array.
      def sample_without_replacement(sample=1)
        if @type != :scale
          sample_without_replacement_ruby(sample)
        else
          # NOTE(review): same 10,000-seed limitation as
          # sample_with_replacement.
          r = GSL::Rng.alloc(GSL::Rng::MT19937, rand(10000))
          r.choose(gsl, sample).to_a
        end
      end

      # Median of the vector. Non-scale vectors delegate to median_ruby;
      # scale vectors sort @scale_data and ask GSL for the median of the
      # sorted data.
      def median
        if @type != :scale
          median_ruby
        else
          sorted = GSL::Vector.alloc(@scale_data.sort)
          GSL::Stats::median_from_sorted_data(sorted)
        end
      end

      # Sum of the elements via GSL, or nil when #gsl is nil.
      def sum
        check_type :scale
        gsl.nil? ? nil : gsl.sum
      end

      # Mean of the elements via GSL, or nil when #gsl is nil.
      def mean
        check_type :scale
        gsl.nil? ? nil : gsl.mean
      end

      # Variance via GSL's variance_m, or nil when #gsl is nil.
      # m:: mean to compute the variance around; defaults to #mean
      def variance_sample(m=nil)
        check_type :scale
        m ||= mean
        # Hand +m+ to GSL so an explicitly supplied mean is honoured, the
        # same way standard_deviation_sample passes it to gsl.sd.
        gsl.nil? ? nil : gsl.variance_m(m)
      end

      # Standard deviation via GSL's sd, or nil when #gsl is nil.
      # m:: mean passed to GSL; defaults to #mean
      def standard_deviation_sample(m=nil)
        check_type :scale
        m ||= mean
        gsl.nil? ? nil : gsl.sd(m)
      end

      # Variance via GSL's variance_with_fixed_mean, or nil when #gsl is nil.
      # m:: mean passed to GSL; defaults to #mean
      def variance_population(m=nil) # :nodoc:
        check_type :scale
        m ||= mean
        gsl.nil? ? nil : gsl.variance_with_fixed_mean(m)
      end

      # Standard deviation via GSL's sd_with_fixed_mean, or nil when #gsl is
      # nil.
      # m:: mean passed to GSL; defaults to #mean
      def standard_deviation_population(m=nil) # :nodoc:
        check_type :scale
        m ||= mean
        gsl.nil? ? nil : gsl.sd_with_fixed_mean(m)
      end

      # Skewness via GSL, or nil when #gsl is nil.
      def skew # :nodoc:
        check_type :scale
        gsl.nil? ? nil : gsl.skew
      end

      # Kurtosis via GSL, or nil when #gsl is nil.
      def kurtosis # :nodoc:
        check_type :scale
        gsl.nil? ? nil : gsl.kurtosis
      end
    end
  end
end
|
data/references.txt
CHANGED
|
@@ -7,6 +7,7 @@ References
|
|
|
7
7
|
* Dinneen, L., & Blakesley, B. (1973). Algorithm AS 62: A Generator for the Sampling Distribution of the Mann- Whitney U Statistic. <em>Journal of the Royal Statistical Society, 22</em>(2), 269-273
|
|
8
8
|
* Dziuban, C., & Shirkey E. (1974). When is a correlation matrix appropriate for factor analysis? Some decision rules. Psychological Bulletin, 81(6), 358-361.
|
|
9
9
|
* Hayton, J., Allen, D. & Scarpello, V.(2004). Factor Retention Decisions in Exploratory Factor Analysis: a Tutorial on Parallel Analysis. <i>Organizational Research Methods, 7</i> (2), 191-205.
|
|
10
|
+
* Härdle, W. & Simar, L. (2003). Applied Multivariate Statistical Analysis. Springer
|
|
10
11
|
* Lin, J. (2007). VARIMAX_K58 [Source code]. [http://www.johnny-lin.com/idl_code/varimax_k58.pro]
|
|
11
12
|
* Liu, O., & Rijmen, F. (2008). A modified procedure for parallel analysis of ordered categorical data. Behavior Research Methods, 40(2), 556-562.
|
|
12
13
|
* McGraw, K. & Wong, S.P. (1996). Forming Inferences About Some Intraclass Correlation Coefficients. Psychological methods, 1(1), 30-46.
|
|
@@ -16,8 +17,7 @@ References
|
|
|
16
17
|
* Smith, L. (2002). A tutorial on Principal Component Analysis. Available on http://courses.eas.ualberta.ca/eas570/pca_tutorial.pdf
|
|
17
18
|
* http://en.wikipedia.org/wiki/Welch-Satterthwaite_equation
|
|
18
19
|
* http://europe.isixsigma.com/library/content/c080806a.asp
|
|
19
|
-
* http://snippets.dzone.com/posts/show/4666
|
|
20
20
|
* http://stattrek.com/Lesson6/SRS.aspx
|
|
21
|
-
* http://
|
|
21
|
+
* http://talkstats.com/showthread.php?t=5056
|
|
22
22
|
* http://www.gnu.org/software/gsl/manual/html_node/The-histogram-struct.html
|
|
23
23
|
* http://www.taygeta.com/random/gaussian.html
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
data/test/helpers_tests.rb
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
$:.unshift(File.expand_path(File.dirname(__FILE__)+'/../lib/'))
|
|
2
2
|
$:.unshift(File.expand_path(File.dirname(__FILE__)+'/'))
|
|
3
3
|
|
|
4
|
-
|
|
5
4
|
require 'minitest/unit'
|
|
6
5
|
require 'tempfile'
|
|
7
6
|
require 'tmpdir'
|
|
@@ -17,7 +16,18 @@ module MiniTest
|
|
|
17
16
|
include Shoulda::InstanceMethods
|
|
18
17
|
extend Shoulda::ClassMethods
|
|
19
18
|
include Shoulda::Assertions
|
|
20
|
-
|
|
19
|
+
|
|
20
|
+
# Declares a +should+ test called +name+ whose body is +block+.
# When the test runs, the block is instance_eval'ed only if
# Statsample.has_gsl? is true; otherwise the test calls
# skip("Requires GSL").
def self.should_with_gsl(name, &block)
  should(name) do
    Statsample.has_gsl? ? instance_eval(&block) : skip("Requires GSL")
  end
end
|
|
30
|
+
|
|
21
31
|
end
|
|
22
32
|
end
|
|
23
33
|
|
|
@@ -29,6 +39,12 @@ module MiniTest
|
|
|
29
39
|
assert_in_delta(v,obs[i],delta)
|
|
30
40
|
}
|
|
31
41
|
end
|
|
42
|
+
# Asserts that +exp+ and +obs+ have the same size and that each pair of
# elements at the same index agrees within +delta+.
# exp::   expected values (indexable, responds to size)
# obs::   observed values (indexable, responds to size)
# delta:: tolerance handed to assert_in_delta (default 1e-10)
# msg::   optional text appended to every failure message
def assert_equal_vector(exp, obs, delta=1e-10, msg=nil)
  assert_equal(exp.size, obs.size, "Different size.#{msg}")
  exp.size.times do |idx|
    expected_value = exp[idx]
    observed_value = obs[idx]
    detail = "Different element #{idx}. \nExpected:\n#{exp}\nObserved:\n#{obs}.#{msg}"
    assert_in_delta(expected_value, observed_value, delta, detail)
  end
end
|
|
32
48
|
def assert_equal_matrix(exp,obs,delta=1e-10,msg=nil)
|
|
33
49
|
assert_equal(exp.row_size, obs.row_size, "Different row size.#{msg}")
|
|
34
50
|
assert_equal(exp.column_size, obs.column_size, "Different column size.#{msg}")
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
|
|
2
|
+
require 'mocha'
|
|
3
|
+
# Tests for Statsample::Analysis, Statsample::Analysis::Suite and
# Statsample::Analysis::SuiteReportBuilder, written with shoulda contexts
# and mocha mocks/stubs.
class StatsampleAnalysisTestCase < MiniTest::Unit::TestCase
  context(Statsample::Analysis) do
    should "store() should create and store Statsample::Analysis::Suite" do
      Statsample::Analysis.store(:first) { a = 1 }
      assert(Statsample::Analysis.stored_analysis[:first])
      assert(Statsample::Analysis.stored_analysis[:first].is_a?(Statsample::Analysis::Suite))
    end

    should "store last created analysis" do
      created = Statsample::Analysis.store(:first) { a = 1 }
      assert_equal(created, Statsample::Analysis.last)
    end

    context(Statsample::Analysis::Suite) do
      should "echo() uses output#puts with same arguments" do
        suite = Statsample::Analysis::Suite.new(:output)
        sink = mock()
        # The mock must receive exactly one puts call with both arguments.
        sink.expects(:puts).with(:first, :second).once
        suite.output = sink
        suite.echo(:first, :second)
      end

      should "summary() should call object.summary" do
        suite = Statsample::Analysis::Suite.new(:summary)
        summarizable = stub('summarizable', :summary => 'summary')
        assert_equal(summarizable.summary, suite.summary(summarizable))
      end

      should "attach() allows to call objects on objects which respond to fields" do
        suite = Statsample::Analysis::Suite.new(:summary)
        dataset = {'x' => stub(:mean => 10), 'y' => stub(:mean => 12)}
        dataset.expects(:fields).returns(%w[x y]).at_least_once
        suite.attach(dataset)
        assert_equal(10, suite.x.mean)
        assert_equal(12, suite.y.mean)
        # 'z' is not among the attached fields, so a RuntimeError is expected.
        assert_raise(RuntimeError) { suite.z }
      end

      should "attached objects should be called LIFO" do
        suite = Statsample::Analysis::Suite.new(:summary)
        older = {'x' => stub(:mean => 100), 'y' => stub(:mean => 120), 'z' => stub(:mean => 13)}
        older.expects(:fields).returns(%w[x y z]).at_least_once
        newer = {'x' => stub(:mean => 10), 'y' => stub(:mean => 12)}
        newer.expects(:fields).returns(%w[x y]).at_least_once
        suite.attach(older)
        suite.attach(newer)
        # x and y are expected from the most recent attachment; z is only
        # in the older one.
        assert_equal(10, suite.x.mean)
        assert_equal(12, suite.y.mean)
        assert_equal(13, suite.z.mean)
      end

      should "detach() without arguments drop latest object" do
        suite = Statsample::Analysis::Suite.new(:summary)
        older = {'x' => stub(:mean => 100), 'y' => stub(:mean => 120), 'z' => stub(:mean => 13)}
        older.expects(:fields).returns(%w[x y z]).at_least_once
        newer = {'x' => stub(:mean => 10), 'y' => stub(:mean => 12)}
        newer.expects(:fields).returns(%w[x y]).at_least_once
        suite.attach(older)
        suite.attach(newer)
        assert_equal(10, suite.x.mean)
        suite.detach
        assert_equal(100, suite.x.mean)
      end

      should "detach() with argument drop select object" do
        suite = Statsample::Analysis::Suite.new(:summary)
        top = {'x' => 1}
        top.expects(:fields).returns(%w[x]).at_least_once
        middle = {'x' => 2, 'y' => 3}
        middle.expects(:fields).returns(%w[x y]).at_least_once
        bottom = {'y' => 4}
        bottom.expects(:fields).returns(%w[y]).at_least_once

        suite.attach(bottom)
        suite.attach(middle)
        suite.attach(top)
        assert_equal(1, suite.x)
        assert_equal(3, suite.y)
        # With the middle object removed, y is expected from the bottom one.
        suite.detach(middle)
        assert_equal(4, suite.y)
      end

      should "perform a simple analysis" do
        sink = mock()
        sink.expects(:puts).with(5.5)
        analysis = Statsample::Analysis.store(:simple, :output => sink) do
          ds = data_frame(:x => c(1..10), :y => c(1..10))
          attach(ds)
          echo(x.mean)
        end
        analysis.run
      end

      should "rnorm returns a random normal distribution vector" do
        suite = Statsample::Analysis::Suite.new(:simple)
        standard = suite.rnorm(1000)
        assert_in_delta(0, standard.mean, 0.09)
        assert_in_delta(1, standard.sd, 0.09)
        shifted = suite.rnorm(1000, 5, 10)
        assert_in_delta(5, shifted.mean, 0.9)
        assert_in_delta(10, shifted.sd, 0.9)
      end
    end

    context(Statsample::Analysis::SuiteReportBuilder) do
      should "echo() use add on rb object" do
        builder = Statsample::Analysis::SuiteReportBuilder.new(:puts_to_add)
        # Two echoed arguments are expected to produce two add calls.
        builder.rb.expects(:add).with(:first).twice
        builder.echo(:first, :first)
      end

      should "summary() uses add on rb object" do
        builder = Statsample::Analysis::SuiteReportBuilder.new(:summary_to_add)
        builder.rb.expects(:add).with(:first).once
        builder.summary(:first)
      end
    end
  end
end
|
data/test/test_anovatwoway.rb
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
|
-
require(File.dirname(__FILE__)+'/helpers_tests.rb')
|
|
2
|
-
|
|
1
|
+
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
|
|
3
2
|
class StatsampleAnovaOneWayWithVectorsTestCase < MiniTest::Unit::TestCase
|
|
4
3
|
context(Statsample::Anova::OneWayWithVectors) do
|
|
5
4
|
context("when initializing") do
|
data/test/test_bivariate.rb
CHANGED
|
@@ -1,31 +1,21 @@
|
|
|
1
|
-
require(File.dirname(__FILE__)+'/helpers_tests.rb')
|
|
2
|
-
|
|
1
|
+
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
|
|
3
2
|
class StatsampleBivariateTestCase < MiniTest::Unit::TestCase
|
|
4
3
|
should "method sum of squares should be correct" do
|
|
5
4
|
v1=[1,2,3,4,5,6].to_vector(:scale)
|
|
6
5
|
v2=[6,2,4,10,12,8].to_vector(:scale)
|
|
7
6
|
assert_equal(23.0, Statsample::Bivariate.sum_of_squares(v1,v2))
|
|
8
7
|
end
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
assert_in_delta(Statsample::Bivariate.covariance(v1,v2), Statsample::Bivariate.covariance_slow(v1,v2), 0.001)
|
|
14
|
-
else
|
|
15
|
-
skip "Bivariate::covariance not tested (needs GSL)"
|
|
16
|
-
end
|
|
17
|
-
|
|
8
|
+
should_with_gsl "return same covariance with ruby and gls implementation" do
|
|
9
|
+
v1=20.times.collect {|a| rand()}.to_scale
|
|
10
|
+
v2=20.times.collect {|a| rand()}.to_scale
|
|
11
|
+
assert_in_delta(Statsample::Bivariate.covariance(v1,v2), Statsample::Bivariate.covariance_slow(v1,v2), 0.001)
|
|
18
12
|
end
|
|
19
13
|
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
v2=20.times.collect {|a| rand()}.to_scale
|
|
14
|
+
should_with_gsl "return same correlation with ruby and gls implementation" do
|
|
15
|
+
v1=20.times.collect {|a| rand()}.to_scale
|
|
16
|
+
v2=20.times.collect {|a| rand()}.to_scale
|
|
24
17
|
|
|
25
|
-
|
|
26
|
-
else
|
|
27
|
-
skip "Not tested gsl versus ruby correlation (needs GSL)"
|
|
28
|
-
end
|
|
18
|
+
assert_in_delta(GSL::Stats::correlation(v1.gsl, v2.gsl), Statsample::Bivariate.pearson_slow(v1,v2), 1e-10)
|
|
29
19
|
end
|
|
30
20
|
should "return correct pearson correlation" do
|
|
31
21
|
v1=[6,5,4,7,8,4,3,2].to_vector(:scale)
|
|
@@ -49,7 +39,7 @@ class StatsampleBivariateTestCase < MiniTest::Unit::TestCase
|
|
|
49
39
|
assert_in_delta(Statsample::Bivariate.prop_pearson(r.t,8,:both), r.probability, 0.001)
|
|
50
40
|
assert(r.summary.size>0)
|
|
51
41
|
end
|
|
52
|
-
should "return correct correlation_matrix" do
|
|
42
|
+
should "return correct correlation_matrix with nils values" do
|
|
53
43
|
v1=[6,5,4,7,8,4,3,2].to_vector(:scale)
|
|
54
44
|
v2=[2,3,7,8,6,4,3,2].to_vector(:scale)
|
|
55
45
|
v3=[6,2, 1000,1000,5,4,7,8].to_vector(:scale)
|
|
@@ -69,6 +59,60 @@ class StatsampleBivariateTestCase < MiniTest::Unit::TestCase
|
|
|
69
59
|
end
|
|
70
60
|
#assert_equal(expected,obt)
|
|
71
61
|
end
|
|
62
|
+
should_with_gsl "return same values for optimized and pairwise covariance matrix" do
|
|
63
|
+
cases=100
|
|
64
|
+
v1=Statsample::Vector.new_scale(cases) {rand()}
|
|
65
|
+
v2=Statsample::Vector.new_scale(cases) {rand()}
|
|
66
|
+
v3=Statsample::Vector.new_scale(cases) {rand()}
|
|
67
|
+
v4=Statsample::Vector.new_scale(cases) {rand()}
|
|
68
|
+
v5=Statsample::Vector.new_scale(cases) {rand()}
|
|
69
|
+
|
|
70
|
+
ds={'v1'=>v1,'v2'=>v2,'v3'=>v3,'v4'=>v4,'v5'=>v5}.to_dataset
|
|
71
|
+
|
|
72
|
+
cor_opt=Statsample::Bivariate.covariance_matrix_optimized(ds)
|
|
73
|
+
|
|
74
|
+
cor_pw =Statsample::Bivariate.covariance_matrix_pairwise(ds)
|
|
75
|
+
assert_equal_matrix(cor_opt,cor_pw,1e-15)
|
|
76
|
+
end
|
|
77
|
+
should_with_gsl "return same values for optimized and pairwise correlation matrix" do
|
|
78
|
+
|
|
79
|
+
cases=100
|
|
80
|
+
v1=Statsample::Vector.new_scale(cases) {rand()}
|
|
81
|
+
v2=Statsample::Vector.new_scale(cases) {rand()}
|
|
82
|
+
v3=Statsample::Vector.new_scale(cases) {rand()}
|
|
83
|
+
v4=Statsample::Vector.new_scale(cases) {rand()}
|
|
84
|
+
v5=Statsample::Vector.new_scale(cases) {rand()}
|
|
85
|
+
|
|
86
|
+
ds={'v1'=>v1,'v2'=>v2,'v3'=>v3,'v4'=>v4,'v5'=>v5}.to_dataset
|
|
87
|
+
|
|
88
|
+
cor_opt=Statsample::Bivariate.correlation_matrix_optimized(ds)
|
|
89
|
+
|
|
90
|
+
cor_pw =Statsample::Bivariate.correlation_matrix_pairwise(ds)
|
|
91
|
+
assert_equal_matrix(cor_opt,cor_pw,1e-15)
|
|
92
|
+
|
|
93
|
+
end
|
|
94
|
+
should "return correct correlation_matrix without nils values" do
|
|
95
|
+
v1=[6,5,4,7,8,4,3,2].to_vector(:scale)
|
|
96
|
+
v2=[2,3,7,8,6,4,3,2].to_vector(:scale)
|
|
97
|
+
v3=[6,2, 1000,1000,5,4,7,8].to_vector(:scale)
|
|
98
|
+
v4=[2,4,6,7, 3,7,8,6].to_vector(:scale)
|
|
99
|
+
ds={'v1'=>v1,'v2'=>v2,'v3'=>v3,'v4'=>v4}.to_dataset
|
|
100
|
+
c=Proc.new {|n1,n2|Statsample::Bivariate.pearson(n1,n2)}
|
|
101
|
+
expected=Matrix[ [c.call(v1,v1),c.call(v1,v2),c.call(v1,v3),c.call(v1,v4)], [c.call(v2,v1),c.call(v2,v2),c.call(v2,v3),c.call(v2,v4)], [c.call(v3,v1),c.call(v3,v2),c.call(v3,v3),c.call(v3,v4)],
|
|
102
|
+
[c.call(v4,v1),c.call(v4,v2),c.call(v4,v3),c.call(v4,v4)]
|
|
103
|
+
]
|
|
104
|
+
obt=Statsample::Bivariate.correlation_matrix(ds)
|
|
105
|
+
for i in 0...expected.row_size
|
|
106
|
+
for j in 0...expected.column_size
|
|
107
|
+
#puts expected[i,j].inspect
|
|
108
|
+
#puts obt[i,j].inspect
|
|
109
|
+
assert_in_delta(expected[i,j], obt[i,j],0.0001, "#{expected[i,j].class}!=#{obt[i,j].class} ")
|
|
110
|
+
end
|
|
111
|
+
end
|
|
112
|
+
#assert_equal(expected,obt)
|
|
113
|
+
end
|
|
114
|
+
|
|
115
|
+
|
|
72
116
|
should "return correct value for prop pearson" do
|
|
73
117
|
assert_in_delta(0.42, Statsample::Bivariate.prop_pearson(Statsample::Bivariate.t_r(0.084,94), 94),0.01)
|
|
74
118
|
assert_in_delta(0.65, Statsample::Bivariate.prop_pearson(Statsample::Bivariate.t_r(0.046,95), 95),0.01)
|
|
@@ -115,7 +159,5 @@ class StatsampleBivariateTestCase < MiniTest::Unit::TestCase
|
|
|
115
159
|
assert_in_delta(0.636,Statsample::Bivariate.gamma(m),0.001)
|
|
116
160
|
m2=Matrix[[15,12,6,5],[12,8,10,8],[4,6,9,10]]
|
|
117
161
|
assert_in_delta(0.349,Statsample::Bivariate.gamma(m2),0.001)
|
|
118
|
-
|
|
119
|
-
|
|
120
162
|
end
|
|
121
163
|
end
|