statsample 0.6.1 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +4 -0
- data/Manifest.txt +8 -19
- data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
- data/demo/dominance_analysis_bootstrap.rb +20 -0
- data/demo/dominanceanalysis.rb +11 -0
- data/demo/multiple_regression.rb +40 -0
- data/demo/polychoric.rb +13 -0
- data/demo/tetrachoric.rb +10 -0
- data/lib/distribution.rb +1 -0
- data/lib/distribution/normalbivariate.rb +100 -0
- data/lib/statsample.rb +4 -105
- data/lib/statsample/bivariate.rb +5 -1
- data/lib/statsample/bivariate/polychoric.rb +581 -0
- data/lib/statsample/bivariate/tetrachoric.rb +37 -5
- data/lib/statsample/converters.rb +11 -0
- data/lib/statsample/dominanceanalysis.rb +104 -90
- data/lib/statsample/dominanceanalysis/bootstrap.rb +160 -131
- data/lib/statsample/factor/pca.rb +1 -2
- data/lib/statsample/factor/principalaxis.rb +2 -2
- data/lib/statsample/graph/svghistogram.rb +170 -172
- data/lib/statsample/matrix.rb +79 -0
- data/lib/statsample/mle.rb +6 -4
- data/lib/statsample/mle/probit.rb +0 -1
- data/lib/statsample/regression/multiple/alglibengine.rb +23 -23
- data/lib/statsample/regression/multiple/baseengine.rb +112 -113
- data/lib/statsample/regression/multiple/gslengine.rb +91 -94
- data/lib/statsample/regression/multiple/rubyengine.rb +1 -1
- data/lib/statsample/srs.rb +1 -1
- data/lib/statsample/test.rb +0 -1
- data/lib/statsample/test/umannwhitney.rb +8 -5
- data/po/es/statsample.po +201 -39
- data/po/statsample.pot +184 -32
- data/test/test_bivariate.rb +21 -2
- data/test/test_distribution.rb +58 -40
- data/test/test_factor.rb +0 -1
- data/test/test_gsl.rb +13 -14
- data/test/test_regression.rb +1 -1
- data/test/test_statistics.rb +1 -4
- metadata +10 -21
- data/demo/benchmark.rb +0 -76
- data/demo/chi-square.rb +0 -44
- data/demo/crosstab.rb +0 -7
- data/demo/dice.rb +0 -13
- data/demo/distribution_t.rb +0 -95
- data/demo/graph.rb +0 -9
- data/demo/item_analysis.rb +0 -30
- data/demo/mean.rb +0 -81
- data/demo/nunnally_6.rb +0 -34
- data/demo/pca.rb +0 -29
- data/demo/proportion.rb +0 -57
- data/demo/regression.rb +0 -82
- data/demo/sample_test.csv +0 -113
- data/demo/spss_matrix.rb +0 -3
- data/demo/strata_proportion.rb +0 -152
- data/demo/stratum.rb +0 -141
- data/demo/t-student.rb +0 -17
- data/demo/umann.rb +0 -8
- data/lib/matrix_extension.rb +0 -92
data/test/test_bivariate.rb
CHANGED
@@ -24,6 +24,24 @@ class StatsampleBivariateTestCase < Test::Unit::TestCase
|
|
24
24
|
assert_in_delta(tcm_obs[i,j], tcm_exp[i,k], 0.00001)
|
25
25
|
end
|
26
26
|
end
|
27
|
+
end
|
28
|
+
def test_polychoric
|
29
|
+
# Should be the same results as Tetrachoric for 2x2 matrix
|
30
|
+
|
31
|
+
matrix=Matrix[[rand(100)+10,rand(100)+10],[rand(100)+10,rand(100)+10]]
|
32
|
+
tetra = Statsample::Bivariate::Tetrachoric.new_with_matrix(matrix)
|
33
|
+
poly = Statsample::Bivariate::Polychoric.new(matrix)
|
34
|
+
assert_in_delta(tetra.r,poly.r,0.0001)
|
35
|
+
|
36
|
+
# Example for http://www.john-uebersax.com/stat/tetra.htm#exampl
|
37
|
+
|
38
|
+
matrix=Matrix[[58,52,1],[26,58,3],[8,12,9]]
|
39
|
+
poly=Statsample::Bivariate::Polychoric.new(matrix)
|
40
|
+
assert_in_delta(0.4199, poly.r, 0.0001)
|
41
|
+
assert_in_delta(-0.2397, poly.threshold_y[0],0.001)
|
42
|
+
assert_in_delta(-0.0276, poly.threshold_x[0],0.001)
|
43
|
+
|
44
|
+
|
27
45
|
end
|
28
46
|
def test_tetrachoric
|
29
47
|
a,b,c,d=0,0,0,0
|
@@ -51,8 +69,9 @@ class StatsampleBivariateTestCase < Test::Unit::TestCase
|
|
51
69
|
tc = Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
|
52
70
|
assert_in_delta(-0.53980,tc.r,0.0001)
|
53
71
|
assert_in_delta(0.09940,tc.se,0.0001)
|
54
|
-
assert_in_delta(0.
|
55
|
-
assert_in_delta(
|
72
|
+
assert_in_delta(-0.15731,tc.threshold_x, 0.0001)
|
73
|
+
assert_in_delta(0.31864,tc.threshold_y, 0.0001)
|
74
|
+
|
56
75
|
x=%w{a a a a b b b a b b a a b b}.to_vector
|
57
76
|
y=%w{0 0 1 1 0 0 1 1 1 1 0 0 1 1}.to_vector
|
58
77
|
# crosstab
|
data/test/test_distribution.rb
CHANGED
@@ -8,51 +8,69 @@ rescue LoadError
|
|
8
8
|
NOT_GSL=true
|
9
9
|
end
|
10
10
|
class DistributionTestCase < Test::Unit::TestCase
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
end
|
11
|
+
def test_chi
|
12
|
+
if !NOT_GSL
|
13
|
+
[2,3,4,5].each{|k|
|
14
|
+
chis=rand()*10
|
15
|
+
area=Distribution::ChiSquare.cdf(chis, k)
|
16
|
+
assert_in_delta(area, GSL::Cdf.chisq_P(chis,k),0.0001)
|
17
|
+
assert_in_delta(chis, Distribution::ChiSquare.p_value(area,k),0.0001,"Error on prob #{area} and k #{k}")
|
18
|
+
}
|
20
19
|
end
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
20
|
+
end
|
21
|
+
def test_t
|
22
|
+
if !NOT_GSL
|
23
|
+
[-2,0.1,0.5,1,2].each{|t|
|
24
|
+
[2,5,10].each{|n|
|
25
|
+
area=Distribution::T.cdf(t,n)
|
26
|
+
assert_in_delta(area, GSL::Cdf.tdist_P(t,n),0.0001)
|
27
|
+
assert_in_delta(Distribution::T.p_value(area,n), GSL::Cdf.tdist_Pinv(area,n),0.0001)
|
28
|
+
|
30
29
|
}
|
31
|
-
|
30
|
+
}
|
32
31
|
end
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
32
|
+
end
|
33
|
+
def test_normal
|
34
|
+
if !NOT_GSL
|
35
|
+
[-2,0.1,0.5,1,2].each{|x|
|
36
|
+
area=Distribution::Normal.cdf(x)
|
37
|
+
assert_in_delta(area, GSL::Cdf.ugaussian_P(x),0.0001)
|
38
|
+
assert_in_delta(Distribution::Normal.p_value(area), GSL::Cdf.ugaussian_Pinv(area),0.0001)
|
39
|
+
assert_in_delta(Distribution::Normal.pdf(x), GSL::Ran::ugaussian_pdf(x),0.0001)
|
40
|
+
}
|
42
41
|
end
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
42
|
+
end
|
43
|
+
def test_normal_bivariate
|
44
|
+
if !NOT_GSL
|
45
|
+
[0.2,0.4,0.6,0.8,0.9, 0.99,0.999,0.999999].each {|rho|
|
46
|
+
assert_equal(GSL::Ran::bivariate_gaussian_pdf(0, 0, 1,1,rho), Distribution::NormalBivariate.pdf(0,0, rho , 1,1))
|
47
|
+
|
48
|
+
}
|
49
|
+
end
|
50
|
+
|
51
|
+
[-3,-2,-1,0,1,1.5].each {|x|
|
52
|
+
assert_in_delta(Distribution::NormalBivariate.cdf_math(x,x,0.5), Distribution::NormalBivariate.cdf_iterate(x,x,0.5), 0.001)
|
53
|
+
}
|
54
|
+
|
55
|
+
assert_in_delta(0.686, Distribution::NormalBivariate.cdf(2,0.5,0.5), 0.001)
|
56
|
+
assert_in_delta(0.498, Distribution::NormalBivariate.cdf(2,0.0,0.5), 0.001)
|
57
|
+
assert_in_delta(0.671, Distribution::NormalBivariate.cdf(1.5,0.5,0.5), 0.001)
|
58
|
+
|
59
|
+
assert_in_delta(Distribution::Normal.cdf(0), Distribution::NormalBivariate.cdf(10,0,0.9), 0.001)
|
60
|
+
end
|
61
|
+
def test_f
|
62
|
+
if !NOT_GSL
|
63
|
+
[0.1,0.5,1,2,10,20,30].each{|f|
|
64
|
+
[2,5,10].each{|n2|
|
65
|
+
[2,5,10].each{|n1|
|
66
|
+
area=Distribution::F.cdf(f,n1,n2)
|
67
|
+
assert_in_delta(area, GSL::Cdf.fdist_P(f,n1,n2),0.0001)
|
68
|
+
assert_in_delta(Distribution::F.p_value(area,n1,n2), GSL::Cdf.fdist_Pinv(area,n1,n2),0.0001)
|
69
|
+
|
70
|
+
}
|
54
71
|
}
|
55
|
-
|
72
|
+
}
|
56
73
|
end
|
74
|
+
end
|
57
75
|
|
58
76
|
end
|
data/test/test_factor.rb
CHANGED
data/test/test_gsl.rb
CHANGED
@@ -1,22 +1,21 @@
|
|
1
1
|
$:.unshift(File.dirname(__FILE__)+'/../lib/')
|
2
2
|
require 'statsample'
|
3
3
|
require 'test/unit'
|
4
|
-
require 'matrix_extension'
|
5
4
|
class StatsampleGSLTestCase < Test::Unit::TestCase
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
end
|
5
|
+
def test_matrix_to_gsl
|
6
|
+
if HAS_GSL
|
7
|
+
a=[1,2,3,4,20].to_vector(:scale)
|
8
|
+
b=[3,2,3,4,50].to_vector(:scale)
|
9
|
+
c=[6,2,3,4,3].to_vector(:scale)
|
10
|
+
ds={'a'=>a,'b'=>b,'c'=>c}.to_dataset
|
11
|
+
gsl=ds.to_matrix.to_gsl
|
12
|
+
assert_equal(5,gsl.size1)
|
13
|
+
assert_equal(3,gsl.size2)
|
14
|
+
matrix=gsl.to_matrix
|
15
|
+
assert_equal(5,matrix.row_size)
|
16
|
+
assert_equal(3,matrix.column_size)
|
19
17
|
end
|
18
|
+
end
|
20
19
|
end
|
21
20
|
|
22
21
|
|
data/test/test_regression.rb
CHANGED
@@ -50,7 +50,7 @@ class StatsampleRegressionTestCase < Test::Unit::TestCase
|
|
50
50
|
assert_in_delta(residuals[i],c_residuals[i],0.001)
|
51
51
|
}
|
52
52
|
else
|
53
|
-
puts "Regression::Multiple::
|
53
|
+
puts "Regression::Multiple::GslEngine not tested (no Gsl)"
|
54
54
|
end
|
55
55
|
end
|
56
56
|
|
data/test/test_statistics.rb
CHANGED
@@ -24,13 +24,10 @@ class StatsampleStatisicsTestCase < Test::Unit::TestCase
|
|
24
24
|
|
25
25
|
end
|
26
26
|
def test_chi_square
|
27
|
-
assert_raise TypeError do
|
28
|
-
Statsample::Test.chi_square(1,1)
|
29
|
-
end
|
30
27
|
real=Matrix[[95,95],[45,155]]
|
31
28
|
expected=Matrix[[68,122],[72,128]]
|
32
29
|
assert_nothing_raised do
|
33
|
-
|
30
|
+
chi=Statsample::Test.chi_square(real,expected)
|
34
31
|
end
|
35
32
|
chi=Statsample::Test.chi_square(real,expected)
|
36
33
|
assert_in_delta(32.53,chi,0.1)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: statsample
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.1
|
4
|
+
version: 0.6.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Claudio Bustos
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2010-02-
|
12
|
+
date: 2010-02-11 00:00:00 -03:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -76,35 +76,23 @@ files:
|
|
76
76
|
- data/test_binomial.csv
|
77
77
|
- data/tetmat_matrix.txt
|
78
78
|
- data/tetmat_test.txt
|
79
|
-
- demo/benchmark.rb
|
80
|
-
- demo/chi-square.rb
|
81
|
-
- demo/crosstab.rb
|
82
|
-
- demo/dice.rb
|
83
|
-
- demo/distribution_t.rb
|
84
|
-
- demo/graph.rb
|
85
|
-
- demo/item_analysis.rb
|
86
|
-
- demo/mean.rb
|
87
|
-
- demo/nunnally_6.rb
|
88
|
-
- demo/pca.rb
|
89
|
-
- demo/proportion.rb
|
90
|
-
- demo/regression.rb
|
91
|
-
- demo/sample_test.csv
|
92
|
-
- demo/spss_matrix.rb
|
93
|
-
- demo/strata_proportion.rb
|
94
|
-
- demo/stratum.rb
|
95
|
-
- demo/t-student.rb
|
96
|
-
- demo/umann.rb
|
79
|
+
- demo/dominance_analysis_bootstrap.rb
|
80
|
+
- demo/dominanceanalysis.rb
|
81
|
+
- demo/multiple_regression.rb
|
82
|
+
- demo/polychoric.rb
|
83
|
+
- demo/tetrachoric.rb
|
97
84
|
- lib/distribution.rb
|
98
85
|
- lib/distribution/chisquare.rb
|
99
86
|
- lib/distribution/f.rb
|
100
87
|
- lib/distribution/normal.rb
|
88
|
+
- lib/distribution/normalbivariate.rb
|
101
89
|
- lib/distribution/t.rb
|
102
|
-
- lib/matrix_extension.rb
|
103
90
|
- lib/spss.rb
|
104
91
|
- lib/statistics2.rb
|
105
92
|
- lib/statsample.rb
|
106
93
|
- lib/statsample/anova.rb
|
107
94
|
- lib/statsample/bivariate.rb
|
95
|
+
- lib/statsample/bivariate/polychoric.rb
|
108
96
|
- lib/statsample/bivariate/tetrachoric.rb
|
109
97
|
- lib/statsample/codification.rb
|
110
98
|
- lib/statsample/combination.rb
|
@@ -127,6 +115,7 @@ files:
|
|
127
115
|
- lib/statsample/graph/svgscatterplot.rb
|
128
116
|
- lib/statsample/histogram.rb
|
129
117
|
- lib/statsample/htmlreport.rb
|
118
|
+
- lib/statsample/matrix.rb
|
130
119
|
- lib/statsample/mle.rb
|
131
120
|
- lib/statsample/mle/logit.rb
|
132
121
|
- lib/statsample/mle/normal.rb
|
data/demo/benchmark.rb
DELETED
@@ -1,76 +0,0 @@
|
|
1
|
-
$:.unshift(File.dirname(__FILE__)+'/../lib/')
|
2
|
-
require 'statsample'
|
3
|
-
require 'benchmark'
|
4
|
-
v=(0..10000).collect{|n|
|
5
|
-
r=rand(100)
|
6
|
-
if(r<90)
|
7
|
-
r
|
8
|
-
else
|
9
|
-
nil
|
10
|
-
end
|
11
|
-
}.to_vector
|
12
|
-
v.missing_values=[5,10,20]
|
13
|
-
v.type=:scale
|
14
|
-
a=[]
|
15
|
-
b=[]
|
16
|
-
c=[]
|
17
|
-
(0..1000).each{|i|
|
18
|
-
a.push(rand())
|
19
|
-
b.push(rand())
|
20
|
-
c.push(rand())
|
21
|
-
}
|
22
|
-
ds=Statsample::Dataset.new({'a'=>a.to_vector(:scale),'b'=>b.to_vector(:scale), 'c'=>c.to_vector(:scale)})
|
23
|
-
|
24
|
-
|
25
|
-
n = 300
|
26
|
-
if (false)
|
27
|
-
Benchmark.bm(7) do |bench|
|
28
|
-
bench.report("missing or") { for i in 1..n; v.each {|x| !(x.nil? or v.missing_values.include? x) }; end }
|
29
|
-
bench.report("missing and") { for i in 1..n;v.each {|x| !x.nil? and !v.missing_values.include? x } ; end }
|
30
|
-
end
|
31
|
-
end
|
32
|
-
if (false)
|
33
|
-
Benchmark.bm(7) do |bench|
|
34
|
-
bench.report("true") { Statsample::OPTIMIZED=true; for i in 1..n; v.set_valid_data ; end }
|
35
|
-
bench.report("false") { Statsample::OPTIMIZED=false; for i in 1..n; v.set_valid_data ; end }
|
36
|
-
end
|
37
|
-
end
|
38
|
-
|
39
|
-
if (true)
|
40
|
-
Benchmark.bm(7) do |x|
|
41
|
-
x.report("Alglib coeffs") { for i in 1..n; lr=Statsample::Regression::Multiple::AlglibEngine.new(ds,"c"); lr.coeffs; lr=nil;end }
|
42
|
-
|
43
|
-
x.report("GslEngine coeffs") { for i in 1..n; lr=Statsample::Regression::Multiple::GslEngine.new(ds,"c"); lr.coeffs;lr=nil; end }
|
44
|
-
end
|
45
|
-
end
|
46
|
-
if(true)
|
47
|
-
Benchmark.bm(7) do |x|
|
48
|
-
x.report("Alglib process") { for i in 1..n; lr=Statsample::Regression::Multiple::AlglibEngine.new(ds,"c"); lr.process([rand(10),rand(10)]); end }
|
49
|
-
x.report("GslEngine process") { for i in 1..n; lr=Statsample::Regression::Multiple::GslEngine.new(ds,"c"); lr.process([rand(10),rand(10)]); end }
|
50
|
-
|
51
|
-
end
|
52
|
-
end
|
53
|
-
if (false)
|
54
|
-
Benchmark.bm(7) do |x|
|
55
|
-
x.report("mean") { for i in 1..n; v.mean; end }
|
56
|
-
x.report("slow_mean") { for i in 1..n; v.mean_slow; end }
|
57
|
-
|
58
|
-
end
|
59
|
-
|
60
|
-
Benchmark.bm(7) do |x|
|
61
|
-
x.report("variance_sample") { for i in 1..n; v.variance_sample; end }
|
62
|
-
x.report("variance_slow") { for i in 1..n; v.slow_variance_sample; end }
|
63
|
-
|
64
|
-
end
|
65
|
-
|
66
|
-
|
67
|
-
Benchmark.bm(7) do |x|
|
68
|
-
|
69
|
-
x.report("Nominal.frequencies") { for i in 1..n; v.frequencies; end }
|
70
|
-
x.report("Nominal.frequencies_slow") { for i in 1..n; v.frequencies_slow; end }
|
71
|
-
|
72
|
-
x.report("_frequencies") { for i in 1..n; Statsample._frequencies(v.valid_data); end }
|
73
|
-
|
74
|
-
end
|
75
|
-
|
76
|
-
end
|
data/demo/chi-square.rb
DELETED
@@ -1,44 +0,0 @@
|
|
1
|
-
require File.dirname(__FILE__)+'/../lib/statsample'
|
2
|
-
require 'rbgsl'
|
3
|
-
require 'statsample/resample'
|
4
|
-
require 'statsample/test'
|
5
|
-
require 'matrix'
|
6
|
-
ideal=Matrix[[30,30,40]]
|
7
|
-
tests=10000
|
8
|
-
monte=Statsample::Resample.repeat_and_save(tests) {
|
9
|
-
observed=[0,0,0]
|
10
|
-
(1..100).each{|i|
|
11
|
-
r=rand(100)
|
12
|
-
if r<30
|
13
|
-
observed[0]+=1
|
14
|
-
elsif r<60
|
15
|
-
observed[1]+=1
|
16
|
-
else
|
17
|
-
observed[2]+=1
|
18
|
-
end
|
19
|
-
}
|
20
|
-
Statsample::Test::chi_square(Matrix[observed],ideal)
|
21
|
-
}
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
v=monte.to_vector(:scale)
|
26
|
-
|
27
|
-
x=[]
|
28
|
-
y=[]
|
29
|
-
y2=[]
|
30
|
-
y3=[]
|
31
|
-
y4=[]
|
32
|
-
prev=0
|
33
|
-
prev_chi=0
|
34
|
-
v.frequencies.sort.each{|k,v1|
|
35
|
-
x.push(k)
|
36
|
-
y.push(prev+v1)
|
37
|
-
prev=prev+v1
|
38
|
-
cdf_chi=GSL::Cdf.chisq_P(k,2)
|
39
|
-
y2.push(cdf_chi)
|
40
|
-
y4.push(prev.quo(tests))
|
41
|
-
}
|
42
|
-
|
43
|
-
|
44
|
-
GSL::graph(GSL::Vector.alloc(x), GSL::Vector.alloc(y2), GSL::Vector.alloc(y4))
|
data/demo/crosstab.rb
DELETED
data/demo/dice.rb
DELETED
@@ -1,13 +0,0 @@
|
|
1
|
-
require File.dirname(__FILE__)+"/../lib/statsample"
|
2
|
-
require 'statsample/srs'
|
3
|
-
require 'statsample/resample'
|
4
|
-
require 'gnuplot'
|
5
|
-
|
6
|
-
tests=3000
|
7
|
-
# rand a 50%
|
8
|
-
monte_with=Statsample::Resample.repeat_and_save(tests) {
|
9
|
-
(1+rand(6))+(1+rand(6))
|
10
|
-
}.to_vector(:scale)
|
11
|
-
|
12
|
-
p monte_with.mean
|
13
|
-
|
data/demo/distribution_t.rb
DELETED
@@ -1,95 +0,0 @@
|
|
1
|
-
#!/usr/bin/ruby
|
2
|
-
|
3
|
-
require File.dirname(__FILE__)+"/../lib/statsample"
|
4
|
-
require 'statsample/resample'
|
5
|
-
require 'gnuplot'
|
6
|
-
r = GSL::Rng.alloc(GSL::Rng::TAUS, 1)
|
7
|
-
v=[]
|
8
|
-
population_size=10000
|
9
|
-
population_size.times{|i|
|
10
|
-
v.push(r.ugaussian)
|
11
|
-
}
|
12
|
-
|
13
|
-
v=v.to_vector(:scale)
|
14
|
-
vm=v.mean
|
15
|
-
vsd=v.sdp
|
16
|
-
puts "Population sd:#{v.sdp}"
|
17
|
-
tests=3000
|
18
|
-
Gnuplot.open do |gp|
|
19
|
-
Gnuplot::Plot.new( gp ) do |plot|
|
20
|
-
plot.boxwidth("0.9 absolute")
|
21
|
-
plot.xrange("[-3:3]")
|
22
|
-
plot.yrange("[0:1.1]")
|
23
|
-
plot.style("fill solid 1.00 border -1")
|
24
|
-
[2].each {|ss|
|
25
|
-
puts "Sample size:#{ss}"
|
26
|
-
ee=v.sdp.quo(Math::sqrt(ss))
|
27
|
-
puts "SE: #{ee}"
|
28
|
-
|
29
|
-
puts "Expected variance with replacement: #{v.variance_population.quo(ss)*(v.size-1).quo(v.size)}"
|
30
|
-
puts "Expected variance without replacement: #{v.variance_population.quo(ss)*(1-ss.quo(v.size))}"
|
31
|
-
|
32
|
-
sample_size=ss
|
33
|
-
sds_prom=[]
|
34
|
-
sds_prom_wo=[]
|
35
|
-
monte_wr=Statsample::Resample.repeat_and_save(tests) {
|
36
|
-
sample=v.sample_with_replacement(sample_size)
|
37
|
-
sds_prom.push(sample.sds)
|
38
|
-
sample.mean
|
39
|
-
}
|
40
|
-
monte_wor=Statsample::Resample.repeat_and_save(tests) {
|
41
|
-
sample=v.sample_without_replacement(sample_size)
|
42
|
-
sds_prom_wo.push(sample.sds)
|
43
|
-
sample.mean
|
44
|
-
}
|
45
|
-
xxz=[]
|
46
|
-
xxt=[]
|
47
|
-
xa=[]
|
48
|
-
xy=[]
|
49
|
-
xt=[]
|
50
|
-
xz=[]
|
51
|
-
|
52
|
-
s_wr=sds_prom.to_vector(:scale).mean
|
53
|
-
s_wor=sds_prom_wo.to_vector(:scale).mean
|
54
|
-
|
55
|
-
mw=monte_wr.to_vector(:scale)
|
56
|
-
mwo=monte_wor.to_vector(:scale)
|
57
|
-
puts "Sample variance with replacement: #{mw.variance_population}"
|
58
|
-
puts "Sample variance without replacement: #{monte_wor.to_vector(:scale).variance_population}"
|
59
|
-
puts "Mean sd estimadet :#{vsd*Math::sqrt(ss-1)}"
|
60
|
-
puts "Mean Sd W/R: #{s_wr}"
|
61
|
-
puts "Mean Sd WO/R: #{s_wor}"
|
62
|
-
|
63
|
-
mx=mw.mean
|
64
|
-
er=mw.sds
|
65
|
-
|
66
|
-
prev=0
|
67
|
-
mw.frequencies.sort.each{|x,y|
|
68
|
-
t=(x-vm).quo(s_wr.quo(Math::sqrt(ss))*s_wr.quo(ss-1))
|
69
|
-
z=(x-vm).quo(vsd.quo(Math::sqrt(ss)))
|
70
|
-
xxz.push(z)
|
71
|
-
xxt.push(t)
|
72
|
-
prev+=y
|
73
|
-
xy.push(prev.to_f/tests)
|
74
|
-
xt.push(Distribution::T.cdf(t, ss-1))
|
75
|
-
xz.push(Distribution::Normal.cdf(z))
|
76
|
-
|
77
|
-
}
|
78
|
-
plot.data << Gnuplot::DataSet.new( [xxt,xy] ) do |ds|
|
79
|
-
ds.with="lines"
|
80
|
-
ds.title = "sim #{sample_size}"
|
81
|
-
end
|
82
|
-
plot.data << Gnuplot::DataSet.new( [xxt,xt] ) do |ds|
|
83
|
-
ds.with="lines"
|
84
|
-
ds.title = "t #{sample_size}"
|
85
|
-
end
|
86
|
-
plot.data << Gnuplot::DataSet.new( [xxz,xz] ) do |ds|
|
87
|
-
ds.with="lines"
|
88
|
-
ds.title = "z"
|
89
|
-
end
|
90
|
-
|
91
|
-
}
|
92
|
-
|
93
|
-
end
|
94
|
-
end
|
95
|
-
|