statsample-ekatena 2.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/.travis.yml +23 -0
- data/CONTRIBUTING.md +17 -0
- data/Gemfile +2 -0
- data/History.txt +457 -0
- data/LICENSE.txt +12 -0
- data/README.md +175 -0
- data/Rakefile +44 -0
- data/benchmarks/correlation_matrix_15_variables.rb +32 -0
- data/benchmarks/correlation_matrix_5_variables.rb +33 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.ds +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.html +93 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +71 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.xls +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix_gsl_ruby.ods +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix_with_graphics.ods +0 -0
- data/benchmarks/correlation_matrix_methods/results.ds +0 -0
- data/benchmarks/factor_map.rb +37 -0
- data/benchmarks/helpers_benchmark.rb +5 -0
- data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
- data/doc_latex/manual/equations.tex +78 -0
- data/examples/boxplot.rb +28 -0
- data/examples/chisquare_test.rb +23 -0
- data/examples/correlation_matrix.rb +32 -0
- data/examples/dataset.rb +30 -0
- data/examples/dominance_analysis.rb +33 -0
- data/examples/dominance_analysis_bootstrap.rb +32 -0
- data/examples/histogram.rb +26 -0
- data/examples/icc.rb +24 -0
- data/examples/levene.rb +29 -0
- data/examples/multiple_regression.rb +20 -0
- data/examples/multivariate_correlation.rb +33 -0
- data/examples/parallel_analysis.rb +40 -0
- data/examples/polychoric.rb +40 -0
- data/examples/principal_axis.rb +26 -0
- data/examples/reliability.rb +31 -0
- data/examples/scatterplot.rb +25 -0
- data/examples/t_test.rb +27 -0
- data/examples/tetrachoric.rb +17 -0
- data/examples/u_test.rb +24 -0
- data/examples/vector.rb +20 -0
- data/examples/velicer_map_test.rb +46 -0
- data/grab_references.rb +29 -0
- data/lib/spss.rb +134 -0
- data/lib/statsample-ekatena/analysis.rb +100 -0
- data/lib/statsample-ekatena/analysis/suite.rb +89 -0
- data/lib/statsample-ekatena/analysis/suitereportbuilder.rb +44 -0
- data/lib/statsample-ekatena/anova.rb +24 -0
- data/lib/statsample-ekatena/anova/contrast.rb +79 -0
- data/lib/statsample-ekatena/anova/oneway.rb +187 -0
- data/lib/statsample-ekatena/anova/twoway.rb +207 -0
- data/lib/statsample-ekatena/bivariate.rb +406 -0
- data/lib/statsample-ekatena/bivariate/pearson.rb +54 -0
- data/lib/statsample-ekatena/codification.rb +182 -0
- data/lib/statsample-ekatena/converter/csv.rb +28 -0
- data/lib/statsample-ekatena/converter/spss.rb +48 -0
- data/lib/statsample-ekatena/converters.rb +211 -0
- data/lib/statsample-ekatena/crosstab.rb +188 -0
- data/lib/statsample-ekatena/daru.rb +115 -0
- data/lib/statsample-ekatena/dataset.rb +10 -0
- data/lib/statsample-ekatena/dominanceanalysis.rb +425 -0
- data/lib/statsample-ekatena/dominanceanalysis/bootstrap.rb +232 -0
- data/lib/statsample-ekatena/factor.rb +104 -0
- data/lib/statsample-ekatena/factor/map.rb +124 -0
- data/lib/statsample-ekatena/factor/parallelanalysis.rb +166 -0
- data/lib/statsample-ekatena/factor/pca.rb +242 -0
- data/lib/statsample-ekatena/factor/principalaxis.rb +243 -0
- data/lib/statsample-ekatena/factor/rotation.rb +198 -0
- data/lib/statsample-ekatena/formula/fit_model.rb +46 -0
- data/lib/statsample-ekatena/formula/formula.rb +306 -0
- data/lib/statsample-ekatena/graph.rb +11 -0
- data/lib/statsample-ekatena/graph/boxplot.rb +236 -0
- data/lib/statsample-ekatena/graph/histogram.rb +198 -0
- data/lib/statsample-ekatena/graph/scatterplot.rb +213 -0
- data/lib/statsample-ekatena/histogram.rb +180 -0
- data/lib/statsample-ekatena/matrix.rb +329 -0
- data/lib/statsample-ekatena/multiset.rb +310 -0
- data/lib/statsample-ekatena/regression.rb +65 -0
- data/lib/statsample-ekatena/regression/multiple.rb +89 -0
- data/lib/statsample-ekatena/regression/multiple/alglibengine.rb +128 -0
- data/lib/statsample-ekatena/regression/multiple/baseengine.rb +251 -0
- data/lib/statsample-ekatena/regression/multiple/gslengine.rb +129 -0
- data/lib/statsample-ekatena/regression/multiple/matrixengine.rb +205 -0
- data/lib/statsample-ekatena/regression/multiple/rubyengine.rb +86 -0
- data/lib/statsample-ekatena/regression/simple.rb +121 -0
- data/lib/statsample-ekatena/reliability.rb +150 -0
- data/lib/statsample-ekatena/reliability/icc.rb +415 -0
- data/lib/statsample-ekatena/reliability/multiscaleanalysis.rb +181 -0
- data/lib/statsample-ekatena/reliability/scaleanalysis.rb +233 -0
- data/lib/statsample-ekatena/reliability/skillscaleanalysis.rb +114 -0
- data/lib/statsample-ekatena/resample.rb +15 -0
- data/lib/statsample-ekatena/shorthand.rb +125 -0
- data/lib/statsample-ekatena/srs.rb +169 -0
- data/lib/statsample-ekatena/test.rb +82 -0
- data/lib/statsample-ekatena/test/bartlettsphericity.rb +45 -0
- data/lib/statsample-ekatena/test/chisquare.rb +73 -0
- data/lib/statsample-ekatena/test/f.rb +52 -0
- data/lib/statsample-ekatena/test/kolmogorovsmirnov.rb +63 -0
- data/lib/statsample-ekatena/test/levene.rb +88 -0
- data/lib/statsample-ekatena/test/t.rb +309 -0
- data/lib/statsample-ekatena/test/umannwhitney.rb +208 -0
- data/lib/statsample-ekatena/test/wilcoxonsignedrank.rb +90 -0
- data/lib/statsample-ekatena/vector.rb +19 -0
- data/lib/statsample-ekatena/version.rb +3 -0
- data/lib/statsample.rb +282 -0
- data/po/es/statsample.mo +0 -0
- data/po/es/statsample.po +959 -0
- data/po/statsample.pot +947 -0
- data/references.txt +24 -0
- data/statsample-ekatena.gemspec +49 -0
- data/test/fixtures/bank2.dat +200 -0
- data/test/fixtures/correlation_matrix.rb +17 -0
- data/test/fixtures/df.csv +15 -0
- data/test/fixtures/hartman_23.matrix +9 -0
- data/test/fixtures/stock_data.csv +500 -0
- data/test/fixtures/tetmat_matrix.txt +5 -0
- data/test/fixtures/tetmat_test.txt +1001 -0
- data/test/helpers_tests.rb +83 -0
- data/test/test_analysis.rb +176 -0
- data/test/test_anova_contrast.rb +36 -0
- data/test/test_anovaoneway.rb +26 -0
- data/test/test_anovatwoway.rb +37 -0
- data/test/test_anovatwowaywithdataset.rb +47 -0
- data/test/test_anovawithvectors.rb +102 -0
- data/test/test_awesome_print_bug.rb +16 -0
- data/test/test_bartlettsphericity.rb +25 -0
- data/test/test_bivariate.rb +164 -0
- data/test/test_codification.rb +78 -0
- data/test/test_crosstab.rb +67 -0
- data/test/test_dominance_analysis.rb +39 -0
- data/test/test_factor.rb +228 -0
- data/test/test_factor_map.rb +38 -0
- data/test/test_factor_pa.rb +56 -0
- data/test/test_fit_model.rb +88 -0
- data/test/test_ggobi.rb +35 -0
- data/test/test_gsl.rb +15 -0
- data/test/test_histogram.rb +109 -0
- data/test/test_matrix.rb +48 -0
- data/test/test_multiset.rb +176 -0
- data/test/test_regression.rb +231 -0
- data/test/test_reliability.rb +223 -0
- data/test/test_reliability_icc.rb +198 -0
- data/test/test_reliability_skillscale.rb +57 -0
- data/test/test_resample.rb +24 -0
- data/test/test_srs.rb +9 -0
- data/test/test_statistics.rb +69 -0
- data/test/test_stest.rb +69 -0
- data/test/test_stratified.rb +17 -0
- data/test/test_test_f.rb +33 -0
- data/test/test_test_kolmogorovsmirnov.rb +34 -0
- data/test/test_test_t.rb +62 -0
- data/test/test_umannwhitney.rb +27 -0
- data/test/test_vector.rb +12 -0
- data/test/test_wilcoxonsignedrank.rb +64 -0
- metadata +570 -0
@@ -0,0 +1,169 @@
|
|
1
|
+
module Statsample
  # Several methods to estimate parameters for simple random sampling.
  #
  # == Reference:
  # * Cochran, W.(1972). Sampling Techniques [spanish edition].
  # * http://stattrek.com/Lesson6/SRS.aspx
  #
  # Throughout this module +sam+ / +n_sample+ is the sample size and
  # +pop+ / +n_population+ is the population size. Every method is a
  # module-level function (Statsample::SRS.method_name).
  module SRS
    class << self
      ########################
      #
      # :SECTION: Proportion estimation
      #
      # Function for estimation of proportions
      ########################

      # Finite population correction (over variance).
      # Source: Cochran(1972)
      #
      # Returns (pop - sam) / (pop - 1).
      def fpc_var(sam, pop)
        (pop - sam).quo(pop - 1)
      end

      # Finite population correction (over standard deviation).
      #
      # Returns the square root of (pop - sam) / (pop - 1).
      def fpc(sam, pop)
        Math::sqrt((pop - sam).quo(pop - 1))
      end

      # Non sample fraction.
      #
      # Returns 1 - sample fraction, i.e. 1 - sam / pop.
      def qf(sam, pop)
        1 - (sam.quo(pop))
      end

      # Sample size estimation for proportions, infinite population.
      #
      # * <tt>d</tt>: divisor of the variance term (squared); presumably the
      #   tolerated error -- confirm against Cochran.
      # * <tt>prop</tt>: proportion
      # * <tt>margin</tt>: confidence level used to obtain the normal quantile
      def estimation_n0(d, prop, margin = 0.95)
        t = Distribution::Normal.p_value(1 - (1 - margin).quo(2))
        var = prop * (1 - prop)
        t**2 * var.quo(d**2)
      end

      # Sample size estimation for proportions, finite population.
      #
      # Adjusts estimation_n0 by the population size +n_pobl+:
      # n0 / (1 + (n0 - 1) / n_pobl).
      def estimation_n(d, prop, n_pobl, margin = 0.95)
        n0 = estimation_n0(d, prop, margin)
        n0.quo(1 + ((n0 - 1).quo(n_pobl)))
      end

      # Proportion confidence interval with t values.
      # Uses estimated proportion, sample without replacement.
      #
      # Returns [lower, upper].
      def proportion_confidence_interval_t(prop, n_sample, n_population, margin = 0.95)
        t = Distribution::T.p_value(1 - ((1 - margin).quo(2)), n_sample - 1)
        proportion_confidence_interval(prop, n_sample, n_population, t)
      end

      # Proportion confidence interval with z values.
      # Uses estimated proportion, sample without replacement.
      #
      # Returns [lower, upper].
      def proportion_confidence_interval_z(p, n_sample, n_population, margin = 0.95)
        z = Distribution::Normal.p_value(1 - ((1 - margin).quo(2)))
        proportion_confidence_interval(p, n_sample, n_population, z)
      end

      # Proportion confidence interval with x value.
      # Uses estimated proportion, sample without replacement.
      #
      # +x+ is the critical value multiplying the standard error. The
      # half-width also adds 1 / (2 * sam) to the scaled standard error.
      # Returns [p - half_width, p + half_width].
      def proportion_confidence_interval(p, sam, pop, x)
        one_range = x * Math::sqrt((qf(sam, pop) * p * (1 - p)).quo(sam - 1)) + (1.quo(sam * 2.0))
        [p - one_range, p + one_range]
      end

      # Standard deviation for sample distribution of a proportion.
      # Known proportion, sample with replacement.
      # Based on http://stattrek.com/Lesson6/SRS.aspx
      def proportion_sd_kp_wr(p, n_sample)
        Math::sqrt(p * (1 - p).quo(n_sample))
      end

      # Standard deviation for sample distribution of a proportion.
      # Known proportion, sample without replacement.
      #
      # Sources:
      # * Cochran(1972)
      def proportion_sd_kp_wor(p, sam, pop)
        fpc(sam, pop) * Math::sqrt(p * (1 - p).quo(sam))
      end

      # Standard deviation for sample distribution of a proportion.
      # Estimated proportion, sample with replacement.
      # Based on http://stattrek.com/Lesson6/SRS.aspx.
      def proportion_sd_ep_wr(p, n_sample)
        Math::sqrt(p * (1 - p).quo(n_sample - 1))
      end

      # Standard deviation for sample distribution of a proportion.
      # Estimated proportion, sample without replacement.
      # Reference:
      # * Cochran, 1972, Técnicas de muestreo
      def proportion_sd_ep_wor(p, sam, pop)
        fsc = (pop - sam).quo((sam - 1) * pop)
        Math::sqrt(fsc * p * (1 - p))
      end

      # Total estimation sd based on sample.
      # Known proportion, sample without replacement.
      # Reference:
      # * Cochran(1972)
      #
      # Returns the population size times proportion_sd_kp_wor.
      def proportion_total_sd_kp_wor(prop, sam, pop)
        # Uses the method's own parameters: the previous body referenced the
        # undefined names `pob` and `p` and raised NameError on every call.
        pop * proportion_sd_kp_wor(prop, sam, pop)
      end

      # Total estimation sd based on sample.
      # Estimated proportion, sample without replacement.
      # Source: Cochran(1972)
      def proportion_total_sd_ep_wor(prop, sam, pop)
        fsc = ((pop - sam).to_f / (sam - 1))
        Math::sqrt(fsc * pop * prop * (1 - prop))
      end

      ########################
      #
      # :SECTION: Mean estimation
      #
      ########################

      # Standard error. Known variance, sample with replacement.
      #
      # Computes s / sqrt(sam) * sqrt((pop - 1) / pop).
      def standard_error_ksd_wr(s, sam, pop)
        s.quo(Math::sqrt(sam)) * Math::sqrt((pop - 1).quo(pop))
      end

      # Standard error of the mean. Known variance, sample w/o replacement.
      #
      # Computes s / sqrt(sam) * sqrt(qf(sam, pop)).
      def standard_error_ksd_wor(s, sam, pop)
        s.quo(Math::sqrt(sam)) * Math::sqrt(qf(sam, pop))
      end

      alias_method :standard_error_esd_wr, :standard_error_ksd_wr

      # Standard error of the mean.
      # Estimated variance, without replacement.
      # Cochran (1972) p.47
      def standard_error_esd_wor(s, sam, pop)
        s.quo(Math::sqrt(sam)) * Math::sqrt(qf(sam, pop))
      end

      alias_method :standard_error, :standard_error_esd_wor
      alias_method :se, :standard_error_esd_wor

      # Standard error of total estimation: pop * se(s, sam, pop).
      def standard_error_total(s, sam, pop)
        pop * se(s, sam, pop)
      end

      # Confidence interval using T-Student.
      # Use with n < 60.
      #
      # Returns [lower, upper].
      def mean_confidence_interval_t(mean, s, n_sample, n_population, margin = 0.95)
        t = Distribution::T.p_value(1 - ((1 - margin) / 2), n_sample - 1)
        mean_confidence_interval(mean, s, n_sample, n_population, t)
      end

      # Confidence interval using Z.
      # Use with n > 60.
      #
      # Returns [lower, upper].
      def mean_confidence_interval_z(mean, s, n_sample, n_population, margin = 0.95)
        z = Distribution::Normal.p_value(1 - ((1 - margin) / 2))
        mean_confidence_interval(mean, s, n_sample, n_population, z)
      end

      # Confidence interval using X (an arbitrary critical value).
      #
      # Better use mean_confidence_interval_z or mean_confidence_interval_t.
      # Returns [mean - x * se, mean + x * se].
      def mean_confidence_interval(mean, s, n_sample, n_population, x)
        range = x * se(s, n_sample, n_population)
        [mean - range, mean + range]
      end
    end
  end
end
|
@@ -0,0 +1,82 @@
|
|
1
|
+
module Statsample
  # Module for several statistical tests.
  #
  # The helper methods p_using_cdf, t_critical and z_critical are available
  # both as module functions (via <tt>extend self</tt>) and as instance
  # methods of any class that includes this module. The shorthand
  # constructors (chi_square, u_mannwhitney, ...) are defined on the module
  # itself only.
  module Test
    # NOTE(review): these autoload paths point at 'statsample/test/...';
    # confirm they resolve on the load path used by this gem.
    autoload(:UMannWhitney, 'statsample/test/umannwhitney')
    autoload(:Levene, 'statsample/test/levene')
    autoload(:T, 'statsample/test/t')
    autoload(:F, 'statsample/test/f')
    autoload(:ChiSquare, 'statsample/test/chisquare')
    autoload(:BartlettSphericity, 'statsample/test/bartlettsphericity')
    autoload(:KolmogorovSmirnov, 'statsample/test/kolmogorovsmirnov')
    autoload(:WilcoxonSignedRank, 'statsample/test/wilcoxonsignedrank')

    # Returns probability of getting a value lower or higher
    # than sample, using cdf and number of tails.
    #
    # * <tt>:left</tt> (or <tt>:negative</tt>): one tail left, returns the cdf
    # * <tt>:right</tt> (or <tt>:positive</tt>, <tt>1</tt>): one tail right,
    #   returns 1-cdf
    # * <tt>:both</tt> (or <tt>:two</tt>, <tt>2</tt>): both tails, returns
    #   twice the smaller of cdf and 1-cdf
    #
    # Raises ArgumentError for any other +tails+ value; previously such a
    # value silently produced nil.
    def p_using_cdf(cdf, tails = :both)
      case tails
      when :left, :negative
        cdf
      when :right, :positive, 1
        1 - cdf
      when :both, :two, 2
        2 * (cdf >= 0.5 ? 1 - cdf : cdf)
      else
        raise ArgumentError,
              "Unknown tails option: #{tails.inspect} (expected :both, :left or :right)"
      end
    end

    # Get critical t to create confidence interval.
    #
    # Returns the negated t quantile at (1 - confidence_level) / 2 with
    # +df+ degrees of freedom.
    def t_critical(confidence_level, df)
      -Distribution::T.p_value((1 - confidence_level) / 2.0, df)
    end

    # Get critical z to create confidence interval.
    #
    # Returns the negated quantile at (1 - confidence_level) / 2.
    # NOTE(review): relies on Distribution::Z being defined -- confirm.
    def z_critical(confidence_level)
      -Distribution::Z.p_value((1 - confidence_level) / 2.0)
    end

    extend self

    class << self
      # Calculate chi square for two Matrix (or two Vector) objects.
      #
      # Dispatches on the class of +observed+ and raises a RuntimeError for
      # any other class.
      def chi_square(observed, expected = nil)
        case observed
        when Vector
          ChiSquare::WithVector.new(observed, expected)
        when Matrix
          ChiSquare::WithMatrix.new(observed, expected)
        else
          raise "Not implemented for #{observed.class}"
        end
      end

      # Shorthand for Statsample::Test::UMannWhitney.new
      #
      # * <tt>v1</tt> and <tt>v2</tt> should be Statsample::Vector.
      def u_mannwhitney(v1, v2)
        Statsample::Test::UMannWhitney.new(v1, v2)
      end

      # Shorthand for Statsample::Test::T::OneSample.new
      def t_one_sample(vector, opts = Hash.new)
        Statsample::Test::T::OneSample.new(vector, opts)
      end

      # Shorthand for Statsample::Test::T::TwoSamplesIndependent.new
      def t_two_samples_independent(v1, v2, opts = Hash.new)
        Statsample::Test::T::TwoSamplesIndependent.new(v1, v2, opts)
      end

      # Shorthand for Statsample::Test::WilcoxonSignedRank.new
      def wilcoxon_signed_rank(v1, v2, opts = Hash.new)
        Statsample::Test::WilcoxonSignedRank.new(v1, v2, opts)
      end

      # Shorthand for Statsample::Test::Levene.new
      def levene(input, opts = Hash.new)
        Statsample::Test::Levene.new(input, opts)
      end
    end
  end
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
module Statsample
  module Test
    # == Bartlett's test of Sphericity.
    # Tests the hypothesis that the sample correlation matrix
    # comes from a multivariate normal population where variables
    # are independent. In other words, the population correlation
    # matrix is the identity matrix.
    # == Reference
    # * Dziuban, C., & Shirkey E. (1974). When is a correlation matrix appropriate for factor analysis? Some decision rules. Psychological Bulletin, 81(6), 358-361.
    class BartlettSphericity
      include Statsample::Test
      include Summarizable

      # Label used when the result is reported.
      attr_accessor :name
      # Number of cases, number of variables (matrix rows), the test
      # statistic and its degrees of freedom.
      attr_reader :ncases, :nvars, :value, :df

      # Args
      # * _matrix_: correlation matrix
      # * _ncases_: number of cases
      #
      # The statistic is computed immediately.
      def initialize(matrix, ncases)
        @matrix = matrix
        @ncases = ncases
        @nvars = @matrix.row_size
        @name = _("Bartlett's test of sphericity")
        compute
      end

      # Uses SPSS formula.
      # On Dziuban & Shirkey, the minus between the first and second
      # statement is a *!!!
      #
      # Sets @value to -((n - 1) - (2p + 5) / 6) * ln(det(matrix)) and
      # @df to p * (p - 1) / 2, with n cases and p variables.
      def compute
        multiplier = (@ncases - 1) - (2 * @nvars + 5).quo(6)
        @value = -multiplier * Math::log(@matrix.determinant)
        @df = (@nvars * (@nvars - 1)) / 2
      end

      # Upper-tail chi-square probability of the statistic.
      def probability
        1 - Distribution::ChiSquare.cdf(@value, @df)
      end

      # Writes a one-line summary of the test to +builder+.
      def report_building(builder) # :nodoc:
        line = format("%s : X(%d) = %0.4f , p = %0.4f", @name, @df, @value, probability)
        builder.text line
      end
    end
  end
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
module Statsample
  module Test
    # Pearson chi-square statistics for matrices (WithMatrix) and
    # vectors (WithVector) of observed versus expected frequencies.
    module ChiSquare
      # Readers and helpers shared by both chi-square implementations.
      # Including classes must set @value and @df.
      module Shared
        # Degrees of freedom.
        attr_reader :df
        # Chi-square statistic.
        attr_reader :value

        # Returns the statistic as stored (same object as #value).
        def to_f
          @value
        end

        # Alias-like reader for the statistic.
        def chi_square
          @value
        end

        # Upper-tail probability of the statistic under a chi-square
        # distribution with #df degrees of freedom.
        def probability
          1 - Distribution::ChiSquare.cdf(@value.to_f, @df)
        end
      end

      # Chi-square over two matrices of equal dimensions.
      class WithMatrix
        include Statsample::Test::ChiSquare::Shared

        # * <tt>observed</tt>: matrix of observed frequencies
        # * <tt>expected</tt>: matrix of expected frequencies; when omitted
        #   it is derived from the margins of +observed+
        #
        # Raises RuntimeError when the two matrices differ in size.
        def initialize(observed, expected = nil)
          @observed = observed
          # `||` instead of `or`: the fallback no longer depends on
          # calculate_expected assigning @expected as a side effect.
          @expected = expected || calculate_expected
          if @observed.row_size != @expected.row_size || @observed.column_size != @expected.column_size
            raise "Observed size!=expected size"
          end
          @df = (@observed.row_size - 1) * (@observed.column_size - 1)
          @value = compute_chi
        end

        # Builds (and stores) the expected matrix from the row and column
        # totals of the observed matrix:
        # (row_i / total) * (column_j / total) * total.
        def calculate_expected
          total = @observed.total_sum
          # Margins are loop-invariant, so fetch them once instead of once
          # per cell.
          row_totals = @observed.row_sum
          column_totals = @observed.column_sum
          cells = Array.new(@observed.row_size) do |i|
            Array.new(@observed.column_size) do |j|
              (row_totals[i].quo(total) * column_totals[j].quo(total)) * total
            end
          end
          @expected = Matrix.rows(cells)
        end

        # Sum over all cells of (observed - expected)^2 / expected.
        def compute_chi
          (0...@observed.row_size).inject(0) do |acc, i|
            (0...@observed.column_size).inject(acc) do |row_acc, j|
              row_acc + ((@observed[i, j] - @expected[i, j])**2).quo(@expected[i, j])
            end
          end
        end
      end

      # Chi-square over two vectors of equal length.
      class WithVector
        include Statsample::Test::ChiSquare::Shared

        # * <tt>observed</tt>: observed frequencies (must respond to
        #   +size+ and integer +[]+)
        # * <tt>expected</tt>: expected frequencies, same size
        #
        # Raises ArgumentError when +expected+ is nil and RuntimeError when
        # the sizes differ.
        def initialize(observed, expected)
          if expected.nil?
            raise ArgumentError, "Expected frequencies are required for a vector chi-square test"
          end
          @observed = observed
          @expected = expected
          raise "Observed size!=expected size" if @observed.size != @expected.size
          @df = @observed.size - 1
          @value = compute_chi
        end

        # Sum over all positions of (observed - expected)^2 / expected.
        def compute_chi
          (0...@observed.size).inject(0) do |acc, i|
            acc + ((@observed[i] - @expected[i])**2).quo(@expected[i])
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
module Statsample
  module Test
    # From Wikipedia:
    # An F-test is any statistical test in which the test statistic has an F-distribution under the null hypothesis. It is most often used when comparing statistical models that have been fit to a data set, in order to identify the model that best fits the population from which the data were sampled.
    class F
      include Statsample::Test
      include Summarizable

      attr_reader :var_num, :var_den, :df_num, :df_den, :var_total, :df_total
      # Tails for probability (:both, :left or :right)
      attr_accessor :tails
      # Name of F analysis
      attr_accessor :name

      # Parameters:
      # * var_num: variance numerator
      # * var_den: variance denominator
      # * df_num: degrees of freedom numerator
      # * df_den: degrees of freedom denominator
      # * opts: optional :tails (default :right) and :name
      #
      # Raises RuntimeError when :tails is :both.
      def initialize(var_num, var_den, df_num, df_den, opts = Hash.new)
        @var_num = var_num
        @var_den = var_den
        @df_num = df_num
        @df_den = df_den
        @var_total = var_num + var_den
        @df_total = df_num + df_den

        defaults = { :tails => :right, :name => _("F Test") }
        @opts = defaults.merge(opts)
        raise "Tails should be right or left, not both" if @opts[:tails] == :both

        # Only the known option keys are applied, through their writers.
        defaults.each_key do |key|
          send("#{key}=", @opts[key])
        end
      end

      # F statistic: numerator variance divided by denominator variance.
      def f
        @var_num.quo(@var_den)
      end

      # Same value as #f.
      def to_f
        f
      end

      # Probability of the statistic for the configured #tails.
      def probability
        cumulative = Distribution::F.cdf(f, @df_num, @df_den)
        p_using_cdf(cumulative, tails)
      end

      # Writes a one-line summary to +builder+; degrees of freedom are
      # printed as integers only when both are Integer.
      def report_building(builder) #:nodoc:
        integer_df = @df_num.is_a?(Integer) && @df_den.is_a?(Integer)
        template =
          if integer_df
            "%s : F(%d, %d) = %0.4f , p = %0.4f"
          else
            "%s : F(%0.2f, %0.2f) = %0.4f , p = %0.4f"
          end
        builder.text template % [@name, @df_num, @df_den, f, probability]
      end
    end
  end
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
module Statsample
|
2
|
+
module Test
|
3
|
+
# == Kolmogorov-Smirnov's test of equality of distributions.
|
4
|
+
class KolmogorovSmirnov
|
5
|
+
|
6
|
+
attr_reader :d
|
7
|
+
include Statsample::Test
|
8
|
+
include Summarizable
|
9
|
+
# Creates a new Kolmogorov-Smirnov test and computes its statistic.
#
# * <tt>d1</tt>: must respond to +each+; it is wrapped with make_cdf
# * <tt>d2</tt>: an object with a +cdf+ method (e.g. a Distribution
#   class) or a Proc, used as given; otherwise anything responding to
#   +each+, which is wrapped with make_cdf
#
# Raises RuntimeError when either argument does not qualify.
def initialize(d1, d2)
  raise "First argument should have each method" unless d1.respond_to?(:each)

  @d1 = make_cdf(d1)
  @d2 =
    if d2.respond_to?(:cdf) || d2.is_a?(Proc)
      d2
    elsif d2.respond_to?(:each)
      make_cdf(d2)
    else
      raise "Second argument should respond to cdf or each"
    end
  calculate
end
|
25
|
+
|
26
|
+
# Computes the statistic and stores it in @d: the largest absolute
# difference between the cdf of @d1 and the cdf of @d2 (called directly
# when it is a Proc), evaluated at every value yielded by @d1.
# The stored value is a Float, or 0 when no difference exceeds 0.
#
# NOTE(review): only the points of @d1 are evaluated; confirm this is
# sufficient when @d2 is itself built from a sample.
def calculate
  largest = 0
  @d1.each do |x|
    own = @d1.cdf(x)
    other = @d2.is_a?(Proc) ? @d2.call(x) : @d2.cdf(x)
    gap = (own - other).abs
    largest = gap.to_f if gap > largest
  end
  @d = largest
end
|
35
|
+
|
36
|
+
# Wraps +v+ in an EmpiricDistribution. For a Daru::Vector the values
# returned by +only_valid+ (converted to an array) are used, so that only
# non-missing data is considered; any other object is passed as is.
def make_cdf(v)
  data = v.is_a?(Daru::Vector) ? v.only_valid.to_a : v
  EmpiricDistribution.new(data)
end
|
41
|
+
|
42
|
+
# Empirical distribution built from a collection of comparable values.
# Exposes the sorted values through #each and the empirical cumulative
# distribution function through #cdf.
class EmpiricDistribution
  # * <tt>data</tt>: any collection responding to +sort+. The other
  #   attributes are derived from the sorted copy, so +min+, +max+ and
  #   +size+ are no longer required on the input.
  def initialize(data)
    @data = data.sort
    @n = @data.size
    @min = @data.first
    @max = @data.last
  end

  # Yields every value in ascending order. Returns an Enumerator when no
  # block is given.
  def each(&block)
    return enum_for(:each) unless block_given?

    @data.each(&block)
  end

  # Empirical cdf: the proportion of values lower than or equal to +x+.
  #
  # Returns 0 below the minimum, 1 from the maximum on, and a Rational
  # count / n in between. Counting every value <= x keeps the result
  # correct when the data contains ties; the previous implementation used
  # the position of the first matching value and so counted a repeated
  # value only once.
  def cdf(x)
    return 0 if x < @min
    return 1 if x >= @max

    @data.count { |value| value <= x }.quo(@n)
  end
end # End EmpiricDistribution
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|