statsample 0.6.5 → 0.6.7
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +15 -0
- data/Manifest.txt +6 -0
- data/README.txt +30 -12
- data/Rakefile +91 -0
- data/demo/levene.rb +9 -0
- data/demo/multiple_regression.rb +1 -7
- data/demo/polychoric.rb +1 -0
- data/demo/principal_axis.rb +8 -0
- data/lib/distribution/f.rb +22 -22
- data/lib/spss.rb +99 -99
- data/lib/statsample/bivariate/polychoric.rb +32 -22
- data/lib/statsample/bivariate/tetrachoric.rb +212 -207
- data/lib/statsample/bivariate.rb +6 -6
- data/lib/statsample/codification.rb +65 -65
- data/lib/statsample/combination.rb +60 -59
- data/lib/statsample/converter/csv19.rb +12 -12
- data/lib/statsample/converters.rb +1 -1
- data/lib/statsample/dataset.rb +93 -36
- data/lib/statsample/dominanceanalysis/bootstrap.rb +66 -3
- data/lib/statsample/dominanceanalysis.rb +5 -6
- data/lib/statsample/factor/pca.rb +41 -11
- data/lib/statsample/factor/principalaxis.rb +105 -29
- data/lib/statsample/factor/rotation.rb +20 -3
- data/lib/statsample/factor.rb +1 -1
- data/lib/statsample/graph/gdchart.rb +13 -13
- data/lib/statsample/graph/svggraph.rb +166 -167
- data/lib/statsample/matrix.rb +22 -12
- data/lib/statsample/mle/logit.rb +3 -2
- data/lib/statsample/mle/probit.rb +7 -5
- data/lib/statsample/mle.rb +4 -2
- data/lib/statsample/multiset.rb +125 -124
- data/lib/statsample/permutation.rb +2 -1
- data/lib/statsample/regression/binomial/logit.rb +4 -3
- data/lib/statsample/regression/binomial/probit.rb +2 -1
- data/lib/statsample/regression/binomial.rb +62 -81
- data/lib/statsample/regression/multiple/baseengine.rb +1 -1
- data/lib/statsample/regression/multiple/gslengine.rb +1 -1
- data/lib/statsample/regression/multiple/matrixengine.rb +12 -6
- data/lib/statsample/regression/multiple.rb +15 -42
- data/lib/statsample/regression/simple.rb +93 -78
- data/lib/statsample/regression.rb +74 -2
- data/lib/statsample/reliability.rb +117 -120
- data/lib/statsample/srs.rb +156 -153
- data/lib/statsample/test/levene.rb +90 -0
- data/lib/statsample/test/umannwhitney.rb +25 -9
- data/lib/statsample/test.rb +2 -0
- data/lib/statsample/vector.rb +388 -413
- data/lib/statsample.rb +74 -30
- data/po/es/statsample.mo +0 -0
- data/test/test_bivariate.rb +5 -4
- data/test/test_combination.rb +1 -1
- data/test/test_dataset.rb +2 -2
- data/test/test_factor.rb +53 -6
- data/test/test_gsl.rb +1 -1
- data/test/test_mle.rb +1 -1
- data/test/test_regression.rb +18 -33
- data/test/test_statistics.rb +15 -33
- data/test/test_stest.rb +35 -0
- data/test/test_svg_graph.rb +2 -2
- data/test/test_vector.rb +331 -333
- metadata +38 -11
@@ -0,0 +1,90 @@
|
|
1
|
+
module Statsample
|
2
|
+
module Test
|
3
|
+
# = Levene Test for Equality of Variances
|
4
|
+
# From NIST/SEMATECH:
|
5
|
+
# <blockquote>Levene's test ( Levene, 1960) is used to test if k samples have equal variances. Equal variances across samples is called homogeneity of variance. Some statistical tests, for example the analysis of variance, assume that variances are equal across groups or samples. The Levene test can be used to verify that assumption.</blockquote>
|
6
|
+
# Use:
|
7
|
+
# require 'statsample'
|
8
|
+
# a=[1,2,3,4,5,6,7,8,100,10].to_scale
|
9
|
+
# b=[30,40,50,60,70,80,90,100,110,120].to_scale
|
10
|
+
#
|
11
|
+
# levene=Statsample::Test::Levene.new([a,b])
|
12
|
+
# puts levene.summary
|
13
|
+
#
|
14
|
+
# Output:
|
15
|
+
# Levene Test
|
16
|
+
# F: 0.778121319848449
|
17
|
+
# p: 0.389344552595791
|
18
|
+
#
|
19
|
+
# Reference:
|
20
|
+
# * NIST/SEMATECH e-Handbook of Statistical Methods. Available on http://www.itl.nist.gov/div898/handbook/eda/section3/eda35a.htm
|
21
|
+
class Levene
|
22
|
+
# Degrees of freedom 1 (k-1)
|
23
|
+
attr_reader :d1
|
24
|
+
# Degrees of freedom 2 (n-k)
|
25
|
+
attr_reader :d2
|
26
|
+
# Name of test
|
27
|
+
attr_accessor :name
|
28
|
+
# Input could be an array of vectors or a dataset
|
29
|
+
def initialize(input, opts=Hash.new())
|
30
|
+
@vectors=input
|
31
|
+
@name="Levene Test"
|
32
|
+
opts.each{|k,v|
|
33
|
+
self.send("#{k}=",v) if self.respond_to? k
|
34
|
+
}
|
35
|
+
compute
|
36
|
+
end
|
37
|
+
# Value of the test
|
38
|
+
def f
|
39
|
+
@w
|
40
|
+
end
|
41
|
+
|
42
|
+
def to_reportbuilder(generator) # :nodoc:
|
43
|
+
generator.add_text(summary)
|
44
|
+
|
45
|
+
end
|
46
|
+
# Summary of results
|
47
|
+
def summary
|
48
|
+
"#{@name}
|
49
|
+
F: #{f}
|
50
|
+
p: #{probability}"
|
51
|
+
end
|
52
|
+
|
53
|
+
def compute
|
54
|
+
n=@vectors.inject(0) {|ac,v| ac+v.n_valid}
|
55
|
+
|
56
|
+
zi=@vectors.collect {|vector|
|
57
|
+
mean=vector.mean
|
58
|
+
vector.collect {|v| (v-mean).abs }.to_scale
|
59
|
+
}
|
60
|
+
|
61
|
+
total_mean=zi.inject([]) {|ac,vector|
|
62
|
+
ac+vector.valid_data
|
63
|
+
}.to_scale.mean
|
64
|
+
|
65
|
+
k=@vectors.size
|
66
|
+
|
67
|
+
sum_num=zi.inject(0) {|ac,vector|
|
68
|
+
ac+(vector.size*(vector.mean-total_mean)**2)
|
69
|
+
}
|
70
|
+
|
71
|
+
sum_den=zi.inject(0) {|ac,vector|
|
72
|
+
z_mean=vector.mean
|
73
|
+
ac+vector.valid_data.inject(0) {|acp,zij|
|
74
|
+
acp+(zij-z_mean)**2
|
75
|
+
}
|
76
|
+
}
|
77
|
+
@w=((n-k)*sum_num).quo((k-1)*sum_den)
|
78
|
+
@d1=k-1
|
79
|
+
@d2=n-k
|
80
|
+
end
|
81
|
+
private :compute
|
82
|
+
# Probability.
|
83
|
+
# With H_0 = Sum(s2)=0, probability of obtaining a test value greater than or equal to the one observed in the sample
|
84
|
+
def probability
|
85
|
+
1-Distribution::F.cdf(f, @d1, @d2)
|
86
|
+
end
|
87
|
+
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
@@ -24,7 +24,8 @@ module Statsample
|
|
24
24
|
# Parameters:
|
25
25
|
# * n1: group 1 size
|
26
26
|
# * n2: group 2 size
|
27
|
-
# Reference:
|
27
|
+
# Reference:
|
28
|
+
# * Dinneen, L., & Blakesley, B. (1973). Algorithm AS 62: A Generator for the Sampling Distribution of the Mann-Whitney U Statistic. <em>Journal of the Royal Statistical Society, 22</em>(2), 269-273
|
28
29
|
#
|
29
30
|
def self.u_sampling_distribution_as62(n1,n2)
|
30
31
|
|
@@ -98,14 +99,18 @@ module Statsample
|
|
98
99
|
attr_reader :r1
|
99
100
|
# Sample 2 Rank sum
|
100
101
|
attr_reader :r2
|
101
|
-
# Sample 1 U
|
102
|
+
# Sample 1 U (useful for demonstration)
|
102
103
|
attr_reader :u1
|
103
|
-
# Sample 2 U
|
104
|
+
# Sample 2 U (useful for demonstration)
|
104
105
|
attr_reader :u2
|
105
|
-
# U Value
|
106
|
+
# U Value
|
106
107
|
attr_reader :u
|
107
|
-
#
|
108
|
+
# Value of compensation for ties (useful for demonstration)
|
108
109
|
attr_reader :t
|
110
|
+
#
|
111
|
+
# Create a new Mann-Whitney U test
|
112
|
+
# Params: Two Statsample::Vectors
|
113
|
+
#
|
109
114
|
def initialize(v1,v2)
|
110
115
|
@n1=v1.valid_data.size
|
111
116
|
@n2=v2.valid_data.size
|
@@ -128,6 +133,7 @@ module Statsample
|
|
128
133
|
@u2=r2-((@n2*(@n2+1)).quo(2))
|
129
134
|
@u=(u1<u2) ? u1 : u2
|
130
135
|
end
|
136
|
+
# Report results.
|
131
137
|
def summary
|
132
138
|
out=<<-HEREDOC
|
133
139
|
Mann-Whitney U
|
@@ -141,8 +147,11 @@ Z: #{sprintf("%0.3f",z)} (p: #{sprintf("%0.3f",z_probability)})
|
|
141
147
|
end
|
142
148
|
out
|
143
149
|
end
|
144
|
-
|
145
|
-
|
150
|
+
def to_reportbuilder(generator) # :nodoc:
|
151
|
+
generator.add_text(summary)
|
152
|
+
end
|
153
|
+
# Exact probability of finding values of U lower than or equal to the sample value on the U distribution. Use with caution when m*n>100000.
|
154
|
+
# Uses u_sampling_distribution_as62
|
146
155
|
def exact_probability
|
147
156
|
dist=UMannWhitney.u_sampling_distribution_as62(@n1,@n2)
|
148
157
|
sum=0
|
@@ -151,16 +160,23 @@ Z: #{sprintf("%0.3f",z)} (p: #{sprintf("%0.3f",z_probability)})
|
|
151
160
|
}
|
152
161
|
sum
|
153
162
|
end
|
154
|
-
#
|
163
|
+
# Adjust for ties.
|
164
|
+
#
|
165
|
+
# Reference:
|
166
|
+
# * http://europe.isixsigma.com/library/content/c080806a.asp
|
155
167
|
def adjust_for_ties(data)
|
156
168
|
@t=data.frequencies.find_all{|k,v| v>1}.inject(0) {|a,v|
|
157
169
|
a+(v[1]**3-v[1]).quo(12)
|
158
170
|
}
|
159
171
|
end
|
172
|
+
|
173
|
+
private :adjust_for_ties
|
174
|
+
|
160
175
|
# Z value for U, with adjust for ties.
|
161
176
|
# For large samples, U is approximately normally distributed.
|
162
177
|
# In that case, you can use z to obtain the probability for U.
|
163
|
-
# Reference:
|
178
|
+
# Reference:
|
179
|
+
# * SPSS Manual
|
164
180
|
def z
|
165
181
|
mu=(@n1*@n2).quo(2)
|
166
182
|
if(!@ties)
|