statsample 0.6.5 → 0.6.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (61) hide show
  1. data/History.txt +15 -0
  2. data/Manifest.txt +6 -0
  3. data/README.txt +30 -12
  4. data/Rakefile +91 -0
  5. data/demo/levene.rb +9 -0
  6. data/demo/multiple_regression.rb +1 -7
  7. data/demo/polychoric.rb +1 -0
  8. data/demo/principal_axis.rb +8 -0
  9. data/lib/distribution/f.rb +22 -22
  10. data/lib/spss.rb +99 -99
  11. data/lib/statsample/bivariate/polychoric.rb +32 -22
  12. data/lib/statsample/bivariate/tetrachoric.rb +212 -207
  13. data/lib/statsample/bivariate.rb +6 -6
  14. data/lib/statsample/codification.rb +65 -65
  15. data/lib/statsample/combination.rb +60 -59
  16. data/lib/statsample/converter/csv19.rb +12 -12
  17. data/lib/statsample/converters.rb +1 -1
  18. data/lib/statsample/dataset.rb +93 -36
  19. data/lib/statsample/dominanceanalysis/bootstrap.rb +66 -3
  20. data/lib/statsample/dominanceanalysis.rb +5 -6
  21. data/lib/statsample/factor/pca.rb +41 -11
  22. data/lib/statsample/factor/principalaxis.rb +105 -29
  23. data/lib/statsample/factor/rotation.rb +20 -3
  24. data/lib/statsample/factor.rb +1 -1
  25. data/lib/statsample/graph/gdchart.rb +13 -13
  26. data/lib/statsample/graph/svggraph.rb +166 -167
  27. data/lib/statsample/matrix.rb +22 -12
  28. data/lib/statsample/mle/logit.rb +3 -2
  29. data/lib/statsample/mle/probit.rb +7 -5
  30. data/lib/statsample/mle.rb +4 -2
  31. data/lib/statsample/multiset.rb +125 -124
  32. data/lib/statsample/permutation.rb +2 -1
  33. data/lib/statsample/regression/binomial/logit.rb +4 -3
  34. data/lib/statsample/regression/binomial/probit.rb +2 -1
  35. data/lib/statsample/regression/binomial.rb +62 -81
  36. data/lib/statsample/regression/multiple/baseengine.rb +1 -1
  37. data/lib/statsample/regression/multiple/gslengine.rb +1 -1
  38. data/lib/statsample/regression/multiple/matrixengine.rb +12 -6
  39. data/lib/statsample/regression/multiple.rb +15 -42
  40. data/lib/statsample/regression/simple.rb +93 -78
  41. data/lib/statsample/regression.rb +74 -2
  42. data/lib/statsample/reliability.rb +117 -120
  43. data/lib/statsample/srs.rb +156 -153
  44. data/lib/statsample/test/levene.rb +90 -0
  45. data/lib/statsample/test/umannwhitney.rb +25 -9
  46. data/lib/statsample/test.rb +2 -0
  47. data/lib/statsample/vector.rb +388 -413
  48. data/lib/statsample.rb +74 -30
  49. data/po/es/statsample.mo +0 -0
  50. data/test/test_bivariate.rb +5 -4
  51. data/test/test_combination.rb +1 -1
  52. data/test/test_dataset.rb +2 -2
  53. data/test/test_factor.rb +53 -6
  54. data/test/test_gsl.rb +1 -1
  55. data/test/test_mle.rb +1 -1
  56. data/test/test_regression.rb +18 -33
  57. data/test/test_statistics.rb +15 -33
  58. data/test/test_stest.rb +35 -0
  59. data/test/test_svg_graph.rb +2 -2
  60. data/test/test_vector.rb +331 -333
  61. metadata +38 -11
@@ -0,0 +1,90 @@
1
+ module Statsample
2
+ module Test
3
+ # = Levene Test for Equality of Variances
4
+ # From NIST/SEMATECH:
5
+ # <blockquote>Levene's test ( Levene, 1960) is used to test if k samples have equal variances. Equal variances across samples is called homogeneity of variance. Some statistical tests, for example the analysis of variance, assume that variances are equal across groups or samples. The Levene test can be used to verify that assumption.</blockquote>
6
+ # Use:
7
+ # require 'statsample'
8
+ # a=[1,2,3,4,5,6,7,8,100,10].to_scale
9
+ # b=[30,40,50,60,70,80,90,100,110,120].to_scale
10
+ #
11
+ # levene=Statsample::Test::Levene.new([a,b])
12
+ # puts levene.summary
13
+ #
14
+ # Output:
15
+ # Levene Test
16
+ # F: 0.778121319848449
17
+ # p: 0.389344552595791
18
+ #
19
+ # Reference:
20
+ # * NIST/SEMATECH e-Handbook of Statistical Methods. Available on http://www.itl.nist.gov/div898/handbook/eda/section3/eda35a.htm
21
+ class Levene
22
+ # Degrees of freedom 1 (k-1)
23
+ attr_reader :d1
24
+ # Degrees of freedom 2 (n-k)
25
+ attr_reader :d2
26
+ # Name of test
27
+ attr_accessor :name
28
+ # Input could be an array of vectors or a dataset
29
+ def initialize(input, opts=Hash.new())
30
+ @vectors=input
31
+ @name="Levene Test"
32
+ opts.each{|k,v|
33
+ self.send("#{k}=",v) if self.respond_to? k
34
+ }
35
+ compute
36
+ end
37
+ # Value of the test
38
+ def f
39
+ @w
40
+ end
41
+
42
+ def to_reportbuilder(generator) # :nodoc:
43
+ generator.add_text(summary)
44
+
45
+ end
46
+ # Summary of results
47
+ def summary
48
+ "#{@name}
49
+ F: #{f}
50
+ p: #{probability}"
51
+ end
52
+
53
+ def compute
54
+ n=@vectors.inject(0) {|ac,v| ac+v.n_valid}
55
+
56
+ zi=@vectors.collect {|vector|
57
+ mean=vector.mean
58
+ vector.collect {|v| (v-mean).abs }.to_scale
59
+ }
60
+
61
+ total_mean=zi.inject([]) {|ac,vector|
62
+ ac+vector.valid_data
63
+ }.to_scale.mean
64
+
65
+ k=@vectors.size
66
+
67
+ sum_num=zi.inject(0) {|ac,vector|
68
+ ac+(vector.size*(vector.mean-total_mean)**2)
69
+ }
70
+
71
+ sum_den=zi.inject(0) {|ac,vector|
72
+ z_mean=vector.mean
73
+ ac+vector.valid_data.inject(0) {|acp,zij|
74
+ acp+(zij-z_mean)**2
75
+ }
76
+ }
77
+ @w=((n-k)*sum_num).quo((k-1)*sum_den)
78
+ @d1=k-1
79
+ @d2=n-k
80
+ end
81
+ private :compute
82
+ # Probability.
83
+ # With H_0 = Sum(s2)=0, probability of obtaining a test value greater than or equal to the one observed in the sample
84
+ def probability
85
+ 1-Distribution::F.cdf(f, @d1, @d2)
86
+ end
87
+
88
+ end
89
+ end
90
+ end
@@ -24,7 +24,8 @@ module Statsample
24
24
  # Parameters:
25
25
  # * n1: group 1 size
26
26
  # * n2: group 2 size
27
- # Reference: Dinneen, L., & Blakesley, B. (1973). Algorithm AS 62: A Generator for the Sampling Distribution of the Mann- Whitney U Statistic. Journal of the Royal Statistical Society, 22(2), 269-273
27
+ # Reference:
28
+ # * Dinneen, L., & Blakesley, B. (1973). Algorithm AS 62: A Generator for the Sampling Distribution of the Mann-Whitney U Statistic. <em>Journal of the Royal Statistical Society, 22</em>(2), 269-273
28
29
  #
29
30
  def self.u_sampling_distribution_as62(n1,n2)
30
31
 
@@ -98,14 +99,18 @@ module Statsample
98
99
  attr_reader :r1
99
100
  # Sample 2 Rank sum
100
101
  attr_reader :r2
101
- # Sample 1 U
102
+ # Sample 1 U (useful for demonstration)
102
103
  attr_reader :u1
103
- # Sample 2 U
104
+ # Sample 2 U (useful for demonstration)
104
105
  attr_reader :u2
105
- # U Value
106
+ # U Value
106
107
  attr_reader :u
107
- # Compensation for ties
108
+ # Value of compensation for ties (useful for demonstration)
108
109
  attr_reader :t
110
+ #
111
+ # Create a new U Mann-Whitney test
112
+ # Params: Two Statsample::Vectors
113
+ #
109
114
  def initialize(v1,v2)
110
115
  @n1=v1.valid_data.size
111
116
  @n2=v2.valid_data.size
@@ -128,6 +133,7 @@ module Statsample
128
133
  @u2=r2-((@n2*(@n2+1)).quo(2))
129
134
  @u=(u1<u2) ? u1 : u2
130
135
  end
136
+ # Report results.
131
137
  def summary
132
138
  out=<<-HEREDOC
133
139
  Mann-Whitney U
@@ -141,8 +147,11 @@ Z: #{sprintf("%0.3f",z)} (p: #{sprintf("%0.3f",z_probability)})
141
147
  end
142
148
  out
143
149
  end
144
- # Exact probability of finding values of U lower or equal to sample on U distribution. Use with caution with m*n>100000
145
- # Reference: Dinneen & Blakesley (1973)
150
+ def to_reportbuilder(generator) # :nodoc:
151
+ generator.add_text(summary)
152
+ end
153
+ # Exact probability of finding values of U lower or equal to sample on U distribution. Use with caution with m*n>100000.
154
+ # Uses u_sampling_distribution_as62
146
155
  def exact_probability
147
156
  dist=UMannWhitney.u_sampling_distribution_as62(@n1,@n2)
148
157
  sum=0
@@ -151,16 +160,23 @@ Z: #{sprintf("%0.3f",z)} (p: #{sprintf("%0.3f",z_probability)})
151
160
  }
152
161
  sum
153
162
  end
154
- # Reference: http://europe.isixsigma.com/library/content/c080806a.asp
163
+ # Adjust for ties.
164
+ #
165
+ # Reference:
166
+ # * http://europe.isixsigma.com/library/content/c080806a.asp
155
167
  def adjust_for_ties(data)
156
168
  @t=data.frequencies.find_all{|k,v| v>1}.inject(0) {|a,v|
157
169
  a+(v[1]**3-v[1]).quo(12)
158
170
  }
159
171
  end
172
+
173
+ private :adjust_for_ties
174
+
160
175
  # Z value for U, with adjust for ties.
161
176
  # For large samples, U is approximately normally distributed.
162
177
  # In that case, you can use z to obtain probability for U.
163
- # Reference: SPSS Manual
178
+ # Reference:
179
+ # * SPSS Manual
164
180
  def z
165
181
  mu=(@n1*@n2).quo(2)
166
182
  if(!@ties)
@@ -3,6 +3,8 @@ module Statsample
3
3
 
4
4
  module Test
5
5
  autoload(:UMannWhitney, 'statsample/test/umannwhitney')
6
+ autoload(:Levene, 'statsample/test/levene')
7
+
6
8
  # Calculate chi square for two Matrix
7
9
  class << self
8
10
  def chi_square(real,expected)