statsample-ekatena 2.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (156) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +15 -0
  3. data/.travis.yml +23 -0
  4. data/CONTRIBUTING.md +17 -0
  5. data/Gemfile +2 -0
  6. data/History.txt +457 -0
  7. data/LICENSE.txt +12 -0
  8. data/README.md +175 -0
  9. data/Rakefile +44 -0
  10. data/benchmarks/correlation_matrix_15_variables.rb +32 -0
  11. data/benchmarks/correlation_matrix_5_variables.rb +33 -0
  12. data/benchmarks/correlation_matrix_methods/correlation_matrix.ds +0 -0
  13. data/benchmarks/correlation_matrix_methods/correlation_matrix.html +93 -0
  14. data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +71 -0
  15. data/benchmarks/correlation_matrix_methods/correlation_matrix.xls +0 -0
  16. data/benchmarks/correlation_matrix_methods/correlation_matrix_gsl_ruby.ods +0 -0
  17. data/benchmarks/correlation_matrix_methods/correlation_matrix_with_graphics.ods +0 -0
  18. data/benchmarks/correlation_matrix_methods/results.ds +0 -0
  19. data/benchmarks/factor_map.rb +37 -0
  20. data/benchmarks/helpers_benchmark.rb +5 -0
  21. data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
  22. data/doc_latex/manual/equations.tex +78 -0
  23. data/examples/boxplot.rb +28 -0
  24. data/examples/chisquare_test.rb +23 -0
  25. data/examples/correlation_matrix.rb +32 -0
  26. data/examples/dataset.rb +30 -0
  27. data/examples/dominance_analysis.rb +33 -0
  28. data/examples/dominance_analysis_bootstrap.rb +32 -0
  29. data/examples/histogram.rb +26 -0
  30. data/examples/icc.rb +24 -0
  31. data/examples/levene.rb +29 -0
  32. data/examples/multiple_regression.rb +20 -0
  33. data/examples/multivariate_correlation.rb +33 -0
  34. data/examples/parallel_analysis.rb +40 -0
  35. data/examples/polychoric.rb +40 -0
  36. data/examples/principal_axis.rb +26 -0
  37. data/examples/reliability.rb +31 -0
  38. data/examples/scatterplot.rb +25 -0
  39. data/examples/t_test.rb +27 -0
  40. data/examples/tetrachoric.rb +17 -0
  41. data/examples/u_test.rb +24 -0
  42. data/examples/vector.rb +20 -0
  43. data/examples/velicer_map_test.rb +46 -0
  44. data/grab_references.rb +29 -0
  45. data/lib/spss.rb +134 -0
  46. data/lib/statsample-ekatena/analysis.rb +100 -0
  47. data/lib/statsample-ekatena/analysis/suite.rb +89 -0
  48. data/lib/statsample-ekatena/analysis/suitereportbuilder.rb +44 -0
  49. data/lib/statsample-ekatena/anova.rb +24 -0
  50. data/lib/statsample-ekatena/anova/contrast.rb +79 -0
  51. data/lib/statsample-ekatena/anova/oneway.rb +187 -0
  52. data/lib/statsample-ekatena/anova/twoway.rb +207 -0
  53. data/lib/statsample-ekatena/bivariate.rb +406 -0
  54. data/lib/statsample-ekatena/bivariate/pearson.rb +54 -0
  55. data/lib/statsample-ekatena/codification.rb +182 -0
  56. data/lib/statsample-ekatena/converter/csv.rb +28 -0
  57. data/lib/statsample-ekatena/converter/spss.rb +48 -0
  58. data/lib/statsample-ekatena/converters.rb +211 -0
  59. data/lib/statsample-ekatena/crosstab.rb +188 -0
  60. data/lib/statsample-ekatena/daru.rb +115 -0
  61. data/lib/statsample-ekatena/dataset.rb +10 -0
  62. data/lib/statsample-ekatena/dominanceanalysis.rb +425 -0
  63. data/lib/statsample-ekatena/dominanceanalysis/bootstrap.rb +232 -0
  64. data/lib/statsample-ekatena/factor.rb +104 -0
  65. data/lib/statsample-ekatena/factor/map.rb +124 -0
  66. data/lib/statsample-ekatena/factor/parallelanalysis.rb +166 -0
  67. data/lib/statsample-ekatena/factor/pca.rb +242 -0
  68. data/lib/statsample-ekatena/factor/principalaxis.rb +243 -0
  69. data/lib/statsample-ekatena/factor/rotation.rb +198 -0
  70. data/lib/statsample-ekatena/formula/fit_model.rb +46 -0
  71. data/lib/statsample-ekatena/formula/formula.rb +306 -0
  72. data/lib/statsample-ekatena/graph.rb +11 -0
  73. data/lib/statsample-ekatena/graph/boxplot.rb +236 -0
  74. data/lib/statsample-ekatena/graph/histogram.rb +198 -0
  75. data/lib/statsample-ekatena/graph/scatterplot.rb +213 -0
  76. data/lib/statsample-ekatena/histogram.rb +180 -0
  77. data/lib/statsample-ekatena/matrix.rb +329 -0
  78. data/lib/statsample-ekatena/multiset.rb +310 -0
  79. data/lib/statsample-ekatena/regression.rb +65 -0
  80. data/lib/statsample-ekatena/regression/multiple.rb +89 -0
  81. data/lib/statsample-ekatena/regression/multiple/alglibengine.rb +128 -0
  82. data/lib/statsample-ekatena/regression/multiple/baseengine.rb +251 -0
  83. data/lib/statsample-ekatena/regression/multiple/gslengine.rb +129 -0
  84. data/lib/statsample-ekatena/regression/multiple/matrixengine.rb +205 -0
  85. data/lib/statsample-ekatena/regression/multiple/rubyengine.rb +86 -0
  86. data/lib/statsample-ekatena/regression/simple.rb +121 -0
  87. data/lib/statsample-ekatena/reliability.rb +150 -0
  88. data/lib/statsample-ekatena/reliability/icc.rb +415 -0
  89. data/lib/statsample-ekatena/reliability/multiscaleanalysis.rb +181 -0
  90. data/lib/statsample-ekatena/reliability/scaleanalysis.rb +233 -0
  91. data/lib/statsample-ekatena/reliability/skillscaleanalysis.rb +114 -0
  92. data/lib/statsample-ekatena/resample.rb +15 -0
  93. data/lib/statsample-ekatena/shorthand.rb +125 -0
  94. data/lib/statsample-ekatena/srs.rb +169 -0
  95. data/lib/statsample-ekatena/test.rb +82 -0
  96. data/lib/statsample-ekatena/test/bartlettsphericity.rb +45 -0
  97. data/lib/statsample-ekatena/test/chisquare.rb +73 -0
  98. data/lib/statsample-ekatena/test/f.rb +52 -0
  99. data/lib/statsample-ekatena/test/kolmogorovsmirnov.rb +63 -0
  100. data/lib/statsample-ekatena/test/levene.rb +88 -0
  101. data/lib/statsample-ekatena/test/t.rb +309 -0
  102. data/lib/statsample-ekatena/test/umannwhitney.rb +208 -0
  103. data/lib/statsample-ekatena/test/wilcoxonsignedrank.rb +90 -0
  104. data/lib/statsample-ekatena/vector.rb +19 -0
  105. data/lib/statsample-ekatena/version.rb +3 -0
  106. data/lib/statsample.rb +282 -0
  107. data/po/es/statsample.mo +0 -0
  108. data/po/es/statsample.po +959 -0
  109. data/po/statsample.pot +947 -0
  110. data/references.txt +24 -0
  111. data/statsample-ekatena.gemspec +49 -0
  112. data/test/fixtures/bank2.dat +200 -0
  113. data/test/fixtures/correlation_matrix.rb +17 -0
  114. data/test/fixtures/df.csv +15 -0
  115. data/test/fixtures/hartman_23.matrix +9 -0
  116. data/test/fixtures/stock_data.csv +500 -0
  117. data/test/fixtures/tetmat_matrix.txt +5 -0
  118. data/test/fixtures/tetmat_test.txt +1001 -0
  119. data/test/helpers_tests.rb +83 -0
  120. data/test/test_analysis.rb +176 -0
  121. data/test/test_anova_contrast.rb +36 -0
  122. data/test/test_anovaoneway.rb +26 -0
  123. data/test/test_anovatwoway.rb +37 -0
  124. data/test/test_anovatwowaywithdataset.rb +47 -0
  125. data/test/test_anovawithvectors.rb +102 -0
  126. data/test/test_awesome_print_bug.rb +16 -0
  127. data/test/test_bartlettsphericity.rb +25 -0
  128. data/test/test_bivariate.rb +164 -0
  129. data/test/test_codification.rb +78 -0
  130. data/test/test_crosstab.rb +67 -0
  131. data/test/test_dominance_analysis.rb +39 -0
  132. data/test/test_factor.rb +228 -0
  133. data/test/test_factor_map.rb +38 -0
  134. data/test/test_factor_pa.rb +56 -0
  135. data/test/test_fit_model.rb +88 -0
  136. data/test/test_ggobi.rb +35 -0
  137. data/test/test_gsl.rb +15 -0
  138. data/test/test_histogram.rb +109 -0
  139. data/test/test_matrix.rb +48 -0
  140. data/test/test_multiset.rb +176 -0
  141. data/test/test_regression.rb +231 -0
  142. data/test/test_reliability.rb +223 -0
  143. data/test/test_reliability_icc.rb +198 -0
  144. data/test/test_reliability_skillscale.rb +57 -0
  145. data/test/test_resample.rb +24 -0
  146. data/test/test_srs.rb +9 -0
  147. data/test/test_statistics.rb +69 -0
  148. data/test/test_stest.rb +69 -0
  149. data/test/test_stratified.rb +17 -0
  150. data/test/test_test_f.rb +33 -0
  151. data/test/test_test_kolmogorovsmirnov.rb +34 -0
  152. data/test/test_test_t.rb +62 -0
  153. data/test/test_umannwhitney.rb +27 -0
  154. data/test/test_vector.rb +12 -0
  155. data/test/test_wilcoxonsignedrank.rb +64 -0
  156. metadata +570 -0
@@ -0,0 +1,169 @@
1
module Statsample
  # Several methods to estimate parameters for simple random sampling.
  #
  # == Reference:
  # * Cochran, W.(1972). Sampling Techniques [spanish edition].
  # * http://stattrek.com/Lesson6/SRS.aspx
  module SRS
    class << self
      ########################
      #
      # :SECTION: Proportion estimation
      #
      # Functions for estimation of proportions
      ########################

      # Finite population correction (over variance).
      # Source: Cochran(1972)
      #
      # * <tt>sam</tt>: sample size
      # * <tt>pop</tt>: population size
      #
      # Returns (pop - sam) / (pop - 1).
      def fpc_var(sam, pop)
        (pop - sam).quo(pop - 1)
      end

      # Finite population correction (over standard deviation):
      # the square root of (pop - sam) / (pop - 1).
      def fpc(sam, pop)
        Math.sqrt((pop - sam).quo(pop - 1))
      end

      # Non sample fraction.
      #
      # 1 - sample fraction, i.e. 1 - sam / pop.
      def qf(sam, pop)
        1 - sam.quo(pop)
      end

      # Sample size estimation for proportions, infinite population.
      #
      # * <tt>d</tt>: value squared in the denominator (the tolerated error)
      # * <tt>prop</tt>: proportion
      # * <tt>margin</tt>: confidence level, 0.95 by default
      def estimation_n0(d, prop, margin = 0.95)
        t = Distribution::Normal.p_value(1 - (1 - margin).quo(2))
        var = prop * (1 - prop)
        t**2 * var.quo(d**2)
      end

      # Sample size estimation for proportions, finite population.
      # Adjusts estimation_n0 using the population size <tt>n_pobl</tt>.
      def estimation_n(d, prop, n_pobl, margin = 0.95)
        n0 = estimation_n0(d, prop, margin)
        n0.quo(1 + (n0 - 1).quo(n_pobl))
      end

      # Proportion confidence interval with t values.
      # Uses estimated proportion, sample without replacement.
      # The quantile is taken with n_sample - 1 degrees of freedom.
      def proportion_confidence_interval_t(prop, n_sample, n_population, margin = 0.95)
        t = Distribution::T.p_value(1 - (1 - margin).quo(2), n_sample - 1)
        proportion_confidence_interval(prop, n_sample, n_population, t)
      end

      # Proportion confidence interval with z values.
      # Uses estimated proportion, sample without replacement.
      def proportion_confidence_interval_z(p, n_sample, n_population, margin = 0.95)
        z = Distribution::Normal.p_value(1 - (1 - margin).quo(2))
        proportion_confidence_interval(p, n_sample, n_population, z)
      end

      # Proportion confidence interval with x value.
      # Uses estimated proportion, sample without replacement.
      #
      # Returns [lower, upper]. The half width is
      # x * sqrt(qf * p * (1 - p) / (sam - 1)) plus 1 / (2 * sam)
      # (the extra term is presumably a continuity correction).
      def proportion_confidence_interval(p, sam, pop, x)
        one_range = x * Math.sqrt((qf(sam, pop) * p * (1 - p)).quo(sam - 1)) + 1.quo(sam * 2.0)
        [p - one_range, p + one_range]
      end

      # Standard deviation for sample distribution of a proportion.
      # Known proportion, sample with replacement.
      # Based on http://stattrek.com/Lesson6/SRS.aspx
      def proportion_sd_kp_wr(p, n_sample)
        Math.sqrt(p * (1 - p).quo(n_sample))
      end

      # Standard deviation for sample distribution of a proportion.
      # Known proportion, sample without replacement.
      #
      # Sources:
      # * Cochran(1972)
      def proportion_sd_kp_wor(p, sam, pop)
        fpc(sam, pop) * Math.sqrt(p * (1 - p).quo(sam))
      end

      # Standard deviation for sample distribution of a proportion.
      # Estimated proportion, sample with replacement.
      # Based on http://stattrek.com/Lesson6/SRS.aspx.
      def proportion_sd_ep_wr(p, n_sample)
        Math.sqrt(p * (1 - p).quo(n_sample - 1))
      end

      # Standard deviation for sample distribution of a proportion.
      # Estimated proportion, sample without replacement.
      # Reference:
      # * Cochran, 1972, Técnicas de muestreo
      def proportion_sd_ep_wor(p, sam, pop)
        fsc = (pop - sam).quo((sam - 1) * pop)
        Math.sqrt(fsc * p * (1 - p))
      end

      # Total estimation sd based on sample.
      # Known proportion, sample without replacement.
      # Reference:
      # * Cochran(1972)
      #
      # Returns the population size times proportion_sd_kp_wor.
      def proportion_total_sd_kp_wor(prop, sam, pop)
        # Must use this method's own parameters: the previous body referenced
        # an undefined local (+pob+) and Kernel#p, so every call raised.
        pop * proportion_sd_kp_wor(prop, sam, pop)
      end

      # Total estimation sd based on sample.
      # Estimated proportion, sample without replacement.
      # Source: Cochran(1972)
      def proportion_total_sd_ep_wor(prop, sam, pop)
        fsc = (pop - sam).to_f / (sam - 1)
        Math.sqrt(fsc * pop * prop * (1 - prop))
      end

      ########################
      #
      # :SECTION: Mean estimation
      #
      ########################

      # Standard error. Known variance, sample with replacement.
      def standard_error_ksd_wr(s, sam, pop)
        s.quo(Math.sqrt(sam)) * Math.sqrt((pop - 1).quo(pop))
      end

      # Standard error of the mean. Known variance, sample w/o replacement.
      def standard_error_ksd_wor(s, sam, pop)
        s.quo(Math.sqrt(sam)) * Math.sqrt(qf(sam, pop))
      end

      alias_method :standard_error_esd_wr, :standard_error_ksd_wr

      # Standard error of the mean.
      # Estimated variance, without replacement.
      # Cochran (1972) p.47
      def standard_error_esd_wor(s, sam, pop)
        s.quo(Math.sqrt(sam)) * Math.sqrt(qf(sam, pop))
      end

      alias_method :standard_error, :standard_error_esd_wor
      alias_method :se, :standard_error_esd_wor

      # Standard error of total estimation: population size times +se+.
      def standard_error_total(s, sam, pop)
        pop * se(s, sam, pop)
      end

      # Confidence interval using T-Student.
      # Use with n < 60
      def mean_confidence_interval_t(mean, s, n_sample, n_population, margin = 0.95)
        t = Distribution::T.p_value(1 - (1 - margin).quo(2), n_sample - 1)
        mean_confidence_interval(mean, s, n_sample, n_population, t)
      end

      # Confidence interval using Z.
      # Use with n > 60
      def mean_confidence_interval_z(mean, s, n_sample, n_population, margin = 0.95)
        z = Distribution::Normal.p_value(1 - (1 - margin).quo(2))
        mean_confidence_interval(mean, s, n_sample, n_population, z)
      end

      # Confidence interval using X.
      #
      # Better use mean_confidence_interval_z or mean_confidence_interval_t.
      # Returns [mean - x * se, mean + x * se].
      def mean_confidence_interval(mean, s, n_sample, n_population, x)
        range = x * se(s, n_sample, n_population)
        [mean - range, mean + range]
      end
    end
  end
end
@@ -0,0 +1,82 @@
1
module Statsample
  # Module for several statistical tests
  module Test
    # NOTE(review): these autoload paths point at 'statsample/test/...';
    # confirm they resolve in this gem's load path.
    autoload(:UMannWhitney, 'statsample/test/umannwhitney')
    autoload(:Levene, 'statsample/test/levene')
    autoload(:T, 'statsample/test/t')
    autoload(:F, 'statsample/test/f')
    autoload(:ChiSquare, 'statsample/test/chisquare')
    autoload(:BartlettSphericity, 'statsample/test/bartlettsphericity')
    autoload(:KolmogorovSmirnov, 'statsample/test/kolmogorovsmirnov')
    autoload(:WilcoxonSignedRank, 'statsample/test/wilcoxonsignedrank')

    # Returns probability of getting a value lower or higher
    # than sample, using cdf and number of tails.
    #
    # * <tt>:left</tt> (or <tt>:negative</tt>) : one tail left, returns the cdf
    # * <tt>:right</tt> (or <tt>1</tt>, <tt>:positive</tt>) : one tail right, returns 1-cdf
    # * <tt>:both</tt> (or <tt>2</tt>, <tt>:two</tt>) : both tails, returns twice
    #   the smaller of cdf and 1-cdf
    #
    # Any other value of +tails+ yields nil.
    def p_using_cdf(cdf, tails = :both)
      side =
        if tails == 2 || tails == :two
          :both
        elsif tails == 1 || tails == :positive
          :right
        elsif tails == :negative
          :left
        else
          tails
        end

      case side
      when :left  then cdf
      when :right then 1 - cdf
      when :both  then 2 * (cdf >= 0.5 ? 1 - cdf : cdf)
      end
    end

    # Get critical t to create confidence interval
    def t_critical(confidence_level, df)
      lower_tail = (1 - confidence_level) / 2.0
      -Distribution::T.p_value(lower_tail, df)
    end

    # Get critical z to create confidence interval
    def z_critical(confidence_level)
      lower_tail = (1 - confidence_level) / 2.0
      -Distribution::Z.p_value(lower_tail)
    end

    extend self

    class << self
      # Calculate chi square for two Matrix (or two Vector).
      # Raises for any other class of +observed+.
      def chi_square(observed, expected = nil)
        tester =
          case observed
          when Vector then ChiSquare::WithVector
          when Matrix then ChiSquare::WithMatrix
          else raise "Not implemented for #{observed.class}"
          end
        tester.new(observed, expected)
      end

      # Shorthand for Statsample::Test::UMannWhitney.new
      #
      # * <tt>v1</tt> and <tt>v2</tt> should be Statsample::Vector.
      def u_mannwhitney(v1, v2)
        Statsample::Test::UMannWhitney.new(v1, v2)
      end

      # Shorthand for Statsample::Test::T::OneSample.new
      def t_one_sample(vector, opts = {})
        Statsample::Test::T::OneSample.new(vector, opts)
      end

      # Shorthand for Statsample::Test::T::TwoSamplesIndependent.new
      def t_two_samples_independent(v1, v2, opts = {})
        Statsample::Test::T::TwoSamplesIndependent.new(v1, v2, opts)
      end

      # Shorthand for Statsample::Test::WilcoxonSignedRank.new
      def wilcoxon_signed_rank(v1, v2, opts = {})
        Statsample::Test::WilcoxonSignedRank.new(v1, v2, opts)
      end

      # Shorthand for Statsample::Test::Levene.new
      def levene(input, opts = {})
        Statsample::Test::Levene.new(input, opts)
      end
    end
  end
end
@@ -0,0 +1,45 @@
1
module Statsample
  module Test
    # == Bartlett's test of Sphericity.
    # Tests the hypothesis that the sample correlation matrix
    # comes from a multivariate normal population where variables
    # are independent. In other words, the population correlation
    # matrix is the identity matrix.
    # == Reference
    # * Dziuban, C., & Shirkey E. (1974). When is a correlation matrix appropriate for factor analysis? Some decision rules. Psychological Bulletin, 81(6), 358-361.
    class BartlettSphericity
      include Statsample::Test
      include Summarizable
      # Name of the analysis, used in the report
      attr_accessor :name
      # Number of cases, number of variables (rows of the matrix),
      # test statistic and its degrees of freedom
      attr_reader :ncases, :nvars, :value, :df

      # Args
      # * _matrix_: correlation matrix
      # * _ncases_: number of cases
      #
      # The statistic and degrees of freedom are computed on creation.
      def initialize(matrix, ncases)
        @matrix = matrix
        @ncases = ncases
        @nvars = @matrix.row_size
        @name = _("Bartlett's test of sphericity")
        compute
      end

      # Uses SPSS formula.
      # On Dziuban & Shirkey, the minus between the first and second
      # statement is a *!!!
      #
      # Sets @value to -((ncases - 1) - (2 * nvars + 5) / 6) * ln(det(matrix))
      # and @df to nvars * (nvars - 1) / 2.
      def compute
        correction = (2 * @nvars + 5).quo(6)
        log_determinant = Math.log(@matrix.determinant)
        @value = -((@ncases - 1) - correction) * log_determinant
        @df = (@nvars * (@nvars - 1)) / 2
      end

      # One minus the chi-square cdf of the statistic at @df degrees of freedom.
      def probability
        1 - Distribution::ChiSquare.cdf(@value, @df)
      end

      def report_building(builder) # :nodoc:
        summary = format("%s : X(%d) = %0.4f , p = %0.4f", @name, @df, @value, probability)
        builder.text summary
      end
    end
  end
end
@@ -0,0 +1,73 @@
1
module Statsample
  module Test
    # Chi-square tests over a pair of observed/expected matrices or vectors.
    module ChiSquare
      # Readers and helpers shared by WithMatrix and WithVector.
      # Expects the including class to set @value and @df.
      module Shared
        # Degrees of freedom
        attr_reader :df
        # Chi-square statistic
        attr_reader :value

        # Returns the statistic as stored (no conversion is applied).
        def to_f
          @value
        end

        # Alias-like reader for the statistic.
        def chi_square
          @value
        end

        # One minus the chi-square cdf of the statistic at @df degrees of freedom.
        def probability
          1 - Distribution::ChiSquare.cdf(@value.to_f, @df)
        end
      end

      # Chi-square for matrices. When +expected+ is omitted, it is derived
      # from the row and column totals of +observed+.
      class WithMatrix
        include Statsample::Test::ChiSquare::Shared

        # * <tt>observed</tt>: matrix of observed values
        # * <tt>expected</tt>: matrix of expected values (optional)
        #
        # Raises RuntimeError if both matrices differ in size.
        def initialize(observed, expected = nil)
          @observed = observed
          # `||` instead of `or`: the low-precedence `or` only worked because
          # calculate_expected assigns @expected as a side effect.
          @expected = expected || calculate_expected
          if @observed.row_size != @expected.row_size || @observed.column_size != @expected.column_size
            raise "Observed size!=expected size"
          end
          @df = (@observed.row_size - 1) * (@observed.column_size - 1)
          @value = compute_chi
        end

        # Builds (and stores in @expected) the matrix whose cell (i, j) is
        # (row_total_i / total) * (column_total_j / total) * total.
        #
        # Relies on total_sum, row_sum and column_sum being available on
        # the observed matrix (they are not defined in this file).
        def calculate_expected
          total = @observed.total_sum
          # Totals are loop invariant, so fetch them once instead of per cell.
          row_totals = @observed.row_sum
          column_totals = @observed.column_sum
          @expected = Matrix.rows(
            Array.new(@observed.row_size) do |i|
              Array.new(@observed.column_size) do |j|
                (row_totals[i].quo(total) * column_totals[j].quo(total)) * total
              end
            end
          )
        end

        # Sum over every cell of (observed - expected)**2 / expected.
        def compute_chi
          (0...@observed.row_size).reduce(0) do |acc, i|
            (0...@observed.column_size).reduce(acc) do |partial, j|
              partial + ((@observed[i, j] - @expected[i, j])**2).quo(@expected[i, j])
            end
          end
        end
      end

      # Chi-square for vectors; +expected+ is mandatory.
      class WithVector
        include Statsample::Test::ChiSquare::Shared

        # * <tt>observed</tt>: observed values (must respond to size and [])
        # * <tt>expected</tt>: expected values, same size as +observed+
        #
        # Raises ArgumentError if +expected+ is nil and RuntimeError if the
        # sizes differ.
        def initialize(observed, expected)
          # A nil here used to surface as NoMethodError on nil.size.
          raise ArgumentError, "Expected values are required for a vector chi-square test" if expected.nil?

          @observed = observed
          @expected = expected
          raise "Observed size!=expected size" if @observed.size != @expected.size
          @df = @observed.size - 1
          @value = compute_chi
        end

        # Sum over every position of (observed - expected)**2 / expected.
        def compute_chi
          (0...@observed.size).reduce(0) do |acc, i|
            acc + ((@observed[i] - @expected[i])**2).quo(@expected[i])
          end
        end
      end
    end
  end
end
@@ -0,0 +1,52 @@
1
module Statsample
  module Test
    # From Wikipedia:
    # An F-test is any statistical test in which the test statistic has an F-distribution under the null hypothesis. It is most often used when comparing statistical models that have been fit to a data set, in order to identify the model that best fits the population from which the data were sampled.
    class F
      include Statsample::Test
      include Summarizable
      attr_reader :var_num, :var_den, :df_num, :df_den, :var_total, :df_total
      # Tails for probability (:both, :left or :right)
      attr_accessor :tails
      # Name of F analysis
      attr_accessor :name

      # Parameters:
      # * var_num: variance numerator
      # * var_den: variance denominator
      # * df_num: degrees of freedom numerator
      # * df_den: degrees of freedom denominator
      # * opts: optional :tails (default :right) and :name
      #
      # Totals are stored as the plain sums of both variances and of both
      # degrees of freedom. Raises if :tails is :both.
      def initialize(var_num, var_den, df_num, df_den, opts = {})
        @var_num = var_num
        @var_den = var_den
        @df_num = df_num
        @df_den = df_den
        @var_total = var_num + var_den
        @df_total = df_num + df_den

        defaults = { :tails => :right, :name => _("F Test") }
        @opts = defaults.merge(opts)
        raise "Tails should be right or left, not both" if @opts[:tails] == :both

        # Only the known option keys are pushed through their writers.
        defaults.each_key do |key|
          send("#{key}=", @opts[key])
        end
      end

      # F statistic: numerator variance over denominator variance.
      def f
        @var_num.quo(@var_den)
      end

      # Same value as #f.
      def to_f
        f
      end

      # Probability of the statistic under the F cdf, for the configured tails.
      def probability
        cumulative = Distribution::F.cdf(f, @df_num, @df_den)
        p_using_cdf(cumulative, tails)
      end

      def report_building(builder) #:nodoc:
        # Integer degrees of freedom are printed without decimals.
        whole_dfs = @df_num.is_a?(Integer) && @df_den.is_a?(Integer)
        template =
          if whole_dfs
            "%s : F(%d, %d) = %0.4f , p = %0.4f"
          else
            "%s : F(%0.2f, %0.2f) = %0.4f , p = %0.4f"
          end
        builder.text template % [@name, @df_num, @df_den, f, probability]
      end
    end
  end
end
@@ -0,0 +1,63 @@
1
module Statsample
  module Test
    # == Kolmogorov-Smirnov's test of equality of distributions.
    class KolmogorovSmirnov
      # Largest absolute difference found between both cdfs
      attr_reader :d
      include Statsample::Test
      include Summarizable

      # Creates a new Kolmogorov-Smirnov test.
      #
      # * <tt>d1</tt> should have an each method
      # * <tt>d2</tt> could be a Distribution class, with a cdf method,
      #   a vector or a lambda
      #
      # Raises if d1 lacks each, or if d2 offers neither cdf nor each and
      # is not a Proc. The statistic is computed on creation.
      def initialize(d1, d2)
        raise "First argument should have each method" unless d1.respond_to? :each
        @d1 = make_cdf(d1)
        if d2.respond_to?(:cdf) || d2.is_a?(Proc)
          @d2 = d2
        elsif d2.respond_to?(:each)
          @d2 = make_cdf(d2)
        else
          raise "Second argument should respond to cdf or each"
        end
        calculate
      end

      # Walks every observation of the first sample and keeps, in @d, the
      # largest absolute difference between its empirical cdf and the second
      # distribution evaluated at that observation.
      def calculate
        largest = 0
        @d1.each do |x|
          empirical = @d1.cdf(x)
          reference = @d2.is_a?(Proc) ? @d2.call(x) : @d2.cdf(x)
          gap = (empirical - reference).abs
          largest = gap.to_f if gap > largest
        end
        @d = largest
      end

      # Wraps any object implementing each in an EmpiricDistribution.
      # For a Daru::Vector, only the valid (non-missing) data is used.
      def make_cdf(v)
        data = v.is_a?(Daru::Vector) ? v.only_valid.to_a : v
        EmpiricDistribution.new(data)
      end

      # Empirical distribution over a collection of observations.
      class EmpiricDistribution
        # * <tt>data</tt>: collection responding to min, max, sort and size
        def initialize(data)
          @min = data.min
          @max = data.max
          @data = data.sort
          @n = data.size
        end

        # Yields every observation in ascending order.
        def each
          @data.each { |x| yield x }
        end

        # Empirical cdf: fraction of observations lower than or equal to x.
        #
        # Returns 0 below the minimum and 1 from the maximum onwards.
        def cdf(x)
          return 0 if x < @min
          return 1 if x >= @max

          # Count every observation <= x so that tied values are all
          # included; using the index of the first match plus one counted
          # only a single element of a run of ties.
          @data.count { |value| value <= x }.quo(@n)
        end
      end # End EmpiricDistribution
    end
  end
end