statsample-ekatena 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +15 -0
  3. data/.travis.yml +23 -0
  4. data/CONTRIBUTING.md +17 -0
  5. data/Gemfile +2 -0
  6. data/History.txt +457 -0
  7. data/LICENSE.txt +12 -0
  8. data/README.md +175 -0
  9. data/Rakefile +44 -0
  10. data/benchmarks/correlation_matrix_15_variables.rb +32 -0
  11. data/benchmarks/correlation_matrix_5_variables.rb +33 -0
  12. data/benchmarks/correlation_matrix_methods/correlation_matrix.ds +0 -0
  13. data/benchmarks/correlation_matrix_methods/correlation_matrix.html +93 -0
  14. data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +71 -0
  15. data/benchmarks/correlation_matrix_methods/correlation_matrix.xls +0 -0
  16. data/benchmarks/correlation_matrix_methods/correlation_matrix_gsl_ruby.ods +0 -0
  17. data/benchmarks/correlation_matrix_methods/correlation_matrix_with_graphics.ods +0 -0
  18. data/benchmarks/correlation_matrix_methods/results.ds +0 -0
  19. data/benchmarks/factor_map.rb +37 -0
  20. data/benchmarks/helpers_benchmark.rb +5 -0
  21. data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
  22. data/doc_latex/manual/equations.tex +78 -0
  23. data/examples/boxplot.rb +28 -0
  24. data/examples/chisquare_test.rb +23 -0
  25. data/examples/correlation_matrix.rb +32 -0
  26. data/examples/dataset.rb +30 -0
  27. data/examples/dominance_analysis.rb +33 -0
  28. data/examples/dominance_analysis_bootstrap.rb +32 -0
  29. data/examples/histogram.rb +26 -0
  30. data/examples/icc.rb +24 -0
  31. data/examples/levene.rb +29 -0
  32. data/examples/multiple_regression.rb +20 -0
  33. data/examples/multivariate_correlation.rb +33 -0
  34. data/examples/parallel_analysis.rb +40 -0
  35. data/examples/polychoric.rb +40 -0
  36. data/examples/principal_axis.rb +26 -0
  37. data/examples/reliability.rb +31 -0
  38. data/examples/scatterplot.rb +25 -0
  39. data/examples/t_test.rb +27 -0
  40. data/examples/tetrachoric.rb +17 -0
  41. data/examples/u_test.rb +24 -0
  42. data/examples/vector.rb +20 -0
  43. data/examples/velicer_map_test.rb +46 -0
  44. data/grab_references.rb +29 -0
  45. data/lib/spss.rb +134 -0
  46. data/lib/statsample-ekatena/analysis.rb +100 -0
  47. data/lib/statsample-ekatena/analysis/suite.rb +89 -0
  48. data/lib/statsample-ekatena/analysis/suitereportbuilder.rb +44 -0
  49. data/lib/statsample-ekatena/anova.rb +24 -0
  50. data/lib/statsample-ekatena/anova/contrast.rb +79 -0
  51. data/lib/statsample-ekatena/anova/oneway.rb +187 -0
  52. data/lib/statsample-ekatena/anova/twoway.rb +207 -0
  53. data/lib/statsample-ekatena/bivariate.rb +406 -0
  54. data/lib/statsample-ekatena/bivariate/pearson.rb +54 -0
  55. data/lib/statsample-ekatena/codification.rb +182 -0
  56. data/lib/statsample-ekatena/converter/csv.rb +28 -0
  57. data/lib/statsample-ekatena/converter/spss.rb +48 -0
  58. data/lib/statsample-ekatena/converters.rb +211 -0
  59. data/lib/statsample-ekatena/crosstab.rb +188 -0
  60. data/lib/statsample-ekatena/daru.rb +115 -0
  61. data/lib/statsample-ekatena/dataset.rb +10 -0
  62. data/lib/statsample-ekatena/dominanceanalysis.rb +425 -0
  63. data/lib/statsample-ekatena/dominanceanalysis/bootstrap.rb +232 -0
  64. data/lib/statsample-ekatena/factor.rb +104 -0
  65. data/lib/statsample-ekatena/factor/map.rb +124 -0
  66. data/lib/statsample-ekatena/factor/parallelanalysis.rb +166 -0
  67. data/lib/statsample-ekatena/factor/pca.rb +242 -0
  68. data/lib/statsample-ekatena/factor/principalaxis.rb +243 -0
  69. data/lib/statsample-ekatena/factor/rotation.rb +198 -0
  70. data/lib/statsample-ekatena/formula/fit_model.rb +46 -0
  71. data/lib/statsample-ekatena/formula/formula.rb +306 -0
  72. data/lib/statsample-ekatena/graph.rb +11 -0
  73. data/lib/statsample-ekatena/graph/boxplot.rb +236 -0
  74. data/lib/statsample-ekatena/graph/histogram.rb +198 -0
  75. data/lib/statsample-ekatena/graph/scatterplot.rb +213 -0
  76. data/lib/statsample-ekatena/histogram.rb +180 -0
  77. data/lib/statsample-ekatena/matrix.rb +329 -0
  78. data/lib/statsample-ekatena/multiset.rb +310 -0
  79. data/lib/statsample-ekatena/regression.rb +65 -0
  80. data/lib/statsample-ekatena/regression/multiple.rb +89 -0
  81. data/lib/statsample-ekatena/regression/multiple/alglibengine.rb +128 -0
  82. data/lib/statsample-ekatena/regression/multiple/baseengine.rb +251 -0
  83. data/lib/statsample-ekatena/regression/multiple/gslengine.rb +129 -0
  84. data/lib/statsample-ekatena/regression/multiple/matrixengine.rb +205 -0
  85. data/lib/statsample-ekatena/regression/multiple/rubyengine.rb +86 -0
  86. data/lib/statsample-ekatena/regression/simple.rb +121 -0
  87. data/lib/statsample-ekatena/reliability.rb +150 -0
  88. data/lib/statsample-ekatena/reliability/icc.rb +415 -0
  89. data/lib/statsample-ekatena/reliability/multiscaleanalysis.rb +181 -0
  90. data/lib/statsample-ekatena/reliability/scaleanalysis.rb +233 -0
  91. data/lib/statsample-ekatena/reliability/skillscaleanalysis.rb +114 -0
  92. data/lib/statsample-ekatena/resample.rb +15 -0
  93. data/lib/statsample-ekatena/shorthand.rb +125 -0
  94. data/lib/statsample-ekatena/srs.rb +169 -0
  95. data/lib/statsample-ekatena/test.rb +82 -0
  96. data/lib/statsample-ekatena/test/bartlettsphericity.rb +45 -0
  97. data/lib/statsample-ekatena/test/chisquare.rb +73 -0
  98. data/lib/statsample-ekatena/test/f.rb +52 -0
  99. data/lib/statsample-ekatena/test/kolmogorovsmirnov.rb +63 -0
  100. data/lib/statsample-ekatena/test/levene.rb +88 -0
  101. data/lib/statsample-ekatena/test/t.rb +309 -0
  102. data/lib/statsample-ekatena/test/umannwhitney.rb +208 -0
  103. data/lib/statsample-ekatena/test/wilcoxonsignedrank.rb +90 -0
  104. data/lib/statsample-ekatena/vector.rb +19 -0
  105. data/lib/statsample-ekatena/version.rb +3 -0
  106. data/lib/statsample.rb +282 -0
  107. data/po/es/statsample.mo +0 -0
  108. data/po/es/statsample.po +959 -0
  109. data/po/statsample.pot +947 -0
  110. data/references.txt +24 -0
  111. data/statsample-ekatena.gemspec +49 -0
  112. data/test/fixtures/bank2.dat +200 -0
  113. data/test/fixtures/correlation_matrix.rb +17 -0
  114. data/test/fixtures/df.csv +15 -0
  115. data/test/fixtures/hartman_23.matrix +9 -0
  116. data/test/fixtures/stock_data.csv +500 -0
  117. data/test/fixtures/tetmat_matrix.txt +5 -0
  118. data/test/fixtures/tetmat_test.txt +1001 -0
  119. data/test/helpers_tests.rb +83 -0
  120. data/test/test_analysis.rb +176 -0
  121. data/test/test_anova_contrast.rb +36 -0
  122. data/test/test_anovaoneway.rb +26 -0
  123. data/test/test_anovatwoway.rb +37 -0
  124. data/test/test_anovatwowaywithdataset.rb +47 -0
  125. data/test/test_anovawithvectors.rb +102 -0
  126. data/test/test_awesome_print_bug.rb +16 -0
  127. data/test/test_bartlettsphericity.rb +25 -0
  128. data/test/test_bivariate.rb +164 -0
  129. data/test/test_codification.rb +78 -0
  130. data/test/test_crosstab.rb +67 -0
  131. data/test/test_dominance_analysis.rb +39 -0
  132. data/test/test_factor.rb +228 -0
  133. data/test/test_factor_map.rb +38 -0
  134. data/test/test_factor_pa.rb +56 -0
  135. data/test/test_fit_model.rb +88 -0
  136. data/test/test_ggobi.rb +35 -0
  137. data/test/test_gsl.rb +15 -0
  138. data/test/test_histogram.rb +109 -0
  139. data/test/test_matrix.rb +48 -0
  140. data/test/test_multiset.rb +176 -0
  141. data/test/test_regression.rb +231 -0
  142. data/test/test_reliability.rb +223 -0
  143. data/test/test_reliability_icc.rb +198 -0
  144. data/test/test_reliability_skillscale.rb +57 -0
  145. data/test/test_resample.rb +24 -0
  146. data/test/test_srs.rb +9 -0
  147. data/test/test_statistics.rb +69 -0
  148. data/test/test_stest.rb +69 -0
  149. data/test/test_stratified.rb +17 -0
  150. data/test/test_test_f.rb +33 -0
  151. data/test/test_test_kolmogorovsmirnov.rb +34 -0
  152. data/test/test_test_t.rb +62 -0
  153. data/test/test_umannwhitney.rb +27 -0
  154. data/test/test_vector.rb +12 -0
  155. data/test/test_wilcoxonsignedrank.rb +64 -0
  156. metadata +570 -0
@@ -0,0 +1,169 @@
1
module Statsample
  # Several methods to estimate parameters for simple random sampling.
  #
  # Method-name suffixes follow the comments on each method:
  # * +kp+ / +ep+   : known / estimated proportion
  # * +ksd+ / +esd+ : known / estimated variance
  # * +wr+ / +wor+  : sample with / without replacement
  #
  # Throughout, +sam+ / +n_sample+ is the sample size and +pop+ /
  # +n_population+ is the population size.
  #
  # == Reference:
  # * Cochran, W.(1972). Sampling Techniques [spanish edition].
  # * http://stattrek.com/Lesson6/SRS.aspx
  module SRS
    class << self
      ########################
      #
      # :SECTION: Proportion estimation
      #
      # Function for estimation of proportions
      ########################

      # Finite population correction (over variance):
      # (pop - sam) / (pop - 1).
      # Source: Cochran(1972)
      def fpc_var(sam, pop)
        (pop - sam).quo(pop - 1)
      end

      # Finite population correction (over standard deviation):
      # square root of (pop - sam) / (pop - 1).
      def fpc(sam, pop)
        Math.sqrt((pop - sam).quo(pop - 1))
      end

      # Non sample fraction.
      #
      # 1 - sample fraction
      def qf(sam, pop)
        1 - sam.quo(pop)
      end

      # Sample size estimation for proportions, infinite population.
      #
      # Returns t**2 * prop * (1 - prop) / d**2, where t is the value
      # given by Distribution::Normal.p_value for the probability
      # 1 - (1 - margin) / 2.
      # NOTE(review): +d+ is presumably the tolerated absolute error and
      # +margin+ the confidence level; confirm against callers.
      def estimation_n0(d, prop, margin = 0.95)
        t = Distribution::Normal.p_value(1 - (1 - margin).quo(2))
        var = prop * (1 - prop)
        t**2 * var.quo(d**2)
      end

      # Sample size estimation for proportions, finite population.
      #
      # Adjusts estimation_n0 for a population of size +n_pobl+:
      # n0 / (1 + (n0 - 1) / n_pobl).
      def estimation_n(d, prop, n_pobl, margin = 0.95)
        n0 = estimation_n0(d, prop, margin)
        n0.quo(1 + (n0 - 1).quo(n_pobl))
      end

      # Proportion confidence interval with t values
      # Uses estimated proportion, sample without replacement.
      #
      # The t value is requested with n_sample - 1 degrees of freedom.
      # Returns [lower, upper].
      def proportion_confidence_interval_t(prop, n_sample, n_population, margin = 0.95)
        t = Distribution::T.p_value(1 - (1 - margin).quo(2), n_sample - 1)
        proportion_confidence_interval(prop, n_sample, n_population, t)
      end

      # Proportion confidence interval with z values
      # Uses estimated proportion, sample without replacement.
      # Returns [lower, upper].
      def proportion_confidence_interval_z(p, n_sample, n_population, margin = 0.95)
        z = Distribution::Normal.p_value(1 - (1 - margin).quo(2))
        proportion_confidence_interval(p, n_sample, n_population, z)
      end

      # Proportion confidence interval with x value
      # Uses estimated proportion, sample without replacement
      #
      # The half width is x * sqrt(qf * p * (1 - p) / (sam - 1)) plus the
      # additive term 1 / (2 * sam). Returns [p - half, p + half].
      def proportion_confidence_interval(p, sam, pop, x)
        one_range = x * Math.sqrt((qf(sam, pop) * p * (1 - p)).quo(sam - 1)) + 1.quo(sam * 2.0)
        [p - one_range, p + one_range]
      end

      # Standard deviation for sample distribution of a proportion
      # Known proportion, sample with replacement.
      # Based on http://stattrek.com/Lesson6/SRS.aspx
      def proportion_sd_kp_wr(p, n_sample)
        Math.sqrt(p * (1 - p).quo(n_sample))
      end

      # Standard deviation for sample distribution of a proportion
      # Known proportion, sample without replacement.
      #
      # Sources:
      # * Cochran(1972)
      def proportion_sd_kp_wor(p, sam, pop)
        fpc(sam, pop) * Math.sqrt(p * (1 - p).quo(sam))
      end

      # Standard deviation for sample distribution of a proportion
      # Estimated proportion, sample with replacement
      # Based on http://stattrek.com/Lesson6/SRS.aspx.
      def proportion_sd_ep_wr(p, n_sample)
        Math.sqrt(p * (1 - p).quo(n_sample - 1))
      end

      # Standard deviation for sample distribution of a proportion.
      # Estimated proportion, sample without replacement.
      # Reference:
      # * Cochran, 1972, Técnicas de muestreo
      def proportion_sd_ep_wor(p, sam, pop)
        fsc = (pop - sam).quo((sam - 1) * pop)
        Math.sqrt(fsc * p * (1 - p))
      end

      # Total estimation sd based on sample.
      # Known proportion, sample without replacement
      #
      # Population size times proportion_sd_kp_wor for the same arguments.
      # Reference:
      # * Cochran(1972)
      def proportion_total_sd_kp_wor(prop, sam, pop)
        pop * proportion_sd_kp_wor(prop, sam, pop)
      end

      # Total estimation sd based on sample.
      # Estimated proportion, sample without replacement
      # Source: Cochran(1972)
      def proportion_total_sd_ep_wor(prop, sam, pop)
        fsc = (pop - sam).to_f / (sam - 1)
        Math.sqrt(fsc * pop * prop * (1 - prop))
      end

      ########################
      #
      # :SECTION: Mean estimation
      #
      ########################

      # Standard error. Known variance, sample with replacement.
      #
      # s / sqrt(sam), scaled by sqrt((pop - 1) / pop).
      def standard_error_ksd_wr(s, sam, pop)
        s.quo(Math.sqrt(sam)) * Math.sqrt((pop - 1).quo(pop))
      end

      # Standard error of the mean. Known variance, sample w/o replacement
      #
      # s / sqrt(sam), scaled by the square root of the non sample fraction.
      def standard_error_ksd_wor(s, sam, pop)
        s.quo(Math.sqrt(sam)) * Math.sqrt(qf(sam, pop))
      end

      alias_method :standard_error_esd_wr, :standard_error_ksd_wr

      # Standard error of the mean.
      # Estimated variance, without replacement
      # Cochran (1972) p.47
      def standard_error_esd_wor(s, sam, pop)
        s.quo(Math.sqrt(sam)) * Math.sqrt(qf(sam, pop))
      end

      alias_method :standard_error, :standard_error_esd_wor
      alias_method :se, :standard_error_esd_wor

      # Standard error of total estimation:
      # population size times the standard error of the mean.
      def standard_error_total(s, sam, pop)
        pop * se(s, sam, pop)
      end

      # Confidence Interval using T-Student
      # Use with n < 60
      #
      # The t value is requested with n_sample - 1 degrees of freedom.
      # Returns [lower, upper].
      def mean_confidence_interval_t(mean, s, n_sample, n_population, margin = 0.95)
        t = Distribution::T.p_value(1 - ((1 - margin) / 2), n_sample - 1)
        mean_confidence_interval(mean, s, n_sample, n_population, t)
      end

      # Confidence Interval using Z
      # Use with n > 60
      # Returns [lower, upper].
      def mean_confidence_interval_z(mean, s, n_sample, n_population, margin = 0.95)
        z = Distribution::Normal.p_value(1 - ((1 - margin) / 2))
        mean_confidence_interval(mean, s, n_sample, n_population, z)
      end

      # Confidence interval using X.
      #
      # Better use mean_confidence_interval_z or mean_confidence_interval_t
      #
      # The half width is x times the standard error (+se+).
      # Returns [mean - half, mean + half].
      def mean_confidence_interval(mean, s, n_sample, n_population, x)
        range = x * se(s, n_sample, n_population)
        [mean - range, mean + range]
      end
    end
  end
end
@@ -0,0 +1,82 @@
1
module Statsample
  # Module for several statistical tests.
  #
  # The helper methods are available both as mixin methods (classes in
  # this namespace include the module) and directly on the module
  # itself, through <tt>extend self</tt>.
  module Test
    # NOTE(review): these paths use the 'statsample/' prefix while this
    # package lists its files under lib/statsample-ekatena/ -- confirm
    # they resolve on the load path.
    autoload(:UMannWhitney, 'statsample/test/umannwhitney')
    autoload(:Levene, 'statsample/test/levene')
    autoload(:T, 'statsample/test/t')
    autoload(:F, 'statsample/test/f')
    autoload(:ChiSquare, 'statsample/test/chisquare')
    autoload(:BartlettSphericity, 'statsample/test/bartlettsphericity')
    autoload(:KolmogorovSmirnov, 'statsample/test/kolmogorovsmirnov')
    autoload(:WilcoxonSignedRank, 'statsample/test/wilcoxonsignedrank')

    # Returns probability of getting a value lower or higher
    # than sample, using cdf and number of tails.
    #
    # * <tt>:left</tt> (or <tt>:negative</tt>) : one tail left, returns the cdf
    # * <tt>:right</tt> (or <tt>1</tt>, <tt>:positive</tt>) : one tail right, returns 1-cdf
    # * <tt>:both</tt> (or <tt>2</tt>, <tt>:two</tt>) : both tails, returns twice
    #   the smaller of cdf and 1-cdf
    #
    # Any other value of +tails+ yields +nil+.
    def p_using_cdf(cdf, tails = :both)
      side = case tails
             when 2, :two then :both
             when 1, :positive then :right
             when :negative then :left
             else tails
             end

      if side == :left
        cdf
      elsif side == :right
        1 - cdf
      elsif side == :both
        2 * (cdf >= 0.5 ? 1 - cdf : cdf)
      end
    end

    # Get critical t to create confidence interval.
    #
    # Negates the value Distribution::T.p_value gives for the
    # probability (1 - confidence_level) / 2 with +df+ degrees of freedom.
    def t_critical(confidence_level, df)
      lower_tail = (1 - confidence_level) / 2.0
      -Distribution::T.p_value(lower_tail, df)
    end

    # Get critical z to create confidence interval.
    #
    # Negates the value Distribution::Z.p_value gives for the
    # probability (1 - confidence_level) / 2.
    def z_critical(confidence_level)
      lower_tail = (1 - confidence_level) / 2.0
      -Distribution::Z.p_value(lower_tail)
    end

    extend self

    class << self
      # Calculate chi square for two Vector or two Matrix objects.
      #
      # Builds a ChiSquare::WithVector or ChiSquare::WithMatrix
      # depending on the class of +observed+; raises for anything else.
      def chi_square(observed, expected = nil)
        if observed.is_a?(Vector)
          ChiSquare::WithVector.new(observed, expected)
        elsif observed.is_a?(Matrix)
          ChiSquare::WithMatrix.new(observed, expected)
        else
          raise "Not implemented for #{observed.class}"
        end
      end

      # Shorthand for Statsample::Test::UMannWhitney.new
      #
      # * <tt>v1</tt> and <tt>v2</tt> should be Statsample::Vector.
      def u_mannwhitney(v1, v2)
        Statsample::Test::UMannWhitney.new(v1, v2)
      end

      # Shorthand for Statsample::Test::T::OneSample.new
      def t_one_sample(vector, opts = {})
        Statsample::Test::T::OneSample.new(vector, opts)
      end

      # Shorthand for Statsample::Test::T::TwoSamplesIndependent.new
      def t_two_samples_independent(v1, v2, opts = {})
        Statsample::Test::T::TwoSamplesIndependent.new(v1, v2, opts)
      end

      # Shorthand for Statsample::Test::WilcoxonSignedRank.new
      def wilcoxon_signed_rank(v1, v2, opts = {})
        Statsample::Test::WilcoxonSignedRank.new(v1, v2, opts)
      end

      # Shorthand for Statsample::Test::Levene.new
      def levene(input, opts = {})
        Statsample::Test::Levene.new(input, opts)
      end
    end
  end
end
@@ -0,0 +1,45 @@
1
module Statsample
  module Test
    # == Bartlett's test of Sphericity.
    # Test the hypothesis that the sample correlation matrix
    # comes from a multivariate normal population where variables
    # are independent. In other words, the population correlation
    # matrix is the identity matrix.
    # == Reference
    # * Dziuban, C., & Shirkey E. (1974). When is a correlation matrix appropriate for factor analysis? Some decision rules. Psychological Bulletin, 81(6), 358-361.
    class BartlettSphericity
      include Statsample::Test
      include Summarizable

      # Label used when reporting.
      attr_accessor :name
      # Number of cases, number of variables (rows of the matrix),
      # test statistic and its degrees of freedom.
      attr_reader :ncases, :nvars, :value, :df

      # Args
      # * _matrix_: correlation matrix
      # * _ncases_: number of cases
      #
      # The statistic is computed immediately on construction.
      def initialize(matrix, ncases)
        @matrix = matrix
        @ncases = ncases
        @nvars = @matrix.row_size
        @name = _("Bartlett's test of sphericity")
        compute
      end

      # Uses SPSS formula.
      # On Dziuban & Shirkey, the minus between the first and second
      # statement is a *!!!
      #
      # Sets +value+ to -((ncases - 1) - (2 * nvars + 5) / 6) * ln(det(matrix))
      # and +df+ to nvars * (nvars - 1) / 2.
      def compute
        scale = (@ncases - 1) - (2 * @nvars + 5).quo(6)
        @value = -scale * Math.log(@matrix.determinant)
        @df = (@nvars * (@nvars - 1)) / 2
      end

      # Upper-tail probability of +value+ under a chi square
      # distribution with +df+ degrees of freedom.
      def probability
        1 - Distribution::ChiSquare.cdf(@value, @df)
      end

      def report_building(builder) # :nodoc:
        builder.text(format("%s : X(%d) = %0.4f , p = %0.4f", @name, @df, @value, probability))
      end
    end
  end
end
@@ -0,0 +1,73 @@
1
module Statsample
  module Test
    # Chi square statistics for observed versus expected frequencies.
    module ChiSquare
      # Readers shared by the vector and matrix variants. Including
      # classes are expected to set @value and @df.
      module Shared
        # Degrees of freedom and chi square statistic.
        attr_reader :df, :value

        # Both names return the stored statistic, exactly as +value+ does.
        alias_method :to_f, :value
        alias_method :chi_square, :value

        # Upper-tail probability of the statistic under a chi square
        # distribution with +df+ degrees of freedom.
        def probability
          1 - Distribution::ChiSquare.cdf(@value.to_f, @df)
        end
      end

      # Chi square for two matrices of equal dimensions.
      class WithMatrix
        include Statsample::Test::ChiSquare::Shared

        # * +observed+: matrix of observed frequencies
        # * +expected+: matrix of expected frequencies; when omitted it is
        #   derived from the margins of +observed+ (see calculate_expected)
        #
        # Raises if the two matrices differ in row or column count.
        def initialize(observed, expected = nil)
          @observed = observed
          @expected = expected
          calculate_expected unless @expected
          if @observed.row_size != @expected.row_size || @observed.column_size != @expected.column_size
            raise "Observed size!=expected size"
          end
          @df = (@observed.row_size - 1) * (@observed.column_size - 1)
          @value = compute_chi
        end

        # Stores in @expected a Matrix whose cell (i, j) is
        # (row_sum[i] / total) * (column_sum[j] / total) * total.
        def calculate_expected
          sum = @observed.total_sum
          rows = Array.new(@observed.row_size) do |i|
            Array.new(@observed.column_size) do |j|
              (@observed.row_sum[i].quo(sum) * @observed.column_sum[j].quo(sum)) * sum
            end
          end
          @expected = Matrix.rows(rows)
        end

        # Sum over every cell of (observed - expected)**2 / expected,
        # accumulated in row-major order.
        def compute_chi
          row_indexes = (0...@observed.row_size).to_a
          column_indexes = (0...@observed.column_size).to_a
          row_indexes.product(column_indexes).inject(0) do |acc, (i, j)|
            acc + ((@observed[i, j] - @expected[i, j])**2).quo(@expected[i, j])
          end
        end
      end

      # Chi square for two vectors of equal size.
      class WithVector
        include Statsample::Test::ChiSquare::Shared

        # * +observed+: observed frequencies
        # * +expected+: expected frequencies
        #
        # Raises if the sizes differ. Degrees of freedom are size - 1.
        def initialize(observed, expected)
          @observed = observed
          @expected = expected
          raise "Observed size!=expected size" unless @observed.size == @expected.size
          @df = @observed.size - 1
          @value = compute_chi
        end

        # Sum over every position of (observed - expected)**2 / expected.
        def compute_chi
          (0...@observed.size).inject(0) do |acc, i|
            acc + ((@observed[i] - @expected[i])**2).quo(@expected[i])
          end
        end
      end
    end
  end
end
@@ -0,0 +1,52 @@
1
module Statsample
  module Test
    # From Wikipedia:
    # An F-test is any statistical test in which the test statistic has an F-distribution under the null hypothesis. It is most often used when comparing statistical models that have been fit to a data set, in order to identify the model that best fits the population from which the data were sampled.
    class F
      include Statsample::Test
      include Summarizable

      attr_reader :var_num, :var_den, :df_num, :df_den, :var_total, :df_total
      # Tails for probability (:both, :left or :right)
      attr_accessor :tails
      # Name of F analysis
      attr_accessor :name

      # Parameters:
      # * var_num: variance numerator
      # * var_den: variance denominator
      # * df_num: degrees of freedom numerator
      # * df_den: degrees of freedom denominator
      # * opts: optional :tails (default :right) and :name
      #
      # Raises if :tails is :both.
      def initialize(var_num, var_den, df_num, df_den, opts = {})
        @var_num = var_num
        @var_den = var_den
        @df_num = df_num
        @df_den = df_den
        @var_total = var_num + var_den
        @df_total = df_num + df_den

        defaults = { :tails => :right, :name => _("F Test") }
        @opts = defaults.merge(opts)
        raise "Tails should be right or left, not both" if @opts[:tails] == :both
        # Only the recognised option keys are pushed through their writers.
        defaults.each_key { |key| send("#{key}=", @opts[key]) }
      end

      # F statistic: numerator variance over denominator variance.
      def f
        @var_num.quo(@var_den)
      end

      # Same value as +f+.
      def to_f
        f
      end

      # Probability of +f+ for the configured +tails+, derived from the
      # F distribution cdf with df_num and df_den degrees of freedom.
      def probability
        cdf = Distribution::F.cdf(f, @df_num, @df_den)
        p_using_cdf(cdf, tails)
      end

      def report_building(builder) #:nodoc:
        # Integer degrees of freedom are printed without decimals.
        template =
          if @df_num.is_a?(Integer) && @df_den.is_a?(Integer)
            "%s : F(%d, %d) = %0.4f , p = %0.4f"
          else
            "%s : F(%0.2f, %0.2f) = %0.4f , p = %0.4f"
          end
        builder.text(template % [@name, @df_num, @df_den, f, probability])
      end
    end
  end
end
@@ -0,0 +1,63 @@
1
+ module Statsample
2
+ module Test
3
+ # == Kolmogorov-Smirnov's test of equality of distributions.
4
+ class KolmogorovSmirnov
5
+
6
+ attr_reader :d
7
+ include Statsample::Test
8
+ include Summarizable
9
+ # Creates a new Kolmogorov-Smirnov test
10
+ # d1 should have each method
11
+ # d2 could be a Distribution class, with a cdf method,
12
+ # a vector or a lambda
13
+ def initialize(d1,d2)
14
+ raise "First argument should have each method" unless d1.respond_to? :each
15
+ @d1=make_cdf(d1)
16
+ if d2.respond_to? :cdf or d2.is_a? Proc
17
+ @d2=d2
18
+ elsif d2.respond_to? :each
19
+ @d2=make_cdf(d2)
20
+ else
21
+ raise "Second argument should respond to cdf or each"
22
+ end
23
+ calculate
24
+ end
25
+
26
# Stores in @d the largest absolute difference between the cdf of
# @d1 and the reference @d2, evaluated at each value yielded by @d1.
# @d2 is invoked with +call+ when it is a Proc, otherwise through +cdf+.
# @d stays at 0 when no difference is greater than zero.
def calculate
  largest = 0
  @d1.each do |x|
    empirical = @d1.cdf(x)
    reference = @d2.is_a?(Proc) ? @d2.call(x) : @d2.cdf(x)
    gap = (empirical - reference).abs
    largest = gap.to_f if gap > largest
  end
  @d = largest
end
35
+
36
# Wraps +v+ in an EmpiricDistribution.
# A Daru::Vector contributes only its valid (non-missing) values,
# converted to an array; any other object is passed through as is.
def make_cdf(v)
  data = v.is_a?(Daru::Vector) ? v.only_valid.to_a : v
  EmpiricDistribution.new(data)
end
41
+
42
# Empirical cumulative distribution over a finite collection of
# mutually comparable values.
class EmpiricDistribution
  # * +data+: collection responding to +min+, +max+, +sort+ and +size+
  def initialize(data)
    @min = data.min
    @max = data.max
    @data = data.sort
    @n = data.size
  end

  # Yields each stored value in ascending order.
  def each
    @data.each { |value| yield value }
  end

  # Proportion of stored values that are less than or equal to +x+.
  #
  # Returns 0 below the minimum and 1 from the maximum upwards.
  # Repeated values are all counted, so the step at a tied value
  # has the height of every tied observation.
  def cdf(x)
    return 0 if x < @min
    return 1 if x >= @max
    @data.count { |value| value <= x }.quo(@n)
  end
end # End EmpiricDistribution
61
+ end
62
+ end
63
+ end